author		Kent Overstreet <kmo@daterainc.com>	2013-09-24 02:17:31 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-24 17:41:43 -0400
commit		c2a4f3183a1248f615a695fbd8905da55ad11bba (patch)
tree		51233866301869506c0728c812fe3668ae1e94ce /drivers/md/bcache/writeback.c
parent		61cbd250f867f98bb4738000afc6002d6f2b14bd (diff)
bcache: Fix a writeback performance regression
Background writeback works by scanning the btree for dirty data, adding
those keys to a fixed-size buffer (the keybuf), and then writing each
dirty key in the keybuf out to the backing device.
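In outline the cycle looks like this - a minimal sketch only, where
every "toy_" identifier is an illustrative placeholder rather than a
real bcache API (the actual entry points are refill_dirty() and
read_dirty() in writeback.c):

static void toy_writeback_cycle(struct cached_dev *dc)
{
	struct keybuf_key *w;

	/* 1. Scan the btree for dirty data and fill the fixed-size keybuf. */
	toy_refill_keybuf(dc);

	/*
	 * 2. For each dirty key, read the data from the cache and write it
	 * out to the backing device.  The key stays in the keybuf until its
	 * IO completes, so foreground writes can check against it and avoid
	 * races.
	 */
	while ((w = toy_next_dirty_key(dc)) != NULL)
		toy_write_to_backing_device(dc, w);

	/* 3. Once the keybuf is drained, rescan for more dirty data. */
}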
When read_dirty() finishes and it's time to scan for more dirty data, we
need to wait for the outstanding writeback IO to finish - those IOs
still take up slots in the keybuf (so that foreground writes can check
against them to avoid races). Without that wait, we'll continually
rescan when we can add at most a key or two to the keybuf, and that
rescanning takes locks that starve foreground IO. Doh.
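The fix below replaces the open-coded throttle - an atomic counter plus
closure_wait_event() against a limit of 64 - with a counting semaphore,
so read_dirty() simply sleeps once 64 writeback IOs are in flight and
the refill only happens after they drain. The relevant fragments,
condensed from the patch:

	/* bch_cached_dev_writeback_init(): allow up to 64 IOs in flight */
	sema_init(&dc->in_flight, 64);

	/* read_dirty(): blocks while all 64 slots are taken */
	down(&dc->in_flight);
	closure_call(&io->cl, read_dirty_submit, NULL, cl);

	/* write_dirty_finish(): release a slot as each IO completes */
	up(&dc->in_flight);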
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/md/bcache/writeback.c')
 drivers/md/bcache/writeback.c | 43 +++++++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 22 deletions(-)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 22cbff551628..27ac51934822 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
+	uint64_t ret;
+
 	if (atomic_read(&dc->disk.detaching) ||
 	    !dc->writeback_percent)
 		return 0;
 
-	return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+	ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+
+	return min_t(uint64_t, ret, HZ);
 }
 
 /* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
 
 	up_write(&dc->writeback_lock);
 
-	ratelimit_reset(&dc->writeback_rate);
+	bch_ratelimit_reset(&dc->writeback_rate);
 
 	/* Punt to workqueue only so we don't recurse and blow the stack */
 	continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
 	}
 
 	bch_keybuf_del(&dc->writeback_keys, w);
-	atomic_dec_bug(&dc->in_flight);
-
-	closure_wake_up(&dc->writeback_wait);
+	up(&dc->in_flight);
 
 	closure_return_with_destructor(cl, dirty_io_destructor);
 }
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)
 
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-	continue_at(cl, write_dirty_finish, dirty_wq);
+	continue_at(cl, write_dirty_finish, system_wq);
 }
 
 static void read_dirty_endio(struct bio *bio, int error)
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
 
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-	continue_at(cl, write_dirty, dirty_wq);
+	continue_at(cl, write_dirty, system_wq);
 }
 
 static void read_dirty(struct closure *cl)
@@ -394,12 +396,9 @@ static void read_dirty(struct closure *cl)
 
 		if (delay > 0 &&
 		    (KEY_START(&w->key) != dc->last_read ||
-		     jiffies_to_msecs(delay) > 50)) {
-			w->private = NULL;
-
-			closure_delay(&dc->writeback, delay);
-			continue_at(cl, read_dirty, dirty_wq);
-		}
+		     jiffies_to_msecs(delay) > 50))
+			while (delay)
+				delay = schedule_timeout(delay);
 
 		dc->last_read	= KEY_OFFSET(&w->key);
 
@@ -424,15 +423,10 @@
 
 		trace_bcache_writeback(&w->key);
 
-		closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
+		down(&dc->in_flight);
+		closure_call(&io->cl, read_dirty_submit, NULL, cl);
 
 		delay = writeback_delay(dc, KEY_SIZE(&w->key));
-
-		atomic_inc(&dc->in_flight);
-
-		if (!closure_wait_event(&dc->writeback_wait, cl,
-					atomic_read(&dc->in_flight) < 64))
-			continue_at(cl, read_dirty, dirty_wq);
 	}
 
 	if (0) {
@@ -442,7 +436,11 @@ err:
 		bch_keybuf_del(&dc->writeback_keys, w);
 	}
 
-	refill_dirty(cl);
+	/*
+	 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
+	 * freed) before refilling again
+	 */
+	continue_at(cl, refill_dirty, dirty_wq);
 }
 
 /* Init */
@@ -484,6 +482,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
 
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
+	sema_init(&dc->in_flight, 64);
 	closure_init_unlocked(&dc->writeback);
 	init_rwsem(&dc->writeback_lock);
 
@@ -513,7 +512,7 @@ void bch_writeback_exit(void)
 
 int __init bch_writeback_init(void)
 {
-	dirty_wq = create_singlethread_workqueue("bcache_writeback");
+	dirty_wq = create_workqueue("bcache_writeback");
 	if (!dirty_wq)
 		return -ENOMEM;
 