diff options
author | Joe Thornber <ejt@redhat.com> | 2014-11-06 05:18:04 -0500 |
---|---|---|
committer | Mike Snitzer <snitzer@redhat.com> | 2014-11-10 15:25:30 -0500 |
commit | 7ae34e7778966d39f66397491eb114b613202c20 (patch) | |
tree | d743fdd6c548a7452b4cc5c1ef0cf15fdf321068 /drivers/md | |
parent | 08b184514f65d160ce66381dafca5962e3d8f785 (diff) |
dm cache: improve discard support
Safely allow the discard blocksize to be larger than the cache blocksize
by using the bio prison's range locking support. This also improves
discard performance considerably because larger discards are issued to the
dm-cache device. The discard blocksize was always intended to be
greater than the cache blocksize. But until now it wasn't implemented
safely.
Also, by safely restoring the ability to have discard blocksize larger
than cache blocksize we're able to significantly reduce the memory used
for the cache's discard bitset. Before, with a small discard blocksize,
the discard bitset could get quite large because its size is a function
of the discard blocksize and the origin device's size. For example,
previously, using a 32KB cache blocksize with a 40TB origin resulted in
a 1280Mbit (160MB) in-core discard bitset! Now, the discard
blocksize is scaled up accordingly to ensure the discard bitset is
capped at 2**14 bits, or 2KB.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-cache-target.c | 166 |
1 files changed, 121 insertions, 45 deletions
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index c2ca74374944..6e36a0753105 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c | |||
@@ -310,6 +310,7 @@ struct dm_cache_migration { | |||
310 | dm_cblock_t cblock; | 310 | dm_cblock_t cblock; |
311 | 311 | ||
312 | bool err:1; | 312 | bool err:1; |
313 | bool discard:1; | ||
313 | bool writeback:1; | 314 | bool writeback:1; |
314 | bool demote:1; | 315 | bool demote:1; |
315 | bool promote:1; | 316 | bool promote:1; |
@@ -433,12 +434,12 @@ static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cel | |||
433 | 434 | ||
434 | /*----------------------------------------------------------------*/ | 435 | /*----------------------------------------------------------------*/ |
435 | 436 | ||
436 | static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) | 437 | static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key) |
437 | { | 438 | { |
438 | key->virtual = 0; | 439 | key->virtual = 0; |
439 | key->dev = 0; | 440 | key->dev = 0; |
440 | key->block_begin = from_oblock(oblock); | 441 | key->block_begin = from_oblock(begin); |
441 | key->block_end = key->block_begin + 1ULL; | 442 | key->block_end = from_oblock(end); |
442 | } | 443 | } |
443 | 444 | ||
444 | /* | 445 | /* |
@@ -448,15 +449,15 @@ static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) | |||
448 | */ | 449 | */ |
449 | typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); | 450 | typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); |
450 | 451 | ||
451 | static int bio_detain(struct cache *cache, dm_oblock_t oblock, | 452 | static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end, |
452 | struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, | 453 | struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, |
453 | cell_free_fn free_fn, void *free_context, | 454 | cell_free_fn free_fn, void *free_context, |
454 | struct dm_bio_prison_cell **cell_result) | 455 | struct dm_bio_prison_cell **cell_result) |
455 | { | 456 | { |
456 | int r; | 457 | int r; |
457 | struct dm_cell_key key; | 458 | struct dm_cell_key key; |
458 | 459 | ||
459 | build_key(oblock, &key); | 460 | build_key(oblock_begin, oblock_end, &key); |
460 | r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); | 461 | r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); |
461 | if (r) | 462 | if (r) |
462 | free_fn(free_context, cell_prealloc); | 463 | free_fn(free_context, cell_prealloc); |
@@ -464,6 +465,16 @@ static int bio_detain(struct cache *cache, dm_oblock_t oblock, | |||
464 | return r; | 465 | return r; |
465 | } | 466 | } |
466 | 467 | ||
468 | static int bio_detain(struct cache *cache, dm_oblock_t oblock, | ||
469 | struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, | ||
470 | cell_free_fn free_fn, void *free_context, | ||
471 | struct dm_bio_prison_cell **cell_result) | ||
472 | { | ||
473 | dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); | ||
474 | return bio_detain_range(cache, oblock, end, bio, | ||
475 | cell_prealloc, free_fn, free_context, cell_result); | ||
476 | } | ||
477 | |||
467 | static int get_cell(struct cache *cache, | 478 | static int get_cell(struct cache *cache, |
468 | dm_oblock_t oblock, | 479 | dm_oblock_t oblock, |
469 | struct prealloc *structs, | 480 | struct prealloc *structs, |
@@ -475,7 +486,7 @@ static int get_cell(struct cache *cache, | |||
475 | 486 | ||
476 | cell_prealloc = prealloc_get_cell(structs); | 487 | cell_prealloc = prealloc_get_cell(structs); |
477 | 488 | ||
478 | build_key(oblock, &key); | 489 | build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key); |
479 | r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); | 490 | r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); |
480 | if (r) | 491 | if (r) |
481 | prealloc_put_cell(structs, cell_prealloc); | 492 | prealloc_put_cell(structs, cell_prealloc); |
@@ -525,25 +536,34 @@ static dm_block_t block_div(dm_block_t b, uint32_t n) | |||
525 | return b; | 536 | return b; |
526 | } | 537 | } |
527 | 538 | ||
528 | static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) | 539 | static dm_block_t oblocks_per_dblock(struct cache *cache) |
529 | { | 540 | { |
530 | uint32_t discard_blocks = cache->discard_block_size; | 541 | dm_block_t oblocks = cache->discard_block_size; |
531 | dm_block_t b = from_oblock(oblock); | ||
532 | 542 | ||
533 | if (!block_size_is_power_of_two(cache)) | 543 | if (block_size_is_power_of_two(cache)) |
534 | discard_blocks = discard_blocks / cache->sectors_per_block; | 544 | oblocks >>= cache->sectors_per_block_shift; |
535 | else | 545 | else |
536 | discard_blocks >>= cache->sectors_per_block_shift; | 546 | oblocks = block_div(oblocks, cache->sectors_per_block); |
537 | 547 | ||
538 | b = block_div(b, discard_blocks); | 548 | return oblocks; |
549 | } | ||
550 | |||
551 | static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) | ||
552 | { | ||
553 | return to_dblock(block_div(from_oblock(oblock), | ||
554 | oblocks_per_dblock(cache))); | ||
555 | } | ||
539 | 556 | ||
540 | return to_dblock(b); | 557 | static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock) |
558 | { | ||
559 | return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache)); | ||
541 | } | 560 | } |
542 | 561 | ||
543 | static void set_discard(struct cache *cache, dm_dblock_t b) | 562 | static void set_discard(struct cache *cache, dm_dblock_t b) |
544 | { | 563 | { |
545 | unsigned long flags; | 564 | unsigned long flags; |
546 | 565 | ||
566 | BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks)); | ||
547 | atomic_inc(&cache->stats.discard_count); | 567 | atomic_inc(&cache->stats.discard_count); |
548 | 568 | ||
549 | spin_lock_irqsave(&cache->lock, flags); | 569 | spin_lock_irqsave(&cache->lock, flags); |
@@ -995,7 +1015,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) | |||
995 | wake_worker(cache); | 1015 | wake_worker(cache); |
996 | } | 1016 | } |
997 | 1017 | ||
998 | static void issue_copy_real(struct dm_cache_migration *mg) | 1018 | static void issue_copy(struct dm_cache_migration *mg) |
999 | { | 1019 | { |
1000 | int r; | 1020 | int r; |
1001 | struct dm_io_region o_region, c_region; | 1021 | struct dm_io_region o_region, c_region; |
@@ -1074,11 +1094,46 @@ static void avoid_copy(struct dm_cache_migration *mg) | |||
1074 | migration_success_pre_commit(mg); | 1094 | migration_success_pre_commit(mg); |
1075 | } | 1095 | } |
1076 | 1096 | ||
1077 | static void issue_copy(struct dm_cache_migration *mg) | 1097 | static void calc_discard_block_range(struct cache *cache, struct bio *bio, |
1098 | dm_dblock_t *b, dm_dblock_t *e) | ||
1099 | { | ||
1100 | sector_t sb = bio->bi_iter.bi_sector; | ||
1101 | sector_t se = bio_end_sector(bio); | ||
1102 | |||
1103 | *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size)); | ||
1104 | |||
1105 | if (se - sb < cache->discard_block_size) | ||
1106 | *e = *b; | ||
1107 | else | ||
1108 | *e = to_dblock(block_div(se, cache->discard_block_size)); | ||
1109 | } | ||
1110 | |||
1111 | static void issue_discard(struct dm_cache_migration *mg) | ||
1112 | { | ||
1113 | dm_dblock_t b, e; | ||
1114 | struct bio *bio = mg->new_ocell->holder; | ||
1115 | |||
1116 | calc_discard_block_range(mg->cache, bio, &b, &e); | ||
1117 | while (b != e) { | ||
1118 | set_discard(mg->cache, b); | ||
1119 | b = to_dblock(from_dblock(b) + 1); | ||
1120 | } | ||
1121 | |||
1122 | bio_endio(bio, 0); | ||
1123 | cell_defer(mg->cache, mg->new_ocell, false); | ||
1124 | free_migration(mg); | ||
1125 | } | ||
1126 | |||
1127 | static void issue_copy_or_discard(struct dm_cache_migration *mg) | ||
1078 | { | 1128 | { |
1079 | bool avoid; | 1129 | bool avoid; |
1080 | struct cache *cache = mg->cache; | 1130 | struct cache *cache = mg->cache; |
1081 | 1131 | ||
1132 | if (mg->discard) { | ||
1133 | issue_discard(mg); | ||
1134 | return; | ||
1135 | } | ||
1136 | |||
1082 | if (mg->writeback || mg->demote) | 1137 | if (mg->writeback || mg->demote) |
1083 | avoid = !is_dirty(cache, mg->cblock) || | 1138 | avoid = !is_dirty(cache, mg->cblock) || |
1084 | is_discarded_oblock(cache, mg->old_oblock); | 1139 | is_discarded_oblock(cache, mg->old_oblock); |
@@ -1093,7 +1148,7 @@ static void issue_copy(struct dm_cache_migration *mg) | |||
1093 | } | 1148 | } |
1094 | } | 1149 | } |
1095 | 1150 | ||
1096 | avoid ? avoid_copy(mg) : issue_copy_real(mg); | 1151 | avoid ? avoid_copy(mg) : issue_copy(mg); |
1097 | } | 1152 | } |
1098 | 1153 | ||
1099 | static void complete_migration(struct dm_cache_migration *mg) | 1154 | static void complete_migration(struct dm_cache_migration *mg) |
@@ -1178,6 +1233,7 @@ static void promote(struct cache *cache, struct prealloc *structs, | |||
1178 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1233 | struct dm_cache_migration *mg = prealloc_get_migration(structs); |
1179 | 1234 | ||
1180 | mg->err = false; | 1235 | mg->err = false; |
1236 | mg->discard = false; | ||
1181 | mg->writeback = false; | 1237 | mg->writeback = false; |
1182 | mg->demote = false; | 1238 | mg->demote = false; |
1183 | mg->promote = true; | 1239 | mg->promote = true; |
@@ -1201,6 +1257,7 @@ static void writeback(struct cache *cache, struct prealloc *structs, | |||
1201 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1257 | struct dm_cache_migration *mg = prealloc_get_migration(structs); |
1202 | 1258 | ||
1203 | mg->err = false; | 1259 | mg->err = false; |
1260 | mg->discard = false; | ||
1204 | mg->writeback = true; | 1261 | mg->writeback = true; |
1205 | mg->demote = false; | 1262 | mg->demote = false; |
1206 | mg->promote = false; | 1263 | mg->promote = false; |
@@ -1226,6 +1283,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, | |||
1226 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1283 | struct dm_cache_migration *mg = prealloc_get_migration(structs); |
1227 | 1284 | ||
1228 | mg->err = false; | 1285 | mg->err = false; |
1286 | mg->discard = false; | ||
1229 | mg->writeback = false; | 1287 | mg->writeback = false; |
1230 | mg->demote = true; | 1288 | mg->demote = true; |
1231 | mg->promote = true; | 1289 | mg->promote = true; |
@@ -1254,6 +1312,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs, | |||
1254 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | 1312 | struct dm_cache_migration *mg = prealloc_get_migration(structs); |
1255 | 1313 | ||
1256 | mg->err = false; | 1314 | mg->err = false; |
1315 | mg->discard = false; | ||
1257 | mg->writeback = false; | 1316 | mg->writeback = false; |
1258 | mg->demote = true; | 1317 | mg->demote = true; |
1259 | mg->promote = false; | 1318 | mg->promote = false; |
@@ -1270,6 +1329,26 @@ static void invalidate(struct cache *cache, struct prealloc *structs, | |||
1270 | quiesce_migration(mg); | 1329 | quiesce_migration(mg); |
1271 | } | 1330 | } |
1272 | 1331 | ||
1332 | static void discard(struct cache *cache, struct prealloc *structs, | ||
1333 | struct dm_bio_prison_cell *cell) | ||
1334 | { | ||
1335 | struct dm_cache_migration *mg = prealloc_get_migration(structs); | ||
1336 | |||
1337 | mg->err = false; | ||
1338 | mg->discard = true; | ||
1339 | mg->writeback = false; | ||
1340 | mg->demote = false; | ||
1341 | mg->promote = false; | ||
1342 | mg->requeue_holder = false; | ||
1343 | mg->invalidate = false; | ||
1344 | mg->cache = cache; | ||
1345 | mg->old_ocell = NULL; | ||
1346 | mg->new_ocell = cell; | ||
1347 | mg->start_jiffies = jiffies; | ||
1348 | |||
1349 | quiesce_migration(mg); | ||
1350 | } | ||
1351 | |||
1273 | /*---------------------------------------------------------------- | 1352 | /*---------------------------------------------------------------- |
1274 | * bio processing | 1353 | * bio processing |
1275 | *--------------------------------------------------------------*/ | 1354 | *--------------------------------------------------------------*/ |
@@ -1303,31 +1382,27 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) | |||
1303 | issue(cache, bio); | 1382 | issue(cache, bio); |
1304 | } | 1383 | } |
1305 | 1384 | ||
1306 | /* | 1385 | static void process_discard_bio(struct cache *cache, struct prealloc *structs, |
1307 | * People generally discard large parts of a device, eg, the whole device | 1386 | struct bio *bio) |
1308 | * when formatting. Splitting these large discards up into cache block | ||
1309 | * sized ios and then quiescing (always neccessary for discard) takes too | ||
1310 | * long. | ||
1311 | * | ||
1312 | * We keep it simple, and allow any size of discard to come in, and just | ||
1313 | * mark off blocks on the discard bitset. No passdown occurs! | ||
1314 | * | ||
1315 | * To implement passdown we need to change the bio_prison such that a cell | ||
1316 | * can have a key that spans many blocks. | ||
1317 | */ | ||
1318 | static void process_discard_bio(struct cache *cache, struct bio *bio) | ||
1319 | { | 1387 | { |
1320 | dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, | 1388 | int r; |
1321 | cache->discard_block_size); | 1389 | dm_dblock_t b, e; |
1322 | dm_block_t end_block = bio_end_sector(bio); | 1390 | struct dm_bio_prison_cell *cell_prealloc, *new_ocell; |
1323 | dm_block_t b; | ||
1324 | 1391 | ||
1325 | end_block = block_div(end_block, cache->discard_block_size); | 1392 | calc_discard_block_range(cache, bio, &b, &e); |
1393 | if (b == e) { | ||
1394 | bio_endio(bio, 0); | ||
1395 | return; | ||
1396 | } | ||
1326 | 1397 | ||
1327 | for (b = start_block; b < end_block; b++) | 1398 | cell_prealloc = prealloc_get_cell(structs); |
1328 | set_discard(cache, to_dblock(b)); | 1399 | r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc, |
1400 | (cell_free_fn) prealloc_put_cell, | ||
1401 | structs, &new_ocell); | ||
1402 | if (r > 0) | ||
1403 | return; | ||
1329 | 1404 | ||
1330 | bio_endio(bio, 0); | 1405 | discard(cache, structs, new_ocell); |
1331 | } | 1406 | } |
1332 | 1407 | ||
1333 | static bool spare_migration_bandwidth(struct cache *cache) | 1408 | static bool spare_migration_bandwidth(struct cache *cache) |
@@ -1517,7 +1592,7 @@ static void process_deferred_bios(struct cache *cache) | |||
1517 | if (bio->bi_rw & REQ_FLUSH) | 1592 | if (bio->bi_rw & REQ_FLUSH) |
1518 | process_flush_bio(cache, bio); | 1593 | process_flush_bio(cache, bio); |
1519 | else if (bio->bi_rw & REQ_DISCARD) | 1594 | else if (bio->bi_rw & REQ_DISCARD) |
1520 | process_discard_bio(cache, bio); | 1595 | process_discard_bio(cache, &structs, bio); |
1521 | else | 1596 | else |
1522 | process_bio(cache, &structs, bio); | 1597 | process_bio(cache, &structs, bio); |
1523 | } | 1598 | } |
@@ -1732,7 +1807,7 @@ static void do_worker(struct work_struct *ws) | |||
1732 | process_invalidation_requests(cache); | 1807 | process_invalidation_requests(cache); |
1733 | } | 1808 | } |
1734 | 1809 | ||
1735 | process_migrations(cache, &cache->quiesced_migrations, issue_copy); | 1810 | process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard); |
1736 | process_migrations(cache, &cache->completed_migrations, complete_migration); | 1811 | process_migrations(cache, &cache->completed_migrations, complete_migration); |
1737 | 1812 | ||
1738 | if (commit_if_needed(cache)) { | 1813 | if (commit_if_needed(cache)) { |
@@ -3130,7 +3205,8 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits) | |||
3130 | /* | 3205 | /* |
3131 | * FIXME: these limits may be incompatible with the cache device | 3206 | * FIXME: these limits may be incompatible with the cache device |
3132 | */ | 3207 | */ |
3133 | limits->max_discard_sectors = cache->discard_block_size * 1024; | 3208 | limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, |
3209 | cache->origin_sectors); | ||
3134 | limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; | 3210 | limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; |
3135 | } | 3211 | } |
3136 | 3212 | ||
@@ -3155,7 +3231,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
3155 | 3231 | ||
3156 | static struct target_type cache_target = { | 3232 | static struct target_type cache_target = { |
3157 | .name = "cache", | 3233 | .name = "cache", |
3158 | .version = {1, 5, 0}, | 3234 | .version = {1, 6, 0}, |
3159 | .module = THIS_MODULE, | 3235 | .module = THIS_MODULE, |
3160 | .ctr = cache_ctr, | 3236 | .ctr = cache_ctr, |
3161 | .dtr = cache_dtr, | 3237 | .dtr = cache_dtr, |