author     Joe Thornber <ejt@redhat.com>      2013-10-24 14:10:29 -0400
committer  Mike Snitzer <snitzer@redhat.com>  2013-11-11 11:37:49 -0500
commit     2ee57d587357f0d752af6c2e3e46434a74b1bee3
tree       becc2422f9ca836b4a6903051a64f3114b16eeb7
parent     f494a9c6b1b6dd9a9f21bbb75d9210d478eeb498
dm cache: add passthrough mode
"Passthrough" is a dm-cache operating mode (like writethrough or writeback) which is intended to be used when the cache contents are not known to be coherent with the origin device. It behaves as follows: * All reads are served from the origin device (all reads miss the cache) * All writes are forwarded to the origin device; additionally, write hits cause cache block invalidates This mode decouples cache coherency checks from cache device creation, largely to avoid having to perform coherency checks while booting. Boot scripts can create cache devices in passthrough mode and put them into service (mount cached filesystems, for example) without having to worry about coherency. Coherency that exists is maintained, although the cache will gradually cool as writes take place. Later, applications can perform coherency checks, the nature of which will depend on the type of the underlying storage. If coherency can be verified, the cache device can be transitioned to writethrough or writeback mode while still warm; otherwise, the cache contents can be discarded prior to transitioning to the desired operating mode. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com> Signed-off-by: Morgan Mears <Morgan.Mears@netapp.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
-rw-r--r--  Documentation/device-mapper/cache.txt  |  19
-rw-r--r--  drivers/md/dm-cache-metadata.c         |   5
-rw-r--r--  drivers/md/dm-cache-metadata.h         |   5
-rw-r--r--  drivers/md/dm-cache-target.c           | 209
4 files changed, 200 insertions(+), 38 deletions(-)
diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
index 33d45ee0b737..ff6639f72536 100644
--- a/Documentation/device-mapper/cache.txt
+++ b/Documentation/device-mapper/cache.txt
@@ -68,10 +68,11 @@ So large block sizes are bad because they waste cache space. And small
 block sizes are bad because they increase the amount of metadata (both
 in core and on disk).
 
-Writeback/writethrough
-----------------------
+Cache operating modes
+---------------------
 
-The cache has two modes, writeback and writethrough.
+The cache has three operating modes: writeback, writethrough and
+passthrough.
 
 If writeback, the default, is selected then a write to a block that is
 cached will go only to the cache and the block will be marked dirty in
@@ -81,6 +82,18 @@ If writethrough is selected then a write to a cached block will not
 complete until it has hit both the origin and cache devices. Clean
 blocks should remain clean.
 
+If passthrough is selected, useful when the cache contents are not known
+to be coherent with the origin device, then all reads are served from
+the origin device (all reads miss the cache) and all writes are
+forwarded to the origin device; additionally, write hits cause cache
+block invalidates. Passthrough mode allows a cache device to be
+activated without having to worry about coherency. Coherency that
+exists is maintained, although the cache will gradually cool as writes
+take place. If the coherency of the cache can later be verified, or
+established, the cache device can be transitioned to writethrough or
+writeback mode while still warm. Otherwise, the cache contents can be
+discarded prior to transitioning to the desired operating mode.
+
 A simple cleaner policy is provided, which will clean (write back) all
 dirty blocks in a cache. Useful for decommissioning a cache.
 
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 062b83ed3e84..8601425436cd 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1249,3 +1249,8 @@ int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
 
 	return r;
 }
+
+int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result)
+{
+	return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
+}
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index f45cef21f3d0..cd906f14f98d 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -137,6 +137,11 @@ int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *
 int dm_cache_save_hint(struct dm_cache_metadata *cmd,
 		       dm_cblock_t cblock, uint32_t hint);
 
+/*
+ * Query method.  Are all the blocks in the cache clean?
+ */
+int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result);
+
 /*----------------------------------------------------------------*/
 
 #endif /* DM_CACHE_METADATA_H */
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 183dfc9db297..8c0217753cc5 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -104,14 +104,37 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
 /*
  * FIXME: the cache is read/write for the time being.
  */
-enum cache_mode {
+enum cache_metadata_mode {
 	CM_WRITE,		/* metadata may be changed */
 	CM_READ_ONLY,		/* metadata may not be changed */
 };
 
+enum cache_io_mode {
+	/*
+	 * Data is written to cached blocks only.  These blocks are marked
+	 * dirty.  If you lose the cache device you will lose data.
+	 * Potential performance increase for both reads and writes.
+	 */
+	CM_IO_WRITEBACK,
+
+	/*
+	 * Data is written to both cache and origin.  Blocks are never
+	 * dirty.  Potential performance benefit for reads only.
+	 */
+	CM_IO_WRITETHROUGH,
+
+	/*
+	 * A degraded mode useful for various cache coherency situations
+	 * (eg, rolling back snapshots).  Reads and writes always go to the
+	 * origin.  If a write goes to a cached oblock, then the cache
+	 * block is invalidated.
+	 */
+	CM_IO_PASSTHROUGH
+};
+
 struct cache_features {
-	enum cache_mode mode;
-	bool write_through:1;
+	enum cache_metadata_mode mode;
+	enum cache_io_mode io_mode;
 };
 
 struct cache_stats {
@@ -565,9 +588,24 @@ static void save_stats(struct cache *cache)
 #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
 #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
 
+static bool writethrough_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_WRITETHROUGH;
+}
+
+static bool writeback_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_WRITEBACK;
+}
+
+static bool passthrough_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_PASSTHROUGH;
+}
+
 static size_t get_per_bio_data_size(struct cache *cache)
 {
-	return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
+	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
 }
 
 static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
@@ -1135,6 +1173,32 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs,
 	quiesce_migration(mg);
 }
 
+/*
+ * Invalidate a cache entry.  No writeback occurs; any changes in the cache
+ * block are thrown away.
+ */
+static void invalidate(struct cache *cache, struct prealloc *structs,
+		       dm_oblock_t oblock, dm_cblock_t cblock,
+		       struct dm_bio_prison_cell *cell)
+{
+	struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+	mg->err = false;
+	mg->writeback = false;
+	mg->demote = true;
+	mg->promote = false;
+	mg->requeue_holder = true;
+	mg->cache = cache;
+	mg->old_oblock = oblock;
+	mg->cblock = cblock;
+	mg->old_ocell = cell;
+	mg->new_ocell = NULL;
+	mg->start_jiffies = jiffies;
+
+	inc_nr_migrations(cache);
+	quiesce_migration(mg);
+}
+
 /*----------------------------------------------------------------
  * bio processing
  *--------------------------------------------------------------*/
@@ -1197,13 +1261,6 @@ static bool spare_migration_bandwidth(struct cache *cache)
 	return current_volume < cache->migration_threshold;
 }
 
-static bool is_writethrough_io(struct cache *cache, struct bio *bio,
-			       dm_cblock_t cblock)
-{
-	return bio_data_dir(bio) == WRITE &&
-		cache->features.write_through && !is_dirty(cache, cblock);
-}
-
 static void inc_hit_counter(struct cache *cache, struct bio *bio)
 {
 	atomic_inc(bio_data_dir(bio) == READ ?
@@ -1216,6 +1273,15 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
1216 &cache->stats.read_miss : &cache->stats.write_miss); 1273 &cache->stats.read_miss : &cache->stats.write_miss);
1217} 1274}
1218 1275
1276static void issue_cache_bio(struct cache *cache, struct bio *bio,
1277 struct per_bio_data *pb,
1278 dm_oblock_t oblock, dm_cblock_t cblock)
1279{
1280 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
1281 remap_to_cache_dirty(cache, bio, oblock, cblock);
1282 issue(cache, bio);
1283}
1284
1219static void process_bio(struct cache *cache, struct prealloc *structs, 1285static void process_bio(struct cache *cache, struct prealloc *structs,
1220 struct bio *bio) 1286 struct bio *bio)
1221{ 1287{
@@ -1227,7 +1293,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 	size_t pb_data_size = get_per_bio_data_size(cache);
 	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 	bool discarded_block = is_discarded_oblock(cache, block);
-	bool can_migrate = discarded_block || spare_migration_bandwidth(cache);
+	bool passthrough = passthrough_mode(&cache->features);
+	bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
 
 	/*
 	 * Check to see if that block is currently migrating.
@@ -1248,15 +1315,39 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 
 	switch (lookup_result.op) {
 	case POLICY_HIT:
-		inc_hit_counter(cache, bio);
-		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+		if (passthrough) {
+			inc_miss_counter(cache, bio);
 
-		if (is_writethrough_io(cache, bio, lookup_result.cblock))
-			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
-		else
-			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+			/*
+			 * Passthrough always maps to the origin,
+			 * invalidating any cache blocks that are written
+			 * to.
+			 */
+
+			if (bio_data_dir(bio) == WRITE) {
+				atomic_inc(&cache->stats.demotion);
+				invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
+				release_cell = false;
+
+			} else {
+				/* FIXME: factor out issue_origin() */
+				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+				remap_to_origin_clear_discard(cache, bio, block);
+				issue(cache, bio);
+			}
+		} else {
+			inc_hit_counter(cache, bio);
+
+			if (bio_data_dir(bio) == WRITE &&
+			    writethrough_mode(&cache->features) &&
+			    !is_dirty(cache, lookup_result.cblock)) {
+				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+				issue(cache, bio);
+			} else
+				issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+		}
 
-		issue(cache, bio);
 		break;
 
 	case POLICY_MISS:
@@ -1807,7 +1898,7 @@ static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
 static void init_features(struct cache_features *cf)
 {
 	cf->mode = CM_WRITE;
-	cf->write_through = false;
+	cf->io_mode = CM_IO_WRITEBACK;
 }
 
 static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
@@ -1832,10 +1923,13 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
 		arg = dm_shift_arg(as);
 
 		if (!strcasecmp(arg, "writeback"))
-			cf->write_through = false;
+			cf->io_mode = CM_IO_WRITEBACK;
 
 		else if (!strcasecmp(arg, "writethrough"))
-			cf->write_through = true;
+			cf->io_mode = CM_IO_WRITETHROUGH;
+
+		else if (!strcasecmp(arg, "passthrough"))
+			cf->io_mode = CM_IO_PASSTHROUGH;
 
 		else {
 			*error = "Unrecognised cache feature requested";
@@ -2088,6 +2182,22 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 	}
 	cache->cmd = cmd;
 
+	if (passthrough_mode(&cache->features)) {
+		bool all_clean;
+
+		r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
+		if (r) {
+			*error = "dm_cache_metadata_all_clean() failed";
+			goto bad;
+		}
+
+		if (!all_clean) {
+			*error = "Cannot enter passthrough mode unless all blocks are clean";
+			r = -EINVAL;
+			goto bad;
+		}
+	}
+
 	spin_lock_init(&cache->lock);
 	bio_list_init(&cache->deferred_bios);
 	bio_list_init(&cache->deferred_flush_bios);
@@ -2303,17 +2413,37 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 		return DM_MAPIO_SUBMITTED;
 	}
 
+	r = DM_MAPIO_REMAPPED;
 	switch (lookup_result.op) {
 	case POLICY_HIT:
-		inc_hit_counter(cache, bio);
-		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+		if (passthrough_mode(&cache->features)) {
+			if (bio_data_dir(bio) == WRITE) {
+				/*
+				 * We need to invalidate this block, so
+				 * defer for the worker thread.
+				 */
+				cell_defer(cache, cell, true);
+				r = DM_MAPIO_SUBMITTED;
+
+			} else {
+				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+				inc_miss_counter(cache, bio);
+				remap_to_origin_clear_discard(cache, bio, block);
+
+				cell_defer(cache, cell, false);
+			}
 
-		if (is_writethrough_io(cache, bio, lookup_result.cblock))
-			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
-		else
-			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+		} else {
+			inc_hit_counter(cache, bio);
+
+			if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
+			    !is_dirty(cache, lookup_result.cblock))
+				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+			else
+				remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
 
-		cell_defer(cache, cell, false);
+			cell_defer(cache, cell, false);
+		}
 		break;
 
 	case POLICY_MISS:
@@ -2338,10 +2468,10 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 		DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
 			    (unsigned) lookup_result.op);
 		bio_io_error(bio);
-		return DM_MAPIO_SUBMITTED;
+		r = DM_MAPIO_SUBMITTED;
 	}
 
-	return DM_MAPIO_REMAPPED;
+	return r;
 }
 
 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
@@ -2659,10 +2789,19 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 		       (unsigned long long) from_cblock(residency),
 		       cache->nr_dirty);
 
-		if (cache->features.write_through)
+		if (writethrough_mode(&cache->features))
 			DMEMIT("1 writethrough ");
-		else
-			DMEMIT("0 ");
+
+		else if (passthrough_mode(&cache->features))
+			DMEMIT("1 passthrough ");
+
+		else if (writeback_mode(&cache->features))
+			DMEMIT("1 writeback ");
+
+		else {
+			DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode);
+			goto err;
+		}
 
 	DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
 	if (sz < maxlen) {
@@ -2771,7 +2910,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
 	.name = "cache",
-	.version = {1, 1, 1},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = cache_ctr,
 	.dtr = cache_dtr,