aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Joe Thornber <ejt@redhat.com> 2012-06-02 19:30:01 -0400
committer Alasdair G Kergon <agk@redhat.com> 2012-06-02 19:30:01 -0400
commit cc8394d86f045b86ff303d3c9e4ce47d97148951 (patch)
tree ad37143c9709f523fb2ca9fc5ac9de75e9a011f9
parent a24c25696b7133dd534d7a9436e576af79d9ce3b (diff)
dm thin: provide userspace access to pool metadata
This patch implements two new messages that can be sent to the thin pool target, allowing it to take a snapshot of the _metadata_. This read-only snapshot can be accessed by userland, concurrently with the live target. Only one metadata snapshot can be held at a time. The pool's status line will give the block location for the current msnap. Since version 0.1.5 of the userland thin provisioning tools, the thin_dump program displays the msnap as follows: thin_dump -m <msnap root> <metadata dev> Available here: https://github.com/jthornber/thin-provisioning-tools Now that userland can access the metadata we can do various things that have traditionally been kernel-side tasks: i) Incremental backups. By using metadata snapshots we can work out what blocks have changed over time. Combined with data snapshots we can ensure the data doesn't change while we back it up. A short proof of concept script can be found here: https://github.com/jthornber/thinp-test-suite/blob/master/incremental_backup_example.rb ii) Migration of thin devices from one pool to another. iii) Merging snapshots back into an external origin. iv) Asynchronous replication. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
-rw-r--r-- Documentation/device-mapper/thin-provisioning.txt 11
-rw-r--r-- drivers/md/dm-thin-metadata.c 136
-rw-r--r-- drivers/md/dm-thin-metadata.h 13
-rw-r--r-- drivers/md/dm-thin.c 42
-rw-r--r-- drivers/md/persistent-data/dm-transaction-manager.c 2
5 files changed, 193 insertions, 11 deletions
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 3370bc4d7b98..f5cfc62b7ad3 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -287,6 +287,17 @@ iii) Messages
287 the current transaction id is when you change it with this 287 the current transaction id is when you change it with this
288 compare-and-swap message. 288 compare-and-swap message.
289 289
290 reserve_metadata_snap
291
292 Reserve a copy of the data mapping btree for use by userland.
293 This allows userland to inspect the mappings as they were when
294 this message was executed. Use the pool's status command to
295 get the root block associated with the metadata snapshot.
296
297 release_metadata_snap
298
299 Release a previously reserved copy of the data mapping btree.
300
290'thin' target 301'thin' target
291------------- 302-------------
292 303
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 737d38865b69..3e2907f0bc46 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1082,12 +1082,89 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
1082 return 0; 1082 return 0;
1083} 1083}
1084 1084
1085static int __get_held_metadata_root(struct dm_pool_metadata *pmd, 1085static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
1086 dm_block_t *result) 1086{
1087 int r, inc;
1088 struct thin_disk_superblock *disk_super;
1089 struct dm_block *copy, *sblock;
1090 dm_block_t held_root;
1091
1092 /*
1093 * Copy the superblock.
1094 */
1095 dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
1096 r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
1097 &sb_validator, &copy, &inc);
1098 if (r)
1099 return r;
1100
1101 BUG_ON(!inc);
1102
1103 held_root = dm_block_location(copy);
1104 disk_super = dm_block_data(copy);
1105
1106 if (le64_to_cpu(disk_super->held_root)) {
1107 DMWARN("Pool metadata snapshot already exists: release this before taking another.");
1108
1109 dm_tm_dec(pmd->tm, held_root);
1110 dm_tm_unlock(pmd->tm, copy);
1111 pmd->need_commit = 1;
1112
1113 return -EBUSY;
1114 }
1115
1116 /*
1117 * Wipe the spacemap since we're not publishing this.
1118 */
1119 memset(&disk_super->data_space_map_root, 0,
1120 sizeof(disk_super->data_space_map_root));
1121 memset(&disk_super->metadata_space_map_root, 0,
1122 sizeof(disk_super->metadata_space_map_root));
1123
1124 /*
1125 * Increment the data structures that need to be preserved.
1126 */
1127 dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
1128 dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
1129 dm_tm_unlock(pmd->tm, copy);
1130
1131 /*
1132 * Write the held root into the superblock.
1133 */
1134 r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1135 &sb_validator, &sblock);
1136 if (r) {
1137 dm_tm_dec(pmd->tm, held_root);
1138 pmd->need_commit = 1;
1139 return r;
1140 }
1141
1142 disk_super = dm_block_data(sblock);
1143 disk_super->held_root = cpu_to_le64(held_root);
1144 dm_bm_unlock(sblock);
1145
1146 pmd->need_commit = 1;
1147
1148 return 0;
1149}
1150
1151int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
1152{
1153 int r;
1154
1155 down_write(&pmd->root_lock);
1156 r = __reserve_metadata_snap(pmd);
1157 up_write(&pmd->root_lock);
1158
1159 return r;
1160}
1161
1162static int __release_metadata_snap(struct dm_pool_metadata *pmd)
1087{ 1163{
1088 int r; 1164 int r;
1089 struct thin_disk_superblock *disk_super; 1165 struct thin_disk_superblock *disk_super;
1090 struct dm_block *sblock; 1166 struct dm_block *sblock, *copy;
1167 dm_block_t held_root;
1091 1168
1092 r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, 1169 r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1093 &sb_validator, &sblock); 1170 &sb_validator, &sblock);
@@ -1095,18 +1172,65 @@ static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
1095 return r; 1172 return r;
1096 1173
1097 disk_super = dm_block_data(sblock); 1174 disk_super = dm_block_data(sblock);
1175 held_root = le64_to_cpu(disk_super->held_root);
1176 disk_super->held_root = cpu_to_le64(0);
1177 pmd->need_commit = 1;
1178
1179 dm_bm_unlock(sblock);
1180
1181 if (!held_root) {
1182 DMWARN("No pool metadata snapshot found: nothing to release.");
1183 return -EINVAL;
1184 }
1185
1186 r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, &copy);
1187 if (r)
1188 return r;
1189
1190 disk_super = dm_block_data(copy);
1191 dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root));
1192 dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root));
1193 dm_sm_dec_block(pmd->metadata_sm, held_root);
1194
1195 return dm_tm_unlock(pmd->tm, copy);
1196}
1197
1198int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
1199{
1200 int r;
1201
1202 down_write(&pmd->root_lock);
1203 r = __release_metadata_snap(pmd);
1204 up_write(&pmd->root_lock);
1205
1206 return r;
1207}
1208
1209static int __get_metadata_snap(struct dm_pool_metadata *pmd,
1210 dm_block_t *result)
1211{
1212 int r;
1213 struct thin_disk_superblock *disk_super;
1214 struct dm_block *sblock;
1215
1216 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1217 &sb_validator, &sblock);
1218 if (r)
1219 return r;
1220
1221 disk_super = dm_block_data(sblock);
1098 *result = le64_to_cpu(disk_super->held_root); 1222 *result = le64_to_cpu(disk_super->held_root);
1099 1223
1100 return dm_bm_unlock(sblock); 1224 return dm_bm_unlock(sblock);
1101} 1225}
1102 1226
1103int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd, 1227int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
1104 dm_block_t *result) 1228 dm_block_t *result)
1105{ 1229{
1106 int r; 1230 int r;
1107 1231
1108 down_read(&pmd->root_lock); 1232 down_read(&pmd->root_lock);
1109 r = __get_held_metadata_root(pmd, result); 1233 r = __get_metadata_snap(pmd, result);
1110 up_read(&pmd->root_lock); 1234 up_read(&pmd->root_lock);
1111 1235
1112 return r; 1236 return r;
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index ed4725e67c96..b88918ccdaf6 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -90,11 +90,18 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
90 90
91/* 91/*
92 * Hold/get root for userspace transaction. 92 * Hold/get root for userspace transaction.
93 *
94 * The metadata snapshot is a copy of the current superblock (minus the
95 * space maps). Userland can access the data structures for READ
96 * operations only. A small performance hit is incurred by providing this
97 * copy of the metadata to userland due to extra copy-on-write operations
98 * on the metadata nodes. Release this as soon as you finish with it.
93 */ 99 */
94int dm_pool_hold_metadata_root(struct dm_pool_metadata *pmd); 100int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd);
101int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd);
95 102
96int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd, 103int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
97 dm_block_t *result); 104 dm_block_t *result);
98 105
99/* 106/*
100 * Actions on a single virtual device. 107 * Actions on a single virtual device.
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index db1b041ce975..37fdaf81bd1f 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2284,6 +2284,36 @@ static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct po
2284 return 0; 2284 return 0;
2285} 2285}
2286 2286
2287static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
2288{
2289 int r;
2290
2291 r = check_arg_count(argc, 1);
2292 if (r)
2293 return r;
2294
2295 r = dm_pool_reserve_metadata_snap(pool->pmd);
2296 if (r)
2297 DMWARN("reserve_metadata_snap message failed.");
2298
2299 return r;
2300}
2301
2302static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
2303{
2304 int r;
2305
2306 r = check_arg_count(argc, 1);
2307 if (r)
2308 return r;
2309
2310 r = dm_pool_release_metadata_snap(pool->pmd);
2311 if (r)
2312 DMWARN("release_metadata_snap message failed.");
2313
2314 return r;
2315}
2316
2287/* 2317/*
2288 * Messages supported: 2318 * Messages supported:
2289 * create_thin <dev_id> 2319 * create_thin <dev_id>
@@ -2291,6 +2321,8 @@ static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct po
2291 * delete <dev_id> 2321 * delete <dev_id>
2292 * trim <dev_id> <new_size_in_sectors> 2322 * trim <dev_id> <new_size_in_sectors>
2293 * set_transaction_id <current_trans_id> <new_trans_id> 2323 * set_transaction_id <current_trans_id> <new_trans_id>
2324 * reserve_metadata_snap
2325 * release_metadata_snap
2294 */ 2326 */
2295static int pool_message(struct dm_target *ti, unsigned argc, char **argv) 2327static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
2296{ 2328{
@@ -2310,6 +2342,12 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
2310 else if (!strcasecmp(argv[0], "set_transaction_id")) 2342 else if (!strcasecmp(argv[0], "set_transaction_id"))
2311 r = process_set_transaction_id_mesg(argc, argv, pool); 2343 r = process_set_transaction_id_mesg(argc, argv, pool);
2312 2344
2345 else if (!strcasecmp(argv[0], "reserve_metadata_snap"))
2346 r = process_reserve_metadata_snap_mesg(argc, argv, pool);
2347
2348 else if (!strcasecmp(argv[0], "release_metadata_snap"))
2349 r = process_release_metadata_snap_mesg(argc, argv, pool);
2350
2313 else 2351 else
2314 DMWARN("Unrecognised thin pool target message received: %s", argv[0]); 2352 DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
2315 2353
@@ -2369,7 +2407,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
2369 if (r) 2407 if (r)
2370 return r; 2408 return r;
2371 2409
2372 r = dm_pool_get_held_metadata_root(pool->pmd, &held_root); 2410 r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
2373 if (r) 2411 if (r)
2374 return r; 2412 return r;
2375 2413
@@ -2465,7 +2503,7 @@ static struct target_type pool_target = {
2465 .name = "thin-pool", 2503 .name = "thin-pool",
2466 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 2504 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
2467 DM_TARGET_IMMUTABLE, 2505 DM_TARGET_IMMUTABLE,
2468 .version = {1, 1, 0}, 2506 .version = {1, 2, 0},
2469 .module = THIS_MODULE, 2507 .module = THIS_MODULE,
2470 .ctr = pool_ctr, 2508 .ctr = pool_ctr,
2471 .dtr = pool_dtr, 2509 .dtr = pool_dtr,
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 6f8d38747d7f..400fe144c0cd 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -249,6 +249,7 @@ int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
249 249
250 return r; 250 return r;
251} 251}
252EXPORT_SYMBOL_GPL(dm_tm_shadow_block);
252 253
253int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b, 254int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
254 struct dm_block_validator *v, 255 struct dm_block_validator *v,
@@ -259,6 +260,7 @@ int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
259 260
260 return dm_bm_read_lock(tm->bm, b, v, blk); 261 return dm_bm_read_lock(tm->bm, b, v, blk);
261} 262}
263EXPORT_SYMBOL_GPL(dm_tm_read_lock);
262 264
263int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b) 265int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b)
264{ 266{