aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoe Thornber <ejt@redhat.com>2014-03-27 10:13:20 -0400
committerMike Snitzer <snitzer@redhat.com>2014-03-27 16:56:23 -0400
commita9d45396f5956d0b615c7ae3b936afd888351a47 (patch)
tree41b952569c493bd67743aeb920cda6055211c981
parent64ab346a360a4b15c28fb8531918d4a01f4eabd9 (diff)
dm transaction manager: fix corruption due to non-atomic transaction commit
The persistent-data library used by dm-thin, dm-cache, etc is transactional. If anything goes wrong, such as an io error when writing new metadata or a power failure, then we roll back to the last transaction. Atomicity when committing a transaction is achieved by: a) Never overwriting data from the previous transaction. b) Writing the superblock last, after all other metadata has hit the disk. This commit and the following commit ("dm: take care to copy the space map roots before locking the superblock") fix a bug associated with (b). When committing it was possible for the superblock to still be written in spite of an io error occurring during the preceeding metadata flush. With these commits we're careful not to take the write lock out on the superblock until after the metadata flush has completed. Change the transaction manager's semantics for dm_tm_commit() to assume all data has been flushed _before_ the single superblock that is passed in. As a prerequisite, split the block manager's block unlocking and flushing by simplifying dm_bm_flush_and_unlock() to dm_bm_flush(). Now the unlocking must be done separately. This issue was discovered by forcing io errors at the crucial time using dm-flakey. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> Cc: stable@vger.kernel.org
-rw-r--r--drivers/md/dm-cache-metadata.c3
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c15
-rw-r--r--drivers/md/persistent-data/dm-block-manager.h3
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.c5
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.h17
5 files changed, 16 insertions, 27 deletions
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index cbd568a3a579..4b73abe9323f 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -530,8 +530,9 @@ static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
530 disk_super = dm_block_data(sblock); 530 disk_super = dm_block_data(sblock);
531 update_flags(disk_super, mutator); 531 update_flags(disk_super, mutator);
532 read_superblock_fields(cmd, disk_super); 532 read_superblock_fields(cmd, disk_super);
533 dm_bm_unlock(sblock);
533 534
534 return dm_bm_flush_and_unlock(cmd->bm, sblock); 535 return dm_bm_flush(cmd->bm);
535} 536}
536 537
537static int __begin_transaction(struct dm_cache_metadata *cmd) 538static int __begin_transaction(struct dm_cache_metadata *cmd)
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 455f79279a16..087411c95ffc 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -595,25 +595,14 @@ int dm_bm_unlock(struct dm_block *b)
595} 595}
596EXPORT_SYMBOL_GPL(dm_bm_unlock); 596EXPORT_SYMBOL_GPL(dm_bm_unlock);
597 597
598int dm_bm_flush_and_unlock(struct dm_block_manager *bm, 598int dm_bm_flush(struct dm_block_manager *bm)
599 struct dm_block *superblock)
600{ 599{
601 int r;
602
603 if (bm->read_only) 600 if (bm->read_only)
604 return -EPERM; 601 return -EPERM;
605 602
606 r = dm_bufio_write_dirty_buffers(bm->bufio);
607 if (unlikely(r)) {
608 dm_bm_unlock(superblock);
609 return r;
610 }
611
612 dm_bm_unlock(superblock);
613
614 return dm_bufio_write_dirty_buffers(bm->bufio); 603 return dm_bufio_write_dirty_buffers(bm->bufio);
615} 604}
616EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock); 605EXPORT_SYMBOL_GPL(dm_bm_flush);
617 606
618void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b) 607void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
619{ 608{
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
index 13cd58e1fe69..1b95dfc17786 100644
--- a/drivers/md/persistent-data/dm-block-manager.h
+++ b/drivers/md/persistent-data/dm-block-manager.h
@@ -105,8 +105,7 @@ int dm_bm_unlock(struct dm_block *b);
105 * 105 *
106 * This method always blocks. 106 * This method always blocks.
107 */ 107 */
108int dm_bm_flush_and_unlock(struct dm_block_manager *bm, 108int dm_bm_flush(struct dm_block_manager *bm);
109 struct dm_block *superblock);
110 109
111/* 110/*
112 * Request data is prefetched into the cache. 111 * Request data is prefetched into the cache.
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 81da1a26042e..3bc30a0ae3d6 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -154,7 +154,7 @@ int dm_tm_pre_commit(struct dm_transaction_manager *tm)
154 if (r < 0) 154 if (r < 0)
155 return r; 155 return r;
156 156
157 return 0; 157 return dm_bm_flush(tm->bm);
158} 158}
159EXPORT_SYMBOL_GPL(dm_tm_pre_commit); 159EXPORT_SYMBOL_GPL(dm_tm_pre_commit);
160 160
@@ -164,8 +164,9 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root)
164 return -EWOULDBLOCK; 164 return -EWOULDBLOCK;
165 165
166 wipe_shadow_table(tm); 166 wipe_shadow_table(tm);
167 dm_bm_unlock(root);
167 168
168 return dm_bm_flush_and_unlock(tm->bm, root); 169 return dm_bm_flush(tm->bm);
169} 170}
170EXPORT_SYMBOL_GPL(dm_tm_commit); 171EXPORT_SYMBOL_GPL(dm_tm_commit);
171 172
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index b5b139076ca5..2772ed2a781a 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -38,18 +38,17 @@ struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transac
38/* 38/*
39 * We use a 2-phase commit here. 39 * We use a 2-phase commit here.
40 * 40 *
41 * i) In the first phase the block manager is told to start flushing, and 41 * i) Make all changes for the transaction *except* for the superblock.
42 * the changes to the space map are written to disk. You should interrogate 42 * Then call dm_tm_pre_commit() to flush them to disk.
43 * your particular space map to get detail of its root node etc. to be
44 * included in your superblock.
45 * 43 *
46 * ii) @root will be committed last. You shouldn't use more than the 44 * ii) Lock your superblock. Update. Then call dm_tm_commit() which will
47 * first 512 bytes of @root if you wish the transaction to survive a power 45 * unlock the superblock and flush it. No other blocks should be updated
48 * failure. You *must* have a write lock held on @root for both stage (i) 46 * during this period. Care should be taken to never unlock a partially
49 * and (ii). The commit will drop the write lock. 47 * updated superblock; perform any operations that could fail *before* you
48 * take the superblock lock.
50 */ 49 */
51int dm_tm_pre_commit(struct dm_transaction_manager *tm); 50int dm_tm_pre_commit(struct dm_transaction_manager *tm);
52int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root); 51int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock);
53 52
54/* 53/*
55 * These methods are the only way to get hold of a writeable block. 54 * These methods are the only way to get hold of a writeable block.