diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-05-12 13:39:03 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:03 -0400 |
commit | 1259ab75c62462b8ffad90067b5e1f6312786a18 (patch) | |
tree | ccc6b795e524a74b3fb9666409ca2f7f33bf98dd /fs/btrfs/disk-io.c | |
parent | ca7a79ad8dbe24669a8ced01f9fc0126b872745d (diff) |
Btrfs: Handle write errors on raid1 and raid10
When duplicate copies exist, writes are allowed to fail to one of those
copies. This changeset includes a few changes that allow the FS to
continue even when some IOs fail.
It also adds verification of the parent generation number for btree blocks.
This generation is stored in the pointer to a block, and it ensures
that missed writes are detected.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 56 |
1 files changed, 51 insertions, 5 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index edee7a44f861..574b1245964e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -205,6 +205,33 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
205 | return 0; | 205 | return 0; |
206 | } | 206 | } |
207 | 207 | ||
208 | static int verify_parent_transid(struct extent_io_tree *io_tree, | ||
209 | struct extent_buffer *eb, u64 parent_transid) | ||
210 | { | ||
211 | int ret; | ||
212 | |||
213 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | ||
214 | return 0; | ||
215 | |||
216 | lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); | ||
217 | if (extent_buffer_uptodate(io_tree, eb) && | ||
218 | btrfs_header_generation(eb) == parent_transid) { | ||
219 | ret = 0; | ||
220 | goto out; | ||
221 | } | ||
222 | printk("parent transid verify failed on %llu wanted %llu found %llu\n", | ||
223 | (unsigned long long)eb->start, | ||
224 | (unsigned long long)parent_transid, | ||
225 | (unsigned long long)btrfs_header_generation(eb)); | ||
226 | ret = 1; | ||
227 | out: | ||
228 | clear_extent_buffer_uptodate(io_tree, eb); | ||
229 | unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, | ||
230 | GFP_NOFS); | ||
231 | return ret; | ||
232 | |||
233 | } | ||
234 | |||
208 | static int btree_read_extent_buffer_pages(struct btrfs_root *root, | 235 | static int btree_read_extent_buffer_pages(struct btrfs_root *root, |
209 | struct extent_buffer *eb, | 236 | struct extent_buffer *eb, |
210 | u64 start, u64 parent_transid) | 237 | u64 start, u64 parent_transid) |
@@ -218,7 +245,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
218 | while (1) { | 245 | while (1) { |
219 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | 246 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, |
220 | btree_get_extent, mirror_num); | 247 | btree_get_extent, mirror_num); |
221 | if (!ret) | 248 | if (!ret && |
249 | !verify_parent_transid(io_tree, eb, parent_transid)) | ||
222 | return ret; | 250 | return ret; |
223 | 251 | ||
224 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 252 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, |
@@ -330,6 +358,13 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
330 | ret = -EIO; | 358 | ret = -EIO; |
331 | goto err; | 359 | goto err; |
332 | } | 360 | } |
361 | if (memcmp_extent_buffer(eb, root->fs_info->fsid, | ||
362 | (unsigned long)btrfs_header_fsid(eb), | ||
363 | BTRFS_FSID_SIZE)) { | ||
364 | printk("bad fsid on block %Lu\n", eb->start); | ||
365 | ret = -EIO; | ||
366 | goto err; | ||
367 | } | ||
333 | found_level = btrfs_header_level(eb); | 368 | found_level = btrfs_header_level(eb); |
334 | 369 | ||
335 | ret = csum_tree_block(root, eb, 1); | 370 | ret = csum_tree_block(root, eb, 1); |
@@ -1363,7 +1398,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | |||
1363 | "I/O error on %s\n", | 1398 | "I/O error on %s\n", |
1364 | bdevname(bh->b_bdev, b)); | 1399 | bdevname(bh->b_bdev, b)); |
1365 | } | 1400 | } |
1366 | set_buffer_write_io_error(bh); | 1401 | /* note, we dont' set_buffer_write_io_error because we have |
1402 | * our own ways of dealing with the IO errors | ||
1403 | */ | ||
1367 | clear_buffer_uptodate(bh); | 1404 | clear_buffer_uptodate(bh); |
1368 | } | 1405 | } |
1369 | unlock_buffer(bh); | 1406 | unlock_buffer(bh); |
@@ -1459,7 +1496,8 @@ int write_all_supers(struct btrfs_root *root) | |||
1459 | ret = submit_bh(WRITE, bh); | 1496 | ret = submit_bh(WRITE, bh); |
1460 | BUG_ON(ret); | 1497 | BUG_ON(ret); |
1461 | wait_on_buffer(bh); | 1498 | wait_on_buffer(bh); |
1462 | BUG_ON(!buffer_uptodate(bh)); | 1499 | if (!buffer_uptodate(bh)) |
1500 | total_errors++; | ||
1463 | } else { | 1501 | } else { |
1464 | total_errors++; | 1502 | total_errors++; |
1465 | } | 1503 | } |
@@ -1607,10 +1645,18 @@ int close_ctree(struct btrfs_root *root) | |||
1607 | return 0; | 1645 | return 0; |
1608 | } | 1646 | } |
1609 | 1647 | ||
1610 | int btrfs_buffer_uptodate(struct extent_buffer *buf) | 1648 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) |
1611 | { | 1649 | { |
1650 | int ret; | ||
1612 | struct inode *btree_inode = buf->first_page->mapping->host; | 1651 | struct inode *btree_inode = buf->first_page->mapping->host; |
1613 | return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); | 1652 | |
1653 | ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); | ||
1654 | if (!ret) | ||
1655 | return ret; | ||
1656 | |||
1657 | ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, | ||
1658 | parent_transid); | ||
1659 | return !ret; | ||
1614 | } | 1660 | } |
1615 | 1661 | ||
1616 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf) | 1662 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf) |