aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Miao Xie <miaox@cn.fujitsu.com> 2014-09-12 06:43:56 -0400
committer Chris Mason <clm@fb.com> 2014-09-17 16:38:53 -0400
commit c1dc08967f69c6b5067f8302c600f6628123f3bf (patch)
tree 4ef7d1c9471cdadc8c49266d4e3dfd738dc73ae3 /fs
parent dc380aea5fa4636fc498a351eb720943bc644451 (diff)
Btrfs: do file data check by sub-bio's self
Direct IO splits the original bio into several sub-bios because of the RAID stripe limit, and the filesystem waits for all of the sub-bios to complete before running the final end-io processing. This made it very hard to implement data repair when a direct IO read fails, because in the final end-io function we do not know which mirror the data was read from. So, in order to implement data repair, we have to move the file data check out of the final end-io function and into the sub-bio end-io function, where we can get the mirror number of the device we accessed. This patch does that work as the first step of the direct IO data repair implementation.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/btrfs_inode.h 9
-rw-r--r-- fs/btrfs/extent_io.c 2
-rw-r--r-- fs/btrfs/inode.c 100
-rw-r--r-- fs/btrfs/volumes.h 5
4 files changed, 87 insertions, 29 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 8bea70e02a3d..4d309471294e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -245,8 +245,11 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
245 return 0; 245 return 0;
246} 246}
247 247
248#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
249
248struct btrfs_dio_private { 250struct btrfs_dio_private {
249 struct inode *inode; 251 struct inode *inode;
252 unsigned long flags;
250 u64 logical_offset; 253 u64 logical_offset;
251 u64 disk_bytenr; 254 u64 disk_bytenr;
252 u64 bytes; 255 u64 bytes;
@@ -263,6 +266,12 @@ struct btrfs_dio_private {
263 266
264 /* dio_bio came from fs/direct-io.c */ 267 /* dio_bio came from fs/direct-io.c */
265 struct bio *dio_bio; 268 struct bio *dio_bio;
269
270 /*
271 * The original bio may be splited to several sub-bios, this is
272 * done during endio of sub-bios
273 */
274 int (*subio_endio)(struct inode *, struct btrfs_io_bio *);
266}; 275};
267 276
268/* 277/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d2f8f39e11fd..ad04f85ac45d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2472,7 +2472,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2472 struct inode *inode = page->mapping->host; 2472 struct inode *inode = page->mapping->host;
2473 2473
2474 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2474 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
2475 "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err, 2475 "mirror=%u\n", (u64)bio->bi_iter.bi_sector, err,
2476 io_bio->mirror_num); 2476 io_bio->mirror_num);
2477 tree = &BTRFS_I(inode)->io_tree; 2477 tree = &BTRFS_I(inode)->io_tree;
2478 2478
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 70eaae127181..09d8c5ee8869 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7242,29 +7242,40 @@ unlock_err:
7242 return ret; 7242 return ret;
7243} 7243}
7244 7244
7245static void btrfs_endio_direct_read(struct bio *bio, int err) 7245static int btrfs_subio_endio_read(struct inode *inode,
7246 struct btrfs_io_bio *io_bio)
7246{ 7247{
7247 struct btrfs_dio_private *dip = bio->bi_private;
7248 struct bio_vec *bvec; 7248 struct bio_vec *bvec;
7249 struct inode *inode = dip->inode;
7250 struct bio *dio_bio;
7251 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7252 u64 start; 7249 u64 start;
7253 int ret;
7254 int i; 7250 int i;
7251 int ret;
7252 int err = 0;
7255 7253
7256 if (err || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 7254 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
7257 goto skip_checksum; 7255 return 0;
7258 7256
7259 start = dip->logical_offset; 7257 start = io_bio->logical;
7260 bio_for_each_segment_all(bvec, bio, i) { 7258 bio_for_each_segment_all(bvec, &io_bio->bio, i) {
7261 ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, 7259 ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
7262 0, start, bvec->bv_len); 7260 0, start, bvec->bv_len);
7263 if (ret) 7261 if (ret)
7264 err = -EIO; 7262 err = -EIO;
7265 start += bvec->bv_len; 7263 start += bvec->bv_len;
7266 } 7264 }
7267skip_checksum: 7265
7266 return err;
7267}
7268
7269static void btrfs_endio_direct_read(struct bio *bio, int err)
7270{
7271 struct btrfs_dio_private *dip = bio->bi_private;
7272 struct inode *inode = dip->inode;
7273 struct bio *dio_bio;
7274 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7275
7276 if (!err && (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED))
7277 err = btrfs_subio_endio_read(inode, io_bio);
7278
7268 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 7279 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
7269 dip->logical_offset + dip->bytes - 1); 7280 dip->logical_offset + dip->bytes - 1);
7270 dio_bio = dip->dio_bio; 7281 dio_bio = dip->dio_bio;
@@ -7342,6 +7353,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
7342static void btrfs_end_dio_bio(struct bio *bio, int err) 7353static void btrfs_end_dio_bio(struct bio *bio, int err)
7343{ 7354{
7344 struct btrfs_dio_private *dip = bio->bi_private; 7355 struct btrfs_dio_private *dip = bio->bi_private;
7356 int ret;
7345 7357
7346 if (err) { 7358 if (err) {
7347 btrfs_err(BTRFS_I(dip->inode)->root->fs_info, 7359 btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
@@ -7349,6 +7361,13 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
7349 btrfs_ino(dip->inode), bio->bi_rw, 7361 btrfs_ino(dip->inode), bio->bi_rw,
7350 (unsigned long long)bio->bi_iter.bi_sector, 7362 (unsigned long long)bio->bi_iter.bi_sector,
7351 bio->bi_iter.bi_size, err); 7363 bio->bi_iter.bi_size, err);
7364 } else if (dip->subio_endio) {
7365 ret = dip->subio_endio(dip->inode, btrfs_io_bio(bio));
7366 if (ret)
7367 err = ret;
7368 }
7369
7370 if (err) {
7352 dip->errors = 1; 7371 dip->errors = 1;
7353 7372
7354 /* 7373 /*
@@ -7379,6 +7398,38 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
7379 return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); 7398 return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
7380} 7399}
7381 7400
7401static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
7402 struct inode *inode,
7403 struct btrfs_dio_private *dip,
7404 struct bio *bio,
7405 u64 file_offset)
7406{
7407 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7408 struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
7409 int ret;
7410
7411 /*
7412 * We load all the csum data we need when we submit
7413 * the first bio to reduce the csum tree search and
7414 * contention.
7415 */
7416 if (dip->logical_offset == file_offset) {
7417 ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
7418 file_offset);
7419 if (ret)
7420 return ret;
7421 }
7422
7423 if (bio == dip->orig_bio)
7424 return 0;
7425
7426 file_offset -= dip->logical_offset;
7427 file_offset >>= inode->i_sb->s_blocksize_bits;
7428 io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset);
7429
7430 return 0;
7431}
7432
7382static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 7433static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
7383 int rw, u64 file_offset, int skip_sum, 7434 int rw, u64 file_offset, int skip_sum,
7384 int async_submit) 7435 int async_submit)
@@ -7418,16 +7469,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
7418 if (ret) 7469 if (ret)
7419 goto err; 7470 goto err;
7420 } else { 7471 } else {
7421 /* 7472 ret = btrfs_lookup_and_bind_dio_csum(root, inode, dip, bio,
7422 * We have loaded all the csum data we need when we submit 7473 file_offset);
7423 * the first bio, so skip it.
7424 */
7425 if (dip->logical_offset != file_offset)
7426 goto map;
7427
7428 /* Load all csum data at once. */
7429 ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
7430 file_offset);
7431 if (ret) 7474 if (ret)
7432 goto err; 7475 goto err;
7433 } 7476 }
@@ -7462,6 +7505,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7462 7505
7463 if (map_length >= orig_bio->bi_iter.bi_size) { 7506 if (map_length >= orig_bio->bi_iter.bi_size) {
7464 bio = orig_bio; 7507 bio = orig_bio;
7508 dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
7465 goto submit; 7509 goto submit;
7466 } 7510 }
7467 7511
@@ -7478,6 +7522,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7478 7522
7479 bio->bi_private = dip; 7523 bio->bi_private = dip;
7480 bio->bi_end_io = btrfs_end_dio_bio; 7524 bio->bi_end_io = btrfs_end_dio_bio;
7525 btrfs_io_bio(bio)->logical = file_offset;
7481 atomic_inc(&dip->pending_bios); 7526 atomic_inc(&dip->pending_bios);
7482 7527
7483 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { 7528 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
@@ -7512,6 +7557,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7512 goto out_err; 7557 goto out_err;
7513 bio->bi_private = dip; 7558 bio->bi_private = dip;
7514 bio->bi_end_io = btrfs_end_dio_bio; 7559 bio->bi_end_io = btrfs_end_dio_bio;
7560 btrfs_io_bio(bio)->logical = file_offset;
7515 7561
7516 map_length = orig_bio->bi_iter.bi_size; 7562 map_length = orig_bio->bi_iter.bi_size;
7517 ret = btrfs_map_block(root->fs_info, rw, 7563 ret = btrfs_map_block(root->fs_info, rw,
@@ -7568,7 +7614,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7568 goto free_ordered; 7614 goto free_ordered;
7569 } 7615 }
7570 7616
7571 dip = kmalloc(sizeof(*dip), GFP_NOFS); 7617 dip = kzalloc(sizeof(*dip), GFP_NOFS);
7572 if (!dip) { 7618 if (!dip) {
7573 ret = -ENOMEM; 7619 ret = -ENOMEM;
7574 goto free_io_bio; 7620 goto free_io_bio;
@@ -7580,21 +7626,23 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7580 dip->bytes = dio_bio->bi_iter.bi_size; 7626 dip->bytes = dio_bio->bi_iter.bi_size;
7581 dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; 7627 dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
7582 io_bio->bi_private = dip; 7628 io_bio->bi_private = dip;
7583 dip->errors = 0;
7584 dip->orig_bio = io_bio; 7629 dip->orig_bio = io_bio;
7585 dip->dio_bio = dio_bio; 7630 dip->dio_bio = dio_bio;
7586 atomic_set(&dip->pending_bios, 0); 7631 atomic_set(&dip->pending_bios, 0);
7632 btrfs_bio = btrfs_io_bio(io_bio);
7633 btrfs_bio->logical = file_offset;
7587 7634
7588 if (write) 7635 if (write) {
7589 io_bio->bi_end_io = btrfs_endio_direct_write; 7636 io_bio->bi_end_io = btrfs_endio_direct_write;
7590 else 7637 } else {
7591 io_bio->bi_end_io = btrfs_endio_direct_read; 7638 io_bio->bi_end_io = btrfs_endio_direct_read;
7639 dip->subio_endio = btrfs_subio_endio_read;
7640 }
7592 7641
7593 ret = btrfs_submit_direct_hook(rw, dip, skip_sum); 7642 ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
7594 if (!ret) 7643 if (!ret)
7595 return; 7644 return;
7596 7645
7597 btrfs_bio = btrfs_io_bio(io_bio);
7598 if (btrfs_bio->end_io) 7646 if (btrfs_bio->end_io)
7599 btrfs_bio->end_io(btrfs_bio, ret); 7647 btrfs_bio->end_io(btrfs_bio, ret);
7600free_io_bio: 7648free_io_bio:
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2b37da3dd408..91998bc0b4c4 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -268,8 +268,9 @@ struct btrfs_fs_devices {
268 */ 268 */
269typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err); 269typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err);
270struct btrfs_io_bio { 270struct btrfs_io_bio {
271 unsigned long mirror_num; 271 unsigned int mirror_num;
272 unsigned long stripe_index; 272 unsigned int stripe_index;
273 u64 logical;
273 u8 *csum; 274 u8 *csum;
274 u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; 275 u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
275 u8 *csum_allocated; 276 u8 *csum_allocated;