diff options
author | Stefan Behrens <sbehrens@giantdisaster.de> | 2012-05-25 10:06:08 -0400 |
---|---|---|
committer | Josef Bacik <josef@redhat.com> | 2012-05-30 10:23:39 -0400 |
commit | 442a4f6308e694e0fa6025708bd5e4e424bbf51c (patch) | |
tree | e782db1bcbec25283048d77871e0bed7ad04567c /fs/btrfs/volumes.c | |
parent | d07eb9117050c9ed3f78296ebcc06128b52693be (diff) |
Btrfs: add device counters for detected IO and checksum errors
The goal is to detect when drives start to show an increased error rate,
i.e. when they should be replaced soon. To that end, statistics counters
are added that count I/O errors (read, write and flush). Additionally,
software-detected errors such as checksum errors and corrupted blocks are
counted.
Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 94 |
1 files changed, 92 insertions, 2 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 48a06d1fc067..2915521f44ee 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
26 | #include <linux/ratelimit.h> | ||
26 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
27 | #include <asm/div64.h> | 28 | #include <asm/div64.h> |
28 | #include "compat.h" | 29 | #include "compat.h" |
@@ -4001,13 +4002,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
4001 | return 0; | 4002 | return 0; |
4002 | } | 4003 | } |
4003 | 4004 | ||
/*
 * Pack a stripe index into the two low-order bits of a bio private pointer.
 *
 * With single, dup, RAID0, RAID1 and RAID10 the stripe index is at most 1,
 * so two spare bits are sufficient. The alternative to stealing bits from
 * the pointer would be to allocate an intermediate structure that holds the
 * old private pointer together with the stripe index.
 *
 * Both preconditions are enforced with BUG_ON(): the two low bits of
 * @bi_private must be clear, and @stripe_index must not exceed 3.
 *
 * Returns @bi_private with @stripe_index OR-ed into its low bits.
 */
static void *merge_stripe_index_into_bio_private(void *bi_private,
						 unsigned int stripe_index)
{
	const uintptr_t low_bits = 3;
	uintptr_t tagged = (uintptr_t)bi_private;

	/* the bits about to be borrowed must not carry pointer information */
	BUG_ON((tagged & low_bits) != 0);
	BUG_ON(stripe_index > 3);

	tagged |= stripe_index;
	return (void *)tagged;
}
4019 | |||
/*
 * Recover the struct btrfs_bio pointer from a bio private value by clearing
 * the two low-order bits, which are used to carry a stripe index.
 *
 * Returns @bi_private with its two lowest bits masked off, cast to
 * struct btrfs_bio *.
 */
static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
{
	const uintptr_t index_bits = 3;
	uintptr_t addr = (uintptr_t)bi_private;

	addr &= ~index_bits;
	return (struct btrfs_bio *)addr;
}
4024 | |||
/*
 * Read back the stripe index that is carried in the two low-order bits of
 * a bio private value.
 *
 * Returns a value in the range 0..3.
 */
static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
{
	uintptr_t raw = (uintptr_t)bi_private;

	/* only the two lowest bits are of interest */
	return (unsigned int)(raw & 3);
}
4029 | |||
4004 | static void btrfs_end_bio(struct bio *bio, int err) | 4030 | static void btrfs_end_bio(struct bio *bio, int err) |
4005 | { | 4031 | { |
4006 | struct btrfs_bio *bbio = bio->bi_private; | 4032 | struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); |
4007 | int is_orig_bio = 0; | 4033 | int is_orig_bio = 0; |
4008 | 4034 | ||
4009 | if (err) | 4035 | if (err) { |
4010 | atomic_inc(&bbio->error); | 4036 | atomic_inc(&bbio->error); |
4037 | if (err == -EIO || err == -EREMOTEIO) { | ||
4038 | unsigned int stripe_index = | ||
4039 | extract_stripe_index_from_bio_private( | ||
4040 | bio->bi_private); | ||
4041 | struct btrfs_device *dev; | ||
4042 | |||
4043 | BUG_ON(stripe_index >= bbio->num_stripes); | ||
4044 | dev = bbio->stripes[stripe_index].dev; | ||
4045 | if (bio->bi_rw & WRITE) | ||
4046 | btrfs_dev_stat_inc(dev, | ||
4047 | BTRFS_DEV_STAT_WRITE_ERRS); | ||
4048 | else | ||
4049 | btrfs_dev_stat_inc(dev, | ||
4050 | BTRFS_DEV_STAT_READ_ERRS); | ||
4051 | if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) | ||
4052 | btrfs_dev_stat_inc(dev, | ||
4053 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
4054 | btrfs_dev_stat_print_on_error(dev); | ||
4055 | } | ||
4056 | } | ||
4011 | 4057 | ||
4012 | if (bio == bbio->orig_bio) | 4058 | if (bio == bbio->orig_bio) |
4013 | is_orig_bio = 1; | 4059 | is_orig_bio = 1; |
@@ -4149,6 +4195,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
4149 | bio = first_bio; | 4195 | bio = first_bio; |
4150 | } | 4196 | } |
4151 | bio->bi_private = bbio; | 4197 | bio->bi_private = bbio; |
4198 | bio->bi_private = merge_stripe_index_into_bio_private( | ||
4199 | bio->bi_private, (unsigned int)dev_nr); | ||
4152 | bio->bi_end_io = btrfs_end_bio; | 4200 | bio->bi_end_io = btrfs_end_bio; |
4153 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; | 4201 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; |
4154 | dev = bbio->stripes[dev_nr].dev; | 4202 | dev = bbio->stripes[dev_nr].dev; |
@@ -4509,6 +4557,28 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
4509 | return ret; | 4557 | return ret; |
4510 | } | 4558 | } |
4511 | 4559 | ||
4560 | struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | ||
4561 | u64 logical, int mirror_num) | ||
4562 | { | ||
4563 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
4564 | int ret; | ||
4565 | u64 map_length = 0; | ||
4566 | struct btrfs_bio *bbio = NULL; | ||
4567 | struct btrfs_device *device; | ||
4568 | |||
4569 | BUG_ON(mirror_num == 0); | ||
4570 | ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, | ||
4571 | mirror_num); | ||
4572 | if (ret) { | ||
4573 | BUG_ON(bbio != NULL); | ||
4574 | return NULL; | ||
4575 | } | ||
4576 | BUG_ON(mirror_num != bbio->mirror_num); | ||
4577 | device = bbio->stripes[mirror_num - 1].dev; | ||
4578 | kfree(bbio); | ||
4579 | return device; | ||
4580 | } | ||
4581 | |||
4512 | int btrfs_read_chunk_tree(struct btrfs_root *root) | 4582 | int btrfs_read_chunk_tree(struct btrfs_root *root) |
4513 | { | 4583 | { |
4514 | struct btrfs_path *path; | 4584 | struct btrfs_path *path; |
@@ -4583,3 +4653,23 @@ error: | |||
4583 | btrfs_free_path(path); | 4653 | btrfs_free_path(path); |
4584 | return ret; | 4654 | return ret; |
4585 | } | 4655 | } |
4656 | |||
/*
 * Increment one statistics counter of a device and then emit the device's
 * error counters via btrfs_dev_stat_print_on_error().
 *
 * @dev:   device whose counter is incremented
 * @index: counter selector, passed through to btrfs_dev_stat_inc()
 */
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
	/* count first so that the printed line already includes this event */
	btrfs_dev_stat_inc(dev, index);

	btrfs_dev_stat_print_on_error(dev);
}
4662 | |||
4663 | void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) | ||
4664 | { | ||
4665 | printk_ratelimited(KERN_ERR | ||
4666 | "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", | ||
4667 | dev->name, | ||
4668 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), | ||
4669 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), | ||
4670 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), | ||
4671 | btrfs_dev_stat_read(dev, | ||
4672 | BTRFS_DEV_STAT_CORRUPTION_ERRS), | ||
4673 | btrfs_dev_stat_read(dev, | ||
4674 | BTRFS_DEV_STAT_GENERATION_ERRS)); | ||
4675 | } | ||