aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStefan Behrens <sbehrens@giantdisaster.de>2012-11-06 09:06:47 -0500
committerJosef Bacik <jbacik@fusionio.com>2012-12-12 17:15:45 -0500
commitad6d620e2a5704f6bf3a39c92a75aad962c51cb3 (patch)
treeb93b0d1bbf9e0d0c24f12a843f9ba699aa44ecd5
parent72d7aefccd512b66cd5543e652eae04be12085fc (diff)
Btrfs: allow repair code to include target disk when searching mirrors
Make the target disk of a running device replace operation available for reading. This is only used as a last resort for the defect repair procedure, and it depends on the location of the data block to read, because during an ongoing device replace operation, the target drive is only partially filled with the filesystem data. Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de> Signed-off-by: Chris Mason <chris.mason@fusionio.com>
-rw-r--r--fs/btrfs/volumes.c159
1 files changed, 154 insertions, 5 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e2e01a327108..32a4948b621c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4004,6 +4004,12 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
4004 else 4004 else
4005 ret = 1; 4005 ret = 1;
4006 free_extent_map(em); 4006 free_extent_map(em);
4007
4008 btrfs_dev_replace_lock(&fs_info->dev_replace);
4009 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
4010 ret++;
4011 btrfs_dev_replace_unlock(&fs_info->dev_replace);
4012
4007 return ret; 4013 return ret;
4008} 4014}
4009 4015
@@ -4068,6 +4074,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4068 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 4074 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
4069 int dev_replace_is_ongoing = 0; 4075 int dev_replace_is_ongoing = 0;
4070 int num_alloc_stripes; 4076 int num_alloc_stripes;
4077 int patch_the_first_stripe_for_dev_replace = 0;
4078 u64 physical_to_patch_in_first_stripe = 0;
4071 4079
4072 read_lock(&em_tree->lock); 4080 read_lock(&em_tree->lock);
4073 em = lookup_extent_mapping(em_tree, logical, *length); 4081 em = lookup_extent_mapping(em_tree, logical, *length);
@@ -4084,9 +4092,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4084 map = (struct map_lookup *)em->bdev; 4092 map = (struct map_lookup *)em->bdev;
4085 offset = logical - em->start; 4093 offset = logical - em->start;
4086 4094
4087 if (mirror_num > map->num_stripes)
4088 mirror_num = 0;
4089
4090 stripe_nr = offset; 4095 stripe_nr = offset;
4091 /* 4096 /*
4092 * stripe_nr counts the total number of stripes we have to stride 4097 * stripe_nr counts the total number of stripes we have to stride
@@ -4118,6 +4123,88 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4118 if (!dev_replace_is_ongoing) 4123 if (!dev_replace_is_ongoing)
4119 btrfs_dev_replace_unlock(dev_replace); 4124 btrfs_dev_replace_unlock(dev_replace);
4120 4125
4126 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
4127 !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
4128 dev_replace->tgtdev != NULL) {
4129 /*
4130 * in dev-replace case, for repair case (that's the only
4131 * case where the mirror is selected explicitly when
4132 * calling btrfs_map_block), blocks left of the left cursor
4133 * can also be read from the target drive.
4134 * For REQ_GET_READ_MIRRORS, the target drive is added as
4135 * the last one to the array of stripes. For READ, it also
4136 * needs to be supported using the same mirror number.
4137 * If the requested block is not left of the left cursor,
4138 * EIO is returned. This can happen because btrfs_num_copies()
4139 * returns one more in the dev-replace case.
4140 */
4141 u64 tmp_length = *length;
4142 struct btrfs_bio *tmp_bbio = NULL;
4143 int tmp_num_stripes;
4144 u64 srcdev_devid = dev_replace->srcdev->devid;
4145 int index_srcdev = 0;
4146 int found = 0;
4147 u64 physical_of_found = 0;
4148
4149 ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
4150 logical, &tmp_length, &tmp_bbio, 0);
4151 if (ret) {
4152 WARN_ON(tmp_bbio != NULL);
4153 goto out;
4154 }
4155
4156 tmp_num_stripes = tmp_bbio->num_stripes;
4157 if (mirror_num > tmp_num_stripes) {
4158 /*
4159 * REQ_GET_READ_MIRRORS does not contain this
4160 * mirror, that means that the requested area
4161 * is not left of the left cursor
4162 */
4163 ret = -EIO;
4164 kfree(tmp_bbio);
4165 goto out;
4166 }
4167
4168 /*
4169 * process the rest of the function using the mirror_num
4170 * of the source drive. Therefore look it up first.
4171 * At the end, patch the device pointer to the one of the
4172 * target drive.
4173 */
4174 for (i = 0; i < tmp_num_stripes; i++) {
4175 if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
4176 /*
4177 * In case of DUP, in order to keep it
4178 * simple, only add the mirror with the
4179 * lowest physical address
4180 */
4181 if (found &&
4182 physical_of_found <=
4183 tmp_bbio->stripes[i].physical)
4184 continue;
4185 index_srcdev = i;
4186 found = 1;
4187 physical_of_found =
4188 tmp_bbio->stripes[i].physical;
4189 }
4190 }
4191
4192 if (found) {
4193 mirror_num = index_srcdev + 1;
4194 patch_the_first_stripe_for_dev_replace = 1;
4195 physical_to_patch_in_first_stripe = physical_of_found;
4196 } else {
4197 WARN_ON(1);
4198 ret = -EIO;
4199 kfree(tmp_bbio);
4200 goto out;
4201 }
4202
4203 kfree(tmp_bbio);
4204 } else if (mirror_num > map->num_stripes) {
4205 mirror_num = 0;
4206 }
4207
4121 num_stripes = 1; 4208 num_stripes = 1;
4122 stripe_index = 0; 4209 stripe_index = 0;
4123 stripe_nr_orig = stripe_nr; 4210 stripe_nr_orig = stripe_nr;
@@ -4188,8 +4275,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4188 BUG_ON(stripe_index >= map->num_stripes); 4275 BUG_ON(stripe_index >= map->num_stripes);
4189 4276
4190 num_alloc_stripes = num_stripes; 4277 num_alloc_stripes = num_stripes;
4191 if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD))) 4278 if (dev_replace_is_ongoing) {
4192 num_alloc_stripes <<= 1; 4279 if (rw & (REQ_WRITE | REQ_DISCARD))
4280 num_alloc_stripes <<= 1;
4281 if (rw & REQ_GET_READ_MIRRORS)
4282 num_alloc_stripes++;
4283 }
4193 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); 4284 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
4194 if (!bbio) { 4285 if (!bbio) {
4195 ret = -ENOMEM; 4286 ret = -ENOMEM;
@@ -4318,12 +4409,70 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4318 } 4409 }
4319 } 4410 }
4320 num_stripes = index_where_to_add; 4411 num_stripes = index_where_to_add;
4412 } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) &&
4413 dev_replace->tgtdev != NULL) {
4414 u64 srcdev_devid = dev_replace->srcdev->devid;
4415 int index_srcdev = 0;
4416 int found = 0;
4417 u64 physical_of_found = 0;
4418
4419 /*
4420 * During the dev-replace procedure, the target drive can
4421 * also be used to read data in case it is needed to repair
4422 * a corrupt block elsewhere. This is possible if the
4423 * requested area is left of the left cursor. In this area,
4424 * the target drive is a full copy of the source drive.
4425 */
4426 for (i = 0; i < num_stripes; i++) {
4427 if (bbio->stripes[i].dev->devid == srcdev_devid) {
4428 /*
4429 * In case of DUP, in order to keep it
4430 * simple, only add the mirror with the
4431 * lowest physical address
4432 */
4433 if (found &&
4434 physical_of_found <=
4435 bbio->stripes[i].physical)
4436 continue;
4437 index_srcdev = i;
4438 found = 1;
4439 physical_of_found = bbio->stripes[i].physical;
4440 }
4441 }
4442 if (found) {
4443 u64 length = map->stripe_len;
4444
4445 if (physical_of_found + length <=
4446 dev_replace->cursor_left) {
4447 struct btrfs_bio_stripe *tgtdev_stripe =
4448 bbio->stripes + num_stripes;
4449
4450 tgtdev_stripe->physical = physical_of_found;
4451 tgtdev_stripe->length =
4452 bbio->stripes[index_srcdev].length;
4453 tgtdev_stripe->dev = dev_replace->tgtdev;
4454
4455 num_stripes++;
4456 }
4457 }
4321 } 4458 }
4322 4459
4323 *bbio_ret = bbio; 4460 *bbio_ret = bbio;
4324 bbio->num_stripes = num_stripes; 4461 bbio->num_stripes = num_stripes;
4325 bbio->max_errors = max_errors; 4462 bbio->max_errors = max_errors;
4326 bbio->mirror_num = mirror_num; 4463 bbio->mirror_num = mirror_num;
4464
4465 /*
4466 * this is the case that REQ_READ && dev_replace_is_ongoing &&
4467 * mirror_num == num_stripes + 1 && dev_replace target drive is
4468 * available as a mirror
4469 */
4470 if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
4471 WARN_ON(num_stripes > 1);
4472 bbio->stripes[0].dev = dev_replace->tgtdev;
4473 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
4474 bbio->mirror_num = map->num_stripes + 1;
4475 }
4327out: 4476out:
4328 if (dev_replace_is_ongoing) 4477 if (dev_replace_is_ongoing)
4329 btrfs_dev_replace_unlock(dev_replace); 4478 btrfs_dev_replace_unlock(dev_replace);