aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fusionio.com>2012-10-19 16:50:56 -0400
committerJosef Bacik <jbacik@fusionio.com>2012-12-11 13:31:32 -0500
commitde1ee92ac3bce4c9d760016c4d6198158e6e2f15 (patch)
treedc50044005ce08b77dbf1aa896082c6e031850b0 /fs
parent08e007d2e57744472a9424735a368ffe6d625597 (diff)
Btrfs: recheck bio against block device when we map the bio
Alex reported a problem where we were writing between chunks on a rbd device. The thing is we do bio_add_page using logical offsets, but the physical offset may be different. So when we map the bio now check to see if the bio is still ok with the physical offset, and if it is not, split the bio up and redo the bio_add_page with the physical sector. This fixes the problem for Alex and doesn't affect performance in the normal case. Thanks, Reported-and-tested-by: Alex Elder <elder@inktank.com> Signed-off-by: Josef Bacik <jbacik@fusionio.com> Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/volumes.c159
1 files changed, 131 insertions, 28 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a8adf2686473..eaaf0bf52791 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4217,6 +4217,113 @@ static noinline void schedule_bio(struct btrfs_root *root,
4217 &device->work); 4217 &device->work);
4218} 4218}
4219 4219
4220static int bio_size_ok(struct block_device *bdev, struct bio *bio,
4221 sector_t sector)
4222{
4223 struct bio_vec *prev;
4224 struct request_queue *q = bdev_get_queue(bdev);
4225 unsigned short max_sectors = queue_max_sectors(q);
4226 struct bvec_merge_data bvm = {
4227 .bi_bdev = bdev,
4228 .bi_sector = sector,
4229 .bi_rw = bio->bi_rw,
4230 };
4231
4232 if (bio->bi_vcnt == 0) {
4233 WARN_ON(1);
4234 return 1;
4235 }
4236
4237 prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
4238 if ((bio->bi_size >> 9) > max_sectors)
4239 return 0;
4240
4241 if (!q->merge_bvec_fn)
4242 return 1;
4243
4244 bvm.bi_size = bio->bi_size - prev->bv_len;
4245 if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
4246 return 0;
4247 return 1;
4248}
4249
4250static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
4251 struct bio *bio, u64 physical, int dev_nr,
4252 int rw, int async)
4253{
4254 struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
4255
4256 bio->bi_private = bbio;
4257 bio->bi_private = merge_stripe_index_into_bio_private(
4258 bio->bi_private, (unsigned int)dev_nr);
4259 bio->bi_end_io = btrfs_end_bio;
4260 bio->bi_sector = physical >> 9;
4261#ifdef DEBUG
4262 {
4263 struct rcu_string *name;
4264
4265 rcu_read_lock();
4266 name = rcu_dereference(dev->name);
4267 pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
4268 "(%s id %llu), size=%u\n", rw,
4269 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
4270 name->str, dev->devid, bio->bi_size);
4271 rcu_read_unlock();
4272 }
4273#endif
4274 bio->bi_bdev = dev->bdev;
4275 if (async)
4276 schedule_bio(root, dev, rw, bio);
4277 else
4278 btrfsic_submit_bio(rw, bio);
4279}
4280
4281static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
4282 struct bio *first_bio, struct btrfs_device *dev,
4283 int dev_nr, int rw, int async)
4284{
4285 struct bio_vec *bvec = first_bio->bi_io_vec;
4286 struct bio *bio;
4287 int nr_vecs = bio_get_nr_vecs(dev->bdev);
4288 u64 physical = bbio->stripes[dev_nr].physical;
4289
4290again:
4291 bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS);
4292 if (!bio)
4293 return -ENOMEM;
4294
4295 while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
4296 if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
4297 bvec->bv_offset) < bvec->bv_len) {
4298 u64 len = bio->bi_size;
4299
4300 atomic_inc(&bbio->stripes_pending);
4301 submit_stripe_bio(root, bbio, bio, physical, dev_nr,
4302 rw, async);
4303 physical += len;
4304 goto again;
4305 }
4306 bvec++;
4307 }
4308
4309 submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async);
4310 return 0;
4311}
4312
4313static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
4314{
4315 atomic_inc(&bbio->error);
4316 if (atomic_dec_and_test(&bbio->stripes_pending)) {
4317 bio->bi_private = bbio->private;
4318 bio->bi_end_io = bbio->end_io;
4319 bio->bi_bdev = (struct block_device *)
4320 (unsigned long)bbio->mirror_num;
4321 bio->bi_sector = logical >> 9;
4322 kfree(bbio);
4323 bio_endio(bio, -EIO);
4324 }
4325}
4326
4220int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, 4327int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4221 int mirror_num, int async_submit) 4328 int mirror_num, int async_submit)
4222{ 4329{
@@ -4255,40 +4362,36 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4255 atomic_set(&bbio->stripes_pending, bbio->num_stripes); 4362 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
4256 4363
4257 while (dev_nr < total_devs) { 4364 while (dev_nr < total_devs) {
4365 dev = bbio->stripes[dev_nr].dev;
4366 if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
4367 bbio_error(bbio, first_bio, logical);
4368 dev_nr++;
4369 continue;
4370 }
4371
4372 /*
 4373 * Check and see if we're ok with this bio based on its size
4374 * and offset with the given device.
4375 */
4376 if (!bio_size_ok(dev->bdev, first_bio,
4377 bbio->stripes[dev_nr].physical >> 9)) {
4378 ret = breakup_stripe_bio(root, bbio, first_bio, dev,
4379 dev_nr, rw, async_submit);
4380 BUG_ON(ret);
4381 dev_nr++;
4382 continue;
4383 }
4384
4258 if (dev_nr < total_devs - 1) { 4385 if (dev_nr < total_devs - 1) {
4259 bio = bio_clone(first_bio, GFP_NOFS); 4386 bio = bio_clone(first_bio, GFP_NOFS);
4260 BUG_ON(!bio); /* -ENOMEM */ 4387 BUG_ON(!bio); /* -ENOMEM */
4261 } else { 4388 } else {
4262 bio = first_bio; 4389 bio = first_bio;
4263 } 4390 }
4264 bio->bi_private = bbio; 4391
4265 bio->bi_private = merge_stripe_index_into_bio_private( 4392 submit_stripe_bio(root, bbio, bio,
4266 bio->bi_private, (unsigned int)dev_nr); 4393 bbio->stripes[dev_nr].physical, dev_nr, rw,
4267 bio->bi_end_io = btrfs_end_bio; 4394 async_submit);
4268 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
4269 dev = bbio->stripes[dev_nr].dev;
4270 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
4271#ifdef DEBUG
4272 struct rcu_string *name;
4273
4274 rcu_read_lock();
4275 name = rcu_dereference(dev->name);
4276 pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
4277 "(%s id %llu), size=%u\n", rw,
4278 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
4279 name->str, dev->devid, bio->bi_size);
4280 rcu_read_unlock();
4281#endif
4282 bio->bi_bdev = dev->bdev;
4283 if (async_submit)
4284 schedule_bio(root, dev, rw, bio);
4285 else
4286 btrfsic_submit_bio(rw, bio);
4287 } else {
4288 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
4289 bio->bi_sector = logical >> 9;
4290 bio_endio(bio, -EIO);
4291 }
4292 dev_nr++; 4395 dev_nr++;
4293 } 4396 }
4294 return 0; 4397 return 0;