aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2010-05-26 11:04:10 -0400
committerChris Mason <chris.mason@oracle.com>2010-05-26 21:34:45 -0400
commit46bfbb5c0740c200d2b66afcbd1f9d64ab320940 (patch)
tree3ec1523f7da103a79f71fc5f486596307a4eeb54
parent94b604429a6cd94ddc128fa2772c57209bb1318f (diff)
Btrfs: fix preallocation and nodatacow checks in O_DIRECT
The O_DIRECT code wasn't checking for multiple references on preallocated or nodatacow extents. This means it wasn't honoring snapshots properly. The fix here is to add an explicit check for multiple references. This also fixes the math for selecting the correct disk block, making sure not to go past the end of the extent. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/inode.c156
1 files changed, 140 insertions, 16 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5d62f21b2e45..5ab120d544bc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5255,6 +5255,106 @@ out:
5255 return em; 5255 return em;
5256} 5256}
5257 5257
5258/*
5259 * returns 1 when writing [offset, offset + len) in place (nocow) is safe,
5260 * < 0 on error (e.g. -ENOMEM), 0 if the block must be cow'd
5261 */
5262static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5263 struct inode *inode, u64 offset, u64 len)
5264{
5265 struct btrfs_path *path;
5266 int ret;
5267 struct extent_buffer *leaf;
5268 struct btrfs_root *root = BTRFS_I(inode)->root;
5269 struct btrfs_file_extent_item *fi;
5270 struct btrfs_key key;
5271 u64 disk_bytenr;
5272 u64 backref_offset;
5273 u64 extent_end;
5274 u64 num_bytes;
5275 int slot;
5276 int found_type;
5277
5278 path = btrfs_alloc_path();
5279 if (!path)
5280 return -ENOMEM;
5281
5282 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
5283 offset, 0);
5284 if (ret < 0)
5285 goto out; /* lookup failed: hand the negative error back to the caller */
5286
5287 slot = path->slots[0];
5288 if (ret == 1) {
5289 if (slot == 0) {
5290 /* can't find the item, must cow */
5291 ret = 0;
5292 goto out;
5293 }
5294 slot--; /* presumably no exact key match; examine the preceding item instead */
5295 }
5296 ret = 0; /* from here on every early exit means "must cow" */
5297 leaf = path->nodes[0];
5298 btrfs_item_key_to_cpu(leaf, &key, slot);
5299 if (key.objectid != inode->i_ino ||
5300 key.type != BTRFS_EXTENT_DATA_KEY) {
5301 /* not our file or wrong item type, must cow */
5302 goto out;
5303 }
5304
5305 if (key.offset > offset) {
5306 /* Wrong offset, must cow */
5307 goto out;
5308 }
5309
5310 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
5311 found_type = btrfs_file_extent_type(leaf, fi);
5312 if (found_type != BTRFS_FILE_EXTENT_REG &&
5313 found_type != BTRFS_FILE_EXTENT_PREALLOC) {
5314 /* not a regular extent, must cow */
5315 goto out;
5316 }
5317 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
5318 backref_offset = btrfs_file_extent_offset(leaf, fi);
5319
5320 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
5321 if (extent_end < offset + len) {
5322 /* extent doesn't include our full range, must cow */
5323 goto out;
5324 }
5325
5326 if (btrfs_extent_readonly(root, disk_bytenr))
5327 goto out; /* extent reported read-only, must cow */
5328
5329 /*
5330 * look for other files referencing this extent, if we
5331 * find any we must cow
5332 */
5333 if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
5334 key.offset - backref_offset, disk_bytenr))
5335 goto out;
5336
5337 /*
5338 * adjust disk_bytenr and num_bytes to cover just the bytes
5339 * in this extent we are about to write. If there
5340 * are any csums in that range we have to cow in order
5341 * to keep the csums correct
5342 */
5343 disk_bytenr += backref_offset;
5344 disk_bytenr += offset - key.offset;
5345 num_bytes = min(offset + len, extent_end) - offset;
5346 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
5347 goto out;
5348 /*
5349 * all of the above have passed, it is safe to overwrite this extent
5350 * without cow
5351 */
5352 ret = 1;
5353out:
5354 btrfs_free_path(path); /* single exit point: the path is released on every route after allocation */
5355 return ret;
5356}
5357
5258static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, 5358static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5259 struct buffer_head *bh_result, int create) 5359 struct buffer_head *bh_result, int create)
5260{ 5360{
@@ -5262,6 +5362,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5262 struct btrfs_root *root = BTRFS_I(inode)->root; 5362 struct btrfs_root *root = BTRFS_I(inode)->root;
5263 u64 start = iblock << inode->i_blkbits; 5363 u64 start = iblock << inode->i_blkbits;
5264 u64 len = bh_result->b_size; 5364 u64 len = bh_result->b_size;
5365 struct btrfs_trans_handle *trans;
5265 5366
5266 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 5367 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
5267 if (IS_ERR(em)) 5368 if (IS_ERR(em))
@@ -5306,42 +5407,65 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5306 * just use the extent. 5407 * just use the extent.
5307 * 5408 *
5308 */ 5409 */
5309 if (!create) 5410 if (!create) {
5411 len = em->len - (start - em->start);
5310 goto map; 5412 goto map;
5413 }
5311 5414
5312 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || 5415 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
5313 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && 5416 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
5314 em->block_start != EXTENT_MAP_HOLE)) { 5417 em->block_start != EXTENT_MAP_HOLE)) {
5315 u64 block_start;
5316 int type; 5418 int type;
5317 int ret; 5419 int ret;
5420 u64 block_start;
5318 5421
5319 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 5422 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5320 type = BTRFS_ORDERED_PREALLOC; 5423 type = BTRFS_ORDERED_PREALLOC;
5321 else 5424 else
5322 type = BTRFS_ORDERED_NOCOW; 5425 type = BTRFS_ORDERED_NOCOW;
5323 len = min(len, em->block_len - (start - em->start)); 5426 len = min(len, em->len - (start - em->start));
5324 block_start = em->block_start + (start - em->start); 5427 block_start = em->block_start + (start - em->start);
5325 ret = btrfs_add_ordered_extent_dio(inode, start, 5428
5326 start, len, len, type); 5429 /*
5327 if (ret) { 5430 * we're not going to log anything, but we do need
5328 free_extent_map(em); 5431 * to make sure the current transaction stays open
5329 return ret; 5432 * while we look for nocow cross refs
5433 */
5434 trans = btrfs_join_transaction(root, 0);
5435 if (!trans)
5436 goto must_cow;
5437
5438 if (can_nocow_odirect(trans, inode, start, len) == 1) {
5439 ret = btrfs_add_ordered_extent_dio(inode, start,
5440 block_start, len, len, type);
5441 btrfs_end_transaction(trans, root);
5442 if (ret) {
5443 free_extent_map(em);
5444 return ret;
5445 }
5446 goto unlock;
5330 } 5447 }
5331 } else { 5448 btrfs_end_transaction(trans, root);
5332 free_extent_map(em);
5333 em = btrfs_new_extent_direct(inode, start, len);
5334 if (IS_ERR(em))
5335 return PTR_ERR(em);
5336 len = min(len, em->block_len);
5337 } 5449 }
5450must_cow:
5451 /*
5452 * this will cow the extent, reset the len in case we changed
5453 * it above
5454 */
5455 len = bh_result->b_size;
5456 free_extent_map(em);
5457 em = btrfs_new_extent_direct(inode, start, len);
5458 if (IS_ERR(em))
5459 return PTR_ERR(em);
5460 len = min(len, em->len - (start - em->start));
5461unlock:
5338 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, 5462 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
5339 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, 5463 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
5340 0, NULL, GFP_NOFS); 5464 0, NULL, GFP_NOFS);
5341map: 5465map:
5342 bh_result->b_blocknr = (em->block_start + (start - em->start)) >> 5466 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
5343 inode->i_blkbits; 5467 inode->i_blkbits;
5344 bh_result->b_size = em->len - (start - em->start); 5468 bh_result->b_size = len;
5345 bh_result->b_bdev = em->bdev; 5469 bh_result->b_bdev = em->bdev;
5346 set_buffer_mapped(bh_result); 5470 set_buffer_mapped(bh_result);
5347 if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 5471 if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -5547,7 +5671,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5547 bvec++; 5671 bvec++;
5548 } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); 5672 } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
5549 5673
5550 dip->disk_bytenr = bio->bi_sector << 9; 5674 dip->disk_bytenr = (u64)bio->bi_sector << 9;
5551 bio->bi_private = dip; 5675 bio->bi_private = dip;
5552 5676
5553 if (write) 5677 if (write)