aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Josef Bacik <jbacik@fusionio.com> 2013-06-21 16:37:03 -0400
committer Josef Bacik <jbacik@fusionio.com> 2013-07-02 11:50:45 -0400
commit 7ee9e4405f264e9eda808aa5ca4522746a1af9c1 (patch)
tree 66b0ebcb2594309044e3f25ab6836b09662c7601 /fs
parent 925a6efb8ff0c2bdbec107ed9890e62650c83306 (diff)
Btrfs: check if we can nocow if we don't have data space
We always just try to reserve data space when we write, but if we are out of space and have prealloc'ed extents, we should still be able to write successfully. This patch checks whether we can write to prealloc'ed space and, if we can, allows the write to continue. With this patch we now pass xfstests generic/274. Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/ctree.h 4
-rw-r--r-- fs/btrfs/extent-tree.c 1
-rw-r--r-- fs/btrfs/extent_io.c 3
-rw-r--r-- fs/btrfs/extent_io.h 1
-rw-r--r-- fs/btrfs/file.c 125
-rw-r--r-- fs/btrfs/inode.c 40
6 files changed, 148 insertions, 26 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b528a5509cb8..e795bf135e80 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3552,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
3552struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, 3552struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
3553 size_t pg_offset, u64 start, u64 len, 3553 size_t pg_offset, u64 start, u64 len,
3554 int create); 3554 int create);
3555noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
3556 struct inode *inode, u64 offset, u64 *len,
3557 u64 *orig_start, u64 *orig_block_len,
3558 u64 *ram_bytes);
3555 3559
3556/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ 3560/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
3557#if defined(ClearPageFsMisc) && !defined(ClearPageChecked) 3561#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5154b91f6380..11ba82e43e8b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3666,6 +3666,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3666 3666
3667 data_sinfo = root->fs_info->data_sinfo; 3667 data_sinfo = root->fs_info->data_sinfo;
3668 spin_lock(&data_sinfo->lock); 3668 spin_lock(&data_sinfo->lock);
3669 WARN_ON(data_sinfo->bytes_may_use < bytes);
3669 data_sinfo->bytes_may_use -= bytes; 3670 data_sinfo->bytes_may_use -= bytes;
3670 trace_btrfs_space_reservation(root->fs_info, "space_info", 3671 trace_btrfs_space_reservation(root->fs_info, "space_info",
3671 data_sinfo->flags, bytes, 0); 3672 data_sinfo->flags, bytes, 0);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a83d7019ede9..f8586a957a02 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -543,6 +543,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
543 543
544 btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); 544 btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
545 545
546 if (bits & EXTENT_DELALLOC)
547 bits |= EXTENT_NORESERVE;
548
546 if (delete) 549 if (delete)
547 bits |= ~EXTENT_CTLBITS; 550 bits |= ~EXTENT_CTLBITS;
548 bits |= EXTENT_FIRST_DELALLOC; 551 bits |= EXTENT_FIRST_DELALLOC;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 41fb81e7ec53..3b8c4e26e1da 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -19,6 +19,7 @@
19#define EXTENT_FIRST_DELALLOC (1 << 12) 19#define EXTENT_FIRST_DELALLOC (1 << 12)
20#define EXTENT_NEED_WAIT (1 << 13) 20#define EXTENT_NEED_WAIT (1 << 13)
21#define EXTENT_DAMAGED (1 << 14) 21#define EXTENT_DAMAGED (1 << 14)
22#define EXTENT_NORESERVE (1 << 15)
22#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 23#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
23#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) 24#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
24 25
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5ffde5603686..2d70849cec92 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1312,6 +1312,56 @@ fail:
1312 1312
1313} 1313}
1314 1314
1315static noinline int check_can_nocow(struct inode *inode, loff_t pos,
1316 size_t *write_bytes)
1317{
1318 struct btrfs_trans_handle *trans;
1319 struct btrfs_root *root = BTRFS_I(inode)->root;
1320 struct btrfs_ordered_extent *ordered;
1321 u64 lockstart, lockend;
1322 u64 num_bytes;
1323 int ret;
1324
1325 lockstart = round_down(pos, root->sectorsize);
1326 lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1;
1327
1328 while (1) {
1329 lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1330 ordered = btrfs_lookup_ordered_range(inode, lockstart,
1331 lockend - lockstart + 1);
1332 if (!ordered) {
1333 break;
1334 }
1335 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1336 btrfs_start_ordered_extent(inode, ordered, 1);
1337 btrfs_put_ordered_extent(ordered);
1338 }
1339
1340 trans = btrfs_join_transaction(root);
1341 if (IS_ERR(trans)) {
1342 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1343 return PTR_ERR(trans);
1344 }
1345
1346 num_bytes = lockend - lockstart + 1;
1347 ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
1348 NULL);
1349 btrfs_end_transaction(trans, root);
1350 if (ret <= 0) {
1351 ret = 0;
1352 } else {
1353 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
1354 EXTENT_DIRTY | EXTENT_DELALLOC |
1355 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
1356 NULL, GFP_NOFS);
1357 *write_bytes = min_t(size_t, *write_bytes, num_bytes);
1358 }
1359
1360 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1361
1362 return ret;
1363}
1364
1315static noinline ssize_t __btrfs_buffered_write(struct file *file, 1365static noinline ssize_t __btrfs_buffered_write(struct file *file,
1316 struct iov_iter *i, 1366 struct iov_iter *i,
1317 loff_t pos) 1367 loff_t pos)
@@ -1319,10 +1369,12 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1319 struct inode *inode = file_inode(file); 1369 struct inode *inode = file_inode(file);
1320 struct btrfs_root *root = BTRFS_I(inode)->root; 1370 struct btrfs_root *root = BTRFS_I(inode)->root;
1321 struct page **pages = NULL; 1371 struct page **pages = NULL;
1372 u64 release_bytes = 0;
1322 unsigned long first_index; 1373 unsigned long first_index;
1323 size_t num_written = 0; 1374 size_t num_written = 0;
1324 int nrptrs; 1375 int nrptrs;
1325 int ret = 0; 1376 int ret = 0;
1377 bool only_release_metadata = false;
1326 bool force_page_uptodate = false; 1378 bool force_page_uptodate = false;
1327 1379
1328 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 1380 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
@@ -1343,6 +1395,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1343 offset); 1395 offset);
1344 size_t num_pages = (write_bytes + offset + 1396 size_t num_pages = (write_bytes + offset +
1345 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1397 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1398 size_t reserve_bytes;
1346 size_t dirty_pages; 1399 size_t dirty_pages;
1347 size_t copied; 1400 size_t copied;
1348 1401
@@ -1357,11 +1410,41 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1357 break; 1410 break;
1358 } 1411 }
1359 1412
1360 ret = btrfs_delalloc_reserve_space(inode, 1413 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1361 num_pages << PAGE_CACHE_SHIFT); 1414 ret = btrfs_check_data_free_space(inode, reserve_bytes);
1415 if (ret == -ENOSPC &&
1416 (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
1417 BTRFS_INODE_PREALLOC))) {
1418 ret = check_can_nocow(inode, pos, &write_bytes);
1419 if (ret > 0) {
1420 only_release_metadata = true;
1421 /*
1422 * our prealloc extent may be smaller than
1423 * write_bytes, so scale down.
1424 */
1425 num_pages = (write_bytes + offset +
1426 PAGE_CACHE_SIZE - 1) >>
1427 PAGE_CACHE_SHIFT;
1428 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1429 ret = 0;
1430 } else {
1431 ret = -ENOSPC;
1432 }
1433 }
1434
1362 if (ret) 1435 if (ret)
1363 break; 1436 break;
1364 1437
1438 ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
1439 if (ret) {
1440 if (!only_release_metadata)
1441 btrfs_free_reserved_data_space(inode,
1442 reserve_bytes);
1443 break;
1444 }
1445
1446 release_bytes = reserve_bytes;
1447
1365 /* 1448 /*
1366 * This is going to setup the pages array with the number of 1449 * This is going to setup the pages array with the number of
1367 * pages we want, so we don't really need to worry about the 1450 * pages we want, so we don't really need to worry about the
@@ -1370,11 +1453,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1370 ret = prepare_pages(root, file, pages, num_pages, 1453 ret = prepare_pages(root, file, pages, num_pages,
1371 pos, first_index, write_bytes, 1454 pos, first_index, write_bytes,
1372 force_page_uptodate); 1455 force_page_uptodate);
1373 if (ret) { 1456 if (ret)
1374 btrfs_delalloc_release_space(inode,
1375 num_pages << PAGE_CACHE_SHIFT);
1376 break; 1457 break;
1377 }
1378 1458
1379 copied = btrfs_copy_from_user(pos, num_pages, 1459 copied = btrfs_copy_from_user(pos, num_pages,
1380 write_bytes, pages, i); 1460 write_bytes, pages, i);
@@ -1404,30 +1484,46 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1404 * managed to copy. 1484 * managed to copy.
1405 */ 1485 */
1406 if (num_pages > dirty_pages) { 1486 if (num_pages > dirty_pages) {
1487 release_bytes = (num_pages - dirty_pages) <<
1488 PAGE_CACHE_SHIFT;
1407 if (copied > 0) { 1489 if (copied > 0) {
1408 spin_lock(&BTRFS_I(inode)->lock); 1490 spin_lock(&BTRFS_I(inode)->lock);
1409 BTRFS_I(inode)->outstanding_extents++; 1491 BTRFS_I(inode)->outstanding_extents++;
1410 spin_unlock(&BTRFS_I(inode)->lock); 1492 spin_unlock(&BTRFS_I(inode)->lock);
1411 } 1493 }
1412 btrfs_delalloc_release_space(inode, 1494 if (only_release_metadata)
1413 (num_pages - dirty_pages) << 1495 btrfs_delalloc_release_metadata(inode,
1414 PAGE_CACHE_SHIFT); 1496 release_bytes);
1497 else
1498 btrfs_delalloc_release_space(inode,
1499 release_bytes);
1415 } 1500 }
1416 1501
1502 release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
1417 if (copied > 0) { 1503 if (copied > 0) {
1418 ret = btrfs_dirty_pages(root, inode, pages, 1504 ret = btrfs_dirty_pages(root, inode, pages,
1419 dirty_pages, pos, copied, 1505 dirty_pages, pos, copied,
1420 NULL); 1506 NULL);
1421 if (ret) { 1507 if (ret) {
1422 btrfs_delalloc_release_space(inode,
1423 dirty_pages << PAGE_CACHE_SHIFT);
1424 btrfs_drop_pages(pages, num_pages); 1508 btrfs_drop_pages(pages, num_pages);
1425 break; 1509 break;
1426 } 1510 }
1427 } 1511 }
1428 1512
1513 release_bytes = 0;
1429 btrfs_drop_pages(pages, num_pages); 1514 btrfs_drop_pages(pages, num_pages);
1430 1515
1516 if (only_release_metadata && copied > 0) {
1517 u64 lockstart = round_down(pos, root->sectorsize);
1518 u64 lockend = lockstart +
1519 (dirty_pages << PAGE_CACHE_SHIFT) - 1;
1520
1521 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
1522 lockend, EXTENT_NORESERVE, NULL,
1523 NULL, GFP_NOFS);
1524 only_release_metadata = false;
1525 }
1526
1431 cond_resched(); 1527 cond_resched();
1432 1528
1433 balance_dirty_pages_ratelimited(inode->i_mapping); 1529 balance_dirty_pages_ratelimited(inode->i_mapping);
@@ -1440,6 +1536,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1440 1536
1441 kfree(pages); 1537 kfree(pages);
1442 1538
1539 if (release_bytes) {
1540 if (only_release_metadata)
1541 btrfs_delalloc_release_metadata(inode, release_bytes);
1542 else
1543 btrfs_delalloc_release_space(inode, release_bytes);
1544 }
1545
1443 return num_written ? num_written : ret; 1546 return num_written ? num_written : ret;
1444} 1547}
1445 1548
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8edcdf6910f7..4d7c02258390 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1641,7 +1641,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1641 btrfs_delalloc_release_metadata(inode, len); 1641 btrfs_delalloc_release_metadata(inode, len);
1642 1642
1643 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1643 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1644 && do_list) 1644 && do_list && !(state->state & EXTENT_NORESERVE))
1645 btrfs_free_reserved_data_space(inode, len); 1645 btrfs_free_reserved_data_space(inode, len);
1646 1646
1647 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len, 1647 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
@@ -6396,10 +6396,10 @@ out:
6396 * returns 1 when the nocow is safe, < 1 on error, 0 if the 6396 * returns 1 when the nocow is safe, < 1 on error, 0 if the
6397 * block must be cow'd 6397 * block must be cow'd
6398 */ 6398 */
6399static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, 6399noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6400 struct inode *inode, u64 offset, u64 *len, 6400 struct inode *inode, u64 offset, u64 *len,
6401 u64 *orig_start, u64 *orig_block_len, 6401 u64 *orig_start, u64 *orig_block_len,
6402 u64 *ram_bytes) 6402 u64 *ram_bytes)
6403{ 6403{
6404 struct btrfs_path *path; 6404 struct btrfs_path *path;
6405 int ret; 6405 int ret;
@@ -6413,7 +6413,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6413 u64 num_bytes; 6413 u64 num_bytes;
6414 int slot; 6414 int slot;
6415 int found_type; 6415 int found_type;
6416 6416 bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
6417 path = btrfs_alloc_path(); 6417 path = btrfs_alloc_path();
6418 if (!path) 6418 if (!path)
6419 return -ENOMEM; 6419 return -ENOMEM;
@@ -6453,18 +6453,28 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6453 /* not a regular extent, must cow */ 6453 /* not a regular extent, must cow */
6454 goto out; 6454 goto out;
6455 } 6455 }
6456
6457 if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
6458 goto out;
6459
6456 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6460 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6461 if (disk_bytenr == 0)
6462 goto out;
6463
6464 if (btrfs_file_extent_compression(leaf, fi) ||
6465 btrfs_file_extent_encryption(leaf, fi) ||
6466 btrfs_file_extent_other_encoding(leaf, fi))
6467 goto out;
6468
6457 backref_offset = btrfs_file_extent_offset(leaf, fi); 6469 backref_offset = btrfs_file_extent_offset(leaf, fi);
6458 6470
6459 *orig_start = key.offset - backref_offset; 6471 if (orig_start) {
6460 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); 6472 *orig_start = key.offset - backref_offset;
6461 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 6473 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
6474 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6475 }
6462 6476
6463 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 6477 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6464 if (extent_end < offset + *len) {
6465 /* extent doesn't include our full range, must cow */
6466 goto out;
6467 }
6468 6478
6469 if (btrfs_extent_readonly(root, disk_bytenr)) 6479 if (btrfs_extent_readonly(root, disk_bytenr))
6470 goto out; 6480 goto out;
@@ -6708,8 +6718,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6708 if (IS_ERR(trans)) 6718 if (IS_ERR(trans))
6709 goto must_cow; 6719 goto must_cow;
6710 6720
6711 if (can_nocow_odirect(trans, inode, start, &len, &orig_start, 6721 if (can_nocow_extent(trans, inode, start, &len, &orig_start,
6712 &orig_block_len, &ram_bytes) == 1) { 6722 &orig_block_len, &ram_bytes) == 1) {
6713 if (type == BTRFS_ORDERED_PREALLOC) { 6723 if (type == BTRFS_ORDERED_PREALLOC) {
6714 free_extent_map(em); 6724 free_extent_map(em);
6715 em = create_pinned_em(inode, start, len, 6725 em = create_pinned_em(inode, start, len,