summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/btrfs/btrfs_inode.h 7
-rw-r--r-- fs/btrfs/extent_io.h 5
-rw-r--r-- fs/btrfs/file.c 66
-rw-r--r-- fs/btrfs/inode.c 242
-rw-r--r-- fs/btrfs/send.c 23
5 files changed, 277 insertions, 66 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0c6baaba0651..b8622e4d1744 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -125,6 +125,13 @@ struct btrfs_inode {
125 u64 delalloc_bytes; 125 u64 delalloc_bytes;
126 126
127 /* 127 /*
128 * Total number of bytes pending delalloc that fall within a file
129 * range that is either a hole or beyond EOF (and no prealloc extent
130 * exists in the range). This is always <= delalloc_bytes.
131 */
132 u64 new_delalloc_bytes;
133
134 /*
128 * total number of bytes pending defrag, used by stat to check whether 135 * total number of bytes pending defrag, used by stat to check whether
129 * it needs COW. 136 * it needs COW.
130 */ 137 */
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 8d2d6e4272d5..1eafa2f0ede3 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -15,14 +15,17 @@
15#define EXTENT_DEFRAG (1U << 6) 15#define EXTENT_DEFRAG (1U << 6)
16#define EXTENT_BOUNDARY (1U << 9) 16#define EXTENT_BOUNDARY (1U << 9)
17#define EXTENT_NODATASUM (1U << 10) 17#define EXTENT_NODATASUM (1U << 10)
18#define EXTENT_DO_ACCOUNTING (1U << 11) 18#define EXTENT_CLEAR_META_RESV (1U << 11)
19#define EXTENT_FIRST_DELALLOC (1U << 12) 19#define EXTENT_FIRST_DELALLOC (1U << 12)
20#define EXTENT_NEED_WAIT (1U << 13) 20#define EXTENT_NEED_WAIT (1U << 13)
21#define EXTENT_DAMAGED (1U << 14) 21#define EXTENT_DAMAGED (1U << 14)
22#define EXTENT_NORESERVE (1U << 15) 22#define EXTENT_NORESERVE (1U << 15)
23#define EXTENT_QGROUP_RESERVED (1U << 16) 23#define EXTENT_QGROUP_RESERVED (1U << 16)
24#define EXTENT_CLEAR_DATA_RESV (1U << 17) 24#define EXTENT_CLEAR_DATA_RESV (1U << 17)
25#define EXTENT_DELALLOC_NEW (1U << 18)
25#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 26#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
27#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
28 EXTENT_CLEAR_DATA_RESV)
26#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) 29#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
27 30
28/* 31/*
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3cedbfd08a3a..da1096eb1a40 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1404,6 +1404,47 @@ fail:
1404 1404
1405} 1405}
1406 1406
1407static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
1408 const u64 start,
1409 const u64 len,
1410 struct extent_state **cached_state)
1411{
1412 u64 search_start = start;
1413 const u64 end = start + len - 1;
1414
1415 while (search_start < end) {
1416 const u64 search_len = end - search_start + 1;
1417 struct extent_map *em;
1418 u64 em_len;
1419 int ret = 0;
1420
1421 em = btrfs_get_extent(inode, NULL, 0, search_start,
1422 search_len, 0);
1423 if (IS_ERR(em))
1424 return PTR_ERR(em);
1425
1426 if (em->block_start != EXTENT_MAP_HOLE)
1427 goto next;
1428
1429 em_len = em->len;
1430 if (em->start < search_start)
1431 em_len -= search_start - em->start;
1432 if (em_len > search_len)
1433 em_len = search_len;
1434
1435 ret = set_extent_bit(&inode->io_tree, search_start,
1436 search_start + em_len - 1,
1437 EXTENT_DELALLOC_NEW,
1438 NULL, cached_state, GFP_NOFS);
1439next:
1440 search_start = extent_map_end(em);
1441 free_extent_map(em);
1442 if (ret)
1443 return ret;
1444 }
1445 return 0;
1446}
1447
1407/* 1448/*
1408 * This function locks the extent and properly waits for data=ordered extents 1449 * This function locks the extent and properly waits for data=ordered extents
1409 * to finish before allowing the pages to be modified if need. 1450 * to finish before allowing the pages to be modified if need.
@@ -1432,8 +1473,11 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
1432 + round_up(pos + write_bytes - start_pos, 1473 + round_up(pos + write_bytes - start_pos,
1433 fs_info->sectorsize) - 1; 1474 fs_info->sectorsize) - 1;
1434 1475
1435 if (start_pos < inode->vfs_inode.i_size) { 1476 if (start_pos < inode->vfs_inode.i_size ||
1477 (inode->flags & BTRFS_INODE_PREALLOC)) {
1436 struct btrfs_ordered_extent *ordered; 1478 struct btrfs_ordered_extent *ordered;
1479 unsigned int clear_bits;
1480
1437 lock_extent_bits(&inode->io_tree, start_pos, last_pos, 1481 lock_extent_bits(&inode->io_tree, start_pos, last_pos,
1438 cached_state); 1482 cached_state);
1439 ordered = btrfs_lookup_ordered_range(inode, start_pos, 1483 ordered = btrfs_lookup_ordered_range(inode, start_pos,
@@ -1454,11 +1498,19 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
1454 } 1498 }
1455 if (ordered) 1499 if (ordered)
1456 btrfs_put_ordered_extent(ordered); 1500 btrfs_put_ordered_extent(ordered);
1457 1501 ret = btrfs_find_new_delalloc_bytes(inode, start_pos,
1502 last_pos - start_pos + 1,
1503 cached_state);
1504 clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC |
1505 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG;
1506 if (ret)
1507 clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED;
1458 clear_extent_bit(&inode->io_tree, start_pos, 1508 clear_extent_bit(&inode->io_tree, start_pos,
1459 last_pos, EXTENT_DIRTY | EXTENT_DELALLOC | 1509 last_pos, clear_bits,
1460 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1510 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
1461 0, 0, cached_state, GFP_NOFS); 1511 0, cached_state, GFP_NOFS);
1512 if (ret)
1513 return ret;
1462 *lockstart = start_pos; 1514 *lockstart = start_pos;
1463 *lockend = last_pos; 1515 *lockend = last_pos;
1464 ret = 1; 1516 ret = 1;
@@ -2848,8 +2900,10 @@ static long btrfs_fallocate(struct file *file, int mode,
2848 } 2900 }
2849 ret = btrfs_qgroup_reserve_data(inode, cur_offset, 2901 ret = btrfs_qgroup_reserve_data(inode, cur_offset,
2850 last_byte - cur_offset); 2902 last_byte - cur_offset);
2851 if (ret < 0) 2903 if (ret < 0) {
2904 free_extent_map(em);
2852 break; 2905 break;
2906 }
2853 } else { 2907 } else {
2854 /* 2908 /*
2855 * Do not need to reserve unwritten extent for this 2909 * Do not need to reserve unwritten extent for this
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 31759d48e880..17cbe9306faf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -115,6 +115,31 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
115 u64 ram_bytes, int compress_type, 115 u64 ram_bytes, int compress_type,
116 int type); 116 int type);
117 117
118static void __endio_write_update_ordered(struct inode *inode,
119 const u64 offset, const u64 bytes,
120 const bool uptodate);
121
122/*
123 * Cleanup all submitted ordered extents in specified range to handle errors
124 * from the fill_dellaloc() callback.
125 *
126 * NOTE: caller must ensure that when an error happens, it can not call
127 * extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING
128 * and EXTENT_DELALLOC simultaneously, because that causes the reserved metadata
129 * to be released, which we want to happen only when finishing the ordered
130 * extent (btrfs_finish_ordered_io()). Also note that the caller of the
131 * fill_delalloc() callback already does proper cleanup for the first page of
132 * the range, that is, it invokes the callback writepage_end_io_hook() for the
133 * range of the first page.
134 */
135static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
136 const u64 offset,
137 const u64 bytes)
138{
139 return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
140 bytes - PAGE_SIZE, false);
141}
142
118static int btrfs_dirty_inode(struct inode *inode); 143static int btrfs_dirty_inode(struct inode *inode);
119 144
120#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 145#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@@ -547,7 +572,7 @@ cont:
547 } 572 }
548 if (ret <= 0) { 573 if (ret <= 0) {
549 unsigned long clear_flags = EXTENT_DELALLOC | 574 unsigned long clear_flags = EXTENT_DELALLOC |
550 EXTENT_DEFRAG; 575 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG;
551 unsigned long page_error_op; 576 unsigned long page_error_op;
552 577
553 clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0; 578 clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
@@ -565,8 +590,10 @@ cont:
565 PAGE_SET_WRITEBACK | 590 PAGE_SET_WRITEBACK |
566 page_error_op | 591 page_error_op |
567 PAGE_END_WRITEBACK); 592 PAGE_END_WRITEBACK);
568 btrfs_free_reserved_data_space_noquota(inode, start, 593 if (ret == 0)
569 end - start + 1); 594 btrfs_free_reserved_data_space_noquota(inode,
595 start,
596 end - start + 1);
570 goto free_pages_out; 597 goto free_pages_out;
571 } 598 }
572 } 599 }
@@ -852,6 +879,7 @@ out_free:
852 async_extent->start + 879 async_extent->start +
853 async_extent->ram_size - 1, 880 async_extent->ram_size - 1,
854 NULL, EXTENT_LOCKED | EXTENT_DELALLOC | 881 NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
882 EXTENT_DELALLOC_NEW |
855 EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, 883 EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
856 PAGE_UNLOCK | PAGE_CLEAR_DIRTY | 884 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
857 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK | 885 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
@@ -918,10 +946,13 @@ static noinline int cow_file_range(struct inode *inode,
918 u64 num_bytes; 946 u64 num_bytes;
919 unsigned long ram_size; 947 unsigned long ram_size;
920 u64 disk_num_bytes; 948 u64 disk_num_bytes;
921 u64 cur_alloc_size; 949 u64 cur_alloc_size = 0;
922 u64 blocksize = fs_info->sectorsize; 950 u64 blocksize = fs_info->sectorsize;
923 struct btrfs_key ins; 951 struct btrfs_key ins;
924 struct extent_map *em; 952 struct extent_map *em;
953 unsigned clear_bits;
954 unsigned long page_ops;
955 bool extent_reserved = false;
925 int ret = 0; 956 int ret = 0;
926 957
927 if (btrfs_is_free_space_inode(BTRFS_I(inode))) { 958 if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -944,6 +975,7 @@ static noinline int cow_file_range(struct inode *inode,
944 extent_clear_unlock_delalloc(inode, start, end, 975 extent_clear_unlock_delalloc(inode, start, end,
945 delalloc_end, NULL, 976 delalloc_end, NULL,
946 EXTENT_LOCKED | EXTENT_DELALLOC | 977 EXTENT_LOCKED | EXTENT_DELALLOC |
978 EXTENT_DELALLOC_NEW |
947 EXTENT_DEFRAG, PAGE_UNLOCK | 979 EXTENT_DEFRAG, PAGE_UNLOCK |
948 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | 980 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
949 PAGE_END_WRITEBACK); 981 PAGE_END_WRITEBACK);
@@ -966,14 +998,14 @@ static noinline int cow_file_range(struct inode *inode,
966 start + num_bytes - 1, 0); 998 start + num_bytes - 1, 0);
967 999
968 while (disk_num_bytes > 0) { 1000 while (disk_num_bytes > 0) {
969 unsigned long op;
970
971 cur_alloc_size = disk_num_bytes; 1001 cur_alloc_size = disk_num_bytes;
972 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, 1002 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
973 fs_info->sectorsize, 0, alloc_hint, 1003 fs_info->sectorsize, 0, alloc_hint,
974 &ins, 1, 1); 1004 &ins, 1, 1);
975 if (ret < 0) 1005 if (ret < 0)
976 goto out_unlock; 1006 goto out_unlock;
1007 cur_alloc_size = ins.offset;
1008 extent_reserved = true;
977 1009
978 ram_size = ins.offset; 1010 ram_size = ins.offset;
979 em = create_io_em(inode, start, ins.offset, /* len */ 1011 em = create_io_em(inode, start, ins.offset, /* len */
@@ -988,7 +1020,6 @@ static noinline int cow_file_range(struct inode *inode,
988 goto out_reserve; 1020 goto out_reserve;
989 free_extent_map(em); 1021 free_extent_map(em);
990 1022
991 cur_alloc_size = ins.offset;
992 ret = btrfs_add_ordered_extent(inode, start, ins.objectid, 1023 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
993 ram_size, cur_alloc_size, 0); 1024 ram_size, cur_alloc_size, 0);
994 if (ret) 1025 if (ret)
@@ -998,15 +1029,24 @@ static noinline int cow_file_range(struct inode *inode,
998 BTRFS_DATA_RELOC_TREE_OBJECTID) { 1029 BTRFS_DATA_RELOC_TREE_OBJECTID) {
999 ret = btrfs_reloc_clone_csums(inode, start, 1030 ret = btrfs_reloc_clone_csums(inode, start,
1000 cur_alloc_size); 1031 cur_alloc_size);
1032 /*
1033 * Only drop cache here, and process as normal.
1034 *
1035 * We must not allow extent_clear_unlock_delalloc()
1036 * at out_unlock label to free meta of this ordered
1037 * extent, as its meta should be freed by
1038 * btrfs_finish_ordered_io().
1039 *
1040 * So we must continue until @start is increased to
1041 * skip current ordered extent.
1042 */
1001 if (ret) 1043 if (ret)
1002 goto out_drop_extent_cache; 1044 btrfs_drop_extent_cache(BTRFS_I(inode), start,
1045 start + ram_size - 1, 0);
1003 } 1046 }
1004 1047
1005 btrfs_dec_block_group_reservations(fs_info, ins.objectid); 1048 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1006 1049
1007 if (disk_num_bytes < cur_alloc_size)
1008 break;
1009
1010 /* we're not doing compressed IO, don't unlock the first 1050 /* we're not doing compressed IO, don't unlock the first
1011 * page (which the caller expects to stay locked), don't 1051 * page (which the caller expects to stay locked), don't
1012 * clear any dirty bits and don't set any writeback bits 1052 * clear any dirty bits and don't set any writeback bits
@@ -1014,18 +1054,30 @@ static noinline int cow_file_range(struct inode *inode,
1014 * Do set the Private2 bit so we know this page was properly 1054 * Do set the Private2 bit so we know this page was properly
1015 * setup for writepage 1055 * setup for writepage
1016 */ 1056 */
1017 op = unlock ? PAGE_UNLOCK : 0; 1057 page_ops = unlock ? PAGE_UNLOCK : 0;
1018 op |= PAGE_SET_PRIVATE2; 1058 page_ops |= PAGE_SET_PRIVATE2;
1019 1059
1020 extent_clear_unlock_delalloc(inode, start, 1060 extent_clear_unlock_delalloc(inode, start,
1021 start + ram_size - 1, 1061 start + ram_size - 1,
1022 delalloc_end, locked_page, 1062 delalloc_end, locked_page,
1023 EXTENT_LOCKED | EXTENT_DELALLOC, 1063 EXTENT_LOCKED | EXTENT_DELALLOC,
1024 op); 1064 page_ops);
1025 disk_num_bytes -= cur_alloc_size; 1065 if (disk_num_bytes < cur_alloc_size)
1066 disk_num_bytes = 0;
1067 else
1068 disk_num_bytes -= cur_alloc_size;
1026 num_bytes -= cur_alloc_size; 1069 num_bytes -= cur_alloc_size;
1027 alloc_hint = ins.objectid + ins.offset; 1070 alloc_hint = ins.objectid + ins.offset;
1028 start += cur_alloc_size; 1071 start += cur_alloc_size;
1072 extent_reserved = false;
1073
1074 /*
1075 * btrfs_reloc_clone_csums() error, since start is increased
1076 * extent_clear_unlock_delalloc() at out_unlock label won't
1077 * free metadata of current ordered extent, we're OK to exit.
1078 */
1079 if (ret)
1080 goto out_unlock;
1029 } 1081 }
1030out: 1082out:
1031 return ret; 1083 return ret;
@@ -1036,12 +1088,35 @@ out_reserve:
1036 btrfs_dec_block_group_reservations(fs_info, ins.objectid); 1088 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1037 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); 1089 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
1038out_unlock: 1090out_unlock:
1091 clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
1092 EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
1093 page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
1094 PAGE_END_WRITEBACK;
1095 /*
1096 * If we reserved an extent for our delalloc range (or a subrange) and
1097 * failed to create the respective ordered extent, then it means that
1098 * when we reserved the extent we decremented the extent's size from
1099 * the data space_info's bytes_may_use counter and incremented the
1100 * space_info's bytes_reserved counter by the same amount. We must make
1101 * sure extent_clear_unlock_delalloc() does not try to decrement again
1102 * the data space_info's bytes_may_use counter, therefore we do not pass
1103 * it the flag EXTENT_CLEAR_DATA_RESV.
1104 */
1105 if (extent_reserved) {
1106 extent_clear_unlock_delalloc(inode, start,
1107 start + cur_alloc_size,
1108 start + cur_alloc_size,
1109 locked_page,
1110 clear_bits,
1111 page_ops);
1112 start += cur_alloc_size;
1113 if (start >= end)
1114 goto out;
1115 }
1039 extent_clear_unlock_delalloc(inode, start, end, delalloc_end, 1116 extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
1040 locked_page, 1117 locked_page,
1041 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | 1118 clear_bits | EXTENT_CLEAR_DATA_RESV,
1042 EXTENT_DELALLOC | EXTENT_DEFRAG, 1119 page_ops);
1043 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1044 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
1045 goto out; 1120 goto out;
1046} 1121}
1047 1122
@@ -1414,15 +1489,14 @@ out_check:
1414 BUG_ON(ret); /* -ENOMEM */ 1489 BUG_ON(ret); /* -ENOMEM */
1415 1490
1416 if (root->root_key.objectid == 1491 if (root->root_key.objectid ==
1417 BTRFS_DATA_RELOC_TREE_OBJECTID) { 1492 BTRFS_DATA_RELOC_TREE_OBJECTID)
1493 /*
1494 * Error handled later, as we must prevent
1495 * extent_clear_unlock_delalloc() in error handler
1496 * from freeing metadata of created ordered extent.
1497 */
1418 ret = btrfs_reloc_clone_csums(inode, cur_offset, 1498 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1419 num_bytes); 1499 num_bytes);
1420 if (ret) {
1421 if (!nolock && nocow)
1422 btrfs_end_write_no_snapshoting(root);
1423 goto error;
1424 }
1425 }
1426 1500
1427 extent_clear_unlock_delalloc(inode, cur_offset, 1501 extent_clear_unlock_delalloc(inode, cur_offset,
1428 cur_offset + num_bytes - 1, end, 1502 cur_offset + num_bytes - 1, end,
@@ -1434,6 +1508,14 @@ out_check:
1434 if (!nolock && nocow) 1508 if (!nolock && nocow)
1435 btrfs_end_write_no_snapshoting(root); 1509 btrfs_end_write_no_snapshoting(root);
1436 cur_offset = extent_end; 1510 cur_offset = extent_end;
1511
1512 /*
1513 * btrfs_reloc_clone_csums() error, now we're OK to call error
1514 * handler, as metadata for created ordered extent will only
1515 * be freed by btrfs_finish_ordered_io().
1516 */
1517 if (ret)
1518 goto error;
1437 if (cur_offset > end) 1519 if (cur_offset > end)
1438 break; 1520 break;
1439 } 1521 }
@@ -1509,6 +1591,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1509 ret = cow_file_range_async(inode, locked_page, start, end, 1591 ret = cow_file_range_async(inode, locked_page, start, end,
1510 page_started, nr_written); 1592 page_started, nr_written);
1511 } 1593 }
1594 if (ret)
1595 btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
1512 return ret; 1596 return ret;
1513} 1597}
1514 1598
@@ -1693,6 +1777,14 @@ static void btrfs_set_bit_hook(struct inode *inode,
1693 btrfs_add_delalloc_inodes(root, inode); 1777 btrfs_add_delalloc_inodes(root, inode);
1694 spin_unlock(&BTRFS_I(inode)->lock); 1778 spin_unlock(&BTRFS_I(inode)->lock);
1695 } 1779 }
1780
1781 if (!(state->state & EXTENT_DELALLOC_NEW) &&
1782 (*bits & EXTENT_DELALLOC_NEW)) {
1783 spin_lock(&BTRFS_I(inode)->lock);
1784 BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
1785 state->start;
1786 spin_unlock(&BTRFS_I(inode)->lock);
1787 }
1696} 1788}
1697 1789
1698/* 1790/*
@@ -1722,7 +1814,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
1722 1814
1723 if (*bits & EXTENT_FIRST_DELALLOC) { 1815 if (*bits & EXTENT_FIRST_DELALLOC) {
1724 *bits &= ~EXTENT_FIRST_DELALLOC; 1816 *bits &= ~EXTENT_FIRST_DELALLOC;
1725 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { 1817 } else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
1726 spin_lock(&inode->lock); 1818 spin_lock(&inode->lock);
1727 inode->outstanding_extents -= num_extents; 1819 inode->outstanding_extents -= num_extents;
1728 spin_unlock(&inode->lock); 1820 spin_unlock(&inode->lock);
@@ -1733,7 +1825,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
1733 * don't need to call delalloc_release_metadata if there is an 1825 * don't need to call delalloc_release_metadata if there is an
1734 * error. 1826 * error.
1735 */ 1827 */
1736 if (*bits & EXTENT_DO_ACCOUNTING && 1828 if (*bits & EXTENT_CLEAR_META_RESV &&
1737 root != fs_info->tree_root) 1829 root != fs_info->tree_root)
1738 btrfs_delalloc_release_metadata(inode, len); 1830 btrfs_delalloc_release_metadata(inode, len);
1739 1831
@@ -1741,10 +1833,9 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
1741 if (btrfs_is_testing(fs_info)) 1833 if (btrfs_is_testing(fs_info))
1742 return; 1834 return;
1743 1835
1744 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1836 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
1745 && do_list && !(state->state & EXTENT_NORESERVE) 1837 do_list && !(state->state & EXTENT_NORESERVE) &&
1746 && (*bits & (EXTENT_DO_ACCOUNTING | 1838 (*bits & EXTENT_CLEAR_DATA_RESV))
1747 EXTENT_CLEAR_DATA_RESV)))
1748 btrfs_free_reserved_data_space_noquota( 1839 btrfs_free_reserved_data_space_noquota(
1749 &inode->vfs_inode, 1840 &inode->vfs_inode,
1750 state->start, len); 1841 state->start, len);
@@ -1759,6 +1850,14 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode,
1759 btrfs_del_delalloc_inode(root, inode); 1850 btrfs_del_delalloc_inode(root, inode);
1760 spin_unlock(&inode->lock); 1851 spin_unlock(&inode->lock);
1761 } 1852 }
1853
1854 if ((state->state & EXTENT_DELALLOC_NEW) &&
1855 (*bits & EXTENT_DELALLOC_NEW)) {
1856 spin_lock(&inode->lock);
1857 ASSERT(inode->new_delalloc_bytes >= len);
1858 inode->new_delalloc_bytes -= len;
1859 spin_unlock(&inode->lock);
1860 }
1762} 1861}
1763 1862
1764/* 1863/*
@@ -2791,6 +2890,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2791 u64 logical_len = ordered_extent->len; 2890 u64 logical_len = ordered_extent->len;
2792 bool nolock; 2891 bool nolock;
2793 bool truncated = false; 2892 bool truncated = false;
2893 bool range_locked = false;
2894 bool clear_new_delalloc_bytes = false;
2895
2896 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2897 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
2898 !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
2899 clear_new_delalloc_bytes = true;
2794 2900
2795 nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); 2901 nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
2796 2902
@@ -2839,6 +2945,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2839 goto out; 2945 goto out;
2840 } 2946 }
2841 2947
2948 range_locked = true;
2842 lock_extent_bits(io_tree, ordered_extent->file_offset, 2949 lock_extent_bits(io_tree, ordered_extent->file_offset,
2843 ordered_extent->file_offset + ordered_extent->len - 1, 2950 ordered_extent->file_offset + ordered_extent->len - 1,
2844 &cached_state); 2951 &cached_state);
@@ -2864,7 +2971,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2864 if (IS_ERR(trans)) { 2971 if (IS_ERR(trans)) {
2865 ret = PTR_ERR(trans); 2972 ret = PTR_ERR(trans);
2866 trans = NULL; 2973 trans = NULL;
2867 goto out_unlock; 2974 goto out;
2868 } 2975 }
2869 2976
2870 trans->block_rsv = &fs_info->delalloc_block_rsv; 2977 trans->block_rsv = &fs_info->delalloc_block_rsv;
@@ -2896,7 +3003,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2896 trans->transid); 3003 trans->transid);
2897 if (ret < 0) { 3004 if (ret < 0) {
2898 btrfs_abort_transaction(trans, ret); 3005 btrfs_abort_transaction(trans, ret);
2899 goto out_unlock; 3006 goto out;
2900 } 3007 }
2901 3008
2902 add_pending_csums(trans, inode, &ordered_extent->list); 3009 add_pending_csums(trans, inode, &ordered_extent->list);
@@ -2905,14 +3012,26 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2905 ret = btrfs_update_inode_fallback(trans, root, inode); 3012 ret = btrfs_update_inode_fallback(trans, root, inode);
2906 if (ret) { /* -ENOMEM or corruption */ 3013 if (ret) { /* -ENOMEM or corruption */
2907 btrfs_abort_transaction(trans, ret); 3014 btrfs_abort_transaction(trans, ret);
2908 goto out_unlock; 3015 goto out;
2909 } 3016 }
2910 ret = 0; 3017 ret = 0;
2911out_unlock:
2912 unlock_extent_cached(io_tree, ordered_extent->file_offset,
2913 ordered_extent->file_offset +
2914 ordered_extent->len - 1, &cached_state, GFP_NOFS);
2915out: 3018out:
3019 if (range_locked || clear_new_delalloc_bytes) {
3020 unsigned int clear_bits = 0;
3021
3022 if (range_locked)
3023 clear_bits |= EXTENT_LOCKED;
3024 if (clear_new_delalloc_bytes)
3025 clear_bits |= EXTENT_DELALLOC_NEW;
3026 clear_extent_bit(&BTRFS_I(inode)->io_tree,
3027 ordered_extent->file_offset,
3028 ordered_extent->file_offset +
3029 ordered_extent->len - 1,
3030 clear_bits,
3031 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
3032 0, &cached_state, GFP_NOFS);
3033 }
3034
2916 if (root != fs_info->tree_root) 3035 if (root != fs_info->tree_root)
2917 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3036 btrfs_delalloc_release_metadata(BTRFS_I(inode),
2918 ordered_extent->len); 3037 ordered_extent->len);
@@ -8132,17 +8251,26 @@ static void btrfs_endio_direct_read(struct bio *bio)
8132 bio_put(bio); 8251 bio_put(bio);
8133} 8252}
8134 8253
8135static void btrfs_endio_direct_write_update_ordered(struct inode *inode, 8254static void __endio_write_update_ordered(struct inode *inode,
8136 const u64 offset, 8255 const u64 offset, const u64 bytes,
8137 const u64 bytes, 8256 const bool uptodate)
8138 const int uptodate)
8139{ 8257{
8140 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 8258 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8141 struct btrfs_ordered_extent *ordered = NULL; 8259 struct btrfs_ordered_extent *ordered = NULL;
8260 struct btrfs_workqueue *wq;
8261 btrfs_work_func_t func;
8142 u64 ordered_offset = offset; 8262 u64 ordered_offset = offset;
8143 u64 ordered_bytes = bytes; 8263 u64 ordered_bytes = bytes;
8144 int ret; 8264 int ret;
8145 8265
8266 if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
8267 wq = fs_info->endio_freespace_worker;
8268 func = btrfs_freespace_write_helper;
8269 } else {
8270 wq = fs_info->endio_write_workers;
8271 func = btrfs_endio_write_helper;
8272 }
8273
8146again: 8274again:
8147 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, 8275 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
8148 &ordered_offset, 8276 &ordered_offset,
@@ -8151,9 +8279,8 @@ again:
8151 if (!ret) 8279 if (!ret)
8152 goto out_test; 8280 goto out_test;
8153 8281
8154 btrfs_init_work(&ordered->work, btrfs_endio_write_helper, 8282 btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
8155 finish_ordered_fn, NULL, NULL); 8283 btrfs_queue_work(wq, &ordered->work);
8156 btrfs_queue_work(fs_info->endio_write_workers, &ordered->work);
8157out_test: 8284out_test:
8158 /* 8285 /*
8159 * our bio might span multiple ordered extents. If we haven't 8286 * our bio might span multiple ordered extents. If we haven't
@@ -8171,10 +8298,8 @@ static void btrfs_endio_direct_write(struct bio *bio)
8171 struct btrfs_dio_private *dip = bio->bi_private; 8298 struct btrfs_dio_private *dip = bio->bi_private;
8172 struct bio *dio_bio = dip->dio_bio; 8299 struct bio *dio_bio = dip->dio_bio;
8173 8300
8174 btrfs_endio_direct_write_update_ordered(dip->inode, 8301 __endio_write_update_ordered(dip->inode, dip->logical_offset,
8175 dip->logical_offset, 8302 dip->bytes, !bio->bi_error);
8176 dip->bytes,
8177 !bio->bi_error);
8178 8303
8179 kfree(dip); 8304 kfree(dip);
8180 8305
@@ -8535,10 +8660,10 @@ free_ordered:
8535 io_bio = NULL; 8660 io_bio = NULL;
8536 } else { 8661 } else {
8537 if (write) 8662 if (write)
8538 btrfs_endio_direct_write_update_ordered(inode, 8663 __endio_write_update_ordered(inode,
8539 file_offset, 8664 file_offset,
8540 dio_bio->bi_iter.bi_size, 8665 dio_bio->bi_iter.bi_size,
8541 0); 8666 false);
8542 else 8667 else
8543 unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, 8668 unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
8544 file_offset + dio_bio->bi_iter.bi_size - 1); 8669 file_offset + dio_bio->bi_iter.bi_size - 1);
@@ -8673,11 +8798,11 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
8673 */ 8798 */
8674 if (dio_data.unsubmitted_oe_range_start < 8799 if (dio_data.unsubmitted_oe_range_start <
8675 dio_data.unsubmitted_oe_range_end) 8800 dio_data.unsubmitted_oe_range_end)
8676 btrfs_endio_direct_write_update_ordered(inode, 8801 __endio_write_update_ordered(inode,
8677 dio_data.unsubmitted_oe_range_start, 8802 dio_data.unsubmitted_oe_range_start,
8678 dio_data.unsubmitted_oe_range_end - 8803 dio_data.unsubmitted_oe_range_end -
8679 dio_data.unsubmitted_oe_range_start, 8804 dio_data.unsubmitted_oe_range_start,
8680 0); 8805 false);
8681 } else if (ret >= 0 && (size_t)ret < count) 8806 } else if (ret >= 0 && (size_t)ret < count)
8682 btrfs_delalloc_release_space(inode, offset, 8807 btrfs_delalloc_release_space(inode, offset,
8683 count - (size_t)ret); 8808 count - (size_t)ret);
@@ -8824,6 +8949,7 @@ again:
8824 if (!inode_evicting) 8949 if (!inode_evicting)
8825 clear_extent_bit(tree, start, end, 8950 clear_extent_bit(tree, start, end,
8826 EXTENT_DIRTY | EXTENT_DELALLOC | 8951 EXTENT_DIRTY | EXTENT_DELALLOC |
8952 EXTENT_DELALLOC_NEW |
8827 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | 8953 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
8828 EXTENT_DEFRAG, 1, 0, &cached_state, 8954 EXTENT_DEFRAG, 1, 0, &cached_state,
8829 GFP_NOFS); 8955 GFP_NOFS);
@@ -8881,8 +9007,8 @@ again:
8881 if (!inode_evicting) { 9007 if (!inode_evicting) {
8882 clear_extent_bit(tree, page_start, page_end, 9008 clear_extent_bit(tree, page_start, page_end,
8883 EXTENT_LOCKED | EXTENT_DIRTY | 9009 EXTENT_LOCKED | EXTENT_DIRTY |
8884 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | 9010 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
8885 EXTENT_DEFRAG, 1, 1, 9011 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
8886 &cached_state, GFP_NOFS); 9012 &cached_state, GFP_NOFS);
8887 9013
8888 __btrfs_releasepage(page, GFP_NOFS); 9014 __btrfs_releasepage(page, GFP_NOFS);
@@ -9253,6 +9379,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
9253 ei->last_sub_trans = 0; 9379 ei->last_sub_trans = 0;
9254 ei->logged_trans = 0; 9380 ei->logged_trans = 0;
9255 ei->delalloc_bytes = 0; 9381 ei->delalloc_bytes = 0;
9382 ei->new_delalloc_bytes = 0;
9256 ei->defrag_bytes = 0; 9383 ei->defrag_bytes = 0;
9257 ei->disk_i_size = 0; 9384 ei->disk_i_size = 0;
9258 ei->flags = 0; 9385 ei->flags = 0;
@@ -9318,6 +9445,7 @@ void btrfs_destroy_inode(struct inode *inode)
9318 WARN_ON(BTRFS_I(inode)->outstanding_extents); 9445 WARN_ON(BTRFS_I(inode)->outstanding_extents);
9319 WARN_ON(BTRFS_I(inode)->reserved_extents); 9446 WARN_ON(BTRFS_I(inode)->reserved_extents);
9320 WARN_ON(BTRFS_I(inode)->delalloc_bytes); 9447 WARN_ON(BTRFS_I(inode)->delalloc_bytes);
9448 WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
9321 WARN_ON(BTRFS_I(inode)->csum_bytes); 9449 WARN_ON(BTRFS_I(inode)->csum_bytes);
9322 WARN_ON(BTRFS_I(inode)->defrag_bytes); 9450 WARN_ON(BTRFS_I(inode)->defrag_bytes);
9323 9451
@@ -9441,7 +9569,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
9441 stat->dev = BTRFS_I(inode)->root->anon_dev; 9569 stat->dev = BTRFS_I(inode)->root->anon_dev;
9442 9570
9443 spin_lock(&BTRFS_I(inode)->lock); 9571 spin_lock(&BTRFS_I(inode)->lock);
9444 delalloc_bytes = BTRFS_I(inode)->delalloc_bytes; 9572 delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
9445 spin_unlock(&BTRFS_I(inode)->lock); 9573 spin_unlock(&BTRFS_I(inode)->lock);
9446 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + 9574 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
9447 ALIGN(delalloc_bytes, blocksize)) >> 9; 9575 ALIGN(delalloc_bytes, blocksize)) >> 9;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a60d5bfb8a49..5b40d617bb03 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5184,13 +5184,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
5184 while (key.offset < ekey->offset + left_len) { 5184 while (key.offset < ekey->offset + left_len) {
5185 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 5185 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
5186 right_type = btrfs_file_extent_type(eb, ei); 5186 right_type = btrfs_file_extent_type(eb, ei);
5187 if (right_type != BTRFS_FILE_EXTENT_REG) { 5187 if (right_type != BTRFS_FILE_EXTENT_REG &&
5188 right_type != BTRFS_FILE_EXTENT_INLINE) {
5188 ret = 0; 5189 ret = 0;
5189 goto out; 5190 goto out;
5190 } 5191 }
5191 5192
5192 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 5193 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
5193 right_len = btrfs_file_extent_num_bytes(eb, ei); 5194 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
5195 right_len = btrfs_file_extent_inline_len(eb, slot, ei);
5196 right_len = PAGE_ALIGN(right_len);
5197 } else {
5198 right_len = btrfs_file_extent_num_bytes(eb, ei);
5199 }
5194 right_offset = btrfs_file_extent_offset(eb, ei); 5200 right_offset = btrfs_file_extent_offset(eb, ei);
5195 right_gen = btrfs_file_extent_generation(eb, ei); 5201 right_gen = btrfs_file_extent_generation(eb, ei);
5196 5202
@@ -5204,6 +5210,19 @@ static int is_extent_unchanged(struct send_ctx *sctx,
5204 goto out; 5210 goto out;
5205 } 5211 }
5206 5212
5213 /*
5214 * We just wanted to see if when we have an inline extent, what
5215 * follows it is a regular extent (wanted to check the above
5216 * condition for inline extents too). This should normally not
5217 * happen but it's possible for example when we have an inline
5218 * compressed extent representing data with a size matching
5219 * the page size (currently the same as sector size).
5220 */
5221 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
5222 ret = 0;
5223 goto out;
5224 }
5225
5207 left_offset_fixed = left_offset; 5226 left_offset_fixed = left_offset;
5208 if (key.offset < ekey->offset) { 5227 if (key.offset < ekey->offset) {
5209 /* Fix the right offset for 2a and 7. */ 5228 /* Fix the right offset for 2a and 7. */