about summary refs log tree commit diff stats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 271
1 files changed, 112 insertions, 159 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5440bab23635..2bfdc641d4e3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -36,6 +36,7 @@
36#include <linux/xattr.h> 36#include <linux/xattr.h>
37#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h>
39#include "compat.h" 40#include "compat.h"
40#include "ctree.h" 41#include "ctree.h"
41#include "disk-io.h" 42#include "disk-io.h"
@@ -379,7 +380,8 @@ again:
379 * change at any time if we discover bad compression ratios. 380 * change at any time if we discover bad compression ratios.
380 */ 381 */
381 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && 382 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
382 btrfs_test_opt(root, COMPRESS)) { 383 (btrfs_test_opt(root, COMPRESS) ||
384 (BTRFS_I(inode)->force_compress))) {
383 WARN_ON(pages); 385 WARN_ON(pages);
384 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 386 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
385 387
@@ -483,7 +485,10 @@ again:
483 nr_pages_ret = 0; 485 nr_pages_ret = 0;
484 486
485 /* flag the file so we don't compress in the future */ 487 /* flag the file so we don't compress in the future */
486 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 488 if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
489 !(BTRFS_I(inode)->force_compress)) {
490 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
491 }
487 } 492 }
488 if (will_compress) { 493 if (will_compress) {
489 *num_added += 1; 494 *num_added += 1;
@@ -569,8 +574,8 @@ retry:
569 unsigned long nr_written = 0; 574 unsigned long nr_written = 0;
570 575
571 lock_extent(io_tree, async_extent->start, 576 lock_extent(io_tree, async_extent->start,
572 async_extent->start + 577 async_extent->start +
573 async_extent->ram_size - 1, GFP_NOFS); 578 async_extent->ram_size - 1, GFP_NOFS);
574 579
575 /* allocate blocks */ 580 /* allocate blocks */
576 ret = cow_file_range(inode, async_cow->locked_page, 581 ret = cow_file_range(inode, async_cow->locked_page,
@@ -792,7 +797,7 @@ static noinline int cow_file_range(struct inode *inode,
792 while (disk_num_bytes > 0) { 797 while (disk_num_bytes > 0) {
793 unsigned long op; 798 unsigned long op;
794 799
795 cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); 800 cur_alloc_size = disk_num_bytes;
796 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 801 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
797 root->sectorsize, 0, alloc_hint, 802 root->sectorsize, 0, alloc_hint,
798 (u64)-1, &ins, 1); 803 (u64)-1, &ins, 1);
@@ -1210,7 +1215,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1210 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) 1215 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
1211 ret = run_delalloc_nocow(inode, locked_page, start, end, 1216 ret = run_delalloc_nocow(inode, locked_page, start, end,
1212 page_started, 0, nr_written); 1217 page_started, 0, nr_written);
1213 else if (!btrfs_test_opt(root, COMPRESS)) 1218 else if (!btrfs_test_opt(root, COMPRESS) &&
1219 !(BTRFS_I(inode)->force_compress))
1214 ret = cow_file_range(inode, locked_page, start, end, 1220 ret = cow_file_range(inode, locked_page, start, end,
1215 page_started, nr_written, 1); 1221 page_started, nr_written, 1);
1216 else 1222 else
@@ -1222,30 +1228,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1222static int btrfs_split_extent_hook(struct inode *inode, 1228static int btrfs_split_extent_hook(struct inode *inode,
1223 struct extent_state *orig, u64 split) 1229 struct extent_state *orig, u64 split)
1224{ 1230{
1225 struct btrfs_root *root = BTRFS_I(inode)->root;
1226 u64 size;
1227
1228 if (!(orig->state & EXTENT_DELALLOC)) 1231 if (!(orig->state & EXTENT_DELALLOC))
1229 return 0; 1232 return 0;
1230 1233
1231 size = orig->end - orig->start + 1;
1232 if (size > root->fs_info->max_extent) {
1233 u64 num_extents;
1234 u64 new_size;
1235
1236 new_size = orig->end - split + 1;
1237 num_extents = div64_u64(size + root->fs_info->max_extent - 1,
1238 root->fs_info->max_extent);
1239
1240 /*
1241 * if we break a large extent up then leave oustanding_extents
1242 * be, since we've already accounted for the large extent.
1243 */
1244 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1245 root->fs_info->max_extent) < num_extents)
1246 return 0;
1247 }
1248
1249 spin_lock(&BTRFS_I(inode)->accounting_lock); 1234 spin_lock(&BTRFS_I(inode)->accounting_lock);
1250 BTRFS_I(inode)->outstanding_extents++; 1235 BTRFS_I(inode)->outstanding_extents++;
1251 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1236 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1263,38 +1248,10 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1263 struct extent_state *new, 1248 struct extent_state *new,
1264 struct extent_state *other) 1249 struct extent_state *other)
1265{ 1250{
1266 struct btrfs_root *root = BTRFS_I(inode)->root;
1267 u64 new_size, old_size;
1268 u64 num_extents;
1269
1270 /* not delalloc, ignore it */ 1251 /* not delalloc, ignore it */
1271 if (!(other->state & EXTENT_DELALLOC)) 1252 if (!(other->state & EXTENT_DELALLOC))
1272 return 0; 1253 return 0;
1273 1254
1274 old_size = other->end - other->start + 1;
1275 if (new->start < other->start)
1276 new_size = other->end - new->start + 1;
1277 else
1278 new_size = new->end - other->start + 1;
1279
1280 /* we're not bigger than the max, unreserve the space and go */
1281 if (new_size <= root->fs_info->max_extent) {
1282 spin_lock(&BTRFS_I(inode)->accounting_lock);
1283 BTRFS_I(inode)->outstanding_extents--;
1284 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1285 return 0;
1286 }
1287
1288 /*
1289 * If we grew by another max_extent, just return, we want to keep that
1290 * reserved amount.
1291 */
1292 num_extents = div64_u64(old_size + root->fs_info->max_extent - 1,
1293 root->fs_info->max_extent);
1294 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1295 root->fs_info->max_extent) > num_extents)
1296 return 0;
1297
1298 spin_lock(&BTRFS_I(inode)->accounting_lock); 1255 spin_lock(&BTRFS_I(inode)->accounting_lock);
1299 BTRFS_I(inode)->outstanding_extents--; 1256 BTRFS_I(inode)->outstanding_extents--;
1300 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1257 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1323,6 +1280,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1323 BTRFS_I(inode)->outstanding_extents++; 1280 BTRFS_I(inode)->outstanding_extents++;
1324 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1281 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1325 btrfs_delalloc_reserve_space(root, inode, end - start + 1); 1282 btrfs_delalloc_reserve_space(root, inode, end - start + 1);
1283
1326 spin_lock(&root->fs_info->delalloc_lock); 1284 spin_lock(&root->fs_info->delalloc_lock);
1327 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1285 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
1328 root->fs_info->delalloc_bytes += end - start + 1; 1286 root->fs_info->delalloc_bytes += end - start + 1;
@@ -1351,6 +1309,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1351 1309
1352 if (bits & EXTENT_DO_ACCOUNTING) { 1310 if (bits & EXTENT_DO_ACCOUNTING) {
1353 spin_lock(&BTRFS_I(inode)->accounting_lock); 1311 spin_lock(&BTRFS_I(inode)->accounting_lock);
1312 WARN_ON(!BTRFS_I(inode)->outstanding_extents);
1354 BTRFS_I(inode)->outstanding_extents--; 1313 BTRFS_I(inode)->outstanding_extents--;
1355 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1314 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1356 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 1315 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
@@ -1507,12 +1466,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1507 return 0; 1466 return 0;
1508} 1467}
1509 1468
1510int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) 1469int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
1470 struct extent_state **cached_state)
1511{ 1471{
1512 if ((end & (PAGE_CACHE_SIZE - 1)) == 0) 1472 if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
1513 WARN_ON(1); 1473 WARN_ON(1);
1514 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, 1474 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
1515 GFP_NOFS); 1475 cached_state, GFP_NOFS);
1516} 1476}
1517 1477
1518/* see btrfs_writepage_start_hook for details on why this is required */ 1478/* see btrfs_writepage_start_hook for details on why this is required */
@@ -1525,6 +1485,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
1525{ 1485{
1526 struct btrfs_writepage_fixup *fixup; 1486 struct btrfs_writepage_fixup *fixup;
1527 struct btrfs_ordered_extent *ordered; 1487 struct btrfs_ordered_extent *ordered;
1488 struct extent_state *cached_state = NULL;
1528 struct page *page; 1489 struct page *page;
1529 struct inode *inode; 1490 struct inode *inode;
1530 u64 page_start; 1491 u64 page_start;
@@ -1543,7 +1504,8 @@ again:
1543 page_start = page_offset(page); 1504 page_start = page_offset(page);
1544 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; 1505 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
1545 1506
1546 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1507 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
1508 &cached_state, GFP_NOFS);
1547 1509
1548 /* already ordered? We're done */ 1510 /* already ordered? We're done */
1549 if (PagePrivate2(page)) 1511 if (PagePrivate2(page))
@@ -1551,17 +1513,18 @@ again:
1551 1513
1552 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1514 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1553 if (ordered) { 1515 if (ordered) {
1554 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, 1516 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
1555 page_end, GFP_NOFS); 1517 page_end, &cached_state, GFP_NOFS);
1556 unlock_page(page); 1518 unlock_page(page);
1557 btrfs_start_ordered_extent(inode, ordered, 1); 1519 btrfs_start_ordered_extent(inode, ordered, 1);
1558 goto again; 1520 goto again;
1559 } 1521 }
1560 1522
1561 btrfs_set_extent_delalloc(inode, page_start, page_end); 1523 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
1562 ClearPageChecked(page); 1524 ClearPageChecked(page);
1563out: 1525out:
1564 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1526 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
1527 &cached_state, GFP_NOFS);
1565out_page: 1528out_page:
1566 unlock_page(page); 1529 unlock_page(page);
1567 page_cache_release(page); 1530 page_cache_release(page);
@@ -1680,24 +1643,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1680 * before we start the transaction. It limits the amount of btree 1643 * before we start the transaction. It limits the amount of btree
1681 * reads required while inside the transaction. 1644 * reads required while inside the transaction.
1682 */ 1645 */
1683static noinline void reada_csum(struct btrfs_root *root,
1684 struct btrfs_path *path,
1685 struct btrfs_ordered_extent *ordered_extent)
1686{
1687 struct btrfs_ordered_sum *sum;
1688 u64 bytenr;
1689
1690 sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
1691 list);
1692 bytenr = sum->sums[0].bytenr;
1693
1694 /*
1695 * we don't care about the results, the point of this search is
1696 * just to get the btree leaves into ram
1697 */
1698 btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
1699}
1700
1701/* as ordered data IO finishes, this gets called so we can finish 1646/* as ordered data IO finishes, this gets called so we can finish
1702 * an ordered extent if the range of bytes in the file it covers are 1647 * an ordered extent if the range of bytes in the file it covers are
1703 * fully written. 1648 * fully written.
@@ -1708,40 +1653,16 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1708 struct btrfs_trans_handle *trans; 1653 struct btrfs_trans_handle *trans;
1709 struct btrfs_ordered_extent *ordered_extent = NULL; 1654 struct btrfs_ordered_extent *ordered_extent = NULL;
1710 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1655 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1711 struct btrfs_path *path; 1656 struct extent_state *cached_state = NULL;
1712 int compressed = 0; 1657 int compressed = 0;
1713 int ret; 1658 int ret;
1714 1659
1715 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); 1660 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1661 end - start + 1);
1716 if (!ret) 1662 if (!ret)
1717 return 0; 1663 return 0;
1718
1719 /*
1720 * before we join the transaction, try to do some of our IO.
1721 * This will limit the amount of IO that we have to do with
1722 * the transaction running. We're unlikely to need to do any
1723 * IO if the file extents are new, the disk_i_size checks
1724 * covers the most common case.
1725 */
1726 if (start < BTRFS_I(inode)->disk_i_size) {
1727 path = btrfs_alloc_path();
1728 if (path) {
1729 ret = btrfs_lookup_file_extent(NULL, root, path,
1730 inode->i_ino,
1731 start, 0);
1732 ordered_extent = btrfs_lookup_ordered_extent(inode,
1733 start);
1734 if (!list_empty(&ordered_extent->list)) {
1735 btrfs_release_path(root, path);
1736 reada_csum(root, path, ordered_extent);
1737 }
1738 btrfs_free_path(path);
1739 }
1740 }
1741
1742 if (!ordered_extent)
1743 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1744 BUG_ON(!ordered_extent); 1664 BUG_ON(!ordered_extent);
1665
1745 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1666 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1746 BUG_ON(!list_empty(&ordered_extent->list)); 1667 BUG_ON(!list_empty(&ordered_extent->list));
1747 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1668 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -1754,9 +1675,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1754 goto out; 1675 goto out;
1755 } 1676 }
1756 1677
1757 lock_extent(io_tree, ordered_extent->file_offset, 1678 lock_extent_bits(io_tree, ordered_extent->file_offset,
1758 ordered_extent->file_offset + ordered_extent->len - 1, 1679 ordered_extent->file_offset + ordered_extent->len - 1,
1759 GFP_NOFS); 1680 0, &cached_state, GFP_NOFS);
1760 1681
1761 trans = btrfs_join_transaction(root, 1); 1682 trans = btrfs_join_transaction(root, 1);
1762 1683
@@ -1783,9 +1704,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1783 ordered_extent->len); 1704 ordered_extent->len);
1784 BUG_ON(ret); 1705 BUG_ON(ret);
1785 } 1706 }
1786 unlock_extent(io_tree, ordered_extent->file_offset, 1707 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1787 ordered_extent->file_offset + ordered_extent->len - 1, 1708 ordered_extent->file_offset +
1788 GFP_NOFS); 1709 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1710
1789 add_pending_csums(trans, inode, ordered_extent->file_offset, 1711 add_pending_csums(trans, inode, ordered_extent->file_offset,
1790 &ordered_extent->list); 1712 &ordered_extent->list);
1791 1713
@@ -2194,7 +2116,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2194 found_key.objectid = found_key.offset; 2116 found_key.objectid = found_key.offset;
2195 found_key.type = BTRFS_INODE_ITEM_KEY; 2117 found_key.type = BTRFS_INODE_ITEM_KEY;
2196 found_key.offset = 0; 2118 found_key.offset = 0;
2197 inode = btrfs_iget(root->fs_info->sb, &found_key, root); 2119 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
2198 if (IS_ERR(inode)) 2120 if (IS_ERR(inode))
2199 break; 2121 break;
2200 2122
@@ -3122,6 +3044,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3122 struct btrfs_root *root = BTRFS_I(inode)->root; 3044 struct btrfs_root *root = BTRFS_I(inode)->root;
3123 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3045 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3124 struct btrfs_ordered_extent *ordered; 3046 struct btrfs_ordered_extent *ordered;
3047 struct extent_state *cached_state = NULL;
3125 char *kaddr; 3048 char *kaddr;
3126 u32 blocksize = root->sectorsize; 3049 u32 blocksize = root->sectorsize;
3127 pgoff_t index = from >> PAGE_CACHE_SHIFT; 3050 pgoff_t index = from >> PAGE_CACHE_SHIFT;
@@ -3168,12 +3091,14 @@ again:
3168 } 3091 }
3169 wait_on_page_writeback(page); 3092 wait_on_page_writeback(page);
3170 3093
3171 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 3094 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
3095 GFP_NOFS);
3172 set_page_extent_mapped(page); 3096 set_page_extent_mapped(page);
3173 3097
3174 ordered = btrfs_lookup_ordered_extent(inode, page_start); 3098 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3175 if (ordered) { 3099 if (ordered) {
3176 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3100 unlock_extent_cached(io_tree, page_start, page_end,
3101 &cached_state, GFP_NOFS);
3177 unlock_page(page); 3102 unlock_page(page);
3178 page_cache_release(page); 3103 page_cache_release(page);
3179 btrfs_start_ordered_extent(inode, ordered, 1); 3104 btrfs_start_ordered_extent(inode, ordered, 1);
@@ -3181,13 +3106,15 @@ again:
3181 goto again; 3106 goto again;
3182 } 3107 }
3183 3108
3184 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 3109 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
3185 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 3110 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
3186 GFP_NOFS); 3111 0, 0, &cached_state, GFP_NOFS);
3187 3112
3188 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 3113 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
3114 &cached_state);
3189 if (ret) { 3115 if (ret) {
3190 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3116 unlock_extent_cached(io_tree, page_start, page_end,
3117 &cached_state, GFP_NOFS);
3191 goto out_unlock; 3118 goto out_unlock;
3192 } 3119 }
3193 3120
@@ -3200,7 +3127,8 @@ again:
3200 } 3127 }
3201 ClearPageChecked(page); 3128 ClearPageChecked(page);
3202 set_page_dirty(page); 3129 set_page_dirty(page);
3203 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3130 unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
3131 GFP_NOFS);
3204 3132
3205out_unlock: 3133out_unlock:
3206 if (ret) 3134 if (ret)
@@ -3218,6 +3146,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3218 struct btrfs_root *root = BTRFS_I(inode)->root; 3146 struct btrfs_root *root = BTRFS_I(inode)->root;
3219 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3147 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3220 struct extent_map *em; 3148 struct extent_map *em;
3149 struct extent_state *cached_state = NULL;
3221 u64 mask = root->sectorsize - 1; 3150 u64 mask = root->sectorsize - 1;
3222 u64 hole_start = (inode->i_size + mask) & ~mask; 3151 u64 hole_start = (inode->i_size + mask) & ~mask;
3223 u64 block_end = (size + mask) & ~mask; 3152 u64 block_end = (size + mask) & ~mask;
@@ -3233,11 +3162,13 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3233 struct btrfs_ordered_extent *ordered; 3162 struct btrfs_ordered_extent *ordered;
3234 btrfs_wait_ordered_range(inode, hole_start, 3163 btrfs_wait_ordered_range(inode, hole_start,
3235 block_end - hole_start); 3164 block_end - hole_start);
3236 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3165 lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
3166 &cached_state, GFP_NOFS);
3237 ordered = btrfs_lookup_ordered_extent(inode, hole_start); 3167 ordered = btrfs_lookup_ordered_extent(inode, hole_start);
3238 if (!ordered) 3168 if (!ordered)
3239 break; 3169 break;
3240 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3170 unlock_extent_cached(io_tree, hole_start, block_end - 1,
3171 &cached_state, GFP_NOFS);
3241 btrfs_put_ordered_extent(ordered); 3172 btrfs_put_ordered_extent(ordered);
3242 } 3173 }
3243 3174
@@ -3282,7 +3213,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3282 break; 3213 break;
3283 } 3214 }
3284 3215
3285 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3216 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
3217 GFP_NOFS);
3286 return err; 3218 return err;
3287} 3219}
3288 3220
@@ -3680,6 +3612,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3680 bi->index_cnt = (u64)-1; 3612 bi->index_cnt = (u64)-1;
3681 bi->last_unlink_trans = 0; 3613 bi->last_unlink_trans = 0;
3682 bi->ordered_data_close = 0; 3614 bi->ordered_data_close = 0;
3615 bi->force_compress = 0;
3683 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); 3616 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3684 extent_io_tree_init(&BTRFS_I(inode)->io_tree, 3617 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3685 inode->i_mapping, GFP_NOFS); 3618 inode->i_mapping, GFP_NOFS);
@@ -3728,7 +3661,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
3728 * Returns in *is_new if the inode was read from disk 3661 * Returns in *is_new if the inode was read from disk
3729 */ 3662 */
3730struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3663struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3731 struct btrfs_root *root) 3664 struct btrfs_root *root, int *new)
3732{ 3665{
3733 struct inode *inode; 3666 struct inode *inode;
3734 3667
@@ -3743,6 +3676,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3743 3676
3744 inode_tree_add(inode); 3677 inode_tree_add(inode);
3745 unlock_new_inode(inode); 3678 unlock_new_inode(inode);
3679 if (new)
3680 *new = 1;
3746 } 3681 }
3747 3682
3748 return inode; 3683 return inode;
@@ -3795,7 +3730,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3795 return NULL; 3730 return NULL;
3796 3731
3797 if (location.type == BTRFS_INODE_ITEM_KEY) { 3732 if (location.type == BTRFS_INODE_ITEM_KEY) {
3798 inode = btrfs_iget(dir->i_sb, &location, root); 3733 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
3799 return inode; 3734 return inode;
3800 } 3735 }
3801 3736
@@ -3810,7 +3745,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3810 else 3745 else
3811 inode = new_simple_dir(dir->i_sb, &location, sub_root); 3746 inode = new_simple_dir(dir->i_sb, &location, sub_root);
3812 } else { 3747 } else {
3813 inode = btrfs_iget(dir->i_sb, &location, sub_root); 3748 inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
3814 } 3749 }
3815 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3750 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3816 3751
@@ -3995,7 +3930,11 @@ skip:
3995 3930
3996 /* Reached end of directory/root. Bump pos past the last item. */ 3931 /* Reached end of directory/root. Bump pos past the last item. */
3997 if (key_type == BTRFS_DIR_INDEX_KEY) 3932 if (key_type == BTRFS_DIR_INDEX_KEY)
3998 filp->f_pos = INT_LIMIT(off_t); 3933 /*
3934 * 32-bit glibc will use getdents64, but then strtol -
3935 * so the last number we can serve is this.
3936 */
3937 filp->f_pos = 0x7fffffff;
3999 else 3938 else
4000 filp->f_pos++; 3939 filp->f_pos++;
4001nopos: 3940nopos:
@@ -4005,7 +3944,7 @@ err:
4005 return ret; 3944 return ret;
4006} 3945}
4007 3946
4008int btrfs_write_inode(struct inode *inode, int wait) 3947int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4009{ 3948{
4010 struct btrfs_root *root = BTRFS_I(inode)->root; 3949 struct btrfs_root *root = BTRFS_I(inode)->root;
4011 struct btrfs_trans_handle *trans; 3950 struct btrfs_trans_handle *trans;
@@ -4014,7 +3953,7 @@ int btrfs_write_inode(struct inode *inode, int wait)
4014 if (root->fs_info->btree_inode == inode) 3953 if (root->fs_info->btree_inode == inode)
4015 return 0; 3954 return 0;
4016 3955
4017 if (wait) { 3956 if (wbc->sync_mode == WB_SYNC_ALL) {
4018 trans = btrfs_join_transaction(root, 1); 3957 trans = btrfs_join_transaction(root, 1);
4019 btrfs_set_trans_block_group(trans, inode); 3958 btrfs_set_trans_block_group(trans, inode);
4020 ret = btrfs_commit_transaction(trans, root); 3959 ret = btrfs_commit_transaction(trans, root);
@@ -4538,7 +4477,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4538 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); 4477 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4539 if (err) { 4478 if (err) {
4540 err = -ENOSPC; 4479 err = -ENOSPC;
4541 goto out_unlock; 4480 goto out_fail;
4542 } 4481 }
4543 4482
4544 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4483 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
@@ -5016,6 +4955,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5016{ 4955{
5017 struct extent_io_tree *tree; 4956 struct extent_io_tree *tree;
5018 struct btrfs_ordered_extent *ordered; 4957 struct btrfs_ordered_extent *ordered;
4958 struct extent_state *cached_state = NULL;
5019 u64 page_start = page_offset(page); 4959 u64 page_start = page_offset(page);
5020 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 4960 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
5021 4961
@@ -5034,7 +4974,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5034 btrfs_releasepage(page, GFP_NOFS); 4974 btrfs_releasepage(page, GFP_NOFS);
5035 return; 4975 return;
5036 } 4976 }
5037 lock_extent(tree, page_start, page_end, GFP_NOFS); 4977 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
4978 GFP_NOFS);
5038 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 4979 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
5039 page_offset(page)); 4980 page_offset(page));
5040 if (ordered) { 4981 if (ordered) {
@@ -5045,7 +4986,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5045 clear_extent_bit(tree, page_start, page_end, 4986 clear_extent_bit(tree, page_start, page_end,
5046 EXTENT_DIRTY | EXTENT_DELALLOC | 4987 EXTENT_DIRTY | EXTENT_DELALLOC |
5047 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, 4988 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
5048 NULL, GFP_NOFS); 4989 &cached_state, GFP_NOFS);
5049 /* 4990 /*
5050 * whoever cleared the private bit is responsible 4991 * whoever cleared the private bit is responsible
5051 * for the finish_ordered_io 4992 * for the finish_ordered_io
@@ -5055,11 +4996,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5055 page_start, page_end); 4996 page_start, page_end);
5056 } 4997 }
5057 btrfs_put_ordered_extent(ordered); 4998 btrfs_put_ordered_extent(ordered);
5058 lock_extent(tree, page_start, page_end, GFP_NOFS); 4999 cached_state = NULL;
5000 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
5001 GFP_NOFS);
5059 } 5002 }
5060 clear_extent_bit(tree, page_start, page_end, 5003 clear_extent_bit(tree, page_start, page_end,
5061 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 5004 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
5062 EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); 5005 EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
5063 __btrfs_releasepage(page, GFP_NOFS); 5006 __btrfs_releasepage(page, GFP_NOFS);
5064 5007
5065 ClearPageChecked(page); 5008 ClearPageChecked(page);
@@ -5092,6 +5035,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5092 struct btrfs_root *root = BTRFS_I(inode)->root; 5035 struct btrfs_root *root = BTRFS_I(inode)->root;
5093 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5036 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
5094 struct btrfs_ordered_extent *ordered; 5037 struct btrfs_ordered_extent *ordered;
5038 struct extent_state *cached_state = NULL;
5095 char *kaddr; 5039 char *kaddr;
5096 unsigned long zero_start; 5040 unsigned long zero_start;
5097 loff_t size; 5041 loff_t size;
@@ -5130,7 +5074,8 @@ again:
5130 } 5074 }
5131 wait_on_page_writeback(page); 5075 wait_on_page_writeback(page);
5132 5076
5133 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5077 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
5078 GFP_NOFS);
5134 set_page_extent_mapped(page); 5079 set_page_extent_mapped(page);
5135 5080
5136 /* 5081 /*
@@ -5139,7 +5084,8 @@ again:
5139 */ 5084 */
5140 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5085 ordered = btrfs_lookup_ordered_extent(inode, page_start);
5141 if (ordered) { 5086 if (ordered) {
5142 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5087 unlock_extent_cached(io_tree, page_start, page_end,
5088 &cached_state, GFP_NOFS);
5143 unlock_page(page); 5089 unlock_page(page);
5144 btrfs_start_ordered_extent(inode, ordered, 1); 5090 btrfs_start_ordered_extent(inode, ordered, 1);
5145 btrfs_put_ordered_extent(ordered); 5091 btrfs_put_ordered_extent(ordered);
@@ -5153,13 +5099,15 @@ again:
5153 * is probably a better way to do this, but for now keep consistent with 5099 * is probably a better way to do this, but for now keep consistent with
5154 * prepare_pages in the normal write path. 5100 * prepare_pages in the normal write path.
5155 */ 5101 */
5156 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 5102 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
5157 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 5103 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
5158 GFP_NOFS); 5104 0, 0, &cached_state, GFP_NOFS);
5159 5105
5160 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 5106 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
5107 &cached_state);
5161 if (ret) { 5108 if (ret) {
5162 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5109 unlock_extent_cached(io_tree, page_start, page_end,
5110 &cached_state, GFP_NOFS);
5163 ret = VM_FAULT_SIGBUS; 5111 ret = VM_FAULT_SIGBUS;
5164 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 5112 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5165 goto out_unlock; 5113 goto out_unlock;
@@ -5185,7 +5133,7 @@ again:
5185 BTRFS_I(inode)->last_trans = root->fs_info->generation; 5133 BTRFS_I(inode)->last_trans = root->fs_info->generation;
5186 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; 5134 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
5187 5135
5188 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5136 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
5189 5137
5190out_unlock: 5138out_unlock:
5191 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 5139 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
@@ -5390,7 +5338,6 @@ free:
5390void btrfs_drop_inode(struct inode *inode) 5338void btrfs_drop_inode(struct inode *inode)
5391{ 5339{
5392 struct btrfs_root *root = BTRFS_I(inode)->root; 5340 struct btrfs_root *root = BTRFS_I(inode)->root;
5393
5394 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 5341 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
5395 generic_delete_inode(inode); 5342 generic_delete_inode(inode);
5396 else 5343 else
@@ -5789,22 +5736,20 @@ out_fail:
5789} 5736}
5790 5737
5791static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 5738static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5792 u64 alloc_hint, int mode) 5739 u64 alloc_hint, int mode, loff_t actual_len)
5793{ 5740{
5794 struct btrfs_trans_handle *trans; 5741 struct btrfs_trans_handle *trans;
5795 struct btrfs_root *root = BTRFS_I(inode)->root; 5742 struct btrfs_root *root = BTRFS_I(inode)->root;
5796 struct btrfs_key ins; 5743 struct btrfs_key ins;
5797 u64 alloc_size;
5798 u64 cur_offset = start; 5744 u64 cur_offset = start;
5799 u64 num_bytes = end - start; 5745 u64 num_bytes = end - start;
5800 int ret = 0; 5746 int ret = 0;
5747 u64 i_size;
5801 5748
5802 while (num_bytes > 0) { 5749 while (num_bytes > 0) {
5803 alloc_size = min(num_bytes, root->fs_info->max_extent);
5804
5805 trans = btrfs_start_transaction(root, 1); 5750 trans = btrfs_start_transaction(root, 1);
5806 5751
5807 ret = btrfs_reserve_extent(trans, root, alloc_size, 5752 ret = btrfs_reserve_extent(trans, root, num_bytes,
5808 root->sectorsize, 0, alloc_hint, 5753 root->sectorsize, 0, alloc_hint,
5809 (u64)-1, &ins, 1); 5754 (u64)-1, &ins, 1);
5810 if (ret) { 5755 if (ret) {
@@ -5835,9 +5780,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5835 inode->i_ctime = CURRENT_TIME; 5780 inode->i_ctime = CURRENT_TIME;
5836 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5781 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5837 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5782 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5838 cur_offset > inode->i_size) { 5783 (actual_len > inode->i_size) &&
5839 i_size_write(inode, cur_offset); 5784 (cur_offset > inode->i_size)) {
5840 btrfs_ordered_update_i_size(inode, cur_offset, NULL); 5785
5786 if (cur_offset > actual_len)
5787 i_size = actual_len;
5788 else
5789 i_size = cur_offset;
5790 i_size_write(inode, i_size);
5791 btrfs_ordered_update_i_size(inode, i_size, NULL);
5841 } 5792 }
5842 5793
5843 ret = btrfs_update_inode(trans, root, inode); 5794 ret = btrfs_update_inode(trans, root, inode);
@@ -5857,6 +5808,7 @@ stop_trans:
5857static long btrfs_fallocate(struct inode *inode, int mode, 5808static long btrfs_fallocate(struct inode *inode, int mode,
5858 loff_t offset, loff_t len) 5809 loff_t offset, loff_t len)
5859{ 5810{
5811 struct extent_state *cached_state = NULL;
5860 u64 cur_offset; 5812 u64 cur_offset;
5861 u64 last_byte; 5813 u64 last_byte;
5862 u64 alloc_start; 5814 u64 alloc_start;
@@ -5895,16 +5847,17 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5895 /* the extent lock is ordered inside the running 5847 /* the extent lock is ordered inside the running
5896 * transaction 5848 * transaction
5897 */ 5849 */
5898 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5850 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
5899 GFP_NOFS); 5851 locked_end, 0, &cached_state, GFP_NOFS);
5900 ordered = btrfs_lookup_first_ordered_extent(inode, 5852 ordered = btrfs_lookup_first_ordered_extent(inode,
5901 alloc_end - 1); 5853 alloc_end - 1);
5902 if (ordered && 5854 if (ordered &&
5903 ordered->file_offset + ordered->len > alloc_start && 5855 ordered->file_offset + ordered->len > alloc_start &&
5904 ordered->file_offset < alloc_end) { 5856 ordered->file_offset < alloc_end) {
5905 btrfs_put_ordered_extent(ordered); 5857 btrfs_put_ordered_extent(ordered);
5906 unlock_extent(&BTRFS_I(inode)->io_tree, 5858 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
5907 alloc_start, locked_end, GFP_NOFS); 5859 alloc_start, locked_end,
5860 &cached_state, GFP_NOFS);
5908 /* 5861 /*
5909 * we can't wait on the range with the transaction 5862 * we can't wait on the range with the transaction
5910 * running or with the extent lock held 5863 * running or with the extent lock held
@@ -5930,7 +5883,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5930 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 5883 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5931 ret = prealloc_file_range(inode, 5884 ret = prealloc_file_range(inode,
5932 cur_offset, last_byte, 5885 cur_offset, last_byte,
5933 alloc_hint, mode); 5886 alloc_hint, mode, offset+len);
5934 if (ret < 0) { 5887 if (ret < 0) {
5935 free_extent_map(em); 5888 free_extent_map(em);
5936 break; 5889 break;
@@ -5946,8 +5899,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5946 break; 5899 break;
5947 } 5900 }
5948 } 5901 }
5949 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5902 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5950 GFP_NOFS); 5903 &cached_state, GFP_NOFS);
5951 5904
5952 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, 5905 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5953 alloc_end - alloc_start); 5906 alloc_end - alloc_start);