aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c822
1 files changed, 462 insertions, 360 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3ad168a0bfc..2bfdc641d4e3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -36,6 +36,7 @@
36#include <linux/xattr.h> 36#include <linux/xattr.h>
37#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h>
39#include "compat.h" 40#include "compat.h"
40#include "ctree.h" 41#include "ctree.h"
41#include "disk-io.h" 42#include "disk-io.h"
@@ -88,13 +89,14 @@ static noinline int cow_file_range(struct inode *inode,
88 u64 start, u64 end, int *page_started, 89 u64 start, u64 end, int *page_started,
89 unsigned long *nr_written, int unlock); 90 unsigned long *nr_written, int unlock);
90 91
91static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 92static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
93 struct inode *inode, struct inode *dir)
92{ 94{
93 int err; 95 int err;
94 96
95 err = btrfs_init_acl(inode, dir); 97 err = btrfs_init_acl(trans, inode, dir);
96 if (!err) 98 if (!err)
97 err = btrfs_xattr_security_init(inode, dir); 99 err = btrfs_xattr_security_init(trans, inode, dir);
98 return err; 100 return err;
99} 101}
100 102
@@ -188,8 +190,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
188 btrfs_mark_buffer_dirty(leaf); 190 btrfs_mark_buffer_dirty(leaf);
189 btrfs_free_path(path); 191 btrfs_free_path(path);
190 192
193 /*
194 * we're an inline extent, so nobody can
195 * extend the file past i_size without locking
196 * a page we already have locked.
197 *
198 * We must do any isize and inode updates
199 * before we unlock the pages. Otherwise we
200 * could end up racing with unlink.
201 */
191 BTRFS_I(inode)->disk_i_size = inode->i_size; 202 BTRFS_I(inode)->disk_i_size = inode->i_size;
192 btrfs_update_inode(trans, root, inode); 203 btrfs_update_inode(trans, root, inode);
204
193 return 0; 205 return 0;
194fail: 206fail:
195 btrfs_free_path(path); 207 btrfs_free_path(path);
@@ -230,8 +242,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
230 return 1; 242 return 1;
231 } 243 }
232 244
233 ret = btrfs_drop_extents(trans, root, inode, start, 245 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
234 aligned_end, aligned_end, start,
235 &hint_byte, 1); 246 &hint_byte, 1);
236 BUG_ON(ret); 247 BUG_ON(ret);
237 248
@@ -369,7 +380,8 @@ again:
369 * change at any time if we discover bad compression ratios. 380 * change at any time if we discover bad compression ratios.
370 */ 381 */
371 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && 382 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
372 btrfs_test_opt(root, COMPRESS)) { 383 (btrfs_test_opt(root, COMPRESS) ||
384 (BTRFS_I(inode)->force_compress))) {
373 WARN_ON(pages); 385 WARN_ON(pages);
374 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 386 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
375 387
@@ -416,7 +428,6 @@ again:
416 start, end, 428 start, end,
417 total_compressed, pages); 429 total_compressed, pages);
418 } 430 }
419 btrfs_end_transaction(trans, root);
420 if (ret == 0) { 431 if (ret == 0) {
421 /* 432 /*
422 * inline extent creation worked, we don't need 433 * inline extent creation worked, we don't need
@@ -430,9 +441,11 @@ again:
430 EXTENT_CLEAR_DELALLOC | 441 EXTENT_CLEAR_DELALLOC |
431 EXTENT_CLEAR_ACCOUNTING | 442 EXTENT_CLEAR_ACCOUNTING |
432 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 443 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
433 ret = 0; 444
445 btrfs_end_transaction(trans, root);
434 goto free_pages_out; 446 goto free_pages_out;
435 } 447 }
448 btrfs_end_transaction(trans, root);
436 } 449 }
437 450
438 if (will_compress) { 451 if (will_compress) {
@@ -472,7 +485,10 @@ again:
472 nr_pages_ret = 0; 485 nr_pages_ret = 0;
473 486
474 /* flag the file so we don't compress in the future */ 487 /* flag the file so we don't compress in the future */
475 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 488 if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
489 !(BTRFS_I(inode)->force_compress)) {
490 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
491 }
476 } 492 }
477 if (will_compress) { 493 if (will_compress) {
478 *num_added += 1; 494 *num_added += 1;
@@ -543,7 +559,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
543 if (list_empty(&async_cow->extents)) 559 if (list_empty(&async_cow->extents))
544 return 0; 560 return 0;
545 561
546 trans = btrfs_join_transaction(root, 1);
547 562
548 while (!list_empty(&async_cow->extents)) { 563 while (!list_empty(&async_cow->extents)) {
549 async_extent = list_entry(async_cow->extents.next, 564 async_extent = list_entry(async_cow->extents.next,
@@ -559,8 +574,8 @@ retry:
559 unsigned long nr_written = 0; 574 unsigned long nr_written = 0;
560 575
561 lock_extent(io_tree, async_extent->start, 576 lock_extent(io_tree, async_extent->start,
562 async_extent->start + 577 async_extent->start +
563 async_extent->ram_size - 1, GFP_NOFS); 578 async_extent->ram_size - 1, GFP_NOFS);
564 579
565 /* allocate blocks */ 580 /* allocate blocks */
566 ret = cow_file_range(inode, async_cow->locked_page, 581 ret = cow_file_range(inode, async_cow->locked_page,
@@ -590,19 +605,15 @@ retry:
590 lock_extent(io_tree, async_extent->start, 605 lock_extent(io_tree, async_extent->start,
591 async_extent->start + async_extent->ram_size - 1, 606 async_extent->start + async_extent->ram_size - 1,
592 GFP_NOFS); 607 GFP_NOFS);
593 /*
594 * here we're doing allocation and writeback of the
595 * compressed pages
596 */
597 btrfs_drop_extent_cache(inode, async_extent->start,
598 async_extent->start +
599 async_extent->ram_size - 1, 0);
600 608
609 trans = btrfs_join_transaction(root, 1);
601 ret = btrfs_reserve_extent(trans, root, 610 ret = btrfs_reserve_extent(trans, root,
602 async_extent->compressed_size, 611 async_extent->compressed_size,
603 async_extent->compressed_size, 612 async_extent->compressed_size,
604 0, alloc_hint, 613 0, alloc_hint,
605 (u64)-1, &ins, 1); 614 (u64)-1, &ins, 1);
615 btrfs_end_transaction(trans, root);
616
606 if (ret) { 617 if (ret) {
607 int i; 618 int i;
608 for (i = 0; i < async_extent->nr_pages; i++) { 619 for (i = 0; i < async_extent->nr_pages; i++) {
@@ -618,6 +629,14 @@ retry:
618 goto retry; 629 goto retry;
619 } 630 }
620 631
632 /*
633 * here we're doing allocation and writeback of the
634 * compressed pages
635 */
636 btrfs_drop_extent_cache(inode, async_extent->start,
637 async_extent->start +
638 async_extent->ram_size - 1, 0);
639
621 em = alloc_extent_map(GFP_NOFS); 640 em = alloc_extent_map(GFP_NOFS);
622 em->start = async_extent->start; 641 em->start = async_extent->start;
623 em->len = async_extent->ram_size; 642 em->len = async_extent->ram_size;
@@ -649,8 +668,6 @@ retry:
649 BTRFS_ORDERED_COMPRESSED); 668 BTRFS_ORDERED_COMPRESSED);
650 BUG_ON(ret); 669 BUG_ON(ret);
651 670
652 btrfs_end_transaction(trans, root);
653
654 /* 671 /*
655 * clear dirty, set writeback and unlock the pages. 672 * clear dirty, set writeback and unlock the pages.
656 */ 673 */
@@ -672,13 +689,11 @@ retry:
672 async_extent->nr_pages); 689 async_extent->nr_pages);
673 690
674 BUG_ON(ret); 691 BUG_ON(ret);
675 trans = btrfs_join_transaction(root, 1);
676 alloc_hint = ins.objectid + ins.offset; 692 alloc_hint = ins.objectid + ins.offset;
677 kfree(async_extent); 693 kfree(async_extent);
678 cond_resched(); 694 cond_resched();
679 } 695 }
680 696
681 btrfs_end_transaction(trans, root);
682 return 0; 697 return 0;
683} 698}
684 699
@@ -742,6 +757,7 @@ static noinline int cow_file_range(struct inode *inode,
742 EXTENT_CLEAR_DIRTY | 757 EXTENT_CLEAR_DIRTY |
743 EXTENT_SET_WRITEBACK | 758 EXTENT_SET_WRITEBACK |
744 EXTENT_END_WRITEBACK); 759 EXTENT_END_WRITEBACK);
760
745 *nr_written = *nr_written + 761 *nr_written = *nr_written +
746 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 762 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
747 *page_started = 1; 763 *page_started = 1;
@@ -781,7 +797,7 @@ static noinline int cow_file_range(struct inode *inode,
781 while (disk_num_bytes > 0) { 797 while (disk_num_bytes > 0) {
782 unsigned long op; 798 unsigned long op;
783 799
784 cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); 800 cur_alloc_size = disk_num_bytes;
785 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 801 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
786 root->sectorsize, 0, alloc_hint, 802 root->sectorsize, 0, alloc_hint,
787 (u64)-1, &ins, 1); 803 (u64)-1, &ins, 1);
@@ -1199,7 +1215,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1199 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) 1215 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
1200 ret = run_delalloc_nocow(inode, locked_page, start, end, 1216 ret = run_delalloc_nocow(inode, locked_page, start, end,
1201 page_started, 0, nr_written); 1217 page_started, 0, nr_written);
1202 else if (!btrfs_test_opt(root, COMPRESS)) 1218 else if (!btrfs_test_opt(root, COMPRESS) &&
1219 !(BTRFS_I(inode)->force_compress))
1203 ret = cow_file_range(inode, locked_page, start, end, 1220 ret = cow_file_range(inode, locked_page, start, end,
1204 page_started, nr_written, 1); 1221 page_started, nr_written, 1);
1205 else 1222 else
@@ -1211,30 +1228,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1211static int btrfs_split_extent_hook(struct inode *inode, 1228static int btrfs_split_extent_hook(struct inode *inode,
1212 struct extent_state *orig, u64 split) 1229 struct extent_state *orig, u64 split)
1213{ 1230{
1214 struct btrfs_root *root = BTRFS_I(inode)->root;
1215 u64 size;
1216
1217 if (!(orig->state & EXTENT_DELALLOC)) 1231 if (!(orig->state & EXTENT_DELALLOC))
1218 return 0; 1232 return 0;
1219 1233
1220 size = orig->end - orig->start + 1;
1221 if (size > root->fs_info->max_extent) {
1222 u64 num_extents;
1223 u64 new_size;
1224
1225 new_size = orig->end - split + 1;
1226 num_extents = div64_u64(size + root->fs_info->max_extent - 1,
1227 root->fs_info->max_extent);
1228
1229 /*
1230 * if we break a large extent up then leave oustanding_extents
1231 * be, since we've already accounted for the large extent.
1232 */
1233 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1234 root->fs_info->max_extent) < num_extents)
1235 return 0;
1236 }
1237
1238 spin_lock(&BTRFS_I(inode)->accounting_lock); 1234 spin_lock(&BTRFS_I(inode)->accounting_lock);
1239 BTRFS_I(inode)->outstanding_extents++; 1235 BTRFS_I(inode)->outstanding_extents++;
1240 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1236 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1252,38 +1248,10 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1252 struct extent_state *new, 1248 struct extent_state *new,
1253 struct extent_state *other) 1249 struct extent_state *other)
1254{ 1250{
1255 struct btrfs_root *root = BTRFS_I(inode)->root;
1256 u64 new_size, old_size;
1257 u64 num_extents;
1258
1259 /* not delalloc, ignore it */ 1251 /* not delalloc, ignore it */
1260 if (!(other->state & EXTENT_DELALLOC)) 1252 if (!(other->state & EXTENT_DELALLOC))
1261 return 0; 1253 return 0;
1262 1254
1263 old_size = other->end - other->start + 1;
1264 if (new->start < other->start)
1265 new_size = other->end - new->start + 1;
1266 else
1267 new_size = new->end - other->start + 1;
1268
1269 /* we're not bigger than the max, unreserve the space and go */
1270 if (new_size <= root->fs_info->max_extent) {
1271 spin_lock(&BTRFS_I(inode)->accounting_lock);
1272 BTRFS_I(inode)->outstanding_extents--;
1273 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1274 return 0;
1275 }
1276
1277 /*
1278 * If we grew by another max_extent, just return, we want to keep that
1279 * reserved amount.
1280 */
1281 num_extents = div64_u64(old_size + root->fs_info->max_extent - 1,
1282 root->fs_info->max_extent);
1283 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1284 root->fs_info->max_extent) > num_extents)
1285 return 0;
1286
1287 spin_lock(&BTRFS_I(inode)->accounting_lock); 1255 spin_lock(&BTRFS_I(inode)->accounting_lock);
1288 BTRFS_I(inode)->outstanding_extents--; 1256 BTRFS_I(inode)->outstanding_extents--;
1289 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1257 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1312,6 +1280,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1312 BTRFS_I(inode)->outstanding_extents++; 1280 BTRFS_I(inode)->outstanding_extents++;
1313 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1281 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1314 btrfs_delalloc_reserve_space(root, inode, end - start + 1); 1282 btrfs_delalloc_reserve_space(root, inode, end - start + 1);
1283
1315 spin_lock(&root->fs_info->delalloc_lock); 1284 spin_lock(&root->fs_info->delalloc_lock);
1316 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1285 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
1317 root->fs_info->delalloc_bytes += end - start + 1; 1286 root->fs_info->delalloc_bytes += end - start + 1;
@@ -1340,6 +1309,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1340 1309
1341 if (bits & EXTENT_DO_ACCOUNTING) { 1310 if (bits & EXTENT_DO_ACCOUNTING) {
1342 spin_lock(&BTRFS_I(inode)->accounting_lock); 1311 spin_lock(&BTRFS_I(inode)->accounting_lock);
1312 WARN_ON(!BTRFS_I(inode)->outstanding_extents);
1343 BTRFS_I(inode)->outstanding_extents--; 1313 BTRFS_I(inode)->outstanding_extents--;
1344 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1314 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1345 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 1315 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
@@ -1496,12 +1466,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1496 return 0; 1466 return 0;
1497} 1467}
1498 1468
1499int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) 1469int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
1470 struct extent_state **cached_state)
1500{ 1471{
1501 if ((end & (PAGE_CACHE_SIZE - 1)) == 0) 1472 if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
1502 WARN_ON(1); 1473 WARN_ON(1);
1503 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, 1474 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
1504 GFP_NOFS); 1475 cached_state, GFP_NOFS);
1505} 1476}
1506 1477
1507/* see btrfs_writepage_start_hook for details on why this is required */ 1478/* see btrfs_writepage_start_hook for details on why this is required */
@@ -1514,6 +1485,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
1514{ 1485{
1515 struct btrfs_writepage_fixup *fixup; 1486 struct btrfs_writepage_fixup *fixup;
1516 struct btrfs_ordered_extent *ordered; 1487 struct btrfs_ordered_extent *ordered;
1488 struct extent_state *cached_state = NULL;
1517 struct page *page; 1489 struct page *page;
1518 struct inode *inode; 1490 struct inode *inode;
1519 u64 page_start; 1491 u64 page_start;
@@ -1532,7 +1504,8 @@ again:
1532 page_start = page_offset(page); 1504 page_start = page_offset(page);
1533 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; 1505 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
1534 1506
1535 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1507 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
1508 &cached_state, GFP_NOFS);
1536 1509
1537 /* already ordered? We're done */ 1510 /* already ordered? We're done */
1538 if (PagePrivate2(page)) 1511 if (PagePrivate2(page))
@@ -1540,17 +1513,18 @@ again:
1540 1513
1541 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1514 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1542 if (ordered) { 1515 if (ordered) {
1543 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, 1516 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
1544 page_end, GFP_NOFS); 1517 page_end, &cached_state, GFP_NOFS);
1545 unlock_page(page); 1518 unlock_page(page);
1546 btrfs_start_ordered_extent(inode, ordered, 1); 1519 btrfs_start_ordered_extent(inode, ordered, 1);
1547 goto again; 1520 goto again;
1548 } 1521 }
1549 1522
1550 btrfs_set_extent_delalloc(inode, page_start, page_end); 1523 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
1551 ClearPageChecked(page); 1524 ClearPageChecked(page);
1552out: 1525out:
1553 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1526 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
1527 &cached_state, GFP_NOFS);
1554out_page: 1528out_page:
1555 unlock_page(page); 1529 unlock_page(page);
1556 page_cache_release(page); 1530 page_cache_release(page);
@@ -1596,7 +1570,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1596 struct inode *inode, u64 file_pos, 1570 struct inode *inode, u64 file_pos,
1597 u64 disk_bytenr, u64 disk_num_bytes, 1571 u64 disk_bytenr, u64 disk_num_bytes,
1598 u64 num_bytes, u64 ram_bytes, 1572 u64 num_bytes, u64 ram_bytes,
1599 u64 locked_end,
1600 u8 compression, u8 encryption, 1573 u8 compression, u8 encryption,
1601 u16 other_encoding, int extent_type) 1574 u16 other_encoding, int extent_type)
1602{ 1575{
@@ -1622,9 +1595,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1622 * the caller is expected to unpin it and allow it to be merged 1595 * the caller is expected to unpin it and allow it to be merged
1623 * with the others. 1596 * with the others.
1624 */ 1597 */
1625 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1598 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1626 file_pos + num_bytes, locked_end, 1599 &hint, 0);
1627 file_pos, &hint, 0);
1628 BUG_ON(ret); 1600 BUG_ON(ret);
1629 1601
1630 ins.objectid = inode->i_ino; 1602 ins.objectid = inode->i_ino;
@@ -1671,24 +1643,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1671 * before we start the transaction. It limits the amount of btree 1643 * before we start the transaction. It limits the amount of btree
1672 * reads required while inside the transaction. 1644 * reads required while inside the transaction.
1673 */ 1645 */
1674static noinline void reada_csum(struct btrfs_root *root,
1675 struct btrfs_path *path,
1676 struct btrfs_ordered_extent *ordered_extent)
1677{
1678 struct btrfs_ordered_sum *sum;
1679 u64 bytenr;
1680
1681 sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
1682 list);
1683 bytenr = sum->sums[0].bytenr;
1684
1685 /*
1686 * we don't care about the results, the point of this search is
1687 * just to get the btree leaves into ram
1688 */
1689 btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
1690}
1691
1692/* as ordered data IO finishes, this gets called so we can finish 1646/* as ordered data IO finishes, this gets called so we can finish
1693 * an ordered extent if the range of bytes in the file it covers are 1647 * an ordered extent if the range of bytes in the file it covers are
1694 * fully written. 1648 * fully written.
@@ -1699,54 +1653,39 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1699 struct btrfs_trans_handle *trans; 1653 struct btrfs_trans_handle *trans;
1700 struct btrfs_ordered_extent *ordered_extent = NULL; 1654 struct btrfs_ordered_extent *ordered_extent = NULL;
1701 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1655 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1702 struct btrfs_path *path; 1656 struct extent_state *cached_state = NULL;
1703 int compressed = 0; 1657 int compressed = 0;
1704 int ret; 1658 int ret;
1705 1659
1706 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); 1660 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1661 end - start + 1);
1707 if (!ret) 1662 if (!ret)
1708 return 0; 1663 return 0;
1664 BUG_ON(!ordered_extent);
1709 1665
1710 /* 1666 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1711 * before we join the transaction, try to do some of our IO. 1667 BUG_ON(!list_empty(&ordered_extent->list));
1712 * This will limit the amount of IO that we have to do with 1668 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1713 * the transaction running. We're unlikely to need to do any 1669 if (!ret) {
1714 * IO if the file extents are new, the disk_i_size checks 1670 trans = btrfs_join_transaction(root, 1);
1715 * covers the most common case. 1671 ret = btrfs_update_inode(trans, root, inode);
1716 */ 1672 BUG_ON(ret);
1717 if (start < BTRFS_I(inode)->disk_i_size) { 1673 btrfs_end_transaction(trans, root);
1718 path = btrfs_alloc_path();
1719 if (path) {
1720 ret = btrfs_lookup_file_extent(NULL, root, path,
1721 inode->i_ino,
1722 start, 0);
1723 ordered_extent = btrfs_lookup_ordered_extent(inode,
1724 start);
1725 if (!list_empty(&ordered_extent->list)) {
1726 btrfs_release_path(root, path);
1727 reada_csum(root, path, ordered_extent);
1728 }
1729 btrfs_free_path(path);
1730 } 1674 }
1675 goto out;
1731 } 1676 }
1732 1677
1733 trans = btrfs_join_transaction(root, 1); 1678 lock_extent_bits(io_tree, ordered_extent->file_offset,
1734 1679 ordered_extent->file_offset + ordered_extent->len - 1,
1735 if (!ordered_extent) 1680 0, &cached_state, GFP_NOFS);
1736 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1737 BUG_ON(!ordered_extent);
1738 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
1739 goto nocow;
1740 1681
1741 lock_extent(io_tree, ordered_extent->file_offset, 1682 trans = btrfs_join_transaction(root, 1);
1742 ordered_extent->file_offset + ordered_extent->len - 1,
1743 GFP_NOFS);
1744 1683
1745 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1684 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1746 compressed = 1; 1685 compressed = 1;
1747 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1686 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1748 BUG_ON(compressed); 1687 BUG_ON(compressed);
1749 ret = btrfs_mark_extent_written(trans, root, inode, 1688 ret = btrfs_mark_extent_written(trans, inode,
1750 ordered_extent->file_offset, 1689 ordered_extent->file_offset,
1751 ordered_extent->file_offset + 1690 ordered_extent->file_offset +
1752 ordered_extent->len); 1691 ordered_extent->len);
@@ -1758,8 +1697,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1758 ordered_extent->disk_len, 1697 ordered_extent->disk_len,
1759 ordered_extent->len, 1698 ordered_extent->len,
1760 ordered_extent->len, 1699 ordered_extent->len,
1761 ordered_extent->file_offset +
1762 ordered_extent->len,
1763 compressed, 0, 0, 1700 compressed, 0, 0,
1764 BTRFS_FILE_EXTENT_REG); 1701 BTRFS_FILE_EXTENT_REG);
1765 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1702 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1767,25 +1704,24 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1767 ordered_extent->len); 1704 ordered_extent->len);
1768 BUG_ON(ret); 1705 BUG_ON(ret);
1769 } 1706 }
1770 unlock_extent(io_tree, ordered_extent->file_offset, 1707 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1771 ordered_extent->file_offset + ordered_extent->len - 1, 1708 ordered_extent->file_offset +
1772 GFP_NOFS); 1709 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1773nocow: 1710
1774 add_pending_csums(trans, inode, ordered_extent->file_offset, 1711 add_pending_csums(trans, inode, ordered_extent->file_offset,
1775 &ordered_extent->list); 1712 &ordered_extent->list);
1776 1713
1777 mutex_lock(&BTRFS_I(inode)->extent_mutex); 1714 /* this also removes the ordered extent from the tree */
1778 btrfs_ordered_update_i_size(inode, ordered_extent); 1715 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1779 btrfs_update_inode(trans, root, inode); 1716 ret = btrfs_update_inode(trans, root, inode);
1780 btrfs_remove_ordered_extent(inode, ordered_extent); 1717 BUG_ON(ret);
1781 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 1718 btrfs_end_transaction(trans, root);
1782 1719out:
1783 /* once for us */ 1720 /* once for us */
1784 btrfs_put_ordered_extent(ordered_extent); 1721 btrfs_put_ordered_extent(ordered_extent);
1785 /* once for the tree */ 1722 /* once for the tree */
1786 btrfs_put_ordered_extent(ordered_extent); 1723 btrfs_put_ordered_extent(ordered_extent);
1787 1724
1788 btrfs_end_transaction(trans, root);
1789 return 0; 1725 return 0;
1790} 1726}
1791 1727
@@ -2008,6 +1944,54 @@ zeroit:
2008 return -EIO; 1944 return -EIO;
2009} 1945}
2010 1946
1947struct delayed_iput {
1948 struct list_head list;
1949 struct inode *inode;
1950};
1951
1952void btrfs_add_delayed_iput(struct inode *inode)
1953{
1954 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1955 struct delayed_iput *delayed;
1956
1957 if (atomic_add_unless(&inode->i_count, -1, 1))
1958 return;
1959
1960 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
1961 delayed->inode = inode;
1962
1963 spin_lock(&fs_info->delayed_iput_lock);
1964 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
1965 spin_unlock(&fs_info->delayed_iput_lock);
1966}
1967
1968void btrfs_run_delayed_iputs(struct btrfs_root *root)
1969{
1970 LIST_HEAD(list);
1971 struct btrfs_fs_info *fs_info = root->fs_info;
1972 struct delayed_iput *delayed;
1973 int empty;
1974
1975 spin_lock(&fs_info->delayed_iput_lock);
1976 empty = list_empty(&fs_info->delayed_iputs);
1977 spin_unlock(&fs_info->delayed_iput_lock);
1978 if (empty)
1979 return;
1980
1981 down_read(&root->fs_info->cleanup_work_sem);
1982 spin_lock(&fs_info->delayed_iput_lock);
1983 list_splice_init(&fs_info->delayed_iputs, &list);
1984 spin_unlock(&fs_info->delayed_iput_lock);
1985
1986 while (!list_empty(&list)) {
1987 delayed = list_entry(list.next, struct delayed_iput, list);
1988 list_del(&delayed->list);
1989 iput(delayed->inode);
1990 kfree(delayed);
1991 }
1992 up_read(&root->fs_info->cleanup_work_sem);
1993}
1994
2011/* 1995/*
2012 * This creates an orphan entry for the given inode in case something goes 1996 * This creates an orphan entry for the given inode in case something goes
2013 * wrong in the middle of an unlink/truncate. 1997 * wrong in the middle of an unlink/truncate.
@@ -2080,16 +2064,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2080 struct inode *inode; 2064 struct inode *inode;
2081 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2065 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2082 2066
2083 path = btrfs_alloc_path(); 2067 if (!xchg(&root->clean_orphans, 0))
2084 if (!path)
2085 return; 2068 return;
2069
2070 path = btrfs_alloc_path();
2071 BUG_ON(!path);
2086 path->reada = -1; 2072 path->reada = -1;
2087 2073
2088 key.objectid = BTRFS_ORPHAN_OBJECTID; 2074 key.objectid = BTRFS_ORPHAN_OBJECTID;
2089 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2075 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2090 key.offset = (u64)-1; 2076 key.offset = (u64)-1;
2091 2077
2092
2093 while (1) { 2078 while (1) {
2094 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2079 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2095 if (ret < 0) { 2080 if (ret < 0) {
@@ -2131,7 +2116,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2131 found_key.objectid = found_key.offset; 2116 found_key.objectid = found_key.offset;
2132 found_key.type = BTRFS_INODE_ITEM_KEY; 2117 found_key.type = BTRFS_INODE_ITEM_KEY;
2133 found_key.offset = 0; 2118 found_key.offset = 0;
2134 inode = btrfs_iget(root->fs_info->sb, &found_key, root); 2119 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
2135 if (IS_ERR(inode)) 2120 if (IS_ERR(inode))
2136 break; 2121 break;
2137 2122
@@ -2834,37 +2819,40 @@ out:
2834 * min_type is the minimum key type to truncate down to. If set to 0, this 2819 * min_type is the minimum key type to truncate down to. If set to 0, this
2835 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2820 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2836 */ 2821 */
2837noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2822int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root, 2823 struct btrfs_root *root,
2839 struct inode *inode, 2824 struct inode *inode,
2840 u64 new_size, u32 min_type) 2825 u64 new_size, u32 min_type)
2841{ 2826{
2842 int ret;
2843 struct btrfs_path *path; 2827 struct btrfs_path *path;
2844 struct btrfs_key key;
2845 struct btrfs_key found_key;
2846 u32 found_type = (u8)-1;
2847 struct extent_buffer *leaf; 2828 struct extent_buffer *leaf;
2848 struct btrfs_file_extent_item *fi; 2829 struct btrfs_file_extent_item *fi;
2830 struct btrfs_key key;
2831 struct btrfs_key found_key;
2849 u64 extent_start = 0; 2832 u64 extent_start = 0;
2850 u64 extent_num_bytes = 0; 2833 u64 extent_num_bytes = 0;
2851 u64 extent_offset = 0; 2834 u64 extent_offset = 0;
2852 u64 item_end = 0; 2835 u64 item_end = 0;
2836 u64 mask = root->sectorsize - 1;
2837 u32 found_type = (u8)-1;
2853 int found_extent; 2838 int found_extent;
2854 int del_item; 2839 int del_item;
2855 int pending_del_nr = 0; 2840 int pending_del_nr = 0;
2856 int pending_del_slot = 0; 2841 int pending_del_slot = 0;
2857 int extent_type = -1; 2842 int extent_type = -1;
2858 int encoding; 2843 int encoding;
2859 u64 mask = root->sectorsize - 1; 2844 int ret;
2845 int err = 0;
2846
2847 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2860 2848
2861 if (root->ref_cows) 2849 if (root->ref_cows)
2862 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2850 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2851
2863 path = btrfs_alloc_path(); 2852 path = btrfs_alloc_path();
2864 BUG_ON(!path); 2853 BUG_ON(!path);
2865 path->reada = -1; 2854 path->reada = -1;
2866 2855
2867 /* FIXME, add redo link to tree so we don't leak on crash */
2868 key.objectid = inode->i_ino; 2856 key.objectid = inode->i_ino;
2869 key.offset = (u64)-1; 2857 key.offset = (u64)-1;
2870 key.type = (u8)-1; 2858 key.type = (u8)-1;
@@ -2872,17 +2860,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2872search_again: 2860search_again:
2873 path->leave_spinning = 1; 2861 path->leave_spinning = 1;
2874 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2862 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2875 if (ret < 0) 2863 if (ret < 0) {
2876 goto error; 2864 err = ret;
2865 goto out;
2866 }
2877 2867
2878 if (ret > 0) { 2868 if (ret > 0) {
2879 /* there are no items in the tree for us to truncate, we're 2869 /* there are no items in the tree for us to truncate, we're
2880 * done 2870 * done
2881 */ 2871 */
2882 if (path->slots[0] == 0) { 2872 if (path->slots[0] == 0)
2883 ret = 0; 2873 goto out;
2884 goto error;
2885 }
2886 path->slots[0]--; 2874 path->slots[0]--;
2887 } 2875 }
2888 2876
@@ -2917,28 +2905,17 @@ search_again:
2917 } 2905 }
2918 item_end--; 2906 item_end--;
2919 } 2907 }
2920 if (item_end < new_size) { 2908 if (found_type > min_type) {
2921 if (found_type == BTRFS_DIR_ITEM_KEY) 2909 del_item = 1;
2922 found_type = BTRFS_INODE_ITEM_KEY; 2910 } else {
2923 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2911 if (item_end < new_size)
2924 found_type = BTRFS_EXTENT_DATA_KEY;
2925 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2926 found_type = BTRFS_XATTR_ITEM_KEY;
2927 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2928 found_type = BTRFS_INODE_REF_KEY;
2929 else if (found_type)
2930 found_type--;
2931 else
2932 break; 2912 break;
2933 btrfs_set_key_type(&key, found_type); 2913 if (found_key.offset >= new_size)
2934 goto next; 2914 del_item = 1;
2915 else
2916 del_item = 0;
2935 } 2917 }
2936 if (found_key.offset >= new_size)
2937 del_item = 1;
2938 else
2939 del_item = 0;
2940 found_extent = 0; 2918 found_extent = 0;
2941
2942 /* FIXME, shrink the extent if the ref count is only 1 */ 2919 /* FIXME, shrink the extent if the ref count is only 1 */
2943 if (found_type != BTRFS_EXTENT_DATA_KEY) 2920 if (found_type != BTRFS_EXTENT_DATA_KEY)
2944 goto delete; 2921 goto delete;
@@ -3025,42 +3002,36 @@ delete:
3025 inode->i_ino, extent_offset); 3002 inode->i_ino, extent_offset);
3026 BUG_ON(ret); 3003 BUG_ON(ret);
3027 } 3004 }
3028next:
3029 if (path->slots[0] == 0) {
3030 if (pending_del_nr)
3031 goto del_pending;
3032 btrfs_release_path(root, path);
3033 if (found_type == BTRFS_INODE_ITEM_KEY)
3034 break;
3035 goto search_again;
3036 }
3037 3005
3038 path->slots[0]--; 3006 if (found_type == BTRFS_INODE_ITEM_KEY)
3039 if (pending_del_nr && 3007 break;
3040 path->slots[0] + 1 != pending_del_slot) { 3008
3041 struct btrfs_key debug; 3009 if (path->slots[0] == 0 ||
3042del_pending: 3010 path->slots[0] != pending_del_slot) {
3043 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3011 if (root->ref_cows) {
3044 pending_del_slot); 3012 err = -EAGAIN;
3045 ret = btrfs_del_items(trans, root, path, 3013 goto out;
3046 pending_del_slot, 3014 }
3047 pending_del_nr); 3015 if (pending_del_nr) {
3048 BUG_ON(ret); 3016 ret = btrfs_del_items(trans, root, path,
3049 pending_del_nr = 0; 3017 pending_del_slot,
3018 pending_del_nr);
3019 BUG_ON(ret);
3020 pending_del_nr = 0;
3021 }
3050 btrfs_release_path(root, path); 3022 btrfs_release_path(root, path);
3051 if (found_type == BTRFS_INODE_ITEM_KEY)
3052 break;
3053 goto search_again; 3023 goto search_again;
3024 } else {
3025 path->slots[0]--;
3054 } 3026 }
3055 } 3027 }
3056 ret = 0; 3028out:
3057error:
3058 if (pending_del_nr) { 3029 if (pending_del_nr) {
3059 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3030 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3060 pending_del_nr); 3031 pending_del_nr);
3061 } 3032 }
3062 btrfs_free_path(path); 3033 btrfs_free_path(path);
3063 return ret; 3034 return err;
3064} 3035}
3065 3036
3066/* 3037/*
@@ -3073,6 +3044,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3073 struct btrfs_root *root = BTRFS_I(inode)->root; 3044 struct btrfs_root *root = BTRFS_I(inode)->root;
3074 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3045 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3075 struct btrfs_ordered_extent *ordered; 3046 struct btrfs_ordered_extent *ordered;
3047 struct extent_state *cached_state = NULL;
3076 char *kaddr; 3048 char *kaddr;
3077 u32 blocksize = root->sectorsize; 3049 u32 blocksize = root->sectorsize;
3078 pgoff_t index = from >> PAGE_CACHE_SHIFT; 3050 pgoff_t index = from >> PAGE_CACHE_SHIFT;
@@ -3119,12 +3091,14 @@ again:
3119 } 3091 }
3120 wait_on_page_writeback(page); 3092 wait_on_page_writeback(page);
3121 3093
3122 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 3094 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
3095 GFP_NOFS);
3123 set_page_extent_mapped(page); 3096 set_page_extent_mapped(page);
3124 3097
3125 ordered = btrfs_lookup_ordered_extent(inode, page_start); 3098 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3126 if (ordered) { 3099 if (ordered) {
3127 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3100 unlock_extent_cached(io_tree, page_start, page_end,
3101 &cached_state, GFP_NOFS);
3128 unlock_page(page); 3102 unlock_page(page);
3129 page_cache_release(page); 3103 page_cache_release(page);
3130 btrfs_start_ordered_extent(inode, ordered, 1); 3104 btrfs_start_ordered_extent(inode, ordered, 1);
@@ -3132,13 +3106,15 @@ again:
3132 goto again; 3106 goto again;
3133 } 3107 }
3134 3108
3135 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 3109 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
3136 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 3110 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
3137 GFP_NOFS); 3111 0, 0, &cached_state, GFP_NOFS);
3138 3112
3139 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 3113 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
3114 &cached_state);
3140 if (ret) { 3115 if (ret) {
3141 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3116 unlock_extent_cached(io_tree, page_start, page_end,
3117 &cached_state, GFP_NOFS);
3142 goto out_unlock; 3118 goto out_unlock;
3143 } 3119 }
3144 3120
@@ -3151,7 +3127,8 @@ again:
3151 } 3127 }
3152 ClearPageChecked(page); 3128 ClearPageChecked(page);
3153 set_page_dirty(page); 3129 set_page_dirty(page);
3154 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3130 unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
3131 GFP_NOFS);
3155 3132
3156out_unlock: 3133out_unlock:
3157 if (ret) 3134 if (ret)
@@ -3169,6 +3146,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3169 struct btrfs_root *root = BTRFS_I(inode)->root; 3146 struct btrfs_root *root = BTRFS_I(inode)->root;
3170 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3147 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3171 struct extent_map *em; 3148 struct extent_map *em;
3149 struct extent_state *cached_state = NULL;
3172 u64 mask = root->sectorsize - 1; 3150 u64 mask = root->sectorsize - 1;
3173 u64 hole_start = (inode->i_size + mask) & ~mask; 3151 u64 hole_start = (inode->i_size + mask) & ~mask;
3174 u64 block_end = (size + mask) & ~mask; 3152 u64 block_end = (size + mask) & ~mask;
@@ -3180,25 +3158,20 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3180 if (size <= hole_start) 3158 if (size <= hole_start)
3181 return 0; 3159 return 0;
3182 3160
3183 err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
3184 if (err)
3185 return err;
3186
3187 while (1) { 3161 while (1) {
3188 struct btrfs_ordered_extent *ordered; 3162 struct btrfs_ordered_extent *ordered;
3189 btrfs_wait_ordered_range(inode, hole_start, 3163 btrfs_wait_ordered_range(inode, hole_start,
3190 block_end - hole_start); 3164 block_end - hole_start);
3191 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3165 lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
3166 &cached_state, GFP_NOFS);
3192 ordered = btrfs_lookup_ordered_extent(inode, hole_start); 3167 ordered = btrfs_lookup_ordered_extent(inode, hole_start);
3193 if (!ordered) 3168 if (!ordered)
3194 break; 3169 break;
3195 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3170 unlock_extent_cached(io_tree, hole_start, block_end - 1,
3171 &cached_state, GFP_NOFS);
3196 btrfs_put_ordered_extent(ordered); 3172 btrfs_put_ordered_extent(ordered);
3197 } 3173 }
3198 3174
3199 trans = btrfs_start_transaction(root, 1);
3200 btrfs_set_trans_block_group(trans, inode);
3201
3202 cur_offset = hole_start; 3175 cur_offset = hole_start;
3203 while (1) { 3176 while (1) {
3204 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3177 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3206,40 +3179,121 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3206 BUG_ON(IS_ERR(em) || !em); 3179 BUG_ON(IS_ERR(em) || !em);
3207 last_byte = min(extent_map_end(em), block_end); 3180 last_byte = min(extent_map_end(em), block_end);
3208 last_byte = (last_byte + mask) & ~mask; 3181 last_byte = (last_byte + mask) & ~mask;
3209 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3182 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3210 u64 hint_byte = 0; 3183 u64 hint_byte = 0;
3211 hole_size = last_byte - cur_offset; 3184 hole_size = last_byte - cur_offset;
3212 err = btrfs_drop_extents(trans, root, inode,
3213 cur_offset,
3214 cur_offset + hole_size,
3215 block_end,
3216 cur_offset, &hint_byte, 1);
3217 if (err)
3218 break;
3219 3185
3220 err = btrfs_reserve_metadata_space(root, 1); 3186 err = btrfs_reserve_metadata_space(root, 2);
3221 if (err) 3187 if (err)
3222 break; 3188 break;
3223 3189
3190 trans = btrfs_start_transaction(root, 1);
3191 btrfs_set_trans_block_group(trans, inode);
3192
3193 err = btrfs_drop_extents(trans, inode, cur_offset,
3194 cur_offset + hole_size,
3195 &hint_byte, 1);
3196 BUG_ON(err);
3197
3224 err = btrfs_insert_file_extent(trans, root, 3198 err = btrfs_insert_file_extent(trans, root,
3225 inode->i_ino, cur_offset, 0, 3199 inode->i_ino, cur_offset, 0,
3226 0, hole_size, 0, hole_size, 3200 0, hole_size, 0, hole_size,
3227 0, 0, 0); 3201 0, 0, 0);
3202 BUG_ON(err);
3203
3228 btrfs_drop_extent_cache(inode, hole_start, 3204 btrfs_drop_extent_cache(inode, hole_start,
3229 last_byte - 1, 0); 3205 last_byte - 1, 0);
3230 btrfs_unreserve_metadata_space(root, 1); 3206
3207 btrfs_end_transaction(trans, root);
3208 btrfs_unreserve_metadata_space(root, 2);
3231 } 3209 }
3232 free_extent_map(em); 3210 free_extent_map(em);
3233 cur_offset = last_byte; 3211 cur_offset = last_byte;
3234 if (err || cur_offset >= block_end) 3212 if (cur_offset >= block_end)
3235 break; 3213 break;
3236 } 3214 }
3237 3215
3238 btrfs_end_transaction(trans, root); 3216 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
3239 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3217 GFP_NOFS);
3240 return err; 3218 return err;
3241} 3219}
3242 3220
3221static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3222{
3223 struct btrfs_root *root = BTRFS_I(inode)->root;
3224 struct btrfs_trans_handle *trans;
3225 unsigned long nr;
3226 int ret;
3227
3228 if (attr->ia_size == inode->i_size)
3229 return 0;
3230
3231 if (attr->ia_size > inode->i_size) {
3232 unsigned long limit;
3233 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3234 if (attr->ia_size > inode->i_sb->s_maxbytes)
3235 return -EFBIG;
3236 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3237 send_sig(SIGXFSZ, current, 0);
3238 return -EFBIG;
3239 }
3240 }
3241
3242 ret = btrfs_reserve_metadata_space(root, 1);
3243 if (ret)
3244 return ret;
3245
3246 trans = btrfs_start_transaction(root, 1);
3247 btrfs_set_trans_block_group(trans, inode);
3248
3249 ret = btrfs_orphan_add(trans, inode);
3250 BUG_ON(ret);
3251
3252 nr = trans->blocks_used;
3253 btrfs_end_transaction(trans, root);
3254 btrfs_unreserve_metadata_space(root, 1);
3255 btrfs_btree_balance_dirty(root, nr);
3256
3257 if (attr->ia_size > inode->i_size) {
3258 ret = btrfs_cont_expand(inode, attr->ia_size);
3259 if (ret) {
3260 btrfs_truncate(inode);
3261 return ret;
3262 }
3263
3264 i_size_write(inode, attr->ia_size);
3265 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3266
3267 trans = btrfs_start_transaction(root, 1);
3268 btrfs_set_trans_block_group(trans, inode);
3269
3270 ret = btrfs_update_inode(trans, root, inode);
3271 BUG_ON(ret);
3272 if (inode->i_nlink > 0) {
3273 ret = btrfs_orphan_del(trans, inode);
3274 BUG_ON(ret);
3275 }
3276 nr = trans->blocks_used;
3277 btrfs_end_transaction(trans, root);
3278 btrfs_btree_balance_dirty(root, nr);
3279 return 0;
3280 }
3281
3282 /*
3283 * We're truncating a file that used to have good data down to
3284 * zero. Make sure it gets into the ordered flush list so that
3285 * any new writes get down to disk quickly.
3286 */
3287 if (attr->ia_size == 0)
3288 BTRFS_I(inode)->ordered_data_close = 1;
3289
3290 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3291 ret = vmtruncate(inode, attr->ia_size);
3292 BUG_ON(ret);
3293
3294 return 0;
3295}
3296
3243static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3297static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3244{ 3298{
3245 struct inode *inode = dentry->d_inode; 3299 struct inode *inode = dentry->d_inode;
@@ -3250,23 +3304,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3250 return err; 3304 return err;
3251 3305
3252 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3306 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3253 if (attr->ia_size > inode->i_size) { 3307 err = btrfs_setattr_size(inode, attr);
3254 err = btrfs_cont_expand(inode, attr->ia_size); 3308 if (err)
3255 if (err) 3309 return err;
3256 return err;
3257 } else if (inode->i_size > 0 &&
3258 attr->ia_size == 0) {
3259
3260 /* we're truncating a file that used to have good
3261 * data down to zero. Make sure it gets into
3262 * the ordered flush list so that any new writes
3263 * get down to disk quickly.
3264 */
3265 BTRFS_I(inode)->ordered_data_close = 1;
3266 }
3267 } 3310 }
3311 attr->ia_valid &= ~ATTR_SIZE;
3268 3312
3269 err = inode_setattr(inode, attr); 3313 if (attr->ia_valid)
3314 err = inode_setattr(inode, attr);
3270 3315
3271 if (!err && ((attr->ia_valid & ATTR_MODE))) 3316 if (!err && ((attr->ia_valid & ATTR_MODE)))
3272 err = btrfs_acl_chmod(inode); 3317 err = btrfs_acl_chmod(inode);
@@ -3287,36 +3332,43 @@ void btrfs_delete_inode(struct inode *inode)
3287 } 3332 }
3288 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3333 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3289 3334
3335 if (root->fs_info->log_root_recovering) {
3336 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3337 goto no_delete;
3338 }
3339
3290 if (inode->i_nlink > 0) { 3340 if (inode->i_nlink > 0) {
3291 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3341 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3292 goto no_delete; 3342 goto no_delete;
3293 } 3343 }
3294 3344
3295 btrfs_i_size_write(inode, 0); 3345 btrfs_i_size_write(inode, 0);
3296 trans = btrfs_join_transaction(root, 1);
3297 3346
3298 btrfs_set_trans_block_group(trans, inode); 3347 while (1) {
3299 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3348 trans = btrfs_start_transaction(root, 1);
3300 if (ret) { 3349 btrfs_set_trans_block_group(trans, inode);
3301 btrfs_orphan_del(NULL, inode); 3350 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3302 goto no_delete_lock;
3303 }
3304 3351
3305 btrfs_orphan_del(trans, inode); 3352 if (ret != -EAGAIN)
3353 break;
3306 3354
3307 nr = trans->blocks_used; 3355 nr = trans->blocks_used;
3308 clear_inode(inode); 3356 btrfs_end_transaction(trans, root);
3357 trans = NULL;
3358 btrfs_btree_balance_dirty(root, nr);
3359 }
3309 3360
3310 btrfs_end_transaction(trans, root); 3361 if (ret == 0) {
3311 btrfs_btree_balance_dirty(root, nr); 3362 ret = btrfs_orphan_del(trans, inode);
3312 return; 3363 BUG_ON(ret);
3364 }
3313 3365
3314no_delete_lock:
3315 nr = trans->blocks_used; 3366 nr = trans->blocks_used;
3316 btrfs_end_transaction(trans, root); 3367 btrfs_end_transaction(trans, root);
3317 btrfs_btree_balance_dirty(root, nr); 3368 btrfs_btree_balance_dirty(root, nr);
3318no_delete: 3369no_delete:
3319 clear_inode(inode); 3370 clear_inode(inode);
3371 return;
3320} 3372}
3321 3373
3322/* 3374/*
@@ -3560,6 +3612,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3560 bi->index_cnt = (u64)-1; 3612 bi->index_cnt = (u64)-1;
3561 bi->last_unlink_trans = 0; 3613 bi->last_unlink_trans = 0;
3562 bi->ordered_data_close = 0; 3614 bi->ordered_data_close = 0;
3615 bi->force_compress = 0;
3563 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); 3616 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3564 extent_io_tree_init(&BTRFS_I(inode)->io_tree, 3617 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3565 inode->i_mapping, GFP_NOFS); 3618 inode->i_mapping, GFP_NOFS);
@@ -3569,7 +3622,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3569 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3622 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3570 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3623 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3571 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3624 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3572 mutex_init(&BTRFS_I(inode)->extent_mutex);
3573 mutex_init(&BTRFS_I(inode)->log_mutex); 3625 mutex_init(&BTRFS_I(inode)->log_mutex);
3574} 3626}
3575 3627
@@ -3609,7 +3661,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
3609 * Returns in *is_new if the inode was read from disk 3661 * Returns in *is_new if the inode was read from disk
3610 */ 3662 */
3611struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3663struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3612 struct btrfs_root *root) 3664 struct btrfs_root *root, int *new)
3613{ 3665{
3614 struct inode *inode; 3666 struct inode *inode;
3615 3667
@@ -3624,6 +3676,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3624 3676
3625 inode_tree_add(inode); 3677 inode_tree_add(inode);
3626 unlock_new_inode(inode); 3678 unlock_new_inode(inode);
3679 if (new)
3680 *new = 1;
3627 } 3681 }
3628 3682
3629 return inode; 3683 return inode;
@@ -3676,7 +3730,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3676 return NULL; 3730 return NULL;
3677 3731
3678 if (location.type == BTRFS_INODE_ITEM_KEY) { 3732 if (location.type == BTRFS_INODE_ITEM_KEY) {
3679 inode = btrfs_iget(dir->i_sb, &location, root); 3733 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
3680 return inode; 3734 return inode;
3681 } 3735 }
3682 3736
@@ -3691,10 +3745,17 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3691 else 3745 else
3692 inode = new_simple_dir(dir->i_sb, &location, sub_root); 3746 inode = new_simple_dir(dir->i_sb, &location, sub_root);
3693 } else { 3747 } else {
3694 inode = btrfs_iget(dir->i_sb, &location, sub_root); 3748 inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
3695 } 3749 }
3696 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3750 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3697 3751
3752 if (root != sub_root) {
3753 down_read(&root->fs_info->cleanup_work_sem);
3754 if (!(inode->i_sb->s_flags & MS_RDONLY))
3755 btrfs_orphan_cleanup(sub_root);
3756 up_read(&root->fs_info->cleanup_work_sem);
3757 }
3758
3698 return inode; 3759 return inode;
3699} 3760}
3700 3761
@@ -3869,7 +3930,11 @@ skip:
3869 3930
3870 /* Reached end of directory/root. Bump pos past the last item. */ 3931 /* Reached end of directory/root. Bump pos past the last item. */
3871 if (key_type == BTRFS_DIR_INDEX_KEY) 3932 if (key_type == BTRFS_DIR_INDEX_KEY)
3872 filp->f_pos = INT_LIMIT(off_t); 3933 /*
3934 * 32-bit glibc will use getdents64, but then strtol -
3935 * so the last number we can serve is this.
3936 */
3937 filp->f_pos = 0x7fffffff;
3873 else 3938 else
3874 filp->f_pos++; 3939 filp->f_pos++;
3875nopos: 3940nopos:
@@ -3879,7 +3944,7 @@ err:
3879 return ret; 3944 return ret;
3880} 3945}
3881 3946
3882int btrfs_write_inode(struct inode *inode, int wait) 3947int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
3883{ 3948{
3884 struct btrfs_root *root = BTRFS_I(inode)->root; 3949 struct btrfs_root *root = BTRFS_I(inode)->root;
3885 struct btrfs_trans_handle *trans; 3950 struct btrfs_trans_handle *trans;
@@ -3888,7 +3953,7 @@ int btrfs_write_inode(struct inode *inode, int wait)
3888 if (root->fs_info->btree_inode == inode) 3953 if (root->fs_info->btree_inode == inode)
3889 return 0; 3954 return 0;
3890 3955
3891 if (wait) { 3956 if (wbc->sync_mode == WB_SYNC_ALL) {
3892 trans = btrfs_join_transaction(root, 1); 3957 trans = btrfs_join_transaction(root, 1);
3893 btrfs_set_trans_block_group(trans, inode); 3958 btrfs_set_trans_block_group(trans, inode);
3894 ret = btrfs_commit_transaction(trans, root); 3959 ret = btrfs_commit_transaction(trans, root);
@@ -4219,7 +4284,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4219 if (IS_ERR(inode)) 4284 if (IS_ERR(inode))
4220 goto out_unlock; 4285 goto out_unlock;
4221 4286
4222 err = btrfs_init_inode_security(inode, dir); 4287 err = btrfs_init_inode_security(trans, inode, dir);
4223 if (err) { 4288 if (err) {
4224 drop_inode = 1; 4289 drop_inode = 1;
4225 goto out_unlock; 4290 goto out_unlock;
@@ -4290,7 +4355,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4290 if (IS_ERR(inode)) 4355 if (IS_ERR(inode))
4291 goto out_unlock; 4356 goto out_unlock;
4292 4357
4293 err = btrfs_init_inode_security(inode, dir); 4358 err = btrfs_init_inode_security(trans, inode, dir);
4294 if (err) { 4359 if (err) {
4295 drop_inode = 1; 4360 drop_inode = 1;
4296 goto out_unlock; 4361 goto out_unlock;
@@ -4336,6 +4401,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4336 if (inode->i_nlink == 0) 4401 if (inode->i_nlink == 0)
4337 return -ENOENT; 4402 return -ENOENT;
4338 4403
4404 /* do not allow sys_link's with other subvols of the same device */
4405 if (root->objectid != BTRFS_I(inode)->root->objectid)
4406 return -EPERM;
4407
4339 /* 4408 /*
4340 * 1 item for inode ref 4409 * 1 item for inode ref
4341 * 2 items for dir items 4410 * 2 items for dir items
@@ -4408,7 +4477,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4408 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); 4477 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4409 if (err) { 4478 if (err) {
4410 err = -ENOSPC; 4479 err = -ENOSPC;
4411 goto out_unlock; 4480 goto out_fail;
4412 } 4481 }
4413 4482
4414 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4483 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
@@ -4423,7 +4492,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4423 4492
4424 drop_on_err = 1; 4493 drop_on_err = 1;
4425 4494
4426 err = btrfs_init_inode_security(inode, dir); 4495 err = btrfs_init_inode_security(trans, inode, dir);
4427 if (err) 4496 if (err)
4428 goto out_fail; 4497 goto out_fail;
4429 4498
@@ -4886,6 +4955,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4886{ 4955{
4887 struct extent_io_tree *tree; 4956 struct extent_io_tree *tree;
4888 struct btrfs_ordered_extent *ordered; 4957 struct btrfs_ordered_extent *ordered;
4958 struct extent_state *cached_state = NULL;
4889 u64 page_start = page_offset(page); 4959 u64 page_start = page_offset(page);
4890 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 4960 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
4891 4961
@@ -4904,7 +4974,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4904 btrfs_releasepage(page, GFP_NOFS); 4974 btrfs_releasepage(page, GFP_NOFS);
4905 return; 4975 return;
4906 } 4976 }
4907 lock_extent(tree, page_start, page_end, GFP_NOFS); 4977 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
4978 GFP_NOFS);
4908 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 4979 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
4909 page_offset(page)); 4980 page_offset(page));
4910 if (ordered) { 4981 if (ordered) {
@@ -4915,7 +4986,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4915 clear_extent_bit(tree, page_start, page_end, 4986 clear_extent_bit(tree, page_start, page_end,
4916 EXTENT_DIRTY | EXTENT_DELALLOC | 4987 EXTENT_DIRTY | EXTENT_DELALLOC |
4917 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, 4988 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
4918 NULL, GFP_NOFS); 4989 &cached_state, GFP_NOFS);
4919 /* 4990 /*
4920 * whoever cleared the private bit is responsible 4991 * whoever cleared the private bit is responsible
4921 * for the finish_ordered_io 4992 * for the finish_ordered_io
@@ -4925,11 +4996,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4925 page_start, page_end); 4996 page_start, page_end);
4926 } 4997 }
4927 btrfs_put_ordered_extent(ordered); 4998 btrfs_put_ordered_extent(ordered);
4928 lock_extent(tree, page_start, page_end, GFP_NOFS); 4999 cached_state = NULL;
5000 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
5001 GFP_NOFS);
4929 } 5002 }
4930 clear_extent_bit(tree, page_start, page_end, 5003 clear_extent_bit(tree, page_start, page_end,
4931 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 5004 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4932 EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); 5005 EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
4933 __btrfs_releasepage(page, GFP_NOFS); 5006 __btrfs_releasepage(page, GFP_NOFS);
4934 5007
4935 ClearPageChecked(page); 5008 ClearPageChecked(page);
@@ -4962,6 +5035,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4962 struct btrfs_root *root = BTRFS_I(inode)->root; 5035 struct btrfs_root *root = BTRFS_I(inode)->root;
4963 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5036 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4964 struct btrfs_ordered_extent *ordered; 5037 struct btrfs_ordered_extent *ordered;
5038 struct extent_state *cached_state = NULL;
4965 char *kaddr; 5039 char *kaddr;
4966 unsigned long zero_start; 5040 unsigned long zero_start;
4967 loff_t size; 5041 loff_t size;
@@ -5000,7 +5074,8 @@ again:
5000 } 5074 }
5001 wait_on_page_writeback(page); 5075 wait_on_page_writeback(page);
5002 5076
5003 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5077 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
5078 GFP_NOFS);
5004 set_page_extent_mapped(page); 5079 set_page_extent_mapped(page);
5005 5080
5006 /* 5081 /*
@@ -5009,7 +5084,8 @@ again:
5009 */ 5084 */
5010 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5085 ordered = btrfs_lookup_ordered_extent(inode, page_start);
5011 if (ordered) { 5086 if (ordered) {
5012 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5087 unlock_extent_cached(io_tree, page_start, page_end,
5088 &cached_state, GFP_NOFS);
5013 unlock_page(page); 5089 unlock_page(page);
5014 btrfs_start_ordered_extent(inode, ordered, 1); 5090 btrfs_start_ordered_extent(inode, ordered, 1);
5015 btrfs_put_ordered_extent(ordered); 5091 btrfs_put_ordered_extent(ordered);
@@ -5023,13 +5099,15 @@ again:
5023 * is probably a better way to do this, but for now keep consistent with 5099 * is probably a better way to do this, but for now keep consistent with
5024 * prepare_pages in the normal write path. 5100 * prepare_pages in the normal write path.
5025 */ 5101 */
5026 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 5102 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
5027 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 5103 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
5028 GFP_NOFS); 5104 0, 0, &cached_state, GFP_NOFS);
5029 5105
5030 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 5106 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
5107 &cached_state);
5031 if (ret) { 5108 if (ret) {
5032 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5109 unlock_extent_cached(io_tree, page_start, page_end,
5110 &cached_state, GFP_NOFS);
5033 ret = VM_FAULT_SIGBUS; 5111 ret = VM_FAULT_SIGBUS;
5034 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 5112 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5035 goto out_unlock; 5113 goto out_unlock;
@@ -5055,7 +5133,7 @@ again:
5055 BTRFS_I(inode)->last_trans = root->fs_info->generation; 5133 BTRFS_I(inode)->last_trans = root->fs_info->generation;
5056 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; 5134 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
5057 5135
5058 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5136 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
5059 5137
5060out_unlock: 5138out_unlock:
5061 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 5139 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
@@ -5074,17 +5152,20 @@ static void btrfs_truncate(struct inode *inode)
5074 unsigned long nr; 5152 unsigned long nr;
5075 u64 mask = root->sectorsize - 1; 5153 u64 mask = root->sectorsize - 1;
5076 5154
5077 if (!S_ISREG(inode->i_mode)) 5155 if (!S_ISREG(inode->i_mode)) {
5078 return; 5156 WARN_ON(1);
5079 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5080 return; 5157 return;
5158 }
5081 5159
5082 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 5160 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5083 if (ret) 5161 if (ret)
5084 return; 5162 return;
5163
5085 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5164 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5165 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5086 5166
5087 trans = btrfs_start_transaction(root, 1); 5167 trans = btrfs_start_transaction(root, 1);
5168 btrfs_set_trans_block_group(trans, inode);
5088 5169
5089 /* 5170 /*
5090 * setattr is responsible for setting the ordered_data_close flag, 5171 * setattr is responsible for setting the ordered_data_close flag,
@@ -5106,21 +5187,32 @@ static void btrfs_truncate(struct inode *inode)
5106 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5187 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5107 btrfs_add_ordered_operation(trans, root, inode); 5188 btrfs_add_ordered_operation(trans, root, inode);
5108 5189
5109 btrfs_set_trans_block_group(trans, inode); 5190 while (1) {
5110 btrfs_i_size_write(inode, inode->i_size); 5191 ret = btrfs_truncate_inode_items(trans, root, inode,
5192 inode->i_size,
5193 BTRFS_EXTENT_DATA_KEY);
5194 if (ret != -EAGAIN)
5195 break;
5111 5196
5112 ret = btrfs_orphan_add(trans, inode); 5197 ret = btrfs_update_inode(trans, root, inode);
5113 if (ret) 5198 BUG_ON(ret);
5114 goto out; 5199
5115 /* FIXME, add redo link to tree so we don't leak on crash */ 5200 nr = trans->blocks_used;
5116 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 5201 btrfs_end_transaction(trans, root);
5117 BTRFS_EXTENT_DATA_KEY); 5202 btrfs_btree_balance_dirty(root, nr);
5118 btrfs_update_inode(trans, root, inode); 5203
5204 trans = btrfs_start_transaction(root, 1);
5205 btrfs_set_trans_block_group(trans, inode);
5206 }
5207
5208 if (ret == 0 && inode->i_nlink > 0) {
5209 ret = btrfs_orphan_del(trans, inode);
5210 BUG_ON(ret);
5211 }
5119 5212
5120 ret = btrfs_orphan_del(trans, inode); 5213 ret = btrfs_update_inode(trans, root, inode);
5121 BUG_ON(ret); 5214 BUG_ON(ret);
5122 5215
5123out:
5124 nr = trans->blocks_used; 5216 nr = trans->blocks_used;
5125 ret = btrfs_end_transaction_throttle(trans, root); 5217 ret = btrfs_end_transaction_throttle(trans, root);
5126 BUG_ON(ret); 5218 BUG_ON(ret);
@@ -5217,9 +5309,9 @@ void btrfs_destroy_inode(struct inode *inode)
5217 5309
5218 spin_lock(&root->list_lock); 5310 spin_lock(&root->list_lock);
5219 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5311 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5220 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5312 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5221 " list\n", inode->i_ino); 5313 inode->i_ino);
5222 dump_stack(); 5314 list_del_init(&BTRFS_I(inode)->i_orphan);
5223 } 5315 }
5224 spin_unlock(&root->list_lock); 5316 spin_unlock(&root->list_lock);
5225 5317
@@ -5246,7 +5338,6 @@ free:
5246void btrfs_drop_inode(struct inode *inode) 5338void btrfs_drop_inode(struct inode *inode)
5247{ 5339{
5248 struct btrfs_root *root = BTRFS_I(inode)->root; 5340 struct btrfs_root *root = BTRFS_I(inode)->root;
5249
5250 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 5341 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
5251 generic_delete_inode(inode); 5342 generic_delete_inode(inode);
5252 else 5343 else
@@ -5476,7 +5567,7 @@ out_fail:
5476 * some fairly slow code that needs optimization. This walks the list 5567 * some fairly slow code that needs optimization. This walks the list
5477 * of all the inodes with pending delalloc and forces them to disk. 5568 * of all the inodes with pending delalloc and forces them to disk.
5478 */ 5569 */
5479int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5570int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5480{ 5571{
5481 struct list_head *head = &root->fs_info->delalloc_inodes; 5572 struct list_head *head = &root->fs_info->delalloc_inodes;
5482 struct btrfs_inode *binode; 5573 struct btrfs_inode *binode;
@@ -5495,7 +5586,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5495 spin_unlock(&root->fs_info->delalloc_lock); 5586 spin_unlock(&root->fs_info->delalloc_lock);
5496 if (inode) { 5587 if (inode) {
5497 filemap_flush(inode->i_mapping); 5588 filemap_flush(inode->i_mapping);
5498 iput(inode); 5589 if (delay_iput)
5590 btrfs_add_delayed_iput(inode);
5591 else
5592 iput(inode);
5499 } 5593 }
5500 cond_resched(); 5594 cond_resched();
5501 spin_lock(&root->fs_info->delalloc_lock); 5595 spin_lock(&root->fs_info->delalloc_lock);
@@ -5569,7 +5663,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5569 if (IS_ERR(inode)) 5663 if (IS_ERR(inode))
5570 goto out_unlock; 5664 goto out_unlock;
5571 5665
5572 err = btrfs_init_inode_security(inode, dir); 5666 err = btrfs_init_inode_security(trans, inode, dir);
5573 if (err) { 5667 if (err) {
5574 drop_inode = 1; 5668 drop_inode = 1;
5575 goto out_unlock; 5669 goto out_unlock;
@@ -5641,62 +5735,80 @@ out_fail:
5641 return err; 5735 return err;
5642} 5736}
5643 5737
5644static int prealloc_file_range(struct btrfs_trans_handle *trans, 5738static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5645 struct inode *inode, u64 start, u64 end, 5739 u64 alloc_hint, int mode, loff_t actual_len)
5646 u64 locked_end, u64 alloc_hint, int mode)
5647{ 5740{
5741 struct btrfs_trans_handle *trans;
5648 struct btrfs_root *root = BTRFS_I(inode)->root; 5742 struct btrfs_root *root = BTRFS_I(inode)->root;
5649 struct btrfs_key ins; 5743 struct btrfs_key ins;
5650 u64 alloc_size;
5651 u64 cur_offset = start; 5744 u64 cur_offset = start;
5652 u64 num_bytes = end - start; 5745 u64 num_bytes = end - start;
5653 int ret = 0; 5746 int ret = 0;
5747 u64 i_size;
5654 5748
5655 while (num_bytes > 0) { 5749 while (num_bytes > 0) {
5656 alloc_size = min(num_bytes, root->fs_info->max_extent); 5750 trans = btrfs_start_transaction(root, 1);
5657 5751
5658 ret = btrfs_reserve_metadata_space(root, 1); 5752 ret = btrfs_reserve_extent(trans, root, num_bytes,
5659 if (ret)
5660 goto out;
5661
5662 ret = btrfs_reserve_extent(trans, root, alloc_size,
5663 root->sectorsize, 0, alloc_hint, 5753 root->sectorsize, 0, alloc_hint,
5664 (u64)-1, &ins, 1); 5754 (u64)-1, &ins, 1);
5665 if (ret) { 5755 if (ret) {
5666 WARN_ON(1); 5756 WARN_ON(1);
5667 goto out; 5757 goto stop_trans;
5668 } 5758 }
5759
5760 ret = btrfs_reserve_metadata_space(root, 3);
5761 if (ret) {
5762 btrfs_free_reserved_extent(root, ins.objectid,
5763 ins.offset);
5764 goto stop_trans;
5765 }
5766
5669 ret = insert_reserved_file_extent(trans, inode, 5767 ret = insert_reserved_file_extent(trans, inode,
5670 cur_offset, ins.objectid, 5768 cur_offset, ins.objectid,
5671 ins.offset, ins.offset, 5769 ins.offset, ins.offset,
5672 ins.offset, locked_end, 5770 ins.offset, 0, 0, 0,
5673 0, 0, 0,
5674 BTRFS_FILE_EXTENT_PREALLOC); 5771 BTRFS_FILE_EXTENT_PREALLOC);
5675 BUG_ON(ret); 5772 BUG_ON(ret);
5676 btrfs_drop_extent_cache(inode, cur_offset, 5773 btrfs_drop_extent_cache(inode, cur_offset,
5677 cur_offset + ins.offset -1, 0); 5774 cur_offset + ins.offset -1, 0);
5775
5678 num_bytes -= ins.offset; 5776 num_bytes -= ins.offset;
5679 cur_offset += ins.offset; 5777 cur_offset += ins.offset;
5680 alloc_hint = ins.objectid + ins.offset; 5778 alloc_hint = ins.objectid + ins.offset;
5681 btrfs_unreserve_metadata_space(root, 1); 5779
5682 }
5683out:
5684 if (cur_offset > start) {
5685 inode->i_ctime = CURRENT_TIME; 5780 inode->i_ctime = CURRENT_TIME;
5686 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5781 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5687 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5782 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5688 cur_offset > i_size_read(inode)) 5783 (actual_len > inode->i_size) &&
5689 btrfs_i_size_write(inode, cur_offset); 5784 (cur_offset > inode->i_size)) {
5785
5786 if (cur_offset > actual_len)
5787 i_size = actual_len;
5788 else
5789 i_size = cur_offset;
5790 i_size_write(inode, i_size);
5791 btrfs_ordered_update_i_size(inode, i_size, NULL);
5792 }
5793
5690 ret = btrfs_update_inode(trans, root, inode); 5794 ret = btrfs_update_inode(trans, root, inode);
5691 BUG_ON(ret); 5795 BUG_ON(ret);
5796
5797 btrfs_end_transaction(trans, root);
5798 btrfs_unreserve_metadata_space(root, 3);
5692 } 5799 }
5800 return ret;
5693 5801
5802stop_trans:
5803 btrfs_end_transaction(trans, root);
5694 return ret; 5804 return ret;
5805
5695} 5806}
5696 5807
5697static long btrfs_fallocate(struct inode *inode, int mode, 5808static long btrfs_fallocate(struct inode *inode, int mode,
5698 loff_t offset, loff_t len) 5809 loff_t offset, loff_t len)
5699{ 5810{
5811 struct extent_state *cached_state = NULL;
5700 u64 cur_offset; 5812 u64 cur_offset;
5701 u64 last_byte; 5813 u64 last_byte;
5702 u64 alloc_start; 5814 u64 alloc_start;
@@ -5705,8 +5817,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5705 u64 locked_end; 5817 u64 locked_end;
5706 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5818 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5707 struct extent_map *em; 5819 struct extent_map *em;
5708 struct btrfs_trans_handle *trans;
5709 struct btrfs_root *root;
5710 int ret; 5820 int ret;
5711 5821
5712 alloc_start = offset & ~mask; 5822 alloc_start = offset & ~mask;
@@ -5725,9 +5835,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5725 goto out; 5835 goto out;
5726 } 5836 }
5727 5837
5728 root = BTRFS_I(inode)->root; 5838 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5729
5730 ret = btrfs_check_data_free_space(root, inode,
5731 alloc_end - alloc_start); 5839 alloc_end - alloc_start);
5732 if (ret) 5840 if (ret)
5733 goto out; 5841 goto out;
@@ -5736,27 +5844,20 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5736 while (1) { 5844 while (1) {
5737 struct btrfs_ordered_extent *ordered; 5845 struct btrfs_ordered_extent *ordered;
5738 5846
5739 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5740 if (!trans) {
5741 ret = -EIO;
5742 goto out_free;
5743 }
5744
5745 /* the extent lock is ordered inside the running 5847 /* the extent lock is ordered inside the running
5746 * transaction 5848 * transaction
5747 */ 5849 */
5748 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5850 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
5749 GFP_NOFS); 5851 locked_end, 0, &cached_state, GFP_NOFS);
5750 ordered = btrfs_lookup_first_ordered_extent(inode, 5852 ordered = btrfs_lookup_first_ordered_extent(inode,
5751 alloc_end - 1); 5853 alloc_end - 1);
5752 if (ordered && 5854 if (ordered &&
5753 ordered->file_offset + ordered->len > alloc_start && 5855 ordered->file_offset + ordered->len > alloc_start &&
5754 ordered->file_offset < alloc_end) { 5856 ordered->file_offset < alloc_end) {
5755 btrfs_put_ordered_extent(ordered); 5857 btrfs_put_ordered_extent(ordered);
5756 unlock_extent(&BTRFS_I(inode)->io_tree, 5858 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
5757 alloc_start, locked_end, GFP_NOFS); 5859 alloc_start, locked_end,
5758 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5860 &cached_state, GFP_NOFS);
5759
5760 /* 5861 /*
5761 * we can't wait on the range with the transaction 5862 * we can't wait on the range with the transaction
5762 * running or with the extent lock held 5863 * running or with the extent lock held
@@ -5777,10 +5878,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5777 BUG_ON(IS_ERR(em) || !em); 5878 BUG_ON(IS_ERR(em) || !em);
5778 last_byte = min(extent_map_end(em), alloc_end); 5879 last_byte = min(extent_map_end(em), alloc_end);
5779 last_byte = (last_byte + mask) & ~mask; 5880 last_byte = (last_byte + mask) & ~mask;
5780 if (em->block_start == EXTENT_MAP_HOLE) { 5881 if (em->block_start == EXTENT_MAP_HOLE ||
5781 ret = prealloc_file_range(trans, inode, cur_offset, 5882 (cur_offset >= inode->i_size &&
5782 last_byte, locked_end + 1, 5883 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5783 alloc_hint, mode); 5884 ret = prealloc_file_range(inode,
5885 cur_offset, last_byte,
5886 alloc_hint, mode, offset+len);
5784 if (ret < 0) { 5887 if (ret < 0) {
5785 free_extent_map(em); 5888 free_extent_map(em);
5786 break; 5889 break;
@@ -5796,12 +5899,11 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5796 break; 5899 break;
5797 } 5900 }
5798 } 5901 }
5799 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5902 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5800 GFP_NOFS); 5903 &cached_state, GFP_NOFS);
5801 5904
5802 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5905 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5803out_free: 5906 alloc_end - alloc_start);
5804 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5805out: 5907out:
5806 mutex_unlock(&inode->i_mutex); 5908 mutex_unlock(&inode->i_mutex);
5807 return ret; 5909 return ret;