aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c627
1 files changed, 373 insertions, 254 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3ad168a0bfc..4deb280f8969 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
88 u64 start, u64 end, int *page_started, 88 u64 start, u64 end, int *page_started,
89 unsigned long *nr_written, int unlock); 89 unsigned long *nr_written, int unlock);
90 90
91static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) 91static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
92 struct inode *inode, struct inode *dir)
92{ 93{
93 int err; 94 int err;
94 95
95 err = btrfs_init_acl(inode, dir); 96 err = btrfs_init_acl(trans, inode, dir);
96 if (!err) 97 if (!err)
97 err = btrfs_xattr_security_init(inode, dir); 98 err = btrfs_xattr_security_init(trans, inode, dir);
98 return err; 99 return err;
99} 100}
100 101
@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_mark_buffer_dirty(leaf);
189 btrfs_free_path(path); 190 btrfs_free_path(path);
190 191
192 /*
193 * we're an inline extent, so nobody can
194 * extend the file past i_size without locking
195 * a page we already have locked.
196 *
197 * We must do any isize and inode updates
198 * before we unlock the pages. Otherwise we
199 * could end up racing with unlink.
200 */
191 BTRFS_I(inode)->disk_i_size = inode->i_size; 201 BTRFS_I(inode)->disk_i_size = inode->i_size;
192 btrfs_update_inode(trans, root, inode); 202 btrfs_update_inode(trans, root, inode);
203
193 return 0; 204 return 0;
194fail: 205fail:
195 btrfs_free_path(path); 206 btrfs_free_path(path);
@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
230 return 1; 241 return 1;
231 } 242 }
232 243
233 ret = btrfs_drop_extents(trans, root, inode, start, 244 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
234 aligned_end, aligned_end, start,
235 &hint_byte, 1); 245 &hint_byte, 1);
236 BUG_ON(ret); 246 BUG_ON(ret);
237 247
@@ -416,7 +426,6 @@ again:
416 start, end, 426 start, end,
417 total_compressed, pages); 427 total_compressed, pages);
418 } 428 }
419 btrfs_end_transaction(trans, root);
420 if (ret == 0) { 429 if (ret == 0) {
421 /* 430 /*
422 * inline extent creation worked, we don't need 431 * inline extent creation worked, we don't need
@@ -430,9 +439,11 @@ again:
430 EXTENT_CLEAR_DELALLOC | 439 EXTENT_CLEAR_DELALLOC |
431 EXTENT_CLEAR_ACCOUNTING | 440 EXTENT_CLEAR_ACCOUNTING |
432 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 441 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
433 ret = 0; 442
443 btrfs_end_transaction(trans, root);
434 goto free_pages_out; 444 goto free_pages_out;
435 } 445 }
446 btrfs_end_transaction(trans, root);
436 } 447 }
437 448
438 if (will_compress) { 449 if (will_compress) {
@@ -472,7 +483,8 @@ again:
472 nr_pages_ret = 0; 483 nr_pages_ret = 0;
473 484
474 /* flag the file so we don't compress in the future */ 485 /* flag the file so we don't compress in the future */
475 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 486 if (!btrfs_test_opt(root, FORCE_COMPRESS))
487 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
476 } 488 }
477 if (will_compress) { 489 if (will_compress) {
478 *num_added += 1; 490 *num_added += 1;
@@ -543,7 +555,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
543 if (list_empty(&async_cow->extents)) 555 if (list_empty(&async_cow->extents))
544 return 0; 556 return 0;
545 557
546 trans = btrfs_join_transaction(root, 1);
547 558
548 while (!list_empty(&async_cow->extents)) { 559 while (!list_empty(&async_cow->extents)) {
549 async_extent = list_entry(async_cow->extents.next, 560 async_extent = list_entry(async_cow->extents.next,
@@ -590,19 +601,15 @@ retry:
590 lock_extent(io_tree, async_extent->start, 601 lock_extent(io_tree, async_extent->start,
591 async_extent->start + async_extent->ram_size - 1, 602 async_extent->start + async_extent->ram_size - 1,
592 GFP_NOFS); 603 GFP_NOFS);
593 /*
594 * here we're doing allocation and writeback of the
595 * compressed pages
596 */
597 btrfs_drop_extent_cache(inode, async_extent->start,
598 async_extent->start +
599 async_extent->ram_size - 1, 0);
600 604
605 trans = btrfs_join_transaction(root, 1);
601 ret = btrfs_reserve_extent(trans, root, 606 ret = btrfs_reserve_extent(trans, root,
602 async_extent->compressed_size, 607 async_extent->compressed_size,
603 async_extent->compressed_size, 608 async_extent->compressed_size,
604 0, alloc_hint, 609 0, alloc_hint,
605 (u64)-1, &ins, 1); 610 (u64)-1, &ins, 1);
611 btrfs_end_transaction(trans, root);
612
606 if (ret) { 613 if (ret) {
607 int i; 614 int i;
608 for (i = 0; i < async_extent->nr_pages; i++) { 615 for (i = 0; i < async_extent->nr_pages; i++) {
@@ -618,6 +625,14 @@ retry:
618 goto retry; 625 goto retry;
619 } 626 }
620 627
628 /*
629 * here we're doing allocation and writeback of the
630 * compressed pages
631 */
632 btrfs_drop_extent_cache(inode, async_extent->start,
633 async_extent->start +
634 async_extent->ram_size - 1, 0);
635
621 em = alloc_extent_map(GFP_NOFS); 636 em = alloc_extent_map(GFP_NOFS);
622 em->start = async_extent->start; 637 em->start = async_extent->start;
623 em->len = async_extent->ram_size; 638 em->len = async_extent->ram_size;
@@ -649,8 +664,6 @@ retry:
649 BTRFS_ORDERED_COMPRESSED); 664 BTRFS_ORDERED_COMPRESSED);
650 BUG_ON(ret); 665 BUG_ON(ret);
651 666
652 btrfs_end_transaction(trans, root);
653
654 /* 667 /*
655 * clear dirty, set writeback and unlock the pages. 668 * clear dirty, set writeback and unlock the pages.
656 */ 669 */
@@ -672,13 +685,11 @@ retry:
672 async_extent->nr_pages); 685 async_extent->nr_pages);
673 686
674 BUG_ON(ret); 687 BUG_ON(ret);
675 trans = btrfs_join_transaction(root, 1);
676 alloc_hint = ins.objectid + ins.offset; 688 alloc_hint = ins.objectid + ins.offset;
677 kfree(async_extent); 689 kfree(async_extent);
678 cond_resched(); 690 cond_resched();
679 } 691 }
680 692
681 btrfs_end_transaction(trans, root);
682 return 0; 693 return 0;
683} 694}
684 695
@@ -742,6 +753,7 @@ static noinline int cow_file_range(struct inode *inode,
742 EXTENT_CLEAR_DIRTY | 753 EXTENT_CLEAR_DIRTY |
743 EXTENT_SET_WRITEBACK | 754 EXTENT_SET_WRITEBACK |
744 EXTENT_END_WRITEBACK); 755 EXTENT_END_WRITEBACK);
756
745 *nr_written = *nr_written + 757 *nr_written = *nr_written +
746 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 758 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
747 *page_started = 1; 759 *page_started = 1;
@@ -1596,7 +1608,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1596 struct inode *inode, u64 file_pos, 1608 struct inode *inode, u64 file_pos,
1597 u64 disk_bytenr, u64 disk_num_bytes, 1609 u64 disk_bytenr, u64 disk_num_bytes,
1598 u64 num_bytes, u64 ram_bytes, 1610 u64 num_bytes, u64 ram_bytes,
1599 u64 locked_end,
1600 u8 compression, u8 encryption, 1611 u8 compression, u8 encryption,
1601 u16 other_encoding, int extent_type) 1612 u16 other_encoding, int extent_type)
1602{ 1613{
@@ -1622,9 +1633,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1622 * the caller is expected to unpin it and allow it to be merged 1633 * the caller is expected to unpin it and allow it to be merged
1623 * with the others. 1634 * with the others.
1624 */ 1635 */
1625 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1636 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1626 file_pos + num_bytes, locked_end, 1637 &hint, 0);
1627 file_pos, &hint, 0);
1628 BUG_ON(ret); 1638 BUG_ON(ret);
1629 1639
1630 ins.objectid = inode->i_ino; 1640 ins.objectid = inode->i_ino;
@@ -1671,24 +1681,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1671 * before we start the transaction. It limits the amount of btree 1681 * before we start the transaction. It limits the amount of btree
1672 * reads required while inside the transaction. 1682 * reads required while inside the transaction.
1673 */ 1683 */
1674static noinline void reada_csum(struct btrfs_root *root,
1675 struct btrfs_path *path,
1676 struct btrfs_ordered_extent *ordered_extent)
1677{
1678 struct btrfs_ordered_sum *sum;
1679 u64 bytenr;
1680
1681 sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
1682 list);
1683 bytenr = sum->sums[0].bytenr;
1684
1685 /*
1686 * we don't care about the results, the point of this search is
1687 * just to get the btree leaves into ram
1688 */
1689 btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
1690}
1691
1692/* as ordered data IO finishes, this gets called so we can finish 1684/* as ordered data IO finishes, this gets called so we can finish
1693 * an ordered extent if the range of bytes in the file it covers are 1685 * an ordered extent if the range of bytes in the file it covers are
1694 * fully written. 1686 * fully written.
@@ -1699,7 +1691,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1699 struct btrfs_trans_handle *trans; 1691 struct btrfs_trans_handle *trans;
1700 struct btrfs_ordered_extent *ordered_extent = NULL; 1692 struct btrfs_ordered_extent *ordered_extent = NULL;
1701 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1693 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1702 struct btrfs_path *path;
1703 int compressed = 0; 1694 int compressed = 0;
1704 int ret; 1695 int ret;
1705 1696
@@ -1707,46 +1698,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1707 if (!ret) 1698 if (!ret)
1708 return 0; 1699 return 0;
1709 1700
1710 /* 1701 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1711 * before we join the transaction, try to do some of our IO. 1702 BUG_ON(!ordered_extent);
1712 * This will limit the amount of IO that we have to do with 1703
1713 * the transaction running. We're unlikely to need to do any 1704 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1714 * IO if the file extents are new, the disk_i_size checks 1705 BUG_ON(!list_empty(&ordered_extent->list));
1715 * covers the most common case. 1706 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1716 */ 1707 if (!ret) {
1717 if (start < BTRFS_I(inode)->disk_i_size) { 1708 trans = btrfs_join_transaction(root, 1);
1718 path = btrfs_alloc_path(); 1709 ret = btrfs_update_inode(trans, root, inode);
1719 if (path) { 1710 BUG_ON(ret);
1720 ret = btrfs_lookup_file_extent(NULL, root, path, 1711 btrfs_end_transaction(trans, root);
1721 inode->i_ino,
1722 start, 0);
1723 ordered_extent = btrfs_lookup_ordered_extent(inode,
1724 start);
1725 if (!list_empty(&ordered_extent->list)) {
1726 btrfs_release_path(root, path);
1727 reada_csum(root, path, ordered_extent);
1728 }
1729 btrfs_free_path(path);
1730 } 1712 }
1713 goto out;
1731 } 1714 }
1732 1715
1733 trans = btrfs_join_transaction(root, 1);
1734
1735 if (!ordered_extent)
1736 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1737 BUG_ON(!ordered_extent);
1738 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
1739 goto nocow;
1740
1741 lock_extent(io_tree, ordered_extent->file_offset, 1716 lock_extent(io_tree, ordered_extent->file_offset,
1742 ordered_extent->file_offset + ordered_extent->len - 1, 1717 ordered_extent->file_offset + ordered_extent->len - 1,
1743 GFP_NOFS); 1718 GFP_NOFS);
1744 1719
1720 trans = btrfs_join_transaction(root, 1);
1721
1745 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1722 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1746 compressed = 1; 1723 compressed = 1;
1747 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1724 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1748 BUG_ON(compressed); 1725 BUG_ON(compressed);
1749 ret = btrfs_mark_extent_written(trans, root, inode, 1726 ret = btrfs_mark_extent_written(trans, inode,
1750 ordered_extent->file_offset, 1727 ordered_extent->file_offset,
1751 ordered_extent->file_offset + 1728 ordered_extent->file_offset +
1752 ordered_extent->len); 1729 ordered_extent->len);
@@ -1758,8 +1735,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1758 ordered_extent->disk_len, 1735 ordered_extent->disk_len,
1759 ordered_extent->len, 1736 ordered_extent->len,
1760 ordered_extent->len, 1737 ordered_extent->len,
1761 ordered_extent->file_offset +
1762 ordered_extent->len,
1763 compressed, 0, 0, 1738 compressed, 0, 0,
1764 BTRFS_FILE_EXTENT_REG); 1739 BTRFS_FILE_EXTENT_REG);
1765 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1740 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1770,22 +1745,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1770 unlock_extent(io_tree, ordered_extent->file_offset, 1745 unlock_extent(io_tree, ordered_extent->file_offset,
1771 ordered_extent->file_offset + ordered_extent->len - 1, 1746 ordered_extent->file_offset + ordered_extent->len - 1,
1772 GFP_NOFS); 1747 GFP_NOFS);
1773nocow:
1774 add_pending_csums(trans, inode, ordered_extent->file_offset, 1748 add_pending_csums(trans, inode, ordered_extent->file_offset,
1775 &ordered_extent->list); 1749 &ordered_extent->list);
1776 1750
1777 mutex_lock(&BTRFS_I(inode)->extent_mutex); 1751 /* this also removes the ordered extent from the tree */
1778 btrfs_ordered_update_i_size(inode, ordered_extent); 1752 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1779 btrfs_update_inode(trans, root, inode); 1753 ret = btrfs_update_inode(trans, root, inode);
1780 btrfs_remove_ordered_extent(inode, ordered_extent); 1754 BUG_ON(ret);
1781 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 1755 btrfs_end_transaction(trans, root);
1782 1756out:
1783 /* once for us */ 1757 /* once for us */
1784 btrfs_put_ordered_extent(ordered_extent); 1758 btrfs_put_ordered_extent(ordered_extent);
1785 /* once for the tree */ 1759 /* once for the tree */
1786 btrfs_put_ordered_extent(ordered_extent); 1760 btrfs_put_ordered_extent(ordered_extent);
1787 1761
1788 btrfs_end_transaction(trans, root);
1789 return 0; 1762 return 0;
1790} 1763}
1791 1764
@@ -2008,6 +1981,54 @@ zeroit:
2008 return -EIO; 1981 return -EIO;
2009} 1982}
2010 1983
1984struct delayed_iput {
1985 struct list_head list;
1986 struct inode *inode;
1987};
1988
1989void btrfs_add_delayed_iput(struct inode *inode)
1990{
1991 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1992 struct delayed_iput *delayed;
1993
1994 if (atomic_add_unless(&inode->i_count, -1, 1))
1995 return;
1996
1997 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
1998 delayed->inode = inode;
1999
2000 spin_lock(&fs_info->delayed_iput_lock);
2001 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2002 spin_unlock(&fs_info->delayed_iput_lock);
2003}
2004
2005void btrfs_run_delayed_iputs(struct btrfs_root *root)
2006{
2007 LIST_HEAD(list);
2008 struct btrfs_fs_info *fs_info = root->fs_info;
2009 struct delayed_iput *delayed;
2010 int empty;
2011
2012 spin_lock(&fs_info->delayed_iput_lock);
2013 empty = list_empty(&fs_info->delayed_iputs);
2014 spin_unlock(&fs_info->delayed_iput_lock);
2015 if (empty)
2016 return;
2017
2018 down_read(&root->fs_info->cleanup_work_sem);
2019 spin_lock(&fs_info->delayed_iput_lock);
2020 list_splice_init(&fs_info->delayed_iputs, &list);
2021 spin_unlock(&fs_info->delayed_iput_lock);
2022
2023 while (!list_empty(&list)) {
2024 delayed = list_entry(list.next, struct delayed_iput, list);
2025 list_del(&delayed->list);
2026 iput(delayed->inode);
2027 kfree(delayed);
2028 }
2029 up_read(&root->fs_info->cleanup_work_sem);
2030}
2031
2011/* 2032/*
2012 * This creates an orphan entry for the given inode in case something goes 2033 * This creates an orphan entry for the given inode in case something goes
2013 * wrong in the middle of an unlink/truncate. 2034 * wrong in the middle of an unlink/truncate.
@@ -2080,16 +2101,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2080 struct inode *inode; 2101 struct inode *inode;
2081 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2102 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2082 2103
2083 path = btrfs_alloc_path(); 2104 if (!xchg(&root->clean_orphans, 0))
2084 if (!path)
2085 return; 2105 return;
2106
2107 path = btrfs_alloc_path();
2108 BUG_ON(!path);
2086 path->reada = -1; 2109 path->reada = -1;
2087 2110
2088 key.objectid = BTRFS_ORPHAN_OBJECTID; 2111 key.objectid = BTRFS_ORPHAN_OBJECTID;
2089 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2112 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2090 key.offset = (u64)-1; 2113 key.offset = (u64)-1;
2091 2114
2092
2093 while (1) { 2115 while (1) {
2094 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2116 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2095 if (ret < 0) { 2117 if (ret < 0) {
@@ -2834,37 +2856,40 @@ out:
2834 * min_type is the minimum key type to truncate down to. If set to 0, this 2856 * min_type is the minimum key type to truncate down to. If set to 0, this
2835 * will kill all the items on this inode, including the INODE_ITEM_KEY. 2857 * will kill all the items on this inode, including the INODE_ITEM_KEY.
2836 */ 2858 */
2837noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, 2859int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root, 2860 struct btrfs_root *root,
2839 struct inode *inode, 2861 struct inode *inode,
2840 u64 new_size, u32 min_type) 2862 u64 new_size, u32 min_type)
2841{ 2863{
2842 int ret;
2843 struct btrfs_path *path; 2864 struct btrfs_path *path;
2844 struct btrfs_key key;
2845 struct btrfs_key found_key;
2846 u32 found_type = (u8)-1;
2847 struct extent_buffer *leaf; 2865 struct extent_buffer *leaf;
2848 struct btrfs_file_extent_item *fi; 2866 struct btrfs_file_extent_item *fi;
2867 struct btrfs_key key;
2868 struct btrfs_key found_key;
2849 u64 extent_start = 0; 2869 u64 extent_start = 0;
2850 u64 extent_num_bytes = 0; 2870 u64 extent_num_bytes = 0;
2851 u64 extent_offset = 0; 2871 u64 extent_offset = 0;
2852 u64 item_end = 0; 2872 u64 item_end = 0;
2873 u64 mask = root->sectorsize - 1;
2874 u32 found_type = (u8)-1;
2853 int found_extent; 2875 int found_extent;
2854 int del_item; 2876 int del_item;
2855 int pending_del_nr = 0; 2877 int pending_del_nr = 0;
2856 int pending_del_slot = 0; 2878 int pending_del_slot = 0;
2857 int extent_type = -1; 2879 int extent_type = -1;
2858 int encoding; 2880 int encoding;
2859 u64 mask = root->sectorsize - 1; 2881 int ret;
2882 int err = 0;
2883
2884 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2860 2885
2861 if (root->ref_cows) 2886 if (root->ref_cows)
2862 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2887 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2888
2863 path = btrfs_alloc_path(); 2889 path = btrfs_alloc_path();
2864 BUG_ON(!path); 2890 BUG_ON(!path);
2865 path->reada = -1; 2891 path->reada = -1;
2866 2892
2867 /* FIXME, add redo link to tree so we don't leak on crash */
2868 key.objectid = inode->i_ino; 2893 key.objectid = inode->i_ino;
2869 key.offset = (u64)-1; 2894 key.offset = (u64)-1;
2870 key.type = (u8)-1; 2895 key.type = (u8)-1;
@@ -2872,17 +2897,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2872search_again: 2897search_again:
2873 path->leave_spinning = 1; 2898 path->leave_spinning = 1;
2874 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2899 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2875 if (ret < 0) 2900 if (ret < 0) {
2876 goto error; 2901 err = ret;
2902 goto out;
2903 }
2877 2904
2878 if (ret > 0) { 2905 if (ret > 0) {
2879 /* there are no items in the tree for us to truncate, we're 2906 /* there are no items in the tree for us to truncate, we're
2880 * done 2907 * done
2881 */ 2908 */
2882 if (path->slots[0] == 0) { 2909 if (path->slots[0] == 0)
2883 ret = 0; 2910 goto out;
2884 goto error;
2885 }
2886 path->slots[0]--; 2911 path->slots[0]--;
2887 } 2912 }
2888 2913
@@ -2917,28 +2942,17 @@ search_again:
2917 } 2942 }
2918 item_end--; 2943 item_end--;
2919 } 2944 }
2920 if (item_end < new_size) { 2945 if (found_type > min_type) {
2921 if (found_type == BTRFS_DIR_ITEM_KEY) 2946 del_item = 1;
2922 found_type = BTRFS_INODE_ITEM_KEY; 2947 } else {
2923 else if (found_type == BTRFS_EXTENT_ITEM_KEY) 2948 if (item_end < new_size)
2924 found_type = BTRFS_EXTENT_DATA_KEY;
2925 else if (found_type == BTRFS_EXTENT_DATA_KEY)
2926 found_type = BTRFS_XATTR_ITEM_KEY;
2927 else if (found_type == BTRFS_XATTR_ITEM_KEY)
2928 found_type = BTRFS_INODE_REF_KEY;
2929 else if (found_type)
2930 found_type--;
2931 else
2932 break; 2949 break;
2933 btrfs_set_key_type(&key, found_type); 2950 if (found_key.offset >= new_size)
2934 goto next; 2951 del_item = 1;
2952 else
2953 del_item = 0;
2935 } 2954 }
2936 if (found_key.offset >= new_size)
2937 del_item = 1;
2938 else
2939 del_item = 0;
2940 found_extent = 0; 2955 found_extent = 0;
2941
2942 /* FIXME, shrink the extent if the ref count is only 1 */ 2956 /* FIXME, shrink the extent if the ref count is only 1 */
2943 if (found_type != BTRFS_EXTENT_DATA_KEY) 2957 if (found_type != BTRFS_EXTENT_DATA_KEY)
2944 goto delete; 2958 goto delete;
@@ -3025,42 +3039,36 @@ delete:
3025 inode->i_ino, extent_offset); 3039 inode->i_ino, extent_offset);
3026 BUG_ON(ret); 3040 BUG_ON(ret);
3027 } 3041 }
3028next:
3029 if (path->slots[0] == 0) {
3030 if (pending_del_nr)
3031 goto del_pending;
3032 btrfs_release_path(root, path);
3033 if (found_type == BTRFS_INODE_ITEM_KEY)
3034 break;
3035 goto search_again;
3036 }
3037 3042
3038 path->slots[0]--; 3043 if (found_type == BTRFS_INODE_ITEM_KEY)
3039 if (pending_del_nr && 3044 break;
3040 path->slots[0] + 1 != pending_del_slot) { 3045
3041 struct btrfs_key debug; 3046 if (path->slots[0] == 0 ||
3042del_pending: 3047 path->slots[0] != pending_del_slot) {
3043 btrfs_item_key_to_cpu(path->nodes[0], &debug, 3048 if (root->ref_cows) {
3044 pending_del_slot); 3049 err = -EAGAIN;
3045 ret = btrfs_del_items(trans, root, path, 3050 goto out;
3046 pending_del_slot, 3051 }
3047 pending_del_nr); 3052 if (pending_del_nr) {
3048 BUG_ON(ret); 3053 ret = btrfs_del_items(trans, root, path,
3049 pending_del_nr = 0; 3054 pending_del_slot,
3055 pending_del_nr);
3056 BUG_ON(ret);
3057 pending_del_nr = 0;
3058 }
3050 btrfs_release_path(root, path); 3059 btrfs_release_path(root, path);
3051 if (found_type == BTRFS_INODE_ITEM_KEY)
3052 break;
3053 goto search_again; 3060 goto search_again;
3061 } else {
3062 path->slots[0]--;
3054 } 3063 }
3055 } 3064 }
3056 ret = 0; 3065out:
3057error:
3058 if (pending_del_nr) { 3066 if (pending_del_nr) {
3059 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3067 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3060 pending_del_nr); 3068 pending_del_nr);
3061 } 3069 }
3062 btrfs_free_path(path); 3070 btrfs_free_path(path);
3063 return ret; 3071 return err;
3064} 3072}
3065 3073
3066/* 3074/*
@@ -3180,10 +3188,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3180 if (size <= hole_start) 3188 if (size <= hole_start)
3181 return 0; 3189 return 0;
3182 3190
3183 err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
3184 if (err)
3185 return err;
3186
3187 while (1) { 3191 while (1) {
3188 struct btrfs_ordered_extent *ordered; 3192 struct btrfs_ordered_extent *ordered;
3189 btrfs_wait_ordered_range(inode, hole_start, 3193 btrfs_wait_ordered_range(inode, hole_start,
@@ -3196,9 +3200,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3196 btrfs_put_ordered_extent(ordered); 3200 btrfs_put_ordered_extent(ordered);
3197 } 3201 }
3198 3202
3199 trans = btrfs_start_transaction(root, 1);
3200 btrfs_set_trans_block_group(trans, inode);
3201
3202 cur_offset = hole_start; 3203 cur_offset = hole_start;
3203 while (1) { 3204 while (1) {
3204 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3205 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3206,40 +3207,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3206 BUG_ON(IS_ERR(em) || !em); 3207 BUG_ON(IS_ERR(em) || !em);
3207 last_byte = min(extent_map_end(em), block_end); 3208 last_byte = min(extent_map_end(em), block_end);
3208 last_byte = (last_byte + mask) & ~mask; 3209 last_byte = (last_byte + mask) & ~mask;
3209 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { 3210 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3210 u64 hint_byte = 0; 3211 u64 hint_byte = 0;
3211 hole_size = last_byte - cur_offset; 3212 hole_size = last_byte - cur_offset;
3212 err = btrfs_drop_extents(trans, root, inode,
3213 cur_offset,
3214 cur_offset + hole_size,
3215 block_end,
3216 cur_offset, &hint_byte, 1);
3217 if (err)
3218 break;
3219 3213
3220 err = btrfs_reserve_metadata_space(root, 1); 3214 err = btrfs_reserve_metadata_space(root, 2);
3221 if (err) 3215 if (err)
3222 break; 3216 break;
3223 3217
3218 trans = btrfs_start_transaction(root, 1);
3219 btrfs_set_trans_block_group(trans, inode);
3220
3221 err = btrfs_drop_extents(trans, inode, cur_offset,
3222 cur_offset + hole_size,
3223 &hint_byte, 1);
3224 BUG_ON(err);
3225
3224 err = btrfs_insert_file_extent(trans, root, 3226 err = btrfs_insert_file_extent(trans, root,
3225 inode->i_ino, cur_offset, 0, 3227 inode->i_ino, cur_offset, 0,
3226 0, hole_size, 0, hole_size, 3228 0, hole_size, 0, hole_size,
3227 0, 0, 0); 3229 0, 0, 0);
3230 BUG_ON(err);
3231
3228 btrfs_drop_extent_cache(inode, hole_start, 3232 btrfs_drop_extent_cache(inode, hole_start,
3229 last_byte - 1, 0); 3233 last_byte - 1, 0);
3230 btrfs_unreserve_metadata_space(root, 1); 3234
3235 btrfs_end_transaction(trans, root);
3236 btrfs_unreserve_metadata_space(root, 2);
3231 } 3237 }
3232 free_extent_map(em); 3238 free_extent_map(em);
3233 cur_offset = last_byte; 3239 cur_offset = last_byte;
3234 if (err || cur_offset >= block_end) 3240 if (cur_offset >= block_end)
3235 break; 3241 break;
3236 } 3242 }
3237 3243
3238 btrfs_end_transaction(trans, root);
3239 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3244 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
3240 return err; 3245 return err;
3241} 3246}
3242 3247
3248static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3249{
3250 struct btrfs_root *root = BTRFS_I(inode)->root;
3251 struct btrfs_trans_handle *trans;
3252 unsigned long nr;
3253 int ret;
3254
3255 if (attr->ia_size == inode->i_size)
3256 return 0;
3257
3258 if (attr->ia_size > inode->i_size) {
3259 unsigned long limit;
3260 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
3261 if (attr->ia_size > inode->i_sb->s_maxbytes)
3262 return -EFBIG;
3263 if (limit != RLIM_INFINITY && attr->ia_size > limit) {
3264 send_sig(SIGXFSZ, current, 0);
3265 return -EFBIG;
3266 }
3267 }
3268
3269 ret = btrfs_reserve_metadata_space(root, 1);
3270 if (ret)
3271 return ret;
3272
3273 trans = btrfs_start_transaction(root, 1);
3274 btrfs_set_trans_block_group(trans, inode);
3275
3276 ret = btrfs_orphan_add(trans, inode);
3277 BUG_ON(ret);
3278
3279 nr = trans->blocks_used;
3280 btrfs_end_transaction(trans, root);
3281 btrfs_unreserve_metadata_space(root, 1);
3282 btrfs_btree_balance_dirty(root, nr);
3283
3284 if (attr->ia_size > inode->i_size) {
3285 ret = btrfs_cont_expand(inode, attr->ia_size);
3286 if (ret) {
3287 btrfs_truncate(inode);
3288 return ret;
3289 }
3290
3291 i_size_write(inode, attr->ia_size);
3292 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3293
3294 trans = btrfs_start_transaction(root, 1);
3295 btrfs_set_trans_block_group(trans, inode);
3296
3297 ret = btrfs_update_inode(trans, root, inode);
3298 BUG_ON(ret);
3299 if (inode->i_nlink > 0) {
3300 ret = btrfs_orphan_del(trans, inode);
3301 BUG_ON(ret);
3302 }
3303 nr = trans->blocks_used;
3304 btrfs_end_transaction(trans, root);
3305 btrfs_btree_balance_dirty(root, nr);
3306 return 0;
3307 }
3308
3309 /*
3310 * We're truncating a file that used to have good data down to
3311 * zero. Make sure it gets into the ordered flush list so that
3312 * any new writes get down to disk quickly.
3313 */
3314 if (attr->ia_size == 0)
3315 BTRFS_I(inode)->ordered_data_close = 1;
3316
3317 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3318 ret = vmtruncate(inode, attr->ia_size);
3319 BUG_ON(ret);
3320
3321 return 0;
3322}
3323
3243static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3324static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3244{ 3325{
3245 struct inode *inode = dentry->d_inode; 3326 struct inode *inode = dentry->d_inode;
@@ -3250,23 +3331,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3250 return err; 3331 return err;
3251 3332
3252 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3333 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3253 if (attr->ia_size > inode->i_size) { 3334 err = btrfs_setattr_size(inode, attr);
3254 err = btrfs_cont_expand(inode, attr->ia_size); 3335 if (err)
3255 if (err) 3336 return err;
3256 return err;
3257 } else if (inode->i_size > 0 &&
3258 attr->ia_size == 0) {
3259
3260 /* we're truncating a file that used to have good
3261 * data down to zero. Make sure it gets into
3262 * the ordered flush list so that any new writes
3263 * get down to disk quickly.
3264 */
3265 BTRFS_I(inode)->ordered_data_close = 1;
3266 }
3267 } 3337 }
3338 attr->ia_valid &= ~ATTR_SIZE;
3268 3339
3269 err = inode_setattr(inode, attr); 3340 if (attr->ia_valid)
3341 err = inode_setattr(inode, attr);
3270 3342
3271 if (!err && ((attr->ia_valid & ATTR_MODE))) 3343 if (!err && ((attr->ia_valid & ATTR_MODE)))
3272 err = btrfs_acl_chmod(inode); 3344 err = btrfs_acl_chmod(inode);
@@ -3287,36 +3359,43 @@ void btrfs_delete_inode(struct inode *inode)
3287 } 3359 }
3288 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3360 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3289 3361
3362 if (root->fs_info->log_root_recovering) {
3363 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3364 goto no_delete;
3365 }
3366
3290 if (inode->i_nlink > 0) { 3367 if (inode->i_nlink > 0) {
3291 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3368 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3292 goto no_delete; 3369 goto no_delete;
3293 } 3370 }
3294 3371
3295 btrfs_i_size_write(inode, 0); 3372 btrfs_i_size_write(inode, 0);
3296 trans = btrfs_join_transaction(root, 1);
3297 3373
3298 btrfs_set_trans_block_group(trans, inode); 3374 while (1) {
3299 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); 3375 trans = btrfs_start_transaction(root, 1);
3300 if (ret) { 3376 btrfs_set_trans_block_group(trans, inode);
3301 btrfs_orphan_del(NULL, inode); 3377 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3302 goto no_delete_lock;
3303 }
3304 3378
3305 btrfs_orphan_del(trans, inode); 3379 if (ret != -EAGAIN)
3380 break;
3306 3381
3307 nr = trans->blocks_used; 3382 nr = trans->blocks_used;
3308 clear_inode(inode); 3383 btrfs_end_transaction(trans, root);
3384 trans = NULL;
3385 btrfs_btree_balance_dirty(root, nr);
3386 }
3309 3387
3310 btrfs_end_transaction(trans, root); 3388 if (ret == 0) {
3311 btrfs_btree_balance_dirty(root, nr); 3389 ret = btrfs_orphan_del(trans, inode);
3312 return; 3390 BUG_ON(ret);
3391 }
3313 3392
3314no_delete_lock:
3315 nr = trans->blocks_used; 3393 nr = trans->blocks_used;
3316 btrfs_end_transaction(trans, root); 3394 btrfs_end_transaction(trans, root);
3317 btrfs_btree_balance_dirty(root, nr); 3395 btrfs_btree_balance_dirty(root, nr);
3318no_delete: 3396no_delete:
3319 clear_inode(inode); 3397 clear_inode(inode);
3398 return;
3320} 3399}
3321 3400
3322/* 3401/*
@@ -3569,7 +3648,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3569 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3648 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3570 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3649 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3571 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3650 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3572 mutex_init(&BTRFS_I(inode)->extent_mutex);
3573 mutex_init(&BTRFS_I(inode)->log_mutex); 3651 mutex_init(&BTRFS_I(inode)->log_mutex);
3574} 3652}
3575 3653
@@ -3695,6 +3773,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3695 } 3773 }
3696 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3774 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3697 3775
3776 if (root != sub_root) {
3777 down_read(&root->fs_info->cleanup_work_sem);
3778 if (!(inode->i_sb->s_flags & MS_RDONLY))
3779 btrfs_orphan_cleanup(sub_root);
3780 up_read(&root->fs_info->cleanup_work_sem);
3781 }
3782
3698 return inode; 3783 return inode;
3699} 3784}
3700 3785
@@ -3869,7 +3954,11 @@ skip:
3869 3954
3870 /* Reached end of directory/root. Bump pos past the last item. */ 3955 /* Reached end of directory/root. Bump pos past the last item. */
3871 if (key_type == BTRFS_DIR_INDEX_KEY) 3956 if (key_type == BTRFS_DIR_INDEX_KEY)
3872 filp->f_pos = INT_LIMIT(off_t); 3957 /*
3958 * 32-bit glibc will use getdents64, but then strtol -
3959 * so the last number we can serve is this.
3960 */
3961 filp->f_pos = 0x7fffffff;
3873 else 3962 else
3874 filp->f_pos++; 3963 filp->f_pos++;
3875nopos: 3964nopos:
@@ -4219,7 +4308,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4219 if (IS_ERR(inode)) 4308 if (IS_ERR(inode))
4220 goto out_unlock; 4309 goto out_unlock;
4221 4310
4222 err = btrfs_init_inode_security(inode, dir); 4311 err = btrfs_init_inode_security(trans, inode, dir);
4223 if (err) { 4312 if (err) {
4224 drop_inode = 1; 4313 drop_inode = 1;
4225 goto out_unlock; 4314 goto out_unlock;
@@ -4290,7 +4379,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4290 if (IS_ERR(inode)) 4379 if (IS_ERR(inode))
4291 goto out_unlock; 4380 goto out_unlock;
4292 4381
4293 err = btrfs_init_inode_security(inode, dir); 4382 err = btrfs_init_inode_security(trans, inode, dir);
4294 if (err) { 4383 if (err) {
4295 drop_inode = 1; 4384 drop_inode = 1;
4296 goto out_unlock; 4385 goto out_unlock;
@@ -4336,6 +4425,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4336 if (inode->i_nlink == 0) 4425 if (inode->i_nlink == 0)
4337 return -ENOENT; 4426 return -ENOENT;
4338 4427
4428 /* do not allow sys_link's with other subvols of the same device */
4429 if (root->objectid != BTRFS_I(inode)->root->objectid)
4430 return -EPERM;
4431
4339 /* 4432 /*
4340 * 1 item for inode ref 4433 * 1 item for inode ref
4341 * 2 items for dir items 4434 * 2 items for dir items
@@ -4423,7 +4516,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4423 4516
4424 drop_on_err = 1; 4517 drop_on_err = 1;
4425 4518
4426 err = btrfs_init_inode_security(inode, dir); 4519 err = btrfs_init_inode_security(trans, inode, dir);
4427 if (err) 4520 if (err)
4428 goto out_fail; 4521 goto out_fail;
4429 4522
@@ -5074,17 +5167,20 @@ static void btrfs_truncate(struct inode *inode)
5074 unsigned long nr; 5167 unsigned long nr;
5075 u64 mask = root->sectorsize - 1; 5168 u64 mask = root->sectorsize - 1;
5076 5169
5077 if (!S_ISREG(inode->i_mode)) 5170 if (!S_ISREG(inode->i_mode)) {
5078 return; 5171 WARN_ON(1);
5079 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
5080 return; 5172 return;
5173 }
5081 5174
5082 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 5175 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
5083 if (ret) 5176 if (ret)
5084 return; 5177 return;
5178
5085 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 5179 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5180 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5086 5181
5087 trans = btrfs_start_transaction(root, 1); 5182 trans = btrfs_start_transaction(root, 1);
5183 btrfs_set_trans_block_group(trans, inode);
5088 5184
5089 /* 5185 /*
5090 * setattr is responsible for setting the ordered_data_close flag, 5186 * setattr is responsible for setting the ordered_data_close flag,
@@ -5106,21 +5202,32 @@ static void btrfs_truncate(struct inode *inode)
5106 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) 5202 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
5107 btrfs_add_ordered_operation(trans, root, inode); 5203 btrfs_add_ordered_operation(trans, root, inode);
5108 5204
5109 btrfs_set_trans_block_group(trans, inode); 5205 while (1) {
5110 btrfs_i_size_write(inode, inode->i_size); 5206 ret = btrfs_truncate_inode_items(trans, root, inode,
5207 inode->i_size,
5208 BTRFS_EXTENT_DATA_KEY);
5209 if (ret != -EAGAIN)
5210 break;
5111 5211
5112 ret = btrfs_orphan_add(trans, inode); 5212 ret = btrfs_update_inode(trans, root, inode);
5113 if (ret) 5213 BUG_ON(ret);
5114 goto out;
5115 /* FIXME, add redo link to tree so we don't leak on crash */
5116 ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
5117 BTRFS_EXTENT_DATA_KEY);
5118 btrfs_update_inode(trans, root, inode);
5119 5214
5120 ret = btrfs_orphan_del(trans, inode); 5215 nr = trans->blocks_used;
5216 btrfs_end_transaction(trans, root);
5217 btrfs_btree_balance_dirty(root, nr);
5218
5219 trans = btrfs_start_transaction(root, 1);
5220 btrfs_set_trans_block_group(trans, inode);
5221 }
5222
5223 if (ret == 0 && inode->i_nlink > 0) {
5224 ret = btrfs_orphan_del(trans, inode);
5225 BUG_ON(ret);
5226 }
5227
5228 ret = btrfs_update_inode(trans, root, inode);
5121 BUG_ON(ret); 5229 BUG_ON(ret);
5122 5230
5123out:
5124 nr = trans->blocks_used; 5231 nr = trans->blocks_used;
5125 ret = btrfs_end_transaction_throttle(trans, root); 5232 ret = btrfs_end_transaction_throttle(trans, root);
5126 BUG_ON(ret); 5233 BUG_ON(ret);
@@ -5217,9 +5324,9 @@ void btrfs_destroy_inode(struct inode *inode)
5217 5324
5218 spin_lock(&root->list_lock); 5325 spin_lock(&root->list_lock);
5219 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 5326 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5220 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 5327 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5221 " list\n", inode->i_ino); 5328 inode->i_ino);
5222 dump_stack(); 5329 list_del_init(&BTRFS_I(inode)->i_orphan);
5223 } 5330 }
5224 spin_unlock(&root->list_lock); 5331 spin_unlock(&root->list_lock);
5225 5332
@@ -5476,7 +5583,7 @@ out_fail:
5476 * some fairly slow code that needs optimization. This walks the list 5583 * some fairly slow code that needs optimization. This walks the list
5477 * of all the inodes with pending delalloc and forces them to disk. 5584 * of all the inodes with pending delalloc and forces them to disk.
5478 */ 5585 */
5479int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5586int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5480{ 5587{
5481 struct list_head *head = &root->fs_info->delalloc_inodes; 5588 struct list_head *head = &root->fs_info->delalloc_inodes;
5482 struct btrfs_inode *binode; 5589 struct btrfs_inode *binode;
@@ -5495,7 +5602,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5495 spin_unlock(&root->fs_info->delalloc_lock); 5602 spin_unlock(&root->fs_info->delalloc_lock);
5496 if (inode) { 5603 if (inode) {
5497 filemap_flush(inode->i_mapping); 5604 filemap_flush(inode->i_mapping);
5498 iput(inode); 5605 if (delay_iput)
5606 btrfs_add_delayed_iput(inode);
5607 else
5608 iput(inode);
5499 } 5609 }
5500 cond_resched(); 5610 cond_resched();
5501 spin_lock(&root->fs_info->delalloc_lock); 5611 spin_lock(&root->fs_info->delalloc_lock);
@@ -5569,7 +5679,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5569 if (IS_ERR(inode)) 5679 if (IS_ERR(inode))
5570 goto out_unlock; 5680 goto out_unlock;
5571 5681
5572 err = btrfs_init_inode_security(inode, dir); 5682 err = btrfs_init_inode_security(trans, inode, dir);
5573 if (err) { 5683 if (err) {
5574 drop_inode = 1; 5684 drop_inode = 1;
5575 goto out_unlock; 5685 goto out_unlock;
@@ -5641,57 +5751,77 @@ out_fail:
5641 return err; 5751 return err;
5642} 5752}
5643 5753
5644static int prealloc_file_range(struct btrfs_trans_handle *trans, 5754static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5645 struct inode *inode, u64 start, u64 end, 5755 u64 alloc_hint, int mode, loff_t actual_len)
5646 u64 locked_end, u64 alloc_hint, int mode)
5647{ 5756{
5757 struct btrfs_trans_handle *trans;
5648 struct btrfs_root *root = BTRFS_I(inode)->root; 5758 struct btrfs_root *root = BTRFS_I(inode)->root;
5649 struct btrfs_key ins; 5759 struct btrfs_key ins;
5650 u64 alloc_size; 5760 u64 alloc_size;
5651 u64 cur_offset = start; 5761 u64 cur_offset = start;
5652 u64 num_bytes = end - start; 5762 u64 num_bytes = end - start;
5653 int ret = 0; 5763 int ret = 0;
5764 u64 i_size;
5654 5765
5655 while (num_bytes > 0) { 5766 while (num_bytes > 0) {
5656 alloc_size = min(num_bytes, root->fs_info->max_extent); 5767 alloc_size = min(num_bytes, root->fs_info->max_extent);
5657 5768
5658 ret = btrfs_reserve_metadata_space(root, 1); 5769 trans = btrfs_start_transaction(root, 1);
5659 if (ret)
5660 goto out;
5661 5770
5662 ret = btrfs_reserve_extent(trans, root, alloc_size, 5771 ret = btrfs_reserve_extent(trans, root, alloc_size,
5663 root->sectorsize, 0, alloc_hint, 5772 root->sectorsize, 0, alloc_hint,
5664 (u64)-1, &ins, 1); 5773 (u64)-1, &ins, 1);
5665 if (ret) { 5774 if (ret) {
5666 WARN_ON(1); 5775 WARN_ON(1);
5667 goto out; 5776 goto stop_trans;
5668 } 5777 }
5778
5779 ret = btrfs_reserve_metadata_space(root, 3);
5780 if (ret) {
5781 btrfs_free_reserved_extent(root, ins.objectid,
5782 ins.offset);
5783 goto stop_trans;
5784 }
5785
5669 ret = insert_reserved_file_extent(trans, inode, 5786 ret = insert_reserved_file_extent(trans, inode,
5670 cur_offset, ins.objectid, 5787 cur_offset, ins.objectid,
5671 ins.offset, ins.offset, 5788 ins.offset, ins.offset,
5672 ins.offset, locked_end, 5789 ins.offset, 0, 0, 0,
5673 0, 0, 0,
5674 BTRFS_FILE_EXTENT_PREALLOC); 5790 BTRFS_FILE_EXTENT_PREALLOC);
5675 BUG_ON(ret); 5791 BUG_ON(ret);
5676 btrfs_drop_extent_cache(inode, cur_offset, 5792 btrfs_drop_extent_cache(inode, cur_offset,
5677 cur_offset + ins.offset -1, 0); 5793 cur_offset + ins.offset -1, 0);
5794
5678 num_bytes -= ins.offset; 5795 num_bytes -= ins.offset;
5679 cur_offset += ins.offset; 5796 cur_offset += ins.offset;
5680 alloc_hint = ins.objectid + ins.offset; 5797 alloc_hint = ins.objectid + ins.offset;
5681 btrfs_unreserve_metadata_space(root, 1); 5798
5682 }
5683out:
5684 if (cur_offset > start) {
5685 inode->i_ctime = CURRENT_TIME; 5799 inode->i_ctime = CURRENT_TIME;
5686 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 5800 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5687 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5801 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5688 cur_offset > i_size_read(inode)) 5802 (actual_len > inode->i_size) &&
5689 btrfs_i_size_write(inode, cur_offset); 5803 (cur_offset > inode->i_size)) {
5804
5805 if (cur_offset > actual_len)
5806 i_size = actual_len;
5807 else
5808 i_size = cur_offset;
5809 i_size_write(inode, i_size);
5810 btrfs_ordered_update_i_size(inode, i_size, NULL);
5811 }
5812
5690 ret = btrfs_update_inode(trans, root, inode); 5813 ret = btrfs_update_inode(trans, root, inode);
5691 BUG_ON(ret); 5814 BUG_ON(ret);
5815
5816 btrfs_end_transaction(trans, root);
5817 btrfs_unreserve_metadata_space(root, 3);
5692 } 5818 }
5819 return ret;
5693 5820
5821stop_trans:
5822 btrfs_end_transaction(trans, root);
5694 return ret; 5823 return ret;
5824
5695} 5825}
5696 5826
5697static long btrfs_fallocate(struct inode *inode, int mode, 5827static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5705,8 +5835,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5705 u64 locked_end; 5835 u64 locked_end;
5706 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5836 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5707 struct extent_map *em; 5837 struct extent_map *em;
5708 struct btrfs_trans_handle *trans;
5709 struct btrfs_root *root;
5710 int ret; 5838 int ret;
5711 5839
5712 alloc_start = offset & ~mask; 5840 alloc_start = offset & ~mask;
@@ -5725,9 +5853,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5725 goto out; 5853 goto out;
5726 } 5854 }
5727 5855
5728 root = BTRFS_I(inode)->root; 5856 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5729
5730 ret = btrfs_check_data_free_space(root, inode,
5731 alloc_end - alloc_start); 5857 alloc_end - alloc_start);
5732 if (ret) 5858 if (ret)
5733 goto out; 5859 goto out;
@@ -5736,12 +5862,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5736 while (1) { 5862 while (1) {
5737 struct btrfs_ordered_extent *ordered; 5863 struct btrfs_ordered_extent *ordered;
5738 5864
5739 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5740 if (!trans) {
5741 ret = -EIO;
5742 goto out_free;
5743 }
5744
5745 /* the extent lock is ordered inside the running 5865 /* the extent lock is ordered inside the running
5746 * transaction 5866 * transaction
5747 */ 5867 */
@@ -5755,8 +5875,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5755 btrfs_put_ordered_extent(ordered); 5875 btrfs_put_ordered_extent(ordered);
5756 unlock_extent(&BTRFS_I(inode)->io_tree, 5876 unlock_extent(&BTRFS_I(inode)->io_tree,
5757 alloc_start, locked_end, GFP_NOFS); 5877 alloc_start, locked_end, GFP_NOFS);
5758 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5759
5760 /* 5878 /*
5761 * we can't wait on the range with the transaction 5879 * we can't wait on the range with the transaction
5762 * running or with the extent lock held 5880 * running or with the extent lock held
@@ -5777,10 +5895,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5777 BUG_ON(IS_ERR(em) || !em); 5895 BUG_ON(IS_ERR(em) || !em);
5778 last_byte = min(extent_map_end(em), alloc_end); 5896 last_byte = min(extent_map_end(em), alloc_end);
5779 last_byte = (last_byte + mask) & ~mask; 5897 last_byte = (last_byte + mask) & ~mask;
5780 if (em->block_start == EXTENT_MAP_HOLE) { 5898 if (em->block_start == EXTENT_MAP_HOLE ||
5781 ret = prealloc_file_range(trans, inode, cur_offset, 5899 (cur_offset >= inode->i_size &&
5782 last_byte, locked_end + 1, 5900 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5783 alloc_hint, mode); 5901 ret = prealloc_file_range(inode,
5902 cur_offset, last_byte,
5903 alloc_hint, mode, offset+len);
5784 if (ret < 0) { 5904 if (ret < 0) {
5785 free_extent_map(em); 5905 free_extent_map(em);
5786 break; 5906 break;
@@ -5799,9 +5919,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5799 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5919 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5800 GFP_NOFS); 5920 GFP_NOFS);
5801 5921
5802 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5922 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5803out_free: 5923 alloc_end - alloc_start);
5804 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5805out: 5924out:
5806 mutex_unlock(&inode->i_mutex); 5925 mutex_unlock(&inode->i_mutex);
5807 return ret; 5926 return ret;