aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:41:54 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:41:54 -0500
commitb695188dd39162a1a6bff11fdbcc4c0b65b933ab (patch)
treea3df7c052d38b5bfaf335fbf3130abcc5c6ca577 /fs/btrfs/inode.c
parent48476df99894492a0f7239f2f3c9a2dde4ff38e2 (diff)
parent180e001cd5fc2950dc6a7997dde5b65c954d0e79 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason: "The biggest feature in the pull is the new (and still experimental) raid56 code that David Woodhouse started long ago. I'm still working on the parity logging setup that will avoid inconsistent parity after a crash, so this is only for testing right now. But, I'd really like to get it out to a broader audience to hammer out any performance issues or other problems. scrub does not yet correct errors on raid5/6 either. Josef has another pass at fsync performance. The big change here is to combine waiting for metadata with waiting for data, which is a big latency win. It is also step one toward using atomics from the hardware during a commit. Mark Fasheh has a new way to use btrfs send/receive to send only the metadata changes. SUSE is using this to make snapper more efficient at finding changes between snapshots. Snapshot-aware defrag is also included. Otherwise we have a large number of fixes and cleanups. Eric Sandeen wins the award for removing the most lines, and I'm hoping we steal this idea from XFS over and over again." 
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits) btrfs: fixup/remove module.h usage as required Btrfs: delete inline extents when we find them during logging btrfs: try harder to allocate raid56 stripe cache Btrfs: cleanup to make the function btrfs_delalloc_reserve_metadata more logic Btrfs: don't call btrfs_qgroup_free if just btrfs_qgroup_reserve fails Btrfs: remove reduplicate check about root in the function btrfs_clean_quota_tree Btrfs: return ENOMEM rather than use BUG_ON when btrfs_alloc_path fails Btrfs: fix missing deleted items in btrfs_clean_quota_tree btrfs: use only inline_pages from extent buffer Btrfs: fix wrong reserved space when deleting a snapshot/subvolume Btrfs: fix wrong reserved space in qgroup during snap/subv creation Btrfs: remove unnecessary dget_parent/dput when creating the pending snapshot btrfs: remove a printk from scan_one_device Btrfs: fix NULL pointer after aborting a transaction Btrfs: fix memory leak of log roots Btrfs: copy everything if we've created an inline extent btrfs: cleanup for open-coded alignment Btrfs: do not change inode flags in rename Btrfs: use reserved space for creating a snapshot clear chunk_alloc flag on retryable failure ...
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c1064
1 files changed, 890 insertions, 174 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 55c07b650378..c226daefd65d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,12 +39,13 @@
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h> 40#include <linux/ratelimit.h>
41#include <linux/mount.h> 41#include <linux/mount.h>
42#include <linux/btrfs.h>
43#include <linux/blkdev.h>
42#include "compat.h" 44#include "compat.h"
43#include "ctree.h" 45#include "ctree.h"
44#include "disk-io.h" 46#include "disk-io.h"
45#include "transaction.h" 47#include "transaction.h"
46#include "btrfs_inode.h" 48#include "btrfs_inode.h"
47#include "ioctl.h"
48#include "print-tree.h" 49#include "print-tree.h"
49#include "ordered-data.h" 50#include "ordered-data.h"
50#include "xattr.h" 51#include "xattr.h"
@@ -54,6 +55,7 @@
54#include "locking.h" 55#include "locking.h"
55#include "free-space-cache.h" 56#include "free-space-cache.h"
56#include "inode-map.h" 57#include "inode-map.h"
58#include "backref.h"
57 59
58struct btrfs_iget_args { 60struct btrfs_iget_args {
59 u64 ino; 61 u64 ino;
@@ -231,8 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
231 u64 isize = i_size_read(inode); 233 u64 isize = i_size_read(inode);
232 u64 actual_end = min(end + 1, isize); 234 u64 actual_end = min(end + 1, isize);
233 u64 inline_len = actual_end - start; 235 u64 inline_len = actual_end - start;
234 u64 aligned_end = (end + root->sectorsize - 1) & 236 u64 aligned_end = ALIGN(end, root->sectorsize);
235 ~((u64)root->sectorsize - 1);
236 u64 data_len = inline_len; 237 u64 data_len = inline_len;
237 int ret; 238 int ret;
238 239
@@ -265,6 +266,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
265 return 1; 266 return 1;
266 } 267 }
267 268
269 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
268 btrfs_delalloc_release_metadata(inode, end + 1 - start); 270 btrfs_delalloc_release_metadata(inode, end + 1 - start);
269 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 271 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
270 return 0; 272 return 0;
@@ -389,7 +391,7 @@ again:
389 * a compressed extent to 128k. 391 * a compressed extent to 128k.
390 */ 392 */
391 total_compressed = min(total_compressed, max_uncompressed); 393 total_compressed = min(total_compressed, max_uncompressed);
392 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 394 num_bytes = ALIGN(end - start + 1, blocksize);
393 num_bytes = max(blocksize, num_bytes); 395 num_bytes = max(blocksize, num_bytes);
394 total_in = 0; 396 total_in = 0;
395 ret = 0; 397 ret = 0;
@@ -488,15 +490,13 @@ cont:
488 * up to a block size boundary so the allocator does sane 490 * up to a block size boundary so the allocator does sane
489 * things 491 * things
490 */ 492 */
491 total_compressed = (total_compressed + blocksize - 1) & 493 total_compressed = ALIGN(total_compressed, blocksize);
492 ~(blocksize - 1);
493 494
494 /* 495 /*
495 * one last check to make sure the compression is really a 496 * one last check to make sure the compression is really a
496 * win, compare the page count read with the blocks on disk 497 * win, compare the page count read with the blocks on disk
497 */ 498 */
498 total_in = (total_in + PAGE_CACHE_SIZE - 1) & 499 total_in = ALIGN(total_in, PAGE_CACHE_SIZE);
499 ~(PAGE_CACHE_SIZE - 1);
500 if (total_compressed >= total_in) { 500 if (total_compressed >= total_in) {
501 will_compress = 0; 501 will_compress = 0;
502 } else { 502 } else {
@@ -608,7 +608,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
608 if (list_empty(&async_cow->extents)) 608 if (list_empty(&async_cow->extents))
609 return 0; 609 return 0;
610 610
611 611again:
612 while (!list_empty(&async_cow->extents)) { 612 while (!list_empty(&async_cow->extents)) {
613 async_extent = list_entry(async_cow->extents.next, 613 async_extent = list_entry(async_cow->extents.next,
614 struct async_extent, list); 614 struct async_extent, list);
@@ -648,6 +648,8 @@ retry:
648 async_extent->ram_size - 1, 648 async_extent->ram_size - 1,
649 btrfs_get_extent, 649 btrfs_get_extent,
650 WB_SYNC_ALL); 650 WB_SYNC_ALL);
651 else if (ret)
652 unlock_page(async_cow->locked_page);
651 kfree(async_extent); 653 kfree(async_extent);
652 cond_resched(); 654 cond_resched();
653 continue; 655 continue;
@@ -672,6 +674,7 @@ retry:
672 674
673 if (ret) { 675 if (ret) {
674 int i; 676 int i;
677
675 for (i = 0; i < async_extent->nr_pages; i++) { 678 for (i = 0; i < async_extent->nr_pages; i++) {
676 WARN_ON(async_extent->pages[i]->mapping); 679 WARN_ON(async_extent->pages[i]->mapping);
677 page_cache_release(async_extent->pages[i]); 680 page_cache_release(async_extent->pages[i]);
@@ -679,12 +682,10 @@ retry:
679 kfree(async_extent->pages); 682 kfree(async_extent->pages);
680 async_extent->nr_pages = 0; 683 async_extent->nr_pages = 0;
681 async_extent->pages = NULL; 684 async_extent->pages = NULL;
682 unlock_extent(io_tree, async_extent->start, 685
683 async_extent->start +
684 async_extent->ram_size - 1);
685 if (ret == -ENOSPC) 686 if (ret == -ENOSPC)
686 goto retry; 687 goto retry;
687 goto out_free; /* JDM: Requeue? */ 688 goto out_free;
688 } 689 }
689 690
690 /* 691 /*
@@ -696,10 +697,13 @@ retry:
696 async_extent->ram_size - 1, 0); 697 async_extent->ram_size - 1, 0);
697 698
698 em = alloc_extent_map(); 699 em = alloc_extent_map();
699 BUG_ON(!em); /* -ENOMEM */ 700 if (!em)
701 goto out_free_reserve;
700 em->start = async_extent->start; 702 em->start = async_extent->start;
701 em->len = async_extent->ram_size; 703 em->len = async_extent->ram_size;
702 em->orig_start = em->start; 704 em->orig_start = em->start;
705 em->mod_start = em->start;
706 em->mod_len = em->len;
703 707
704 em->block_start = ins.objectid; 708 em->block_start = ins.objectid;
705 em->block_len = ins.offset; 709 em->block_len = ins.offset;
@@ -726,6 +730,9 @@ retry:
726 async_extent->ram_size - 1, 0); 730 async_extent->ram_size - 1, 0);
727 } 731 }
728 732
733 if (ret)
734 goto out_free_reserve;
735
729 ret = btrfs_add_ordered_extent_compress(inode, 736 ret = btrfs_add_ordered_extent_compress(inode,
730 async_extent->start, 737 async_extent->start,
731 ins.objectid, 738 ins.objectid,
@@ -733,7 +740,8 @@ retry:
733 ins.offset, 740 ins.offset,
734 BTRFS_ORDERED_COMPRESSED, 741 BTRFS_ORDERED_COMPRESSED,
735 async_extent->compress_type); 742 async_extent->compress_type);
736 BUG_ON(ret); /* -ENOMEM */ 743 if (ret)
744 goto out_free_reserve;
737 745
738 /* 746 /*
739 * clear dirty, set writeback and unlock the pages. 747 * clear dirty, set writeback and unlock the pages.
@@ -754,18 +762,30 @@ retry:
754 ins.objectid, 762 ins.objectid,
755 ins.offset, async_extent->pages, 763 ins.offset, async_extent->pages,
756 async_extent->nr_pages); 764 async_extent->nr_pages);
757
758 BUG_ON(ret); /* -ENOMEM */
759 alloc_hint = ins.objectid + ins.offset; 765 alloc_hint = ins.objectid + ins.offset;
760 kfree(async_extent); 766 kfree(async_extent);
767 if (ret)
768 goto out;
761 cond_resched(); 769 cond_resched();
762 } 770 }
763 ret = 0; 771 ret = 0;
764out: 772out:
765 return ret; 773 return ret;
774out_free_reserve:
775 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
766out_free: 776out_free:
777 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
778 async_extent->start,
779 async_extent->start +
780 async_extent->ram_size - 1,
781 NULL, EXTENT_CLEAR_UNLOCK_PAGE |
782 EXTENT_CLEAR_UNLOCK |
783 EXTENT_CLEAR_DELALLOC |
784 EXTENT_CLEAR_DIRTY |
785 EXTENT_SET_WRITEBACK |
786 EXTENT_END_WRITEBACK);
767 kfree(async_extent); 787 kfree(async_extent);
768 goto out; 788 goto again;
769} 789}
770 790
771static u64 get_extent_allocation_hint(struct inode *inode, u64 start, 791static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@@ -834,7 +854,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
834 854
835 BUG_ON(btrfs_is_free_space_inode(inode)); 855 BUG_ON(btrfs_is_free_space_inode(inode));
836 856
837 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 857 num_bytes = ALIGN(end - start + 1, blocksize);
838 num_bytes = max(blocksize, num_bytes); 858 num_bytes = max(blocksize, num_bytes);
839 disk_num_bytes = num_bytes; 859 disk_num_bytes = num_bytes;
840 860
@@ -892,6 +912,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
892 em->orig_start = em->start; 912 em->orig_start = em->start;
893 ram_size = ins.offset; 913 ram_size = ins.offset;
894 em->len = ins.offset; 914 em->len = ins.offset;
915 em->mod_start = em->start;
916 em->mod_len = em->len;
895 917
896 em->block_start = ins.objectid; 918 em->block_start = ins.objectid;
897 em->block_len = ins.offset; 919 em->block_len = ins.offset;
@@ -1338,6 +1360,8 @@ out_check:
1338 em->block_start = disk_bytenr; 1360 em->block_start = disk_bytenr;
1339 em->orig_block_len = disk_num_bytes; 1361 em->orig_block_len = disk_num_bytes;
1340 em->bdev = root->fs_info->fs_devices->latest_bdev; 1362 em->bdev = root->fs_info->fs_devices->latest_bdev;
1363 em->mod_start = em->start;
1364 em->mod_len = em->len;
1341 set_bit(EXTENT_FLAG_PINNED, &em->flags); 1365 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1342 set_bit(EXTENT_FLAG_FILLING, &em->flags); 1366 set_bit(EXTENT_FLAG_FILLING, &em->flags);
1343 em->generation = -1; 1367 em->generation = -1;
@@ -1508,14 +1532,22 @@ static void btrfs_set_bit_hook(struct inode *inode,
1508 spin_unlock(&BTRFS_I(inode)->lock); 1532 spin_unlock(&BTRFS_I(inode)->lock);
1509 } 1533 }
1510 1534
1511 spin_lock(&root->fs_info->delalloc_lock); 1535 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1536 root->fs_info->delalloc_batch);
1537 spin_lock(&BTRFS_I(inode)->lock);
1512 BTRFS_I(inode)->delalloc_bytes += len; 1538 BTRFS_I(inode)->delalloc_bytes += len;
1513 root->fs_info->delalloc_bytes += len; 1539 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1514 if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1540 &BTRFS_I(inode)->runtime_flags)) {
1515 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1541 spin_lock(&root->fs_info->delalloc_lock);
1516 &root->fs_info->delalloc_inodes); 1542 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1543 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1544 &root->fs_info->delalloc_inodes);
1545 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1546 &BTRFS_I(inode)->runtime_flags);
1547 }
1548 spin_unlock(&root->fs_info->delalloc_lock);
1517 } 1549 }
1518 spin_unlock(&root->fs_info->delalloc_lock); 1550 spin_unlock(&BTRFS_I(inode)->lock);
1519 } 1551 }
1520} 1552}
1521 1553
@@ -1550,15 +1582,22 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1550 && do_list) 1582 && do_list)
1551 btrfs_free_reserved_data_space(inode, len); 1583 btrfs_free_reserved_data_space(inode, len);
1552 1584
1553 spin_lock(&root->fs_info->delalloc_lock); 1585 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
1554 root->fs_info->delalloc_bytes -= len; 1586 root->fs_info->delalloc_batch);
1587 spin_lock(&BTRFS_I(inode)->lock);
1555 BTRFS_I(inode)->delalloc_bytes -= len; 1588 BTRFS_I(inode)->delalloc_bytes -= len;
1556
1557 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && 1589 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1558 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1590 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1559 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1591 &BTRFS_I(inode)->runtime_flags)) {
1592 spin_lock(&root->fs_info->delalloc_lock);
1593 if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1594 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1595 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1596 &BTRFS_I(inode)->runtime_flags);
1597 }
1598 spin_unlock(&root->fs_info->delalloc_lock);
1560 } 1599 }
1561 spin_unlock(&root->fs_info->delalloc_lock); 1600 spin_unlock(&BTRFS_I(inode)->lock);
1562 } 1601 }
1563} 1602}
1564 1603
@@ -1566,7 +1605,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1566 * extent_io.c merge_bio_hook, this must check the chunk tree to make sure 1605 * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
1567 * we don't create bios that span stripes or chunks 1606 * we don't create bios that span stripes or chunks
1568 */ 1607 */
1569int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 1608int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1570 size_t size, struct bio *bio, 1609 size_t size, struct bio *bio,
1571 unsigned long bio_flags) 1610 unsigned long bio_flags)
1572{ 1611{
@@ -1581,7 +1620,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1581 1620
1582 length = bio->bi_size; 1621 length = bio->bi_size;
1583 map_length = length; 1622 map_length = length;
1584 ret = btrfs_map_block(root->fs_info, READ, logical, 1623 ret = btrfs_map_block(root->fs_info, rw, logical,
1585 &map_length, NULL, 0); 1624 &map_length, NULL, 0);
1586 /* Will always return 0 with map_multi == NULL */ 1625 /* Will always return 0 with map_multi == NULL */
1587 BUG_ON(ret < 0); 1626 BUG_ON(ret < 0);
@@ -1892,6 +1931,640 @@ out:
1892 return ret; 1931 return ret;
1893} 1932}
1894 1933
/* snapshot-aware defrag */

/*
 * One backref discovered for an old (pre-defrag) disk extent: identifies a
 * (root, inode, file offset) whose file extent item still points at the old
 * extent and therefore needs to be relinked to the freshly written one.
 * Sorted into new_sa_defrag_extent::root by backref_comp().
 */
struct sa_defrag_extent_backref {
	struct rb_node node;			/* entry in new_sa_defrag_extent::root */
	struct old_sa_defrag_extent *old;	/* the old extent this backref refers to */
	u64 root_id;				/* objectid of the owning fs/file tree */
	u64 inum;				/* inode number within that tree */
	u64 file_pos;				/* file offset of the referencing extent item */
	u64 extent_offset;			/* item's offset into the old disk extent */
	u64 num_bytes;				/* length of the referencing file extent */
	u64 generation;				/* generation of the file extent item */
};

/*
 * One of the old disk extents that covered the defragged file range before
 * defrag rewrote it.  Filled in by record_old_file_extents().
 */
struct old_sa_defrag_extent {
	struct list_head list;			/* entry in new_sa_defrag_extent::head */
	struct new_sa_defrag_extent *new;	/* the replacement (defragged) extent */

	u64 extent_offset;	/* old item's offset into its disk extent */
	u64 bytenr;		/* disk bytenr of the old extent */
	u64 offset;		/* overlap start, relative to the item's key offset */
	u64 len;		/* length of the overlap with the defragged range */
	int count;		/* number of backrefs recorded for this old extent */
};

/*
 * Top-level state for relinking snapshot references after a defrag: describes
 * the new on-disk extent plus the old extents and backrefs to be rewired.
 */
struct new_sa_defrag_extent {
	struct rb_root root;		/* tree of sa_defrag_extent_backref */
	struct list_head head;		/* list of old_sa_defrag_extent */
	struct btrfs_path *path;	/* scratch path shared by the backref walk */
	struct inode *inode;		/* inode that was defragged */
	u64 file_pos;			/* start of the defragged file range */
	u64 len;			/* length of the defragged file range */
	u64 bytenr;			/* disk bytenr of the new extent */
	u64 disk_len;			/* on-disk length of the new extent */
	u8 compress_type;		/* compression type of the new extent */
};
1968
1969static int backref_comp(struct sa_defrag_extent_backref *b1,
1970 struct sa_defrag_extent_backref *b2)
1971{
1972 if (b1->root_id < b2->root_id)
1973 return -1;
1974 else if (b1->root_id > b2->root_id)
1975 return 1;
1976
1977 if (b1->inum < b2->inum)
1978 return -1;
1979 else if (b1->inum > b2->inum)
1980 return 1;
1981
1982 if (b1->file_pos < b2->file_pos)
1983 return -1;
1984 else if (b1->file_pos > b2->file_pos)
1985 return 1;
1986
1987 /*
1988 * [------------------------------] ===> (a range of space)
1989 * |<--->| |<---->| =============> (fs/file tree A)
1990 * |<---------------------------->| ===> (fs/file tree B)
1991 *
1992 * A range of space can refer to two file extents in one tree while
1993 * refer to only one file extent in another tree.
1994 *
1995 * So we may process a disk offset more than one time(two extents in A)
1996 * and locate at the same extent(one extent in B), then insert two same
1997 * backrefs(both refer to the extent in B).
1998 */
1999 return 0;
2000}
2001
2002static void backref_insert(struct rb_root *root,
2003 struct sa_defrag_extent_backref *backref)
2004{
2005 struct rb_node **p = &root->rb_node;
2006 struct rb_node *parent = NULL;
2007 struct sa_defrag_extent_backref *entry;
2008 int ret;
2009
2010 while (*p) {
2011 parent = *p;
2012 entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
2013
2014 ret = backref_comp(backref, entry);
2015 if (ret < 0)
2016 p = &(*p)->rb_left;
2017 else
2018 p = &(*p)->rb_right;
2019 }
2020
2021 rb_link_node(&backref->node, parent, p);
2022 rb_insert_color(&backref->node, root);
2023}
2024
/*
 * Note the backref might has changed, and in this case we just return 0.
 *
 * iterate_inodes_from_logical() callback: given one (inum, offset, root_id)
 * reference reported for the old extent's logical address, locate the exact
 * file extent item in that root which still points at old->bytenr and
 * overlaps the defragged range, and record it as a sa_defrag_extent_backref
 * in new->root.
 *
 * @ctx is the old_sa_defrag_extent being resolved.  Returns 0 on success or
 * when the reference no longer matches; negative errno on lookup failure.
 */
static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
				       void *ctx)
{
	struct btrfs_file_extent_item *extent;
	struct btrfs_fs_info *fs_info;
	struct old_sa_defrag_extent *old = ctx;
	struct new_sa_defrag_extent *new = old->new;
	struct btrfs_path *path = new->path;
	struct btrfs_key key;
	struct btrfs_root *root;
	struct sa_defrag_extent_backref *backref;
	struct extent_buffer *leaf;
	struct inode *inode = new->inode;
	int slot;
	int ret;
	u64 extent_offset;
	u64 num_bytes;

	/* references from the defragged inode itself need no relinking */
	if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
	    inum == btrfs_ino(inode))
		return 0;

	key.objectid = root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	fs_info = BTRFS_I(inode)->root->fs_info;
	root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(root)) {
		/* the subvolume is gone; nothing to relink */
		if (PTR_ERR(root) == -ENOENT)
			return 0;
		WARN_ON(1);
		pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
			 inum, offset, root_id);
		return PTR_ERR(root);
	}

	key.objectid = inum;
	key.type = BTRFS_EXTENT_DATA_KEY;
	/* guard against (offset + extent_offset) style wraparound in keys */
	if (offset > (u64)-1 << 32)
		key.offset = 0;
	else
		key.offset = offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		WARN_ON(1);
		return ret;
	}
	/*
	 * NOTE(review): if the search returns > 0 (no exact match) and the
	 * scan below breaks out successfully, ret is still > 0 at "out",
	 * tripping WARN_ON(ret) and returning a positive value -- confirm
	 * whether ret should be reset to 0 before the loop.
	 */

	/* scan forward for the extent item that matches this reference */
	while (1) {
		cond_resched();

		leaf = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				/* ran off the tree: reference vanished */
				ret = 0;
				goto out;
			}
			continue;
		}

		/* advance for the next iteration; this pass reads "slot" */
		path->slots[0]++;

		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.objectid > inum)
			goto out;

		if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);

		/* must still point at the old extent we are replacing */
		if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
			continue;

		extent_offset = btrfs_file_extent_offset(leaf, extent);
		/* the reported logical offset must match this item exactly */
		if (key.offset - extent_offset != offset)
			continue;

		/* and the item must overlap the defragged portion */
		num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
		if (extent_offset >= old->extent_offset + old->offset +
		    old->len || extent_offset + num_bytes <=
		    old->extent_offset + old->offset)
			continue;

		break;
	}

	backref = kmalloc(sizeof(*backref), GFP_NOFS);
	if (!backref) {
		/*
		 * NOTE(review): -ENOENT for an allocation failure looks
		 * wrong (-ENOMEM expected), but the caller BUG_ON()s any
		 * negative value other than -ENOENT, so changing this
		 * requires changing record_extent_backrefs() too -- confirm.
		 */
		ret = -ENOENT;
		goto out;
	}

	backref->root_id = root_id;
	backref->inum = inum;
	backref->file_pos = offset + extent_offset;
	backref->num_bytes = num_bytes;
	backref->extent_offset = extent_offset;
	backref->generation = btrfs_file_extent_generation(leaf, extent);
	backref->old = old;
	backref_insert(&new->root, backref);
	old->count++;
out:
	btrfs_release_path(path);
	WARN_ON(ret);
	return ret;
}
2144
/*
 * Walk every old extent recorded for @new and collect, via the backref
 * iterator, all (root, inode, offset) references that still point at it.
 * Old extents that end up with no backrefs are dropped from the list.
 *
 * Returns true if at least one old extent still has backrefs to relink,
 * false if there is nothing left to do.
 */
static noinline bool record_extent_backrefs(struct btrfs_path *path,
				 struct new_sa_defrag_extent *new)
{
	struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
	struct old_sa_defrag_extent *old, *tmp;
	int ret;

	/* record_one_backref() reuses this path as its scratch buffer */
	new->path = path;

	list_for_each_entry_safe(old, tmp, &new->head, list) {
		ret = iterate_inodes_from_logical(old->bytenr, fs_info,
						  path, record_one_backref,
						  old);
		/* -ENOENT is tolerated (extent went away); others are fatal */
		BUG_ON(ret < 0 && ret != -ENOENT);

		/* no backref to be processed for this extent */
		if (!old->count) {
			list_del(&old->list);
			kfree(old);
		}
	}

	if (list_empty(&new->head))
		return false;

	return true;
}
2172
2173static int relink_is_mergable(struct extent_buffer *leaf,
2174 struct btrfs_file_extent_item *fi,
2175 u64 disk_bytenr)
2176{
2177 if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr)
2178 return 0;
2179
2180 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2181 return 0;
2182
2183 if (btrfs_file_extent_compression(leaf, fi) ||
2184 btrfs_file_extent_encryption(leaf, fi) ||
2185 btrfs_file_extent_other_encoding(leaf, fi))
2186 return 0;
2187
2188 return 1;
2189}
2190
/*
 * Note the backref might has changed, and in this case we just return 0.
 *
 * Rewrite one recorded backref so that its file extent item points at the
 * new (defragged) extent instead of the old one: look up the target root
 * and inode, lock the affected file range, drop the overlapping extents and
 * either merge into the previous item or insert a fresh one, then add an
 * extent ref for the new disk extent.
 *
 * @prev is the previously relinked backref (used to detect a contiguous
 * neighbor that allows merging) or NULL.
 *
 * Returns 1 if the backref was relinked, 0 if it was skipped (stale root or
 * inode, pending ordered extent, generation mismatch), negative errno on
 * failure.
 */
static noinline int relink_extent_backref(struct btrfs_path *path,
				 struct sa_defrag_extent_backref *prev,
				 struct sa_defrag_extent_backref *backref)
{
	struct btrfs_file_extent_item *extent;
	struct btrfs_file_extent_item *item;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_trans_handle *trans;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct old_sa_defrag_extent *old = backref->old;
	struct new_sa_defrag_extent *new = old->new;
	struct inode *src_inode = new->inode;
	struct inode *inode;
	struct extent_state *cached = NULL;
	int ret = 0;
	u64 start;
	u64 len;
	u64 lock_start;
	u64 lock_end;
	bool merge = false;
	int index;

	/* previous relinked extent is immediately before us in the same file */
	if (prev && prev->root_id == backref->root_id &&
	    prev->inum == backref->inum &&
	    prev->file_pos + prev->num_bytes == backref->file_pos)
		merge = true;

	/* step 1: get root */
	key.objectid = backref->root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	fs_info = BTRFS_I(src_inode)->root->fs_info;
	/* SRCU protects the subvolume from deletion while we look it up */
	index = srcu_read_lock(&fs_info->subvol_srcu);

	root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		if (PTR_ERR(root) == -ENOENT)
			return 0;
		return PTR_ERR(root);
	}
	if (btrfs_root_refs(&root->root_item) == 0) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		/* parse ENOENT to 0 */
		return 0;
	}

	/* step 2: get inode */
	key.objectid = backref->inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
	if (IS_ERR(inode)) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		return 0;
	}

	srcu_read_unlock(&fs_info->subvol_srcu, index);

	/* step 3: relink backref */
	lock_start = backref->file_pos;
	lock_end = backref->file_pos + backref->num_bytes - 1;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
			 0, &cached);

	/* dirty data in flight over this range: bail and retry never */
	ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
	if (ordered) {
		btrfs_put_ordered_extent(ordered);
		goto out_unlock;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	key.objectid = backref->inum;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = backref->file_pos;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		goto out_free_path;
	} else if (ret > 0) {
		/* the extent item is gone; nothing to relink (ret = 0) */
		ret = 0;
		goto out_free_path;
	}

	extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
				struct btrfs_file_extent_item);

	/* item was rewritten since we recorded the backref: skip (ret = 0) */
	if (btrfs_file_extent_generation(path->nodes[0], extent) !=
	    backref->generation)
		goto out_free_path;

	btrfs_release_path(path);

	/* clamp [start, start+len) to the overlap with the old extent */
	start = backref->file_pos;
	if (backref->extent_offset < old->extent_offset + old->offset)
		start += old->extent_offset + old->offset -
			 backref->extent_offset;

	len = min(backref->extent_offset + backref->num_bytes,
		  old->extent_offset + old->offset + old->len);
	len -= max(backref->extent_offset, old->extent_offset + old->offset);

	ret = btrfs_drop_extents(trans, root, inode, start,
				 start + len, 1);
	if (ret)
		goto out_free_path;
again:
	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = start;

	if (merge) {
		struct btrfs_file_extent_item *fi;
		u64 extent_len;
		struct btrfs_key found_key;

		ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
		if (ret < 0)
			goto out_free_path;

		/* look at the item just before the insertion point */
		path->slots[0]--;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_len = btrfs_file_extent_num_bytes(leaf, fi);

		if (relink_is_mergable(leaf, fi, new->bytenr) &&
		    extent_len + found_key.offset == start) {
			/* extend the neighbor instead of inserting */
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_len + len);
			btrfs_mark_buffer_dirty(leaf);
			inode_add_bytes(inode, len);

			ret = 1;
			goto out_free_path;
		} else {
			/* not mergeable after all: retry as plain insert */
			merge = false;
			btrfs_release_path(path);
			goto again;
		}
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key,
					sizeof(*extent));
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_free_path;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_file_extent_item);
	btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
	btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
	btrfs_set_file_extent_num_bytes(leaf, item, len);
	btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
	btrfs_set_file_extent_generation(leaf, item, trans->transid);
	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
	btrfs_set_file_extent_compression(leaf, item, new->compress_type);
	btrfs_set_file_extent_encryption(leaf, item, 0);
	btrfs_set_file_extent_other_encoding(leaf, item, 0);

	btrfs_mark_buffer_dirty(leaf);
	inode_add_bytes(inode, len);

	/*
	 * NOTE(review): the trailing comment suggests the owner offset
	 * should be (start - extent_offset); new->file_pos is passed
	 * instead -- confirm this matches how the ref was counted.
	 */
	ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
				   new->disk_len, 0,
				   backref->root_id, backref->inum,
				   new->file_pos, 0);	/* start - extent_offset */
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_free_path;
	}

	ret = 1;
out_free_path:
	btrfs_release_path(path);
	btrfs_end_transaction(trans, root);
out_unlock:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
			     &cached, GFP_NOFS);
	iput(inode);
	return ret;
}
2391
2392static void relink_file_extents(struct new_sa_defrag_extent *new)
2393{
2394 struct btrfs_path *path;
2395 struct old_sa_defrag_extent *old, *tmp;
2396 struct sa_defrag_extent_backref *backref;
2397 struct sa_defrag_extent_backref *prev = NULL;
2398 struct inode *inode;
2399 struct btrfs_root *root;
2400 struct rb_node *node;
2401 int ret;
2402
2403 inode = new->inode;
2404 root = BTRFS_I(inode)->root;
2405
2406 path = btrfs_alloc_path();
2407 if (!path)
2408 return;
2409
2410 if (!record_extent_backrefs(path, new)) {
2411 btrfs_free_path(path);
2412 goto out;
2413 }
2414 btrfs_release_path(path);
2415
2416 while (1) {
2417 node = rb_first(&new->root);
2418 if (!node)
2419 break;
2420 rb_erase(node, &new->root);
2421
2422 backref = rb_entry(node, struct sa_defrag_extent_backref, node);
2423
2424 ret = relink_extent_backref(path, prev, backref);
2425 WARN_ON(ret < 0);
2426
2427 kfree(prev);
2428
2429 if (ret == 1)
2430 prev = backref;
2431 else
2432 prev = NULL;
2433 cond_resched();
2434 }
2435 kfree(prev);
2436
2437 btrfs_free_path(path);
2438
2439 list_for_each_entry_safe(old, tmp, &new->head, list) {
2440 list_del(&old->list);
2441 kfree(old);
2442 }
2443out:
2444 atomic_dec(&root->fs_info->defrag_running);
2445 wake_up(&root->fs_info->transaction_wait);
2446
2447 kfree(new);
2448}
2449
/*
 * Snapshot-aware defrag, step one: before the ordered (defragged) extent is
 * committed, walk the inode's extent items covering [file_offset,
 * file_offset + len) and record every old on-disk extent that overlaps the
 * range, so the relink stage can later rewire other references to the new
 * extent.
 *
 * On success returns the populated new_sa_defrag_extent (with
 * fs_info->defrag_running elevated until relink_file_extents() drops it);
 * returns NULL on any failure, with everything allocated here freed.
 */
static struct new_sa_defrag_extent *
record_old_file_extents(struct inode *inode,
			struct btrfs_ordered_extent *ordered)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct old_sa_defrag_extent *old, *tmp;
	struct new_sa_defrag_extent *new;
	int ret;

	new = kmalloc(sizeof(*new), GFP_NOFS);
	if (!new)
		return NULL;

	/* describe the freshly written extent from the ordered extent */
	new->inode = inode;
	new->file_pos = ordered->file_offset;
	new->len = ordered->len;
	new->bytenr = ordered->start;
	new->disk_len = ordered->disk_len;
	new->compress_type = ordered->compress_type;
	new->root = RB_ROOT;
	INIT_LIST_HEAD(&new->head);

	path = btrfs_alloc_path();
	if (!path)
		goto out_kfree;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = new->file_pos;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free_path;
	/* no exact match: back up to the item that may straddle file_pos */
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	/* find out all the old extents for the file range */
	while (1) {
		struct btrfs_file_extent_item *extent;
		struct extent_buffer *l;
		int slot;
		u64 num_bytes;
		u64 offset;
		u64 end;
		u64 disk_bytenr;
		u64 extent_offset;

		l = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out_free_list;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(l, &key, slot);

		/* stop once we leave this inode's extent items or the range */
		if (key.objectid != btrfs_ino(inode))
			break;
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			break;
		if (key.offset >= new->file_pos + new->len)
			break;

		extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);

		num_bytes = btrfs_file_extent_num_bytes(l, extent);
		/* item ends before the defragged range: not an overlap */
		if (key.offset + num_bytes < new->file_pos)
			goto next;

		/* skip holes (disk_bytenr == 0) */
		disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
		if (!disk_bytenr)
			goto next;

		extent_offset = btrfs_file_extent_offset(l, extent);

		old = kmalloc(sizeof(*old), GFP_NOFS);
		if (!old)
			goto out_free_list;

		/* record only the portion overlapping the defragged range */
		offset = max(new->file_pos, key.offset);
		end = min(new->file_pos + new->len, key.offset + num_bytes);

		old->bytenr = disk_bytenr;
		old->extent_offset = extent_offset;
		old->offset = offset - key.offset;
		old->len = end - offset;
		old->new = new;
		old->count = 0;
		list_add_tail(&old->list, &new->head);
next:
		path->slots[0]++;
		cond_resched();
	}

	btrfs_free_path(path);
	/* pins defrag state; dropped by relink_file_extents() */
	atomic_inc(&root->fs_info->defrag_running);

	return new;

out_free_list:
	list_for_each_entry_safe(old, tmp, &new->head, list) {
		list_del(&old->list);
		kfree(old);
	}
out_free_path:
	btrfs_free_path(path);
out_kfree:
	kfree(new);
	return NULL;
}
2567
1895/* 2568/*
1896 * helper function for btrfs_finish_ordered_io, this 2569 * helper function for btrfs_finish_ordered_io, this
1897 * just reads in some of the csum leaves to prime them into ram 2570 * just reads in some of the csum leaves to prime them into ram
@@ -1909,6 +2582,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1909 struct btrfs_trans_handle *trans = NULL; 2582 struct btrfs_trans_handle *trans = NULL;
1910 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2583 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1911 struct extent_state *cached_state = NULL; 2584 struct extent_state *cached_state = NULL;
2585 struct new_sa_defrag_extent *new = NULL;
1912 int compress_type = 0; 2586 int compress_type = 0;
1913 int ret; 2587 int ret;
1914 bool nolock; 2588 bool nolock;
@@ -1943,6 +2617,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1943 ordered_extent->file_offset + ordered_extent->len - 1, 2617 ordered_extent->file_offset + ordered_extent->len - 1,
1944 0, &cached_state); 2618 0, &cached_state);
1945 2619
2620 ret = test_range_bit(io_tree, ordered_extent->file_offset,
2621 ordered_extent->file_offset + ordered_extent->len - 1,
2622 EXTENT_DEFRAG, 1, cached_state);
2623 if (ret) {
2624 u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
2625 if (last_snapshot >= BTRFS_I(inode)->generation)
2626 /* the inode is shared */
2627 new = record_old_file_extents(inode, ordered_extent);
2628
2629 clear_extent_bit(io_tree, ordered_extent->file_offset,
2630 ordered_extent->file_offset + ordered_extent->len - 1,
2631 EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
2632 }
2633
1946 if (nolock) 2634 if (nolock)
1947 trans = btrfs_join_transaction_nolock(root); 2635 trans = btrfs_join_transaction_nolock(root);
1948 else 2636 else
@@ -2001,17 +2689,33 @@ out:
2001 if (trans) 2689 if (trans)
2002 btrfs_end_transaction(trans, root); 2690 btrfs_end_transaction(trans, root);
2003 2691
2004 if (ret) 2692 if (ret) {
2005 clear_extent_uptodate(io_tree, ordered_extent->file_offset, 2693 clear_extent_uptodate(io_tree, ordered_extent->file_offset,
2006 ordered_extent->file_offset + 2694 ordered_extent->file_offset +
2007 ordered_extent->len - 1, NULL, GFP_NOFS); 2695 ordered_extent->len - 1, NULL, GFP_NOFS);
2008 2696
2697 /*
2698 * If the ordered extent had an IOERR or something else went
2699 * wrong we need to return the space for this ordered extent
2700 * back to the allocator.
2701 */
2702 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2703 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
2704 btrfs_free_reserved_extent(root, ordered_extent->start,
2705 ordered_extent->disk_len);
2706 }
2707
2708
2009 /* 2709 /*
2010 * This needs to be done to make sure anybody waiting knows we are done 2710 * This needs to be done to make sure anybody waiting knows we are done
2011 * updating everything for this ordered extent. 2711 * updating everything for this ordered extent.
2012 */ 2712 */
2013 btrfs_remove_ordered_extent(inode, ordered_extent); 2713 btrfs_remove_ordered_extent(inode, ordered_extent);
2014 2714
2715 /* for snapshot-aware defrag */
2716 if (new)
2717 relink_file_extents(new);
2718
2015 /* once for us */ 2719 /* once for us */
2016 btrfs_put_ordered_extent(ordered_extent); 2720 btrfs_put_ordered_extent(ordered_extent);
2017 /* once for the tree */ 2721 /* once for the tree */
@@ -2062,7 +2766,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2062static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, 2766static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2063 struct extent_state *state, int mirror) 2767 struct extent_state *state, int mirror)
2064{ 2768{
2065 size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); 2769 size_t offset = start - page_offset(page);
2066 struct inode *inode = page->mapping->host; 2770 struct inode *inode = page->mapping->host;
2067 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2771 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2068 char *kaddr; 2772 char *kaddr;
@@ -2167,11 +2871,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
2167 } 2871 }
2168} 2872}
2169 2873
2170enum btrfs_orphan_cleanup_state {
2171 ORPHAN_CLEANUP_STARTED = 1,
2172 ORPHAN_CLEANUP_DONE = 2,
2173};
2174
2175/* 2874/*
2176 * This is called in transaction commit time. If there are no orphan 2875 * This is called in transaction commit time. If there are no orphan
2177 * files in the subvolume, it removes orphan item and frees block_rsv 2876 * files in the subvolume, it removes orphan item and frees block_rsv
@@ -2469,6 +3168,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2469 */ 3168 */
2470 set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3169 set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2471 &BTRFS_I(inode)->runtime_flags); 3170 &BTRFS_I(inode)->runtime_flags);
3171 atomic_inc(&root->orphan_inodes);
2472 3172
2473 /* if we have links, this was a truncate, lets do that */ 3173 /* if we have links, this was a truncate, lets do that */
2474 if (inode->i_nlink) { 3174 if (inode->i_nlink) {
@@ -2491,6 +3191,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2491 goto out; 3191 goto out;
2492 3192
2493 ret = btrfs_truncate(inode); 3193 ret = btrfs_truncate(inode);
3194 if (ret)
3195 btrfs_orphan_del(NULL, inode);
2494 } else { 3196 } else {
2495 nr_unlink++; 3197 nr_unlink++;
2496 } 3198 }
@@ -2709,34 +3411,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2709 struct btrfs_inode_item *item, 3411 struct btrfs_inode_item *item,
2710 struct inode *inode) 3412 struct inode *inode)
2711{ 3413{
2712 btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); 3414 struct btrfs_map_token token;
2713 btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); 3415
2714 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 3416 btrfs_init_map_token(&token);
2715 btrfs_set_inode_mode(leaf, item, inode->i_mode); 3417
2716 btrfs_set_inode_nlink(leaf, item, inode->i_nlink); 3418 btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
3419 btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
3420 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
3421 &token);
3422 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3423 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
2717 3424
2718 btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), 3425 btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
2719 inode->i_atime.tv_sec); 3426 inode->i_atime.tv_sec, &token);
2720 btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), 3427 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
2721 inode->i_atime.tv_nsec); 3428 inode->i_atime.tv_nsec, &token);
2722 3429
2723 btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), 3430 btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
2724 inode->i_mtime.tv_sec); 3431 inode->i_mtime.tv_sec, &token);
2725 btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), 3432 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
2726 inode->i_mtime.tv_nsec); 3433 inode->i_mtime.tv_nsec, &token);
2727 3434
2728 btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), 3435 btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
2729 inode->i_ctime.tv_sec); 3436 inode->i_ctime.tv_sec, &token);
2730 btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), 3437 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
2731 inode->i_ctime.tv_nsec); 3438 inode->i_ctime.tv_nsec, &token);
2732 3439
2733 btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); 3440 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
2734 btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); 3441 &token);
2735 btrfs_set_inode_sequence(leaf, item, inode->i_version); 3442 btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
2736 btrfs_set_inode_transid(leaf, item, trans->transid); 3443 &token);
2737 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 3444 btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
2738 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 3445 btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
2739 btrfs_set_inode_block_group(leaf, item, 0); 3446 btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
3447 btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
3448 btrfs_set_token_inode_block_group(leaf, item, 0, &token);
2740} 3449}
2741 3450
2742/* 3451/*
@@ -3304,7 +4013,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3304 u64 extent_num_bytes = 0; 4013 u64 extent_num_bytes = 0;
3305 u64 extent_offset = 0; 4014 u64 extent_offset = 0;
3306 u64 item_end = 0; 4015 u64 item_end = 0;
3307 u64 mask = root->sectorsize - 1;
3308 u32 found_type = (u8)-1; 4016 u32 found_type = (u8)-1;
3309 int found_extent; 4017 int found_extent;
3310 int del_item; 4018 int del_item;
@@ -3328,7 +4036,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3328 * extent just the way it is. 4036 * extent just the way it is.
3329 */ 4037 */
3330 if (root->ref_cows || root == root->fs_info->tree_root) 4038 if (root->ref_cows || root == root->fs_info->tree_root)
3331 btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0); 4039 btrfs_drop_extent_cache(inode, ALIGN(new_size,
4040 root->sectorsize), (u64)-1, 0);
3332 4041
3333 /* 4042 /*
3334 * This function is also used to drop the items in the log tree before 4043 * This function is also used to drop the items in the log tree before
@@ -3407,10 +4116,9 @@ search_again:
3407 if (!del_item) { 4116 if (!del_item) {
3408 u64 orig_num_bytes = 4117 u64 orig_num_bytes =
3409 btrfs_file_extent_num_bytes(leaf, fi); 4118 btrfs_file_extent_num_bytes(leaf, fi);
3410 extent_num_bytes = new_size - 4119 extent_num_bytes = ALIGN(new_size -
3411 found_key.offset + root->sectorsize - 1; 4120 found_key.offset,
3412 extent_num_bytes = extent_num_bytes & 4121 root->sectorsize);
3413 ~((u64)root->sectorsize - 1);
3414 btrfs_set_file_extent_num_bytes(leaf, fi, 4122 btrfs_set_file_extent_num_bytes(leaf, fi,
3415 extent_num_bytes); 4123 extent_num_bytes);
3416 num_dec = (orig_num_bytes - 4124 num_dec = (orig_num_bytes -
@@ -3646,9 +4354,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3646 struct extent_map *em = NULL; 4354 struct extent_map *em = NULL;
3647 struct extent_state *cached_state = NULL; 4355 struct extent_state *cached_state = NULL;
3648 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 4356 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
3649 u64 mask = root->sectorsize - 1; 4357 u64 hole_start = ALIGN(oldsize, root->sectorsize);
3650 u64 hole_start = (oldsize + mask) & ~mask; 4358 u64 block_end = ALIGN(size, root->sectorsize);
3651 u64 block_end = (size + mask) & ~mask;
3652 u64 last_byte; 4359 u64 last_byte;
3653 u64 cur_offset; 4360 u64 cur_offset;
3654 u64 hole_size; 4361 u64 hole_size;
@@ -3681,7 +4388,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3681 break; 4388 break;
3682 } 4389 }
3683 last_byte = min(extent_map_end(em), block_end); 4390 last_byte = min(extent_map_end(em), block_end);
3684 last_byte = (last_byte + mask) & ~mask; 4391 last_byte = ALIGN(last_byte , root->sectorsize);
3685 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 4392 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3686 struct extent_map *hole_em; 4393 struct extent_map *hole_em;
3687 hole_size = last_byte - cur_offset; 4394 hole_size = last_byte - cur_offset;
@@ -3832,6 +4539,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
3832 4539
3833 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 4540 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3834 truncate_setsize(inode, newsize); 4541 truncate_setsize(inode, newsize);
4542
4543 /* Disable nonlocked read DIO to avoid the end less truncate */
4544 btrfs_inode_block_unlocked_dio(inode);
4545 inode_dio_wait(inode);
4546 btrfs_inode_resume_unlocked_dio(inode);
4547
3835 ret = btrfs_truncate(inode); 4548 ret = btrfs_truncate(inode);
3836 if (ret && inode->i_nlink) 4549 if (ret && inode->i_nlink)
3837 btrfs_orphan_del(NULL, inode); 4550 btrfs_orphan_del(NULL, inode);
@@ -3904,6 +4617,12 @@ void btrfs_evict_inode(struct inode *inode)
3904 goto no_delete; 4617 goto no_delete;
3905 } 4618 }
3906 4619
4620 ret = btrfs_commit_inode_delayed_inode(inode);
4621 if (ret) {
4622 btrfs_orphan_del(NULL, inode);
4623 goto no_delete;
4624 }
4625
3907 rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); 4626 rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
3908 if (!rsv) { 4627 if (!rsv) {
3909 btrfs_orphan_del(NULL, inode); 4628 btrfs_orphan_del(NULL, inode);
@@ -3941,7 +4660,7 @@ void btrfs_evict_inode(struct inode *inode)
3941 goto no_delete; 4660 goto no_delete;
3942 } 4661 }
3943 4662
3944 trans = btrfs_start_transaction_lflush(root, 1); 4663 trans = btrfs_join_transaction(root);
3945 if (IS_ERR(trans)) { 4664 if (IS_ERR(trans)) {
3946 btrfs_orphan_del(NULL, inode); 4665 btrfs_orphan_del(NULL, inode);
3947 btrfs_free_block_rsv(root, rsv); 4666 btrfs_free_block_rsv(root, rsv);
@@ -3955,9 +4674,6 @@ void btrfs_evict_inode(struct inode *inode)
3955 break; 4674 break;
3956 4675
3957 trans->block_rsv = &root->fs_info->trans_block_rsv; 4676 trans->block_rsv = &root->fs_info->trans_block_rsv;
3958 ret = btrfs_update_inode(trans, root, inode);
3959 BUG_ON(ret);
3960
3961 btrfs_end_transaction(trans, root); 4677 btrfs_end_transaction(trans, root);
3962 trans = NULL; 4678 trans = NULL;
3963 btrfs_btree_balance_dirty(root); 4679 btrfs_btree_balance_dirty(root);
@@ -4854,7 +5570,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4854 if (btrfs_test_opt(root, NODATASUM)) 5570 if (btrfs_test_opt(root, NODATASUM))
4855 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 5571 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
4856 if (btrfs_test_opt(root, NODATACOW)) 5572 if (btrfs_test_opt(root, NODATACOW))
4857 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; 5573 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
5574 BTRFS_INODE_NODATASUM;
4858 } 5575 }
4859 5576
4860 insert_inode_hash(inode); 5577 insert_inode_hash(inode);
@@ -5006,12 +5723,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
5006 goto out_unlock; 5723 goto out_unlock;
5007 } 5724 }
5008 5725
5009 err = btrfs_update_inode(trans, root, inode);
5010 if (err) {
5011 drop_inode = 1;
5012 goto out_unlock;
5013 }
5014
5015 /* 5726 /*
5016 * If the active LSM wants to access the inode during 5727 * If the active LSM wants to access the inode during
5017 * d_instantiate it needs these. Smack checks to see 5728 * d_instantiate it needs these. Smack checks to see
@@ -5396,8 +6107,7 @@ again:
5396 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 6107 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
5397 size_t size; 6108 size_t size;
5398 size = btrfs_file_extent_inline_len(leaf, item); 6109 size = btrfs_file_extent_inline_len(leaf, item);
5399 extent_end = (extent_start + size + root->sectorsize - 1) & 6110 extent_end = ALIGN(extent_start + size, root->sectorsize);
5400 ~((u64)root->sectorsize - 1);
5401 } 6111 }
5402 6112
5403 if (start >= extent_end) { 6113 if (start >= extent_end) {
@@ -5469,8 +6179,7 @@ again:
5469 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, 6179 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
5470 size - extent_offset); 6180 size - extent_offset);
5471 em->start = extent_start + extent_offset; 6181 em->start = extent_start + extent_offset;
5472 em->len = (copy_size + root->sectorsize - 1) & 6182 em->len = ALIGN(copy_size, root->sectorsize);
5473 ~((u64)root->sectorsize - 1);
5474 em->orig_block_len = em->len; 6183 em->orig_block_len = em->len;
5475 em->orig_start = em->start; 6184 em->orig_start = em->start;
5476 if (compress_type) { 6185 if (compress_type) {
@@ -5949,6 +6658,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
5949 6658
5950 em->start = start; 6659 em->start = start;
5951 em->orig_start = orig_start; 6660 em->orig_start = orig_start;
6661 em->mod_start = start;
6662 em->mod_len = len;
5952 em->len = len; 6663 em->len = len;
5953 em->block_len = block_len; 6664 em->block_len = block_len;
5954 em->block_start = block_start; 6665 em->block_start = block_start;
@@ -5990,16 +6701,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5990 u64 len = bh_result->b_size; 6701 u64 len = bh_result->b_size;
5991 struct btrfs_trans_handle *trans; 6702 struct btrfs_trans_handle *trans;
5992 int unlock_bits = EXTENT_LOCKED; 6703 int unlock_bits = EXTENT_LOCKED;
5993 int ret; 6704 int ret = 0;
5994 6705
5995 if (create) { 6706 if (create)
5996 ret = btrfs_delalloc_reserve_space(inode, len);
5997 if (ret)
5998 return ret;
5999 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; 6707 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
6000 } else { 6708 else
6001 len = min_t(u64, len, root->sectorsize); 6709 len = min_t(u64, len, root->sectorsize);
6002 }
6003 6710
6004 lockstart = start; 6711 lockstart = start;
6005 lockend = start + len - 1; 6712 lockend = start + len - 1;
@@ -6011,14 +6718,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6011 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) 6718 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
6012 return -ENOTBLK; 6719 return -ENOTBLK;
6013 6720
6014 if (create) {
6015 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6016 lockend, EXTENT_DELALLOC, NULL,
6017 &cached_state, GFP_NOFS);
6018 if (ret)
6019 goto unlock_err;
6020 }
6021
6022 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 6721 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
6023 if (IS_ERR(em)) { 6722 if (IS_ERR(em)) {
6024 ret = PTR_ERR(em); 6723 ret = PTR_ERR(em);
@@ -6050,7 +6749,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6050 if (!create && (em->block_start == EXTENT_MAP_HOLE || 6749 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
6051 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 6750 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
6052 free_extent_map(em); 6751 free_extent_map(em);
6053 ret = 0;
6054 goto unlock_err; 6752 goto unlock_err;
6055 } 6753 }
6056 6754
@@ -6148,6 +6846,15 @@ unlock:
6148 */ 6846 */
6149 if (start + len > i_size_read(inode)) 6847 if (start + len > i_size_read(inode))
6150 i_size_write(inode, start + len); 6848 i_size_write(inode, start + len);
6849
6850 spin_lock(&BTRFS_I(inode)->lock);
6851 BTRFS_I(inode)->outstanding_extents++;
6852 spin_unlock(&BTRFS_I(inode)->lock);
6853
6854 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6855 lockstart + len - 1, EXTENT_DELALLOC, NULL,
6856 &cached_state, GFP_NOFS);
6857 BUG_ON(ret);
6151 } 6858 }
6152 6859
6153 /* 6860 /*
@@ -6156,24 +6863,9 @@ unlock:
6156 * aren't using if there is any left over space. 6863 * aren't using if there is any left over space.
6157 */ 6864 */
6158 if (lockstart < lockend) { 6865 if (lockstart < lockend) {
6159 if (create && len < lockend - lockstart) { 6866 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6160 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 6867 lockend, unlock_bits, 1, 0,
6161 lockstart + len - 1, 6868 &cached_state, GFP_NOFS);
6162 unlock_bits | EXTENT_DEFRAG, 1, 0,
6163 &cached_state, GFP_NOFS);
6164 /*
6165 * Beside unlock, we also need to cleanup reserved space
6166 * for the left range by attaching EXTENT_DO_ACCOUNTING.
6167 */
6168 clear_extent_bit(&BTRFS_I(inode)->io_tree,
6169 lockstart + len, lockend,
6170 unlock_bits | EXTENT_DO_ACCOUNTING |
6171 EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
6172 } else {
6173 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6174 lockend, unlock_bits, 1, 0,
6175 &cached_state, GFP_NOFS);
6176 }
6177 } else { 6869 } else {
6178 free_extent_state(cached_state); 6870 free_extent_state(cached_state);
6179 } 6871 }
@@ -6183,9 +6875,6 @@ unlock:
6183 return 0; 6875 return 0;
6184 6876
6185unlock_err: 6877unlock_err:
6186 if (create)
6187 unlock_bits |= EXTENT_DO_ACCOUNTING;
6188
6189 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6878 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6190 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 6879 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
6191 return ret; 6880 return ret;
@@ -6426,19 +7115,24 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6426 int async_submit = 0; 7115 int async_submit = 0;
6427 7116
6428 map_length = orig_bio->bi_size; 7117 map_length = orig_bio->bi_size;
6429 ret = btrfs_map_block(root->fs_info, READ, start_sector << 9, 7118 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
6430 &map_length, NULL, 0); 7119 &map_length, NULL, 0);
6431 if (ret) { 7120 if (ret) {
6432 bio_put(orig_bio); 7121 bio_put(orig_bio);
6433 return -EIO; 7122 return -EIO;
6434 } 7123 }
6435
6436 if (map_length >= orig_bio->bi_size) { 7124 if (map_length >= orig_bio->bi_size) {
6437 bio = orig_bio; 7125 bio = orig_bio;
6438 goto submit; 7126 goto submit;
6439 } 7127 }
6440 7128
6441 async_submit = 1; 7129 /* async crcs make it difficult to collect full stripe writes. */
7130 if (btrfs_get_alloc_profile(root, 1) &
7131 (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
7132 async_submit = 0;
7133 else
7134 async_submit = 1;
7135
6442 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); 7136 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
6443 if (!bio) 7137 if (!bio)
6444 return -ENOMEM; 7138 return -ENOMEM;
@@ -6480,7 +7174,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6480 bio->bi_end_io = btrfs_end_dio_bio; 7174 bio->bi_end_io = btrfs_end_dio_bio;
6481 7175
6482 map_length = orig_bio->bi_size; 7176 map_length = orig_bio->bi_size;
6483 ret = btrfs_map_block(root->fs_info, READ, 7177 ret = btrfs_map_block(root->fs_info, rw,
6484 start_sector << 9, 7178 start_sector << 9,
6485 &map_length, NULL, 0); 7179 &map_length, NULL, 0);
6486 if (ret) { 7180 if (ret) {
@@ -6623,15 +7317,60 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6623{ 7317{
6624 struct file *file = iocb->ki_filp; 7318 struct file *file = iocb->ki_filp;
6625 struct inode *inode = file->f_mapping->host; 7319 struct inode *inode = file->f_mapping->host;
7320 size_t count = 0;
7321 int flags = 0;
7322 bool wakeup = true;
7323 bool relock = false;
7324 ssize_t ret;
6626 7325
6627 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 7326 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
6628 offset, nr_segs)) 7327 offset, nr_segs))
6629 return 0; 7328 return 0;
6630 7329
6631 return __blockdev_direct_IO(rw, iocb, inode, 7330 atomic_inc(&inode->i_dio_count);
6632 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 7331 smp_mb__after_atomic_inc();
6633 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, 7332
6634 btrfs_submit_direct, 0); 7333 if (rw & WRITE) {
7334 count = iov_length(iov, nr_segs);
7335 /*
7336 * If the write DIO is beyond the EOF, we need update
7337 * the isize, but it is protected by i_mutex. So we can
7338 * not unlock the i_mutex at this case.
7339 */
7340 if (offset + count <= inode->i_size) {
7341 mutex_unlock(&inode->i_mutex);
7342 relock = true;
7343 }
7344 ret = btrfs_delalloc_reserve_space(inode, count);
7345 if (ret)
7346 goto out;
7347 } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
7348 &BTRFS_I(inode)->runtime_flags))) {
7349 inode_dio_done(inode);
7350 flags = DIO_LOCKING | DIO_SKIP_HOLES;
7351 wakeup = false;
7352 }
7353
7354 ret = __blockdev_direct_IO(rw, iocb, inode,
7355 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
7356 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
7357 btrfs_submit_direct, flags);
7358 if (rw & WRITE) {
7359 if (ret < 0 && ret != -EIOCBQUEUED)
7360 btrfs_delalloc_release_space(inode, count);
7361 else if (ret >= 0 && (size_t)ret < count)
7362 btrfs_delalloc_release_space(inode,
7363 count - (size_t)ret);
7364 else
7365 btrfs_delalloc_release_metadata(inode, 0);
7366 }
7367out:
7368 if (wakeup)
7369 inode_dio_done(inode);
7370 if (relock)
7371 mutex_lock(&inode->i_mutex);
7372
7373 return ret;
6635} 7374}
6636 7375
6637#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) 7376#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
@@ -6735,8 +7474,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6735 return; 7474 return;
6736 } 7475 }
6737 lock_extent_bits(tree, page_start, page_end, 0, &cached_state); 7476 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
6738 ordered = btrfs_lookup_ordered_extent(inode, 7477 ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
6739 page_offset(page));
6740 if (ordered) { 7478 if (ordered) {
6741 /* 7479 /*
6742 * IO on this page will never be started, so we need 7480 * IO on this page will never be started, so we need
@@ -7216,8 +7954,9 @@ int btrfs_drop_inode(struct inode *inode)
7216{ 7954{
7217 struct btrfs_root *root = BTRFS_I(inode)->root; 7955 struct btrfs_root *root = BTRFS_I(inode)->root;
7218 7956
7957 /* the snap/subvol tree is on deleting */
7219 if (btrfs_root_refs(&root->root_item) == 0 && 7958 if (btrfs_root_refs(&root->root_item) == 0 &&
7220 !btrfs_is_free_space_inode(inode)) 7959 root != root->fs_info->tree_root)
7221 return 1; 7960 return 1;
7222 else 7961 else
7223 return generic_drop_inode(inode); 7962 return generic_drop_inode(inode);
@@ -7299,40 +8038,22 @@ fail:
7299static int btrfs_getattr(struct vfsmount *mnt, 8038static int btrfs_getattr(struct vfsmount *mnt,
7300 struct dentry *dentry, struct kstat *stat) 8039 struct dentry *dentry, struct kstat *stat)
7301{ 8040{
8041 u64 delalloc_bytes;
7302 struct inode *inode = dentry->d_inode; 8042 struct inode *inode = dentry->d_inode;
7303 u32 blocksize = inode->i_sb->s_blocksize; 8043 u32 blocksize = inode->i_sb->s_blocksize;
7304 8044
7305 generic_fillattr(inode, stat); 8045 generic_fillattr(inode, stat);
7306 stat->dev = BTRFS_I(inode)->root->anon_dev; 8046 stat->dev = BTRFS_I(inode)->root->anon_dev;
7307 stat->blksize = PAGE_CACHE_SIZE; 8047 stat->blksize = PAGE_CACHE_SIZE;
8048
8049 spin_lock(&BTRFS_I(inode)->lock);
8050 delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
8051 spin_unlock(&BTRFS_I(inode)->lock);
7308 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + 8052 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
7309 ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9; 8053 ALIGN(delalloc_bytes, blocksize)) >> 9;
7310 return 0; 8054 return 0;
7311} 8055}
7312 8056
7313/*
7314 * If a file is moved, it will inherit the cow and compression flags of the new
7315 * directory.
7316 */
7317static void fixup_inode_flags(struct inode *dir, struct inode *inode)
7318{
7319 struct btrfs_inode *b_dir = BTRFS_I(dir);
7320 struct btrfs_inode *b_inode = BTRFS_I(inode);
7321
7322 if (b_dir->flags & BTRFS_INODE_NODATACOW)
7323 b_inode->flags |= BTRFS_INODE_NODATACOW;
7324 else
7325 b_inode->flags &= ~BTRFS_INODE_NODATACOW;
7326
7327 if (b_dir->flags & BTRFS_INODE_COMPRESS) {
7328 b_inode->flags |= BTRFS_INODE_COMPRESS;
7329 b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS;
7330 } else {
7331 b_inode->flags &= ~(BTRFS_INODE_COMPRESS |
7332 BTRFS_INODE_NOCOMPRESS);
7333 }
7334}
7335
7336static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, 8057static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7337 struct inode *new_dir, struct dentry *new_dentry) 8058 struct inode *new_dir, struct dentry *new_dentry)
7338{ 8059{
@@ -7498,8 +8219,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7498 } 8219 }
7499 } 8220 }
7500 8221
7501 fixup_inode_flags(new_dir, old_inode);
7502
7503 ret = btrfs_add_link(trans, new_dir, old_inode, 8222 ret = btrfs_add_link(trans, new_dir, old_inode,
7504 new_dentry->d_name.name, 8223 new_dentry->d_name.name,
7505 new_dentry->d_name.len, 0, index); 8224 new_dentry->d_name.len, 0, index);
@@ -7583,7 +8302,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7583 8302
7584 INIT_LIST_HEAD(&works); 8303 INIT_LIST_HEAD(&works);
7585 INIT_LIST_HEAD(&splice); 8304 INIT_LIST_HEAD(&splice);
7586again: 8305
7587 spin_lock(&root->fs_info->delalloc_lock); 8306 spin_lock(&root->fs_info->delalloc_lock);
7588 list_splice_init(&root->fs_info->delalloc_inodes, &splice); 8307 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
7589 while (!list_empty(&splice)) { 8308 while (!list_empty(&splice)) {
@@ -7593,8 +8312,11 @@ again:
7593 list_del_init(&binode->delalloc_inodes); 8312 list_del_init(&binode->delalloc_inodes);
7594 8313
7595 inode = igrab(&binode->vfs_inode); 8314 inode = igrab(&binode->vfs_inode);
7596 if (!inode) 8315 if (!inode) {
8316 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
8317 &binode->runtime_flags);
7597 continue; 8318 continue;
8319 }
7598 8320
7599 list_add_tail(&binode->delalloc_inodes, 8321 list_add_tail(&binode->delalloc_inodes,
7600 &root->fs_info->delalloc_inodes); 8322 &root->fs_info->delalloc_inodes);
@@ -7619,13 +8341,6 @@ again:
7619 btrfs_wait_and_free_delalloc_work(work); 8341 btrfs_wait_and_free_delalloc_work(work);
7620 } 8342 }
7621 8343
7622 spin_lock(&root->fs_info->delalloc_lock);
7623 if (!list_empty(&root->fs_info->delalloc_inodes)) {
7624 spin_unlock(&root->fs_info->delalloc_lock);
7625 goto again;
7626 }
7627 spin_unlock(&root->fs_info->delalloc_lock);
7628
7629 /* the filemap_flush will queue IO into the worker threads, but 8344 /* the filemap_flush will queue IO into the worker threads, but
7630 * we have to make sure the IO is actually started and that 8345 * we have to make sure the IO is actually started and that
7631 * ordered extents get created before we return 8346 * ordered extents get created before we return
@@ -7801,8 +8516,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7801 } 8516 }
7802 } 8517 }
7803 8518
7804 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 8519 ret = btrfs_reserve_extent(trans, root,
7805 0, *alloc_hint, &ins, 1); 8520 min(num_bytes, 256ULL * 1024 * 1024),
8521 min_size, 0, *alloc_hint, &ins, 1);
7806 if (ret) { 8522 if (ret) {
7807 if (own_trans) 8523 if (own_trans)
7808 btrfs_end_transaction(trans, root); 8524 btrfs_end_transaction(trans, root);