aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c1064
1 files changed, 890 insertions, 174 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 55c07b650378..c226daefd65d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,12 +39,13 @@
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h> 40#include <linux/ratelimit.h>
41#include <linux/mount.h> 41#include <linux/mount.h>
42#include <linux/btrfs.h>
43#include <linux/blkdev.h>
42#include "compat.h" 44#include "compat.h"
43#include "ctree.h" 45#include "ctree.h"
44#include "disk-io.h" 46#include "disk-io.h"
45#include "transaction.h" 47#include "transaction.h"
46#include "btrfs_inode.h" 48#include "btrfs_inode.h"
47#include "ioctl.h"
48#include "print-tree.h" 49#include "print-tree.h"
49#include "ordered-data.h" 50#include "ordered-data.h"
50#include "xattr.h" 51#include "xattr.h"
@@ -54,6 +55,7 @@
54#include "locking.h" 55#include "locking.h"
55#include "free-space-cache.h" 56#include "free-space-cache.h"
56#include "inode-map.h" 57#include "inode-map.h"
58#include "backref.h"
57 59
58struct btrfs_iget_args { 60struct btrfs_iget_args {
59 u64 ino; 61 u64 ino;
@@ -231,8 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
231 u64 isize = i_size_read(inode); 233 u64 isize = i_size_read(inode);
232 u64 actual_end = min(end + 1, isize); 234 u64 actual_end = min(end + 1, isize);
233 u64 inline_len = actual_end - start; 235 u64 inline_len = actual_end - start;
234 u64 aligned_end = (end + root->sectorsize - 1) & 236 u64 aligned_end = ALIGN(end, root->sectorsize);
235 ~((u64)root->sectorsize - 1);
236 u64 data_len = inline_len; 237 u64 data_len = inline_len;
237 int ret; 238 int ret;
238 239
@@ -265,6 +266,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
265 return 1; 266 return 1;
266 } 267 }
267 268
269 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
268 btrfs_delalloc_release_metadata(inode, end + 1 - start); 270 btrfs_delalloc_release_metadata(inode, end + 1 - start);
269 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 271 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
270 return 0; 272 return 0;
@@ -389,7 +391,7 @@ again:
389 * a compressed extent to 128k. 391 * a compressed extent to 128k.
390 */ 392 */
391 total_compressed = min(total_compressed, max_uncompressed); 393 total_compressed = min(total_compressed, max_uncompressed);
392 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 394 num_bytes = ALIGN(end - start + 1, blocksize);
393 num_bytes = max(blocksize, num_bytes); 395 num_bytes = max(blocksize, num_bytes);
394 total_in = 0; 396 total_in = 0;
395 ret = 0; 397 ret = 0;
@@ -488,15 +490,13 @@ cont:
488 * up to a block size boundary so the allocator does sane 490 * up to a block size boundary so the allocator does sane
489 * things 491 * things
490 */ 492 */
491 total_compressed = (total_compressed + blocksize - 1) & 493 total_compressed = ALIGN(total_compressed, blocksize);
492 ~(blocksize - 1);
493 494
494 /* 495 /*
495 * one last check to make sure the compression is really a 496 * one last check to make sure the compression is really a
496 * win, compare the page count read with the blocks on disk 497 * win, compare the page count read with the blocks on disk
497 */ 498 */
498 total_in = (total_in + PAGE_CACHE_SIZE - 1) & 499 total_in = ALIGN(total_in, PAGE_CACHE_SIZE);
499 ~(PAGE_CACHE_SIZE - 1);
500 if (total_compressed >= total_in) { 500 if (total_compressed >= total_in) {
501 will_compress = 0; 501 will_compress = 0;
502 } else { 502 } else {
@@ -608,7 +608,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
608 if (list_empty(&async_cow->extents)) 608 if (list_empty(&async_cow->extents))
609 return 0; 609 return 0;
610 610
611 611again:
612 while (!list_empty(&async_cow->extents)) { 612 while (!list_empty(&async_cow->extents)) {
613 async_extent = list_entry(async_cow->extents.next, 613 async_extent = list_entry(async_cow->extents.next,
614 struct async_extent, list); 614 struct async_extent, list);
@@ -648,6 +648,8 @@ retry:
648 async_extent->ram_size - 1, 648 async_extent->ram_size - 1,
649 btrfs_get_extent, 649 btrfs_get_extent,
650 WB_SYNC_ALL); 650 WB_SYNC_ALL);
651 else if (ret)
652 unlock_page(async_cow->locked_page);
651 kfree(async_extent); 653 kfree(async_extent);
652 cond_resched(); 654 cond_resched();
653 continue; 655 continue;
@@ -672,6 +674,7 @@ retry:
672 674
673 if (ret) { 675 if (ret) {
674 int i; 676 int i;
677
675 for (i = 0; i < async_extent->nr_pages; i++) { 678 for (i = 0; i < async_extent->nr_pages; i++) {
676 WARN_ON(async_extent->pages[i]->mapping); 679 WARN_ON(async_extent->pages[i]->mapping);
677 page_cache_release(async_extent->pages[i]); 680 page_cache_release(async_extent->pages[i]);
@@ -679,12 +682,10 @@ retry:
679 kfree(async_extent->pages); 682 kfree(async_extent->pages);
680 async_extent->nr_pages = 0; 683 async_extent->nr_pages = 0;
681 async_extent->pages = NULL; 684 async_extent->pages = NULL;
682 unlock_extent(io_tree, async_extent->start, 685
683 async_extent->start +
684 async_extent->ram_size - 1);
685 if (ret == -ENOSPC) 686 if (ret == -ENOSPC)
686 goto retry; 687 goto retry;
687 goto out_free; /* JDM: Requeue? */ 688 goto out_free;
688 } 689 }
689 690
690 /* 691 /*
@@ -696,10 +697,13 @@ retry:
696 async_extent->ram_size - 1, 0); 697 async_extent->ram_size - 1, 0);
697 698
698 em = alloc_extent_map(); 699 em = alloc_extent_map();
699 BUG_ON(!em); /* -ENOMEM */ 700 if (!em)
701 goto out_free_reserve;
700 em->start = async_extent->start; 702 em->start = async_extent->start;
701 em->len = async_extent->ram_size; 703 em->len = async_extent->ram_size;
702 em->orig_start = em->start; 704 em->orig_start = em->start;
705 em->mod_start = em->start;
706 em->mod_len = em->len;
703 707
704 em->block_start = ins.objectid; 708 em->block_start = ins.objectid;
705 em->block_len = ins.offset; 709 em->block_len = ins.offset;
@@ -726,6 +730,9 @@ retry:
726 async_extent->ram_size - 1, 0); 730 async_extent->ram_size - 1, 0);
727 } 731 }
728 732
733 if (ret)
734 goto out_free_reserve;
735
729 ret = btrfs_add_ordered_extent_compress(inode, 736 ret = btrfs_add_ordered_extent_compress(inode,
730 async_extent->start, 737 async_extent->start,
731 ins.objectid, 738 ins.objectid,
@@ -733,7 +740,8 @@ retry:
733 ins.offset, 740 ins.offset,
734 BTRFS_ORDERED_COMPRESSED, 741 BTRFS_ORDERED_COMPRESSED,
735 async_extent->compress_type); 742 async_extent->compress_type);
736 BUG_ON(ret); /* -ENOMEM */ 743 if (ret)
744 goto out_free_reserve;
737 745
738 /* 746 /*
739 * clear dirty, set writeback and unlock the pages. 747 * clear dirty, set writeback and unlock the pages.
@@ -754,18 +762,30 @@ retry:
754 ins.objectid, 762 ins.objectid,
755 ins.offset, async_extent->pages, 763 ins.offset, async_extent->pages,
756 async_extent->nr_pages); 764 async_extent->nr_pages);
757
758 BUG_ON(ret); /* -ENOMEM */
759 alloc_hint = ins.objectid + ins.offset; 765 alloc_hint = ins.objectid + ins.offset;
760 kfree(async_extent); 766 kfree(async_extent);
767 if (ret)
768 goto out;
761 cond_resched(); 769 cond_resched();
762 } 770 }
763 ret = 0; 771 ret = 0;
764out: 772out:
765 return ret; 773 return ret;
774out_free_reserve:
775 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
766out_free: 776out_free:
777 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
778 async_extent->start,
779 async_extent->start +
780 async_extent->ram_size - 1,
781 NULL, EXTENT_CLEAR_UNLOCK_PAGE |
782 EXTENT_CLEAR_UNLOCK |
783 EXTENT_CLEAR_DELALLOC |
784 EXTENT_CLEAR_DIRTY |
785 EXTENT_SET_WRITEBACK |
786 EXTENT_END_WRITEBACK);
767 kfree(async_extent); 787 kfree(async_extent);
768 goto out; 788 goto again;
769} 789}
770 790
771static u64 get_extent_allocation_hint(struct inode *inode, u64 start, 791static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@@ -834,7 +854,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
834 854
835 BUG_ON(btrfs_is_free_space_inode(inode)); 855 BUG_ON(btrfs_is_free_space_inode(inode));
836 856
837 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 857 num_bytes = ALIGN(end - start + 1, blocksize);
838 num_bytes = max(blocksize, num_bytes); 858 num_bytes = max(blocksize, num_bytes);
839 disk_num_bytes = num_bytes; 859 disk_num_bytes = num_bytes;
840 860
@@ -892,6 +912,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
892 em->orig_start = em->start; 912 em->orig_start = em->start;
893 ram_size = ins.offset; 913 ram_size = ins.offset;
894 em->len = ins.offset; 914 em->len = ins.offset;
915 em->mod_start = em->start;
916 em->mod_len = em->len;
895 917
896 em->block_start = ins.objectid; 918 em->block_start = ins.objectid;
897 em->block_len = ins.offset; 919 em->block_len = ins.offset;
@@ -1338,6 +1360,8 @@ out_check:
1338 em->block_start = disk_bytenr; 1360 em->block_start = disk_bytenr;
1339 em->orig_block_len = disk_num_bytes; 1361 em->orig_block_len = disk_num_bytes;
1340 em->bdev = root->fs_info->fs_devices->latest_bdev; 1362 em->bdev = root->fs_info->fs_devices->latest_bdev;
1363 em->mod_start = em->start;
1364 em->mod_len = em->len;
1341 set_bit(EXTENT_FLAG_PINNED, &em->flags); 1365 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1342 set_bit(EXTENT_FLAG_FILLING, &em->flags); 1366 set_bit(EXTENT_FLAG_FILLING, &em->flags);
1343 em->generation = -1; 1367 em->generation = -1;
@@ -1508,14 +1532,22 @@ static void btrfs_set_bit_hook(struct inode *inode,
1508 spin_unlock(&BTRFS_I(inode)->lock); 1532 spin_unlock(&BTRFS_I(inode)->lock);
1509 } 1533 }
1510 1534
1511 spin_lock(&root->fs_info->delalloc_lock); 1535 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1536 root->fs_info->delalloc_batch);
1537 spin_lock(&BTRFS_I(inode)->lock);
1512 BTRFS_I(inode)->delalloc_bytes += len; 1538 BTRFS_I(inode)->delalloc_bytes += len;
1513 root->fs_info->delalloc_bytes += len; 1539 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1514 if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1540 &BTRFS_I(inode)->runtime_flags)) {
1515 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1541 spin_lock(&root->fs_info->delalloc_lock);
1516 &root->fs_info->delalloc_inodes); 1542 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1543 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1544 &root->fs_info->delalloc_inodes);
1545 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1546 &BTRFS_I(inode)->runtime_flags);
1547 }
1548 spin_unlock(&root->fs_info->delalloc_lock);
1517 } 1549 }
1518 spin_unlock(&root->fs_info->delalloc_lock); 1550 spin_unlock(&BTRFS_I(inode)->lock);
1519 } 1551 }
1520} 1552}
1521 1553
@@ -1550,15 +1582,22 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1550 && do_list) 1582 && do_list)
1551 btrfs_free_reserved_data_space(inode, len); 1583 btrfs_free_reserved_data_space(inode, len);
1552 1584
1553 spin_lock(&root->fs_info->delalloc_lock); 1585 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
1554 root->fs_info->delalloc_bytes -= len; 1586 root->fs_info->delalloc_batch);
1587 spin_lock(&BTRFS_I(inode)->lock);
1555 BTRFS_I(inode)->delalloc_bytes -= len; 1588 BTRFS_I(inode)->delalloc_bytes -= len;
1556
1557 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && 1589 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1558 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1590 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1559 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1591 &BTRFS_I(inode)->runtime_flags)) {
1592 spin_lock(&root->fs_info->delalloc_lock);
1593 if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1594 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1595 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1596 &BTRFS_I(inode)->runtime_flags);
1597 }
1598 spin_unlock(&root->fs_info->delalloc_lock);
1560 } 1599 }
1561 spin_unlock(&root->fs_info->delalloc_lock); 1600 spin_unlock(&BTRFS_I(inode)->lock);
1562 } 1601 }
1563} 1602}
1564 1603
@@ -1566,7 +1605,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1566 * extent_io.c merge_bio_hook, this must check the chunk tree to make sure 1605 * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
1567 * we don't create bios that span stripes or chunks 1606 * we don't create bios that span stripes or chunks
1568 */ 1607 */
1569int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 1608int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1570 size_t size, struct bio *bio, 1609 size_t size, struct bio *bio,
1571 unsigned long bio_flags) 1610 unsigned long bio_flags)
1572{ 1611{
@@ -1581,7 +1620,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1581 1620
1582 length = bio->bi_size; 1621 length = bio->bi_size;
1583 map_length = length; 1622 map_length = length;
1584 ret = btrfs_map_block(root->fs_info, READ, logical, 1623 ret = btrfs_map_block(root->fs_info, rw, logical,
1585 &map_length, NULL, 0); 1624 &map_length, NULL, 0);
1586 /* Will always return 0 with map_multi == NULL */ 1625 /* Will always return 0 with map_multi == NULL */
1587 BUG_ON(ret < 0); 1626 BUG_ON(ret < 0);
@@ -1892,6 +1931,640 @@ out:
1892 return ret; 1931 return ret;
1893} 1932}
1894 1933
1934/* snapshot-aware defrag */
/*
 * One reference to an old extent, found in some (root, inode) other than
 * the file being defragmented.  Allocated by record_one_backref() and
 * consumed (then freed) by relink_file_extents().
 */
1935struct sa_defrag_extent_backref {
 /* linkage in new_sa_defrag_extent::root, ordered by backref_comp() */
1936 struct rb_node node;
 /* the old extent this reference points at */
1937 struct old_sa_defrag_extent *old;
 /* objectid of the tree that holds the referencing inode */
1938 u64 root_id;
 /* inode number of the referencing file */
1939 u64 inum;
 /* set to (iterator offset + extent_offset) when the backref is recorded */
1940 u64 file_pos;
 /* "offset" field of the referencing file extent item */
1941 u64 extent_offset;
 /* "num_bytes" field of the referencing file extent item */
1942 u64 num_bytes;
 /* generation of the item when recorded; rechecked before relinking */
1943 u64 generation;
1944};
1945
/*
 * One pre-defrag file extent item of the defragmented inode that overlaps
 * the range of the new extent.  Built by record_old_file_extents().
 */
1946struct old_sa_defrag_extent {
 /* linkage in new_sa_defrag_extent::head */
1947 struct list_head list;
 /* the new extent that replaces this one */
1948 struct new_sa_defrag_extent *new;
1949
 /* "offset" field of the old file extent item */
1950 u64 extent_offset;
 /* disk bytenr of the old file extent item (never 0, holes are skipped) */
1951 u64 bytenr;
 /* start of the overlap, relative to the old item's key offset */
1952 u64 offset;
 /* length of the overlap with the new extent's file range */
1953 u64 len;
 /* number of backrefs recorded against this extent */
1954 int count;
1955};
1956
/*
 * Describes the extent written by defrag, plus everything that has to be
 * relinked to it.  Filled in from the ordered extent by
 * record_old_file_extents() and freed by relink_file_extents().
 */
1957struct new_sa_defrag_extent {
 /* rb-tree of struct sa_defrag_extent_backref */
1958 struct rb_root root;
 /* list of struct old_sa_defrag_extent */
1959 struct list_head head;
 /* scratch path, set by record_extent_backrefs() for record_one_backref() */
1960 struct btrfs_path *path;
 /* the inode that was defragmented */
1961 struct inode *inode;
 /* copied from ordered->file_offset */
1962 u64 file_pos;
 /* copied from ordered->len */
1963 u64 len;
 /* copied from ordered->start */
1964 u64 bytenr;
 /* copied from ordered->disk_len */
1965 u64 disk_len;
 /* copied from ordered->compress_type */
1966 u8 compress_type;
1967};
1968
1969static int backref_comp(struct sa_defrag_extent_backref *b1,
1970 struct sa_defrag_extent_backref *b2)
1971{
1972 if (b1->root_id < b2->root_id)
1973 return -1;
1974 else if (b1->root_id > b2->root_id)
1975 return 1;
1976
1977 if (b1->inum < b2->inum)
1978 return -1;
1979 else if (b1->inum > b2->inum)
1980 return 1;
1981
1982 if (b1->file_pos < b2->file_pos)
1983 return -1;
1984 else if (b1->file_pos > b2->file_pos)
1985 return 1;
1986
1987 /*
1988 * [------------------------------] ===> (a range of space)
1989 * |<--->| |<---->| =============> (fs/file tree A)
1990 * |<---------------------------->| ===> (fs/file tree B)
1991 *
1992 * A range of space can refer to two file extents in one tree while
1993 * refer to only one file extent in another tree.
1994 *
1995 * So we may process a disk offset more than one time(two extents in A)
1996 * and locate at the same extent(one extent in B), then insert two same
1997 * backrefs(both refer to the extent in B).
1998 */
1999 return 0;
2000}
2001
2002static void backref_insert(struct rb_root *root,
2003 struct sa_defrag_extent_backref *backref)
2004{
2005 struct rb_node **p = &root->rb_node;
2006 struct rb_node *parent = NULL;
2007 struct sa_defrag_extent_backref *entry;
2008 int ret;
2009
2010 while (*p) {
2011 parent = *p;
2012 entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
2013
2014 ret = backref_comp(backref, entry);
2015 if (ret < 0)
2016 p = &(*p)->rb_left;
2017 else
2018 p = &(*p)->rb_right;
2019 }
2020
2021 rb_link_node(&backref->node, parent, p);
2022 rb_insert_color(&backref->node, root);
2023}
2024
2025/*
2026 * Note the backref might has changed, and in this case we just return 0.
2027 */
2028static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2029 void *ctx)
2030{
2031 struct btrfs_file_extent_item *extent;
2032 struct btrfs_fs_info *fs_info;
2033 struct old_sa_defrag_extent *old = ctx;
2034 struct new_sa_defrag_extent *new = old->new;
2035 struct btrfs_path *path = new->path;
2036 struct btrfs_key key;
2037 struct btrfs_root *root;
2038 struct sa_defrag_extent_backref *backref;
2039 struct extent_buffer *leaf;
2040 struct inode *inode = new->inode;
2041 int slot;
2042 int ret;
2043 u64 extent_offset;
2044 u64 num_bytes;
2045
2046 if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
2047 inum == btrfs_ino(inode))
2048 return 0;
2049
2050 key.objectid = root_id;
2051 key.type = BTRFS_ROOT_ITEM_KEY;
2052 key.offset = (u64)-1;
2053
2054 fs_info = BTRFS_I(inode)->root->fs_info;
2055 root = btrfs_read_fs_root_no_name(fs_info, &key);
2056 if (IS_ERR(root)) {
2057 if (PTR_ERR(root) == -ENOENT)
2058 return 0;
2059 WARN_ON(1);
2060 pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
2061 inum, offset, root_id);
2062 return PTR_ERR(root);
2063 }
2064
2065 key.objectid = inum;
2066 key.type = BTRFS_EXTENT_DATA_KEY;
2067 if (offset > (u64)-1 << 32)
2068 key.offset = 0;
2069 else
2070 key.offset = offset;
2071
2072 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2073 if (ret < 0) {
2074 WARN_ON(1);
2075 return ret;
2076 }
2077
2078 while (1) {
2079 cond_resched();
2080
2081 leaf = path->nodes[0];
2082 slot = path->slots[0];
2083
2084 if (slot >= btrfs_header_nritems(leaf)) {
2085 ret = btrfs_next_leaf(root, path);
2086 if (ret < 0) {
2087 goto out;
2088 } else if (ret > 0) {
2089 ret = 0;
2090 goto out;
2091 }
2092 continue;
2093 }
2094
2095 path->slots[0]++;
2096
2097 btrfs_item_key_to_cpu(leaf, &key, slot);
2098
2099 if (key.objectid > inum)
2100 goto out;
2101
2102 if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
2103 continue;
2104
2105 extent = btrfs_item_ptr(leaf, slot,
2106 struct btrfs_file_extent_item);
2107
2108 if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
2109 continue;
2110
2111 extent_offset = btrfs_file_extent_offset(leaf, extent);
2112 if (key.offset - extent_offset != offset)
2113 continue;
2114
2115 num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
2116 if (extent_offset >= old->extent_offset + old->offset +
2117 old->len || extent_offset + num_bytes <=
2118 old->extent_offset + old->offset)
2119 continue;
2120
2121 break;
2122 }
2123
2124 backref = kmalloc(sizeof(*backref), GFP_NOFS);
2125 if (!backref) {
2126 ret = -ENOENT;
2127 goto out;
2128 }
2129
2130 backref->root_id = root_id;
2131 backref->inum = inum;
2132 backref->file_pos = offset + extent_offset;
2133 backref->num_bytes = num_bytes;
2134 backref->extent_offset = extent_offset;
2135 backref->generation = btrfs_file_extent_generation(leaf, extent);
2136 backref->old = old;
2137 backref_insert(&new->root, backref);
2138 old->count++;
2139out:
2140 btrfs_release_path(path);
2141 WARN_ON(ret);
2142 return ret;
2143}
2144
2145static noinline bool record_extent_backrefs(struct btrfs_path *path,
2146 struct new_sa_defrag_extent *new)
2147{
2148 struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
2149 struct old_sa_defrag_extent *old, *tmp;
2150 int ret;
2151
2152 new->path = path;
2153
2154 list_for_each_entry_safe(old, tmp, &new->head, list) {
2155 ret = iterate_inodes_from_logical(old->bytenr, fs_info,
2156 path, record_one_backref,
2157 old);
2158 BUG_ON(ret < 0 && ret != -ENOENT);
2159
2160 /* no backref to be processed for this extent */
2161 if (!old->count) {
2162 list_del(&old->list);
2163 kfree(old);
2164 }
2165 }
2166
2167 if (list_empty(&new->head))
2168 return false;
2169
2170 return true;
2171}
2172
2173static int relink_is_mergable(struct extent_buffer *leaf,
2174 struct btrfs_file_extent_item *fi,
2175 u64 disk_bytenr)
2176{
2177 if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr)
2178 return 0;
2179
2180 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2181 return 0;
2182
2183 if (btrfs_file_extent_compression(leaf, fi) ||
2184 btrfs_file_extent_encryption(leaf, fi) ||
2185 btrfs_file_extent_other_encoding(leaf, fi))
2186 return 0;
2187
2188 return 1;
2189}
2190
/*
 * Point one recorded backref at the new (defragmented) extent.
 *
 * @path:    scratch path, released again before returning
 * @prev:    the backref relinked just before this one, or NULL
 * @backref: the backref to relink
 *
 * Returns 1 if a file extent item was extended or inserted, 0 if the
 * backref was skipped (root or inode gone, ordered extent pending, item
 * missing or generation changed), and a negative errno on failure.
 */
2191/*
2192 * Note the backref might has changed, and in this case we just return 0.
2193 */
2194static noinline int relink_extent_backref(struct btrfs_path *path,
2195 struct sa_defrag_extent_backref *prev,
2196 struct sa_defrag_extent_backref *backref)
2197{
2198 struct btrfs_file_extent_item *extent;
2199 struct btrfs_file_extent_item *item;
2200 struct btrfs_ordered_extent *ordered;
2201 struct btrfs_trans_handle *trans;
2202 struct btrfs_fs_info *fs_info;
2203 struct btrfs_root *root;
2204 struct btrfs_key key;
2205 struct extent_buffer *leaf;
2206 struct old_sa_defrag_extent *old = backref->old;
2207 struct new_sa_defrag_extent *new = old->new;
2208 struct inode *src_inode = new->inode;
2209 struct inode *inode;
2210 struct extent_state *cached = NULL;
2211 int ret = 0;
2212 u64 start;
2213 u64 len;
2214 u64 lock_start;
2215 u64 lock_end;
2216 bool merge = false;
2217 int index;
2218
 /*
  * Try to merge only when the previous backref was in the same file and
  * ended exactly where this one starts.
  */
2219 if (prev && prev->root_id == backref->root_id &&
2220 prev->inum == backref->inum &&
2221 prev->file_pos + prev->num_bytes == backref->file_pos)
2222 merge = true;
2223
2224 /* step 1: get root */
2225 key.objectid = backref->root_id;
2226 key.type = BTRFS_ROOT_ITEM_KEY;
2227 key.offset = (u64)-1;
2228
2229 fs_info = BTRFS_I(src_inode)->root->fs_info;
 /* subvol_srcu is held across the root and inode lookups below */
2230 index = srcu_read_lock(&fs_info->subvol_srcu);
2231
2232 root = btrfs_read_fs_root_no_name(fs_info, &key);
2233 if (IS_ERR(root)) {
2234 srcu_read_unlock(&fs_info->subvol_srcu, index);
2235 if (PTR_ERR(root) == -ENOENT)
2236 return 0;
2237 return PTR_ERR(root);
2238 }
2239 if (btrfs_root_refs(&root->root_item) == 0) {
2240 srcu_read_unlock(&fs_info->subvol_srcu, index);
2241 /* parse ENOENT to 0 */
2242 return 0;
2243 }
2244
2245 /* step 2: get inode */
2246 key.objectid = backref->inum;
2247 key.type = BTRFS_INODE_ITEM_KEY;
2248 key.offset = 0;
2249
2250 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
2251 if (IS_ERR(inode)) {
 /* any inode lookup failure is treated as "backref changed" */
2252 srcu_read_unlock(&fs_info->subvol_srcu, index);
2253 return 0;
2254 }
2255
2256 srcu_read_unlock(&fs_info->subvol_srcu, index);
2257
2258 /* step 3: relink backref */
2259 lock_start = backref->file_pos;
2260 lock_end = backref->file_pos + backref->num_bytes - 1;
2261 lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2262 0, &cached);
2263
 /* skip the backref (ret stays 0) if an ordered extent is found */
2264 ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
2265 if (ordered) {
2266 btrfs_put_ordered_extent(ordered);
2267 goto out_unlock;
2268 }
2269
2270 trans = btrfs_join_transaction(root);
2271 if (IS_ERR(trans)) {
2272 ret = PTR_ERR(trans);
2273 goto out_unlock;
2274 }
2275
2276 key.objectid = backref->inum;
2277 key.type = BTRFS_EXTENT_DATA_KEY;
2278 key.offset = backref->file_pos;
2279
 /* read-only lookup: the item must still exist at exactly this key */
2280 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2281 if (ret < 0) {
2282 goto out_free_path;
2283 } else if (ret > 0) {
2284 ret = 0;
2285 goto out_free_path;
2286 }
2287
2288 extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
2289 struct btrfs_file_extent_item);
2290
 /* ret is 0 here, so a changed generation returns 0 (skipped) */
2291 if (btrfs_file_extent_generation(path->nodes[0], extent) !=
2292 backref->generation)
2293 goto out_free_path;
2294
2295 btrfs_release_path(path);
2296
 /* clamp [start, start + len) to the part that overlaps the old range */
2297 start = backref->file_pos;
2298 if (backref->extent_offset < old->extent_offset + old->offset)
2299 start += old->extent_offset + old->offset -
2300 backref->extent_offset;
2301
2302 len = min(backref->extent_offset + backref->num_bytes,
2303 old->extent_offset + old->offset + old->len);
2304 len -= max(backref->extent_offset, old->extent_offset + old->offset);
2305
 /*
  * NOTE(review): unlike the failures further down, an error here does
  * not abort the transaction - confirm that is intended.
  */
2306 ret = btrfs_drop_extents(trans, root, inode, start,
2307 start + len, 1);
2308 if (ret)
2309 goto out_free_path;
2310again:
2311 key.objectid = btrfs_ino(inode);
2312 key.type = BTRFS_EXTENT_DATA_KEY;
2313 key.offset = start;
2314
2315 if (merge) {
2316 struct btrfs_file_extent_item *fi;
2317 u64 extent_len;
2318 struct btrfs_key found_key;
2319
2320 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
2321 if (ret < 0)
2322 goto out_free_path;
2323
 /*
  * NOTE(review): steps back one slot without checking that
  * slots[0] > 0 or that the previous item is an EXTENT_DATA
  * item of this inode - verify this cannot read a foreign item.
  */
2324 path->slots[0]--;
2325 leaf = path->nodes[0];
2326 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2327
2328 fi = btrfs_item_ptr(leaf, path->slots[0],
2329 struct btrfs_file_extent_item);
2330 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2331
 /* extend the previous item in place if it ends right at start */
2332 if (relink_is_mergable(leaf, fi, new->bytenr) &&
2333 extent_len + found_key.offset == start) {
2334 btrfs_set_file_extent_num_bytes(leaf, fi,
2335 extent_len + len);
2336 btrfs_mark_buffer_dirty(leaf);
2337 inode_add_bytes(inode, len);
2338
2339 ret = 1;
2340 goto out_free_path;
2341 } else {
 /* cannot merge: retry once as a plain insert */
2342 merge = false;
2343 btrfs_release_path(path);
2344 goto again;
2345 }
2346 }
2347
2348 ret = btrfs_insert_empty_item(trans, root, path, &key,
2349 sizeof(*extent));
2350 if (ret) {
2351 btrfs_abort_transaction(trans, root, ret);
2352 goto out_free_path;
2353 }
2354
 /* fill in a regular file extent item that points at the new extent */
2355 leaf = path->nodes[0];
2356 item = btrfs_item_ptr(leaf, path->slots[0],
2357 struct btrfs_file_extent_item);
2358 btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
2359 btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
2360 btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
2361 btrfs_set_file_extent_num_bytes(leaf, item, len);
2362 btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
2363 btrfs_set_file_extent_generation(leaf, item, trans->transid);
2364 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
2365 btrfs_set_file_extent_compression(leaf, item, new->compress_type);
2366 btrfs_set_file_extent_encryption(leaf, item, 0);
2367 btrfs_set_file_extent_other_encoding(leaf, item, 0);
2368
2369 btrfs_mark_buffer_dirty(leaf);
2370 inode_add_bytes(inode, len);
2371
2372 ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
2373 new->disk_len, 0,
2374 backref->root_id, backref->inum,
2375 new->file_pos, 0); /* start - extent_offset */
2376 if (ret) {
2377 btrfs_abort_transaction(trans, root, ret);
2378 goto out_free_path;
2379 }
2380
2381 ret = 1;
2382out_free_path:
2383 btrfs_release_path(path);
2384 btrfs_end_transaction(trans, root);
2385out_unlock:
2386 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2387 &cached, GFP_NOFS);
2388 iput(inode);
2389 return ret;
2390}
2391
2392static void relink_file_extents(struct new_sa_defrag_extent *new)
2393{
2394 struct btrfs_path *path;
2395 struct old_sa_defrag_extent *old, *tmp;
2396 struct sa_defrag_extent_backref *backref;
2397 struct sa_defrag_extent_backref *prev = NULL;
2398 struct inode *inode;
2399 struct btrfs_root *root;
2400 struct rb_node *node;
2401 int ret;
2402
2403 inode = new->inode;
2404 root = BTRFS_I(inode)->root;
2405
2406 path = btrfs_alloc_path();
2407 if (!path)
2408 return;
2409
2410 if (!record_extent_backrefs(path, new)) {
2411 btrfs_free_path(path);
2412 goto out;
2413 }
2414 btrfs_release_path(path);
2415
2416 while (1) {
2417 node = rb_first(&new->root);
2418 if (!node)
2419 break;
2420 rb_erase(node, &new->root);
2421
2422 backref = rb_entry(node, struct sa_defrag_extent_backref, node);
2423
2424 ret = relink_extent_backref(path, prev, backref);
2425 WARN_ON(ret < 0);
2426
2427 kfree(prev);
2428
2429 if (ret == 1)
2430 prev = backref;
2431 else
2432 prev = NULL;
2433 cond_resched();
2434 }
2435 kfree(prev);
2436
2437 btrfs_free_path(path);
2438
2439 list_for_each_entry_safe(old, tmp, &new->head, list) {
2440 list_del(&old->list);
2441 kfree(old);
2442 }
2443out:
2444 atomic_dec(&root->fs_info->defrag_running);
2445 wake_up(&root->fs_info->transaction_wait);
2446
2447 kfree(new);
2448}
2449
/*
 * Build the descriptor used for snapshot-aware defrag of one ordered extent.
 *
 * @inode:   the inode being defragmented
 * @ordered: the ordered extent that is about to be finished
 *
 * Copies the new extent's location from @ordered and collects, on
 * new->head, every existing EXTENT_DATA item of @inode that has a non-zero
 * disk bytenr and falls in the ordered extent's file range.
 *
 * Returns the descriptor with fs_info->defrag_running incremented, or NULL
 * if an allocation or tree search failed (nothing is left allocated then).
 */
2450static struct new_sa_defrag_extent *
2451record_old_file_extents(struct inode *inode,
2452 struct btrfs_ordered_extent *ordered)
2453{
2454 struct btrfs_root *root = BTRFS_I(inode)->root;
2455 struct btrfs_path *path;
2456 struct btrfs_key key;
2457 struct old_sa_defrag_extent *old, *tmp;
2458 struct new_sa_defrag_extent *new;
2459 int ret;
2460
2461 new = kmalloc(sizeof(*new), GFP_NOFS);
2462 if (!new)
2463 return NULL;
2464
 /* new->path is left unset here; record_extent_backrefs() fills it in */
2465 new->inode = inode;
2466 new->file_pos = ordered->file_offset;
2467 new->len = ordered->len;
2468 new->bytenr = ordered->start;
2469 new->disk_len = ordered->disk_len;
2470 new->compress_type = ordered->compress_type;
2471 new->root = RB_ROOT;
2472 INIT_LIST_HEAD(&new->head);
2473
2474 path = btrfs_alloc_path();
2475 if (!path)
2476 goto out_kfree;
2477
2478 key.objectid = btrfs_ino(inode);
2479 key.type = BTRFS_EXTENT_DATA_KEY;
2480 key.offset = new->file_pos;
2481
2482 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2483 if (ret < 0)
2484 goto out_free_path;
 /* no exact match: step back so an item starting before file_pos is seen */
2485 if (ret > 0 && path->slots[0] > 0)
2486 path->slots[0]--;
2487
2488 /* find out all the old extents for the file range */
2489 while (1) {
2490 struct btrfs_file_extent_item *extent;
2491 struct extent_buffer *l;
2492 int slot;
2493 u64 num_bytes;
2494 u64 offset;
2495 u64 end;
2496 u64 disk_bytenr;
2497 u64 extent_offset;
2498
2499 l = path->nodes[0];
2500 slot = path->slots[0];
2501
2502 if (slot >= btrfs_header_nritems(l)) {
2503 ret = btrfs_next_leaf(root, path);
2504 if (ret < 0)
2505 goto out_free_list;
2506 else if (ret > 0)
2507 break;
2508 continue;
2509 }
2510
2511 btrfs_item_key_to_cpu(l, &key, slot);
2512
 /* stop at the first item outside this inode's extents or the range */
2513 if (key.objectid != btrfs_ino(inode))
2514 break;
2515 if (key.type != BTRFS_EXTENT_DATA_KEY)
2516 break;
2517 if (key.offset >= new->file_pos + new->len)
2518 break;
2519
2520 extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
2521
 /*
  * NOTE(review): with '<', an item that ends exactly at file_pos is
  * not skipped and is recorded below with len == 0 - confirm intended.
  */
2522 num_bytes = btrfs_file_extent_num_bytes(l, extent);
2523 if (key.offset + num_bytes < new->file_pos)
2524 goto next;
2525
 /* items with a zero disk bytenr are not recorded */
2526 disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
2527 if (!disk_bytenr)
2528 goto next;
2529
2530 extent_offset = btrfs_file_extent_offset(l, extent);
2531
2532 old = kmalloc(sizeof(*old), GFP_NOFS);
2533 if (!old)
2534 goto out_free_list;
2535
 /* [offset, end) is the overlap of this item with the new extent */
2536 offset = max(new->file_pos, key.offset);
2537 end = min(new->file_pos + new->len, key.offset + num_bytes);
2538
2539 old->bytenr = disk_bytenr;
2540 old->extent_offset = extent_offset;
2541 old->offset = offset - key.offset;
2542 old->len = end - offset;
2543 old->new = new;
2544 old->count = 0;
2545 list_add_tail(&old->list, &new->head);
2546next:
2547 path->slots[0]++;
2548 cond_resched();
2549 }
2550
2551 btrfs_free_path(path);
 /* dropped again at the end of relink_file_extents() */
2552 atomic_inc(&root->fs_info->defrag_running);
2553
2554 return new;
2555
2556out_free_list:
2557 list_for_each_entry_safe(old, tmp, &new->head, list) {
2558 list_del(&old->list);
2559 kfree(old);
2560 }
2561out_free_path:
2562 btrfs_free_path(path);
2563out_kfree:
2564 kfree(new);
2565 return NULL;
2566}
2567
1895/* 2568/*
1896 * helper function for btrfs_finish_ordered_io, this 2569 * helper function for btrfs_finish_ordered_io, this
1897 * just reads in some of the csum leaves to prime them into ram 2570 * just reads in some of the csum leaves to prime them into ram
@@ -1909,6 +2582,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1909 struct btrfs_trans_handle *trans = NULL; 2582 struct btrfs_trans_handle *trans = NULL;
1910 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2583 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1911 struct extent_state *cached_state = NULL; 2584 struct extent_state *cached_state = NULL;
2585 struct new_sa_defrag_extent *new = NULL;
1912 int compress_type = 0; 2586 int compress_type = 0;
1913 int ret; 2587 int ret;
1914 bool nolock; 2588 bool nolock;
@@ -1943,6 +2617,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1943 ordered_extent->file_offset + ordered_extent->len - 1, 2617 ordered_extent->file_offset + ordered_extent->len - 1,
1944 0, &cached_state); 2618 0, &cached_state);
1945 2619
2620 ret = test_range_bit(io_tree, ordered_extent->file_offset,
2621 ordered_extent->file_offset + ordered_extent->len - 1,
2622 EXTENT_DEFRAG, 1, cached_state);
2623 if (ret) {
2624 u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
2625 if (last_snapshot >= BTRFS_I(inode)->generation)
2626 /* the inode is shared */
2627 new = record_old_file_extents(inode, ordered_extent);
2628
2629 clear_extent_bit(io_tree, ordered_extent->file_offset,
2630 ordered_extent->file_offset + ordered_extent->len - 1,
2631 EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
2632 }
2633
1946 if (nolock) 2634 if (nolock)
1947 trans = btrfs_join_transaction_nolock(root); 2635 trans = btrfs_join_transaction_nolock(root);
1948 else 2636 else
@@ -2001,17 +2689,33 @@ out:
2001 if (trans) 2689 if (trans)
2002 btrfs_end_transaction(trans, root); 2690 btrfs_end_transaction(trans, root);
2003 2691
2004 if (ret) 2692 if (ret) {
2005 clear_extent_uptodate(io_tree, ordered_extent->file_offset, 2693 clear_extent_uptodate(io_tree, ordered_extent->file_offset,
2006 ordered_extent->file_offset + 2694 ordered_extent->file_offset +
2007 ordered_extent->len - 1, NULL, GFP_NOFS); 2695 ordered_extent->len - 1, NULL, GFP_NOFS);
2008 2696
2697 /*
2698 * If the ordered extent had an IOERR or something else went
2699 * wrong we need to return the space for this ordered extent
2700 * back to the allocator.
2701 */
2702 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2703 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
2704 btrfs_free_reserved_extent(root, ordered_extent->start,
2705 ordered_extent->disk_len);
2706 }
2707
2708
2009 /* 2709 /*
2010 * This needs to be done to make sure anybody waiting knows we are done 2710 * This needs to be done to make sure anybody waiting knows we are done
2011 * updating everything for this ordered extent. 2711 * updating everything for this ordered extent.
2012 */ 2712 */
2013 btrfs_remove_ordered_extent(inode, ordered_extent); 2713 btrfs_remove_ordered_extent(inode, ordered_extent);
2014 2714
2715 /* for snapshot-aware defrag */
2716 if (new)
2717 relink_file_extents(new);
2718
2015 /* once for us */ 2719 /* once for us */
2016 btrfs_put_ordered_extent(ordered_extent); 2720 btrfs_put_ordered_extent(ordered_extent);
2017 /* once for the tree */ 2721 /* once for the tree */
@@ -2062,7 +2766,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2062static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, 2766static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2063 struct extent_state *state, int mirror) 2767 struct extent_state *state, int mirror)
2064{ 2768{
2065 size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); 2769 size_t offset = start - page_offset(page);
2066 struct inode *inode = page->mapping->host; 2770 struct inode *inode = page->mapping->host;
2067 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2771 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2068 char *kaddr; 2772 char *kaddr;
@@ -2167,11 +2871,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
2167 } 2871 }
2168} 2872}
2169 2873
2170enum btrfs_orphan_cleanup_state {
2171 ORPHAN_CLEANUP_STARTED = 1,
2172 ORPHAN_CLEANUP_DONE = 2,
2173};
2174
2175/* 2874/*
2176 * This is called in transaction commit time. If there are no orphan 2875 * This is called in transaction commit time. If there are no orphan
2177 * files in the subvolume, it removes orphan item and frees block_rsv 2876 * files in the subvolume, it removes orphan item and frees block_rsv
@@ -2469,6 +3168,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2469 */ 3168 */
2470 set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3169 set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2471 &BTRFS_I(inode)->runtime_flags); 3170 &BTRFS_I(inode)->runtime_flags);
3171 atomic_inc(&root->orphan_inodes);
2472 3172
2473 /* if we have links, this was a truncate, lets do that */ 3173 /* if we have links, this was a truncate, lets do that */
2474 if (inode->i_nlink) { 3174 if (inode->i_nlink) {
@@ -2491,6 +3191,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2491 goto out; 3191 goto out;
2492 3192
2493 ret = btrfs_truncate(inode); 3193 ret = btrfs_truncate(inode);
3194 if (ret)
3195 btrfs_orphan_del(NULL, inode);
2494 } else { 3196 } else {
2495 nr_unlink++; 3197 nr_unlink++;
2496 } 3198 }
@@ -2709,34 +3411,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2709 struct btrfs_inode_item *item, 3411 struct btrfs_inode_item *item,
2710 struct inode *inode) 3412 struct inode *inode)
2711{ 3413{
2712 btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); 3414 struct btrfs_map_token token;
2713 btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); 3415
2714 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 3416 btrfs_init_map_token(&token);
2715 btrfs_set_inode_mode(leaf, item, inode->i_mode); 3417
2716 btrfs_set_inode_nlink(leaf, item, inode->i_nlink); 3418 btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
3419 btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
3420 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
3421 &token);
3422 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3423 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
2717 3424
2718 btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), 3425 btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
2719 inode->i_atime.tv_sec); 3426 inode->i_atime.tv_sec, &token);
2720 btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), 3427 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
2721 inode->i_atime.tv_nsec); 3428 inode->i_atime.tv_nsec, &token);
2722 3429
2723 btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), 3430 btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
2724 inode->i_mtime.tv_sec); 3431 inode->i_mtime.tv_sec, &token);
2725 btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), 3432 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
2726 inode->i_mtime.tv_nsec); 3433 inode->i_mtime.tv_nsec, &token);
2727 3434
2728 btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), 3435 btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
2729 inode->i_ctime.tv_sec); 3436 inode->i_ctime.tv_sec, &token);
2730 btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), 3437 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
2731 inode->i_ctime.tv_nsec); 3438 inode->i_ctime.tv_nsec, &token);
2732 3439
2733 btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); 3440 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
2734 btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); 3441 &token);
2735 btrfs_set_inode_sequence(leaf, item, inode->i_version); 3442 btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
2736 btrfs_set_inode_transid(leaf, item, trans->transid); 3443 &token);
2737 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 3444 btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
2738 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 3445 btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
2739 btrfs_set_inode_block_group(leaf, item, 0); 3446 btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
3447 btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
3448 btrfs_set_token_inode_block_group(leaf, item, 0, &token);
2740} 3449}
2741 3450
2742/* 3451/*
@@ -3304,7 +4013,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3304 u64 extent_num_bytes = 0; 4013 u64 extent_num_bytes = 0;
3305 u64 extent_offset = 0; 4014 u64 extent_offset = 0;
3306 u64 item_end = 0; 4015 u64 item_end = 0;
3307 u64 mask = root->sectorsize - 1;
3308 u32 found_type = (u8)-1; 4016 u32 found_type = (u8)-1;
3309 int found_extent; 4017 int found_extent;
3310 int del_item; 4018 int del_item;
@@ -3328,7 +4036,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3328 * extent just the way it is. 4036 * extent just the way it is.
3329 */ 4037 */
3330 if (root->ref_cows || root == root->fs_info->tree_root) 4038 if (root->ref_cows || root == root->fs_info->tree_root)
3331 btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0); 4039 btrfs_drop_extent_cache(inode, ALIGN(new_size,
4040 root->sectorsize), (u64)-1, 0);
3332 4041
3333 /* 4042 /*
3334 * This function is also used to drop the items in the log tree before 4043 * This function is also used to drop the items in the log tree before
@@ -3407,10 +4116,9 @@ search_again:
3407 if (!del_item) { 4116 if (!del_item) {
3408 u64 orig_num_bytes = 4117 u64 orig_num_bytes =
3409 btrfs_file_extent_num_bytes(leaf, fi); 4118 btrfs_file_extent_num_bytes(leaf, fi);
3410 extent_num_bytes = new_size - 4119 extent_num_bytes = ALIGN(new_size -
3411 found_key.offset + root->sectorsize - 1; 4120 found_key.offset,
3412 extent_num_bytes = extent_num_bytes & 4121 root->sectorsize);
3413 ~((u64)root->sectorsize - 1);
3414 btrfs_set_file_extent_num_bytes(leaf, fi, 4122 btrfs_set_file_extent_num_bytes(leaf, fi,
3415 extent_num_bytes); 4123 extent_num_bytes);
3416 num_dec = (orig_num_bytes - 4124 num_dec = (orig_num_bytes -
@@ -3646,9 +4354,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3646 struct extent_map *em = NULL; 4354 struct extent_map *em = NULL;
3647 struct extent_state *cached_state = NULL; 4355 struct extent_state *cached_state = NULL;
3648 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 4356 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
3649 u64 mask = root->sectorsize - 1; 4357 u64 hole_start = ALIGN(oldsize, root->sectorsize);
3650 u64 hole_start = (oldsize + mask) & ~mask; 4358 u64 block_end = ALIGN(size, root->sectorsize);
3651 u64 block_end = (size + mask) & ~mask;
3652 u64 last_byte; 4359 u64 last_byte;
3653 u64 cur_offset; 4360 u64 cur_offset;
3654 u64 hole_size; 4361 u64 hole_size;
@@ -3681,7 +4388,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3681 break; 4388 break;
3682 } 4389 }
3683 last_byte = min(extent_map_end(em), block_end); 4390 last_byte = min(extent_map_end(em), block_end);
3684 last_byte = (last_byte + mask) & ~mask; 4391 last_byte = ALIGN(last_byte , root->sectorsize);
3685 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 4392 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3686 struct extent_map *hole_em; 4393 struct extent_map *hole_em;
3687 hole_size = last_byte - cur_offset; 4394 hole_size = last_byte - cur_offset;
@@ -3832,6 +4539,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
3832 4539
3833 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 4540 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3834 truncate_setsize(inode, newsize); 4541 truncate_setsize(inode, newsize);
4542
4543 /* Disable nonlocked read DIO to avoid the end less truncate */
4544 btrfs_inode_block_unlocked_dio(inode);
4545 inode_dio_wait(inode);
4546 btrfs_inode_resume_unlocked_dio(inode);
4547
3835 ret = btrfs_truncate(inode); 4548 ret = btrfs_truncate(inode);
3836 if (ret && inode->i_nlink) 4549 if (ret && inode->i_nlink)
3837 btrfs_orphan_del(NULL, inode); 4550 btrfs_orphan_del(NULL, inode);
@@ -3904,6 +4617,12 @@ void btrfs_evict_inode(struct inode *inode)
3904 goto no_delete; 4617 goto no_delete;
3905 } 4618 }
3906 4619
4620 ret = btrfs_commit_inode_delayed_inode(inode);
4621 if (ret) {
4622 btrfs_orphan_del(NULL, inode);
4623 goto no_delete;
4624 }
4625
3907 rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); 4626 rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
3908 if (!rsv) { 4627 if (!rsv) {
3909 btrfs_orphan_del(NULL, inode); 4628 btrfs_orphan_del(NULL, inode);
@@ -3941,7 +4660,7 @@ void btrfs_evict_inode(struct inode *inode)
3941 goto no_delete; 4660 goto no_delete;
3942 } 4661 }
3943 4662
3944 trans = btrfs_start_transaction_lflush(root, 1); 4663 trans = btrfs_join_transaction(root);
3945 if (IS_ERR(trans)) { 4664 if (IS_ERR(trans)) {
3946 btrfs_orphan_del(NULL, inode); 4665 btrfs_orphan_del(NULL, inode);
3947 btrfs_free_block_rsv(root, rsv); 4666 btrfs_free_block_rsv(root, rsv);
@@ -3955,9 +4674,6 @@ void btrfs_evict_inode(struct inode *inode)
3955 break; 4674 break;
3956 4675
3957 trans->block_rsv = &root->fs_info->trans_block_rsv; 4676 trans->block_rsv = &root->fs_info->trans_block_rsv;
3958 ret = btrfs_update_inode(trans, root, inode);
3959 BUG_ON(ret);
3960
3961 btrfs_end_transaction(trans, root); 4677 btrfs_end_transaction(trans, root);
3962 trans = NULL; 4678 trans = NULL;
3963 btrfs_btree_balance_dirty(root); 4679 btrfs_btree_balance_dirty(root);
@@ -4854,7 +5570,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4854 if (btrfs_test_opt(root, NODATASUM)) 5570 if (btrfs_test_opt(root, NODATASUM))
4855 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 5571 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
4856 if (btrfs_test_opt(root, NODATACOW)) 5572 if (btrfs_test_opt(root, NODATACOW))
4857 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; 5573 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
5574 BTRFS_INODE_NODATASUM;
4858 } 5575 }
4859 5576
4860 insert_inode_hash(inode); 5577 insert_inode_hash(inode);
@@ -5006,12 +5723,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
5006 goto out_unlock; 5723 goto out_unlock;
5007 } 5724 }
5008 5725
5009 err = btrfs_update_inode(trans, root, inode);
5010 if (err) {
5011 drop_inode = 1;
5012 goto out_unlock;
5013 }
5014
5015 /* 5726 /*
5016 * If the active LSM wants to access the inode during 5727 * If the active LSM wants to access the inode during
5017 * d_instantiate it needs these. Smack checks to see 5728 * d_instantiate it needs these. Smack checks to see
@@ -5396,8 +6107,7 @@ again:
5396 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 6107 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
5397 size_t size; 6108 size_t size;
5398 size = btrfs_file_extent_inline_len(leaf, item); 6109 size = btrfs_file_extent_inline_len(leaf, item);
5399 extent_end = (extent_start + size + root->sectorsize - 1) & 6110 extent_end = ALIGN(extent_start + size, root->sectorsize);
5400 ~((u64)root->sectorsize - 1);
5401 } 6111 }
5402 6112
5403 if (start >= extent_end) { 6113 if (start >= extent_end) {
@@ -5469,8 +6179,7 @@ again:
5469 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, 6179 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
5470 size - extent_offset); 6180 size - extent_offset);
5471 em->start = extent_start + extent_offset; 6181 em->start = extent_start + extent_offset;
5472 em->len = (copy_size + root->sectorsize - 1) & 6182 em->len = ALIGN(copy_size, root->sectorsize);
5473 ~((u64)root->sectorsize - 1);
5474 em->orig_block_len = em->len; 6183 em->orig_block_len = em->len;
5475 em->orig_start = em->start; 6184 em->orig_start = em->start;
5476 if (compress_type) { 6185 if (compress_type) {
@@ -5949,6 +6658,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
5949 6658
5950 em->start = start; 6659 em->start = start;
5951 em->orig_start = orig_start; 6660 em->orig_start = orig_start;
6661 em->mod_start = start;
6662 em->mod_len = len;
5952 em->len = len; 6663 em->len = len;
5953 em->block_len = block_len; 6664 em->block_len = block_len;
5954 em->block_start = block_start; 6665 em->block_start = block_start;
@@ -5990,16 +6701,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5990 u64 len = bh_result->b_size; 6701 u64 len = bh_result->b_size;
5991 struct btrfs_trans_handle *trans; 6702 struct btrfs_trans_handle *trans;
5992 int unlock_bits = EXTENT_LOCKED; 6703 int unlock_bits = EXTENT_LOCKED;
5993 int ret; 6704 int ret = 0;
5994 6705
5995 if (create) { 6706 if (create)
5996 ret = btrfs_delalloc_reserve_space(inode, len);
5997 if (ret)
5998 return ret;
5999 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; 6707 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
6000 } else { 6708 else
6001 len = min_t(u64, len, root->sectorsize); 6709 len = min_t(u64, len, root->sectorsize);
6002 }
6003 6710
6004 lockstart = start; 6711 lockstart = start;
6005 lockend = start + len - 1; 6712 lockend = start + len - 1;
@@ -6011,14 +6718,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6011 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) 6718 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
6012 return -ENOTBLK; 6719 return -ENOTBLK;
6013 6720
6014 if (create) {
6015 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6016 lockend, EXTENT_DELALLOC, NULL,
6017 &cached_state, GFP_NOFS);
6018 if (ret)
6019 goto unlock_err;
6020 }
6021
6022 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 6721 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
6023 if (IS_ERR(em)) { 6722 if (IS_ERR(em)) {
6024 ret = PTR_ERR(em); 6723 ret = PTR_ERR(em);
@@ -6050,7 +6749,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6050 if (!create && (em->block_start == EXTENT_MAP_HOLE || 6749 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
6051 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 6750 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
6052 free_extent_map(em); 6751 free_extent_map(em);
6053 ret = 0;
6054 goto unlock_err; 6752 goto unlock_err;
6055 } 6753 }
6056 6754
@@ -6148,6 +6846,15 @@ unlock:
6148 */ 6846 */
6149 if (start + len > i_size_read(inode)) 6847 if (start + len > i_size_read(inode))
6150 i_size_write(inode, start + len); 6848 i_size_write(inode, start + len);
6849
6850 spin_lock(&BTRFS_I(inode)->lock);
6851 BTRFS_I(inode)->outstanding_extents++;
6852 spin_unlock(&BTRFS_I(inode)->lock);
6853
6854 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6855 lockstart + len - 1, EXTENT_DELALLOC, NULL,
6856 &cached_state, GFP_NOFS);
6857 BUG_ON(ret);
6151 } 6858 }
6152 6859
6153 /* 6860 /*
@@ -6156,24 +6863,9 @@ unlock:
6156 * aren't using if there is any left over space. 6863 * aren't using if there is any left over space.
6157 */ 6864 */
6158 if (lockstart < lockend) { 6865 if (lockstart < lockend) {
6159 if (create && len < lockend - lockstart) { 6866 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6160 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 6867 lockend, unlock_bits, 1, 0,
6161 lockstart + len - 1, 6868 &cached_state, GFP_NOFS);
6162 unlock_bits | EXTENT_DEFRAG, 1, 0,
6163 &cached_state, GFP_NOFS);
6164 /*
6165 * Beside unlock, we also need to cleanup reserved space
6166 * for the left range by attaching EXTENT_DO_ACCOUNTING.
6167 */
6168 clear_extent_bit(&BTRFS_I(inode)->io_tree,
6169 lockstart + len, lockend,
6170 unlock_bits | EXTENT_DO_ACCOUNTING |
6171 EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
6172 } else {
6173 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6174 lockend, unlock_bits, 1, 0,
6175 &cached_state, GFP_NOFS);
6176 }
6177 } else { 6869 } else {
6178 free_extent_state(cached_state); 6870 free_extent_state(cached_state);
6179 } 6871 }
@@ -6183,9 +6875,6 @@ unlock:
6183 return 0; 6875 return 0;
6184 6876
6185unlock_err: 6877unlock_err:
6186 if (create)
6187 unlock_bits |= EXTENT_DO_ACCOUNTING;
6188
6189 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6878 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6190 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 6879 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
6191 return ret; 6880 return ret;
@@ -6426,19 +7115,24 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6426 int async_submit = 0; 7115 int async_submit = 0;
6427 7116
6428 map_length = orig_bio->bi_size; 7117 map_length = orig_bio->bi_size;
6429 ret = btrfs_map_block(root->fs_info, READ, start_sector << 9, 7118 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
6430 &map_length, NULL, 0); 7119 &map_length, NULL, 0);
6431 if (ret) { 7120 if (ret) {
6432 bio_put(orig_bio); 7121 bio_put(orig_bio);
6433 return -EIO; 7122 return -EIO;
6434 } 7123 }
6435
6436 if (map_length >= orig_bio->bi_size) { 7124 if (map_length >= orig_bio->bi_size) {
6437 bio = orig_bio; 7125 bio = orig_bio;
6438 goto submit; 7126 goto submit;
6439 } 7127 }
6440 7128
6441 async_submit = 1; 7129 /* async crcs make it difficult to collect full stripe writes. */
7130 if (btrfs_get_alloc_profile(root, 1) &
7131 (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
7132 async_submit = 0;
7133 else
7134 async_submit = 1;
7135
6442 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); 7136 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
6443 if (!bio) 7137 if (!bio)
6444 return -ENOMEM; 7138 return -ENOMEM;
@@ -6480,7 +7174,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6480 bio->bi_end_io = btrfs_end_dio_bio; 7174 bio->bi_end_io = btrfs_end_dio_bio;
6481 7175
6482 map_length = orig_bio->bi_size; 7176 map_length = orig_bio->bi_size;
6483 ret = btrfs_map_block(root->fs_info, READ, 7177 ret = btrfs_map_block(root->fs_info, rw,
6484 start_sector << 9, 7178 start_sector << 9,
6485 &map_length, NULL, 0); 7179 &map_length, NULL, 0);
6486 if (ret) { 7180 if (ret) {
@@ -6623,15 +7317,60 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6623{ 7317{
6624 struct file *file = iocb->ki_filp; 7318 struct file *file = iocb->ki_filp;
6625 struct inode *inode = file->f_mapping->host; 7319 struct inode *inode = file->f_mapping->host;
7320 size_t count = 0;
7321 int flags = 0;
7322 bool wakeup = true;
7323 bool relock = false;
7324 ssize_t ret;
6626 7325
6627 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 7326 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
6628 offset, nr_segs)) 7327 offset, nr_segs))
6629 return 0; 7328 return 0;
6630 7329
6631 return __blockdev_direct_IO(rw, iocb, inode, 7330 atomic_inc(&inode->i_dio_count);
6632 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 7331 smp_mb__after_atomic_inc();
6633 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, 7332
6634 btrfs_submit_direct, 0); 7333 if (rw & WRITE) {
7334 count = iov_length(iov, nr_segs);
7335 /*
7336 * If the write DIO is beyond the EOF, we need update
7337 * the isize, but it is protected by i_mutex. So we can
7338 * not unlock the i_mutex at this case.
7339 */
7340 if (offset + count <= inode->i_size) {
7341 mutex_unlock(&inode->i_mutex);
7342 relock = true;
7343 }
7344 ret = btrfs_delalloc_reserve_space(inode, count);
7345 if (ret)
7346 goto out;
7347 } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
7348 &BTRFS_I(inode)->runtime_flags))) {
7349 inode_dio_done(inode);
7350 flags = DIO_LOCKING | DIO_SKIP_HOLES;
7351 wakeup = false;
7352 }
7353
7354 ret = __blockdev_direct_IO(rw, iocb, inode,
7355 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
7356 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
7357 btrfs_submit_direct, flags);
7358 if (rw & WRITE) {
7359 if (ret < 0 && ret != -EIOCBQUEUED)
7360 btrfs_delalloc_release_space(inode, count);
7361 else if (ret >= 0 && (size_t)ret < count)
7362 btrfs_delalloc_release_space(inode,
7363 count - (size_t)ret);
7364 else
7365 btrfs_delalloc_release_metadata(inode, 0);
7366 }
7367out:
7368 if (wakeup)
7369 inode_dio_done(inode);
7370 if (relock)
7371 mutex_lock(&inode->i_mutex);
7372
7373 return ret;
6635} 7374}
6636 7375
6637#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) 7376#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
@@ -6735,8 +7474,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6735 return; 7474 return;
6736 } 7475 }
6737 lock_extent_bits(tree, page_start, page_end, 0, &cached_state); 7476 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
6738 ordered = btrfs_lookup_ordered_extent(inode, 7477 ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
6739 page_offset(page));
6740 if (ordered) { 7478 if (ordered) {
6741 /* 7479 /*
6742 * IO on this page will never be started, so we need 7480 * IO on this page will never be started, so we need
@@ -7216,8 +7954,9 @@ int btrfs_drop_inode(struct inode *inode)
7216{ 7954{
7217 struct btrfs_root *root = BTRFS_I(inode)->root; 7955 struct btrfs_root *root = BTRFS_I(inode)->root;
7218 7956
7957 /* the snap/subvol tree is on deleting */
7219 if (btrfs_root_refs(&root->root_item) == 0 && 7958 if (btrfs_root_refs(&root->root_item) == 0 &&
7220 !btrfs_is_free_space_inode(inode)) 7959 root != root->fs_info->tree_root)
7221 return 1; 7960 return 1;
7222 else 7961 else
7223 return generic_drop_inode(inode); 7962 return generic_drop_inode(inode);
@@ -7299,40 +8038,22 @@ fail:
7299static int btrfs_getattr(struct vfsmount *mnt, 8038static int btrfs_getattr(struct vfsmount *mnt,
7300 struct dentry *dentry, struct kstat *stat) 8039 struct dentry *dentry, struct kstat *stat)
7301{ 8040{
8041 u64 delalloc_bytes;
7302 struct inode *inode = dentry->d_inode; 8042 struct inode *inode = dentry->d_inode;
7303 u32 blocksize = inode->i_sb->s_blocksize; 8043 u32 blocksize = inode->i_sb->s_blocksize;
7304 8044
7305 generic_fillattr(inode, stat); 8045 generic_fillattr(inode, stat);
7306 stat->dev = BTRFS_I(inode)->root->anon_dev; 8046 stat->dev = BTRFS_I(inode)->root->anon_dev;
7307 stat->blksize = PAGE_CACHE_SIZE; 8047 stat->blksize = PAGE_CACHE_SIZE;
8048
8049 spin_lock(&BTRFS_I(inode)->lock);
8050 delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
8051 spin_unlock(&BTRFS_I(inode)->lock);
7308 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + 8052 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
7309 ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9; 8053 ALIGN(delalloc_bytes, blocksize)) >> 9;
7310 return 0; 8054 return 0;
7311} 8055}
7312 8056
7313/*
7314 * If a file is moved, it will inherit the cow and compression flags of the new
7315 * directory.
7316 */
7317static void fixup_inode_flags(struct inode *dir, struct inode *inode)
7318{
7319 struct btrfs_inode *b_dir = BTRFS_I(dir);
7320 struct btrfs_inode *b_inode = BTRFS_I(inode);
7321
7322 if (b_dir->flags & BTRFS_INODE_NODATACOW)
7323 b_inode->flags |= BTRFS_INODE_NODATACOW;
7324 else
7325 b_inode->flags &= ~BTRFS_INODE_NODATACOW;
7326
7327 if (b_dir->flags & BTRFS_INODE_COMPRESS) {
7328 b_inode->flags |= BTRFS_INODE_COMPRESS;
7329 b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS;
7330 } else {
7331 b_inode->flags &= ~(BTRFS_INODE_COMPRESS |
7332 BTRFS_INODE_NOCOMPRESS);
7333 }
7334}
7335
7336static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, 8057static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7337 struct inode *new_dir, struct dentry *new_dentry) 8058 struct inode *new_dir, struct dentry *new_dentry)
7338{ 8059{
@@ -7498,8 +8219,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7498 } 8219 }
7499 } 8220 }
7500 8221
7501 fixup_inode_flags(new_dir, old_inode);
7502
7503 ret = btrfs_add_link(trans, new_dir, old_inode, 8222 ret = btrfs_add_link(trans, new_dir, old_inode,
7504 new_dentry->d_name.name, 8223 new_dentry->d_name.name,
7505 new_dentry->d_name.len, 0, index); 8224 new_dentry->d_name.len, 0, index);
@@ -7583,7 +8302,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7583 8302
7584 INIT_LIST_HEAD(&works); 8303 INIT_LIST_HEAD(&works);
7585 INIT_LIST_HEAD(&splice); 8304 INIT_LIST_HEAD(&splice);
7586again: 8305
7587 spin_lock(&root->fs_info->delalloc_lock); 8306 spin_lock(&root->fs_info->delalloc_lock);
7588 list_splice_init(&root->fs_info->delalloc_inodes, &splice); 8307 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
7589 while (!list_empty(&splice)) { 8308 while (!list_empty(&splice)) {
@@ -7593,8 +8312,11 @@ again:
7593 list_del_init(&binode->delalloc_inodes); 8312 list_del_init(&binode->delalloc_inodes);
7594 8313
7595 inode = igrab(&binode->vfs_inode); 8314 inode = igrab(&binode->vfs_inode);
7596 if (!inode) 8315 if (!inode) {
8316 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
8317 &binode->runtime_flags);
7597 continue; 8318 continue;
8319 }
7598 8320
7599 list_add_tail(&binode->delalloc_inodes, 8321 list_add_tail(&binode->delalloc_inodes,
7600 &root->fs_info->delalloc_inodes); 8322 &root->fs_info->delalloc_inodes);
@@ -7619,13 +8341,6 @@ again:
7619 btrfs_wait_and_free_delalloc_work(work); 8341 btrfs_wait_and_free_delalloc_work(work);
7620 } 8342 }
7621 8343
7622 spin_lock(&root->fs_info->delalloc_lock);
7623 if (!list_empty(&root->fs_info->delalloc_inodes)) {
7624 spin_unlock(&root->fs_info->delalloc_lock);
7625 goto again;
7626 }
7627 spin_unlock(&root->fs_info->delalloc_lock);
7628
7629 /* the filemap_flush will queue IO into the worker threads, but 8344 /* the filemap_flush will queue IO into the worker threads, but
7630 * we have to make sure the IO is actually started and that 8345 * we have to make sure the IO is actually started and that
7631 * ordered extents get created before we return 8346 * ordered extents get created before we return
@@ -7801,8 +8516,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7801 } 8516 }
7802 } 8517 }
7803 8518
7804 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 8519 ret = btrfs_reserve_extent(trans, root,
7805 0, *alloc_hint, &ins, 1); 8520 min(num_bytes, 256ULL * 1024 * 1024),
8521 min_size, 0, *alloc_hint, &ins, 1);
7806 if (ret) { 8522 if (ret) {
7807 if (own_trans) 8523 if (own_trans)
7808 btrfs_end_transaction(trans, root); 8524 btrfs_end_transaction(trans, root);