path: root/fs/btrfs/inode.c
author    Josef Bacik <josef@redhat.com>    2012-05-02 14:00:54 -0400
committer Josef Bacik <josef@redhat.com>    2012-05-30 10:23:33 -0400
commit    5fd02043553b02867b29de1ac9fff2ec16b84def (patch)
tree      f378b1042b4fbd366185b8b12b082bce4fd4fac4 /fs/btrfs/inode.c
parent    4e89915220e2f1341c757b610d0f0c3821f3a65f (diff)
Btrfs: finish ordered extents in their own thread
We noticed that the ordered extent completion doesn't really rely on having
a page and that it could be done independently of ending the writeback on a
page.  This patch makes us not do the threaded endio stuff for normal
buffered writes and direct writes so we can end page writeback as soon as
possible (in irq context) and only start threads to do the ordered work
when it is actually done.  Compression needs to be reworked some to take
advantage of this as well, but atm it has to do a find_get_page in its
endio handler so it must be done in its own thread.  This makes direct
writes quite a bit faster.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
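The core of the change is visible in the hunks below: the writeback end_io hook no longer calls btrfs_finish_ordered_io() directly, it only decrements the ordered pending count and queues a work item whose handler recovers the ordered extent with container_of().  The same pattern can be sketched outside the kernel roughly as follows (a self-contained user-space model for illustration only; struct work_item, queue_work_item and the simplified fields are hypothetical stand-ins, not the btrfs_work/btrfs_queue_worker API the patch actually uses):

/*
 * User-space sketch of the deferral pattern used by the patch: the I/O
 * completion path only marks the ordered extent done and hands a work item
 * to a worker, and the heavy "finish ordered io" work runs later outside
 * irq context.
 */
#include <stddef.h>
#include <stdio.h>

struct work_item {
        void (*func)(struct work_item *work);
};

struct ordered_extent {
        unsigned long long file_offset;
        unsigned long long len;
        struct work_item work;  /* embedded, so container_of() can recover us */
};

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* Stand-in for a worker pool: a real pool would run this on another thread. */
static void queue_work_item(struct work_item *work)
{
        work->func(work);
}

/* Runs in the worker: recover the ordered extent and finish it. */
static void finish_ordered_fn(struct work_item *work)
{
        struct ordered_extent *oe =
                container_of(work, struct ordered_extent, work);
        printf("finishing ordered extent [%llu, %llu)\n",
               oe->file_offset, oe->file_offset + oe->len);
}

/* Stand-in for the end_io hook: cheap bookkeeping, then defer the rest. */
static void writepage_end_io(struct ordered_extent *oe)
{
        oe->work.func = finish_ordered_fn;
        queue_work_item(&oe->work);
}

int main(void)
{
        struct ordered_extent oe = { .file_offset = 0, .len = 4096 };
        writepage_end_io(&oe);
        return 0;
}

In the patch itself the embedded member is a struct btrfs_work, the queue is the endio_write_workers (or endio_freespace_worker) pool, and page writeback has already been ended by the time the work is queued, which is what lets the irq-context completion path return quickly.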
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--  fs/btrfs/inode.c  177
1 file changed, 76 insertions(+), 101 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 41a62e6954c2..9a1b96fd672a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
 
 static int btrfs_setsize(struct inode *inode, loff_t newsize);
 static int btrfs_truncate(struct inode *inode);
-static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
+static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
 static noinline int cow_file_range(struct inode *inode,
                                    struct page *locked_page,
                                    u64 start, u64 end, int *page_started,
@@ -1572,11 +1572,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
         if (btrfs_is_free_space_inode(root, inode))
                 metadata = 2;
 
-        ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
-        if (ret)
-                return ret;
-
         if (!(rw & REQ_WRITE)) {
+                ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
+                if (ret)
+                        return ret;
+
                 if (bio_flags & EXTENT_BIO_COMPRESSED) {
                         return btrfs_submit_compressed_read(inode, bio,
                                                             mirror_num, bio_flags);
@@ -1815,25 +1815,24 @@ out:
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
  */
-static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
+static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 {
+        struct inode *inode = ordered_extent->inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_trans_handle *trans = NULL;
-        struct btrfs_ordered_extent *ordered_extent = NULL;
         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
         struct extent_state *cached_state = NULL;
         int compress_type = 0;
         int ret;
         bool nolock;
 
-        ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
-                                             end - start + 1);
-        if (!ret)
-                return 0;
-        BUG_ON(!ordered_extent); /* Logic error */
-
         nolock = btrfs_is_free_space_inode(root, inode);
 
+        if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
+                ret = -EIO;
+                goto out;
+        }
+
         if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
                 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -1889,12 +1888,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                                    ordered_extent->file_offset,
                                    ordered_extent->len);
         }
-        unlock_extent_cached(io_tree, ordered_extent->file_offset,
-                             ordered_extent->file_offset +
-                             ordered_extent->len - 1, &cached_state, GFP_NOFS);
+
         if (ret < 0) {
                 btrfs_abort_transaction(trans, root, ret);
-                goto out;
+                goto out_unlock;
         }
 
         add_pending_csums(trans, inode, ordered_extent->file_offset,
@@ -1905,10 +1902,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                 ret = btrfs_update_inode_fallback(trans, root, inode);
                 if (ret) { /* -ENOMEM or corruption */
                         btrfs_abort_transaction(trans, root, ret);
-                        goto out;
+                        goto out_unlock;
                 }
         }
         ret = 0;
+out_unlock:
+        unlock_extent_cached(io_tree, ordered_extent->file_offset,
+                             ordered_extent->file_offset +
+                             ordered_extent->len - 1, &cached_state, GFP_NOFS);
 out:
         if (root != root->fs_info->tree_root)
                 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
@@ -1919,26 +1920,57 @@ out:
                 btrfs_end_transaction(trans, root);
         }
 
+        if (ret)
+                clear_extent_uptodate(io_tree, ordered_extent->file_offset,
+                                      ordered_extent->file_offset +
+                                      ordered_extent->len - 1, NULL, GFP_NOFS);
+
+        /*
+         * This needs to be done to make sure anybody waiting knows we are done
+         * updating everything for this ordered extent.
+         */
+        btrfs_remove_ordered_extent(inode, ordered_extent);
+
         /* once for us */
         btrfs_put_ordered_extent(ordered_extent);
         /* once for the tree */
         btrfs_put_ordered_extent(ordered_extent);
 
-        return 0;
-out_unlock:
-        unlock_extent_cached(io_tree, ordered_extent->file_offset,
-                             ordered_extent->file_offset +
-                             ordered_extent->len - 1, &cached_state, GFP_NOFS);
-        goto out;
+        return ret;
+}
+
+static void finish_ordered_fn(struct btrfs_work *work)
+{
+        struct btrfs_ordered_extent *ordered_extent;
+        ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
+        btrfs_finish_ordered_io(ordered_extent);
 }
 
 static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
                                 struct extent_state *state, int uptodate)
 {
+        struct inode *inode = page->mapping->host;
+        struct btrfs_root *root = BTRFS_I(inode)->root;
+        struct btrfs_ordered_extent *ordered_extent = NULL;
+        struct btrfs_workers *workers;
+
         trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
 
         ClearPagePrivate2(page);
-        return btrfs_finish_ordered_io(page->mapping->host, start, end);
+        if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
+                                            end - start + 1, uptodate))
+                return 0;
+
+        ordered_extent->work.func = finish_ordered_fn;
+        ordered_extent->work.flags = 0;
+
+        if (btrfs_is_free_space_inode(root, inode))
+                workers = &root->fs_info->endio_freespace_worker;
+        else
+                workers = &root->fs_info->endio_write_workers;
+        btrfs_queue_worker(workers, &ordered_extent->work);
+
+        return 0;
 }
 
 /*
@@ -5909,9 +5941,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
         struct btrfs_dio_private *dip = bio->bi_private;
         struct inode *inode = dip->inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
-        struct btrfs_trans_handle *trans;
         struct btrfs_ordered_extent *ordered = NULL;
-        struct extent_state *cached_state = NULL;
         u64 ordered_offset = dip->logical_offset;
         u64 ordered_bytes = dip->bytes;
         int ret;
@@ -5921,73 +5951,14 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
 again:
         ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                    &ordered_offset,
-                                                   ordered_bytes);
+                                                   ordered_bytes, !err);
         if (!ret)
                 goto out_test;
 
-        BUG_ON(!ordered);
-
-        trans = btrfs_join_transaction(root);
-        if (IS_ERR(trans)) {
-                err = -ENOMEM;
-                goto out;
-        }
-        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
-        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
-                ret = btrfs_ordered_update_i_size(inode, 0, ordered);
-                if (!ret)
-                        err = btrfs_update_inode_fallback(trans, root, inode);
-                goto out;
-        }
-
-        lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
-                         ordered->file_offset + ordered->len - 1, 0,
-                         &cached_state);
-
-        if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
-                ret = btrfs_mark_extent_written(trans, inode,
-                                                ordered->file_offset,
-                                                ordered->file_offset +
-                                                ordered->len);
-                if (ret) {
-                        err = ret;
-                        goto out_unlock;
-                }
-        } else {
-                ret = insert_reserved_file_extent(trans, inode,
-                                                  ordered->file_offset,
-                                                  ordered->start,
-                                                  ordered->disk_len,
-                                                  ordered->len,
-                                                  ordered->len,
-                                                  0, 0, 0,
-                                                  BTRFS_FILE_EXTENT_REG);
-                unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
-                                   ordered->file_offset, ordered->len);
-                if (ret) {
-                        err = ret;
-                        WARN_ON(1);
-                        goto out_unlock;
-                }
-        }
-
-        add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
-        ret = btrfs_ordered_update_i_size(inode, 0, ordered);
-        if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
-                btrfs_update_inode_fallback(trans, root, inode);
-        ret = 0;
-out_unlock:
-        unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
-                             ordered->file_offset + ordered->len - 1,
-                             &cached_state, GFP_NOFS);
-out:
-        btrfs_delalloc_release_metadata(inode, ordered->len);
-        btrfs_end_transaction(trans, root);
-        ordered_offset = ordered->file_offset + ordered->len;
-        btrfs_put_ordered_extent(ordered);
-        btrfs_put_ordered_extent(ordered);
-
+        ordered->work.func = finish_ordered_fn;
+        ordered->work.flags = 0;
+        btrfs_queue_worker(&root->fs_info->endio_write_workers,
+                           &ordered->work);
 out_test:
         /*
          * our bio might span multiple ordered extents. If we haven't
@@ -5996,12 +5967,12 @@ out_test:
         if (ordered_offset < dip->logical_offset + dip->bytes) {
                 ordered_bytes = dip->logical_offset + dip->bytes -
                         ordered_offset;
+                ordered = NULL;
                 goto again;
         }
 out_done:
         bio->bi_private = dip->private;
 
-        kfree(dip->csums);
         kfree(dip);
 
         /* If we had an error make sure to clear the uptodate flag */
@@ -6069,9 +6040,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
         int ret;
 
         bio_get(bio);
-        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
-        if (ret)
-                goto err;
+
+        if (!write) {
+                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+                if (ret)
+                        goto err;
+        }
 
         if (skip_sum)
                 goto map;
@@ -6491,13 +6465,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 
 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
 {
+        struct inode *inode = page->mapping->host;
         struct extent_io_tree *tree;
         struct btrfs_ordered_extent *ordered;
         struct extent_state *cached_state = NULL;
         u64 page_start = page_offset(page);
         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
 
-
         /*
          * we have the page locked, so new writeback can't start,
          * and the dirty bit won't be cleared while we are here.
@@ -6507,13 +6481,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
          */
         wait_on_page_writeback(page);
 
-        tree = &BTRFS_I(page->mapping->host)->io_tree;
+        tree = &BTRFS_I(inode)->io_tree;
         if (offset) {
                 btrfs_releasepage(page, GFP_NOFS);
                 return;
         }
         lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
-        ordered = btrfs_lookup_ordered_extent(page->mapping->host,
+        ordered = btrfs_lookup_ordered_extent(inode,
                                            page_offset(page));
         if (ordered) {
                 /*
@@ -6528,9 +6502,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
                  * whoever cleared the private bit is responsible
                  * for the finish_ordered_io
                  */
-                if (TestClearPagePrivate2(page)) {
-                        btrfs_finish_ordered_io(page->mapping->host,
-                                                page_start, page_end);
+                if (TestClearPagePrivate2(page) &&
+                    btrfs_dec_test_ordered_pending(inode, &ordered, page_start,
+                                                   PAGE_CACHE_SIZE, 1)) {
+                        btrfs_finish_ordered_io(ordered);
                 }
                 btrfs_put_ordered_extent(ordered);
                 cached_state = NULL;