aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-07-17 12:53:50 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:04 -0400
commit: e6dcd2dc9c489108648e2ed543315dd134d50a9a (patch)
tree: cddf6f588b65e28c5feb8bff89b22d8ff70f8a50 /fs
parent: 77a41afb7d0dd0f27b6f2f1a5bc701929c7034de (diff)
Btrfs: New data=ordered implementation
The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time.

The new code changes the way data allocations and extents work:

* When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated and inserted into a per-inode rbtree to track the pending extents.

* As each page is written, EXTENT_ORDERED is cleared on the bytes corresponding to that page.

* When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, the previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/btrfs_inode.h4
-rw-r--r--fs/btrfs/ctree.h19
-rw-r--r--fs/btrfs/disk-io.c13
-rw-r--r--fs/btrfs/extent-tree.c132
-rw-r--r--fs/btrfs/extent_io.c52
-rw-r--r--fs/btrfs/extent_io.h14
-rw-r--r--fs/btrfs/extent_map.c5
-rw-r--r--fs/btrfs/file-item.c62
-rw-r--r--fs/btrfs/file.c67
-rw-r--r--fs/btrfs/inode.c447
-rw-r--r--fs/btrfs/ordered-data.c455
-rw-r--r--fs/btrfs/ordered-data.h71
-rw-r--r--fs/btrfs/transaction.c67
-rw-r--r--fs/btrfs/transaction.h4
14 files changed, 910 insertions, 502 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 40b4e0c9cd09..8d03687510e0 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -21,6 +21,7 @@
21 21
22#include "extent_map.h" 22#include "extent_map.h"
23#include "extent_io.h" 23#include "extent_io.h"
24#include "ordered-data.h"
24 25
25/* in memory btrfs inode */ 26/* in memory btrfs inode */
26struct btrfs_inode { 27struct btrfs_inode {
@@ -32,9 +33,8 @@ struct btrfs_inode {
32 struct extent_io_tree io_failure_tree; 33 struct extent_io_tree io_failure_tree;
33 struct mutex csum_mutex; 34 struct mutex csum_mutex;
34 struct inode vfs_inode; 35 struct inode vfs_inode;
35 atomic_t ordered_writeback; 36 struct btrfs_ordered_inode_tree ordered_tree;
36 37
37 u64 ordered_trans;
38 /* 38 /*
39 * transid of the trans_handle that last modified this inode 39 * transid of the trans_handle that last modified this inode
40 */ 40 */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f3783dbd9b60..ceebc052ddcb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/completion.h> 26#include <linux/completion.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/wait.h>
28#include <asm/kmap_types.h> 29#include <asm/kmap_types.h>
29#include "bit-radix.h" 30#include "bit-radix.h"
30#include "extent_io.h" 31#include "extent_io.h"
@@ -37,6 +38,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
37extern struct kmem_cache *btrfs_transaction_cachep; 38extern struct kmem_cache *btrfs_transaction_cachep;
38extern struct kmem_cache *btrfs_bit_radix_cachep; 39extern struct kmem_cache *btrfs_bit_radix_cachep;
39extern struct kmem_cache *btrfs_path_cachep; 40extern struct kmem_cache *btrfs_path_cachep;
41struct btrfs_ordered_sum;
40 42
41#define BTRFS_MAGIC "_B5RfS_M" 43#define BTRFS_MAGIC "_B5RfS_M"
42 44
@@ -510,6 +512,7 @@ struct btrfs_fs_info {
510 u64 max_inline; 512 u64 max_inline;
511 u64 alloc_start; 513 u64 alloc_start;
512 struct btrfs_transaction *running_transaction; 514 struct btrfs_transaction *running_transaction;
515 wait_queue_head_t transaction_throttle;
513 struct btrfs_super_block super_copy; 516 struct btrfs_super_block super_copy;
514 struct btrfs_super_block super_for_commit; 517 struct btrfs_super_block super_for_commit;
515 struct block_device *__bdev; 518 struct block_device *__bdev;
@@ -541,6 +544,7 @@ struct btrfs_fs_info {
541 */ 544 */
542 struct btrfs_workers workers; 545 struct btrfs_workers workers;
543 struct btrfs_workers endio_workers; 546 struct btrfs_workers endio_workers;
547 struct btrfs_workers endio_write_workers;
544 struct btrfs_workers submit_workers; 548 struct btrfs_workers submit_workers;
545 struct task_struct *transaction_kthread; 549 struct task_struct *transaction_kthread;
546 struct task_struct *cleaner_kthread; 550 struct task_struct *cleaner_kthread;
@@ -1384,6 +1388,17 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1384 u64 owner, u64 owner_offset, 1388 u64 owner, u64 owner_offset,
1385 u64 empty_size, u64 hint_byte, 1389 u64 empty_size, u64 hint_byte,
1386 u64 search_end, struct btrfs_key *ins, u64 data); 1390 u64 search_end, struct btrfs_key *ins, u64 data);
1391int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
1392 struct btrfs_root *root,
1393 u64 root_objectid, u64 ref_generation,
1394 u64 owner, u64 owner_offset,
1395 struct btrfs_key *ins);
1396int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1397 struct btrfs_root *root,
1398 u64 num_bytes, u64 min_alloc_size,
1399 u64 empty_size, u64 hint_byte,
1400 u64 search_end, struct btrfs_key *ins,
1401 u64 data);
1387int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1402int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1388 struct extent_buffer *buf); 1403 struct extent_buffer *buf);
1389int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 1404int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
@@ -1556,9 +1571,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
1556 u64 bytenr, int mod); 1571 u64 bytenr, int mod);
1557int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 1572int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
1558 struct btrfs_root *root, struct inode *inode, 1573 struct btrfs_root *root, struct inode *inode,
1559 struct bio *bio, char *sums); 1574 struct btrfs_ordered_sum *sums);
1560int btrfs_csum_one_bio(struct btrfs_root *root, 1575int btrfs_csum_one_bio(struct btrfs_root *root,
1561 struct bio *bio, char **sums_ret); 1576 struct bio *bio, struct btrfs_ordered_sum **sums_ret);
1562struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, 1577struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
1563 struct btrfs_root *root, 1578 struct btrfs_root *root,
1564 struct btrfs_path *path, 1579 struct btrfs_path *path,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b01b3f4f92a9..4a5ebafb935a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -407,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio,
407 end_io_wq->error = err; 407 end_io_wq->error = err;
408 end_io_wq->work.func = end_workqueue_fn; 408 end_io_wq->work.func = end_workqueue_fn;
409 end_io_wq->work.flags = 0; 409 end_io_wq->work.flags = 0;
410 btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); 410 if (bio->bi_rw & (1 << BIO_RW))
411 btrfs_queue_worker(&fs_info->endio_write_workers,
412 &end_io_wq->work);
413 else
414 btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
411 415
412#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) 416#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
413 return 0; 417 return 0;
@@ -1286,6 +1290,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1286 mutex_init(&fs_info->transaction_kthread_mutex); 1290 mutex_init(&fs_info->transaction_kthread_mutex);
1287 mutex_init(&fs_info->cleaner_mutex); 1291 mutex_init(&fs_info->cleaner_mutex);
1288 mutex_init(&fs_info->volume_mutex); 1292 mutex_init(&fs_info->volume_mutex);
1293 init_waitqueue_head(&fs_info->transaction_throttle);
1289 1294
1290#if 0 1295#if 0
1291 ret = add_hasher(fs_info, "crc32c"); 1296 ret = add_hasher(fs_info, "crc32c");
@@ -1325,9 +1330,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1325 btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); 1330 btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
1326 btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); 1331 btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
1327 btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); 1332 btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
1333 btrfs_init_workers(&fs_info->endio_write_workers,
1334 fs_info->thread_pool_size);
1328 btrfs_start_workers(&fs_info->workers, 1); 1335 btrfs_start_workers(&fs_info->workers, 1);
1329 btrfs_start_workers(&fs_info->submit_workers, 1); 1336 btrfs_start_workers(&fs_info->submit_workers, 1);
1330 btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); 1337 btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
1338 btrfs_start_workers(&fs_info->endio_write_workers,
1339 fs_info->thread_pool_size);
1331 1340
1332 err = -EINVAL; 1341 err = -EINVAL;
1333 if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { 1342 if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
@@ -1447,6 +1456,7 @@ fail_sb_buffer:
1447 extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); 1456 extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
1448 btrfs_stop_workers(&fs_info->workers); 1457 btrfs_stop_workers(&fs_info->workers);
1449 btrfs_stop_workers(&fs_info->endio_workers); 1458 btrfs_stop_workers(&fs_info->endio_workers);
1459 btrfs_stop_workers(&fs_info->endio_write_workers);
1450 btrfs_stop_workers(&fs_info->submit_workers); 1460 btrfs_stop_workers(&fs_info->submit_workers);
1451fail_iput: 1461fail_iput:
1452 iput(fs_info->btree_inode); 1462 iput(fs_info->btree_inode);
@@ -1702,6 +1712,7 @@ int close_ctree(struct btrfs_root *root)
1702 1712
1703 btrfs_stop_workers(&fs_info->workers); 1713 btrfs_stop_workers(&fs_info->workers);
1704 btrfs_stop_workers(&fs_info->endio_workers); 1714 btrfs_stop_workers(&fs_info->endio_workers);
1715 btrfs_stop_workers(&fs_info->endio_write_workers);
1705 btrfs_stop_workers(&fs_info->submit_workers); 1716 btrfs_stop_workers(&fs_info->submit_workers);
1706 1717
1707 iput(fs_info->btree_inode); 1718 iput(fs_info->btree_inode);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8ebfa6be0790..343d1101c31c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1895,36 +1895,17 @@ error:
1895 return ret; 1895 return ret;
1896} 1896}
1897 1897
1898/* 1898static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1899 * finds a free extent and does all the dirty work required for allocation 1899 struct btrfs_root *root,
1900 * returns the key for the extent through ins, and a tree buffer for 1900 u64 num_bytes, u64 min_alloc_size,
1901 * the first block of the extent through buf. 1901 u64 empty_size, u64 hint_byte,
1902 * 1902 u64 search_end, struct btrfs_key *ins,
1903 * returns 0 if everything worked, non-zero otherwise. 1903 u64 data)
1904 */
1905int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1906 struct btrfs_root *root,
1907 u64 num_bytes, u64 min_alloc_size,
1908 u64 root_objectid, u64 ref_generation,
1909 u64 owner, u64 owner_offset,
1910 u64 empty_size, u64 hint_byte,
1911 u64 search_end, struct btrfs_key *ins, u64 data)
1912{ 1904{
1913 int ret; 1905 int ret;
1914 int pending_ret;
1915 u64 super_used;
1916 u64 root_used;
1917 u64 search_start = 0; 1906 u64 search_start = 0;
1918 u64 alloc_profile; 1907 u64 alloc_profile;
1919 u32 sizes[2];
1920 struct btrfs_fs_info *info = root->fs_info; 1908 struct btrfs_fs_info *info = root->fs_info;
1921 struct btrfs_root *extent_root = info->extent_root;
1922 struct btrfs_extent_item *extent_item;
1923 struct btrfs_extent_ref *ref;
1924 struct btrfs_path *path;
1925 struct btrfs_key keys[2];
1926
1927 maybe_lock_mutex(root);
1928 1909
1929 if (data) { 1910 if (data) {
1930 alloc_profile = info->avail_data_alloc_bits & 1911 alloc_profile = info->avail_data_alloc_bits &
@@ -1974,11 +1955,48 @@ again:
1974 } 1955 }
1975 if (ret) { 1956 if (ret) {
1976 printk("allocation failed flags %Lu\n", data); 1957 printk("allocation failed flags %Lu\n", data);
1977 }
1978 if (ret) {
1979 BUG(); 1958 BUG();
1980 goto out;
1981 } 1959 }
1960 clear_extent_dirty(&root->fs_info->free_space_cache,
1961 ins->objectid, ins->objectid + ins->offset - 1,
1962 GFP_NOFS);
1963 return 0;
1964}
1965
1966int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1967 struct btrfs_root *root,
1968 u64 num_bytes, u64 min_alloc_size,
1969 u64 empty_size, u64 hint_byte,
1970 u64 search_end, struct btrfs_key *ins,
1971 u64 data)
1972{
1973 int ret;
1974 maybe_lock_mutex(root);
1975 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
1976 empty_size, hint_byte, search_end, ins,
1977 data);
1978 maybe_unlock_mutex(root);
1979 return ret;
1980}
1981
1982static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
1983 struct btrfs_root *root,
1984 u64 root_objectid, u64 ref_generation,
1985 u64 owner, u64 owner_offset,
1986 struct btrfs_key *ins)
1987{
1988 int ret;
1989 int pending_ret;
1990 u64 super_used;
1991 u64 root_used;
1992 u64 num_bytes = ins->offset;
1993 u32 sizes[2];
1994 struct btrfs_fs_info *info = root->fs_info;
1995 struct btrfs_root *extent_root = info->extent_root;
1996 struct btrfs_extent_item *extent_item;
1997 struct btrfs_extent_ref *ref;
1998 struct btrfs_path *path;
1999 struct btrfs_key keys[2];
1982 2000
1983 /* block accounting for super block */ 2001 /* block accounting for super block */
1984 spin_lock_irq(&info->delalloc_lock); 2002 spin_lock_irq(&info->delalloc_lock);
@@ -1990,10 +2008,6 @@ again:
1990 root_used = btrfs_root_used(&root->root_item); 2008 root_used = btrfs_root_used(&root->root_item);
1991 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 2009 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1992 2010
1993 clear_extent_dirty(&root->fs_info->free_space_cache,
1994 ins->objectid, ins->objectid + ins->offset - 1,
1995 GFP_NOFS);
1996
1997 if (root == extent_root) { 2011 if (root == extent_root) {
1998 set_extent_bits(&root->fs_info->extent_ins, ins->objectid, 2012 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1999 ins->objectid + ins->offset - 1, 2013 ins->objectid + ins->offset - 1,
@@ -2001,10 +2015,6 @@ again:
2001 goto update_block; 2015 goto update_block;
2002 } 2016 }
2003 2017
2004 WARN_ON(trans->alloc_exclude_nr);
2005 trans->alloc_exclude_start = ins->objectid;
2006 trans->alloc_exclude_nr = ins->offset;
2007
2008 memcpy(&keys[0], ins, sizeof(*ins)); 2018 memcpy(&keys[0], ins, sizeof(*ins));
2009 keys[1].offset = hash_extent_ref(root_objectid, ref_generation, 2019 keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
2010 owner, owner_offset); 2020 owner, owner_offset);
@@ -2054,6 +2064,51 @@ update_block:
2054 BUG(); 2064 BUG();
2055 } 2065 }
2056out: 2066out:
2067 return ret;
2068}
2069
2070int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2071 struct btrfs_root *root,
2072 u64 root_objectid, u64 ref_generation,
2073 u64 owner, u64 owner_offset,
2074 struct btrfs_key *ins)
2075{
2076 int ret;
2077 maybe_lock_mutex(root);
2078 ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2079 ref_generation, owner,
2080 owner_offset, ins);
2081 maybe_unlock_mutex(root);
2082 return ret;
2083}
2084/*
2085 * finds a free extent and does all the dirty work required for allocation
2086 * returns the key for the extent through ins, and a tree buffer for
2087 * the first block of the extent through buf.
2088 *
2089 * returns 0 if everything worked, non-zero otherwise.
2090 */
2091int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2092 struct btrfs_root *root,
2093 u64 num_bytes, u64 min_alloc_size,
2094 u64 root_objectid, u64 ref_generation,
2095 u64 owner, u64 owner_offset,
2096 u64 empty_size, u64 hint_byte,
2097 u64 search_end, struct btrfs_key *ins, u64 data)
2098{
2099 int ret;
2100
2101 maybe_lock_mutex(root);
2102
2103 ret = __btrfs_reserve_extent(trans, root, num_bytes,
2104 min_alloc_size, empty_size, hint_byte,
2105 search_end, ins, data);
2106 BUG_ON(ret);
2107 ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2108 ref_generation, owner,
2109 owner_offset, ins);
2110 BUG_ON(ret);
2111
2057 maybe_unlock_mutex(root); 2112 maybe_unlock_mutex(root);
2058 return ret; 2113 return ret;
2059} 2114}
@@ -2288,8 +2343,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2288 mutex_lock(&root->fs_info->alloc_mutex); 2343 mutex_lock(&root->fs_info->alloc_mutex);
2289 2344
2290 /* we've dropped the lock, double check */ 2345 /* we've dropped the lock, double check */
2291 ret = drop_snap_lookup_refcount(root, bytenr, 2346 ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2292 blocksize, &refs); 2347 &refs);
2293 BUG_ON(ret); 2348 BUG_ON(ret);
2294 if (refs != 1) { 2349 if (refs != 1) {
2295 parent = path->nodes[*level]; 2350 parent = path->nodes[*level];
@@ -2584,7 +2639,6 @@ out_unlock:
2584 kfree(ra); 2639 kfree(ra);
2585 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); 2640 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2586 if (trans) { 2641 if (trans) {
2587 btrfs_add_ordered_inode(inode);
2588 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 2642 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2589 mark_inode_dirty(inode); 2643 mark_inode_dirty(inode);
2590 } 2644 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 40a5f53cb040..3f82a6e9ca4f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -793,6 +793,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
793} 793}
794EXPORT_SYMBOL(set_extent_dirty); 794EXPORT_SYMBOL(set_extent_dirty);
795 795
796int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
797 gfp_t mask)
798{
799 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
800}
801EXPORT_SYMBOL(set_extent_ordered);
802
796int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 803int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
797 int bits, gfp_t mask) 804 int bits, gfp_t mask)
798{ 805{
@@ -812,8 +819,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
812 gfp_t mask) 819 gfp_t mask)
813{ 820{
814 return set_extent_bit(tree, start, end, 821 return set_extent_bit(tree, start, end,
815 EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, 822 EXTENT_DELALLOC | EXTENT_DIRTY,
816 mask); 823 0, NULL, mask);
817} 824}
818EXPORT_SYMBOL(set_extent_delalloc); 825EXPORT_SYMBOL(set_extent_delalloc);
819 826
@@ -825,6 +832,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
825} 832}
826EXPORT_SYMBOL(clear_extent_dirty); 833EXPORT_SYMBOL(clear_extent_dirty);
827 834
835int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
836 gfp_t mask)
837{
838 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
839}
840EXPORT_SYMBOL(clear_extent_ordered);
841
828int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 842int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
829 gfp_t mask) 843 gfp_t mask)
830{ 844{
@@ -1395,10 +1409,9 @@ static int end_bio_extent_writepage(struct bio *bio,
1395 1409
1396 if (--bvec >= bio->bi_io_vec) 1410 if (--bvec >= bio->bi_io_vec)
1397 prefetchw(&bvec->bv_page->flags); 1411 prefetchw(&bvec->bv_page->flags);
1398
1399 if (tree->ops && tree->ops->writepage_end_io_hook) { 1412 if (tree->ops && tree->ops->writepage_end_io_hook) {
1400 ret = tree->ops->writepage_end_io_hook(page, start, 1413 ret = tree->ops->writepage_end_io_hook(page, start,
1401 end, state); 1414 end, state, uptodate);
1402 if (ret) 1415 if (ret)
1403 uptodate = 0; 1416 uptodate = 0;
1404 } 1417 }
@@ -1868,9 +1881,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
1868 unlock_extent(tree, cur, end, GFP_NOFS); 1881 unlock_extent(tree, cur, end, GFP_NOFS);
1869 break; 1882 break;
1870 } 1883 }
1871
1872 extent_offset = cur - em->start; 1884 extent_offset = cur - em->start;
1885 if (extent_map_end(em) <= cur) {
1886printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
1887 }
1873 BUG_ON(extent_map_end(em) <= cur); 1888 BUG_ON(extent_map_end(em) <= cur);
1889 if (end < cur) {
1890printk("2bad mapping end %Lu cur %Lu\n", end, cur);
1891 }
1874 BUG_ON(end < cur); 1892 BUG_ON(end < cur);
1875 1893
1876 iosize = min(extent_map_end(em) - cur, end - cur + 1); 1894 iosize = min(extent_map_end(em) - cur, end - cur + 1);
@@ -1976,6 +1994,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
1976 u64 last_byte = i_size_read(inode); 1994 u64 last_byte = i_size_read(inode);
1977 u64 block_start; 1995 u64 block_start;
1978 u64 iosize; 1996 u64 iosize;
1997 u64 unlock_start;
1979 sector_t sector; 1998 sector_t sector;
1980 struct extent_map *em; 1999 struct extent_map *em;
1981 struct block_device *bdev; 2000 struct block_device *bdev;
@@ -1988,7 +2007,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
1988 u64 nr_delalloc; 2007 u64 nr_delalloc;
1989 u64 delalloc_end; 2008 u64 delalloc_end;
1990 2009
1991
1992 WARN_ON(!PageLocked(page)); 2010 WARN_ON(!PageLocked(page));
1993 page_offset = i_size & (PAGE_CACHE_SIZE - 1); 2011 page_offset = i_size & (PAGE_CACHE_SIZE - 1);
1994 if (page->index > end_index || 2012 if (page->index > end_index ||
@@ -2030,6 +2048,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2030 delalloc_start = delalloc_end + 1; 2048 delalloc_start = delalloc_end + 1;
2031 } 2049 }
2032 lock_extent(tree, start, page_end, GFP_NOFS); 2050 lock_extent(tree, start, page_end, GFP_NOFS);
2051 unlock_start = start;
2033 2052
2034 end = page_end; 2053 end = page_end;
2035 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { 2054 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
@@ -2038,6 +2057,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2038 2057
2039 if (last_byte <= start) { 2058 if (last_byte <= start) {
2040 clear_extent_dirty(tree, start, page_end, GFP_NOFS); 2059 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
2060 unlock_extent(tree, start, page_end, GFP_NOFS);
2061 if (tree->ops && tree->ops->writepage_end_io_hook)
2062 tree->ops->writepage_end_io_hook(page, start,
2063 page_end, NULL, 1);
2064 unlock_start = page_end + 1;
2041 goto done; 2065 goto done;
2042 } 2066 }
2043 2067
@@ -2047,6 +2071,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2047 while (cur <= end) { 2071 while (cur <= end) {
2048 if (cur >= last_byte) { 2072 if (cur >= last_byte) {
2049 clear_extent_dirty(tree, cur, page_end, GFP_NOFS); 2073 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
2074 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2075 if (tree->ops && tree->ops->writepage_end_io_hook)
2076 tree->ops->writepage_end_io_hook(page, cur,
2077 page_end, NULL, 1);
2078 unlock_start = page_end + 1;
2050 break; 2079 break;
2051 } 2080 }
2052 em = epd->get_extent(inode, page, page_offset, cur, 2081 em = epd->get_extent(inode, page, page_offset, cur,
@@ -2071,8 +2100,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2071 block_start == EXTENT_MAP_INLINE) { 2100 block_start == EXTENT_MAP_INLINE) {
2072 clear_extent_dirty(tree, cur, 2101 clear_extent_dirty(tree, cur,
2073 cur + iosize - 1, GFP_NOFS); 2102 cur + iosize - 1, GFP_NOFS);
2103
2104 unlock_extent(tree, unlock_start, cur + iosize -1,
2105 GFP_NOFS);
2106 if (tree->ops && tree->ops->writepage_end_io_hook)
2107 tree->ops->writepage_end_io_hook(page, cur,
2108 cur + iosize - 1,
2109 NULL, 1);
2074 cur = cur + iosize; 2110 cur = cur + iosize;
2075 page_offset += iosize; 2111 page_offset += iosize;
2112 unlock_start = cur;
2076 continue; 2113 continue;
2077 } 2114 }
2078 2115
@@ -2119,7 +2156,8 @@ done:
2119 set_page_writeback(page); 2156 set_page_writeback(page);
2120 end_page_writeback(page); 2157 end_page_writeback(page);
2121 } 2158 }
2122 unlock_extent(tree, start, page_end, GFP_NOFS); 2159 if (unlock_start <= page_end)
2160 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2123 unlock_page(page); 2161 unlock_page(page);
2124 return 0; 2162 return 0;
2125} 2163}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f1960dafaa19..2268a7995896 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -13,6 +13,8 @@
13#define EXTENT_DEFRAG (1 << 6) 13#define EXTENT_DEFRAG (1 << 6)
14#define EXTENT_DEFRAG_DONE (1 << 7) 14#define EXTENT_DEFRAG_DONE (1 << 7)
15#define EXTENT_BUFFER_FILLED (1 << 8) 15#define EXTENT_BUFFER_FILLED (1 << 8)
16#define EXTENT_ORDERED (1 << 9)
17#define EXTENT_ORDERED_METADATA (1 << 10)
16#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 18#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
17 19
18/* 20/*
@@ -42,7 +44,7 @@ struct extent_io_ops {
42 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, 44 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
43 struct extent_state *state); 45 struct extent_state *state);
44 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 46 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
45 struct extent_state *state); 47 struct extent_state *state, int uptodate);
46 int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, 48 int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
47 unsigned long old, unsigned long bits); 49 unsigned long old, unsigned long bits);
48 int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, 50 int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end,
@@ -131,6 +133,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
131 int bits, int filled); 133 int bits, int filled);
132int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 134int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
133 int bits, gfp_t mask); 135 int bits, gfp_t mask);
136int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
137 int bits, int wake, int delete, gfp_t mask);
134int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 138int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
135 int bits, gfp_t mask); 139 int bits, gfp_t mask);
136int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 140int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
@@ -141,8 +145,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
141 gfp_t mask); 145 gfp_t mask);
142int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 146int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
143 gfp_t mask); 147 gfp_t mask);
148int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
149 gfp_t mask);
150int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
151 u64 end, gfp_t mask);
144int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 152int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
145 gfp_t mask); 153 gfp_t mask);
154int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
155 gfp_t mask);
146int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 156int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
147 u64 *start_ret, u64 *end_ret, int bits); 157 u64 *start_ret, u64 *end_ret, int bits);
148struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 158struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
@@ -209,6 +219,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
209 unsigned long start, unsigned long len); 219 unsigned long start, unsigned long len);
210int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, 220int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
211 struct extent_buffer *eb); 221 struct extent_buffer *eb);
222int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
223int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
212int clear_extent_buffer_dirty(struct extent_io_tree *tree, 224int clear_extent_buffer_dirty(struct extent_io_tree *tree,
213 struct extent_buffer *eb); 225 struct extent_buffer *eb);
214int set_extent_buffer_dirty(struct extent_io_tree *tree, 226int set_extent_buffer_dirty(struct extent_io_tree *tree,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f5a04eb9a2ac..81123277c2b8 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -206,10 +206,11 @@ int add_extent_mapping(struct extent_map_tree *tree,
206 struct extent_map *merge = NULL; 206 struct extent_map *merge = NULL;
207 struct rb_node *rb; 207 struct rb_node *rb;
208 208
209 BUG_ON(spin_trylock(&tree->lock));
209 rb = tree_insert(&tree->map, em->start, &em->rb_node); 210 rb = tree_insert(&tree->map, em->start, &em->rb_node);
210 if (rb) { 211 if (rb) {
211 merge = rb_entry(rb, struct extent_map, rb_node);
212 ret = -EEXIST; 212 ret = -EEXIST;
213 free_extent_map(merge);
213 goto out; 214 goto out;
214 } 215 }
215 atomic_inc(&em->refs); 216 atomic_inc(&em->refs);
@@ -268,6 +269,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
268 struct rb_node *next = NULL; 269 struct rb_node *next = NULL;
269 u64 end = range_end(start, len); 270 u64 end = range_end(start, len);
270 271
272 BUG_ON(spin_trylock(&tree->lock));
271 em = tree->last; 273 em = tree->last;
272 if (em && end > em->start && start < extent_map_end(em)) 274 if (em && end > em->start && start < extent_map_end(em))
273 goto found; 275 goto found;
@@ -318,6 +320,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
318{ 320{
319 int ret = 0; 321 int ret = 0;
320 322
323 BUG_ON(spin_trylock(&tree->lock));
321 rb_erase(&em->rb_node, &tree->map); 324 rb_erase(&em->rb_node, &tree->map);
322 em->in_tree = 0; 325 em->in_tree = 0;
323 if (tree->last == em) 326 if (tree->last == em)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f537eb43c2c6..345caf8ff516 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -135,26 +135,37 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
135} 135}
136 136
137int btrfs_csum_one_bio(struct btrfs_root *root, 137int btrfs_csum_one_bio(struct btrfs_root *root,
138 struct bio *bio, char **sums_ret) 138 struct bio *bio, struct btrfs_ordered_sum **sums_ret)
139{ 139{
140 u32 *sums; 140 struct btrfs_ordered_sum *sums;
141 struct btrfs_sector_sum *sector_sum;
141 char *data; 142 char *data;
142 struct bio_vec *bvec = bio->bi_io_vec; 143 struct bio_vec *bvec = bio->bi_io_vec;
143 int bio_index = 0; 144 int bio_index = 0;
144 145
145 sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS); 146 WARN_ON(bio->bi_vcnt <= 0);
147 sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
146 if (!sums) 148 if (!sums)
147 return -ENOMEM; 149 return -ENOMEM;
148 *sums_ret = (char *)sums; 150 *sums_ret = sums;
151 sector_sum = &sums->sums;
152 sums->file_offset = page_offset(bvec->bv_page);
153 sums->len = bio->bi_size;
154 INIT_LIST_HEAD(&sums->list);
149 155
150 while(bio_index < bio->bi_vcnt) { 156 while(bio_index < bio->bi_vcnt) {
151 data = kmap_atomic(bvec->bv_page, KM_USER0); 157 data = kmap_atomic(bvec->bv_page, KM_USER0);
152 *sums = ~(u32)0; 158 sector_sum->sum = ~(u32)0;
153 *sums = btrfs_csum_data(root, data + bvec->bv_offset, 159 sector_sum->sum = btrfs_csum_data(root,
154 *sums, bvec->bv_len); 160 data + bvec->bv_offset,
161 sector_sum->sum,
162 bvec->bv_len);
155 kunmap_atomic(data, KM_USER0); 163 kunmap_atomic(data, KM_USER0);
156 btrfs_csum_final(*sums, (char *)sums); 164 btrfs_csum_final(sector_sum->sum,
157 sums++; 165 (char *)&sector_sum->sum);
166 sector_sum->offset = page_offset(bvec->bv_page) +
167 bvec->bv_offset;
168 sector_sum++;
158 bio_index++; 169 bio_index++;
159 bvec++; 170 bvec++;
160 } 171 }
@@ -163,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root,
163 174
164int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 175int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
165 struct btrfs_root *root, struct inode *inode, 176 struct btrfs_root *root, struct inode *inode,
166 struct bio *bio, char *sums) 177 struct btrfs_ordered_sum *sums)
167{ 178{
168 u64 objectid = inode->i_ino; 179 u64 objectid = inode->i_ino;
169 u64 offset; 180 u64 offset;
@@ -171,17 +182,16 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
171 struct btrfs_key file_key; 182 struct btrfs_key file_key;
172 struct btrfs_key found_key; 183 struct btrfs_key found_key;
173 u64 next_offset; 184 u64 next_offset;
185 u64 total_bytes = 0;
174 int found_next; 186 int found_next;
175 struct btrfs_path *path; 187 struct btrfs_path *path;
176 struct btrfs_csum_item *item; 188 struct btrfs_csum_item *item;
177 struct btrfs_csum_item *item_end; 189 struct btrfs_csum_item *item_end;
178 struct extent_buffer *leaf = NULL; 190 struct extent_buffer *leaf = NULL;
179 u64 csum_offset; 191 u64 csum_offset;
180 u32 *sums32 = (u32 *)sums; 192 struct btrfs_sector_sum *sector_sum;
181 u32 nritems; 193 u32 nritems;
182 u32 ins_size; 194 u32 ins_size;
183 int bio_index = 0;
184 struct bio_vec *bvec = bio->bi_io_vec;
185 char *eb_map; 195 char *eb_map;
186 char *eb_token; 196 char *eb_token;
187 unsigned long map_len; 197 unsigned long map_len;
@@ -189,10 +199,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
189 199
190 path = btrfs_alloc_path(); 200 path = btrfs_alloc_path();
191 BUG_ON(!path); 201 BUG_ON(!path);
202 sector_sum = &sums->sums;
192again: 203again:
193 next_offset = (u64)-1; 204 next_offset = (u64)-1;
194 found_next = 0; 205 found_next = 0;
195 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 206 offset = sector_sum->offset;
196 file_key.objectid = objectid; 207 file_key.objectid = objectid;
197 file_key.offset = offset; 208 file_key.offset = offset;
198 btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); 209 btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
@@ -303,7 +314,7 @@ found:
303 item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + 314 item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
304 btrfs_item_size_nr(leaf, path->slots[0])); 315 btrfs_item_size_nr(leaf, path->slots[0]));
305 eb_token = NULL; 316 eb_token = NULL;
306next_bvec: 317next_sector:
307 318
308 if (!eb_token || 319 if (!eb_token ||
309 (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) { 320 (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) {
@@ -321,21 +332,20 @@ next_bvec:
321 } 332 }
322 if (eb_token) { 333 if (eb_token) {
323 memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), 334 memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
324 sums32, BTRFS_CRC32_SIZE); 335 &sector_sum->sum, BTRFS_CRC32_SIZE);
325 } else { 336 } else {
326 write_extent_buffer(leaf, sums32, (unsigned long)item, 337 write_extent_buffer(leaf, &sector_sum->sum,
327 BTRFS_CRC32_SIZE); 338 (unsigned long)item, BTRFS_CRC32_SIZE);
328 } 339 }
329 bio_index++; 340 total_bytes += root->sectorsize;
330 bvec++; 341 sector_sum++;
331 sums32++; 342 if (total_bytes < sums->len) {
332 if (bio_index < bio->bi_vcnt) {
333 item = (struct btrfs_csum_item *)((char *)item + 343 item = (struct btrfs_csum_item *)((char *)item +
334 BTRFS_CRC32_SIZE); 344 BTRFS_CRC32_SIZE);
335 if (item < item_end && offset + PAGE_CACHE_SIZE == 345 if (item < item_end && offset + PAGE_CACHE_SIZE ==
336 page_offset(bvec->bv_page)) { 346 sector_sum->offset) {
337 offset = page_offset(bvec->bv_page); 347 offset = sector_sum->offset;
338 goto next_bvec; 348 goto next_sector;
339 } 349 }
340 } 350 }
341 if (eb_token) { 351 if (eb_token) {
@@ -343,7 +353,7 @@ next_bvec:
343 eb_token = NULL; 353 eb_token = NULL;
344 } 354 }
345 btrfs_mark_buffer_dirty(path->nodes[0]); 355 btrfs_mark_buffer_dirty(path->nodes[0]);
346 if (bio_index < bio->bi_vcnt) { 356 if (total_bytes < sums->len) {
347 btrfs_release_path(root, path); 357 btrfs_release_path(root, path);
348 goto again; 358 goto again;
349 } 359 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8037792f8789..12e765f7e0d4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -34,7 +34,6 @@
34#include "disk-io.h" 34#include "disk-io.h"
35#include "transaction.h" 35#include "transaction.h"
36#include "btrfs_inode.h" 36#include "btrfs_inode.h"
37#include "ordered-data.h"
38#include "ioctl.h" 37#include "ioctl.h"
39#include "print-tree.h" 38#include "print-tree.h"
40#include "compat.h" 39#include "compat.h"
@@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
273 u64 mask = root->sectorsize - 1; 272 u64 mask = root->sectorsize - 1;
274 last_pos_in_file = (isize + mask) & ~mask; 273 last_pos_in_file = (isize + mask) & ~mask;
275 hole_size = (start_pos - last_pos_in_file + mask) & ~mask; 274 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
276 if (last_pos_in_file < start_pos) { 275 if (hole_size > 0) {
276 btrfs_wait_ordered_range(inode, last_pos_in_file,
277 last_pos_in_file + hole_size);
277 err = btrfs_drop_extents(trans, root, inode, 278 err = btrfs_drop_extents(trans, root, inode,
278 last_pos_in_file, 279 last_pos_in_file,
279 last_pos_in_file + hole_size, 280 last_pos_in_file + hole_size,
@@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
303 inline_size > root->fs_info->max_inline || 304 inline_size > root->fs_info->max_inline ||
304 (inline_size & (root->sectorsize -1)) == 0 || 305 (inline_size & (root->sectorsize -1)) == 0 ||
305 inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { 306 inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
306 u64 last_end; 307 /* check for reserved extents on each page, we don't want
307 308 * to reset the delalloc bit on things that already have
309 * extents reserved.
310 */
311 set_extent_delalloc(io_tree, start_pos,
312 end_of_last_block, GFP_NOFS);
308 for (i = 0; i < num_pages; i++) { 313 for (i = 0; i < num_pages; i++) {
309 struct page *p = pages[i]; 314 struct page *p = pages[i];
310 SetPageUptodate(p); 315 SetPageUptodate(p);
311 set_page_dirty(p); 316 set_page_dirty(p);
312 } 317 }
313 last_end = (u64)(pages[num_pages -1]->index) <<
314 PAGE_CACHE_SHIFT;
315 last_end += PAGE_CACHE_SIZE - 1;
316 set_extent_delalloc(io_tree, start_pos, end_of_last_block,
317 GFP_NOFS);
318 btrfs_add_ordered_inode(inode);
319 } else { 318 } else {
320 u64 aligned_end; 319 u64 aligned_end;
321 /* step one, delete the existing extents in this range */ 320 /* step one, delete the existing extents in this range */
@@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
350 struct extent_map *split = NULL; 349 struct extent_map *split = NULL;
351 struct extent_map *split2 = NULL; 350 struct extent_map *split2 = NULL;
352 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 351 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
352 struct extent_map *tmp;
353 u64 len = end - start + 1; 353 u64 len = end - start + 1;
354 u64 next_start;
354 int ret; 355 int ret;
355 int testend = 1; 356 int testend = 1;
356 357
358 WARN_ON(end < start);
357 if (end == (u64)-1) { 359 if (end == (u64)-1) {
358 len = (u64)-1; 360 len = (u64)-1;
359 testend = 0; 361 testend = 0;
@@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
370 spin_unlock(&em_tree->lock); 372 spin_unlock(&em_tree->lock);
371 break; 373 break;
372 } 374 }
375 tmp = rb_entry(&em->rb_node, struct extent_map, rb_node);
376 next_start = tmp->start;
373 remove_extent_mapping(em_tree, em); 377 remove_extent_mapping(em_tree, em);
374 378
375 if (em->block_start < EXTENT_MAP_LAST_BYTE && 379 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
@@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file,
778 struct inode *inode = fdentry(file)->d_inode; 782 struct inode *inode = fdentry(file)->d_inode;
779 int err = 0; 783 int err = 0;
780 u64 start_pos; 784 u64 start_pos;
785 u64 last_pos;
781 786
782 start_pos = pos & ~((u64)root->sectorsize - 1); 787 start_pos = pos & ~((u64)root->sectorsize - 1);
788 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
783 789
784 memset(pages, 0, num_pages * sizeof(struct page *)); 790 memset(pages, 0, num_pages * sizeof(struct page *));
785 791again:
786 for (i = 0; i < num_pages; i++) { 792 for (i = 0; i < num_pages; i++) {
787 pages[i] = grab_cache_page(inode->i_mapping, index + i); 793 pages[i] = grab_cache_page(inode->i_mapping, index + i);
788 if (!pages[i]) { 794 if (!pages[i]) {
789 err = -ENOMEM; 795 err = -ENOMEM;
790 BUG_ON(1); 796 BUG_ON(1);
791 } 797 }
792#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
793 ClearPageDirty(pages[i]);
794#else
795 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
796#endif
797 wait_on_page_writeback(pages[i]); 798 wait_on_page_writeback(pages[i]);
798 set_page_extent_mapped(pages[i]);
799 WARN_ON(!PageLocked(pages[i]));
800 } 799 }
801 if (start_pos < inode->i_size) { 800 if (start_pos < inode->i_size) {
802 u64 last_pos; 801 struct btrfs_ordered_extent *ordered;
803 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
804 lock_extent(&BTRFS_I(inode)->io_tree, 802 lock_extent(&BTRFS_I(inode)->io_tree,
805 start_pos, last_pos - 1, GFP_NOFS); 803 start_pos, last_pos - 1, GFP_NOFS);
804 ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
805 if (ordered &&
806 ordered->file_offset + ordered->len > start_pos &&
807 ordered->file_offset < last_pos) {
808 btrfs_put_ordered_extent(ordered);
809 unlock_extent(&BTRFS_I(inode)->io_tree,
810 start_pos, last_pos - 1, GFP_NOFS);
811 for (i = 0; i < num_pages; i++) {
812 unlock_page(pages[i]);
813 page_cache_release(pages[i]);
814 }
815 btrfs_wait_ordered_range(inode, start_pos,
816 last_pos - start_pos);
817 goto again;
818 }
819 if (ordered)
820 btrfs_put_ordered_extent(ordered);
821
806 clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, 822 clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
807 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, 823 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
808 GFP_NOFS); 824 GFP_NOFS);
809 unlock_extent(&BTRFS_I(inode)->io_tree, 825 unlock_extent(&BTRFS_I(inode)->io_tree,
810 start_pos, last_pos - 1, GFP_NOFS); 826 start_pos, last_pos - 1, GFP_NOFS);
811 } 827 }
828 for (i = 0; i < num_pages; i++) {
829#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
830 ClearPageDirty(pages[i]);
831#else
832 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
833#endif
834 set_page_extent_mapped(pages[i]);
835 WARN_ON(!PageLocked(pages[i]));
836 }
812 return 0; 837 return 0;
813} 838}
814 839
@@ -969,13 +994,11 @@ out_nolock:
969 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); 994 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
970 } 995 }
971 current->backing_dev_info = NULL; 996 current->backing_dev_info = NULL;
972 btrfs_ordered_throttle(root, inode);
973 return num_written ? num_written : err; 997 return num_written ? num_written : err;
974} 998}
975 999
976int btrfs_release_file(struct inode * inode, struct file * filp) 1000int btrfs_release_file(struct inode * inode, struct file * filp)
977{ 1001{
978 btrfs_del_ordered_inode(inode, 0);
979 if (filp->private_data) 1002 if (filp->private_data)
980 btrfs_ioctl_trans_end(filp); 1003 btrfs_ioctl_trans_end(filp);
981 return 0; 1004 return 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d39433dfb2c7..c5a62f0b9595 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -43,6 +43,7 @@
43#include "ioctl.h" 43#include "ioctl.h"
44#include "print-tree.h" 44#include "print-tree.h"
45#include "volumes.h" 45#include "volumes.h"
46#include "ordered-data.h"
46 47
47struct btrfs_iget_args { 48struct btrfs_iget_args {
48 u64 ino; 49 u64 ino;
@@ -109,10 +110,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
109 u64 num_bytes; 110 u64 num_bytes;
110 u64 cur_alloc_size; 111 u64 cur_alloc_size;
111 u64 blocksize = root->sectorsize; 112 u64 blocksize = root->sectorsize;
112 u64 orig_start = start;
113 u64 orig_num_bytes; 113 u64 orig_num_bytes;
114 struct btrfs_key ins; 114 struct btrfs_key ins;
115 int ret; 115 struct extent_map *em;
116 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
117 int ret = 0;
116 118
117 trans = btrfs_start_transaction(root, 1); 119 trans = btrfs_start_transaction(root, 1);
118 BUG_ON(!trans); 120 BUG_ON(!trans);
@@ -120,33 +122,44 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
120 122
121 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 123 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
122 num_bytes = max(blocksize, num_bytes); 124 num_bytes = max(blocksize, num_bytes);
123 ret = btrfs_drop_extents(trans, root, inode,
124 start, start + num_bytes, start, &alloc_hint);
125 orig_num_bytes = num_bytes; 125 orig_num_bytes = num_bytes;
126 126
127 if (alloc_hint == EXTENT_MAP_INLINE) 127 if (alloc_hint == EXTENT_MAP_INLINE)
128 goto out; 128 goto out;
129 129
130 BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); 130 BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
131 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1);
131 132
132 while(num_bytes > 0) { 133 while(num_bytes > 0) {
133 cur_alloc_size = min(num_bytes, root->fs_info->max_extent); 134 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
134 ret = btrfs_alloc_extent(trans, root, cur_alloc_size, 135 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
135 root->sectorsize, 136 root->sectorsize, 0, 0,
136 root->root_key.objectid, 137 (u64)-1, &ins, 1);
137 trans->transid,
138 inode->i_ino, start, 0,
139 alloc_hint, (u64)-1, &ins, 1);
140 if (ret) { 138 if (ret) {
141 WARN_ON(1); 139 WARN_ON(1);
142 goto out; 140 goto out;
143 } 141 }
142 em = alloc_extent_map(GFP_NOFS);
143 em->start = start;
144 em->len = ins.offset;
145 em->block_start = ins.objectid;
146 em->bdev = root->fs_info->fs_devices->latest_bdev;
147 while(1) {
148 spin_lock(&em_tree->lock);
149 ret = add_extent_mapping(em_tree, em);
150 spin_unlock(&em_tree->lock);
151 if (ret != -EEXIST) {
152 free_extent_map(em);
153 break;
154 }
155 btrfs_drop_extent_cache(inode, start,
156 start + ins.offset - 1);
157 }
158
144 cur_alloc_size = ins.offset; 159 cur_alloc_size = ins.offset;
145 ret = btrfs_insert_file_extent(trans, root, inode->i_ino, 160 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
146 start, ins.objectid, ins.offset, 161 ins.offset);
147 ins.offset, 0); 162 BUG_ON(ret);
148 inode->i_blocks += ins.offset >> 9;
149 btrfs_check_file(root, inode);
150 if (num_bytes < cur_alloc_size) { 163 if (num_bytes < cur_alloc_size) {
151 printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, 164 printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
152 cur_alloc_size); 165 cur_alloc_size);
@@ -156,10 +169,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
156 alloc_hint = ins.objectid + ins.offset; 169 alloc_hint = ins.objectid + ins.offset;
157 start += cur_alloc_size; 170 start += cur_alloc_size;
158 } 171 }
159 btrfs_drop_extent_cache(inode, orig_start,
160 orig_start + orig_num_bytes - 1);
161 btrfs_add_ordered_inode(inode);
162 btrfs_update_inode(trans, root, inode);
163out: 172out:
164 btrfs_end_transaction(trans, root); 173 btrfs_end_transaction(trans, root);
165 return ret; 174 return ret;
@@ -341,25 +350,15 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
341 int mirror_num) 350 int mirror_num)
342{ 351{
343 struct btrfs_root *root = BTRFS_I(inode)->root; 352 struct btrfs_root *root = BTRFS_I(inode)->root;
344 struct btrfs_trans_handle *trans;
345 int ret = 0; 353 int ret = 0;
346 char *sums = NULL; 354 struct btrfs_ordered_sum *sums;
347 355
348 ret = btrfs_csum_one_bio(root, bio, &sums); 356 ret = btrfs_csum_one_bio(root, bio, &sums);
349 BUG_ON(ret); 357 BUG_ON(ret);
350 358
351 trans = btrfs_start_transaction(root, 1); 359 ret = btrfs_add_ordered_sum(inode, sums);
352
353 btrfs_set_trans_block_group(trans, inode);
354 mutex_lock(&BTRFS_I(inode)->csum_mutex);
355 btrfs_csum_file_blocks(trans, root, inode, bio, sums);
356 mutex_unlock(&BTRFS_I(inode)->csum_mutex);
357
358 ret = btrfs_end_transaction(trans, root);
359 BUG_ON(ret); 360 BUG_ON(ret);
360 361
361 kfree(sums);
362
363 return btrfs_map_bio(root, rw, bio, mirror_num, 1); 362 return btrfs_map_bio(root, rw, bio, mirror_num, 1);
364} 363}
365 364
@@ -369,14 +368,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
369 struct btrfs_root *root = BTRFS_I(inode)->root; 368 struct btrfs_root *root = BTRFS_I(inode)->root;
370 int ret = 0; 369 int ret = 0;
371 370
372 if (!(rw & (1 << BIO_RW))) { 371 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
373 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 372 BUG_ON(ret);
374 BUG_ON(ret);
375 goto mapit;
376 }
377 373
378 if (btrfs_test_opt(root, NODATASUM) || 374 if (!(rw & (1 << BIO_RW))) {
379 btrfs_test_flag(inode, NODATASUM)) {
380 goto mapit; 375 goto mapit;
381 } 376 }
382 377
@@ -387,6 +382,96 @@ mapit:
387 return btrfs_map_bio(root, rw, bio, mirror_num, 0); 382 return btrfs_map_bio(root, rw, bio, mirror_num, 0);
388} 383}
389 384
385static int add_pending_csums(struct btrfs_trans_handle *trans,
386 struct inode *inode, u64 file_offset,
387 struct list_head *list)
388{
389 struct list_head *cur;
390 struct btrfs_ordered_sum *sum;
391
392 btrfs_set_trans_block_group(trans, inode);
393 while(!list_empty(list)) {
394 cur = list->next;
395 sum = list_entry(cur, struct btrfs_ordered_sum, list);
396 mutex_lock(&BTRFS_I(inode)->csum_mutex);
397 btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
398 inode, sum);
399 mutex_unlock(&BTRFS_I(inode)->csum_mutex);
400 list_del(&sum->list);
401 kfree(sum);
402 }
403 return 0;
404}
405
406int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
407 struct extent_state *state, int uptodate)
408{
409 struct inode *inode = page->mapping->host;
410 struct btrfs_root *root = BTRFS_I(inode)->root;
411 struct btrfs_trans_handle *trans;
412 struct btrfs_ordered_extent *ordered_extent;
413 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
414 u64 alloc_hint = 0;
415 struct list_head list;
416 struct btrfs_key ins;
417 int ret;
418
419 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
420 if (!ret) {
421 return 0;
422 }
423
424 trans = btrfs_start_transaction(root, 1);
425
426 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
427 BUG_ON(!ordered_extent);
428
429 lock_extent(io_tree, ordered_extent->file_offset,
430 ordered_extent->file_offset + ordered_extent->len - 1,
431 GFP_NOFS);
432
433 INIT_LIST_HEAD(&list);
434
435 ins.objectid = ordered_extent->start;
436 ins.offset = ordered_extent->len;
437 ins.type = BTRFS_EXTENT_ITEM_KEY;
438 ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid,
439 trans->transid, inode->i_ino,
440 ordered_extent->file_offset, &ins);
441 BUG_ON(ret);
442 ret = btrfs_drop_extents(trans, root, inode,
443 ordered_extent->file_offset,
444 ordered_extent->file_offset +
445 ordered_extent->len,
446 ordered_extent->file_offset, &alloc_hint);
447 BUG_ON(ret);
448 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
449 ordered_extent->file_offset,
450 ordered_extent->start,
451 ordered_extent->len,
452 ordered_extent->len, 0);
453 BUG_ON(ret);
454 btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
455 ordered_extent->file_offset +
456 ordered_extent->len - 1);
457 inode->i_blocks += ordered_extent->len >> 9;
458 unlock_extent(io_tree, ordered_extent->file_offset,
459 ordered_extent->file_offset + ordered_extent->len - 1,
460 GFP_NOFS);
461 add_pending_csums(trans, inode, ordered_extent->file_offset,
462 &ordered_extent->list);
463
464 btrfs_remove_ordered_extent(inode, ordered_extent);
465 /* once for us */
466 btrfs_put_ordered_extent(ordered_extent);
467 /* once for the tree */
468 btrfs_put_ordered_extent(ordered_extent);
469
470 btrfs_update_inode(trans, root, inode);
471 btrfs_end_transaction(trans, root);
472 return 0;
473}
474
390int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) 475int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
391{ 476{
392 int ret = 0; 477 int ret = 0;
@@ -409,7 +494,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
409 if (ret == -ENOENT || ret == -EFBIG) 494 if (ret == -ENOENT || ret == -EFBIG)
410 ret = 0; 495 ret = 0;
411 csum = 0; 496 csum = 0;
412 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start); 497 printk("no csum found for inode %lu start %Lu\n", inode->i_ino,
498 start);
413 goto out; 499 goto out;
414 } 500 }
415 read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, 501 read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
@@ -833,7 +919,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
833{ 919{
834 struct btrfs_root *root; 920 struct btrfs_root *root;
835 struct btrfs_trans_handle *trans; 921 struct btrfs_trans_handle *trans;
836 struct inode *inode = dentry->d_inode;
837 int ret; 922 int ret;
838 unsigned long nr = 0; 923 unsigned long nr = 0;
839 924
@@ -849,14 +934,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
849 ret = btrfs_unlink_trans(trans, root, dir, dentry); 934 ret = btrfs_unlink_trans(trans, root, dir, dentry);
850 nr = trans->blocks_used; 935 nr = trans->blocks_used;
851 936
852 if (inode->i_nlink == 0) {
853 /* if the inode isn't linked anywhere,
854 * we don't need to worry about
855 * data=ordered
856 */
857 btrfs_del_ordered_inode(inode, 1);
858 }
859
860 btrfs_end_transaction_throttle(trans, root); 937 btrfs_end_transaction_throttle(trans, root);
861fail: 938fail:
862 btrfs_btree_balance_dirty(root, nr); 939 btrfs_btree_balance_dirty(root, nr);
@@ -931,6 +1008,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
931 int extent_type = -1; 1008 int extent_type = -1;
932 u64 mask = root->sectorsize - 1; 1009 u64 mask = root->sectorsize - 1;
933 1010
1011 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
934 btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); 1012 btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
935 path = btrfs_alloc_path(); 1013 path = btrfs_alloc_path();
936 path->reada = -1; 1014 path->reada = -1;
@@ -1117,34 +1195,6 @@ error:
1117 return ret; 1195 return ret;
1118} 1196}
1119 1197
1120static int btrfs_cow_one_page(struct inode *inode, struct page *page,
1121 size_t zero_start)
1122{
1123 char *kaddr;
1124 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1125 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1126 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1127 int ret = 0;
1128
1129 WARN_ON(!PageLocked(page));
1130 set_page_extent_mapped(page);
1131
1132 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1133 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1134 page_end, GFP_NOFS);
1135
1136 if (zero_start != PAGE_CACHE_SIZE) {
1137 kaddr = kmap(page);
1138 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
1139 flush_dcache_page(page);
1140 kunmap(page);
1141 }
1142 set_page_dirty(page);
1143 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1144
1145 return ret;
1146}
1147
1148/* 1198/*
1149 * taken from block_truncate_page, but does cow as it zeros out 1199 * taken from block_truncate_page, but does cow as it zeros out
1150 * any bytes left in the last page in the file. 1200 * any bytes left in the last page in the file.
@@ -1153,12 +1203,16 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1153{ 1203{
1154 struct inode *inode = mapping->host; 1204 struct inode *inode = mapping->host;
1155 struct btrfs_root *root = BTRFS_I(inode)->root; 1205 struct btrfs_root *root = BTRFS_I(inode)->root;
1206 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1207 struct btrfs_ordered_extent *ordered;
1208 char *kaddr;
1156 u32 blocksize = root->sectorsize; 1209 u32 blocksize = root->sectorsize;
1157 pgoff_t index = from >> PAGE_CACHE_SHIFT; 1210 pgoff_t index = from >> PAGE_CACHE_SHIFT;
1158 unsigned offset = from & (PAGE_CACHE_SIZE-1); 1211 unsigned offset = from & (PAGE_CACHE_SIZE-1);
1159 struct page *page; 1212 struct page *page;
1160 int ret = 0; 1213 int ret = 0;
1161 u64 page_start; 1214 u64 page_start;
1215 u64 page_end;
1162 1216
1163 if ((offset & (blocksize - 1)) == 0) 1217 if ((offset & (blocksize - 1)) == 0)
1164 goto out; 1218 goto out;
@@ -1168,6 +1222,10 @@ again:
1168 page = grab_cache_page(mapping, index); 1222 page = grab_cache_page(mapping, index);
1169 if (!page) 1223 if (!page)
1170 goto out; 1224 goto out;
1225
1226 page_start = page_offset(page);
1227 page_end = page_start + PAGE_CACHE_SIZE - 1;
1228
1171 if (!PageUptodate(page)) { 1229 if (!PageUptodate(page)) {
1172 ret = btrfs_readpage(NULL, page); 1230 ret = btrfs_readpage(NULL, page);
1173 lock_page(page); 1231 lock_page(page);
@@ -1181,10 +1239,32 @@ again:
1181 goto out; 1239 goto out;
1182 } 1240 }
1183 } 1241 }
1184
1185 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1186 wait_on_page_writeback(page); 1242 wait_on_page_writeback(page);
1187 ret = btrfs_cow_one_page(inode, page, offset); 1243
1244 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1245 set_page_extent_mapped(page);
1246
1247 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1248 if (ordered) {
1249 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1250 unlock_page(page);
1251 page_cache_release(page);
1252 btrfs_wait_ordered_extent(inode, ordered);
1253 btrfs_put_ordered_extent(ordered);
1254 goto again;
1255 }
1256
1257 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1258 page_end, GFP_NOFS);
1259 ret = 0;
1260 if (offset != PAGE_CACHE_SIZE) {
1261 kaddr = kmap(page);
1262 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1263 flush_dcache_page(page);
1264 kunmap(page);
1265 }
1266 set_page_dirty(page);
1267 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1188 1268
1189 unlock_page(page); 1269 unlock_page(page);
1190 page_cache_release(page); 1270 page_cache_release(page);
@@ -1222,8 +1302,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1222 1302
1223 btrfs_truncate_page(inode->i_mapping, inode->i_size); 1303 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1224 1304
1225 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1226 hole_size = block_end - hole_start; 1305 hole_size = block_end - hole_start;
1306 btrfs_wait_ordered_range(inode, hole_start, hole_size);
1307 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1227 1308
1228 trans = btrfs_start_transaction(root, 1); 1309 trans = btrfs_start_transaction(root, 1);
1229 btrfs_set_trans_block_group(trans, inode); 1310 btrfs_set_trans_block_group(trans, inode);
@@ -1258,6 +1339,7 @@ void btrfs_delete_inode(struct inode *inode)
1258 unsigned long nr; 1339 unsigned long nr;
1259 int ret; 1340 int ret;
1260 1341
1342 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1261 truncate_inode_pages(&inode->i_data, 0); 1343 truncate_inode_pages(&inode->i_data, 0);
1262 if (is_bad_inode(inode)) { 1344 if (is_bad_inode(inode)) {
1263 goto no_delete; 1345 goto no_delete;
@@ -1403,7 +1485,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
1403 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, 1485 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1404 inode->i_mapping, GFP_NOFS); 1486 inode->i_mapping, GFP_NOFS);
1405 mutex_init(&BTRFS_I(inode)->csum_mutex); 1487 mutex_init(&BTRFS_I(inode)->csum_mutex);
1406 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1407 return 0; 1488 return 0;
1408} 1489}
1409 1490
@@ -1705,7 +1786,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1705 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, 1786 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1706 inode->i_mapping, GFP_NOFS); 1787 inode->i_mapping, GFP_NOFS);
1707 mutex_init(&BTRFS_I(inode)->csum_mutex); 1788 mutex_init(&BTRFS_I(inode)->csum_mutex);
1708 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1709 BTRFS_I(inode)->delalloc_bytes = 0; 1789 BTRFS_I(inode)->delalloc_bytes = 0;
1710 BTRFS_I(inode)->root = root; 1790 BTRFS_I(inode)->root = root;
1711 1791
@@ -1930,7 +2010,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
1930 inode->i_mapping, GFP_NOFS); 2010 inode->i_mapping, GFP_NOFS);
1931 mutex_init(&BTRFS_I(inode)->csum_mutex); 2011 mutex_init(&BTRFS_I(inode)->csum_mutex);
1932 BTRFS_I(inode)->delalloc_bytes = 0; 2012 BTRFS_I(inode)->delalloc_bytes = 0;
1933 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1934 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 2013 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1935 } 2014 }
1936 dir->i_sb->s_dirt = 1; 2015 dir->i_sb->s_dirt = 1;
@@ -2066,64 +2145,18 @@ out_unlock:
2066 2145
2067static int merge_extent_mapping(struct extent_map_tree *em_tree, 2146static int merge_extent_mapping(struct extent_map_tree *em_tree,
2068 struct extent_map *existing, 2147 struct extent_map *existing,
2069 struct extent_map *em) 2148 struct extent_map *em,
2149 u64 map_start, u64 map_len)
2070{ 2150{
2071 u64 start_diff; 2151 u64 start_diff;
2072 u64 new_end;
2073 int ret = 0;
2074 int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
2075
2076 if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
2077 goto invalid;
2078
2079 if (!real_blocks && em->block_start != existing->block_start)
2080 goto invalid;
2081
2082 new_end = max(existing->start + existing->len, em->start + em->len);
2083
2084 if (existing->start >= em->start) {
2085 if (em->start + em->len < existing->start)
2086 goto invalid;
2087 2152
2088 start_diff = existing->start - em->start; 2153 BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
2089 if (real_blocks && em->block_start + start_diff != 2154 start_diff = map_start - em->start;
2090 existing->block_start) 2155 em->start = map_start;
2091 goto invalid; 2156 em->len = map_len;
2092 2157 if (em->block_start < EXTENT_MAP_LAST_BYTE)
2093 em->len = new_end - em->start; 2158 em->block_start += start_diff;
2094 2159 return add_extent_mapping(em_tree, em);
2095 remove_extent_mapping(em_tree, existing);
2096 /* free for the tree */
2097 free_extent_map(existing);
2098 ret = add_extent_mapping(em_tree, em);
2099
2100 } else if (em->start > existing->start) {
2101
2102 if (existing->start + existing->len < em->start)
2103 goto invalid;
2104
2105 start_diff = em->start - existing->start;
2106 if (real_blocks && existing->block_start + start_diff !=
2107 em->block_start)
2108 goto invalid;
2109
2110 remove_extent_mapping(em_tree, existing);
2111 em->block_start = existing->block_start;
2112 em->start = existing->start;
2113 em->len = new_end - existing->start;
2114 free_extent_map(existing);
2115
2116 ret = add_extent_mapping(em_tree, em);
2117 } else {
2118 goto invalid;
2119 }
2120 return ret;
2121
2122invalid:
2123 printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
2124 existing->start, existing->len, existing->block_start,
2125 em->start, em->len, em->block_start);
2126 return -EIO;
2127} 2160}
2128 2161
2129struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, 2162struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
@@ -2170,10 +2203,9 @@ again:
2170 err = -ENOMEM; 2203 err = -ENOMEM;
2171 goto out; 2204 goto out;
2172 } 2205 }
2173 2206 em->bdev = root->fs_info->fs_devices->latest_bdev;
2174 em->start = EXTENT_MAP_HOLE; 2207 em->start = EXTENT_MAP_HOLE;
2175 em->len = (u64)-1; 2208 em->len = (u64)-1;
2176 em->bdev = root->fs_info->fs_devices->latest_bdev;
2177 ret = btrfs_lookup_file_extent(trans, root, path, 2209 ret = btrfs_lookup_file_extent(trans, root, path,
2178 objectid, start, trans != NULL); 2210 objectid, start, trans != NULL);
2179 if (ret < 0) { 2211 if (ret < 0) {
@@ -2314,6 +2346,9 @@ insert:
2314 */ 2346 */
2315 if (ret == -EEXIST) { 2347 if (ret == -EEXIST) {
2316 struct extent_map *existing; 2348 struct extent_map *existing;
2349
2350 ret = 0;
2351
2317 existing = lookup_extent_mapping(em_tree, start, len); 2352 existing = lookup_extent_mapping(em_tree, start, len);
2318 if (existing && (existing->start > start || 2353 if (existing && (existing->start > start ||
2319 existing->start + existing->len <= start)) { 2354 existing->start + existing->len <= start)) {
@@ -2325,7 +2360,8 @@ insert:
2325 em->len); 2360 em->len);
2326 if (existing) { 2361 if (existing) {
2327 err = merge_extent_mapping(em_tree, existing, 2362 err = merge_extent_mapping(em_tree, existing,
2328 em); 2363 em, start,
2364 root->sectorsize);
2329 free_extent_map(existing); 2365 free_extent_map(existing);
2330 if (err) { 2366 if (err) {
2331 free_extent_map(em); 2367 free_extent_map(em);
@@ -2341,6 +2377,7 @@ insert:
2341 } else { 2377 } else {
2342 free_extent_map(em); 2378 free_extent_map(em);
2343 em = existing; 2379 em = existing;
2380 err = 0;
2344 } 2381 }
2345 } 2382 }
2346 spin_unlock(&em_tree->lock); 2383 spin_unlock(&em_tree->lock);
@@ -2348,8 +2385,9 @@ out:
2348 btrfs_free_path(path); 2385 btrfs_free_path(path);
2349 if (trans) { 2386 if (trans) {
2350 ret = btrfs_end_transaction(trans, root); 2387 ret = btrfs_end_transaction(trans, root);
2351 if (!err) 2388 if (!err) {
2352 err = ret; 2389 err = ret;
2390 }
2353 } 2391 }
2354 if (err) { 2392 if (err) {
2355 free_extent_map(em); 2393 free_extent_map(em);
@@ -2474,8 +2512,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping,
2474 return extent_readpages(tree, mapping, pages, nr_pages, 2512 return extent_readpages(tree, mapping, pages, nr_pages,
2475 btrfs_get_extent); 2513 btrfs_get_extent);
2476} 2514}
2477 2515static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2478static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2479{ 2516{
2480 struct extent_io_tree *tree; 2517 struct extent_io_tree *tree;
2481 struct extent_map_tree *map; 2518 struct extent_map_tree *map;
@@ -2493,15 +2530,54 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2493 return ret; 2530 return ret;
2494} 2531}
2495 2532
2533static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2534{
2535 struct btrfs_ordered_extent *ordered;
2536
2537 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
2538 page_offset(page));
2539 if (ordered) {
2540 btrfs_put_ordered_extent(ordered);
2541 return 0;
2542 }
2543 return __btrfs_releasepage(page, gfp_flags);
2544}
2545
2496static void btrfs_invalidatepage(struct page *page, unsigned long offset) 2546static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2497{ 2547{
2498 struct extent_io_tree *tree; 2548 struct extent_io_tree *tree;
2549 struct btrfs_ordered_extent *ordered;
2550 u64 page_start = page_offset(page);
2551 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2499 2552
2553 wait_on_page_writeback(page);
2500 tree = &BTRFS_I(page->mapping->host)->io_tree; 2554 tree = &BTRFS_I(page->mapping->host)->io_tree;
2501 extent_invalidatepage(tree, page, offset); 2555 if (offset) {
2502 btrfs_releasepage(page, GFP_NOFS); 2556 btrfs_releasepage(page, GFP_NOFS);
2557 return;
2558 }
2559
2560 lock_extent(tree, page_start, page_end, GFP_NOFS);
2561 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
2562 page_offset(page));
2563 if (ordered) {
2564 clear_extent_bit(tree, page_start, page_end,
2565 EXTENT_DIRTY | EXTENT_DELALLOC |
2566 EXTENT_LOCKED, 1, 0, GFP_NOFS);
2567 btrfs_writepage_end_io_hook(page, page_start,
2568 page_end, NULL, 1);
2569 btrfs_put_ordered_extent(ordered);
2570 lock_extent(tree, page_start, page_end, GFP_NOFS);
2571 }
2572 clear_extent_bit(tree, page_start, page_end,
2573 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
2574 EXTENT_ORDERED,
2575 1, 1, GFP_NOFS);
2576 __btrfs_releasepage(page, GFP_NOFS);
2577
2503 if (PagePrivate(page)) { 2578 if (PagePrivate(page)) {
2504 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); 2579 invalidate_extent_lru(tree, page_offset(page),
2580 PAGE_CACHE_SIZE);
2505 ClearPagePrivate(page); 2581 ClearPagePrivate(page);
2506 set_page_private(page, 0); 2582 set_page_private(page, 0);
2507 page_cache_release(page); 2583 page_cache_release(page);
@@ -2527,35 +2603,63 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2527{ 2603{
2528 struct inode *inode = fdentry(vma->vm_file)->d_inode; 2604 struct inode *inode = fdentry(vma->vm_file)->d_inode;
2529 struct btrfs_root *root = BTRFS_I(inode)->root; 2605 struct btrfs_root *root = BTRFS_I(inode)->root;
2530 unsigned long end; 2606 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2607 struct btrfs_ordered_extent *ordered;
2608 char *kaddr;
2609 unsigned long zero_start;
2531 loff_t size; 2610 loff_t size;
2532 int ret; 2611 int ret;
2533 u64 page_start; 2612 u64 page_start;
2613 u64 page_end;
2534 2614
2535 ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); 2615 ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2536 if (ret) 2616 if (ret)
2537 goto out; 2617 goto out;
2538 2618
2539 ret = -EINVAL; 2619 ret = -EINVAL;
2540 2620again:
2541 lock_page(page); 2621 lock_page(page);
2542 wait_on_page_writeback(page);
2543 size = i_size_read(inode); 2622 size = i_size_read(inode);
2544 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2623 page_start = page_offset(page);
2624 page_end = page_start + PAGE_CACHE_SIZE - 1;
2545 2625
2546 if ((page->mapping != inode->i_mapping) || 2626 if ((page->mapping != inode->i_mapping) ||
2547 (page_start > size)) { 2627 (page_start >= size)) {
2548 /* page got truncated out from underneath us */ 2628 /* page got truncated out from underneath us */
2549 goto out_unlock; 2629 goto out_unlock;
2550 } 2630 }
2631 wait_on_page_writeback(page);
2632
2633 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2634 set_page_extent_mapped(page);
2635
2636 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2637 if (ordered) {
2638 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2639 unlock_page(page);
2640 btrfs_wait_ordered_extent(inode, ordered);
2641 btrfs_put_ordered_extent(ordered);
2642 goto again;
2643 }
2644
2645 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
2646 page_end, GFP_NOFS);
2647 ret = 0;
2551 2648
2552 /* page is wholly or partially inside EOF */ 2649 /* page is wholly or partially inside EOF */
2553 if (page_start + PAGE_CACHE_SIZE > size) 2650 if (page_start + PAGE_CACHE_SIZE > size)
2554 end = size & ~PAGE_CACHE_MASK; 2651 zero_start = size & ~PAGE_CACHE_MASK;
2555 else 2652 else
2556 end = PAGE_CACHE_SIZE; 2653 zero_start = PAGE_CACHE_SIZE;
2557 2654
2558 ret = btrfs_cow_one_page(inode, page, end); 2655 if (zero_start != PAGE_CACHE_SIZE) {
2656 kaddr = kmap(page);
2657 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
2658 flush_dcache_page(page);
2659 kunmap(page);
2660 }
2661 set_page_dirty(page);
2662 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2559 2663
2560out_unlock: 2664out_unlock:
2561 unlock_page(page); 2665 unlock_page(page);
@@ -2662,15 +2766,28 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
2662 if (!ei) 2766 if (!ei)
2663 return NULL; 2767 return NULL;
2664 ei->last_trans = 0; 2768 ei->last_trans = 0;
2665 ei->ordered_trans = 0; 2769 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
2666 return &ei->vfs_inode; 2770 return &ei->vfs_inode;
2667} 2771}
2668 2772
2669void btrfs_destroy_inode(struct inode *inode) 2773void btrfs_destroy_inode(struct inode *inode)
2670{ 2774{
2775 struct btrfs_ordered_extent *ordered;
2671 WARN_ON(!list_empty(&inode->i_dentry)); 2776 WARN_ON(!list_empty(&inode->i_dentry));
2672 WARN_ON(inode->i_data.nrpages); 2777 WARN_ON(inode->i_data.nrpages);
2673 2778
2779 while(1) {
2780 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
2781 if (!ordered)
2782 break;
2783 else {
2784 printk("found ordered extent %Lu %Lu\n",
2785 ordered->file_offset, ordered->len);
2786 btrfs_remove_ordered_extent(inode, ordered);
2787 btrfs_put_ordered_extent(ordered);
2788 btrfs_put_ordered_extent(ordered);
2789 }
2790 }
2674 btrfs_drop_extent_cache(inode, 0, (u64)-1); 2791 btrfs_drop_extent_cache(inode, 0, (u64)-1);
2675 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 2792 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2676} 2793}
@@ -2869,7 +2986,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2869 inode->i_mapping, GFP_NOFS); 2986 inode->i_mapping, GFP_NOFS);
2870 mutex_init(&BTRFS_I(inode)->csum_mutex); 2987 mutex_init(&BTRFS_I(inode)->csum_mutex);
2871 BTRFS_I(inode)->delalloc_bytes = 0; 2988 BTRFS_I(inode)->delalloc_bytes = 0;
2872 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
2873 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 2989 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
2874 } 2990 }
2875 dir->i_sb->s_dirt = 1; 2991 dir->i_sb->s_dirt = 1;
@@ -2921,6 +3037,20 @@ out_fail:
2921 return err; 3037 return err;
2922} 3038}
2923 3039
3040static int btrfs_set_page_dirty(struct page *page)
3041{
3042 struct inode *inode = page->mapping->host;
3043 u64 page_start = page_offset(page);
3044 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
3045
3046 if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
3047 EXTENT_DELALLOC, 0)) {
3048printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page));
3049WARN_ON(1);
3050 }
3051 return __set_page_dirty_nobuffers(page);
3052}
3053
2924static int btrfs_permission(struct inode *inode, int mask, 3054static int btrfs_permission(struct inode *inode, int mask,
2925 struct nameidata *nd) 3055 struct nameidata *nd)
2926{ 3056{
@@ -2967,6 +3097,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
2967 .merge_bio_hook = btrfs_merge_bio_hook, 3097 .merge_bio_hook = btrfs_merge_bio_hook,
2968 .readpage_io_hook = btrfs_readpage_io_hook, 3098 .readpage_io_hook = btrfs_readpage_io_hook,
2969 .readpage_end_io_hook = btrfs_readpage_end_io_hook, 3099 .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3100 .writepage_end_io_hook = btrfs_writepage_end_io_hook,
2970 .readpage_io_failed_hook = btrfs_io_failed_hook, 3101 .readpage_io_failed_hook = btrfs_io_failed_hook,
2971 .set_bit_hook = btrfs_set_bit_hook, 3102 .set_bit_hook = btrfs_set_bit_hook,
2972 .clear_bit_hook = btrfs_clear_bit_hook, 3103 .clear_bit_hook = btrfs_clear_bit_hook,
@@ -2982,7 +3113,7 @@ static struct address_space_operations btrfs_aops = {
2982 .direct_IO = btrfs_direct_IO, 3113 .direct_IO = btrfs_direct_IO,
2983 .invalidatepage = btrfs_invalidatepage, 3114 .invalidatepage = btrfs_invalidatepage,
2984 .releasepage = btrfs_releasepage, 3115 .releasepage = btrfs_releasepage,
2985 .set_page_dirty = __set_page_dirty_nobuffers, 3116 .set_page_dirty = btrfs_set_page_dirty,
2986}; 3117};
2987 3118
2988static struct address_space_operations btrfs_symlink_aops = { 3119static struct address_space_operations btrfs_symlink_aops = {
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 254da8225664..6513270f054c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -22,48 +22,30 @@
22#include "ctree.h" 22#include "ctree.h"
23#include "transaction.h" 23#include "transaction.h"
24#include "btrfs_inode.h" 24#include "btrfs_inode.h"
25#include "extent_io.h"
25 26
26struct tree_entry {
27 u64 root_objectid;
28 u64 objectid;
29 struct inode *inode;
30 struct rb_node rb_node;
31};
32 27
33/* 28static u64 entry_end(struct btrfs_ordered_extent *entry)
34 * returns > 0 if entry passed (root, objectid) is > entry,
35 * < 0 if (root, objectid) < entry and zero if they are equal
36 */
37static int comp_entry(struct tree_entry *entry, u64 root_objectid,
38 u64 objectid)
39{ 29{
40 if (root_objectid < entry->root_objectid) 30 if (entry->file_offset + entry->len < entry->file_offset)
41 return -1; 31 return (u64)-1;
42 if (root_objectid > entry->root_objectid) 32 return entry->file_offset + entry->len;
43 return 1;
44 if (objectid < entry->objectid)
45 return -1;
46 if (objectid > entry->objectid)
47 return 1;
48 return 0;
49} 33}
50 34
51static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, 35static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
52 u64 objectid, struct rb_node *node) 36 struct rb_node *node)
53{ 37{
54 struct rb_node ** p = &root->rb_node; 38 struct rb_node ** p = &root->rb_node;
55 struct rb_node * parent = NULL; 39 struct rb_node * parent = NULL;
56 struct tree_entry *entry; 40 struct btrfs_ordered_extent *entry;
57 int comp;
58 41
59 while(*p) { 42 while(*p) {
60 parent = *p; 43 parent = *p;
61 entry = rb_entry(parent, struct tree_entry, rb_node); 44 entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
62 45
63 comp = comp_entry(entry, root_objectid, objectid); 46 if (file_offset < entry->file_offset)
64 if (comp < 0)
65 p = &(*p)->rb_left; 47 p = &(*p)->rb_left;
66 else if (comp > 0) 48 else if (file_offset >= entry_end(entry))
67 p = &(*p)->rb_right; 49 p = &(*p)->rb_right;
68 else 50 else
69 return parent; 51 return parent;
@@ -74,24 +56,23 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid,
74 return NULL; 56 return NULL;
75} 57}
76 58
77static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, 59static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
78 u64 objectid, struct rb_node **prev_ret) 60 struct rb_node **prev_ret)
79{ 61{
80 struct rb_node * n = root->rb_node; 62 struct rb_node * n = root->rb_node;
81 struct rb_node *prev = NULL; 63 struct rb_node *prev = NULL;
82 struct tree_entry *entry; 64 struct rb_node *test;
83 struct tree_entry *prev_entry = NULL; 65 struct btrfs_ordered_extent *entry;
84 int comp; 66 struct btrfs_ordered_extent *prev_entry = NULL;
85 67
86 while(n) { 68 while(n) {
87 entry = rb_entry(n, struct tree_entry, rb_node); 69 entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
88 prev = n; 70 prev = n;
89 prev_entry = entry; 71 prev_entry = entry;
90 comp = comp_entry(entry, root_objectid, objectid);
91 72
92 if (comp < 0) 73 if (file_offset < entry->file_offset)
93 n = n->rb_left; 74 n = n->rb_left;
94 else if (comp > 0) 75 else if (file_offset >= entry_end(entry))
95 n = n->rb_right; 76 n = n->rb_right;
96 else 77 else
97 return n; 78 return n;
@@ -99,195 +80,329 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid,
99 if (!prev_ret) 80 if (!prev_ret)
100 return NULL; 81 return NULL;
101 82
102 while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { 83 while(prev && file_offset >= entry_end(prev_entry)) {
103 prev = rb_next(prev); 84 test = rb_next(prev);
104 prev_entry = rb_entry(prev, struct tree_entry, rb_node); 85 if (!test)
86 break;
87 prev_entry = rb_entry(test, struct btrfs_ordered_extent,
88 rb_node);
89 if (file_offset < entry_end(prev_entry))
90 break;
91
92 prev = test;
93 }
94 if (prev)
95 prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
96 rb_node);
97 while(prev && file_offset < entry_end(prev_entry)) {
98 test = rb_prev(prev);
99 if (!test)
100 break;
101 prev_entry = rb_entry(test, struct btrfs_ordered_extent,
102 rb_node);
103 prev = test;
105 } 104 }
106 *prev_ret = prev; 105 *prev_ret = prev;
107 return NULL; 106 return NULL;
108} 107}
109 108
110static inline struct rb_node *tree_search(struct rb_root *root, 109static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
111 u64 root_objectid, u64 objectid) 110{
111 if (file_offset < entry->file_offset ||
112 entry->file_offset + entry->len <= file_offset)
113 return 0;
114 return 1;
115}
116
117static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
118 u64 file_offset)
112{ 119{
120 struct rb_root *root = &tree->tree;
113 struct rb_node *prev; 121 struct rb_node *prev;
114 struct rb_node *ret; 122 struct rb_node *ret;
115 ret = __tree_search(root, root_objectid, objectid, &prev); 123 struct btrfs_ordered_extent *entry;
124
125 if (tree->last) {
126 entry = rb_entry(tree->last, struct btrfs_ordered_extent,
127 rb_node);
128 if (offset_in_entry(entry, file_offset))
129 return tree->last;
130 }
131 ret = __tree_search(root, file_offset, &prev);
116 if (!ret) 132 if (!ret)
117 return prev; 133 ret = prev;
134 if (ret)
135 tree->last = ret;
118 return ret; 136 return ret;
119} 137}
120 138
121int btrfs_add_ordered_inode(struct inode *inode) 139int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
140 u64 start, u64 len)
122{ 141{
123 struct btrfs_root *root = BTRFS_I(inode)->root;
124 u64 root_objectid = root->root_key.objectid;
125 u64 transid = root->fs_info->running_transaction->transid;
126 struct tree_entry *entry;
127 struct rb_node *node;
128 struct btrfs_ordered_inode_tree *tree; 142 struct btrfs_ordered_inode_tree *tree;
143 struct rb_node *node;
144 struct btrfs_ordered_extent *entry;
129 145
130 if (transid <= BTRFS_I(inode)->ordered_trans) 146 tree = &BTRFS_I(inode)->ordered_tree;
131 return 0; 147 entry = kzalloc(sizeof(*entry), GFP_NOFS);
132
133 tree = &root->fs_info->running_transaction->ordered_inode_tree;
134
135 read_lock(&tree->lock);
136 node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL);
137 read_unlock(&tree->lock);
138 if (node) {
139 return 0;
140 }
141
142 entry = kmalloc(sizeof(*entry), GFP_NOFS);
143 if (!entry) 148 if (!entry)
144 return -ENOMEM; 149 return -ENOMEM;
145 150
146 write_lock(&tree->lock); 151 mutex_lock(&tree->mutex);
147 entry->objectid = inode->i_ino; 152 entry->file_offset = file_offset;
148 entry->root_objectid = root_objectid; 153 entry->start = start;
154 entry->len = len;
149 entry->inode = inode; 155 entry->inode = inode;
156 /* one ref for the tree */
157 atomic_set(&entry->refs, 1);
158 init_waitqueue_head(&entry->wait);
159 INIT_LIST_HEAD(&entry->list);
150 160
151 node = tree_insert(&tree->tree, root_objectid, 161 node = tree_insert(&tree->tree, file_offset,
152 inode->i_ino, &entry->rb_node); 162 &entry->rb_node);
153 163 if (node) {
154 BTRFS_I(inode)->ordered_trans = transid; 164 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
155 if (!node) 165 atomic_inc(&entry->refs);
156 igrab(inode); 166 }
157 167 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
158 write_unlock(&tree->lock); 168 entry_end(entry) - 1, GFP_NOFS);
159 169
160 if (node) 170 set_bit(BTRFS_ORDERED_START, &entry->flags);
161 kfree(entry); 171 mutex_unlock(&tree->mutex);
172 BUG_ON(node);
162 return 0; 173 return 0;
163} 174}
164 175
165int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, 176int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum)
166 u64 *root_objectid, u64 *objectid,
167 struct inode **inode)
168{ 177{
169 struct tree_entry *entry; 178 struct btrfs_ordered_inode_tree *tree;
170 struct rb_node *node; 179 struct rb_node *node;
180 struct btrfs_ordered_extent *entry;
171 181
172 write_lock(&tree->lock); 182 tree = &BTRFS_I(inode)->ordered_tree;
173 node = tree_search(&tree->tree, *root_objectid, *objectid); 183 mutex_lock(&tree->mutex);
184 node = tree_search(tree, sum->file_offset);
174 if (!node) { 185 if (!node) {
175 write_unlock(&tree->lock); 186search_fail:
176 return 0; 187printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset);
188 node = rb_first(&tree->tree);
189 while(node) {
190 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
191 printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start);
192 node = rb_next(node);
193 }
194 BUG();
177 } 195 }
178 entry = rb_entry(node, struct tree_entry, rb_node); 196 BUG_ON(!node);
179 197
180 while(comp_entry(entry, *root_objectid, *objectid) >= 0) { 198 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
181 node = rb_next(node); 199 if (!offset_in_entry(entry, sum->file_offset)) {
182 if (!node) 200 goto search_fail;
183 break;
184 entry = rb_entry(node, struct tree_entry, rb_node);
185 }
186 if (!node) {
187 write_unlock(&tree->lock);
188 return 0;
189 } 201 }
190 202
191 *root_objectid = entry->root_objectid; 203 list_add_tail(&sum->list, &entry->list);
192 *inode = entry->inode; 204 mutex_unlock(&tree->mutex);
193 atomic_inc(&entry->inode->i_count); 205 return 0;
194 *objectid = entry->objectid;
195 write_unlock(&tree->lock);
196 return 1;
197} 206}
198 207
199int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, 208int btrfs_dec_test_ordered_pending(struct inode *inode,
200 u64 *root_objectid, u64 *objectid, 209 u64 file_offset, u64 io_size)
201 struct inode **inode)
202{ 210{
203 struct tree_entry *entry; 211 struct btrfs_ordered_inode_tree *tree;
204 struct rb_node *node; 212 struct rb_node *node;
205 213 struct btrfs_ordered_extent *entry;
206 write_lock(&tree->lock); 214 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
207 node = tree_search(&tree->tree, *root_objectid, *objectid); 215 int ret;
216
217 tree = &BTRFS_I(inode)->ordered_tree;
218 mutex_lock(&tree->mutex);
219 clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
220 GFP_NOFS);
221 node = tree_search(tree, file_offset);
208 if (!node) { 222 if (!node) {
209 write_unlock(&tree->lock); 223 ret = 1;
210 return 0; 224 goto out;
211 } 225 }
212 226
213 entry = rb_entry(node, struct tree_entry, rb_node); 227 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
214 while(comp_entry(entry, *root_objectid, *objectid) >= 0) { 228 if (!offset_in_entry(entry, file_offset)) {
215 node = rb_next(node); 229 ret = 1;
216 if (!node) 230 goto out;
217 break;
218 entry = rb_entry(node, struct tree_entry, rb_node);
219 } 231 }
220 if (!node) { 232
221 write_unlock(&tree->lock); 233 ret = test_range_bit(io_tree, entry->file_offset,
222 return 0; 234 entry->file_offset + entry->len - 1,
235 EXTENT_ORDERED, 0);
236 if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) {
237printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry));
223 } 238 }
239 if (ret == 0)
240 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
241out:
242 mutex_unlock(&tree->mutex);
243 return ret == 0;
244}
224 245
225 *root_objectid = entry->root_objectid; 246int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
226 *objectid = entry->objectid; 247{
227 *inode = entry->inode; 248 if (atomic_dec_and_test(&entry->refs))
228 atomic_inc(&entry->inode->i_count); 249 kfree(entry);
229 rb_erase(node, &tree->tree); 250 return 0;
230 write_unlock(&tree->lock);
231 kfree(entry);
232 return 1;
233} 251}
234 252
235static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, 253int btrfs_remove_ordered_extent(struct inode *inode,
236 struct inode *inode, 254 struct btrfs_ordered_extent *entry)
237 u64 root_objectid, u64 objectid)
238{ 255{
239 struct tree_entry *entry; 256 struct btrfs_ordered_inode_tree *tree;
240 struct rb_node *node; 257 struct rb_node *node;
241 struct rb_node *prev;
242 258
243 write_lock(&tree->lock); 259 tree = &BTRFS_I(inode)->ordered_tree;
244 node = __tree_search(&tree->tree, root_objectid, objectid, &prev); 260 mutex_lock(&tree->mutex);
245 if (!node) { 261 node = &entry->rb_node;
246 write_unlock(&tree->lock);
247 return;
248 }
249 rb_erase(node, &tree->tree); 262 rb_erase(node, &tree->tree);
250 BTRFS_I(inode)->ordered_trans = 0; 263 tree->last = NULL;
251 write_unlock(&tree->lock); 264 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
252 atomic_dec(&inode->i_count); 265 mutex_unlock(&tree->mutex);
253 entry = rb_entry(node, struct tree_entry, rb_node); 266 wake_up(&entry->wait);
254 kfree(entry); 267 return 0;
255 return;
256} 268}
257 269
258void btrfs_del_ordered_inode(struct inode *inode, int force) 270void btrfs_wait_ordered_extent(struct inode *inode,
271 struct btrfs_ordered_extent *entry)
259{ 272{
260 struct btrfs_root *root = BTRFS_I(inode)->root; 273 u64 start = entry->file_offset;
261 u64 root_objectid = root->root_key.objectid; 274 u64 end = start + entry->len - 1;
275#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
276 do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
277#else
278 do_sync_mapping_range(inode->i_mapping, start, end,
279 SYNC_FILE_RANGE_WRITE);
280#endif
281 wait_event(entry->wait,
282 test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
283}
262 284
263 if (!BTRFS_I(inode)->ordered_trans) { 285static void btrfs_start_ordered_extent(struct inode *inode,
264 return; 286 struct btrfs_ordered_extent *entry, int wait)
265 } 287{
288 u64 start = entry->file_offset;
289 u64 end = start + entry->len - 1;
266 290
267 if (!force && (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || 291#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
268 mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) 292 do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
269 return; 293#else
294 do_sync_mapping_range(inode->i_mapping, start, end,
295 SYNC_FILE_RANGE_WRITE);
296#endif
297 if (wait)
298 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
299 &entry->flags));
300}
270 301
271 spin_lock(&root->fs_info->new_trans_lock); 302void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
272 if (root->fs_info->running_transaction) { 303{
273 struct btrfs_ordered_inode_tree *tree; 304 u64 end;
274 tree = &root->fs_info->running_transaction->ordered_inode_tree; 305 struct btrfs_ordered_extent *ordered;
275 __btrfs_del_ordered_inode(tree, inode, root_objectid, 306 int found;
276 inode->i_ino); 307 int should_wait = 0;
308
309again:
310 if (start + len < start)
311 end = (u64)-1;
312 else
313 end = start + len - 1;
314 found = 0;
315 while(1) {
316 ordered = btrfs_lookup_first_ordered_extent(inode, end);
317 if (!ordered) {
318 break;
319 }
320 if (ordered->file_offset >= start + len) {
321 btrfs_put_ordered_extent(ordered);
322 break;
323 }
324 if (ordered->file_offset + ordered->len < start) {
325 btrfs_put_ordered_extent(ordered);
326 break;
327 }
328 btrfs_start_ordered_extent(inode, ordered, should_wait);
329 found++;
330 end = ordered->file_offset;
331 btrfs_put_ordered_extent(ordered);
332 if (end == 0)
333 break;
334 end--;
335 }
336 if (should_wait && found) {
337 should_wait = 0;
338 goto again;
277 } 339 }
278 spin_unlock(&root->fs_info->new_trans_lock);
279} 340}
280 341
281int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode) 342int btrfs_add_ordered_pending(struct inode *inode,
343 struct btrfs_ordered_extent *ordered,
344 u64 start, u64 len)
282{ 345{
283 struct btrfs_transaction *cur = root->fs_info->running_transaction; 346 WARN_ON(1);
284 while(cur == root->fs_info->running_transaction &&
285 atomic_read(&BTRFS_I(inode)->ordered_writeback)) {
286#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
287 congestion_wait(WRITE, HZ/20);
288#else
289 blk_congestion_wait(WRITE, HZ/20);
290#endif
291 }
292 return 0; 347 return 0;
348#if 0
349 int ret;
350 struct btrfs_ordered_inode_tree *tree;
351 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
352
353 tree = &BTRFS_I(inode)->ordered_tree;
354 mutex_lock(&tree->mutex);
355 if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
356 ret = -EAGAIN;
357 goto out;
358 }
359 set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS);
360 ret = 0;
361out:
362 mutex_unlock(&tree->mutex);
363 return ret;
364#endif
365}
366
367struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
368 u64 file_offset)
369{
370 struct btrfs_ordered_inode_tree *tree;
371 struct rb_node *node;
372 struct btrfs_ordered_extent *entry = NULL;
373
374 tree = &BTRFS_I(inode)->ordered_tree;
375 mutex_lock(&tree->mutex);
376 node = tree_search(tree, file_offset);
377 if (!node)
378 goto out;
379
380 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
381 if (!offset_in_entry(entry, file_offset))
382 entry = NULL;
383 if (entry)
384 atomic_inc(&entry->refs);
385out:
386 mutex_unlock(&tree->mutex);
387 return entry;
388}
389
390struct btrfs_ordered_extent *
391btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset)
392{
393 struct btrfs_ordered_inode_tree *tree;
394 struct rb_node *node;
395 struct btrfs_ordered_extent *entry = NULL;
396
397 tree = &BTRFS_I(inode)->ordered_tree;
398 mutex_lock(&tree->mutex);
399 node = tree_search(tree, file_offset);
400 if (!node)
401 goto out;
402
403 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
404 atomic_inc(&entry->refs);
405out:
406 mutex_unlock(&tree->mutex);
407 return entry;
293} 408}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4fa78736423e..33292c5fe90c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -20,24 +20,73 @@
20#define __BTRFS_ORDERED_DATA__ 20#define __BTRFS_ORDERED_DATA__
21 21
22struct btrfs_ordered_inode_tree { 22struct btrfs_ordered_inode_tree {
23 rwlock_t lock; 23 struct mutex mutex;
24 struct rb_root tree; 24 struct rb_root tree;
25 struct rb_node *last;
25}; 26};
26 27
28struct btrfs_sector_sum {
29 u64 offset;
30 u32 sum;
31};
32
33struct btrfs_ordered_sum {
34 u64 file_offset;
35 u64 len;
36 struct list_head list;
37 struct btrfs_sector_sum sums;
38};
39
40/* bits for the flags field */
41#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
42#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
43#define BTRFS_ORDERED_START 2 /* set when tree setup */
44
45struct btrfs_ordered_extent {
46 u64 file_offset;
47 u64 start;
48 u64 len;
49 unsigned long flags;
50 atomic_t refs;
51 struct list_head list;
52 struct inode *inode;
53 wait_queue_head_t wait;
54 struct rb_node rb_node;
55};
56
57
58static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes)
59{
60 unsigned long num_sectors = (bytes + root->sectorsize - 1) /
61 root->sectorsize;
62 return sizeof(struct btrfs_ordered_sum) +
63 num_sectors * sizeof(struct btrfs_sector_sum);
64}
65
27static inline void 66static inline void
28btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) 67btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
29{ 68{
30 rwlock_init(&t->lock); 69 mutex_init(&t->mutex);
31 t->tree.rb_node = NULL; 70 t->tree.rb_node = NULL;
71 t->last = NULL;
32} 72}
33 73
34int btrfs_add_ordered_inode(struct inode *inode); 74int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
35int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, 75int btrfs_remove_ordered_extent(struct inode *inode,
36 u64 *root_objectid, u64 *objectid, 76 struct btrfs_ordered_extent *entry);
37 struct inode **inode); 77int btrfs_dec_test_ordered_pending(struct inode *inode,
38int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, 78 u64 file_offset, u64 io_size);
39 u64 *root_objectid, u64 *objectid, 79int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
40 struct inode **inode); 80 u64 start, u64 len);
41void btrfs_del_ordered_inode(struct inode *inode, int force); 81int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum);
42int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); 82struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
83 u64 file_offset);
84void btrfs_wait_ordered_extent(struct inode *inode,
85 struct btrfs_ordered_extent *entry);
86void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
87struct btrfs_ordered_extent *
88btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
89int btrfs_add_ordered_pending(struct inode *inode,
90 struct btrfs_ordered_extent *ordered,
91 u64 start, u64 len);
43#endif 92#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a8a3cb03de59..86a5acc19ce7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root)
67 cur_trans->start_time = get_seconds(); 67 cur_trans->start_time = get_seconds();
68 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 68 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
70 btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
71 extent_io_tree_init(&cur_trans->dirty_pages, 70 extent_io_tree_init(&cur_trans->dirty_pages,
72 root->fs_info->btree_inode->i_mapping, 71 root->fs_info->btree_inode->i_mapping,
73 GFP_NOFS); 72 GFP_NOFS);
@@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
158 wake_up(&cur_trans->writer_wait); 157 wake_up(&cur_trans->writer_wait);
159 158
160 if (cur_trans->in_commit && throttle) { 159 if (cur_trans->in_commit && throttle) {
161 int ret; 160 DEFINE_WAIT(wait);
162 mutex_unlock(&root->fs_info->trans_mutex); 161 mutex_unlock(&root->fs_info->trans_mutex);
163 ret = wait_for_commit(root, cur_trans); 162 prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
164 BUG_ON(ret); 163 TASK_UNINTERRUPTIBLE);
164 schedule();
165 finish_wait(&root->fs_info->transaction_throttle, &wait);
165 mutex_lock(&root->fs_info->trans_mutex); 166 mutex_lock(&root->fs_info->trans_mutex);
166 } 167 }
167 168
@@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
486 return ret; 487 return ret;
487} 488}
488 489
489int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
490 struct btrfs_root *root)
491{
492 struct btrfs_transaction *cur_trans = trans->transaction;
493 struct inode *inode;
494 u64 root_objectid = 0;
495 u64 objectid = 0;
496 int ret;
497
498 atomic_inc(&root->fs_info->throttles);
499 while(1) {
500 ret = btrfs_find_first_ordered_inode(
501 &cur_trans->ordered_inode_tree,
502 &root_objectid, &objectid, &inode);
503 if (!ret)
504 break;
505
506 mutex_unlock(&root->fs_info->trans_mutex);
507
508 if (S_ISREG(inode->i_mode)) {
509 atomic_inc(&BTRFS_I(inode)->ordered_writeback);
510 filemap_fdatawrite(inode->i_mapping);
511 atomic_dec(&BTRFS_I(inode)->ordered_writeback);
512 }
513 iput(inode);
514
515 mutex_lock(&root->fs_info->trans_mutex);
516 }
517 while(1) {
518 root_objectid = 0;
519 objectid = 0;
520 ret = btrfs_find_del_first_ordered_inode(
521 &cur_trans->ordered_inode_tree,
522 &root_objectid, &objectid, &inode);
523 if (!ret)
524 break;
525 mutex_unlock(&root->fs_info->trans_mutex);
526
527 if (S_ISREG(inode->i_mode)) {
528 atomic_inc(&BTRFS_I(inode)->ordered_writeback);
529 filemap_write_and_wait(inode->i_mapping);
530 atomic_dec(&BTRFS_I(inode)->ordered_writeback);
531 }
532 atomic_dec(&inode->i_count);
533 iput(inode);
534
535 mutex_lock(&root->fs_info->trans_mutex);
536 }
537 atomic_dec(&root->fs_info->throttles);
538 return 0;
539}
540
541static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, 490static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
542 struct btrfs_fs_info *fs_info, 491 struct btrfs_fs_info *fs_info,
543 struct btrfs_pending_snapshot *pending) 492 struct btrfs_pending_snapshot *pending)
@@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
666 extent_io_tree_init(pinned_copy, 615 extent_io_tree_init(pinned_copy,
667 root->fs_info->btree_inode->i_mapping, GFP_NOFS); 616 root->fs_info->btree_inode->i_mapping, GFP_NOFS);
668 617
618printk("commit trans %Lu\n", trans->transid);
669 trans->transaction->in_commit = 1; 619 trans->transaction->in_commit = 1;
670 cur_trans = trans->transaction; 620 cur_trans = trans->transaction;
671 if (cur_trans->list.prev != &root->fs_info->trans_list) { 621 if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -699,8 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
699 649
700 mutex_lock(&root->fs_info->trans_mutex); 650 mutex_lock(&root->fs_info->trans_mutex);
701 finish_wait(&cur_trans->writer_wait, &wait); 651 finish_wait(&cur_trans->writer_wait, &wait);
702 ret = btrfs_write_ordered_inodes(trans, root);
703
704 } while (cur_trans->num_writers > 1 || 652 } while (cur_trans->num_writers > 1 ||
705 (cur_trans->num_joined != joined)); 653 (cur_trans->num_joined != joined));
706 654
@@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
736 684
737 btrfs_copy_pinned(root, pinned_copy); 685 btrfs_copy_pinned(root, pinned_copy);
738 686
687 wake_up(&root->fs_info->transaction_throttle);
688
739 mutex_unlock(&root->fs_info->trans_mutex); 689 mutex_unlock(&root->fs_info->trans_mutex);
740 ret = btrfs_write_and_wait_transaction(trans, root); 690 ret = btrfs_write_and_wait_transaction(trans, root);
741 BUG_ON(ret); 691 BUG_ON(ret);
@@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
758 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); 708 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
759 709
760 mutex_unlock(&root->fs_info->trans_mutex); 710 mutex_unlock(&root->fs_info->trans_mutex);
711printk("done commit trans %Lu\n", trans->transid);
761 kmem_cache_free(btrfs_trans_handle_cachep, trans); 712 kmem_cache_free(btrfs_trans_handle_cachep, trans);
762 713
763 if (root->fs_info->closing) { 714 if (root->fs_info->closing) {
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 9ccd5a5b170f..910350cd4cf0 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -19,7 +19,6 @@
19#ifndef __BTRFS_TRANSACTION__ 19#ifndef __BTRFS_TRANSACTION__
20#define __BTRFS_TRANSACTION__ 20#define __BTRFS_TRANSACTION__
21#include "btrfs_inode.h" 21#include "btrfs_inode.h"
22#include "ordered-data.h"
23 22
24struct btrfs_transaction { 23struct btrfs_transaction {
25 u64 transid; 24 u64 transid;
@@ -31,7 +30,6 @@ struct btrfs_transaction {
31 struct list_head list; 30 struct list_head list;
32 struct extent_io_tree dirty_pages; 31 struct extent_io_tree dirty_pages;
33 unsigned long start_time; 32 unsigned long start_time;
34 struct btrfs_ordered_inode_tree ordered_inode_tree;
35 wait_queue_head_t writer_wait; 33 wait_queue_head_t writer_wait;
36 wait_queue_head_t commit_wait; 34 wait_queue_head_t commit_wait;
37 struct list_head pending_snapshots; 35 struct list_head pending_snapshots;
@@ -88,8 +86,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
88int btrfs_clean_old_snapshots(struct btrfs_root *root); 86int btrfs_clean_old_snapshots(struct btrfs_root *root);
89int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 87int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
90 struct btrfs_root *root); 88 struct btrfs_root *root);
91int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
92 struct btrfs_root *root);
93int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 89int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
94 struct btrfs_root *root); 90 struct btrfs_root *root);
95#endif 91#endif