-rw-r--r--  fs/btrfs/btrfs_inode.h     4
-rw-r--r--  fs/btrfs/ctree.h          19
-rw-r--r--  fs/btrfs/disk-io.c        13
-rw-r--r--  fs/btrfs/extent-tree.c   132
-rw-r--r--  fs/btrfs/extent_io.c      52
-rw-r--r--  fs/btrfs/extent_io.h      14
-rw-r--r--  fs/btrfs/extent_map.c      5
-rw-r--r--  fs/btrfs/file-item.c      62
-rw-r--r--  fs/btrfs/file.c           67
-rw-r--r--  fs/btrfs/inode.c         447
-rw-r--r--  fs/btrfs/ordered-data.c  455
-rw-r--r--  fs/btrfs/ordered-data.h   71
-rw-r--r--  fs/btrfs/transaction.c    67
-rw-r--r--  fs/btrfs/transaction.h     4
14 files changed, 910 insertions, 502 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 40b4e0c9cd09..8d03687510e0 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -21,6 +21,7 @@
 
 #include "extent_map.h"
 #include "extent_io.h"
+#include "ordered-data.h"
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -32,9 +33,8 @@ struct btrfs_inode {
 	struct extent_io_tree io_failure_tree;
 	struct mutex csum_mutex;
 	struct inode vfs_inode;
-	atomic_t ordered_writeback;
+	struct btrfs_ordered_inode_tree ordered_tree;
 
-	u64 ordered_trans;
 	/*
 	 * transid of the trans_handle that last modified this inode
 	 */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f3783dbd9b60..ceebc052ddcb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -25,6 +25,7 @@
 #include <linux/fs.h>
 #include <linux/completion.h>
 #include <linux/backing-dev.h>
+#include <linux/wait.h>
 #include <asm/kmap_types.h>
 #include "bit-radix.h"
 #include "extent_io.h"
@@ -37,6 +38,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
 extern struct kmem_cache *btrfs_transaction_cachep;
 extern struct kmem_cache *btrfs_bit_radix_cachep;
 extern struct kmem_cache *btrfs_path_cachep;
+struct btrfs_ordered_sum;
 
 #define BTRFS_MAGIC "_B5RfS_M"
 
@@ -510,6 +512,7 @@ struct btrfs_fs_info {
 	u64 max_inline;
 	u64 alloc_start;
 	struct btrfs_transaction *running_transaction;
+	wait_queue_head_t transaction_throttle;
 	struct btrfs_super_block super_copy;
 	struct btrfs_super_block super_for_commit;
 	struct block_device *__bdev;
@@ -541,6 +544,7 @@ struct btrfs_fs_info {
 	 */
 	struct btrfs_workers workers;
 	struct btrfs_workers endio_workers;
+	struct btrfs_workers endio_write_workers;
 	struct btrfs_workers submit_workers;
 	struct task_struct *transaction_kthread;
 	struct task_struct *cleaner_kthread;
@@ -1384,6 +1388,17 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
 		       u64 owner, u64 owner_offset,
 		       u64 empty_size, u64 hint_byte,
 		       u64 search_end, struct btrfs_key *ins, u64 data);
+int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 root_objectid, u64 ref_generation,
+				u64 owner, u64 owner_offset,
+				struct btrfs_key *ins);
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 num_bytes, u64 min_alloc_size,
+			 u64 empty_size, u64 hint_byte,
+			 u64 search_end, struct btrfs_key *ins,
+			 u64 data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		  struct extent_buffer *buf);
 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
@@ -1556,9 +1571,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 			     u64 bytenr, int mod);
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, struct inode *inode,
-			   struct bio *bio, char *sums);
+			   struct btrfs_ordered_sum *sums);
 int btrfs_csum_one_bio(struct btrfs_root *root,
-		       struct bio *bio, char **sums_ret);
+		       struct bio *bio, struct btrfs_ordered_sum **sums_ret);
 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
 					  struct btrfs_root *root,
 					  struct btrfs_path *path,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b01b3f4f92a9..4a5ebafb935a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -407,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio,
 	end_io_wq->error = err;
 	end_io_wq->work.func = end_workqueue_fn;
 	end_io_wq->work.flags = 0;
-	btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
+	if (bio->bi_rw & (1 << BIO_RW))
+		btrfs_queue_worker(&fs_info->endio_write_workers,
+				   &end_io_wq->work);
+	else
+		btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
 	return 0;
@@ -1286,6 +1290,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	mutex_init(&fs_info->transaction_kthread_mutex);
 	mutex_init(&fs_info->cleaner_mutex);
 	mutex_init(&fs_info->volume_mutex);
+	init_waitqueue_head(&fs_info->transaction_throttle);
 
 #if 0
 	ret = add_hasher(fs_info, "crc32c");
@@ -1325,9 +1330,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
 	btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
 	btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+	btrfs_init_workers(&fs_info->endio_write_workers,
+			   fs_info->thread_pool_size);
 	btrfs_start_workers(&fs_info->workers, 1);
 	btrfs_start_workers(&fs_info->submit_workers, 1);
 	btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+	btrfs_start_workers(&fs_info->endio_write_workers,
+			    fs_info->thread_pool_size);
 
 	err = -EINVAL;
 	if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
@@ -1447,6 +1456,7 @@ fail_sb_buffer:
 	extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
 	btrfs_stop_workers(&fs_info->workers);
 	btrfs_stop_workers(&fs_info->endio_workers);
+	btrfs_stop_workers(&fs_info->endio_write_workers);
 	btrfs_stop_workers(&fs_info->submit_workers);
 fail_iput:
 	iput(fs_info->btree_inode);
@@ -1702,6 +1712,7 @@ int close_ctree(struct btrfs_root *root)
 
 	btrfs_stop_workers(&fs_info->workers);
 	btrfs_stop_workers(&fs_info->endio_workers);
+	btrfs_stop_workers(&fs_info->endio_write_workers);
 	btrfs_stop_workers(&fs_info->submit_workers);
 
 	iput(fs_info->btree_inode);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8ebfa6be0790..343d1101c31c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1895,36 +1895,17 @@ error:
 	return ret;
 }
 
-/*
- * finds a free extent and does all the dirty work required for allocation
- * returns the key for the extent through ins, and a tree buffer for
- * the first block of the extent through buf.
- *
- * returns 0 if everything worked, non-zero otherwise.
- */
-int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
-		       struct btrfs_root *root,
-		       u64 num_bytes, u64 min_alloc_size,
-		       u64 root_objectid, u64 ref_generation,
-		       u64 owner, u64 owner_offset,
-		       u64 empty_size, u64 hint_byte,
-		       u64 search_end, struct btrfs_key *ins, u64 data)
+static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  u64 num_bytes, u64 min_alloc_size,
+				  u64 empty_size, u64 hint_byte,
+				  u64 search_end, struct btrfs_key *ins,
+				  u64 data)
 {
 	int ret;
-	int pending_ret;
-	u64 super_used;
-	u64 root_used;
 	u64 search_start = 0;
 	u64 alloc_profile;
-	u32 sizes[2];
 	struct btrfs_fs_info *info = root->fs_info;
-	struct btrfs_root *extent_root = info->extent_root;
-	struct btrfs_extent_item *extent_item;
-	struct btrfs_extent_ref *ref;
-	struct btrfs_path *path;
-	struct btrfs_key keys[2];
-
-	maybe_lock_mutex(root);
 
 	if (data) {
 		alloc_profile = info->avail_data_alloc_bits &
@@ -1974,11 +1955,48 @@ again:
 	}
 	if (ret) {
 		printk("allocation failed flags %Lu\n", data);
-	}
-	if (ret) {
 		BUG();
-		goto out;
 	}
+	clear_extent_dirty(&root->fs_info->free_space_cache,
+			   ins->objectid, ins->objectid + ins->offset - 1,
+			   GFP_NOFS);
+	return 0;
+}
+
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 num_bytes, u64 min_alloc_size,
+			 u64 empty_size, u64 hint_byte,
+			 u64 search_end, struct btrfs_key *ins,
+			 u64 data)
+{
+	int ret;
+	maybe_lock_mutex(root);
+	ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
+				     empty_size, hint_byte, search_end, ins,
+				     data);
+	maybe_unlock_mutex(root);
+	return ret;
+}
+
+static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+					 struct btrfs_root *root,
+					 u64 root_objectid, u64 ref_generation,
+					 u64 owner, u64 owner_offset,
+					 struct btrfs_key *ins)
+{
+	int ret;
+	int pending_ret;
+	u64 super_used;
+	u64 root_used;
+	u64 num_bytes = ins->offset;
+	u32 sizes[2];
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_root *extent_root = info->extent_root;
+	struct btrfs_extent_item *extent_item;
+	struct btrfs_extent_ref *ref;
+	struct btrfs_path *path;
+	struct btrfs_key keys[2];
 
 	/* block accounting for super block */
 	spin_lock_irq(&info->delalloc_lock);
@@ -1990,10 +2008,6 @@ again:
 	root_used = btrfs_root_used(&root->root_item);
 	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
 
-	clear_extent_dirty(&root->fs_info->free_space_cache,
-			   ins->objectid, ins->objectid + ins->offset - 1,
-			   GFP_NOFS);
-
 	if (root == extent_root) {
 		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
 				ins->objectid + ins->offset - 1,
@@ -2001,10 +2015,6 @@ again:
 		goto update_block;
 	}
 
-	WARN_ON(trans->alloc_exclude_nr);
-	trans->alloc_exclude_start = ins->objectid;
-	trans->alloc_exclude_nr = ins->offset;
-
 	memcpy(&keys[0], ins, sizeof(*ins));
 	keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
 					 owner, owner_offset);
@@ -2054,6 +2064,51 @@ update_block:
 		BUG();
 	}
 out:
+	return ret;
+}
+
+int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 root_objectid, u64 ref_generation,
+				u64 owner, u64 owner_offset,
+				struct btrfs_key *ins)
+{
+	int ret;
+	maybe_lock_mutex(root);
+	ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
+					    ref_generation, owner,
+					    owner_offset, ins);
+	maybe_unlock_mutex(root);
+	return ret;
+}
+/*
+ * finds a free extent and does all the dirty work required for allocation
+ * returns the key for the extent through ins, and a tree buffer for
+ * the first block of the extent through buf.
+ *
+ * returns 0 if everything worked, non-zero otherwise.
+ */
+int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root,
+		       u64 num_bytes, u64 min_alloc_size,
+		       u64 root_objectid, u64 ref_generation,
+		       u64 owner, u64 owner_offset,
+		       u64 empty_size, u64 hint_byte,
+		       u64 search_end, struct btrfs_key *ins, u64 data)
+{
+	int ret;
+
+	maybe_lock_mutex(root);
+
+	ret = __btrfs_reserve_extent(trans, root, num_bytes,
+				     min_alloc_size, empty_size, hint_byte,
+				     search_end, ins, data);
+	BUG_ON(ret);
+	ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
+					    ref_generation, owner,
+					    owner_offset, ins);
+	BUG_ON(ret);
+
 	maybe_unlock_mutex(root);
 	return ret;
 }
@@ -2288,8 +2343,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
 		mutex_lock(&root->fs_info->alloc_mutex);
 
 		/* we've dropped the lock, double check */
-		ret = drop_snap_lookup_refcount(root, bytenr,
-					blocksize, &refs);
+		ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
+					&refs);
 		BUG_ON(ret);
 		if (refs != 1) {
 			parent = path->nodes[*level];
@@ -2584,7 +2639,6 @@ out_unlock:
 	kfree(ra);
 	trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
 	if (trans) {
-		btrfs_add_ordered_inode(inode);
 		btrfs_end_transaction(trans, BTRFS_I(inode)->root);
 		mark_inode_dirty(inode);
 	}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 40a5f53cb040..3f82a6e9ca4f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -793,6 +793,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 }
 EXPORT_SYMBOL(set_extent_dirty);
 
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask)
+{
+	return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_ordered);
+
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 		    int bits, gfp_t mask)
 {
@@ -812,8 +819,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
 		       gfp_t mask)
 {
 	return set_extent_bit(tree, start, end,
-			      EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
-			      mask);
+			      EXTENT_DELALLOC | EXTENT_DIRTY,
+			      0, NULL, mask);
 }
 EXPORT_SYMBOL(set_extent_delalloc);
 
@@ -825,6 +832,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 }
 EXPORT_SYMBOL(clear_extent_dirty);
 
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+			 gfp_t mask)
+{
+	return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+}
+EXPORT_SYMBOL(clear_extent_ordered);
+
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 		   gfp_t mask)
 {
@@ -1395,10 +1409,9 @@ static int end_bio_extent_writepage(struct bio *bio,
 
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
-
 		if (tree->ops && tree->ops->writepage_end_io_hook) {
 			ret = tree->ops->writepage_end_io_hook(page, start,
-						       end, state);
+						       end, state, uptodate);
 			if (ret)
 				uptodate = 0;
 		}
@@ -1868,9 +1881,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 			unlock_extent(tree, cur, end, GFP_NOFS);
 			break;
 		}
-
 		extent_offset = cur - em->start;
+		if (extent_map_end(em) <= cur) {
+printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
+		}
 		BUG_ON(extent_map_end(em) <= cur);
+		if (end < cur) {
+printk("2bad mapping end %Lu cur %Lu\n", end, cur);
+		}
 		BUG_ON(end < cur);
 
 		iosize = min(extent_map_end(em) - cur, end - cur + 1);
@@ -1976,6 +1994,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 last_byte = i_size_read(inode);
 	u64 block_start;
 	u64 iosize;
+	u64 unlock_start;
 	sector_t sector;
 	struct extent_map *em;
 	struct block_device *bdev;
@@ -1988,7 +2007,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 nr_delalloc;
 	u64 delalloc_end;
 
-
 	WARN_ON(!PageLocked(page));
 	page_offset = i_size & (PAGE_CACHE_SIZE - 1);
 	if (page->index > end_index ||
@@ -2030,6 +2048,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		delalloc_start = delalloc_end + 1;
 	}
 	lock_extent(tree, start, page_end, GFP_NOFS);
+	unlock_start = start;
 
 	end = page_end;
 	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
@@ -2038,6 +2057,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
 	if (last_byte <= start) {
 		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+		unlock_extent(tree, start, page_end, GFP_NOFS);
+		if (tree->ops && tree->ops->writepage_end_io_hook)
+			tree->ops->writepage_end_io_hook(page, start,
+							 page_end, NULL, 1);
+		unlock_start = page_end + 1;
 		goto done;
 	}
 
@@ -2047,6 +2071,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	while (cur <= end) {
 		if (cur >= last_byte) {
 			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+			unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, cur,
+							 page_end, NULL, 1);
+			unlock_start = page_end + 1;
 			break;
 		}
 		em = epd->get_extent(inode, page, page_offset, cur,
@@ -2071,8 +2100,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		    block_start == EXTENT_MAP_INLINE) {
 			clear_extent_dirty(tree, cur,
 					   cur + iosize - 1, GFP_NOFS);
+
+			unlock_extent(tree, unlock_start, cur + iosize -1,
+				      GFP_NOFS);
+			if (tree->ops && tree->ops->writepage_end_io_hook)
+				tree->ops->writepage_end_io_hook(page, cur,
+							 cur + iosize - 1,
+							 NULL, 1);
 			cur = cur + iosize;
 			page_offset += iosize;
+			unlock_start = cur;
 			continue;
 		}
 
@@ -2119,7 +2156,8 @@ done:
 		set_page_writeback(page);
 		end_page_writeback(page);
 	}
-	unlock_extent(tree, start, page_end, GFP_NOFS);
+	if (unlock_start <= page_end)
+		unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
 	unlock_page(page);
 	return 0;
 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f1960dafaa19..2268a7995896 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -13,6 +13,8 @@
 #define EXTENT_DEFRAG (1 << 6)
 #define EXTENT_DEFRAG_DONE (1 << 7)
 #define EXTENT_BUFFER_FILLED (1 << 8)
+#define EXTENT_ORDERED (1 << 9)
+#define EXTENT_ORDERED_METADATA (1 << 10)
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 
 /*
@@ -42,7 +44,7 @@ struct extent_io_ops {
 	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
 				    struct extent_state *state);
 	int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
-				      struct extent_state *state);
+				      struct extent_state *state, int uptodate);
 	int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
 			    unsigned long old, unsigned long bits);
 	int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end,
@@ -131,6 +133,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		   int bits, int filled);
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 		      int bits, gfp_t mask);
+int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		     int bits, int wake, int delete, gfp_t mask);
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 		    int bits, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
@@ -141,8 +145,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 		     gfp_t mask);
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 		       gfp_t mask);
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+			 gfp_t mask);
+int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
+				  u64 end, gfp_t mask);
 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
 			gfp_t mask);
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask);
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
 			  u64 *start_ret, u64 *end_ret, int bits);
 struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
@@ -209,6 +219,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
 			  unsigned long start, unsigned long len);
 int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
 				    struct extent_buffer *eb);
+int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
+int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 			      struct extent_buffer *eb);
 int set_extent_buffer_dirty(struct extent_io_tree *tree,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f5a04eb9a2ac..81123277c2b8 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -206,10 +206,11 @@ int add_extent_mapping(struct extent_map_tree *tree,
 	struct extent_map *merge = NULL;
 	struct rb_node *rb;
 
+	BUG_ON(spin_trylock(&tree->lock));
 	rb = tree_insert(&tree->map, em->start, &em->rb_node);
 	if (rb) {
-		merge = rb_entry(rb, struct extent_map, rb_node);
 		ret = -EEXIST;
+		free_extent_map(merge);
 		goto out;
 	}
 	atomic_inc(&em->refs);
@@ -268,6 +269,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
 	struct rb_node *next = NULL;
 	u64 end = range_end(start, len);
 
+	BUG_ON(spin_trylock(&tree->lock));
 	em = tree->last;
 	if (em && end > em->start && start < extent_map_end(em))
 		goto found;
@@ -318,6 +320,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
 {
 	int ret = 0;
 
+	BUG_ON(spin_trylock(&tree->lock));
 	rb_erase(&em->rb_node, &tree->map);
 	em->in_tree = 0;
 	if (tree->last == em)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f537eb43c2c6..345caf8ff516 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -135,26 +135,37 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_csum_one_bio(struct btrfs_root *root,
-		       struct bio *bio, char **sums_ret)
+		       struct bio *bio, struct btrfs_ordered_sum **sums_ret)
 {
-	u32 *sums;
+	struct btrfs_ordered_sum *sums;
+	struct btrfs_sector_sum *sector_sum;
 	char *data;
 	struct bio_vec *bvec = bio->bi_io_vec;
 	int bio_index = 0;
 
-	sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS);
+	WARN_ON(bio->bi_vcnt <= 0);
+	sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
 	if (!sums)
 		return -ENOMEM;
-	*sums_ret = (char *)sums;
+	*sums_ret = sums;
+	sector_sum = &sums->sums;
+	sums->file_offset = page_offset(bvec->bv_page);
+	sums->len = bio->bi_size;
+	INIT_LIST_HEAD(&sums->list);
 
 	while(bio_index < bio->bi_vcnt) {
 		data = kmap_atomic(bvec->bv_page, KM_USER0);
-		*sums = ~(u32)0;
-		*sums = btrfs_csum_data(root, data + bvec->bv_offset,
-					*sums, bvec->bv_len);
+		sector_sum->sum = ~(u32)0;
+		sector_sum->sum = btrfs_csum_data(root,
+						  data + bvec->bv_offset,
+						  sector_sum->sum,
+						  bvec->bv_len);
 		kunmap_atomic(data, KM_USER0);
-		btrfs_csum_final(*sums, (char *)sums);
-		sums++;
+		btrfs_csum_final(sector_sum->sum,
+				 (char *)&sector_sum->sum);
+		sector_sum->offset = page_offset(bvec->bv_page) +
+				     bvec->bv_offset;
+		sector_sum++;
 		bio_index++;
 		bvec++;
 	}
@@ -163,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root,
 
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, struct inode *inode,
-			   struct bio *bio, char *sums)
+			   struct btrfs_ordered_sum *sums)
 {
 	u64 objectid = inode->i_ino;
 	u64 offset;
@@ -171,17 +182,16 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 	struct btrfs_key file_key;
 	struct btrfs_key found_key;
 	u64 next_offset;
+	u64 total_bytes = 0;
 	int found_next;
 	struct btrfs_path *path;
 	struct btrfs_csum_item *item;
 	struct btrfs_csum_item *item_end;
 	struct extent_buffer *leaf = NULL;
 	u64 csum_offset;
-	u32 *sums32 = (u32 *)sums;
+	struct btrfs_sector_sum *sector_sum;
 	u32 nritems;
 	u32 ins_size;
-	int bio_index = 0;
-	struct bio_vec *bvec = bio->bi_io_vec;
 	char *eb_map;
 	char *eb_token;
 	unsigned long map_len;
@@ -189,10 +199,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
+	sector_sum = &sums->sums;
 again:
 	next_offset = (u64)-1;
 	found_next = 0;
-	offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+	offset = sector_sum->offset;
 	file_key.objectid = objectid;
 	file_key.offset = offset;
 	btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
@@ -303,7 +314,7 @@ found:
 	item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
 				      btrfs_item_size_nr(leaf, path->slots[0]));
 	eb_token = NULL;
-next_bvec:
+next_sector:
 
 	if (!eb_token ||
 	    (unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) {
@@ -321,21 +332,20 @@ next_bvec:
 	}
 	if (eb_token) {
 		memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
-		       sums32, BTRFS_CRC32_SIZE);
+		       &sector_sum->sum, BTRFS_CRC32_SIZE);
 	} else {
-		write_extent_buffer(leaf, sums32, (unsigned long)item,
-				    BTRFS_CRC32_SIZE);
+		write_extent_buffer(leaf, &sector_sum->sum,
+				    (unsigned long)item, BTRFS_CRC32_SIZE);
 	}
-	bio_index++;
-	bvec++;
-	sums32++;
-	if (bio_index < bio->bi_vcnt) {
+	total_bytes += root->sectorsize;
+	sector_sum++;
+	if (total_bytes < sums->len) {
 		item = (struct btrfs_csum_item *)((char *)item +
 						  BTRFS_CRC32_SIZE);
 		if (item < item_end && offset + PAGE_CACHE_SIZE ==
-		    page_offset(bvec->bv_page)) {
-			offset = page_offset(bvec->bv_page);
-			goto next_bvec;
+		    sector_sum->offset) {
+			offset = sector_sum->offset;
+			goto next_sector;
 		}
 	}
 	if (eb_token) {
@@ -343,7 +353,7 @@ next_bvec:
 		eb_token = NULL;
 	}
 	btrfs_mark_buffer_dirty(path->nodes[0]);
-	if (bio_index < bio->bi_vcnt) {
+	if (total_bytes < sums->len) {
 		btrfs_release_path(root, path);
 		goto again;
 	}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8037792f8789..12e765f7e0d4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -34,7 +34,6 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "ordered-data.h"
 #include "ioctl.h"
 #include "print-tree.h"
 #include "compat.h"
@@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
 		u64 mask = root->sectorsize - 1;
 		last_pos_in_file = (isize + mask) & ~mask;
 		hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
-		if (last_pos_in_file < start_pos) {
+		if (hole_size > 0) {
+			btrfs_wait_ordered_range(inode, last_pos_in_file,
+						 last_pos_in_file + hole_size);
 			err = btrfs_drop_extents(trans, root, inode,
 						 last_pos_in_file,
 						 last_pos_in_file + hole_size,
@@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
 	    inline_size > root->fs_info->max_inline ||
 	    (inline_size & (root->sectorsize -1)) == 0 ||
 	    inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
-		u64 last_end;
-
+		/* check for reserved extents on each page, we don't want
+		 * to reset the delalloc bit on things that already have
+		 * extents reserved.
+		 */
+		set_extent_delalloc(io_tree, start_pos,
+				    end_of_last_block, GFP_NOFS);
 		for (i = 0; i < num_pages; i++) {
 			struct page *p = pages[i];
 			SetPageUptodate(p);
 			set_page_dirty(p);
 		}
-		last_end = (u64)(pages[num_pages -1]->index) <<
-			PAGE_CACHE_SHIFT;
-		last_end += PAGE_CACHE_SIZE - 1;
-		set_extent_delalloc(io_tree, start_pos, end_of_last_block,
-				    GFP_NOFS);
-		btrfs_add_ordered_inode(inode);
 	} else {
 		u64 aligned_end;
 		/* step one, delete the existing extents in this range */
@@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
 	struct extent_map *split = NULL;
 	struct extent_map *split2 = NULL;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *tmp;
 	u64 len = end - start + 1;
+	u64 next_start;
 	int ret;
 	int testend = 1;
 
+	WARN_ON(end < start);
 	if (end == (u64)-1) {
 		len = (u64)-1;
 		testend = 0;
@@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
 			spin_unlock(&em_tree->lock);
 			break;
 		}
+		tmp = rb_entry(&em->rb_node, struct extent_map, rb_node);
+		next_start = tmp->start;
 		remove_extent_mapping(em_tree, em);
 
 		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
@@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file,
 	struct inode *inode = fdentry(file)->d_inode;
 	int err = 0;
 	u64 start_pos;
+	u64 last_pos;
 
 	start_pos = pos & ~((u64)root->sectorsize - 1);
+	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
 
 	memset(pages, 0, num_pages * sizeof(struct page *));
-
+again:
 	for (i = 0; i < num_pages; i++) {
 		pages[i] = grab_cache_page(inode->i_mapping, index + i);
 		if (!pages[i]) {
 			err = -ENOMEM;
 			BUG_ON(1);
 		}
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-		ClearPageDirty(pages[i]);
-#else
-		cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
-#endif
 		wait_on_page_writeback(pages[i]);
-		set_page_extent_mapped(pages[i]);
-		WARN_ON(!PageLocked(pages[i]));
 	}
 	if (start_pos < inode->i_size) {
-		u64 last_pos;
-		last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
+		struct btrfs_ordered_extent *ordered;
 		lock_extent(&BTRFS_I(inode)->io_tree,
 			    start_pos, last_pos - 1, GFP_NOFS);
+		ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
+		if (ordered &&
+		    ordered->file_offset + ordered->len > start_pos &&
+		    ordered->file_offset < last_pos) {
+			btrfs_put_ordered_extent(ordered);
+			unlock_extent(&BTRFS_I(inode)->io_tree,
+				      start_pos, last_pos - 1, GFP_NOFS);
+			for (i = 0; i < num_pages; i++) {
+				unlock_page(pages[i]);
+				page_cache_release(pages[i]);
+			}
+			btrfs_wait_ordered_range(inode, start_pos,
						 last_pos - start_pos);
+			goto again;
+		}
+		if (ordered)
+			btrfs_put_ordered_extent(ordered);
+
 		clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
 				  last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
 				  GFP_NOFS);
 		unlock_extent(&BTRFS_I(inode)->io_tree,
 			      start_pos, last_pos - 1, GFP_NOFS);
 	}
+	for (i = 0; i < num_pages; i++) {
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+		ClearPageDirty(pages[i]);
+#else
+		cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
+#endif
+		set_page_extent_mapped(pages[i]);
+		WARN_ON(!PageLocked(pages[i]));
+	}
 	return 0;
 }
 
@@ -969,13 +994,11 @@ out_nolock:
 			(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
 	}
 	current->backing_dev_info = NULL;
-	btrfs_ordered_throttle(root, inode);
 	return num_written ? num_written : err;
 }
 
 int btrfs_release_file(struct inode * inode, struct file * filp)
 {
-	btrfs_del_ordered_inode(inode, 0);
 	if (filp->private_data)
 		btrfs_ioctl_trans_end(filp);
 	return 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d39433dfb2c7..c5a62f0b9595 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -43,6 +43,7 @@
43#include "ioctl.h" 43#include "ioctl.h"
44#include "print-tree.h" 44#include "print-tree.h"
45#include "volumes.h" 45#include "volumes.h"
46#include "ordered-data.h"
46 47
47struct btrfs_iget_args { 48struct btrfs_iget_args {
48 u64 ino; 49 u64 ino;
@@ -109,10 +110,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
109 u64 num_bytes; 110 u64 num_bytes;
110 u64 cur_alloc_size; 111 u64 cur_alloc_size;
111 u64 blocksize = root->sectorsize; 112 u64 blocksize = root->sectorsize;
112 u64 orig_start = start;
113 u64 orig_num_bytes; 113 u64 orig_num_bytes;
114 struct btrfs_key ins; 114 struct btrfs_key ins;
115 int ret; 115 struct extent_map *em;
116 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
117 int ret = 0;
116 118
117 trans = btrfs_start_transaction(root, 1); 119 trans = btrfs_start_transaction(root, 1);
118 BUG_ON(!trans); 120 BUG_ON(!trans);
@@ -120,33 +122,44 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
120 122
121 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 123 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
122 num_bytes = max(blocksize, num_bytes); 124 num_bytes = max(blocksize, num_bytes);
123 ret = btrfs_drop_extents(trans, root, inode,
124 start, start + num_bytes, start, &alloc_hint);
125 orig_num_bytes = num_bytes; 125 orig_num_bytes = num_bytes;
126 126
127 if (alloc_hint == EXTENT_MAP_INLINE) 127 if (alloc_hint == EXTENT_MAP_INLINE)
128 goto out; 128 goto out;
129 129
130 BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); 130 BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
131 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1);
131 132
132 while(num_bytes > 0) { 133 while(num_bytes > 0) {
133 cur_alloc_size = min(num_bytes, root->fs_info->max_extent); 134 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
134 ret = btrfs_alloc_extent(trans, root, cur_alloc_size, 135 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
135 root->sectorsize, 136 root->sectorsize, 0, 0,
136 root->root_key.objectid, 137 (u64)-1, &ins, 1);
137 trans->transid,
138 inode->i_ino, start, 0,
139 alloc_hint, (u64)-1, &ins, 1);
140 if (ret) { 138 if (ret) {
141 WARN_ON(1); 139 WARN_ON(1);
142 goto out; 140 goto out;
143 } 141 }
142 em = alloc_extent_map(GFP_NOFS);
143 em->start = start;
144 em->len = ins.offset;
145 em->block_start = ins.objectid;
146 em->bdev = root->fs_info->fs_devices->latest_bdev;
147 while(1) {
148 spin_lock(&em_tree->lock);
149 ret = add_extent_mapping(em_tree, em);
150 spin_unlock(&em_tree->lock);
151 if (ret != -EEXIST) {
152 free_extent_map(em);
153 break;
154 }
155 btrfs_drop_extent_cache(inode, start,
156 start + ins.offset - 1);
157 }
158
144 cur_alloc_size = ins.offset; 159 cur_alloc_size = ins.offset;
145 ret = btrfs_insert_file_extent(trans, root, inode->i_ino, 160 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
146 start, ins.objectid, ins.offset, 161 ins.offset);
147 ins.offset, 0); 162 BUG_ON(ret);
148 inode->i_blocks += ins.offset >> 9;
149 btrfs_check_file(root, inode);
150 if (num_bytes < cur_alloc_size) { 163 if (num_bytes < cur_alloc_size) {
151 printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes, 164 printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
152 cur_alloc_size); 165 cur_alloc_size);
@@ -156,10 +169,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
156 alloc_hint = ins.objectid + ins.offset; 169 alloc_hint = ins.objectid + ins.offset;
157 start += cur_alloc_size; 170 start += cur_alloc_size;
158 } 171 }
159 btrfs_drop_extent_cache(inode, orig_start,
160 orig_start + orig_num_bytes - 1);
161 btrfs_add_ordered_inode(inode);
162 btrfs_update_inode(trans, root, inode);
163out: 172out:
164 btrfs_end_transaction(trans, root); 173 btrfs_end_transaction(trans, root);
165 return ret; 174 return ret;
@@ -341,25 +350,15 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
341 int mirror_num) 350 int mirror_num)
342{ 351{
343 struct btrfs_root *root = BTRFS_I(inode)->root; 352 struct btrfs_root *root = BTRFS_I(inode)->root;
344 struct btrfs_trans_handle *trans;
345 int ret = 0; 353 int ret = 0;
346 char *sums = NULL; 354 struct btrfs_ordered_sum *sums;
347 355
348 ret = btrfs_csum_one_bio(root, bio, &sums); 356 ret = btrfs_csum_one_bio(root, bio, &sums);
349 BUG_ON(ret); 357 BUG_ON(ret);
350 358
351 trans = btrfs_start_transaction(root, 1); 359 ret = btrfs_add_ordered_sum(inode, sums);
352
353 btrfs_set_trans_block_group(trans, inode);
354 mutex_lock(&BTRFS_I(inode)->csum_mutex);
355 btrfs_csum_file_blocks(trans, root, inode, bio, sums);
356 mutex_unlock(&BTRFS_I(inode)->csum_mutex);
357
358 ret = btrfs_end_transaction(trans, root);
359 BUG_ON(ret); 360 BUG_ON(ret);
360 361
361 kfree(sums);
362
363 return btrfs_map_bio(root, rw, bio, mirror_num, 1); 362 return btrfs_map_bio(root, rw, bio, mirror_num, 1);
364} 363}
365 364
@@ -369,14 +368,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
369 struct btrfs_root *root = BTRFS_I(inode)->root; 368 struct btrfs_root *root = BTRFS_I(inode)->root;
370 int ret = 0; 369 int ret = 0;
371 370
372 if (!(rw & (1 << BIO_RW))) { 371 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
373 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 372 BUG_ON(ret);
374 BUG_ON(ret);
375 goto mapit;
376 }
377 373
378 if (btrfs_test_opt(root, NODATASUM) || 374 if (!(rw & (1 << BIO_RW))) {
379 btrfs_test_flag(inode, NODATASUM)) {
380 goto mapit; 375 goto mapit;
381 } 376 }
382 377
@@ -387,6 +382,96 @@ mapit:
387 return btrfs_map_bio(root, rw, bio, mirror_num, 0); 382 return btrfs_map_bio(root, rw, bio, mirror_num, 0);
388} 383}
389 384
385static int add_pending_csums(struct btrfs_trans_handle *trans,
386 struct inode *inode, u64 file_offset,
387 struct list_head *list)
388{
389 struct list_head *cur;
390 struct btrfs_ordered_sum *sum;
391
392 btrfs_set_trans_block_group(trans, inode);
393 while(!list_empty(list)) {
394 cur = list->next;
395 sum = list_entry(cur, struct btrfs_ordered_sum, list);
396 mutex_lock(&BTRFS_I(inode)->csum_mutex);
397 btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
398 inode, sum);
399 mutex_unlock(&BTRFS_I(inode)->csum_mutex);
400 list_del(&sum->list);
401 kfree(sum);
402 }
403 return 0;
404}
405
406int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
407 struct extent_state *state, int uptodate)
408{
409 struct inode *inode = page->mapping->host;
410 struct btrfs_root *root = BTRFS_I(inode)->root;
411 struct btrfs_trans_handle *trans;
412 struct btrfs_ordered_extent *ordered_extent;
413 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
414 u64 alloc_hint = 0;
415 struct list_head list;
416 struct btrfs_key ins;
417 int ret;
418
419 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
420 if (!ret) {
421 return 0;
422 }
423
424 trans = btrfs_start_transaction(root, 1);
425
426 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
427 BUG_ON(!ordered_extent);
428
429 lock_extent(io_tree, ordered_extent->file_offset,
430 ordered_extent->file_offset + ordered_extent->len - 1,
431 GFP_NOFS);
432
433 INIT_LIST_HEAD(&list);
434
435 ins.objectid = ordered_extent->start;
436 ins.offset = ordered_extent->len;
437 ins.type = BTRFS_EXTENT_ITEM_KEY;
438 ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid,
439 trans->transid, inode->i_ino,
440 ordered_extent->file_offset, &ins);
441 BUG_ON(ret);
442 ret = btrfs_drop_extents(trans, root, inode,
443 ordered_extent->file_offset,
444 ordered_extent->file_offset +
445 ordered_extent->len,
446 ordered_extent->file_offset, &alloc_hint);
447 BUG_ON(ret);
448 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
449 ordered_extent->file_offset,
450 ordered_extent->start,
451 ordered_extent->len,
452 ordered_extent->len, 0);
453 BUG_ON(ret);
454 btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
455 ordered_extent->file_offset +
456 ordered_extent->len - 1);
457 inode->i_blocks += ordered_extent->len >> 9;
458 unlock_extent(io_tree, ordered_extent->file_offset,
459 ordered_extent->file_offset + ordered_extent->len - 1,
460 GFP_NOFS);
461 add_pending_csums(trans, inode, ordered_extent->file_offset,
462 &ordered_extent->list);
463
464 btrfs_remove_ordered_extent(inode, ordered_extent);
465 /* once for us */
466 btrfs_put_ordered_extent(ordered_extent);
467 /* once for the tree */
468 btrfs_put_ordered_extent(ordered_extent);
469
470 btrfs_update_inode(trans, root, inode);
471 btrfs_end_transaction(trans, root);
472 return 0;
473}
474
390int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) 475int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
391{ 476{
392 int ret = 0; 477 int ret = 0;
@@ -409,7 +494,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
409 if (ret == -ENOENT || ret == -EFBIG) 494 if (ret == -ENOENT || ret == -EFBIG)
410 ret = 0; 495 ret = 0;
411 csum = 0; 496 csum = 0;
412 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start); 497 printk("no csum found for inode %lu start %Lu\n", inode->i_ino,
498 start);
413 goto out; 499 goto out;
414 } 500 }
415 read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, 501 read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
@@ -833,7 +919,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
833{ 919{
834 struct btrfs_root *root; 920 struct btrfs_root *root;
835 struct btrfs_trans_handle *trans; 921 struct btrfs_trans_handle *trans;
836 struct inode *inode = dentry->d_inode;
837 int ret; 922 int ret;
838 unsigned long nr = 0; 923 unsigned long nr = 0;
839 924
@@ -849,14 +934,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
849 ret = btrfs_unlink_trans(trans, root, dir, dentry); 934 ret = btrfs_unlink_trans(trans, root, dir, dentry);
850 nr = trans->blocks_used; 935 nr = trans->blocks_used;
851 936
852 if (inode->i_nlink == 0) {
853 /* if the inode isn't linked anywhere,
854 * we don't need to worry about
855 * data=ordered
856 */
857 btrfs_del_ordered_inode(inode, 1);
858 }
859
860 btrfs_end_transaction_throttle(trans, root); 937 btrfs_end_transaction_throttle(trans, root);
861fail: 938fail:
862 btrfs_btree_balance_dirty(root, nr); 939 btrfs_btree_balance_dirty(root, nr);
@@ -931,6 +1008,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
931 int extent_type = -1; 1008 int extent_type = -1;
932 u64 mask = root->sectorsize - 1; 1009 u64 mask = root->sectorsize - 1;
933 1010
1011 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
934 btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); 1012 btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
935 path = btrfs_alloc_path(); 1013 path = btrfs_alloc_path();
936 path->reada = -1; 1014 path->reada = -1;
@@ -1117,34 +1195,6 @@ error:
1117 return ret; 1195 return ret;
1118} 1196}
1119 1197
1120static int btrfs_cow_one_page(struct inode *inode, struct page *page,
1121 size_t zero_start)
1122{
1123 char *kaddr;
1124 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1125 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1126 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1127 int ret = 0;
1128
1129 WARN_ON(!PageLocked(page));
1130 set_page_extent_mapped(page);
1131
1132 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1133 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1134 page_end, GFP_NOFS);
1135
1136 if (zero_start != PAGE_CACHE_SIZE) {
1137 kaddr = kmap(page);
1138 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
1139 flush_dcache_page(page);
1140 kunmap(page);
1141 }
1142 set_page_dirty(page);
1143 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1144
1145 return ret;
1146}
1147
1148/* 1198/*
1149 * taken from block_truncate_page, but does cow as it zeros out 1199 * taken from block_truncate_page, but does cow as it zeros out
1150 * any bytes left in the last page in the file. 1200 * any bytes left in the last page in the file.
@@ -1153,12 +1203,16 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1153{ 1203{
1154 struct inode *inode = mapping->host; 1204 struct inode *inode = mapping->host;
1155 struct btrfs_root *root = BTRFS_I(inode)->root; 1205 struct btrfs_root *root = BTRFS_I(inode)->root;
1206 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1207 struct btrfs_ordered_extent *ordered;
1208 char *kaddr;
1156 u32 blocksize = root->sectorsize; 1209 u32 blocksize = root->sectorsize;
1157 pgoff_t index = from >> PAGE_CACHE_SHIFT; 1210 pgoff_t index = from >> PAGE_CACHE_SHIFT;
1158 unsigned offset = from & (PAGE_CACHE_SIZE-1); 1211 unsigned offset = from & (PAGE_CACHE_SIZE-1);
1159 struct page *page; 1212 struct page *page;
1160 int ret = 0; 1213 int ret = 0;
1161 u64 page_start; 1214 u64 page_start;
1215 u64 page_end;
1162 1216
1163 if ((offset & (blocksize - 1)) == 0) 1217 if ((offset & (blocksize - 1)) == 0)
1164 goto out; 1218 goto out;
@@ -1168,6 +1222,10 @@ again:
1168 page = grab_cache_page(mapping, index); 1222 page = grab_cache_page(mapping, index);
1169 if (!page) 1223 if (!page)
1170 goto out; 1224 goto out;
1225
1226 page_start = page_offset(page);
1227 page_end = page_start + PAGE_CACHE_SIZE - 1;
1228
1171 if (!PageUptodate(page)) { 1229 if (!PageUptodate(page)) {
1172 ret = btrfs_readpage(NULL, page); 1230 ret = btrfs_readpage(NULL, page);
1173 lock_page(page); 1231 lock_page(page);
@@ -1181,10 +1239,32 @@ again:
1181 goto out; 1239 goto out;
1182 } 1240 }
1183 } 1241 }
1184
1185 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1186 wait_on_page_writeback(page); 1242 wait_on_page_writeback(page);
1187 ret = btrfs_cow_one_page(inode, page, offset); 1243
1244 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1245 set_page_extent_mapped(page);
1246
1247 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1248 if (ordered) {
1249 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1250 unlock_page(page);
1251 page_cache_release(page);
1252 btrfs_wait_ordered_extent(inode, ordered);
1253 btrfs_put_ordered_extent(ordered);
1254 goto again;
1255 }
1256
1257 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1258 page_end, GFP_NOFS);
1259 ret = 0;
1260 if (offset != PAGE_CACHE_SIZE) {
1261 kaddr = kmap(page);
1262 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1263 flush_dcache_page(page);
1264 kunmap(page);
1265 }
1266 set_page_dirty(page);
1267 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1188 1268
1189 unlock_page(page); 1269 unlock_page(page);
1190 page_cache_release(page); 1270 page_cache_release(page);
@@ -1222,8 +1302,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1222 1302
1223 btrfs_truncate_page(inode->i_mapping, inode->i_size); 1303 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1224 1304
1225 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1226 hole_size = block_end - hole_start; 1305 hole_size = block_end - hole_start;
1306 btrfs_wait_ordered_range(inode, hole_start, hole_size);
1307 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1227 1308
1228 trans = btrfs_start_transaction(root, 1); 1309 trans = btrfs_start_transaction(root, 1);
1229 btrfs_set_trans_block_group(trans, inode); 1310 btrfs_set_trans_block_group(trans, inode);
@@ -1258,6 +1339,7 @@ void btrfs_delete_inode(struct inode *inode)
1258 unsigned long nr; 1339 unsigned long nr;
1259 int ret; 1340 int ret;
1260 1341
1342 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1261 truncate_inode_pages(&inode->i_data, 0); 1343 truncate_inode_pages(&inode->i_data, 0);
1262 if (is_bad_inode(inode)) { 1344 if (is_bad_inode(inode)) {
1263 goto no_delete; 1345 goto no_delete;
@@ -1403,7 +1485,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
1403 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, 1485 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1404 inode->i_mapping, GFP_NOFS); 1486 inode->i_mapping, GFP_NOFS);
1405 mutex_init(&BTRFS_I(inode)->csum_mutex); 1487 mutex_init(&BTRFS_I(inode)->csum_mutex);
1406 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1407 return 0; 1488 return 0;
1408} 1489}
1409 1490
@@ -1705,7 +1786,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1705 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, 1786 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1706 inode->i_mapping, GFP_NOFS); 1787 inode->i_mapping, GFP_NOFS);
1707 mutex_init(&BTRFS_I(inode)->csum_mutex); 1788 mutex_init(&BTRFS_I(inode)->csum_mutex);
1708 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1709 BTRFS_I(inode)->delalloc_bytes = 0; 1789 BTRFS_I(inode)->delalloc_bytes = 0;
1710 BTRFS_I(inode)->root = root; 1790 BTRFS_I(inode)->root = root;
1711 1791
@@ -1930,7 +2010,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
1930 inode->i_mapping, GFP_NOFS); 2010 inode->i_mapping, GFP_NOFS);
1931 mutex_init(&BTRFS_I(inode)->csum_mutex); 2011 mutex_init(&BTRFS_I(inode)->csum_mutex);
1932 BTRFS_I(inode)->delalloc_bytes = 0; 2012 BTRFS_I(inode)->delalloc_bytes = 0;
1933 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1934 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 2013 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1935 } 2014 }
1936 dir->i_sb->s_dirt = 1; 2015 dir->i_sb->s_dirt = 1;
@@ -2066,64 +2145,18 @@ out_unlock:
2066 2145
2067static int merge_extent_mapping(struct extent_map_tree *em_tree, 2146static int merge_extent_mapping(struct extent_map_tree *em_tree,
2068 struct extent_map *existing, 2147 struct extent_map *existing,
2069 struct extent_map *em) 2148 struct extent_map *em,
2149 u64 map_start, u64 map_len)
2070{ 2150{
2071 u64 start_diff; 2151 u64 start_diff;
2072 u64 new_end;
2073 int ret = 0;
2074 int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
2075
2076 if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
2077 goto invalid;
2078
2079 if (!real_blocks && em->block_start != existing->block_start)
2080 goto invalid;
2081
2082 new_end = max(existing->start + existing->len, em->start + em->len);
2083
2084 if (existing->start >= em->start) {
2085 if (em->start + em->len < existing->start)
2086 goto invalid;
2087 2152
2088 start_diff = existing->start - em->start; 2153 BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
2089 if (real_blocks && em->block_start + start_diff != 2154 start_diff = map_start - em->start;
2090 existing->block_start) 2155 em->start = map_start;
2091 goto invalid; 2156 em->len = map_len;
2092 2157 if (em->block_start < EXTENT_MAP_LAST_BYTE)
2093 em->len = new_end - em->start; 2158 em->block_start += start_diff;
2094 2159 return add_extent_mapping(em_tree, em);
2095 remove_extent_mapping(em_tree, existing);
2096 /* free for the tree */
2097 free_extent_map(existing);
2098 ret = add_extent_mapping(em_tree, em);
2099
2100 } else if (em->start > existing->start) {
2101
2102 if (existing->start + existing->len < em->start)
2103 goto invalid;
2104
2105 start_diff = em->start - existing->start;
2106 if (real_blocks && existing->block_start + start_diff !=
2107 em->block_start)
2108 goto invalid;
2109
2110 remove_extent_mapping(em_tree, existing);
2111 em->block_start = existing->block_start;
2112 em->start = existing->start;
2113 em->len = new_end - existing->start;
2114 free_extent_map(existing);
2115
2116 ret = add_extent_mapping(em_tree, em);
2117 } else {
2118 goto invalid;
2119 }
2120 return ret;
2121
2122invalid:
2123 printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
2124 existing->start, existing->len, existing->block_start,
2125 em->start, em->len, em->block_start);
2126 return -EIO;
2127} 2160}
2128 2161
2129struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, 2162struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
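The new merge_extent_mapping() above no longer tries to stitch the overlapping mappings together. The caller (btrfs_get_extent(), in the next hunk) now passes the offset that actually missed plus a single sector of length, a BUG_ON() checks that this window really falls inside the freshly built mapping, and the helper simply trims the new extent_map to that window, shifting block_start by the same delta. A stand-alone illustration of the arithmetic (values are made up; only the clamping mirrors the patch):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* mapping built from the file extent item */
		uint64_t em_start = 0, em_len = 16384, em_block_start = 1048576;
		/* window requested by btrfs_get_extent(): one sector at the miss */
		uint64_t map_start = 8192, map_len = 4096;

		uint64_t start_diff = map_start - em_start;   /* 8192 */
		em_start = map_start;                         /* em->start = map_start */
		em_len = map_len;                             /* em->len   = map_len   */
		em_block_start += start_diff;                 /* follow on disk too    */

		printf("trimmed map: file %llu +%llu -> disk %llu\n",
		       (unsigned long long)em_start,
		       (unsigned long long)em_len,
		       (unsigned long long)em_block_start);
		return 0;
	}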
@@ -2170,10 +2203,9 @@ again:
2170 err = -ENOMEM; 2203 err = -ENOMEM;
2171 goto out; 2204 goto out;
2172 } 2205 }
2173 2206 em->bdev = root->fs_info->fs_devices->latest_bdev;
2174 em->start = EXTENT_MAP_HOLE; 2207 em->start = EXTENT_MAP_HOLE;
2175 em->len = (u64)-1; 2208 em->len = (u64)-1;
2176 em->bdev = root->fs_info->fs_devices->latest_bdev;
2177 ret = btrfs_lookup_file_extent(trans, root, path, 2209 ret = btrfs_lookup_file_extent(trans, root, path,
2178 objectid, start, trans != NULL); 2210 objectid, start, trans != NULL);
2179 if (ret < 0) { 2211 if (ret < 0) {
@@ -2314,6 +2346,9 @@ insert:
2314 */ 2346 */
2315 if (ret == -EEXIST) { 2347 if (ret == -EEXIST) {
2316 struct extent_map *existing; 2348 struct extent_map *existing;
2349
2350 ret = 0;
2351
2317 existing = lookup_extent_mapping(em_tree, start, len); 2352 existing = lookup_extent_mapping(em_tree, start, len);
2318 if (existing && (existing->start > start || 2353 if (existing && (existing->start > start ||
2319 existing->start + existing->len <= start)) { 2354 existing->start + existing->len <= start)) {
@@ -2325,7 +2360,8 @@ insert:
2325 em->len); 2360 em->len);
2326 if (existing) { 2361 if (existing) {
2327 err = merge_extent_mapping(em_tree, existing, 2362 err = merge_extent_mapping(em_tree, existing,
2328 em); 2363 em, start,
2364 root->sectorsize);
2329 free_extent_map(existing); 2365 free_extent_map(existing);
2330 if (err) { 2366 if (err) {
2331 free_extent_map(em); 2367 free_extent_map(em);
@@ -2341,6 +2377,7 @@ insert:
2341 } else { 2377 } else {
2342 free_extent_map(em); 2378 free_extent_map(em);
2343 em = existing; 2379 em = existing;
2380 err = 0;
2344 } 2381 }
2345 } 2382 }
2346 spin_unlock(&em_tree->lock); 2383 spin_unlock(&em_tree->lock);
@@ -2348,8 +2385,9 @@ out:
2348 btrfs_free_path(path); 2385 btrfs_free_path(path);
2349 if (trans) { 2386 if (trans) {
2350 ret = btrfs_end_transaction(trans, root); 2387 ret = btrfs_end_transaction(trans, root);
2351 if (!err) 2388 if (!err) {
2352 err = ret; 2389 err = ret;
2390 }
2353 } 2391 }
2354 if (err) { 2392 if (err) {
2355 free_extent_map(em); 2393 free_extent_map(em);
@@ -2474,8 +2512,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping,
2474 return extent_readpages(tree, mapping, pages, nr_pages, 2512 return extent_readpages(tree, mapping, pages, nr_pages,
2475 btrfs_get_extent); 2513 btrfs_get_extent);
2476} 2514}
2477 2515static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2478static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2479{ 2516{
2480 struct extent_io_tree *tree; 2517 struct extent_io_tree *tree;
2481 struct extent_map_tree *map; 2518 struct extent_map_tree *map;
@@ -2493,15 +2530,54 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2493 return ret; 2530 return ret;
2494} 2531}
2495 2532
2533static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2534{
2535 struct btrfs_ordered_extent *ordered;
2536
2537 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
2538 page_offset(page));
2539 if (ordered) {
2540 btrfs_put_ordered_extent(ordered);
2541 return 0;
2542 }
2543 return __btrfs_releasepage(page, gfp_flags);
2544}
2545
2496static void btrfs_invalidatepage(struct page *page, unsigned long offset) 2546static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2497{ 2547{
2498 struct extent_io_tree *tree; 2548 struct extent_io_tree *tree;
2549 struct btrfs_ordered_extent *ordered;
2550 u64 page_start = page_offset(page);
2551 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2499 2552
2553 wait_on_page_writeback(page);
2500 tree = &BTRFS_I(page->mapping->host)->io_tree; 2554 tree = &BTRFS_I(page->mapping->host)->io_tree;
2501 extent_invalidatepage(tree, page, offset); 2555 if (offset) {
2502 btrfs_releasepage(page, GFP_NOFS); 2556 btrfs_releasepage(page, GFP_NOFS);
2557 return;
2558 }
2559
2560 lock_extent(tree, page_start, page_end, GFP_NOFS);
2561 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
2562 page_offset(page));
2563 if (ordered) {
2564 clear_extent_bit(tree, page_start, page_end,
2565 EXTENT_DIRTY | EXTENT_DELALLOC |
2566 EXTENT_LOCKED, 1, 0, GFP_NOFS);
2567 btrfs_writepage_end_io_hook(page, page_start,
2568 page_end, NULL, 1);
2569 btrfs_put_ordered_extent(ordered);
2570 lock_extent(tree, page_start, page_end, GFP_NOFS);
2571 }
2572 clear_extent_bit(tree, page_start, page_end,
2573 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
2574 EXTENT_ORDERED,
2575 1, 1, GFP_NOFS);
2576 __btrfs_releasepage(page, GFP_NOFS);
2577
2503 if (PagePrivate(page)) { 2578 if (PagePrivate(page)) {
2504 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); 2579 invalidate_extent_lru(tree, page_offset(page),
2580 PAGE_CACHE_SIZE);
2505 ClearPagePrivate(page); 2581 ClearPagePrivate(page);
2506 set_page_private(page, 0); 2582 set_page_private(page, 0);
2507 page_cache_release(page); 2583 page_cache_release(page);
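In the reworked btrfs_invalidatepage() above, a whole-page invalidation that finds an ordered extent first clears EXTENT_DIRTY, EXTENT_DELALLOC and EXTENT_LOCKED for the range (clearing EXTENT_LOCKED is what releases the range lock taken a few lines earlier), then reports the range as written via the end_io hook so the ordered-extent accounting still completes, and finally re-locks before the common cleanup. Condensed from the hunk, with comments marking those steps:

	/* condensed flow of the ordered-extent branch in btrfs_invalidatepage() */
	lock_extent(tree, page_start, page_end, GFP_NOFS);
	if (ordered) {
		/* dropping EXTENT_LOCKED here is what releases the range lock */
		clear_extent_bit(tree, page_start, page_end,
				 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_LOCKED,
				 1, 0, GFP_NOFS);
		/* tell the ordered accounting this range is done writing */
		btrfs_writepage_end_io_hook(page, page_start, page_end, NULL, 1);
		btrfs_put_ordered_extent(ordered);
		lock_extent(tree, page_start, page_end, GFP_NOFS);
	}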
@@ -2527,35 +2603,63 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2527{ 2603{
2528 struct inode *inode = fdentry(vma->vm_file)->d_inode; 2604 struct inode *inode = fdentry(vma->vm_file)->d_inode;
2529 struct btrfs_root *root = BTRFS_I(inode)->root; 2605 struct btrfs_root *root = BTRFS_I(inode)->root;
2530 unsigned long end; 2606 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2607 struct btrfs_ordered_extent *ordered;
2608 char *kaddr;
2609 unsigned long zero_start;
2531 loff_t size; 2610 loff_t size;
2532 int ret; 2611 int ret;
2533 u64 page_start; 2612 u64 page_start;
2613 u64 page_end;
2534 2614
2535 ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); 2615 ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2536 if (ret) 2616 if (ret)
2537 goto out; 2617 goto out;
2538 2618
2539 ret = -EINVAL; 2619 ret = -EINVAL;
2540 2620again:
2541 lock_page(page); 2621 lock_page(page);
2542 wait_on_page_writeback(page);
2543 size = i_size_read(inode); 2622 size = i_size_read(inode);
2544 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2623 page_start = page_offset(page);
2624 page_end = page_start + PAGE_CACHE_SIZE - 1;
2545 2625
2546 if ((page->mapping != inode->i_mapping) || 2626 if ((page->mapping != inode->i_mapping) ||
2547 (page_start > size)) { 2627 (page_start >= size)) {
2548 /* page got truncated out from underneath us */ 2628 /* page got truncated out from underneath us */
2549 goto out_unlock; 2629 goto out_unlock;
2550 } 2630 }
2631 wait_on_page_writeback(page);
2632
2633 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2634 set_page_extent_mapped(page);
2635
2636 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2637 if (ordered) {
2638 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2639 unlock_page(page);
2640 btrfs_wait_ordered_extent(inode, ordered);
2641 btrfs_put_ordered_extent(ordered);
2642 goto again;
2643 }
2644
2645 set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
2646 page_end, GFP_NOFS);
2647 ret = 0;
2551 2648
2552 /* page is wholly or partially inside EOF */ 2649 /* page is wholly or partially inside EOF */
2553 if (page_start + PAGE_CACHE_SIZE > size) 2650 if (page_start + PAGE_CACHE_SIZE > size)
2554 end = size & ~PAGE_CACHE_MASK; 2651 zero_start = size & ~PAGE_CACHE_MASK;
2555 else 2652 else
2556 end = PAGE_CACHE_SIZE; 2653 zero_start = PAGE_CACHE_SIZE;
2557 2654
2558 ret = btrfs_cow_one_page(inode, page, end); 2655 if (zero_start != PAGE_CACHE_SIZE) {
2656 kaddr = kmap(page);
2657 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
2658 flush_dcache_page(page);
2659 kunmap(page);
2660 }
2661 set_page_dirty(page);
2662 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2559 2663
2560out_unlock: 2664out_unlock:
2561 unlock_page(page); 2665 unlock_page(page);
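btrfs_page_mkwrite() above follows the same wait-and-retry pattern sketched after the truncate hunk; the extra work is deciding how much of the faulting page lies past EOF and zeroing it. A stand-alone check of that zero_start computation (4K pages assumed):

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_CACHE_SIZE 4096ULL
	#define PAGE_CACHE_MASK (~(PAGE_CACHE_SIZE - 1))

	int main(void)
	{
		uint64_t size = 10000;          /* i_size_read(inode)   */
		uint64_t page_start = 8192;     /* page straddling EOF  */
		uint64_t zero_start;

		if (page_start + PAGE_CACHE_SIZE > size)
			zero_start = size & ~PAGE_CACHE_MASK;   /* 10000 mod 4096 = 1808 */
		else
			zero_start = PAGE_CACHE_SIZE;           /* page fully inside EOF */

		printf("zero the page from byte %llu to %llu\n",
		       (unsigned long long)zero_start,
		       (unsigned long long)PAGE_CACHE_SIZE);
		return 0;
	}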
@@ -2662,15 +2766,28 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
2662 if (!ei) 2766 if (!ei)
2663 return NULL; 2767 return NULL;
2664 ei->last_trans = 0; 2768 ei->last_trans = 0;
2665 ei->ordered_trans = 0; 2769 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
2666 return &ei->vfs_inode; 2770 return &ei->vfs_inode;
2667} 2771}
2668 2772
2669void btrfs_destroy_inode(struct inode *inode) 2773void btrfs_destroy_inode(struct inode *inode)
2670{ 2774{
2775 struct btrfs_ordered_extent *ordered;
2671 WARN_ON(!list_empty(&inode->i_dentry)); 2776 WARN_ON(!list_empty(&inode->i_dentry));
2672 WARN_ON(inode->i_data.nrpages); 2777 WARN_ON(inode->i_data.nrpages);
2673 2778
2779 while(1) {
2780 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
2781 if (!ordered)
2782 break;
2783 else {
2784 printk("found ordered extent %Lu %Lu\n",
2785 ordered->file_offset, ordered->len);
2786 btrfs_remove_ordered_extent(inode, ordered);
2787 btrfs_put_ordered_extent(ordered);
2788 btrfs_put_ordered_extent(ordered);
2789 }
2790 }
2674 btrfs_drop_extent_cache(inode, 0, (u64)-1); 2791 btrfs_drop_extent_cache(inode, 0, (u64)-1);
2675 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 2792 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2676} 2793}
@@ -2869,7 +2986,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2869 inode->i_mapping, GFP_NOFS); 2986 inode->i_mapping, GFP_NOFS);
2870 mutex_init(&BTRFS_I(inode)->csum_mutex); 2987 mutex_init(&BTRFS_I(inode)->csum_mutex);
2871 BTRFS_I(inode)->delalloc_bytes = 0; 2988 BTRFS_I(inode)->delalloc_bytes = 0;
2872 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
2873 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 2989 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
2874 } 2990 }
2875 dir->i_sb->s_dirt = 1; 2991 dir->i_sb->s_dirt = 1;
@@ -2921,6 +3037,20 @@ out_fail:
2921 return err; 3037 return err;
2922} 3038}
2923 3039
3040static int btrfs_set_page_dirty(struct page *page)
3041{
3042 struct inode *inode = page->mapping->host;
3043 u64 page_start = page_offset(page);
3044 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
3045
3046 if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
3047 EXTENT_DELALLOC, 0)) {
3048printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page));
3049WARN_ON(1);
3050 }
3051 return __set_page_dirty_nobuffers(page);
3052}
3053
2924static int btrfs_permission(struct inode *inode, int mask, 3054static int btrfs_permission(struct inode *inode, int mask,
2925 struct nameidata *nd) 3055 struct nameidata *nd)
2926{ 3056{
@@ -2967,6 +3097,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
2967 .merge_bio_hook = btrfs_merge_bio_hook, 3097 .merge_bio_hook = btrfs_merge_bio_hook,
2968 .readpage_io_hook = btrfs_readpage_io_hook, 3098 .readpage_io_hook = btrfs_readpage_io_hook,
2969 .readpage_end_io_hook = btrfs_readpage_end_io_hook, 3099 .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3100 .writepage_end_io_hook = btrfs_writepage_end_io_hook,
2970 .readpage_io_failed_hook = btrfs_io_failed_hook, 3101 .readpage_io_failed_hook = btrfs_io_failed_hook,
2971 .set_bit_hook = btrfs_set_bit_hook, 3102 .set_bit_hook = btrfs_set_bit_hook,
2972 .clear_bit_hook = btrfs_clear_bit_hook, 3103 .clear_bit_hook = btrfs_clear_bit_hook,
@@ -2982,7 +3113,7 @@ static struct address_space_operations btrfs_aops = {
2982 .direct_IO = btrfs_direct_IO, 3113 .direct_IO = btrfs_direct_IO,
2983 .invalidatepage = btrfs_invalidatepage, 3114 .invalidatepage = btrfs_invalidatepage,
2984 .releasepage = btrfs_releasepage, 3115 .releasepage = btrfs_releasepage,
2985 .set_page_dirty = __set_page_dirty_nobuffers, 3116 .set_page_dirty = btrfs_set_page_dirty,
2986}; 3117};
2987 3118
2988static struct address_space_operations btrfs_symlink_aops = { 3119static struct address_space_operations btrfs_symlink_aops = {
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 254da8225664..6513270f054c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -22,48 +22,30 @@
22#include "ctree.h" 22#include "ctree.h"
23#include "transaction.h" 23#include "transaction.h"
24#include "btrfs_inode.h" 24#include "btrfs_inode.h"
25#include "extent_io.h"
25 26
26struct tree_entry {
27 u64 root_objectid;
28 u64 objectid;
29 struct inode *inode;
30 struct rb_node rb_node;
31};
32 27
33/* 28static u64 entry_end(struct btrfs_ordered_extent *entry)
34 * returns > 0 if entry passed (root, objectid) is > entry,
35 * < 0 if (root, objectid) < entry and zero if they are equal
36 */
37static int comp_entry(struct tree_entry *entry, u64 root_objectid,
38 u64 objectid)
39{ 29{
40 if (root_objectid < entry->root_objectid) 30 if (entry->file_offset + entry->len < entry->file_offset)
41 return -1; 31 return (u64)-1;
42 if (root_objectid > entry->root_objectid) 32 return entry->file_offset + entry->len;
43 return 1;
44 if (objectid < entry->objectid)
45 return -1;
46 if (objectid > entry->objectid)
47 return 1;
48 return 0;
49} 33}
50 34
51static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, 35static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
52 u64 objectid, struct rb_node *node) 36 struct rb_node *node)
53{ 37{
54 struct rb_node ** p = &root->rb_node; 38 struct rb_node ** p = &root->rb_node;
55 struct rb_node * parent = NULL; 39 struct rb_node * parent = NULL;
56 struct tree_entry *entry; 40 struct btrfs_ordered_extent *entry;
57 int comp;
58 41
59 while(*p) { 42 while(*p) {
60 parent = *p; 43 parent = *p;
61 entry = rb_entry(parent, struct tree_entry, rb_node); 44 entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
62 45
63 comp = comp_entry(entry, root_objectid, objectid); 46 if (file_offset < entry->file_offset)
64 if (comp < 0)
65 p = &(*p)->rb_left; 47 p = &(*p)->rb_left;
66 else if (comp > 0) 48 else if (file_offset >= entry_end(entry))
67 p = &(*p)->rb_right; 49 p = &(*p)->rb_right;
68 else 50 else
69 return parent; 51 return parent;
@@ -74,24 +56,23 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid,
74 return NULL; 56 return NULL;
75} 57}
76 58
77static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, 59static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
78 u64 objectid, struct rb_node **prev_ret) 60 struct rb_node **prev_ret)
79{ 61{
80 struct rb_node * n = root->rb_node; 62 struct rb_node * n = root->rb_node;
81 struct rb_node *prev = NULL; 63 struct rb_node *prev = NULL;
82 struct tree_entry *entry; 64 struct rb_node *test;
83 struct tree_entry *prev_entry = NULL; 65 struct btrfs_ordered_extent *entry;
84 int comp; 66 struct btrfs_ordered_extent *prev_entry = NULL;
85 67
86 while(n) { 68 while(n) {
87 entry = rb_entry(n, struct tree_entry, rb_node); 69 entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
88 prev = n; 70 prev = n;
89 prev_entry = entry; 71 prev_entry = entry;
90 comp = comp_entry(entry, root_objectid, objectid);
91 72
92 if (comp < 0) 73 if (file_offset < entry->file_offset)
93 n = n->rb_left; 74 n = n->rb_left;
94 else if (comp > 0) 75 else if (file_offset >= entry_end(entry))
95 n = n->rb_right; 76 n = n->rb_right;
96 else 77 else
97 return n; 78 return n;
@@ -99,195 +80,329 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid,
99 if (!prev_ret) 80 if (!prev_ret)
100 return NULL; 81 return NULL;
101 82
102 while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { 83 while(prev && file_offset >= entry_end(prev_entry)) {
103 prev = rb_next(prev); 84 test = rb_next(prev);
104 prev_entry = rb_entry(prev, struct tree_entry, rb_node); 85 if (!test)
86 break;
87 prev_entry = rb_entry(test, struct btrfs_ordered_extent,
88 rb_node);
89 if (file_offset < entry_end(prev_entry))
90 break;
91
92 prev = test;
93 }
94 if (prev)
95 prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
96 rb_node);
97 while(prev && file_offset < entry_end(prev_entry)) {
98 test = rb_prev(prev);
99 if (!test)
100 break;
101 prev_entry = rb_entry(test, struct btrfs_ordered_extent,
102 rb_node);
103 prev = test;
105 } 104 }
106 *prev_ret = prev; 105 *prev_ret = prev;
107 return NULL; 106 return NULL;
108} 107}
109 108
110static inline struct rb_node *tree_search(struct rb_root *root, 109static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
111 u64 root_objectid, u64 objectid) 110{
111 if (file_offset < entry->file_offset ||
112 entry->file_offset + entry->len <= file_offset)
113 return 0;
114 return 1;
115}
116
117static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
118 u64 file_offset)
112{ 119{
120 struct rb_root *root = &tree->tree;
113 struct rb_node *prev; 121 struct rb_node *prev;
114 struct rb_node *ret; 122 struct rb_node *ret;
115 ret = __tree_search(root, root_objectid, objectid, &prev); 123 struct btrfs_ordered_extent *entry;
124
125 if (tree->last) {
126 entry = rb_entry(tree->last, struct btrfs_ordered_extent,
127 rb_node);
128 if (offset_in_entry(entry, file_offset))
129 return tree->last;
130 }
131 ret = __tree_search(root, file_offset, &prev);
116 if (!ret) 132 if (!ret)
117 return prev; 133 ret = prev;
134 if (ret)
135 tree->last = ret;
118 return ret; 136 return ret;
119} 137}
120 138
121int btrfs_add_ordered_inode(struct inode *inode) 139int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
140 u64 start, u64 len)
122{ 141{
123 struct btrfs_root *root = BTRFS_I(inode)->root;
124 u64 root_objectid = root->root_key.objectid;
125 u64 transid = root->fs_info->running_transaction->transid;
126 struct tree_entry *entry;
127 struct rb_node *node;
128 struct btrfs_ordered_inode_tree *tree; 142 struct btrfs_ordered_inode_tree *tree;
143 struct rb_node *node;
144 struct btrfs_ordered_extent *entry;
129 145
130 if (transid <= BTRFS_I(inode)->ordered_trans) 146 tree = &BTRFS_I(inode)->ordered_tree;
131 return 0; 147 entry = kzalloc(sizeof(*entry), GFP_NOFS);
132
133 tree = &root->fs_info->running_transaction->ordered_inode_tree;
134
135 read_lock(&tree->lock);
136 node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL);
137 read_unlock(&tree->lock);
138 if (node) {
139 return 0;
140 }
141
142 entry = kmalloc(sizeof(*entry), GFP_NOFS);
143 if (!entry) 148 if (!entry)
144 return -ENOMEM; 149 return -ENOMEM;
145 150
146 write_lock(&tree->lock); 151 mutex_lock(&tree->mutex);
147 entry->objectid = inode->i_ino; 152 entry->file_offset = file_offset;
148 entry->root_objectid = root_objectid; 153 entry->start = start;
154 entry->len = len;
149 entry->inode = inode; 155 entry->inode = inode;
156 /* one ref for the tree */
157 atomic_set(&entry->refs, 1);
158 init_waitqueue_head(&entry->wait);
159 INIT_LIST_HEAD(&entry->list);
150 160
151 node = tree_insert(&tree->tree, root_objectid, 161 node = tree_insert(&tree->tree, file_offset,
152 inode->i_ino, &entry->rb_node); 162 &entry->rb_node);
153 163 if (node) {
154 BTRFS_I(inode)->ordered_trans = transid; 164 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
155 if (!node) 165 atomic_inc(&entry->refs);
156 igrab(inode); 166 }
157 167 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
158 write_unlock(&tree->lock); 168 entry_end(entry) - 1, GFP_NOFS);
159 169
160 if (node) 170 set_bit(BTRFS_ORDERED_START, &entry->flags);
161 kfree(entry); 171 mutex_unlock(&tree->mutex);
172 BUG_ON(node);
162 return 0; 173 return 0;
163} 174}
164 175
165int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, 176int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum)
166 u64 *root_objectid, u64 *objectid,
167 struct inode **inode)
168{ 177{
169 struct tree_entry *entry; 178 struct btrfs_ordered_inode_tree *tree;
170 struct rb_node *node; 179 struct rb_node *node;
180 struct btrfs_ordered_extent *entry;
171 181
172 write_lock(&tree->lock); 182 tree = &BTRFS_I(inode)->ordered_tree;
173 node = tree_search(&tree->tree, *root_objectid, *objectid); 183 mutex_lock(&tree->mutex);
184 node = tree_search(tree, sum->file_offset);
174 if (!node) { 185 if (!node) {
175 write_unlock(&tree->lock); 186search_fail:
176 return 0; 187printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset);
188 node = rb_first(&tree->tree);
189 while(node) {
190 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
191 printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start);
192 node = rb_next(node);
193 }
194 BUG();
177 } 195 }
178 entry = rb_entry(node, struct tree_entry, rb_node); 196 BUG_ON(!node);
179 197
180 while(comp_entry(entry, *root_objectid, *objectid) >= 0) { 198 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
181 node = rb_next(node); 199 if (!offset_in_entry(entry, sum->file_offset)) {
182 if (!node) 200 goto search_fail;
183 break;
184 entry = rb_entry(node, struct tree_entry, rb_node);
185 }
186 if (!node) {
187 write_unlock(&tree->lock);
188 return 0;
189 } 201 }
190 202
191 *root_objectid = entry->root_objectid; 203 list_add_tail(&sum->list, &entry->list);
192 *inode = entry->inode; 204 mutex_unlock(&tree->mutex);
193 atomic_inc(&entry->inode->i_count); 205 return 0;
194 *objectid = entry->objectid;
195 write_unlock(&tree->lock);
196 return 1;
197} 206}
198 207
199int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, 208int btrfs_dec_test_ordered_pending(struct inode *inode,
200 u64 *root_objectid, u64 *objectid, 209 u64 file_offset, u64 io_size)
201 struct inode **inode)
202{ 210{
203 struct tree_entry *entry; 211 struct btrfs_ordered_inode_tree *tree;
204 struct rb_node *node; 212 struct rb_node *node;
205 213 struct btrfs_ordered_extent *entry;
206 write_lock(&tree->lock); 214 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
207 node = tree_search(&tree->tree, *root_objectid, *objectid); 215 int ret;
216
217 tree = &BTRFS_I(inode)->ordered_tree;
218 mutex_lock(&tree->mutex);
219 clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
220 GFP_NOFS);
221 node = tree_search(tree, file_offset);
208 if (!node) { 222 if (!node) {
209 write_unlock(&tree->lock); 223 ret = 1;
210 return 0; 224 goto out;
211 } 225 }
212 226
213 entry = rb_entry(node, struct tree_entry, rb_node); 227 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
214 while(comp_entry(entry, *root_objectid, *objectid) >= 0) { 228 if (!offset_in_entry(entry, file_offset)) {
215 node = rb_next(node); 229 ret = 1;
216 if (!node) 230 goto out;
217 break;
218 entry = rb_entry(node, struct tree_entry, rb_node);
219 } 231 }
220 if (!node) { 232
221 write_unlock(&tree->lock); 233 ret = test_range_bit(io_tree, entry->file_offset,
222 return 0; 234 entry->file_offset + entry->len - 1,
235 EXTENT_ORDERED, 0);
236 if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) {
237printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry));
223 } 238 }
239 if (ret == 0)
240 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
241out:
242 mutex_unlock(&tree->mutex);
243 return ret == 0;
244}
224 245
225 *root_objectid = entry->root_objectid; 246int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
226 *objectid = entry->objectid; 247{
227 *inode = entry->inode; 248 if (atomic_dec_and_test(&entry->refs))
228 atomic_inc(&entry->inode->i_count); 249 kfree(entry);
229 rb_erase(node, &tree->tree); 250 return 0;
230 write_unlock(&tree->lock);
231 kfree(entry);
232 return 1;
233} 251}
234 252
235static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, 253int btrfs_remove_ordered_extent(struct inode *inode,
236 struct inode *inode, 254 struct btrfs_ordered_extent *entry)
237 u64 root_objectid, u64 objectid)
238{ 255{
239 struct tree_entry *entry; 256 struct btrfs_ordered_inode_tree *tree;
240 struct rb_node *node; 257 struct rb_node *node;
241 struct rb_node *prev;
242 258
243 write_lock(&tree->lock); 259 tree = &BTRFS_I(inode)->ordered_tree;
244 node = __tree_search(&tree->tree, root_objectid, objectid, &prev); 260 mutex_lock(&tree->mutex);
245 if (!node) { 261 node = &entry->rb_node;
246 write_unlock(&tree->lock);
247 return;
248 }
249 rb_erase(node, &tree->tree); 262 rb_erase(node, &tree->tree);
250 BTRFS_I(inode)->ordered_trans = 0; 263 tree->last = NULL;
251 write_unlock(&tree->lock); 264 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
252 atomic_dec(&inode->i_count); 265 mutex_unlock(&tree->mutex);
253 entry = rb_entry(node, struct tree_entry, rb_node); 266 wake_up(&entry->wait);
254 kfree(entry); 267 return 0;
255 return;
256} 268}
257 269
258void btrfs_del_ordered_inode(struct inode *inode, int force) 270void btrfs_wait_ordered_extent(struct inode *inode,
271 struct btrfs_ordered_extent *entry)
259{ 272{
260 struct btrfs_root *root = BTRFS_I(inode)->root; 273 u64 start = entry->file_offset;
261 u64 root_objectid = root->root_key.objectid; 274 u64 end = start + entry->len - 1;
275#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
276 do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
277#else
278 do_sync_mapping_range(inode->i_mapping, start, end,
279 SYNC_FILE_RANGE_WRITE);
280#endif
281 wait_event(entry->wait,
282 test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
283}
262 284
263 if (!BTRFS_I(inode)->ordered_trans) { 285static void btrfs_start_ordered_extent(struct inode *inode,
264 return; 286 struct btrfs_ordered_extent *entry, int wait)
265 } 287{
288 u64 start = entry->file_offset;
289 u64 end = start + entry->len - 1;
266 290
267 if (!force && (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || 291#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
268 mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) 292 do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
269 return; 293#else
294 do_sync_mapping_range(inode->i_mapping, start, end,
295 SYNC_FILE_RANGE_WRITE);
296#endif
297 if (wait)
298 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
299 &entry->flags));
300}
270 301
271 spin_lock(&root->fs_info->new_trans_lock); 302void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
272 if (root->fs_info->running_transaction) { 303{
273 struct btrfs_ordered_inode_tree *tree; 304 u64 end;
274 tree = &root->fs_info->running_transaction->ordered_inode_tree; 305 struct btrfs_ordered_extent *ordered;
275 __btrfs_del_ordered_inode(tree, inode, root_objectid, 306 int found;
276 inode->i_ino); 307 int should_wait = 0;
308
309again:
310 if (start + len < start)
311 end = (u64)-1;
312 else
313 end = start + len - 1;
314 found = 0;
315 while(1) {
316 ordered = btrfs_lookup_first_ordered_extent(inode, end);
317 if (!ordered) {
318 break;
319 }
320 if (ordered->file_offset >= start + len) {
321 btrfs_put_ordered_extent(ordered);
322 break;
323 }
324 if (ordered->file_offset + ordered->len < start) {
325 btrfs_put_ordered_extent(ordered);
326 break;
327 }
328 btrfs_start_ordered_extent(inode, ordered, should_wait);
329 found++;
330 end = ordered->file_offset;
331 btrfs_put_ordered_extent(ordered);
332 if (end == 0)
333 break;
334 end--;
335 }
336 if (should_wait && found) {
337 should_wait = 0;
338 goto again;
277 } 339 }
278 spin_unlock(&root->fs_info->new_trans_lock);
279} 340}
280 341
281int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode) 342int btrfs_add_ordered_pending(struct inode *inode,
343 struct btrfs_ordered_extent *ordered,
344 u64 start, u64 len)
282{ 345{
283 struct btrfs_transaction *cur = root->fs_info->running_transaction; 346 WARN_ON(1);
284 while(cur == root->fs_info->running_transaction &&
285 atomic_read(&BTRFS_I(inode)->ordered_writeback)) {
286#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
287 congestion_wait(WRITE, HZ/20);
288#else
289 blk_congestion_wait(WRITE, HZ/20);
290#endif
291 }
292 return 0; 347 return 0;
348#if 0
349 int ret;
350 struct btrfs_ordered_inode_tree *tree;
351 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
352
353 tree = &BTRFS_I(inode)->ordered_tree;
354 mutex_lock(&tree->mutex);
355 if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
356 ret = -EAGAIN;
357 goto out;
358 }
359 set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS);
360 ret = 0;
361out:
362 mutex_unlock(&tree->mutex);
363 return ret;
364#endif
365}
366
367struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
368 u64 file_offset)
369{
370 struct btrfs_ordered_inode_tree *tree;
371 struct rb_node *node;
372 struct btrfs_ordered_extent *entry = NULL;
373
374 tree = &BTRFS_I(inode)->ordered_tree;
375 mutex_lock(&tree->mutex);
376 node = tree_search(tree, file_offset);
377 if (!node)
378 goto out;
379
380 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
381 if (!offset_in_entry(entry, file_offset))
382 entry = NULL;
383 if (entry)
384 atomic_inc(&entry->refs);
385out:
386 mutex_unlock(&tree->mutex);
387 return entry;
388}
389
390struct btrfs_ordered_extent *
391btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset)
392{
393 struct btrfs_ordered_inode_tree *tree;
394 struct rb_node *node;
395 struct btrfs_ordered_extent *entry = NULL;
396
397 tree = &BTRFS_I(inode)->ordered_tree;
398 mutex_lock(&tree->mutex);
399 node = tree_search(tree, file_offset);
400 if (!node)
401 goto out;
402
403 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
404 atomic_inc(&entry->refs);
405out:
406 mutex_unlock(&tree->mutex);
407 return entry;
293} 408}
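With this rewrite, ordered-data.c indexes ordered extents by file_offset in a per-inode rbtree protected by tree->mutex, with tree->last caching the most recently hit node. Two small helpers carry the range logic: entry_end() clamps to (u64)-1 on overflow, and offset_in_entry() decides whether an offset is covered. A stand-alone copy of just those helpers (the struct is trimmed to the two fields they touch):

	#include <stdio.h>
	#include <stdint.h>

	struct ordered_extent { uint64_t file_offset; uint64_t len; };

	static uint64_t entry_end(const struct ordered_extent *e)
	{
		if (e->file_offset + e->len < e->file_offset)   /* u64 overflow */
			return (uint64_t)-1;
		return e->file_offset + e->len;
	}

	static int offset_in_entry(const struct ordered_extent *e, uint64_t off)
	{
		if (off < e->file_offset || e->file_offset + e->len <= off)
			return 0;
		return 1;
	}

	int main(void)
	{
		struct ordered_extent e = { .file_offset = 8192, .len = 4096 };

		/* prints: end=12288, 8191->0, 8192->1, 12288->0 */
		printf("end=%llu, 8191->%d, 8192->%d, 12288->%d\n",
		       (unsigned long long)entry_end(&e),
		       offset_in_entry(&e, 8191),
		       offset_in_entry(&e, 8192),
		       offset_in_entry(&e, 12288));
		return 0;
	}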
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4fa78736423e..33292c5fe90c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -20,24 +20,73 @@
20#define __BTRFS_ORDERED_DATA__ 20#define __BTRFS_ORDERED_DATA__
21 21
22struct btrfs_ordered_inode_tree { 22struct btrfs_ordered_inode_tree {
23 rwlock_t lock; 23 struct mutex mutex;
24 struct rb_root tree; 24 struct rb_root tree;
25 struct rb_node *last;
25}; 26};
26 27
28struct btrfs_sector_sum {
29 u64 offset;
30 u32 sum;
31};
32
33struct btrfs_ordered_sum {
34 u64 file_offset;
35 u64 len;
36 struct list_head list;
37 struct btrfs_sector_sum sums;
38};
39
40/* bits for the flags field */
41#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
42#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
43#define BTRFS_ORDERED_START 2 /* set when tree setup */
44
45struct btrfs_ordered_extent {
46 u64 file_offset;
47 u64 start;
48 u64 len;
49 unsigned long flags;
50 atomic_t refs;
51 struct list_head list;
52 struct inode *inode;
53 wait_queue_head_t wait;
54 struct rb_node rb_node;
55};
56
57
58static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes)
59{
60 unsigned long num_sectors = (bytes + root->sectorsize - 1) /
61 root->sectorsize;
62 return sizeof(struct btrfs_ordered_sum) +
63 num_sectors * sizeof(struct btrfs_sector_sum);
64}
65
27static inline void 66static inline void
28btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) 67btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
29{ 68{
30 rwlock_init(&t->lock); 69 mutex_init(&t->mutex);
31 t->tree.rb_node = NULL; 70 t->tree.rb_node = NULL;
71 t->last = NULL;
32} 72}
33 73
34int btrfs_add_ordered_inode(struct inode *inode); 74int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
35int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, 75int btrfs_remove_ordered_extent(struct inode *inode,
36 u64 *root_objectid, u64 *objectid, 76 struct btrfs_ordered_extent *entry);
37 struct inode **inode); 77int btrfs_dec_test_ordered_pending(struct inode *inode,
38int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, 78 u64 file_offset, u64 io_size);
39 u64 *root_objectid, u64 *objectid, 79int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
40 struct inode **inode); 80 u64 start, u64 len);
41void btrfs_del_ordered_inode(struct inode *inode, int force); 81int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum);
42int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); 82struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
83 u64 file_offset);
84void btrfs_wait_ordered_extent(struct inode *inode,
85 struct btrfs_ordered_extent *entry);
86void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
87struct btrfs_ordered_extent *
88btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
89int btrfs_add_ordered_pending(struct inode *inode,
90 struct btrfs_ordered_extent *ordered,
91 u64 start, u64 len);
43#endif 92#endif
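The btrfs_ordered_sum added above is sized by btrfs_ordered_sum_size(): one btrfs_sector_sum per sector of the range is laid out after the header, with the single sums member evidently serving as the first element of that per-sector array. A stand-alone copy of the size computation (a two-pointer stand-in replaces struct list_head, so absolute byte counts may differ slightly from the kernel's; only the shape of the formula matters):

	#include <stdio.h>
	#include <stdint.h>

	struct list_head { struct list_head *next, *prev; };   /* stand-in */

	struct btrfs_sector_sum { uint64_t offset; uint32_t sum; };

	struct btrfs_ordered_sum {
		uint64_t file_offset;
		uint64_t len;
		struct list_head list;
		struct btrfs_sector_sum sums;   /* start of the per-sector array */
	};

	static unsigned long ordered_sum_size(unsigned long sectorsize, uint64_t bytes)
	{
		unsigned long num_sectors = (bytes + sectorsize - 1) / sectorsize;
		return sizeof(struct btrfs_ordered_sum) +
		       num_sectors * sizeof(struct btrfs_sector_sum);
	}

	int main(void)
	{
		/* a 16K ordered extent with 4K sectors needs 4 per-sector checksums */
		printf("%lu bytes\n", ordered_sum_size(4096, 16384));
		return 0;
	}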
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a8a3cb03de59..86a5acc19ce7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root)
67 cur_trans->start_time = get_seconds(); 67 cur_trans->start_time = get_seconds();
68 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 68 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
70 btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
71 extent_io_tree_init(&cur_trans->dirty_pages, 70 extent_io_tree_init(&cur_trans->dirty_pages,
72 root->fs_info->btree_inode->i_mapping, 71 root->fs_info->btree_inode->i_mapping,
73 GFP_NOFS); 72 GFP_NOFS);
@@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
158 wake_up(&cur_trans->writer_wait); 157 wake_up(&cur_trans->writer_wait);
159 158
160 if (cur_trans->in_commit && throttle) { 159 if (cur_trans->in_commit && throttle) {
161 int ret; 160 DEFINE_WAIT(wait);
162 mutex_unlock(&root->fs_info->trans_mutex); 161 mutex_unlock(&root->fs_info->trans_mutex);
163 ret = wait_for_commit(root, cur_trans); 162 prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
164 BUG_ON(ret); 163 TASK_UNINTERRUPTIBLE);
164 schedule();
165 finish_wait(&root->fs_info->transaction_throttle, &wait);
165 mutex_lock(&root->fs_info->trans_mutex); 166 mutex_lock(&root->fs_info->trans_mutex);
166 } 167 }
167 168
@@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
486 return ret; 487 return ret;
487} 488}
488 489
489int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
490 struct btrfs_root *root)
491{
492 struct btrfs_transaction *cur_trans = trans->transaction;
493 struct inode *inode;
494 u64 root_objectid = 0;
495 u64 objectid = 0;
496 int ret;
497
498 atomic_inc(&root->fs_info->throttles);
499 while(1) {
500 ret = btrfs_find_first_ordered_inode(
501 &cur_trans->ordered_inode_tree,
502 &root_objectid, &objectid, &inode);
503 if (!ret)
504 break;
505
506 mutex_unlock(&root->fs_info->trans_mutex);
507
508 if (S_ISREG(inode->i_mode)) {
509 atomic_inc(&BTRFS_I(inode)->ordered_writeback);
510 filemap_fdatawrite(inode->i_mapping);
511 atomic_dec(&BTRFS_I(inode)->ordered_writeback);
512 }
513 iput(inode);
514
515 mutex_lock(&root->fs_info->trans_mutex);
516 }
517 while(1) {
518 root_objectid = 0;
519 objectid = 0;
520 ret = btrfs_find_del_first_ordered_inode(
521 &cur_trans->ordered_inode_tree,
522 &root_objectid, &objectid, &inode);
523 if (!ret)
524 break;
525 mutex_unlock(&root->fs_info->trans_mutex);
526
527 if (S_ISREG(inode->i_mode)) {
528 atomic_inc(&BTRFS_I(inode)->ordered_writeback);
529 filemap_write_and_wait(inode->i_mapping);
530 atomic_dec(&BTRFS_I(inode)->ordered_writeback);
531 }
532 atomic_dec(&inode->i_count);
533 iput(inode);
534
535 mutex_lock(&root->fs_info->trans_mutex);
536 }
537 atomic_dec(&root->fs_info->throttles);
538 return 0;
539}
540
541static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, 490static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
542 struct btrfs_fs_info *fs_info, 491 struct btrfs_fs_info *fs_info,
543 struct btrfs_pending_snapshot *pending) 492 struct btrfs_pending_snapshot *pending)
@@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
666 extent_io_tree_init(pinned_copy, 615 extent_io_tree_init(pinned_copy,
667 root->fs_info->btree_inode->i_mapping, GFP_NOFS); 616 root->fs_info->btree_inode->i_mapping, GFP_NOFS);
668 617
618printk("commit trans %Lu\n", trans->transid);
669 trans->transaction->in_commit = 1; 619 trans->transaction->in_commit = 1;
670 cur_trans = trans->transaction; 620 cur_trans = trans->transaction;
671 if (cur_trans->list.prev != &root->fs_info->trans_list) { 621 if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -699,8 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
699 649
700 mutex_lock(&root->fs_info->trans_mutex); 650 mutex_lock(&root->fs_info->trans_mutex);
701 finish_wait(&cur_trans->writer_wait, &wait); 651 finish_wait(&cur_trans->writer_wait, &wait);
702 ret = btrfs_write_ordered_inodes(trans, root);
703
704 } while (cur_trans->num_writers > 1 || 652 } while (cur_trans->num_writers > 1 ||
705 (cur_trans->num_joined != joined)); 653 (cur_trans->num_joined != joined));
706 654
@@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
736 684
737 btrfs_copy_pinned(root, pinned_copy); 685 btrfs_copy_pinned(root, pinned_copy);
738 686
687 wake_up(&root->fs_info->transaction_throttle);
688
739 mutex_unlock(&root->fs_info->trans_mutex); 689 mutex_unlock(&root->fs_info->trans_mutex);
740 ret = btrfs_write_and_wait_transaction(trans, root); 690 ret = btrfs_write_and_wait_transaction(trans, root);
741 BUG_ON(ret); 691 BUG_ON(ret);
@@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
758 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); 708 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
759 709
760 mutex_unlock(&root->fs_info->trans_mutex); 710 mutex_unlock(&root->fs_info->trans_mutex);
711printk("done commit trans %Lu\n", trans->transid);
761 kmem_cache_free(btrfs_trans_handle_cachep, trans); 712 kmem_cache_free(btrfs_trans_handle_cachep, trans);
762 713
763 if (root->fs_info->closing) { 714 if (root->fs_info->closing) {
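The throttling change above replaces "wait for the whole commit" with a simple handshake: a throttled __btrfs_end_transaction() sleeps on fs_info->transaction_throttle, and btrfs_commit_transaction() wakes all sleepers right after copying the pinned extents, well before the commit fully finishes. Condensed, the two sides pair up as follows (taken from the hunks above, surrounding code trimmed):

	/* writer side, __btrfs_end_transaction() when throttle is set */
	DEFINE_WAIT(wait);
	mutex_unlock(&root->fs_info->trans_mutex);
	prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
			TASK_UNINTERRUPTIBLE);
	schedule();                                     /* sleep until woken */
	finish_wait(&root->fs_info->transaction_throttle, &wait);
	mutex_lock(&root->fs_info->trans_mutex);

	/* committer side, btrfs_commit_transaction() after btrfs_copy_pinned() */
	wake_up(&root->fs_info->transaction_throttle);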
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 9ccd5a5b170f..910350cd4cf0 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -19,7 +19,6 @@
19#ifndef __BTRFS_TRANSACTION__ 19#ifndef __BTRFS_TRANSACTION__
20#define __BTRFS_TRANSACTION__ 20#define __BTRFS_TRANSACTION__
21#include "btrfs_inode.h" 21#include "btrfs_inode.h"
22#include "ordered-data.h"
23 22
24struct btrfs_transaction { 23struct btrfs_transaction {
25 u64 transid; 24 u64 transid;
@@ -31,7 +30,6 @@ struct btrfs_transaction {
31 struct list_head list; 30 struct list_head list;
32 struct extent_io_tree dirty_pages; 31 struct extent_io_tree dirty_pages;
33 unsigned long start_time; 32 unsigned long start_time;
34 struct btrfs_ordered_inode_tree ordered_inode_tree;
35 wait_queue_head_t writer_wait; 33 wait_queue_head_t writer_wait;
36 wait_queue_head_t commit_wait; 34 wait_queue_head_t commit_wait;
37 struct list_head pending_snapshots; 35 struct list_head pending_snapshots;
@@ -88,8 +86,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
88int btrfs_clean_old_snapshots(struct btrfs_root *root); 86int btrfs_clean_old_snapshots(struct btrfs_root *root);
89int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 87int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
90 struct btrfs_root *root); 88 struct btrfs_root *root);
91int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
92 struct btrfs_root *root);
93int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 89int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
94 struct btrfs_root *root); 90 struct btrfs_root *root);
95#endif 91#endif