diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-01-08 15:46:30 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:03:59 -0400 |
commit | dc17ff8f11d129db9e83ab7244769e4eae05e14d (patch) | |
tree | 622e70100d6082e371a6ca62b02fd57e0c37f8dc | |
parent | e4204dedbbaa3a614605cb83cc0ac5161af6b4e6 (diff) |
Btrfs: Add data=ordered support
This forces file data extents down to disk along with the metadata that
references them. The current implementation is fairly simple, and just
writes out all of the dirty pages in an inode before the commit.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 1 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 6 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 7 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 16 | ||||
-rw-r--r-- | fs/btrfs/disk-io.h | 2 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 4 | ||||
-rw-r--r-- | fs/btrfs/file.c | 5 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 46 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 221 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 39 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 58 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 8 |
13 files changed, 387 insertions, 28 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d5804c5ca075..ab9a9f8edbf8 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -5,7 +5,7 @@ obj-m := btrfs.o | |||
5 | btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | 5 | btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ |
6 | hash.o file-item.o inode-item.o inode-map.o disk-io.o \ | 6 | hash.o file-item.o inode-item.o inode-map.o disk-io.o \ |
7 | transaction.o bit-radix.o inode.o file.o tree-defrag.o \ | 7 | transaction.o bit-radix.o inode.o file.o tree-defrag.o \ |
8 | extent_map.o sysfs.o struct-funcs.o xattr.o acl.o | 8 | extent_map.o sysfs.o struct-funcs.o xattr.o acl.o ordered-data.o |
9 | 9 | ||
10 | #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ | 10 | #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ |
11 | # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ | 11 | # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d1d5af471c3c..f27e633f1742 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -29,6 +29,7 @@ struct btrfs_inode { | |||
29 | struct extent_map_tree extent_tree; | 29 | struct extent_map_tree extent_tree; |
30 | struct inode vfs_inode; | 30 | struct inode vfs_inode; |
31 | 31 | ||
32 | u64 ordered_trans; | ||
32 | /* | 33 | /* |
33 | * transid of the trans_handle that last modified this inode | 34 | * transid of the trans_handle that last modified this inode |
34 | */ | 35 | */ |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 35c57074a376..43d23148a4fe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -221,7 +221,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
221 | struct extent_buffer **cow_ret) | 221 | struct extent_buffer **cow_ret) |
222 | { | 222 | { |
223 | u64 search_start; | 223 | u64 search_start; |
224 | u64 header_trans; | ||
224 | int ret; | 225 | int ret; |
226 | |||
225 | if (trans->transaction != root->fs_info->running_transaction) { | 227 | if (trans->transaction != root->fs_info->running_transaction) { |
226 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | 228 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, |
227 | root->fs_info->running_transaction->transid); | 229 | root->fs_info->running_transaction->transid); |
@@ -232,7 +234,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
232 | root->fs_info->generation); | 234 | root->fs_info->generation); |
233 | WARN_ON(1); | 235 | WARN_ON(1); |
234 | } | 236 | } |
235 | if (btrfs_header_generation(buf) == trans->transid) { | 237 | |
238 | header_trans = btrfs_header_generation(buf); | ||
239 | if (header_trans == trans->transid) { | ||
236 | *cow_ret = buf; | 240 | *cow_ret = buf; |
237 | return 0; | 241 | return 0; |
238 | } | 242 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9873975ce0ee..b55dba58dfaa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -16,8 +16,8 @@ | |||
16 | * Boston, MA 02111-1307, USA. | 16 | * Boston, MA 02111-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #ifndef __BTRFS__ | 19 | #ifndef __BTRFS_CTREE__ |
20 | #define __BTRFS__ | 20 | #define __BTRFS_CTREE__ |
21 | 21 | ||
22 | #include <linux/version.h> | 22 | #include <linux/version.h> |
23 | #include <linux/mm.h> | 23 | #include <linux/mm.h> |
@@ -363,7 +363,6 @@ struct btrfs_root { | |||
363 | struct inode *inode; | 363 | struct inode *inode; |
364 | struct kobject root_kobj; | 364 | struct kobject root_kobj; |
365 | struct completion kobj_unregister; | 365 | struct completion kobj_unregister; |
366 | struct rw_semaphore snap_sem; | ||
367 | u64 objectid; | 366 | u64 objectid; |
368 | u64 last_trans; | 367 | u64 last_trans; |
369 | 368 | ||
@@ -1142,6 +1141,8 @@ void btrfs_destroy_cachep(void); | |||
1142 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 1141 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
1143 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, | 1142 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, |
1144 | struct btrfs_root *root); | 1143 | struct btrfs_root *root); |
1144 | struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, | ||
1145 | u64 root_objectid); | ||
1145 | int btrfs_commit_write(struct file *file, struct page *page, | 1146 | int btrfs_commit_write(struct file *file, struct page *page, |
1146 | unsigned from, unsigned to); | 1147 | unsigned from, unsigned to); |
1147 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | 1148 | struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a6170ff19e7f..34cf1f1f47be 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -406,7 +406,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
406 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); | 406 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); |
407 | memset(&root->root_kobj, 0, sizeof(root->root_kobj)); | 407 | memset(&root->root_kobj, 0, sizeof(root->root_kobj)); |
408 | init_completion(&root->kobj_unregister); | 408 | init_completion(&root->kobj_unregister); |
409 | init_rwsem(&root->snap_sem); | ||
410 | root->defrag_running = 0; | 409 | root->defrag_running = 0; |
411 | root->defrag_level = 0; | 410 | root->defrag_level = 0; |
412 | root->root_key.objectid = objectid; | 411 | root->root_key.objectid = objectid; |
@@ -498,6 +497,21 @@ insert: | |||
498 | return root; | 497 | return root; |
499 | } | 498 | } |
500 | 499 | ||
500 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
501 | u64 root_objectid) | ||
502 | { | ||
503 | struct btrfs_root *root; | ||
504 | |||
505 | if (root_objectid == BTRFS_ROOT_TREE_OBJECTID) | ||
506 | return fs_info->tree_root; | ||
507 | if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) | ||
508 | return fs_info->extent_root; | ||
509 | |||
510 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
511 | (unsigned long)root_objectid); | ||
512 | return root; | ||
513 | } | ||
514 | |||
501 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | 515 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, |
502 | struct btrfs_key *location) | 516 | struct btrfs_key *location) |
503 | { | 517 | { |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 8c3cfd02901f..dae9fba8efcd 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -34,6 +34,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
34 | struct btrfs_root *root); | 34 | struct btrfs_root *root); |
35 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 35 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
36 | u64 bytenr, u32 blocksize); | 36 | u64 bytenr, u32 blocksize); |
37 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
38 | u64 root_objectid); | ||
37 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | 39 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, |
38 | struct btrfs_key *location, | 40 | struct btrfs_key *location, |
39 | const char *name, int namelen); | 41 | const char *name, int namelen); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c906bb19b211..68137cd8506a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1195,7 +1195,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, | |||
1195 | if (btrfs_buffer_uptodate(buf)) { | 1195 | if (btrfs_buffer_uptodate(buf)) { |
1196 | u64 transid = | 1196 | u64 transid = |
1197 | root->fs_info->running_transaction->transid; | 1197 | root->fs_info->running_transaction->transid; |
1198 | if (btrfs_header_generation(buf) == transid) { | 1198 | u64 header_transid = |
1199 | btrfs_header_generation(buf); | ||
1200 | if (header_transid == transid) { | ||
1199 | free_extent_buffer(buf); | 1201 | free_extent_buffer(buf); |
1200 | return 1; | 1202 | return 1; |
1201 | } | 1203 | } |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 94c93373cb7d..0a5f4defe59b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "disk-io.h" | 34 | #include "disk-io.h" |
35 | #include "transaction.h" | 35 | #include "transaction.h" |
36 | #include "btrfs_inode.h" | 36 | #include "btrfs_inode.h" |
37 | #include "ordered-data.h" | ||
37 | #include "ioctl.h" | 38 | #include "ioctl.h" |
38 | #include "print-tree.h" | 39 | #include "print-tree.h" |
39 | 40 | ||
@@ -329,6 +330,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
329 | root->fs_info->delalloc_bytes += (end_of_last_block + 1 - | 330 | root->fs_info->delalloc_bytes += (end_of_last_block + 1 - |
330 | start_pos) - existing_delalloc; | 331 | start_pos) - existing_delalloc; |
331 | spin_unlock(&root->fs_info->delalloc_lock); | 332 | spin_unlock(&root->fs_info->delalloc_lock); |
333 | btrfs_add_ordered_inode(inode); | ||
332 | } else { | 334 | } else { |
333 | u64 aligned_end; | 335 | u64 aligned_end; |
334 | /* step one, delete the existing extents in this range */ | 336 | /* step one, delete the existing extents in this range */ |
@@ -724,8 +726,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
724 | 726 | ||
725 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 727 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
726 | 728 | ||
727 | down_read(&BTRFS_I(inode)->root->snap_sem); | ||
728 | |||
729 | mutex_lock(&inode->i_mutex); | 729 | mutex_lock(&inode->i_mutex); |
730 | first_index = pos >> PAGE_CACHE_SHIFT; | 730 | first_index = pos >> PAGE_CACHE_SHIFT; |
731 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 731 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; |
@@ -804,7 +804,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
804 | } | 804 | } |
805 | out: | 805 | out: |
806 | mutex_unlock(&inode->i_mutex); | 806 | mutex_unlock(&inode->i_mutex); |
807 | up_read(&BTRFS_I(inode)->root->snap_sem); | ||
808 | 807 | ||
809 | out_nolock: | 808 | out_nolock: |
810 | kfree(pages); | 809 | kfree(pages); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d0cd9a7a615..6d6e1ac0a9a0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -135,6 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) | |||
135 | alloc_hint = ins.objectid + ins.offset; | 135 | alloc_hint = ins.objectid + ins.offset; |
136 | start += cur_alloc_size; | 136 | start += cur_alloc_size; |
137 | } | 137 | } |
138 | btrfs_add_ordered_inode(inode); | ||
138 | out: | 139 | out: |
139 | btrfs_end_transaction(trans, root); | 140 | btrfs_end_transaction(trans, root); |
140 | return ret; | 141 | return ret; |
@@ -367,8 +368,8 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
367 | path = btrfs_alloc_path(); | 368 | path = btrfs_alloc_path(); |
368 | BUG_ON(!path); | 369 | BUG_ON(!path); |
369 | mutex_lock(&root->fs_info->fs_mutex); | 370 | mutex_lock(&root->fs_info->fs_mutex); |
370 | |||
371 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | 371 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); |
372 | |||
372 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); | 373 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); |
373 | if (ret) | 374 | if (ret) |
374 | goto make_bad; | 375 | goto make_bad; |
@@ -898,7 +899,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
898 | if ((offset & (blocksize - 1)) == 0) | 899 | if ((offset & (blocksize - 1)) == 0) |
899 | goto out; | 900 | goto out; |
900 | 901 | ||
901 | down_read(&root->snap_sem); | ||
902 | ret = -ENOMEM; | 902 | ret = -ENOMEM; |
903 | page = grab_cache_page(mapping, index); | 903 | page = grab_cache_page(mapping, index); |
904 | if (!page) | 904 | if (!page) |
@@ -917,7 +917,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
917 | 917 | ||
918 | unlock_page(page); | 918 | unlock_page(page); |
919 | page_cache_release(page); | 919 | page_cache_release(page); |
920 | up_read(&BTRFS_I(inode)->root->snap_sem); | ||
921 | out: | 920 | out: |
922 | return ret; | 921 | return ret; |
923 | } | 922 | } |
@@ -1146,6 +1145,19 @@ static int btrfs_find_actor(struct inode *inode, void *opaque) | |||
1146 | args->root == BTRFS_I(inode)->root); | 1145 | args->root == BTRFS_I(inode)->root); |
1147 | } | 1146 | } |
1148 | 1147 | ||
1148 | struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, | ||
1149 | u64 root_objectid) | ||
1150 | { | ||
1151 | struct btrfs_iget_args args; | ||
1152 | args.ino = objectid; | ||
1153 | args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid); | ||
1154 | |||
1155 | if (!args.root) | ||
1156 | return NULL; | ||
1157 | |||
1158 | return ilookup5(s, objectid, btrfs_find_actor, (void *)&args); | ||
1159 | } | ||
1160 | |||
1149 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, | 1161 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, |
1150 | struct btrfs_root *root) | 1162 | struct btrfs_root *root) |
1151 | { | 1163 | { |
@@ -1336,7 +1348,6 @@ read_dir_items: | |||
1336 | 1348 | ||
1337 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; | 1349 | d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; |
1338 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | 1350 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
1339 | |||
1340 | over = filldir(dirent, name_ptr, name_len, | 1351 | over = filldir(dirent, name_ptr, name_len, |
1341 | found_key.offset, | 1352 | found_key.offset, |
1342 | location.objectid, | 1353 | location.objectid, |
@@ -2054,7 +2065,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
2054 | 2065 | ||
2055 | ret = -EINVAL; | 2066 | ret = -EINVAL; |
2056 | 2067 | ||
2057 | down_read(&BTRFS_I(inode)->root->snap_sem); | ||
2058 | lock_page(page); | 2068 | lock_page(page); |
2059 | wait_on_page_writeback(page); | 2069 | wait_on_page_writeback(page); |
2060 | size = i_size_read(inode); | 2070 | size = i_size_read(inode); |
@@ -2075,7 +2085,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
2075 | ret = btrfs_cow_one_page(inode, page, end); | 2085 | ret = btrfs_cow_one_page(inode, page, end); |
2076 | 2086 | ||
2077 | out_unlock: | 2087 | out_unlock: |
2078 | up_read(&BTRFS_I(inode)->root->snap_sem); | ||
2079 | unlock_page(page); | 2088 | unlock_page(page); |
2080 | out: | 2089 | out: |
2081 | return ret; | 2090 | return ret; |
@@ -2118,7 +2127,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, | |||
2118 | struct btrfs_root_item root_item; | 2127 | struct btrfs_root_item root_item; |
2119 | struct btrfs_inode_item *inode_item; | 2128 | struct btrfs_inode_item *inode_item; |
2120 | struct extent_buffer *leaf; | 2129 | struct extent_buffer *leaf; |
2121 | struct btrfs_root *new_root; | 2130 | struct btrfs_root *new_root = root; |
2122 | struct inode *inode; | 2131 | struct inode *inode; |
2123 | struct inode *dir; | 2132 | struct inode *dir; |
2124 | int ret; | 2133 | int ret; |
@@ -2230,7 +2239,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name, | |||
2230 | goto fail; | 2239 | goto fail; |
2231 | fail: | 2240 | fail: |
2232 | nr = trans->blocks_used; | 2241 | nr = trans->blocks_used; |
2233 | err = btrfs_commit_transaction(trans, root); | 2242 | err = btrfs_commit_transaction(trans, new_root); |
2234 | if (err && !ret) | 2243 | if (err && !ret) |
2235 | ret = err; | 2244 | ret = err; |
2236 | fail_commit: | 2245 | fail_commit: |
@@ -2253,10 +2262,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) | |||
2253 | if (!root->ref_cows) | 2262 | if (!root->ref_cows) |
2254 | return -EINVAL; | 2263 | return -EINVAL; |
2255 | 2264 | ||
2256 | down_write(&root->snap_sem); | ||
2257 | freeze_bdev(root->fs_info->sb->s_bdev); | ||
2258 | thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb); | ||
2259 | |||
2260 | mutex_lock(&root->fs_info->fs_mutex); | 2265 | mutex_lock(&root->fs_info->fs_mutex); |
2261 | ret = btrfs_check_free_space(root, 1, 0); | 2266 | ret = btrfs_check_free_space(root, 1, 0); |
2262 | if (ret) | 2267 | if (ret) |
@@ -2264,6 +2269,9 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) | |||
2264 | 2269 | ||
2265 | trans = btrfs_start_transaction(root, 1); | 2270 | trans = btrfs_start_transaction(root, 1); |
2266 | BUG_ON(!trans); | 2271 | BUG_ON(!trans); |
2272 | err = btrfs_commit_transaction(trans, root); | ||
2273 | |||
2274 | trans = btrfs_start_transaction(root, 1); | ||
2267 | 2275 | ||
2268 | ret = btrfs_update_inode(trans, root, root->inode); | 2276 | ret = btrfs_update_inode(trans, root, root->inode); |
2269 | if (ret) | 2277 | if (ret) |
@@ -2272,9 +2280,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) | |||
2272 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, | 2280 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, |
2273 | 0, &objectid); | 2281 | 0, &objectid); |
2274 | if (ret) | 2282 | if (ret) |
2275 | goto fail; | 2283 | goto fail; memcpy(&new_root_item, &root->root_item, |
2276 | |||
2277 | memcpy(&new_root_item, &root->root_item, | ||
2278 | sizeof(new_root_item)); | 2284 | sizeof(new_root_item)); |
2279 | 2285 | ||
2280 | key.objectid = objectid; | 2286 | key.objectid = objectid; |
@@ -2285,12 +2291,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) | |||
2285 | btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); | 2291 | btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); |
2286 | free_extent_buffer(tmp); | 2292 | free_extent_buffer(tmp); |
2287 | 2293 | ||
2294 | /* write the ordered inodes to force all delayed allocations to | ||
2295 | * be filled. Once this is done, we can copy the root | ||
2296 | */ | ||
2297 | mutex_lock(&root->fs_info->trans_mutex); | ||
2298 | btrfs_write_ordered_inodes(trans, root); | ||
2299 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2300 | |||
2288 | btrfs_copy_root(trans, root, root->node, &tmp, objectid); | 2301 | btrfs_copy_root(trans, root, root->node, &tmp, objectid); |
2289 | 2302 | ||
2290 | btrfs_set_root_bytenr(&new_root_item, tmp->start); | 2303 | btrfs_set_root_bytenr(&new_root_item, tmp->start); |
2291 | btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); | 2304 | btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); |
2292 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 2305 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, |
2293 | &new_root_item); | 2306 | &new_root_item); |
2307 | printk("new root %Lu node %Lu\n", objectid, tmp->start); | ||
2294 | free_extent_buffer(tmp); | 2308 | free_extent_buffer(tmp); |
2295 | if (ret) | 2309 | if (ret) |
2296 | goto fail; | 2310 | goto fail; |
@@ -2321,7 +2335,6 @@ fail: | |||
2321 | ret = err; | 2335 | ret = err; |
2322 | fail_unlock: | 2336 | fail_unlock: |
2323 | mutex_unlock(&root->fs_info->fs_mutex); | 2337 | mutex_unlock(&root->fs_info->fs_mutex); |
2324 | up_write(&root->snap_sem); | ||
2325 | btrfs_btree_balance_dirty(root, nr); | 2338 | btrfs_btree_balance_dirty(root, nr); |
2326 | return ret; | 2339 | return ret; |
2327 | } | 2340 | } |
@@ -2608,6 +2621,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
2608 | if (!ei) | 2621 | if (!ei) |
2609 | return NULL; | 2622 | return NULL; |
2610 | ei->last_trans = 0; | 2623 | ei->last_trans = 0; |
2624 | ei->ordered_trans = 0; | ||
2611 | return &ei->vfs_inode; | 2625 | return &ei->vfs_inode; |
2612 | } | 2626 | } |
2613 | 2627 | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c new file mode 100644 index 000000000000..411aba84d305 --- /dev/null +++ b/fs/btrfs/ordered-data.c | |||
@@ -0,0 +1,221 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/gfp.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include "ctree.h" | ||
22 | #include "transaction.h" | ||
23 | #include "btrfs_inode.h" | ||
24 | |||
25 | struct tree_entry { | ||
26 | u64 root_objectid; | ||
27 | u64 objectid; | ||
28 | struct rb_node rb_node; | ||
29 | }; | ||
30 | |||
31 | /* | ||
32 | * returns > 0 if the (root, objectid) passed in is > entry, | ||
33 | * < 0 if (root, objectid) is < entry, and zero if they are equal | ||
34 | */ | ||
35 | static int comp_entry(struct tree_entry *entry, u64 root_objectid, | ||
36 | u64 objectid) | ||
37 | { | ||
38 | if (root_objectid < entry->root_objectid) | ||
39 | return -1; | ||
40 | if (root_objectid > entry->root_objectid) | ||
41 | return 1; | ||
42 | if (objectid < entry->objectid) | ||
43 | return -1; | ||
44 | if (objectid > entry->objectid) | ||
45 | return 1; | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid, | ||
50 | u64 objectid, struct rb_node *node) | ||
51 | { | ||
52 | struct rb_node ** p = &root->rb_node; | ||
53 | struct rb_node * parent = NULL; | ||
54 | struct tree_entry *entry; | ||
55 | int comp; | ||
56 | |||
57 | while(*p) { | ||
58 | parent = *p; | ||
59 | entry = rb_entry(parent, struct tree_entry, rb_node); | ||
60 | |||
61 | comp = comp_entry(entry, root_objectid, objectid); | ||
62 | if (comp < 0) | ||
63 | p = &(*p)->rb_left; | ||
64 | else if (comp > 0) | ||
65 | p = &(*p)->rb_right; | ||
66 | else | ||
67 | return parent; | ||
68 | } | ||
69 | |||
70 | rb_link_node(node, parent, p); | ||
71 | rb_insert_color(node, root); | ||
72 | return NULL; | ||
73 | } | ||
74 | |||
75 | static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid, | ||
76 | u64 objectid, struct rb_node **prev_ret) | ||
77 | { | ||
78 | struct rb_node * n = root->rb_node; | ||
79 | struct rb_node *prev = NULL; | ||
80 | struct tree_entry *entry; | ||
81 | struct tree_entry *prev_entry = NULL; | ||
82 | int comp; | ||
83 | |||
84 | while(n) { | ||
85 | entry = rb_entry(n, struct tree_entry, rb_node); | ||
86 | prev = n; | ||
87 | prev_entry = entry; | ||
88 | comp = comp_entry(entry, root_objectid, objectid); | ||
89 | |||
90 | if (comp < 0) | ||
91 | n = n->rb_left; | ||
92 | else if (comp > 0) | ||
93 | n = n->rb_right; | ||
94 | else | ||
95 | return n; | ||
96 | } | ||
97 | if (!prev_ret) | ||
98 | return NULL; | ||
99 | |||
100 | while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) { | ||
101 | prev = rb_next(prev); | ||
102 | prev_entry = rb_entry(prev, struct tree_entry, rb_node); | ||
103 | } | ||
104 | *prev_ret = prev; | ||
105 | return NULL; | ||
106 | } | ||
107 | |||
108 | static inline struct rb_node *tree_search(struct rb_root *root, | ||
109 | u64 root_objectid, u64 objectid) | ||
110 | { | ||
111 | struct rb_node *prev; | ||
112 | struct rb_node *ret; | ||
113 | ret = __tree_search(root, root_objectid, objectid, &prev); | ||
114 | if (!ret) | ||
115 | return prev; | ||
116 | return ret; | ||
117 | } | ||
118 | |||
119 | int btrfs_add_ordered_inode(struct inode *inode) | ||
120 | { | ||
121 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
122 | u64 root_objectid = root->root_key.objectid; | ||
123 | u64 transid = root->fs_info->running_transaction->transid; | ||
124 | struct tree_entry *entry; | ||
125 | struct rb_node *node; | ||
126 | struct btrfs_ordered_inode_tree *tree; | ||
127 | |||
128 | if (transid <= BTRFS_I(inode)->ordered_trans) | ||
129 | return 0; | ||
130 | |||
131 | tree = &root->fs_info->running_transaction->ordered_inode_tree; | ||
132 | |||
133 | read_lock(&tree->lock); | ||
134 | node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL); | ||
135 | read_unlock(&tree->lock); | ||
136 | if (node) { | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | entry = kmalloc(sizeof(*entry), GFP_NOFS); | ||
141 | if (!entry) | ||
142 | return -ENOMEM; | ||
143 | |||
144 | write_lock(&tree->lock); | ||
145 | entry->objectid = inode->i_ino; | ||
146 | entry->root_objectid = root_objectid; | ||
147 | |||
148 | node = tree_insert(&tree->tree, root_objectid, | ||
149 | inode->i_ino, &entry->rb_node); | ||
150 | |||
151 | BTRFS_I(inode)->ordered_trans = transid; | ||
152 | |||
153 | write_unlock(&tree->lock); | ||
154 | if (node) | ||
155 | kfree(entry); | ||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, | ||
160 | u64 *root_objectid, u64 *objectid) | ||
161 | { | ||
162 | struct tree_entry *entry; | ||
163 | struct rb_node *node; | ||
164 | |||
165 | write_lock(&tree->lock); | ||
166 | node = tree_search(&tree->tree, *root_objectid, *objectid); | ||
167 | if (!node) { | ||
168 | write_unlock(&tree->lock); | ||
169 | return 0; | ||
170 | } | ||
171 | entry = rb_entry(node, struct tree_entry, rb_node); | ||
172 | |||
173 | while(comp_entry(entry, *root_objectid, *objectid) >= 0) { | ||
174 | node = rb_next(node); | ||
175 | if (!node) | ||
176 | break; | ||
177 | entry = rb_entry(node, struct tree_entry, rb_node); | ||
178 | } | ||
179 | if (!node) { | ||
180 | write_unlock(&tree->lock); | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | *root_objectid = entry->root_objectid; | ||
185 | *objectid = entry->objectid; | ||
186 | write_unlock(&tree->lock); | ||
187 | return 1; | ||
188 | } | ||
189 | |||
190 | int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, | ||
191 | u64 *root_objectid, u64 *objectid) | ||
192 | { | ||
193 | struct tree_entry *entry; | ||
194 | struct rb_node *node; | ||
195 | |||
196 | write_lock(&tree->lock); | ||
197 | node = tree_search(&tree->tree, *root_objectid, *objectid); | ||
198 | if (!node) { | ||
199 | write_unlock(&tree->lock); | ||
200 | return 0; | ||
201 | } | ||
202 | |||
203 | entry = rb_entry(node, struct tree_entry, rb_node); | ||
204 | while(comp_entry(entry, *root_objectid, *objectid) >= 0) { | ||
205 | node = rb_next(node); | ||
206 | if (!node) | ||
207 | break; | ||
208 | entry = rb_entry(node, struct tree_entry, rb_node); | ||
209 | } | ||
210 | if (!node) { | ||
211 | write_unlock(&tree->lock); | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | *root_objectid = entry->root_objectid; | ||
216 | *objectid = entry->objectid; | ||
217 | rb_erase(node, &tree->tree); | ||
218 | write_unlock(&tree->lock); | ||
219 | kfree(entry); | ||
220 | return 1; | ||
221 | } | ||
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h new file mode 100644 index 000000000000..aaf9eb142719 --- /dev/null +++ b/fs/btrfs/ordered-data.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #ifndef __BTRFS_ORDERED_DATA__ | ||
20 | #define __BTRFS_ORDERED_DATA__ | ||
21 | |||
22 | struct btrfs_ordered_inode_tree { | ||
23 | rwlock_t lock; | ||
24 | struct rb_root tree; | ||
25 | }; | ||
26 | |||
27 | static inline void | ||
28 | btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) | ||
29 | { | ||
30 | rwlock_init(&t->lock); | ||
31 | t->tree.rb_node = NULL; | ||
32 | } | ||
33 | |||
34 | int btrfs_add_ordered_inode(struct inode *inode); | ||
35 | int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, | ||
36 | u64 *root_objectid, u64 *objectid); | ||
37 | int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, | ||
38 | u64 *root_objectid, u64 *objectid); | ||
39 | #endif | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 02721eea9a7a..3ed5868e7c0f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -67,6 +67,7 @@ static int join_transaction(struct btrfs_root *root) | |||
67 | cur_trans->commit_done = 0; | 67 | cur_trans->commit_done = 0; |
68 | cur_trans->start_time = get_seconds(); | 68 | cur_trans->start_time = get_seconds(); |
69 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 69 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); |
70 | btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); | ||
70 | extent_map_tree_init(&cur_trans->dirty_pages, | 71 | extent_map_tree_init(&cur_trans->dirty_pages, |
71 | root->fs_info->btree_inode->i_mapping, | 72 | root->fs_info->btree_inode->i_mapping, |
72 | GFP_NOFS); | 73 | GFP_NOFS); |
@@ -473,6 +474,60 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, | |||
473 | return ret; | 474 | return ret; |
474 | } | 475 | } |
475 | 476 | ||
477 | int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, | ||
478 | struct btrfs_root *root) | ||
479 | { | ||
480 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
481 | struct inode *inode; | ||
482 | u64 root_objectid = 0; | ||
483 | u64 objectid = 0; | ||
484 | u64 transid = trans->transid; | ||
485 | int ret; | ||
486 | |||
487 | printk("write ordered trans %Lu\n", transid); | ||
488 | while(1) { | ||
489 | ret = btrfs_find_first_ordered_inode( | ||
490 | &cur_trans->ordered_inode_tree, | ||
491 | &root_objectid, &objectid); | ||
492 | if (!ret) | ||
493 | break; | ||
494 | |||
495 | mutex_unlock(&root->fs_info->trans_mutex); | ||
496 | mutex_unlock(&root->fs_info->fs_mutex); | ||
497 | inode = btrfs_ilookup(root->fs_info->sb, objectid, | ||
498 | root_objectid); | ||
499 | if (inode) { | ||
500 | if (S_ISREG(inode->i_mode)) | ||
501 | filemap_fdatawrite(inode->i_mapping); | ||
502 | iput(inode); | ||
503 | } | ||
504 | mutex_lock(&root->fs_info->fs_mutex); | ||
505 | mutex_lock(&root->fs_info->trans_mutex); | ||
506 | } | ||
507 | while(1) { | ||
508 | root_objectid = 0; | ||
509 | objectid = 0; | ||
510 | ret = btrfs_find_del_first_ordered_inode( | ||
511 | &cur_trans->ordered_inode_tree, | ||
512 | &root_objectid, &objectid); | ||
513 | if (!ret) | ||
514 | break; | ||
515 | mutex_unlock(&root->fs_info->trans_mutex); | ||
516 | mutex_unlock(&root->fs_info->fs_mutex); | ||
517 | inode = btrfs_ilookup(root->fs_info->sb, objectid, | ||
518 | root_objectid); | ||
519 | if (inode) { | ||
520 | if (S_ISREG(inode->i_mode)) | ||
521 | filemap_write_and_wait(inode->i_mapping); | ||
522 | iput(inode); | ||
523 | } | ||
524 | mutex_lock(&root->fs_info->fs_mutex); | ||
525 | mutex_lock(&root->fs_info->trans_mutex); | ||
526 | } | ||
527 | printk("done write ordered trans %Lu\n", transid); | ||
528 | return 0; | ||
529 | } | ||
530 | |||
476 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 531 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
477 | struct btrfs_root *root) | 532 | struct btrfs_root *root) |
478 | { | 533 | { |
@@ -550,10 +605,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
550 | mutex_lock(&root->fs_info->fs_mutex); | 605 | mutex_lock(&root->fs_info->fs_mutex); |
551 | mutex_lock(&root->fs_info->trans_mutex); | 606 | mutex_lock(&root->fs_info->trans_mutex); |
552 | finish_wait(&cur_trans->writer_wait, &wait); | 607 | finish_wait(&cur_trans->writer_wait, &wait); |
608 | ret = btrfs_write_ordered_inodes(trans, root); | ||
609 | |||
553 | } while (cur_trans->num_writers > 1 || | 610 | } while (cur_trans->num_writers > 1 || |
554 | (cur_trans->num_joined != joined)); | 611 | (cur_trans->num_joined != joined)); |
555 | 612 | ||
556 | WARN_ON(cur_trans != trans->transaction); | 613 | WARN_ON(cur_trans != trans->transaction); |
614 | |||
557 | ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, | 615 | ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, |
558 | &dirty_fs_roots); | 616 | &dirty_fs_roots); |
559 | BUG_ON(ret); | 617 | BUG_ON(ret); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index eef840bca91e..c157ddbe9d1e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -16,9 +16,10 @@ | |||
16 | * Boston, MA 02111-1307, USA. | 16 | * Boston, MA 02111-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #ifndef __TRANSACTION__ | 19 | #ifndef __BTRFS_TRANSACTION__ |
20 | #define __TRANSACTION__ | 20 | #define __BTRFS_TRANSACTION__ |
21 | #include "btrfs_inode.h" | 21 | #include "btrfs_inode.h" |
22 | #include "ordered-data.h" | ||
22 | 23 | ||
23 | struct btrfs_transaction { | 24 | struct btrfs_transaction { |
24 | u64 transid; | 25 | u64 transid; |
@@ -30,6 +31,7 @@ struct btrfs_transaction { | |||
30 | struct list_head list; | 31 | struct list_head list; |
31 | struct extent_map_tree dirty_pages; | 32 | struct extent_map_tree dirty_pages; |
32 | unsigned long start_time; | 33 | unsigned long start_time; |
34 | struct btrfs_ordered_inode_tree ordered_inode_tree; | ||
33 | wait_queue_head_t writer_wait; | 35 | wait_queue_head_t writer_wait; |
34 | wait_queue_head_t commit_wait; | 36 | wait_queue_head_t commit_wait; |
35 | }; | 37 | }; |
@@ -90,4 +92,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); | |||
90 | int btrfs_clean_old_snapshots(struct btrfs_root *root); | 92 | int btrfs_clean_old_snapshots(struct btrfs_root *root); |
91 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 93 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
92 | struct btrfs_root *root); | 94 | struct btrfs_root *root); |
/*
 * Write out the data of the inodes tracked in the transaction's ordered
 * inode tree.  The implementation temporarily drops and retakes fs_mutex
 * and trans_mutex, so the caller must hold both.
 */
int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
				struct btrfs_root *root);
93 | #endif | 97 | #endif |