aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-01-08 15:46:30 -0500
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:03:59 -0400
commitdc17ff8f11d129db9e83ab7244769e4eae05e14d (patch)
tree622e70100d6082e371a6ca62b02fd57e0c37f8dc /fs/btrfs
parente4204dedbbaa3a614605cb83cc0ac5161af6b4e6 (diff)
Btrfs: Add data=ordered support
This forces file data extents down to the disk along with the metadata that references them. The current implementation is fairly simple: it just writes out all of the dirty pages in an inode before the commit.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/btrfs_inode.h1
-rw-r--r--fs/btrfs/ctree.c6
-rw-r--r--fs/btrfs/ctree.h7
-rw-r--r--fs/btrfs/disk-io.c16
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c4
-rw-r--r--fs/btrfs/file.c5
-rw-r--r--fs/btrfs/inode.c46
-rw-r--r--fs/btrfs/ordered-data.c221
-rw-r--r--fs/btrfs/ordered-data.h39
-rw-r--r--fs/btrfs/transaction.c58
-rw-r--r--fs/btrfs/transaction.h8
13 files changed, 387 insertions, 28 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index d5804c5ca075..ab9a9f8edbf8 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -5,7 +5,7 @@ obj-m := btrfs.o
5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ 5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \ 6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o bit-radix.o inode.o file.o tree-defrag.o \ 7 transaction.o bit-radix.o inode.o file.o tree-defrag.o \
8 extent_map.o sysfs.o struct-funcs.o xattr.o acl.o 8 extent_map.o sysfs.o struct-funcs.o xattr.o acl.o ordered-data.o
9 9
10#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ 10#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
11# root-tree.o dir-item.o hash.o file-item.o inode-item.o \ 11# root-tree.o dir-item.o hash.o file-item.o inode-item.o \
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d1d5af471c3c..f27e633f1742 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -29,6 +29,7 @@ struct btrfs_inode {
29 struct extent_map_tree extent_tree; 29 struct extent_map_tree extent_tree;
30 struct inode vfs_inode; 30 struct inode vfs_inode;
31 31
32 u64 ordered_trans;
32 /* 33 /*
33 * transid of the trans_handle that last modified this inode 34 * transid of the trans_handle that last modified this inode
34 */ 35 */
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 35c57074a376..43d23148a4fe 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -221,7 +221,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
221 struct extent_buffer **cow_ret) 221 struct extent_buffer **cow_ret)
222{ 222{
223 u64 search_start; 223 u64 search_start;
224 u64 header_trans;
224 int ret; 225 int ret;
226
225 if (trans->transaction != root->fs_info->running_transaction) { 227 if (trans->transaction != root->fs_info->running_transaction) {
226 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, 228 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
227 root->fs_info->running_transaction->transid); 229 root->fs_info->running_transaction->transid);
@@ -232,7 +234,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
232 root->fs_info->generation); 234 root->fs_info->generation);
233 WARN_ON(1); 235 WARN_ON(1);
234 } 236 }
235 if (btrfs_header_generation(buf) == trans->transid) { 237
238 header_trans = btrfs_header_generation(buf);
239 if (header_trans == trans->transid) {
236 *cow_ret = buf; 240 *cow_ret = buf;
237 return 0; 241 return 0;
238 } 242 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9873975ce0ee..b55dba58dfaa 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -16,8 +16,8 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#ifndef __BTRFS__ 19#ifndef __BTRFS_CTREE__
20#define __BTRFS__ 20#define __BTRFS_CTREE__
21 21
22#include <linux/version.h> 22#include <linux/version.h>
23#include <linux/mm.h> 23#include <linux/mm.h>
@@ -363,7 +363,6 @@ struct btrfs_root {
363 struct inode *inode; 363 struct inode *inode;
364 struct kobject root_kobj; 364 struct kobject root_kobj;
365 struct completion kobj_unregister; 365 struct completion kobj_unregister;
366 struct rw_semaphore snap_sem;
367 u64 objectid; 366 u64 objectid;
368 u64 last_trans; 367 u64 last_trans;
369 368
@@ -1142,6 +1141,8 @@ void btrfs_destroy_cachep(void);
1142long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 1141long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1143struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, 1142struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1144 struct btrfs_root *root); 1143 struct btrfs_root *root);
1144struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1145 u64 root_objectid);
1145int btrfs_commit_write(struct file *file, struct page *page, 1146int btrfs_commit_write(struct file *file, struct page *page,
1146 unsigned from, unsigned to); 1147 unsigned from, unsigned to);
1147struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, 1148struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a6170ff19e7f..34cf1f1f47be 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -406,7 +406,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
406 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 406 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
407 memset(&root->root_kobj, 0, sizeof(root->root_kobj)); 407 memset(&root->root_kobj, 0, sizeof(root->root_kobj));
408 init_completion(&root->kobj_unregister); 408 init_completion(&root->kobj_unregister);
409 init_rwsem(&root->snap_sem);
410 root->defrag_running = 0; 409 root->defrag_running = 0;
411 root->defrag_level = 0; 410 root->defrag_level = 0;
412 root->root_key.objectid = objectid; 411 root->root_key.objectid = objectid;
@@ -498,6 +497,21 @@ insert:
498 return root; 497 return root;
499} 498}
500 499
500struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
501 u64 root_objectid)
502{
503 struct btrfs_root *root;
504
505 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
506 return fs_info->tree_root;
507 if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
508 return fs_info->extent_root;
509
510 root = radix_tree_lookup(&fs_info->fs_roots_radix,
511 (unsigned long)root_objectid);
512 return root;
513}
514
501struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 515struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
502 struct btrfs_key *location) 516 struct btrfs_key *location)
503{ 517{
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 8c3cfd02901f..dae9fba8efcd 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -34,6 +34,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
34 struct btrfs_root *root); 34 struct btrfs_root *root);
35struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 35struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
36 u64 bytenr, u32 blocksize); 36 u64 bytenr, u32 blocksize);
37struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
38 u64 root_objectid);
37struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, 39struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
38 struct btrfs_key *location, 40 struct btrfs_key *location,
39 const char *name, int namelen); 41 const char *name, int namelen);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c906bb19b211..68137cd8506a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1195,7 +1195,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1195 if (btrfs_buffer_uptodate(buf)) { 1195 if (btrfs_buffer_uptodate(buf)) {
1196 u64 transid = 1196 u64 transid =
1197 root->fs_info->running_transaction->transid; 1197 root->fs_info->running_transaction->transid;
1198 if (btrfs_header_generation(buf) == transid) { 1198 u64 header_transid =
1199 btrfs_header_generation(buf);
1200 if (header_transid == transid) {
1199 free_extent_buffer(buf); 1201 free_extent_buffer(buf);
1200 return 1; 1202 return 1;
1201 } 1203 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 94c93373cb7d..0a5f4defe59b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -34,6 +34,7 @@
34#include "disk-io.h" 34#include "disk-io.h"
35#include "transaction.h" 35#include "transaction.h"
36#include "btrfs_inode.h" 36#include "btrfs_inode.h"
37#include "ordered-data.h"
37#include "ioctl.h" 38#include "ioctl.h"
38#include "print-tree.h" 39#include "print-tree.h"
39 40
@@ -329,6 +330,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
329 root->fs_info->delalloc_bytes += (end_of_last_block + 1 - 330 root->fs_info->delalloc_bytes += (end_of_last_block + 1 -
330 start_pos) - existing_delalloc; 331 start_pos) - existing_delalloc;
331 spin_unlock(&root->fs_info->delalloc_lock); 332 spin_unlock(&root->fs_info->delalloc_lock);
333 btrfs_add_ordered_inode(inode);
332 } else { 334 } else {
333 u64 aligned_end; 335 u64 aligned_end;
334 /* step one, delete the existing extents in this range */ 336 /* step one, delete the existing extents in this range */
@@ -724,8 +726,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
724 726
725 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 727 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
726 728
727 down_read(&BTRFS_I(inode)->root->snap_sem);
728
729 mutex_lock(&inode->i_mutex); 729 mutex_lock(&inode->i_mutex);
730 first_index = pos >> PAGE_CACHE_SHIFT; 730 first_index = pos >> PAGE_CACHE_SHIFT;
731 last_index = (pos + count) >> PAGE_CACHE_SHIFT; 731 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
@@ -804,7 +804,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
804 } 804 }
805out: 805out:
806 mutex_unlock(&inode->i_mutex); 806 mutex_unlock(&inode->i_mutex);
807 up_read(&BTRFS_I(inode)->root->snap_sem);
808 807
809out_nolock: 808out_nolock:
810 kfree(pages); 809 kfree(pages);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6d0cd9a7a615..6d6e1ac0a9a0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -135,6 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
135 alloc_hint = ins.objectid + ins.offset; 135 alloc_hint = ins.objectid + ins.offset;
136 start += cur_alloc_size; 136 start += cur_alloc_size;
137 } 137 }
138 btrfs_add_ordered_inode(inode);
138out: 139out:
139 btrfs_end_transaction(trans, root); 140 btrfs_end_transaction(trans, root);
140 return ret; 141 return ret;
@@ -367,8 +368,8 @@ void btrfs_read_locked_inode(struct inode *inode)
367 path = btrfs_alloc_path(); 368 path = btrfs_alloc_path();
368 BUG_ON(!path); 369 BUG_ON(!path);
369 mutex_lock(&root->fs_info->fs_mutex); 370 mutex_lock(&root->fs_info->fs_mutex);
370
371 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 371 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
372
372 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 373 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
373 if (ret) 374 if (ret)
374 goto make_bad; 375 goto make_bad;
@@ -898,7 +899,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
898 if ((offset & (blocksize - 1)) == 0) 899 if ((offset & (blocksize - 1)) == 0)
899 goto out; 900 goto out;
900 901
901 down_read(&root->snap_sem);
902 ret = -ENOMEM; 902 ret = -ENOMEM;
903 page = grab_cache_page(mapping, index); 903 page = grab_cache_page(mapping, index);
904 if (!page) 904 if (!page)
@@ -917,7 +917,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
917 917
918 unlock_page(page); 918 unlock_page(page);
919 page_cache_release(page); 919 page_cache_release(page);
920 up_read(&BTRFS_I(inode)->root->snap_sem);
921out: 920out:
922 return ret; 921 return ret;
923} 922}
@@ -1146,6 +1145,19 @@ static int btrfs_find_actor(struct inode *inode, void *opaque)
1146 args->root == BTRFS_I(inode)->root); 1145 args->root == BTRFS_I(inode)->root);
1147} 1146}
1148 1147
1148struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1149 u64 root_objectid)
1150{
1151 struct btrfs_iget_args args;
1152 args.ino = objectid;
1153 args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1154
1155 if (!args.root)
1156 return NULL;
1157
1158 return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1159}
1160
1149struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, 1161struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1150 struct btrfs_root *root) 1162 struct btrfs_root *root)
1151{ 1163{
@@ -1336,7 +1348,6 @@ read_dir_items:
1336 1348
1337 d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; 1349 d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1338 btrfs_dir_item_key_to_cpu(leaf, di, &location); 1350 btrfs_dir_item_key_to_cpu(leaf, di, &location);
1339
1340 over = filldir(dirent, name_ptr, name_len, 1351 over = filldir(dirent, name_ptr, name_len,
1341 found_key.offset, 1352 found_key.offset,
1342 location.objectid, 1353 location.objectid,
@@ -2054,7 +2065,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2054 2065
2055 ret = -EINVAL; 2066 ret = -EINVAL;
2056 2067
2057 down_read(&BTRFS_I(inode)->root->snap_sem);
2058 lock_page(page); 2068 lock_page(page);
2059 wait_on_page_writeback(page); 2069 wait_on_page_writeback(page);
2060 size = i_size_read(inode); 2070 size = i_size_read(inode);
@@ -2075,7 +2085,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2075 ret = btrfs_cow_one_page(inode, page, end); 2085 ret = btrfs_cow_one_page(inode, page, end);
2076 2086
2077out_unlock: 2087out_unlock:
2078 up_read(&BTRFS_I(inode)->root->snap_sem);
2079 unlock_page(page); 2088 unlock_page(page);
2080out: 2089out:
2081 return ret; 2090 return ret;
@@ -2118,7 +2127,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name,
2118 struct btrfs_root_item root_item; 2127 struct btrfs_root_item root_item;
2119 struct btrfs_inode_item *inode_item; 2128 struct btrfs_inode_item *inode_item;
2120 struct extent_buffer *leaf; 2129 struct extent_buffer *leaf;
2121 struct btrfs_root *new_root; 2130 struct btrfs_root *new_root = root;
2122 struct inode *inode; 2131 struct inode *inode;
2123 struct inode *dir; 2132 struct inode *dir;
2124 int ret; 2133 int ret;
@@ -2230,7 +2239,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name,
2230 goto fail; 2239 goto fail;
2231fail: 2240fail:
2232 nr = trans->blocks_used; 2241 nr = trans->blocks_used;
2233 err = btrfs_commit_transaction(trans, root); 2242 err = btrfs_commit_transaction(trans, new_root);
2234 if (err && !ret) 2243 if (err && !ret)
2235 ret = err; 2244 ret = err;
2236fail_commit: 2245fail_commit:
@@ -2253,10 +2262,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2253 if (!root->ref_cows) 2262 if (!root->ref_cows)
2254 return -EINVAL; 2263 return -EINVAL;
2255 2264
2256 down_write(&root->snap_sem);
2257 freeze_bdev(root->fs_info->sb->s_bdev);
2258 thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb);
2259
2260 mutex_lock(&root->fs_info->fs_mutex); 2265 mutex_lock(&root->fs_info->fs_mutex);
2261 ret = btrfs_check_free_space(root, 1, 0); 2266 ret = btrfs_check_free_space(root, 1, 0);
2262 if (ret) 2267 if (ret)
@@ -2264,6 +2269,9 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2264 2269
2265 trans = btrfs_start_transaction(root, 1); 2270 trans = btrfs_start_transaction(root, 1);
2266 BUG_ON(!trans); 2271 BUG_ON(!trans);
2272 err = btrfs_commit_transaction(trans, root);
2273
2274 trans = btrfs_start_transaction(root, 1);
2267 2275
2268 ret = btrfs_update_inode(trans, root, root->inode); 2276 ret = btrfs_update_inode(trans, root, root->inode);
2269 if (ret) 2277 if (ret)
@@ -2272,9 +2280,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2272 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 2280 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2273 0, &objectid); 2281 0, &objectid);
2274 if (ret) 2282 if (ret)
2275 goto fail; 2283 goto fail; memcpy(&new_root_item, &root->root_item,
2276
2277 memcpy(&new_root_item, &root->root_item,
2278 sizeof(new_root_item)); 2284 sizeof(new_root_item));
2279 2285
2280 key.objectid = objectid; 2286 key.objectid = objectid;
@@ -2285,12 +2291,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2285 btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); 2291 btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
2286 free_extent_buffer(tmp); 2292 free_extent_buffer(tmp);
2287 2293
2294 /* write the ordered inodes to force all delayed allocations to
2295 * be filled. Once this is done, we can copy the root
2296 */
2297 mutex_lock(&root->fs_info->trans_mutex);
2298 btrfs_write_ordered_inodes(trans, root);
2299 mutex_unlock(&root->fs_info->trans_mutex);
2300
2288 btrfs_copy_root(trans, root, root->node, &tmp, objectid); 2301 btrfs_copy_root(trans, root, root->node, &tmp, objectid);
2289 2302
2290 btrfs_set_root_bytenr(&new_root_item, tmp->start); 2303 btrfs_set_root_bytenr(&new_root_item, tmp->start);
2291 btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); 2304 btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp));
2292 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 2305 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2293 &new_root_item); 2306 &new_root_item);
2307printk("new root %Lu node %Lu\n", objectid, tmp->start);
2294 free_extent_buffer(tmp); 2308 free_extent_buffer(tmp);
2295 if (ret) 2309 if (ret)
2296 goto fail; 2310 goto fail;
@@ -2321,7 +2335,6 @@ fail:
2321 ret = err; 2335 ret = err;
2322fail_unlock: 2336fail_unlock:
2323 mutex_unlock(&root->fs_info->fs_mutex); 2337 mutex_unlock(&root->fs_info->fs_mutex);
2324 up_write(&root->snap_sem);
2325 btrfs_btree_balance_dirty(root, nr); 2338 btrfs_btree_balance_dirty(root, nr);
2326 return ret; 2339 return ret;
2327} 2340}
@@ -2608,6 +2621,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
2608 if (!ei) 2621 if (!ei)
2609 return NULL; 2622 return NULL;
2610 ei->last_trans = 0; 2623 ei->last_trans = 0;
2624 ei->ordered_trans = 0;
2611 return &ei->vfs_inode; 2625 return &ei->vfs_inode;
2612} 2626}
2613 2627
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
new file mode 100644
index 000000000000..411aba84d305
--- /dev/null
+++ b/fs/btrfs/ordered-data.c
@@ -0,0 +1,221 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/gfp.h>
20#include <linux/slab.h>
21#include "ctree.h"
22#include "transaction.h"
23#include "btrfs_inode.h"
24
/*
 * One node of an ordered inode tree.  Entries are keyed by
 * (root_objectid, objectid) and compared with comp_entry().
 */
25struct tree_entry {
/* objectid of the root owning the inode (root->root_key.objectid) */
26 u64 root_objectid;
/* inode number, filled in from inode->i_ino */
27 u64 objectid;
/* linkage into the rb tree held by struct btrfs_ordered_inode_tree */
28 struct rb_node rb_node;
29};
30
31/*
32 * returns > 0 if entry passed (root, objectid) is > entry,
33 * < 0 if (root, objectid) < entry and zero if they are equal
34 */
35static int comp_entry(struct tree_entry *entry, u64 root_objectid,
36 u64 objectid)
37{
38 if (root_objectid < entry->root_objectid)
39 return -1;
40 if (root_objectid > entry->root_objectid)
41 return 1;
42 if (objectid < entry->objectid)
43 return -1;
44 if (objectid > entry->objectid)
45 return 1;
46 return 0;
47}
48
49static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid,
50 u64 objectid, struct rb_node *node)
51{
52 struct rb_node ** p = &root->rb_node;
53 struct rb_node * parent = NULL;
54 struct tree_entry *entry;
55 int comp;
56
57 while(*p) {
58 parent = *p;
59 entry = rb_entry(parent, struct tree_entry, rb_node);
60
61 comp = comp_entry(entry, root_objectid, objectid);
62 if (comp < 0)
63 p = &(*p)->rb_left;
64 else if (comp > 0)
65 p = &(*p)->rb_right;
66 else
67 return parent;
68 }
69
70 rb_link_node(node, parent, p);
71 rb_insert_color(node, root);
72 return NULL;
73}
74
75static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid,
76 u64 objectid, struct rb_node **prev_ret)
77{
78 struct rb_node * n = root->rb_node;
79 struct rb_node *prev = NULL;
80 struct tree_entry *entry;
81 struct tree_entry *prev_entry = NULL;
82 int comp;
83
84 while(n) {
85 entry = rb_entry(n, struct tree_entry, rb_node);
86 prev = n;
87 prev_entry = entry;
88 comp = comp_entry(entry, root_objectid, objectid);
89
90 if (comp < 0)
91 n = n->rb_left;
92 else if (comp > 0)
93 n = n->rb_right;
94 else
95 return n;
96 }
97 if (!prev_ret)
98 return NULL;
99
100 while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) {
101 prev = rb_next(prev);
102 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
103 }
104 *prev_ret = prev;
105 return NULL;
106}
107
108static inline struct rb_node *tree_search(struct rb_root *root,
109 u64 root_objectid, u64 objectid)
110{
111 struct rb_node *prev;
112 struct rb_node *ret;
113 ret = __tree_search(root, root_objectid, objectid, &prev);
114 if (!ret)
115 return prev;
116 return ret;
117}
118
119int btrfs_add_ordered_inode(struct inode *inode)
120{
121 struct btrfs_root *root = BTRFS_I(inode)->root;
122 u64 root_objectid = root->root_key.objectid;
123 u64 transid = root->fs_info->running_transaction->transid;
124 struct tree_entry *entry;
125 struct rb_node *node;
126 struct btrfs_ordered_inode_tree *tree;
127
128 if (transid <= BTRFS_I(inode)->ordered_trans)
129 return 0;
130
131 tree = &root->fs_info->running_transaction->ordered_inode_tree;
132
133 read_lock(&tree->lock);
134 node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL);
135 read_unlock(&tree->lock);
136 if (node) {
137 return 0;
138 }
139
140 entry = kmalloc(sizeof(*entry), GFP_NOFS);
141 if (!entry)
142 return -ENOMEM;
143
144 write_lock(&tree->lock);
145 entry->objectid = inode->i_ino;
146 entry->root_objectid = root_objectid;
147
148 node = tree_insert(&tree->tree, root_objectid,
149 inode->i_ino, &entry->rb_node);
150
151 BTRFS_I(inode)->ordered_trans = transid;
152
153 write_unlock(&tree->lock);
154 if (node)
155 kfree(entry);
156 return 0;
157}
158
159int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
160 u64 *root_objectid, u64 *objectid)
161{
162 struct tree_entry *entry;
163 struct rb_node *node;
164
165 write_lock(&tree->lock);
166 node = tree_search(&tree->tree, *root_objectid, *objectid);
167 if (!node) {
168 write_unlock(&tree->lock);
169 return 0;
170 }
171 entry = rb_entry(node, struct tree_entry, rb_node);
172
173 while(comp_entry(entry, *root_objectid, *objectid) >= 0) {
174 node = rb_next(node);
175 if (!node)
176 break;
177 entry = rb_entry(node, struct tree_entry, rb_node);
178 }
179 if (!node) {
180 write_unlock(&tree->lock);
181 return 0;
182 }
183
184 *root_objectid = entry->root_objectid;
185 *objectid = entry->objectid;
186 write_unlock(&tree->lock);
187 return 1;
188}
189
190int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
191 u64 *root_objectid, u64 *objectid)
192{
193 struct tree_entry *entry;
194 struct rb_node *node;
195
196 write_lock(&tree->lock);
197 node = tree_search(&tree->tree, *root_objectid, *objectid);
198 if (!node) {
199 write_unlock(&tree->lock);
200 return 0;
201 }
202
203 entry = rb_entry(node, struct tree_entry, rb_node);
204 while(comp_entry(entry, *root_objectid, *objectid) >= 0) {
205 node = rb_next(node);
206 if (!node)
207 break;
208 entry = rb_entry(node, struct tree_entry, rb_node);
209 }
210 if (!node) {
211 write_unlock(&tree->lock);
212 return 0;
213 }
214
215 *root_objectid = entry->root_objectid;
216 *objectid = entry->objectid;
217 rb_erase(node, &tree->tree);
218 write_unlock(&tree->lock);
219 kfree(entry);
220 return 1;
221}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
new file mode 100644
index 000000000000..aaf9eb142719
--- /dev/null
+++ b/fs/btrfs/ordered-data.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_ORDERED_DATA__
20#define __BTRFS_ORDERED_DATA__
21
/*
 * Set of inodes recorded by btrfs_add_ordered_inode(); initialised with
 * btrfs_ordered_inode_tree_init().
 */
22struct btrfs_ordered_inode_tree {
/* reader/writer lock taken around searches, inserts and removals */
23 rwlock_t lock;
/* rb tree root; starts out empty (rb_node == NULL) */
24 struct rb_root tree;
25};
26
27static inline void
28btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
29{
30 rwlock_init(&t->lock);
31 t->tree.rb_node = NULL;
32}
33
34int btrfs_add_ordered_inode(struct inode *inode);
35int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
36 u64 *root_objectid, u64 *objectid);
37int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
38 u64 *root_objectid, u64 *objectid);
39#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 02721eea9a7a..3ed5868e7c0f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -67,6 +67,7 @@ static int join_transaction(struct btrfs_root *root)
67 cur_trans->commit_done = 0; 67 cur_trans->commit_done = 0;
68 cur_trans->start_time = get_seconds(); 68 cur_trans->start_time = get_seconds();
69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
70 btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
70 extent_map_tree_init(&cur_trans->dirty_pages, 71 extent_map_tree_init(&cur_trans->dirty_pages,
71 root->fs_info->btree_inode->i_mapping, 72 root->fs_info->btree_inode->i_mapping,
72 GFP_NOFS); 73 GFP_NOFS);
@@ -473,6 +474,60 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
473 return ret; 474 return ret;
474} 475}
475 476
477int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
478 struct btrfs_root *root)
479{
480 struct btrfs_transaction *cur_trans = trans->transaction;
481 struct inode *inode;
482 u64 root_objectid = 0;
483 u64 objectid = 0;
484 u64 transid = trans->transid;
485 int ret;
486
487printk("write ordered trans %Lu\n", transid);
488 while(1) {
489 ret = btrfs_find_first_ordered_inode(
490 &cur_trans->ordered_inode_tree,
491 &root_objectid, &objectid);
492 if (!ret)
493 break;
494
495 mutex_unlock(&root->fs_info->trans_mutex);
496 mutex_unlock(&root->fs_info->fs_mutex);
497 inode = btrfs_ilookup(root->fs_info->sb, objectid,
498 root_objectid);
499 if (inode) {
500 if (S_ISREG(inode->i_mode))
501 filemap_fdatawrite(inode->i_mapping);
502 iput(inode);
503 }
504 mutex_lock(&root->fs_info->fs_mutex);
505 mutex_lock(&root->fs_info->trans_mutex);
506 }
507 while(1) {
508 root_objectid = 0;
509 objectid = 0;
510 ret = btrfs_find_del_first_ordered_inode(
511 &cur_trans->ordered_inode_tree,
512 &root_objectid, &objectid);
513 if (!ret)
514 break;
515 mutex_unlock(&root->fs_info->trans_mutex);
516 mutex_unlock(&root->fs_info->fs_mutex);
517 inode = btrfs_ilookup(root->fs_info->sb, objectid,
518 root_objectid);
519 if (inode) {
520 if (S_ISREG(inode->i_mode))
521 filemap_write_and_wait(inode->i_mapping);
522 iput(inode);
523 }
524 mutex_lock(&root->fs_info->fs_mutex);
525 mutex_lock(&root->fs_info->trans_mutex);
526 }
527printk("done write ordered trans %Lu\n", transid);
528 return 0;
529}
530
476int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 531int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
477 struct btrfs_root *root) 532 struct btrfs_root *root)
478{ 533{
@@ -550,10 +605,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
550 mutex_lock(&root->fs_info->fs_mutex); 605 mutex_lock(&root->fs_info->fs_mutex);
551 mutex_lock(&root->fs_info->trans_mutex); 606 mutex_lock(&root->fs_info->trans_mutex);
552 finish_wait(&cur_trans->writer_wait, &wait); 607 finish_wait(&cur_trans->writer_wait, &wait);
608 ret = btrfs_write_ordered_inodes(trans, root);
609
553 } while (cur_trans->num_writers > 1 || 610 } while (cur_trans->num_writers > 1 ||
554 (cur_trans->num_joined != joined)); 611 (cur_trans->num_joined != joined));
555 612
556 WARN_ON(cur_trans != trans->transaction); 613 WARN_ON(cur_trans != trans->transaction);
614
557 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, 615 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
558 &dirty_fs_roots); 616 &dirty_fs_roots);
559 BUG_ON(ret); 617 BUG_ON(ret);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index eef840bca91e..c157ddbe9d1e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -16,9 +16,10 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#ifndef __TRANSACTION__ 19#ifndef __BTRFS_TRANSACTION__
20#define __TRANSACTION__ 20#define __BTRFS_TRANSACTION__
21#include "btrfs_inode.h" 21#include "btrfs_inode.h"
22#include "ordered-data.h"
22 23
23struct btrfs_transaction { 24struct btrfs_transaction {
24 u64 transid; 25 u64 transid;
@@ -30,6 +31,7 @@ struct btrfs_transaction {
30 struct list_head list; 31 struct list_head list;
31 struct extent_map_tree dirty_pages; 32 struct extent_map_tree dirty_pages;
32 unsigned long start_time; 33 unsigned long start_time;
34 struct btrfs_ordered_inode_tree ordered_inode_tree;
33 wait_queue_head_t writer_wait; 35 wait_queue_head_t writer_wait;
34 wait_queue_head_t commit_wait; 36 wait_queue_head_t commit_wait;
35}; 37};
@@ -90,4 +92,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
90int btrfs_clean_old_snapshots(struct btrfs_root *root); 92int btrfs_clean_old_snapshots(struct btrfs_root *root);
91int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 93int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
92 struct btrfs_root *root); 94 struct btrfs_root *root);
95int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
96 struct btrfs_root *root);
93#endif 97#endif