aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/backref.c15
-rw-r--r--fs/btrfs/ctree.c60
-rw-r--r--fs/btrfs/disk-io.c34
-rw-r--r--fs/btrfs/extent-tree.c11
-rw-r--r--fs/btrfs/extent_io.c14
-rw-r--r--fs/btrfs/file.c13
-rw-r--r--fs/btrfs/free-space-cache.c145
-rw-r--r--fs/btrfs/inode.c57
-rw-r--r--fs/btrfs/ioctl.h2
-rw-r--r--fs/btrfs/super.c4
-rw-r--r--fs/btrfs/tree-log.c6
-rw-r--r--fs/btrfs/volumes.c95
-rw-r--r--fs/btrfs/volumes.h3
13 files changed, 258 insertions, 201 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7301cdb4b2cb..a383c18e74e8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -301,10 +301,14 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
301 goto out; 301 goto out;
302 302
303 eb = path->nodes[level]; 303 eb = path->nodes[level];
304 if (!eb) { 304 while (!eb) {
305 WARN_ON(1); 305 if (!level) {
306 ret = 1; 306 WARN_ON(1);
307 goto out; 307 ret = 1;
308 goto out;
309 }
310 level--;
311 eb = path->nodes[level];
308 } 312 }
309 313
310 ret = add_all_parents(root, path, parents, level, &ref->key_for_search, 314 ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
@@ -835,6 +839,7 @@ again:
835 } 839 }
836 ret = __add_delayed_refs(head, delayed_ref_seq, 840 ret = __add_delayed_refs(head, delayed_ref_seq,
837 &prefs_delayed); 841 &prefs_delayed);
842 mutex_unlock(&head->mutex);
838 if (ret) { 843 if (ret) {
839 spin_unlock(&delayed_refs->lock); 844 spin_unlock(&delayed_refs->lock);
840 goto out; 845 goto out;
@@ -928,8 +933,6 @@ again:
928 } 933 }
929 934
930out: 935out:
931 if (head)
932 mutex_unlock(&head->mutex);
933 btrfs_free_path(path); 936 btrfs_free_path(path);
934 while (!list_empty(&prefs)) { 937 while (!list_empty(&prefs)) {
935 ref = list_first_entry(&prefs, struct __prelim_ref, list); 938 ref = list_first_entry(&prefs, struct __prelim_ref, list);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 15cbc2bf4ff0..8206b3900587 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1024 if (!looped && !tm) 1024 if (!looped && !tm)
1025 return 0; 1025 return 0;
1026 /* 1026 /*
1027 * we must have key remove operations in the log before the 1027 * if there are no tree operation for the oldest root, we simply
1028 * replace operation. 1028 * return it. this should only happen if that (old) root is at
1029 * level 0.
1029 */ 1030 */
1030 BUG_ON(!tm); 1031 if (!tm)
1032 break;
1031 1033
1034 /*
1035 * if there's an operation that's not a root replacement, we
1036 * found the oldest version of our root. normally, we'll find a
1037 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
1038 */
1032 if (tm->op != MOD_LOG_ROOT_REPLACE) 1039 if (tm->op != MOD_LOG_ROOT_REPLACE)
1033 break; 1040 break;
1034 1041
@@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
1087 tm->generation); 1094 tm->generation);
1088 break; 1095 break;
1089 case MOD_LOG_KEY_ADD: 1096 case MOD_LOG_KEY_ADD:
1090 if (tm->slot != n - 1) { 1097 /* if a move operation is needed it's in the log */
1091 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1092 o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
1093 memmove_extent_buffer(eb, o_dst, o_src, p_size);
1094 }
1095 n--; 1098 n--;
1096 break; 1099 break;
1097 case MOD_LOG_MOVE_KEYS: 1100 case MOD_LOG_MOVE_KEYS:
@@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1192 } 1195 }
1193 1196
1194 tm = tree_mod_log_search(root->fs_info, logical, time_seq); 1197 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
1195 /*
1196 * there was an item in the log when __tree_mod_log_oldest_root
1197 * returned. this one must not go away, because the time_seq passed to
1198 * us must be blocking its removal.
1199 */
1200 BUG_ON(!tm);
1201
1202 if (old_root) 1198 if (old_root)
1203 eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, 1199 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
1204 root->nodesize);
1205 else 1200 else
1206 eb = btrfs_clone_extent_buffer(root->node); 1201 eb = btrfs_clone_extent_buffer(root->node);
1207 btrfs_tree_read_unlock(root->node); 1202 btrfs_tree_read_unlock(root->node);
@@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1216 btrfs_set_header_level(eb, old_root->level); 1211 btrfs_set_header_level(eb, old_root->level);
1217 btrfs_set_header_generation(eb, old_generation); 1212 btrfs_set_header_generation(eb, old_generation);
1218 } 1213 }
1219 __tree_mod_log_rewind(eb, time_seq, tm); 1214 if (tm)
1215 __tree_mod_log_rewind(eb, time_seq, tm);
1216 else
1217 WARN_ON(btrfs_header_level(eb) != 0);
1220 extent_buffer_get(eb); 1218 extent_buffer_get(eb);
1221 1219
1222 return eb; 1220 return eb;
@@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2995static void insert_ptr(struct btrfs_trans_handle *trans, 2993static void insert_ptr(struct btrfs_trans_handle *trans,
2996 struct btrfs_root *root, struct btrfs_path *path, 2994 struct btrfs_root *root, struct btrfs_path *path,
2997 struct btrfs_disk_key *key, u64 bytenr, 2995 struct btrfs_disk_key *key, u64 bytenr,
2998 int slot, int level, int tree_mod_log) 2996 int slot, int level)
2999{ 2997{
3000 struct extent_buffer *lower; 2998 struct extent_buffer *lower;
3001 int nritems; 2999 int nritems;
@@ -3008,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
3008 BUG_ON(slot > nritems); 3006 BUG_ON(slot > nritems);
3009 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); 3007 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
3010 if (slot != nritems) { 3008 if (slot != nritems) {
3011 if (tree_mod_log && level) 3009 if (level)
3012 tree_mod_log_eb_move(root->fs_info, lower, slot + 1, 3010 tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
3013 slot, nritems - slot); 3011 slot, nritems - slot);
3014 memmove_extent_buffer(lower, 3012 memmove_extent_buffer(lower,
@@ -3016,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
3016 btrfs_node_key_ptr_offset(slot), 3014 btrfs_node_key_ptr_offset(slot),
3017 (nritems - slot) * sizeof(struct btrfs_key_ptr)); 3015 (nritems - slot) * sizeof(struct btrfs_key_ptr));
3018 } 3016 }
3019 if (tree_mod_log && level) { 3017 if (level) {
3020 ret = tree_mod_log_insert_key(root->fs_info, lower, slot, 3018 ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
3021 MOD_LOG_KEY_ADD); 3019 MOD_LOG_KEY_ADD);
3022 BUG_ON(ret < 0); 3020 BUG_ON(ret < 0);
@@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3104 btrfs_mark_buffer_dirty(split); 3102 btrfs_mark_buffer_dirty(split);
3105 3103
3106 insert_ptr(trans, root, path, &disk_key, split->start, 3104 insert_ptr(trans, root, path, &disk_key, split->start,
3107 path->slots[level + 1] + 1, level + 1, 1); 3105 path->slots[level + 1] + 1, level + 1);
3108 3106
3109 if (path->slots[level] >= mid) { 3107 if (path->slots[level] >= mid) {
3110 path->slots[level] -= mid; 3108 path->slots[level] -= mid;
@@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
3641 btrfs_set_header_nritems(l, mid); 3639 btrfs_set_header_nritems(l, mid);
3642 btrfs_item_key(right, &disk_key, 0); 3640 btrfs_item_key(right, &disk_key, 0);
3643 insert_ptr(trans, root, path, &disk_key, right->start, 3641 insert_ptr(trans, root, path, &disk_key, right->start,
3644 path->slots[1] + 1, 1, 0); 3642 path->slots[1] + 1, 1);
3645 3643
3646 btrfs_mark_buffer_dirty(right); 3644 btrfs_mark_buffer_dirty(right);
3647 btrfs_mark_buffer_dirty(l); 3645 btrfs_mark_buffer_dirty(l);
@@ -3848,7 +3846,7 @@ again:
3848 if (mid <= slot) { 3846 if (mid <= slot) {
3849 btrfs_set_header_nritems(right, 0); 3847 btrfs_set_header_nritems(right, 0);
3850 insert_ptr(trans, root, path, &disk_key, right->start, 3848 insert_ptr(trans, root, path, &disk_key, right->start,
3851 path->slots[1] + 1, 1, 0); 3849 path->slots[1] + 1, 1);
3852 btrfs_tree_unlock(path->nodes[0]); 3850 btrfs_tree_unlock(path->nodes[0]);
3853 free_extent_buffer(path->nodes[0]); 3851 free_extent_buffer(path->nodes[0]);
3854 path->nodes[0] = right; 3852 path->nodes[0] = right;
@@ -3857,7 +3855,7 @@ again:
3857 } else { 3855 } else {
3858 btrfs_set_header_nritems(right, 0); 3856 btrfs_set_header_nritems(right, 0);
3859 insert_ptr(trans, root, path, &disk_key, right->start, 3857 insert_ptr(trans, root, path, &disk_key, right->start,
3860 path->slots[1], 1, 0); 3858 path->slots[1], 1);
3861 btrfs_tree_unlock(path->nodes[0]); 3859 btrfs_tree_unlock(path->nodes[0]);
3862 free_extent_buffer(path->nodes[0]); 3860 free_extent_buffer(path->nodes[0]);
3863 path->nodes[0] = right; 3861 path->nodes[0] = right;
@@ -5121,6 +5119,18 @@ again:
5121 5119
5122 if (!path->skip_locking) { 5120 if (!path->skip_locking) {
5123 ret = btrfs_try_tree_read_lock(next); 5121 ret = btrfs_try_tree_read_lock(next);
5122 if (!ret && time_seq) {
5123 /*
5124 * If we don't get the lock, we may be racing
5125 * with push_leaf_left, holding that lock while
5126 * itself waiting for the leaf we've currently
5127 * locked. To solve this situation, we give up
5128 * on our lock and cycle.
5129 */
5130 btrfs_release_path(path);
5131 cond_resched();
5132 goto again;
5133 }
5124 if (!ret) { 5134 if (!ret) {
5125 btrfs_set_path_blocking(path); 5135 btrfs_set_path_blocking(path);
5126 btrfs_tree_read_lock(next); 5136 btrfs_tree_read_lock(next);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7b845ff4af99..2936ca49b3b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2354,12 +2354,17 @@ retry_root_backup:
2354 BTRFS_CSUM_TREE_OBJECTID, csum_root); 2354 BTRFS_CSUM_TREE_OBJECTID, csum_root);
2355 if (ret) 2355 if (ret)
2356 goto recovery_tree_root; 2356 goto recovery_tree_root;
2357
2358 csum_root->track_dirty = 1; 2357 csum_root->track_dirty = 1;
2359 2358
2360 fs_info->generation = generation; 2359 fs_info->generation = generation;
2361 fs_info->last_trans_committed = generation; 2360 fs_info->last_trans_committed = generation;
2362 2361
2362 ret = btrfs_recover_balance(fs_info);
2363 if (ret) {
2364 printk(KERN_WARNING "btrfs: failed to recover balance\n");
2365 goto fail_block_groups;
2366 }
2367
2363 ret = btrfs_init_dev_stats(fs_info); 2368 ret = btrfs_init_dev_stats(fs_info);
2364 if (ret) { 2369 if (ret) {
2365 printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", 2370 printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2485,20 +2490,23 @@ retry_root_backup:
2485 goto fail_trans_kthread; 2490 goto fail_trans_kthread;
2486 } 2491 }
2487 2492
2488 if (!(sb->s_flags & MS_RDONLY)) { 2493 if (sb->s_flags & MS_RDONLY)
2489 down_read(&fs_info->cleanup_work_sem); 2494 return 0;
2490 err = btrfs_orphan_cleanup(fs_info->fs_root);
2491 if (!err)
2492 err = btrfs_orphan_cleanup(fs_info->tree_root);
2493 up_read(&fs_info->cleanup_work_sem);
2494 2495
2495 if (!err) 2496 down_read(&fs_info->cleanup_work_sem);
2496 err = btrfs_recover_balance(fs_info->tree_root); 2497 if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
2498 (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
2499 up_read(&fs_info->cleanup_work_sem);
2500 close_ctree(tree_root);
2501 return ret;
2502 }
2503 up_read(&fs_info->cleanup_work_sem);
2497 2504
2498 if (err) { 2505 ret = btrfs_resume_balance_async(fs_info);
2499 close_ctree(tree_root); 2506 if (ret) {
2500 return err; 2507 printk(KERN_WARNING "btrfs: failed to resume balance\n");
2501 } 2508 close_ctree(tree_root);
2509 return ret;
2502 } 2510 }
2503 2511
2504 return 0; 2512 return 0;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b5a1e1bdefb..6e1d36702ff7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ next:
2347 return count; 2347 return count;
2348} 2348}
2349 2349
2350
2351static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, 2350static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
2352 unsigned long num_refs) 2351 unsigned long num_refs,
2352 struct list_head *first_seq)
2353{ 2353{
2354 struct list_head *first_seq = delayed_refs->seq_head.next;
2355
2356 spin_unlock(&delayed_refs->lock); 2354 spin_unlock(&delayed_refs->lock);
2357 pr_debug("waiting for more refs (num %ld, first %p)\n", 2355 pr_debug("waiting for more refs (num %ld, first %p)\n",
2358 num_refs, first_seq); 2356 num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2381 struct btrfs_delayed_ref_root *delayed_refs; 2379 struct btrfs_delayed_ref_root *delayed_refs;
2382 struct btrfs_delayed_ref_node *ref; 2380 struct btrfs_delayed_ref_node *ref;
2383 struct list_head cluster; 2381 struct list_head cluster;
2382 struct list_head *first_seq = NULL;
2384 int ret; 2383 int ret;
2385 u64 delayed_start; 2384 u64 delayed_start;
2386 int run_all = count == (unsigned long)-1; 2385 int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
2436 */ 2435 */
2437 consider_waiting = 1; 2436 consider_waiting = 1;
2438 num_refs = delayed_refs->num_entries; 2437 num_refs = delayed_refs->num_entries;
2438 first_seq = root->fs_info->tree_mod_seq_list.next;
2439 } else { 2439 } else {
2440 wait_for_more_refs(delayed_refs, num_refs); 2440 wait_for_more_refs(delayed_refs,
2441 num_refs, first_seq);
2441 /* 2442 /*
2442 * after waiting, things have changed. we 2443 * after waiting, things have changed. we
2443 * dropped the lock and someone else might have 2444 * dropped the lock and someone else might have
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaa12c1eb348..01c21b6c6d43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
3324 writepage_t writepage, void *data, 3324 writepage_t writepage, void *data,
3325 void (*flush_fn)(void *)) 3325 void (*flush_fn)(void *))
3326{ 3326{
3327 struct inode *inode = mapping->host;
3327 int ret = 0; 3328 int ret = 0;
3328 int done = 0; 3329 int done = 0;
3329 int nr_to_write_done = 0; 3330 int nr_to_write_done = 0;
@@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
3334 int scanned = 0; 3335 int scanned = 0;
3335 int tag; 3336 int tag;
3336 3337
3338 /*
3339 * We have to hold onto the inode so that ordered extents can do their
3340 * work when the IO finishes. The alternative to this is failing to add
3341 * an ordered extent if the igrab() fails there and that is a huge pain
3342 * to deal with, so instead just hold onto the inode throughout the
3343 * writepages operation. If it fails here we are freeing up the inode
3344 * anyway and we'd rather not waste our time writing out stuff that is
3345 * going to be truncated anyway.
3346 */
3347 if (!igrab(inode))
3348 return 0;
3349
3337 pagevec_init(&pvec, 0); 3350 pagevec_init(&pvec, 0);
3338 if (wbc->range_cyclic) { 3351 if (wbc->range_cyclic) {
3339 index = mapping->writeback_index; /* Start from prev offset */ 3352 index = mapping->writeback_index; /* Start from prev offset */
@@ -3428,6 +3441,7 @@ retry:
3428 index = 0; 3441 index = 0;
3429 goto retry; 3442 goto retry;
3430 } 3443 }
3444 btrfs_add_delayed_iput(inode);
3431 return ret; 3445 return ret;
3432} 3446}
3433 3447
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 70dc8ca73e25..9aa01ec2138d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1334 loff_t *ppos, size_t count, size_t ocount) 1334 loff_t *ppos, size_t count, size_t ocount)
1335{ 1335{
1336 struct file *file = iocb->ki_filp; 1336 struct file *file = iocb->ki_filp;
1337 struct inode *inode = fdentry(file)->d_inode;
1338 struct iov_iter i; 1337 struct iov_iter i;
1339 ssize_t written; 1338 ssize_t written;
1340 ssize_t written_buffered; 1339 ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1344 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, 1343 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
1345 count, ocount); 1344 count, ocount);
1346 1345
1347 /*
1348 * the generic O_DIRECT will update in-memory i_size after the
1349 * DIOs are done. But our endio handlers that update the on
1350 * disk i_size never update past the in memory i_size. So we
1351 * need one more update here to catch any additions to the
1352 * file
1353 */
1354 if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
1355 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
1356 mark_inode_dirty(inode);
1357 }
1358
1359 if (written < 0 || written == count) 1346 if (written < 0 || written == count)
1360 return written; 1347 return written;
1361 1348
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81296c57405a..6c4e2baa9290 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ again:
1543 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1543 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1544 1544
1545 /* 1545 /*
1546 * XXX - this can go away after a few releases. 1546 * We need to search for bits in this bitmap. We could only cover some
1547 * 1547 * of the extent in this bitmap thanks to how we add space, so we need
1548 * since the only user of btrfs_remove_free_space is the tree logging 1548 * to search for as much as it as we can and clear that amount, and then
1549 * stuff, and the only way to test that is under crash conditions, we 1549 * go searching for the next bit.
1550 * want to have this debug stuff here just in case somethings not
1551 * working. Search the bitmap for the space we are trying to use to
1552 * make sure its actually there. If its not there then we need to stop
1553 * because something has gone wrong.
1554 */ 1550 */
1555 search_start = *offset; 1551 search_start = *offset;
1556 search_bytes = *bytes; 1552 search_bytes = ctl->unit;
1557 search_bytes = min(search_bytes, end - search_start + 1); 1553 search_bytes = min(search_bytes, end - search_start + 1);
1558 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); 1554 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1559 BUG_ON(ret < 0 || search_start != *offset); 1555 BUG_ON(ret < 0 || search_start != *offset);
1560 1556
1561 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 1557 /* We may have found more bits than what we need */
1562 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1); 1558 search_bytes = min(search_bytes, *bytes);
1563 *bytes -= end - *offset + 1; 1559
1564 *offset = end + 1; 1560 /* Cannot clear past the end of the bitmap */
1565 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 1561 search_bytes = min(search_bytes, end - search_start + 1);
1566 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes); 1562
1567 *bytes = 0; 1563 bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
1568 } 1564 *offset += search_bytes;
1565 *bytes -= search_bytes;
1569 1566
1570 if (*bytes) { 1567 if (*bytes) {
1571 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1568 struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ again:
1596 * everything over again. 1593 * everything over again.
1597 */ 1594 */
1598 search_start = *offset; 1595 search_start = *offset;
1599 search_bytes = *bytes; 1596 search_bytes = ctl->unit;
1600 ret = search_bitmap(ctl, bitmap_info, &search_start, 1597 ret = search_bitmap(ctl, bitmap_info, &search_start,
1601 &search_bytes); 1598 &search_bytes);
1602 if (ret < 0 || search_start != *offset) 1599 if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1879{ 1876{
1880 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1877 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1881 struct btrfs_free_space *info; 1878 struct btrfs_free_space *info;
1882 struct btrfs_free_space *next_info = NULL;
1883 int ret = 0; 1879 int ret = 0;
1884 1880
1885 spin_lock(&ctl->tree_lock); 1881 spin_lock(&ctl->tree_lock);
1886 1882
1887again: 1883again:
1884 if (!bytes)
1885 goto out_lock;
1886
1888 info = tree_search_offset(ctl, offset, 0, 0); 1887 info = tree_search_offset(ctl, offset, 0, 0);
1889 if (!info) { 1888 if (!info) {
1890 /* 1889 /*
@@ -1905,88 +1904,48 @@ again:
1905 } 1904 }
1906 } 1905 }
1907 1906
1908 if (info->bytes < bytes && rb_next(&info->offset_index)) { 1907 if (!info->bitmap) {
1909 u64 end;
1910 next_info = rb_entry(rb_next(&info->offset_index),
1911 struct btrfs_free_space,
1912 offset_index);
1913
1914 if (next_info->bitmap)
1915 end = next_info->offset +
1916 BITS_PER_BITMAP * ctl->unit - 1;
1917 else
1918 end = next_info->offset + next_info->bytes;
1919
1920 if (next_info->bytes < bytes ||
1921 next_info->offset > offset || offset > end) {
1922 printk(KERN_CRIT "Found free space at %llu, size %llu,"
1923 " trying to use %llu\n",
1924 (unsigned long long)info->offset,
1925 (unsigned long long)info->bytes,
1926 (unsigned long long)bytes);
1927 WARN_ON(1);
1928 ret = -EINVAL;
1929 goto out_lock;
1930 }
1931
1932 info = next_info;
1933 }
1934
1935 if (info->bytes == bytes) {
1936 unlink_free_space(ctl, info); 1908 unlink_free_space(ctl, info);
1937 if (info->bitmap) { 1909 if (offset == info->offset) {
1938 kfree(info->bitmap); 1910 u64 to_free = min(bytes, info->bytes);
1939 ctl->total_bitmaps--; 1911
1940 } 1912 info->bytes -= to_free;
1941 kmem_cache_free(btrfs_free_space_cachep, info); 1913 info->offset += to_free;
1942 ret = 0; 1914 if (info->bytes) {
1943 goto out_lock; 1915 ret = link_free_space(ctl, info);
1944 } 1916 WARN_ON(ret);
1945 1917 } else {
1946 if (!info->bitmap && info->offset == offset) { 1918 kmem_cache_free(btrfs_free_space_cachep, info);
1947 unlink_free_space(ctl, info); 1919 }
1948 info->offset += bytes;
1949 info->bytes -= bytes;
1950 ret = link_free_space(ctl, info);
1951 WARN_ON(ret);
1952 goto out_lock;
1953 }
1954 1920
1955 if (!info->bitmap && info->offset <= offset && 1921 offset += to_free;
1956 info->offset + info->bytes >= offset + bytes) { 1922 bytes -= to_free;
1957 u64 old_start = info->offset; 1923 goto again;
1958 /* 1924 } else {
1959 * we're freeing space in the middle of the info, 1925 u64 old_end = info->bytes + info->offset;
1960 * this can happen during tree log replay
1961 *
1962 * first unlink the old info and then
1963 * insert it again after the hole we're creating
1964 */
1965 unlink_free_space(ctl, info);
1966 if (offset + bytes < info->offset + info->bytes) {
1967 u64 old_end = info->offset + info->bytes;
1968 1926
1969 info->offset = offset + bytes; 1927 info->bytes = offset - info->offset;
1970 info->bytes = old_end - info->offset;
1971 ret = link_free_space(ctl, info); 1928 ret = link_free_space(ctl, info);
1972 WARN_ON(ret); 1929 WARN_ON(ret);
1973 if (ret) 1930 if (ret)
1974 goto out_lock; 1931 goto out_lock;
1975 } else {
1976 /* the hole we're creating ends at the end
1977 * of the info struct, just free the info
1978 */
1979 kmem_cache_free(btrfs_free_space_cachep, info);
1980 }
1981 spin_unlock(&ctl->tree_lock);
1982 1932
1983 /* step two, insert a new info struct to cover 1933 /* Not enough bytes in this entry to satisfy us */
1984 * anything before the hole 1934 if (old_end < offset + bytes) {
1985 */ 1935 bytes -= old_end - offset;
1986 ret = btrfs_add_free_space(block_group, old_start, 1936 offset = old_end;
1987 offset - old_start); 1937 goto again;
1988 WARN_ON(ret); /* -ENOMEM */ 1938 } else if (old_end == offset + bytes) {
1989 goto out; 1939 /* all done */
1940 goto out_lock;
1941 }
1942 spin_unlock(&ctl->tree_lock);
1943
1944 ret = btrfs_add_free_space(block_group, offset + bytes,
1945 old_end - (offset + bytes));
1946 WARN_ON(ret);
1947 goto out;
1948 }
1990 } 1949 }
1991 1950
1992 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1951 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d8bb0dbc4941..a7d1921ac76b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3754,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
3754 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3754 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3755 3755
3756 if (root->fs_info->log_root_recovering) { 3756 if (root->fs_info->log_root_recovering) {
3757 BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3757 BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3758 &BTRFS_I(inode)->runtime_flags)); 3758 &BTRFS_I(inode)->runtime_flags));
3759 goto no_delete; 3759 goto no_delete;
3760 } 3760 }
@@ -5876,8 +5876,17 @@ map:
5876 bh_result->b_size = len; 5876 bh_result->b_size = len;
5877 bh_result->b_bdev = em->bdev; 5877 bh_result->b_bdev = em->bdev;
5878 set_buffer_mapped(bh_result); 5878 set_buffer_mapped(bh_result);
5879 if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 5879 if (create) {
5880 set_buffer_new(bh_result); 5880 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5881 set_buffer_new(bh_result);
5882
5883 /*
5884 * Need to update the i_size under the extent lock so buffered
5885 * readers will get the updated i_size when we unlock.
5886 */
5887 if (start + len > i_size_read(inode))
5888 i_size_write(inode, start + len);
5889 }
5881 5890
5882 free_extent_map(em); 5891 free_extent_map(em);
5883 5892
@@ -6360,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6360 */ 6369 */
6361 ordered = btrfs_lookup_ordered_range(inode, lockstart, 6370 ordered = btrfs_lookup_ordered_range(inode, lockstart,
6362 lockend - lockstart + 1); 6371 lockend - lockstart + 1);
6363 if (!ordered) 6372
6373 /*
6374 * We need to make sure there are no buffered pages in this
6375 * range either, we could have raced between the invalidate in
6376 * generic_file_direct_write and locking the extent. The
6377 * invalidate needs to happen so that reads after a write do not
6378 * get stale data.
6379 */
6380 if (!ordered && (!writing ||
6381 !test_range_bit(&BTRFS_I(inode)->io_tree,
6382 lockstart, lockend, EXTENT_UPTODATE, 0,
6383 cached_state)))
6364 break; 6384 break;
6385
6365 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6386 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6366 &cached_state, GFP_NOFS); 6387 &cached_state, GFP_NOFS);
6367 btrfs_start_ordered_extent(inode, ordered, 1); 6388
6368 btrfs_put_ordered_extent(ordered); 6389 if (ordered) {
6390 btrfs_start_ordered_extent(inode, ordered, 1);
6391 btrfs_put_ordered_extent(ordered);
6392 } else {
6393 /* Screw you mmap */
6394 ret = filemap_write_and_wait_range(file->f_mapping,
6395 lockstart,
6396 lockend);
6397 if (ret)
6398 goto out;
6399
6400 /*
6401 * If we found a page that couldn't be invalidated just
6402 * fall back to buffered.
6403 */
6404 ret = invalidate_inode_pages2_range(file->f_mapping,
6405 lockstart >> PAGE_CACHE_SHIFT,
6406 lockend >> PAGE_CACHE_SHIFT);
6407 if (ret) {
6408 if (ret == -EBUSY)
6409 ret = 0;
6410 goto out;
6411 }
6412 }
6413
6369 cond_resched(); 6414 cond_resched();
6370 } 6415 }
6371 6416
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 497c530724cf..e440aa653c30 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
339#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 339#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
340#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ 340#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
341 struct btrfs_ioctl_vol_args_v2) 341 struct btrfs_ioctl_vol_args_v2)
342#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) 342#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
343#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 343#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
344#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ 344#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
345 struct btrfs_ioctl_scrub_args) 345 struct btrfs_ioctl_scrub_args)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0eb9a4da069e..e23991574fdf 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1187 if (ret) 1187 if (ret)
1188 goto restore; 1188 goto restore;
1189 1189
1190 ret = btrfs_resume_balance_async(fs_info);
1191 if (ret)
1192 goto restore;
1193
1190 sb->s_flags &= ~MS_RDONLY; 1194 sb->s_flags &= ~MS_RDONLY;
1191 } 1195 }
1192 1196
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2017d0ff511c..8abeae4224f9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
690 kfree(name); 690 kfree(name);
691 691
692 iput(inode); 692 iput(inode);
693
694 btrfs_run_delayed_items(trans, root);
693 return ret; 695 return ret;
694} 696}
695 697
@@ -895,6 +897,7 @@ again:
895 ret = btrfs_unlink_inode(trans, root, dir, 897 ret = btrfs_unlink_inode(trans, root, dir,
896 inode, victim_name, 898 inode, victim_name,
897 victim_name_len); 899 victim_name_len);
900 btrfs_run_delayed_items(trans, root);
898 } 901 }
899 kfree(victim_name); 902 kfree(victim_name);
900 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 903 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ again:
1475 ret = btrfs_unlink_inode(trans, root, dir, inode, 1478 ret = btrfs_unlink_inode(trans, root, dir, inode,
1476 name, name_len); 1479 name, name_len);
1477 BUG_ON(ret); 1480 BUG_ON(ret);
1481
1482 btrfs_run_delayed_items(trans, root);
1483
1478 kfree(name); 1484 kfree(name);
1479 iput(inode); 1485 iput(inode);
1480 1486
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8a3d2594b807..ecaad40e7ef4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2845,31 +2845,48 @@ out:
2845 2845
2846static int balance_kthread(void *data) 2846static int balance_kthread(void *data)
2847{ 2847{
2848 struct btrfs_balance_control *bctl = 2848 struct btrfs_fs_info *fs_info = data;
2849 (struct btrfs_balance_control *)data;
2850 struct btrfs_fs_info *fs_info = bctl->fs_info;
2851 int ret = 0; 2849 int ret = 0;
2852 2850
2853 mutex_lock(&fs_info->volume_mutex); 2851 mutex_lock(&fs_info->volume_mutex);
2854 mutex_lock(&fs_info->balance_mutex); 2852 mutex_lock(&fs_info->balance_mutex);
2855 2853
2856 set_balance_control(bctl); 2854 if (fs_info->balance_ctl) {
2857
2858 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2859 printk(KERN_INFO "btrfs: force skipping balance\n");
2860 } else {
2861 printk(KERN_INFO "btrfs: continuing balance\n"); 2855 printk(KERN_INFO "btrfs: continuing balance\n");
2862 ret = btrfs_balance(bctl, NULL); 2856 ret = btrfs_balance(fs_info->balance_ctl, NULL);
2863 } 2857 }
2864 2858
2865 mutex_unlock(&fs_info->balance_mutex); 2859 mutex_unlock(&fs_info->balance_mutex);
2866 mutex_unlock(&fs_info->volume_mutex); 2860 mutex_unlock(&fs_info->volume_mutex);
2861
2867 return ret; 2862 return ret;
2868} 2863}
2869 2864
2870int btrfs_recover_balance(struct btrfs_root *tree_root) 2865int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
2871{ 2866{
2872 struct task_struct *tsk; 2867 struct task_struct *tsk;
2868
2869 spin_lock(&fs_info->balance_lock);
2870 if (!fs_info->balance_ctl) {
2871 spin_unlock(&fs_info->balance_lock);
2872 return 0;
2873 }
2874 spin_unlock(&fs_info->balance_lock);
2875
2876 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2877 printk(KERN_INFO "btrfs: force skipping balance\n");
2878 return 0;
2879 }
2880
2881 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
2882 if (IS_ERR(tsk))
2883 return PTR_ERR(tsk);
2884
2885 return 0;
2886}
2887
2888int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
2889{
2873 struct btrfs_balance_control *bctl; 2890 struct btrfs_balance_control *bctl;
2874 struct btrfs_balance_item *item; 2891 struct btrfs_balance_item *item;
2875 struct btrfs_disk_balance_args disk_bargs; 2892 struct btrfs_disk_balance_args disk_bargs;
@@ -2882,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
2882 if (!path) 2899 if (!path)
2883 return -ENOMEM; 2900 return -ENOMEM;
2884 2901
2885 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
2886 if (!bctl) {
2887 ret = -ENOMEM;
2888 goto out;
2889 }
2890
2891 key.objectid = BTRFS_BALANCE_OBJECTID; 2902 key.objectid = BTRFS_BALANCE_OBJECTID;
2892 key.type = BTRFS_BALANCE_ITEM_KEY; 2903 key.type = BTRFS_BALANCE_ITEM_KEY;
2893 key.offset = 0; 2904 key.offset = 0;
2894 2905
2895 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); 2906 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
2896 if (ret < 0) 2907 if (ret < 0)
2897 goto out_bctl; 2908 goto out;
2898 if (ret > 0) { /* ret = -ENOENT; */ 2909 if (ret > 0) { /* ret = -ENOENT; */
2899 ret = 0; 2910 ret = 0;
2900 goto out_bctl; 2911 goto out;
2912 }
2913
2914 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
2915 if (!bctl) {
2916 ret = -ENOMEM;
2917 goto out;
2901 } 2918 }
2902 2919
2903 leaf = path->nodes[0]; 2920 leaf = path->nodes[0];
2904 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); 2921 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
2905 2922
2906 bctl->fs_info = tree_root->fs_info; 2923 bctl->fs_info = fs_info;
2907 bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; 2924 bctl->flags = btrfs_balance_flags(leaf, item);
2925 bctl->flags |= BTRFS_BALANCE_RESUME;
2908 2926
2909 btrfs_balance_data(leaf, item, &disk_bargs); 2927 btrfs_balance_data(leaf, item, &disk_bargs);
2910 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); 2928 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2913,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
2913 btrfs_balance_sys(leaf, item, &disk_bargs); 2931 btrfs_balance_sys(leaf, item, &disk_bargs);
2914 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 2932 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
2915 2933
2916 tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); 2934 mutex_lock(&fs_info->volume_mutex);
2917 if (IS_ERR(tsk)) 2935 mutex_lock(&fs_info->balance_mutex);
2918 ret = PTR_ERR(tsk);
2919 else
2920 goto out;
2921 2936
2922out_bctl: 2937 set_balance_control(bctl);
2923 kfree(bctl); 2938
2939 mutex_unlock(&fs_info->balance_mutex);
2940 mutex_unlock(&fs_info->volume_mutex);
2924out: 2941out:
2925 btrfs_free_path(path); 2942 btrfs_free_path(path);
2926 return ret; 2943 return ret;
@@ -4061,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
4061 4078
4062 BUG_ON(stripe_index >= bbio->num_stripes); 4079 BUG_ON(stripe_index >= bbio->num_stripes);
4063 dev = bbio->stripes[stripe_index].dev; 4080 dev = bbio->stripes[stripe_index].dev;
4064 if (bio->bi_rw & WRITE) 4081 if (dev->bdev) {
4065 btrfs_dev_stat_inc(dev, 4082 if (bio->bi_rw & WRITE)
4066 BTRFS_DEV_STAT_WRITE_ERRS); 4083 btrfs_dev_stat_inc(dev,
4067 else 4084 BTRFS_DEV_STAT_WRITE_ERRS);
4068 btrfs_dev_stat_inc(dev, 4085 else
4069 BTRFS_DEV_STAT_READ_ERRS); 4086 btrfs_dev_stat_inc(dev,
4070 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) 4087 BTRFS_DEV_STAT_READ_ERRS);
4071 btrfs_dev_stat_inc(dev, 4088 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
4072 BTRFS_DEV_STAT_FLUSH_ERRS); 4089 btrfs_dev_stat_inc(dev,
4073 btrfs_dev_stat_print_on_error(dev); 4090 BTRFS_DEV_STAT_FLUSH_ERRS);
4091 btrfs_dev_stat_print_on_error(dev);
4092 }
4074 } 4093 }
4075 } 4094 }
4076 4095
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 74366f27a76b..95f6637614db 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
281int btrfs_init_new_device(struct btrfs_root *root, char *path); 281int btrfs_init_new_device(struct btrfs_root *root, char *path);
282int btrfs_balance(struct btrfs_balance_control *bctl, 282int btrfs_balance(struct btrfs_balance_control *bctl,
283 struct btrfs_ioctl_balance_args *bargs); 283 struct btrfs_ioctl_balance_args *bargs);
284int btrfs_recover_balance(struct btrfs_root *tree_root); 284int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
285int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
285int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 286int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
286int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 287int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
287int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 288int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);