about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2012-07-18 05:17:17 -0400
committer Ingo Molnar <mingo@kernel.org> 2012-07-18 05:17:17 -0400
commit a2fe194723f6e4990d01d8c208c7b138fd410522 (patch)
tree 7aee93fa8f4ba1e18b56fa7d8eab75d249fc6966 /fs
parent c3b7cdf180090d2686239a75bb0ae408108ed749 (diff)
parent a018540141a931f5299a866907b27886916b4374 (diff)
Merge branch 'linus' into perf/core
Pick up the latest ring-buffer fixes, before applying a new fix.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/backref.c 15
-rw-r--r-- fs/btrfs/ctree.c 60
-rw-r--r-- fs/btrfs/disk-io.c 34
-rw-r--r-- fs/btrfs/extent-tree.c 11
-rw-r--r-- fs/btrfs/extent_io.c 14
-rw-r--r-- fs/btrfs/file.c 13
-rw-r--r-- fs/btrfs/free-space-cache.c 145
-rw-r--r-- fs/btrfs/inode.c 57
-rw-r--r-- fs/btrfs/ioctl.h 2
-rw-r--r-- fs/btrfs/super.c 4
-rw-r--r-- fs/btrfs/tree-log.c 6
-rw-r--r-- fs/btrfs/volumes.c 95
-rw-r--r-- fs/btrfs/volumes.h 3
-rw-r--r-- fs/buffer.c 22
-rw-r--r-- fs/cifs/connect.c 41
-rw-r--r-- fs/ecryptfs/kthread.c 2
-rw-r--r-- fs/ecryptfs/miscdev.c 48
-rw-r--r-- fs/fat/inode.c 13
-rw-r--r-- fs/fifo.c 9
-rw-r--r-- fs/locks.c 2
-rw-r--r-- fs/nfs/direct.c 6
-rw-r--r-- fs/nfs/super.c 2
-rw-r--r-- fs/ocfs2/dlmglue.c 33
-rw-r--r-- fs/ocfs2/extent_map.c 2
-rw-r--r-- fs/ocfs2/file.c 6
-rw-r--r-- fs/ocfs2/quota_global.c 2
-rw-r--r-- fs/open.c 6
-rw-r--r-- fs/ramfs/file-nommu.c 1
-rw-r--r-- fs/xfs/xfs_alloc.c 19
-rw-r--r-- fs/xfs/xfs_buf.c 53
-rw-r--r-- fs/xfs/xfs_buf.h 1
-rw-r--r-- fs/xfs/xfs_buf_item.c 2
32 files changed, 405 insertions, 324 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7301cdb4b2cb..a383c18e74e8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -301,10 +301,14 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
301 goto out; 301 goto out;
302 302
303 eb = path->nodes[level]; 303 eb = path->nodes[level];
304 if (!eb) { 304 while (!eb) {
305 WARN_ON(1); 305 if (!level) {
306 ret = 1; 306 WARN_ON(1);
307 goto out; 307 ret = 1;
308 goto out;
309 }
310 level--;
311 eb = path->nodes[level];
308 } 312 }
309 313
310 ret = add_all_parents(root, path, parents, level, &ref->key_for_search, 314 ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
@@ -835,6 +839,7 @@ again:
835 } 839 }
836 ret = __add_delayed_refs(head, delayed_ref_seq, 840 ret = __add_delayed_refs(head, delayed_ref_seq,
837 &prefs_delayed); 841 &prefs_delayed);
842 mutex_unlock(&head->mutex);
838 if (ret) { 843 if (ret) {
839 spin_unlock(&delayed_refs->lock); 844 spin_unlock(&delayed_refs->lock);
840 goto out; 845 goto out;
@@ -928,8 +933,6 @@ again:
928 } 933 }
929 934
930out: 935out:
931 if (head)
932 mutex_unlock(&head->mutex);
933 btrfs_free_path(path); 936 btrfs_free_path(path);
934 while (!list_empty(&prefs)) { 937 while (!list_empty(&prefs)) {
935 ref = list_first_entry(&prefs, struct __prelim_ref, list); 938 ref = list_first_entry(&prefs, struct __prelim_ref, list);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 15cbc2bf4ff0..8206b3900587 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1024 if (!looped && !tm) 1024 if (!looped && !tm)
1025 return 0; 1025 return 0;
1026 /* 1026 /*
1027 * we must have key remove operations in the log before the 1027 * if there are no tree operation for the oldest root, we simply
1028 * replace operation. 1028 * return it. this should only happen if that (old) root is at
1029 * level 0.
1029 */ 1030 */
1030 BUG_ON(!tm); 1031 if (!tm)
1032 break;
1031 1033
1034 /*
1035 * if there's an operation that's not a root replacement, we
1036 * found the oldest version of our root. normally, we'll find a
1037 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
1038 */
1032 if (tm->op != MOD_LOG_ROOT_REPLACE) 1039 if (tm->op != MOD_LOG_ROOT_REPLACE)
1033 break; 1040 break;
1034 1041
@@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
1087 tm->generation); 1094 tm->generation);
1088 break; 1095 break;
1089 case MOD_LOG_KEY_ADD: 1096 case MOD_LOG_KEY_ADD:
1090 if (tm->slot != n - 1) { 1097 /* if a move operation is needed it's in the log */
1091 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1092 o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
1093 memmove_extent_buffer(eb, o_dst, o_src, p_size);
1094 }
1095 n--; 1098 n--;
1096 break; 1099 break;
1097 case MOD_LOG_MOVE_KEYS: 1100 case MOD_LOG_MOVE_KEYS:
@@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1192 } 1195 }
1193 1196
1194 tm = tree_mod_log_search(root->fs_info, logical, time_seq); 1197 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
1195 /*
1196 * there was an item in the log when __tree_mod_log_oldest_root
1197 * returned. this one must not go away, because the time_seq passed to
1198 * us must be blocking its removal.
1199 */
1200 BUG_ON(!tm);
1201
1202 if (old_root) 1198 if (old_root)
1203 eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, 1199 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
1204 root->nodesize);
1205 else 1200 else
1206 eb = btrfs_clone_extent_buffer(root->node); 1201 eb = btrfs_clone_extent_buffer(root->node);
1207 btrfs_tree_read_unlock(root->node); 1202 btrfs_tree_read_unlock(root->node);
@@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1216 btrfs_set_header_level(eb, old_root->level); 1211 btrfs_set_header_level(eb, old_root->level);
1217 btrfs_set_header_generation(eb, old_generation); 1212 btrfs_set_header_generation(eb, old_generation);
1218 } 1213 }
1219 __tree_mod_log_rewind(eb, time_seq, tm); 1214 if (tm)
1215 __tree_mod_log_rewind(eb, time_seq, tm);
1216 else
1217 WARN_ON(btrfs_header_level(eb) != 0);
1220 extent_buffer_get(eb); 1218 extent_buffer_get(eb);
1221 1219
1222 return eb; 1220 return eb;
@@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2995static void insert_ptr(struct btrfs_trans_handle *trans, 2993static void insert_ptr(struct btrfs_trans_handle *trans,
2996 struct btrfs_root *root, struct btrfs_path *path, 2994 struct btrfs_root *root, struct btrfs_path *path,
2997 struct btrfs_disk_key *key, u64 bytenr, 2995 struct btrfs_disk_key *key, u64 bytenr,
2998 int slot, int level, int tree_mod_log) 2996 int slot, int level)
2999{ 2997{
3000 struct extent_buffer *lower; 2998 struct extent_buffer *lower;
3001 int nritems; 2999 int nritems;
@@ -3008,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
3008 BUG_ON(slot > nritems); 3006 BUG_ON(slot > nritems);
3009 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); 3007 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
3010 if (slot != nritems) { 3008 if (slot != nritems) {
3011 if (tree_mod_log && level) 3009 if (level)
3012 tree_mod_log_eb_move(root->fs_info, lower, slot + 1, 3010 tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
3013 slot, nritems - slot); 3011 slot, nritems - slot);
3014 memmove_extent_buffer(lower, 3012 memmove_extent_buffer(lower,
@@ -3016,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
3016 btrfs_node_key_ptr_offset(slot), 3014 btrfs_node_key_ptr_offset(slot),
3017 (nritems - slot) * sizeof(struct btrfs_key_ptr)); 3015 (nritems - slot) * sizeof(struct btrfs_key_ptr));
3018 } 3016 }
3019 if (tree_mod_log && level) { 3017 if (level) {
3020 ret = tree_mod_log_insert_key(root->fs_info, lower, slot, 3018 ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
3021 MOD_LOG_KEY_ADD); 3019 MOD_LOG_KEY_ADD);
3022 BUG_ON(ret < 0); 3020 BUG_ON(ret < 0);
@@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3104 btrfs_mark_buffer_dirty(split); 3102 btrfs_mark_buffer_dirty(split);
3105 3103
3106 insert_ptr(trans, root, path, &disk_key, split->start, 3104 insert_ptr(trans, root, path, &disk_key, split->start,
3107 path->slots[level + 1] + 1, level + 1, 1); 3105 path->slots[level + 1] + 1, level + 1);
3108 3106
3109 if (path->slots[level] >= mid) { 3107 if (path->slots[level] >= mid) {
3110 path->slots[level] -= mid; 3108 path->slots[level] -= mid;
@@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
3641 btrfs_set_header_nritems(l, mid); 3639 btrfs_set_header_nritems(l, mid);
3642 btrfs_item_key(right, &disk_key, 0); 3640 btrfs_item_key(right, &disk_key, 0);
3643 insert_ptr(trans, root, path, &disk_key, right->start, 3641 insert_ptr(trans, root, path, &disk_key, right->start,
3644 path->slots[1] + 1, 1, 0); 3642 path->slots[1] + 1, 1);
3645 3643
3646 btrfs_mark_buffer_dirty(right); 3644 btrfs_mark_buffer_dirty(right);
3647 btrfs_mark_buffer_dirty(l); 3645 btrfs_mark_buffer_dirty(l);
@@ -3848,7 +3846,7 @@ again:
3848 if (mid <= slot) { 3846 if (mid <= slot) {
3849 btrfs_set_header_nritems(right, 0); 3847 btrfs_set_header_nritems(right, 0);
3850 insert_ptr(trans, root, path, &disk_key, right->start, 3848 insert_ptr(trans, root, path, &disk_key, right->start,
3851 path->slots[1] + 1, 1, 0); 3849 path->slots[1] + 1, 1);
3852 btrfs_tree_unlock(path->nodes[0]); 3850 btrfs_tree_unlock(path->nodes[0]);
3853 free_extent_buffer(path->nodes[0]); 3851 free_extent_buffer(path->nodes[0]);
3854 path->nodes[0] = right; 3852 path->nodes[0] = right;
@@ -3857,7 +3855,7 @@ again:
3857 } else { 3855 } else {
3858 btrfs_set_header_nritems(right, 0); 3856 btrfs_set_header_nritems(right, 0);
3859 insert_ptr(trans, root, path, &disk_key, right->start, 3857 insert_ptr(trans, root, path, &disk_key, right->start,
3860 path->slots[1], 1, 0); 3858 path->slots[1], 1);
3861 btrfs_tree_unlock(path->nodes[0]); 3859 btrfs_tree_unlock(path->nodes[0]);
3862 free_extent_buffer(path->nodes[0]); 3860 free_extent_buffer(path->nodes[0]);
3863 path->nodes[0] = right; 3861 path->nodes[0] = right;
@@ -5121,6 +5119,18 @@ again:
5121 5119
5122 if (!path->skip_locking) { 5120 if (!path->skip_locking) {
5123 ret = btrfs_try_tree_read_lock(next); 5121 ret = btrfs_try_tree_read_lock(next);
5122 if (!ret && time_seq) {
5123 /*
5124 * If we don't get the lock, we may be racing
5125 * with push_leaf_left, holding that lock while
5126 * itself waiting for the leaf we've currently
5127 * locked. To solve this situation, we give up
5128 * on our lock and cycle.
5129 */
5130 btrfs_release_path(path);
5131 cond_resched();
5132 goto again;
5133 }
5124 if (!ret) { 5134 if (!ret) {
5125 btrfs_set_path_blocking(path); 5135 btrfs_set_path_blocking(path);
5126 btrfs_tree_read_lock(next); 5136 btrfs_tree_read_lock(next);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7b845ff4af99..2936ca49b3b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2354,12 +2354,17 @@ retry_root_backup:
2354 BTRFS_CSUM_TREE_OBJECTID, csum_root); 2354 BTRFS_CSUM_TREE_OBJECTID, csum_root);
2355 if (ret) 2355 if (ret)
2356 goto recovery_tree_root; 2356 goto recovery_tree_root;
2357
2358 csum_root->track_dirty = 1; 2357 csum_root->track_dirty = 1;
2359 2358
2360 fs_info->generation = generation; 2359 fs_info->generation = generation;
2361 fs_info->last_trans_committed = generation; 2360 fs_info->last_trans_committed = generation;
2362 2361
2362 ret = btrfs_recover_balance(fs_info);
2363 if (ret) {
2364 printk(KERN_WARNING "btrfs: failed to recover balance\n");
2365 goto fail_block_groups;
2366 }
2367
2363 ret = btrfs_init_dev_stats(fs_info); 2368 ret = btrfs_init_dev_stats(fs_info);
2364 if (ret) { 2369 if (ret) {
2365 printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", 2370 printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2485,20 +2490,23 @@ retry_root_backup:
2485 goto fail_trans_kthread; 2490 goto fail_trans_kthread;
2486 } 2491 }
2487 2492
2488 if (!(sb->s_flags & MS_RDONLY)) { 2493 if (sb->s_flags & MS_RDONLY)
2489 down_read(&fs_info->cleanup_work_sem); 2494 return 0;
2490 err = btrfs_orphan_cleanup(fs_info->fs_root);
2491 if (!err)
2492 err = btrfs_orphan_cleanup(fs_info->tree_root);
2493 up_read(&fs_info->cleanup_work_sem);
2494 2495
2495 if (!err) 2496 down_read(&fs_info->cleanup_work_sem);
2496 err = btrfs_recover_balance(fs_info->tree_root); 2497 if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
2498 (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
2499 up_read(&fs_info->cleanup_work_sem);
2500 close_ctree(tree_root);
2501 return ret;
2502 }
2503 up_read(&fs_info->cleanup_work_sem);
2497 2504
2498 if (err) { 2505 ret = btrfs_resume_balance_async(fs_info);
2499 close_ctree(tree_root); 2506 if (ret) {
2500 return err; 2507 printk(KERN_WARNING "btrfs: failed to resume balance\n");
2501 } 2508 close_ctree(tree_root);
2509 return ret;
2502 } 2510 }
2503 2511
2504 return 0; 2512 return 0;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b5a1e1bdefb..6e1d36702ff7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ next:
2347 return count; 2347 return count;
2348} 2348}
2349 2349
2350
2351static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, 2350static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
2352 unsigned long num_refs) 2351 unsigned long num_refs,
2352 struct list_head *first_seq)
2353{ 2353{
2354 struct list_head *first_seq = delayed_refs->seq_head.next;
2355
2356 spin_unlock(&delayed_refs->lock); 2354 spin_unlock(&delayed_refs->lock);
2357 pr_debug("waiting for more refs (num %ld, first %p)\n", 2355 pr_debug("waiting for more refs (num %ld, first %p)\n",
2358 num_refs, first_seq); 2356 num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2381 struct btrfs_delayed_ref_root *delayed_refs; 2379 struct btrfs_delayed_ref_root *delayed_refs;
2382 struct btrfs_delayed_ref_node *ref; 2380 struct btrfs_delayed_ref_node *ref;
2383 struct list_head cluster; 2381 struct list_head cluster;
2382 struct list_head *first_seq = NULL;
2384 int ret; 2383 int ret;
2385 u64 delayed_start; 2384 u64 delayed_start;
2386 int run_all = count == (unsigned long)-1; 2385 int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
2436 */ 2435 */
2437 consider_waiting = 1; 2436 consider_waiting = 1;
2438 num_refs = delayed_refs->num_entries; 2437 num_refs = delayed_refs->num_entries;
2438 first_seq = root->fs_info->tree_mod_seq_list.next;
2439 } else { 2439 } else {
2440 wait_for_more_refs(delayed_refs, num_refs); 2440 wait_for_more_refs(delayed_refs,
2441 num_refs, first_seq);
2441 /* 2442 /*
2442 * after waiting, things have changed. we 2443 * after waiting, things have changed. we
2443 * dropped the lock and someone else might have 2444 * dropped the lock and someone else might have
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaa12c1eb348..01c21b6c6d43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
3324 writepage_t writepage, void *data, 3324 writepage_t writepage, void *data,
3325 void (*flush_fn)(void *)) 3325 void (*flush_fn)(void *))
3326{ 3326{
3327 struct inode *inode = mapping->host;
3327 int ret = 0; 3328 int ret = 0;
3328 int done = 0; 3329 int done = 0;
3329 int nr_to_write_done = 0; 3330 int nr_to_write_done = 0;
@@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
3334 int scanned = 0; 3335 int scanned = 0;
3335 int tag; 3336 int tag;
3336 3337
3338 /*
3339 * We have to hold onto the inode so that ordered extents can do their
3340 * work when the IO finishes. The alternative to this is failing to add
3341 * an ordered extent if the igrab() fails there and that is a huge pain
3342 * to deal with, so instead just hold onto the inode throughout the
3343 * writepages operation. If it fails here we are freeing up the inode
3344 * anyway and we'd rather not waste our time writing out stuff that is
3345 * going to be truncated anyway.
3346 */
3347 if (!igrab(inode))
3348 return 0;
3349
3337 pagevec_init(&pvec, 0); 3350 pagevec_init(&pvec, 0);
3338 if (wbc->range_cyclic) { 3351 if (wbc->range_cyclic) {
3339 index = mapping->writeback_index; /* Start from prev offset */ 3352 index = mapping->writeback_index; /* Start from prev offset */
@@ -3428,6 +3441,7 @@ retry:
3428 index = 0; 3441 index = 0;
3429 goto retry; 3442 goto retry;
3430 } 3443 }
3444 btrfs_add_delayed_iput(inode);
3431 return ret; 3445 return ret;
3432} 3446}
3433 3447
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 70dc8ca73e25..9aa01ec2138d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1334 loff_t *ppos, size_t count, size_t ocount) 1334 loff_t *ppos, size_t count, size_t ocount)
1335{ 1335{
1336 struct file *file = iocb->ki_filp; 1336 struct file *file = iocb->ki_filp;
1337 struct inode *inode = fdentry(file)->d_inode;
1338 struct iov_iter i; 1337 struct iov_iter i;
1339 ssize_t written; 1338 ssize_t written;
1340 ssize_t written_buffered; 1339 ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1344 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, 1343 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
1345 count, ocount); 1344 count, ocount);
1346 1345
1347 /*
1348 * the generic O_DIRECT will update in-memory i_size after the
1349 * DIOs are done. But our endio handlers that update the on
1350 * disk i_size never update past the in memory i_size. So we
1351 * need one more update here to catch any additions to the
1352 * file
1353 */
1354 if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
1355 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
1356 mark_inode_dirty(inode);
1357 }
1358
1359 if (written < 0 || written == count) 1346 if (written < 0 || written == count)
1360 return written; 1347 return written;
1361 1348
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81296c57405a..6c4e2baa9290 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ again:
1543 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1543 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1544 1544
1545 /* 1545 /*
1546 * XXX - this can go away after a few releases. 1546 * We need to search for bits in this bitmap. We could only cover some
1547 * 1547 * of the extent in this bitmap thanks to how we add space, so we need
1548 * since the only user of btrfs_remove_free_space is the tree logging 1548 * to search for as much as it as we can and clear that amount, and then
1549 * stuff, and the only way to test that is under crash conditions, we 1549 * go searching for the next bit.
1550 * want to have this debug stuff here just in case somethings not
1551 * working. Search the bitmap for the space we are trying to use to
1552 * make sure its actually there. If its not there then we need to stop
1553 * because something has gone wrong.
1554 */ 1550 */
1555 search_start = *offset; 1551 search_start = *offset;
1556 search_bytes = *bytes; 1552 search_bytes = ctl->unit;
1557 search_bytes = min(search_bytes, end - search_start + 1); 1553 search_bytes = min(search_bytes, end - search_start + 1);
1558 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); 1554 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1559 BUG_ON(ret < 0 || search_start != *offset); 1555 BUG_ON(ret < 0 || search_start != *offset);
1560 1556
1561 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 1557 /* We may have found more bits than what we need */
1562 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1); 1558 search_bytes = min(search_bytes, *bytes);
1563 *bytes -= end - *offset + 1; 1559
1564 *offset = end + 1; 1560 /* Cannot clear past the end of the bitmap */
1565 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 1561 search_bytes = min(search_bytes, end - search_start + 1);
1566 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes); 1562
1567 *bytes = 0; 1563 bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
1568 } 1564 *offset += search_bytes;
1565 *bytes -= search_bytes;
1569 1566
1570 if (*bytes) { 1567 if (*bytes) {
1571 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1568 struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ again:
1596 * everything over again. 1593 * everything over again.
1597 */ 1594 */
1598 search_start = *offset; 1595 search_start = *offset;
1599 search_bytes = *bytes; 1596 search_bytes = ctl->unit;
1600 ret = search_bitmap(ctl, bitmap_info, &search_start, 1597 ret = search_bitmap(ctl, bitmap_info, &search_start,
1601 &search_bytes); 1598 &search_bytes);
1602 if (ret < 0 || search_start != *offset) 1599 if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1879{ 1876{
1880 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1877 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1881 struct btrfs_free_space *info; 1878 struct btrfs_free_space *info;
1882 struct btrfs_free_space *next_info = NULL;
1883 int ret = 0; 1879 int ret = 0;
1884 1880
1885 spin_lock(&ctl->tree_lock); 1881 spin_lock(&ctl->tree_lock);
1886 1882
1887again: 1883again:
1884 if (!bytes)
1885 goto out_lock;
1886
1888 info = tree_search_offset(ctl, offset, 0, 0); 1887 info = tree_search_offset(ctl, offset, 0, 0);
1889 if (!info) { 1888 if (!info) {
1890 /* 1889 /*
@@ -1905,88 +1904,48 @@ again:
1905 } 1904 }
1906 } 1905 }
1907 1906
1908 if (info->bytes < bytes && rb_next(&info->offset_index)) { 1907 if (!info->bitmap) {
1909 u64 end;
1910 next_info = rb_entry(rb_next(&info->offset_index),
1911 struct btrfs_free_space,
1912 offset_index);
1913
1914 if (next_info->bitmap)
1915 end = next_info->offset +
1916 BITS_PER_BITMAP * ctl->unit - 1;
1917 else
1918 end = next_info->offset + next_info->bytes;
1919
1920 if (next_info->bytes < bytes ||
1921 next_info->offset > offset || offset > end) {
1922 printk(KERN_CRIT "Found free space at %llu, size %llu,"
1923 " trying to use %llu\n",
1924 (unsigned long long)info->offset,
1925 (unsigned long long)info->bytes,
1926 (unsigned long long)bytes);
1927 WARN_ON(1);
1928 ret = -EINVAL;
1929 goto out_lock;
1930 }
1931
1932 info = next_info;
1933 }
1934
1935 if (info->bytes == bytes) {
1936 unlink_free_space(ctl, info); 1908 unlink_free_space(ctl, info);
1937 if (info->bitmap) { 1909 if (offset == info->offset) {
1938 kfree(info->bitmap); 1910 u64 to_free = min(bytes, info->bytes);
1939 ctl->total_bitmaps--; 1911
1940 } 1912 info->bytes -= to_free;
1941 kmem_cache_free(btrfs_free_space_cachep, info); 1913 info->offset += to_free;
1942 ret = 0; 1914 if (info->bytes) {
1943 goto out_lock; 1915 ret = link_free_space(ctl, info);
1944 } 1916 WARN_ON(ret);
1945 1917 } else {
1946 if (!info->bitmap && info->offset == offset) { 1918 kmem_cache_free(btrfs_free_space_cachep, info);
1947 unlink_free_space(ctl, info); 1919 }
1948 info->offset += bytes;
1949 info->bytes -= bytes;
1950 ret = link_free_space(ctl, info);
1951 WARN_ON(ret);
1952 goto out_lock;
1953 }
1954 1920
1955 if (!info->bitmap && info->offset <= offset && 1921 offset += to_free;
1956 info->offset + info->bytes >= offset + bytes) { 1922 bytes -= to_free;
1957 u64 old_start = info->offset; 1923 goto again;
1958 /* 1924 } else {
1959 * we're freeing space in the middle of the info, 1925 u64 old_end = info->bytes + info->offset;
1960 * this can happen during tree log replay
1961 *
1962 * first unlink the old info and then
1963 * insert it again after the hole we're creating
1964 */
1965 unlink_free_space(ctl, info);
1966 if (offset + bytes < info->offset + info->bytes) {
1967 u64 old_end = info->offset + info->bytes;
1968 1926
1969 info->offset = offset + bytes; 1927 info->bytes = offset - info->offset;
1970 info->bytes = old_end - info->offset;
1971 ret = link_free_space(ctl, info); 1928 ret = link_free_space(ctl, info);
1972 WARN_ON(ret); 1929 WARN_ON(ret);
1973 if (ret) 1930 if (ret)
1974 goto out_lock; 1931 goto out_lock;
1975 } else {
1976 /* the hole we're creating ends at the end
1977 * of the info struct, just free the info
1978 */
1979 kmem_cache_free(btrfs_free_space_cachep, info);
1980 }
1981 spin_unlock(&ctl->tree_lock);
1982 1932
1983 /* step two, insert a new info struct to cover 1933 /* Not enough bytes in this entry to satisfy us */
1984 * anything before the hole 1934 if (old_end < offset + bytes) {
1985 */ 1935 bytes -= old_end - offset;
1986 ret = btrfs_add_free_space(block_group, old_start, 1936 offset = old_end;
1987 offset - old_start); 1937 goto again;
1988 WARN_ON(ret); /* -ENOMEM */ 1938 } else if (old_end == offset + bytes) {
1989 goto out; 1939 /* all done */
1940 goto out_lock;
1941 }
1942 spin_unlock(&ctl->tree_lock);
1943
1944 ret = btrfs_add_free_space(block_group, offset + bytes,
1945 old_end - (offset + bytes));
1946 WARN_ON(ret);
1947 goto out;
1948 }
1990 } 1949 }
1991 1950
1992 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1951 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d8bb0dbc4941..a7d1921ac76b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3754,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
3754 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3754 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3755 3755
3756 if (root->fs_info->log_root_recovering) { 3756 if (root->fs_info->log_root_recovering) {
3757 BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3757 BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3758 &BTRFS_I(inode)->runtime_flags)); 3758 &BTRFS_I(inode)->runtime_flags));
3759 goto no_delete; 3759 goto no_delete;
3760 } 3760 }
@@ -5876,8 +5876,17 @@ map:
5876 bh_result->b_size = len; 5876 bh_result->b_size = len;
5877 bh_result->b_bdev = em->bdev; 5877 bh_result->b_bdev = em->bdev;
5878 set_buffer_mapped(bh_result); 5878 set_buffer_mapped(bh_result);
5879 if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 5879 if (create) {
5880 set_buffer_new(bh_result); 5880 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5881 set_buffer_new(bh_result);
5882
5883 /*
5884 * Need to update the i_size under the extent lock so buffered
5885 * readers will get the updated i_size when we unlock.
5886 */
5887 if (start + len > i_size_read(inode))
5888 i_size_write(inode, start + len);
5889 }
5881 5890
5882 free_extent_map(em); 5891 free_extent_map(em);
5883 5892
@@ -6360,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6360 */ 6369 */
6361 ordered = btrfs_lookup_ordered_range(inode, lockstart, 6370 ordered = btrfs_lookup_ordered_range(inode, lockstart,
6362 lockend - lockstart + 1); 6371 lockend - lockstart + 1);
6363 if (!ordered) 6372
6373 /*
6374 * We need to make sure there are no buffered pages in this
6375 * range either, we could have raced between the invalidate in
6376 * generic_file_direct_write and locking the extent. The
6377 * invalidate needs to happen so that reads after a write do not
6378 * get stale data.
6379 */
6380 if (!ordered && (!writing ||
6381 !test_range_bit(&BTRFS_I(inode)->io_tree,
6382 lockstart, lockend, EXTENT_UPTODATE, 0,
6383 cached_state)))
6364 break; 6384 break;
6385
6365 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6386 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6366 &cached_state, GFP_NOFS); 6387 &cached_state, GFP_NOFS);
6367 btrfs_start_ordered_extent(inode, ordered, 1); 6388
6368 btrfs_put_ordered_extent(ordered); 6389 if (ordered) {
6390 btrfs_start_ordered_extent(inode, ordered, 1);
6391 btrfs_put_ordered_extent(ordered);
6392 } else {
6393 /* Screw you mmap */
6394 ret = filemap_write_and_wait_range(file->f_mapping,
6395 lockstart,
6396 lockend);
6397 if (ret)
6398 goto out;
6399
6400 /*
6401 * If we found a page that couldn't be invalidated just
6402 * fall back to buffered.
6403 */
6404 ret = invalidate_inode_pages2_range(file->f_mapping,
6405 lockstart >> PAGE_CACHE_SHIFT,
6406 lockend >> PAGE_CACHE_SHIFT);
6407 if (ret) {
6408 if (ret == -EBUSY)
6409 ret = 0;
6410 goto out;
6411 }
6412 }
6413
6369 cond_resched(); 6414 cond_resched();
6370 } 6415 }
6371 6416
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 497c530724cf..e440aa653c30 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
339#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 339#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
340#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ 340#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
341 struct btrfs_ioctl_vol_args_v2) 341 struct btrfs_ioctl_vol_args_v2)
342#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) 342#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
343#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 343#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
344#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ 344#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
345 struct btrfs_ioctl_scrub_args) 345 struct btrfs_ioctl_scrub_args)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0eb9a4da069e..e23991574fdf 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1187 if (ret) 1187 if (ret)
1188 goto restore; 1188 goto restore;
1189 1189
1190 ret = btrfs_resume_balance_async(fs_info);
1191 if (ret)
1192 goto restore;
1193
1190 sb->s_flags &= ~MS_RDONLY; 1194 sb->s_flags &= ~MS_RDONLY;
1191 } 1195 }
1192 1196
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2017d0ff511c..8abeae4224f9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
690 kfree(name); 690 kfree(name);
691 691
692 iput(inode); 692 iput(inode);
693
694 btrfs_run_delayed_items(trans, root);
693 return ret; 695 return ret;
694} 696}
695 697
@@ -895,6 +897,7 @@ again:
895 ret = btrfs_unlink_inode(trans, root, dir, 897 ret = btrfs_unlink_inode(trans, root, dir,
896 inode, victim_name, 898 inode, victim_name,
897 victim_name_len); 899 victim_name_len);
900 btrfs_run_delayed_items(trans, root);
898 } 901 }
899 kfree(victim_name); 902 kfree(victim_name);
900 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 903 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ again:
1475 ret = btrfs_unlink_inode(trans, root, dir, inode, 1478 ret = btrfs_unlink_inode(trans, root, dir, inode,
1476 name, name_len); 1479 name, name_len);
1477 BUG_ON(ret); 1480 BUG_ON(ret);
1481
1482 btrfs_run_delayed_items(trans, root);
1483
1478 kfree(name); 1484 kfree(name);
1479 iput(inode); 1485 iput(inode);
1480 1486
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8a3d2594b807..ecaad40e7ef4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2845,31 +2845,48 @@ out:
2845 2845
2846static int balance_kthread(void *data) 2846static int balance_kthread(void *data)
2847{ 2847{
2848 struct btrfs_balance_control *bctl = 2848 struct btrfs_fs_info *fs_info = data;
2849 (struct btrfs_balance_control *)data;
2850 struct btrfs_fs_info *fs_info = bctl->fs_info;
2851 int ret = 0; 2849 int ret = 0;
2852 2850
2853 mutex_lock(&fs_info->volume_mutex); 2851 mutex_lock(&fs_info->volume_mutex);
2854 mutex_lock(&fs_info->balance_mutex); 2852 mutex_lock(&fs_info->balance_mutex);
2855 2853
2856 set_balance_control(bctl); 2854 if (fs_info->balance_ctl) {
2857
2858 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2859 printk(KERN_INFO "btrfs: force skipping balance\n");
2860 } else {
2861 printk(KERN_INFO "btrfs: continuing balance\n"); 2855 printk(KERN_INFO "btrfs: continuing balance\n");
2862 ret = btrfs_balance(bctl, NULL); 2856 ret = btrfs_balance(fs_info->balance_ctl, NULL);
2863 } 2857 }
2864 2858
2865 mutex_unlock(&fs_info->balance_mutex); 2859 mutex_unlock(&fs_info->balance_mutex);
2866 mutex_unlock(&fs_info->volume_mutex); 2860 mutex_unlock(&fs_info->volume_mutex);
2861
2867 return ret; 2862 return ret;
2868} 2863}
2869 2864
2870int btrfs_recover_balance(struct btrfs_root *tree_root) 2865int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
2871{ 2866{
2872 struct task_struct *tsk; 2867 struct task_struct *tsk;
2868
2869 spin_lock(&fs_info->balance_lock);
2870 if (!fs_info->balance_ctl) {
2871 spin_unlock(&fs_info->balance_lock);
2872 return 0;
2873 }
2874 spin_unlock(&fs_info->balance_lock);
2875
2876 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2877 printk(KERN_INFO "btrfs: force skipping balance\n");
2878 return 0;
2879 }
2880
2881 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
2882 if (IS_ERR(tsk))
2883 return PTR_ERR(tsk);
2884
2885 return 0;
2886}
2887
2888int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
2889{
2873 struct btrfs_balance_control *bctl; 2890 struct btrfs_balance_control *bctl;
2874 struct btrfs_balance_item *item; 2891 struct btrfs_balance_item *item;
2875 struct btrfs_disk_balance_args disk_bargs; 2892 struct btrfs_disk_balance_args disk_bargs;
@@ -2882,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
2882 if (!path) 2899 if (!path)
2883 return -ENOMEM; 2900 return -ENOMEM;
2884 2901
2885 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
2886 if (!bctl) {
2887 ret = -ENOMEM;
2888 goto out;
2889 }
2890
2891 key.objectid = BTRFS_BALANCE_OBJECTID; 2902 key.objectid = BTRFS_BALANCE_OBJECTID;
2892 key.type = BTRFS_BALANCE_ITEM_KEY; 2903 key.type = BTRFS_BALANCE_ITEM_KEY;
2893 key.offset = 0; 2904 key.offset = 0;
2894 2905
2895 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); 2906 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
2896 if (ret < 0) 2907 if (ret < 0)
2897 goto out_bctl; 2908 goto out;
2898 if (ret > 0) { /* ret = -ENOENT; */ 2909 if (ret > 0) { /* ret = -ENOENT; */
2899 ret = 0; 2910 ret = 0;
2900 goto out_bctl; 2911 goto out;
2912 }
2913
2914 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
2915 if (!bctl) {
2916 ret = -ENOMEM;
2917 goto out;
2901 } 2918 }
2902 2919
2903 leaf = path->nodes[0]; 2920 leaf = path->nodes[0];
2904 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); 2921 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
2905 2922
2906 bctl->fs_info = tree_root->fs_info; 2923 bctl->fs_info = fs_info;
2907 bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; 2924 bctl->flags = btrfs_balance_flags(leaf, item);
2925 bctl->flags |= BTRFS_BALANCE_RESUME;
2908 2926
2909 btrfs_balance_data(leaf, item, &disk_bargs); 2927 btrfs_balance_data(leaf, item, &disk_bargs);
2910 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); 2928 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2913,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
2913 btrfs_balance_sys(leaf, item, &disk_bargs); 2931 btrfs_balance_sys(leaf, item, &disk_bargs);
2914 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 2932 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
2915 2933
2916 tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); 2934 mutex_lock(&fs_info->volume_mutex);
2917 if (IS_ERR(tsk)) 2935 mutex_lock(&fs_info->balance_mutex);
2918 ret = PTR_ERR(tsk);
2919 else
2920 goto out;
2921 2936
2922out_bctl: 2937 set_balance_control(bctl);
2923 kfree(bctl); 2938
2939 mutex_unlock(&fs_info->balance_mutex);
2940 mutex_unlock(&fs_info->volume_mutex);
2924out: 2941out:
2925 btrfs_free_path(path); 2942 btrfs_free_path(path);
2926 return ret; 2943 return ret;
@@ -4061,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
4061 4078
4062 BUG_ON(stripe_index >= bbio->num_stripes); 4079 BUG_ON(stripe_index >= bbio->num_stripes);
4063 dev = bbio->stripes[stripe_index].dev; 4080 dev = bbio->stripes[stripe_index].dev;
4064 if (bio->bi_rw & WRITE) 4081 if (dev->bdev) {
4065 btrfs_dev_stat_inc(dev, 4082 if (bio->bi_rw & WRITE)
4066 BTRFS_DEV_STAT_WRITE_ERRS); 4083 btrfs_dev_stat_inc(dev,
4067 else 4084 BTRFS_DEV_STAT_WRITE_ERRS);
4068 btrfs_dev_stat_inc(dev, 4085 else
4069 BTRFS_DEV_STAT_READ_ERRS); 4086 btrfs_dev_stat_inc(dev,
4070 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) 4087 BTRFS_DEV_STAT_READ_ERRS);
4071 btrfs_dev_stat_inc(dev, 4088 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
4072 BTRFS_DEV_STAT_FLUSH_ERRS); 4089 btrfs_dev_stat_inc(dev,
4073 btrfs_dev_stat_print_on_error(dev); 4090 BTRFS_DEV_STAT_FLUSH_ERRS);
4091 btrfs_dev_stat_print_on_error(dev);
4092 }
4074 } 4093 }
4075 } 4094 }
4076 4095
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 74366f27a76b..95f6637614db 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
281int btrfs_init_new_device(struct btrfs_root *root, char *path); 281int btrfs_init_new_device(struct btrfs_root *root, char *path);
282int btrfs_balance(struct btrfs_balance_control *bctl, 282int btrfs_balance(struct btrfs_balance_control *bctl,
283 struct btrfs_ioctl_balance_args *bargs); 283 struct btrfs_ioctl_balance_args *bargs);
284int btrfs_recover_balance(struct btrfs_root *tree_root); 284int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
285int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
285int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 286int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
286int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 287int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
287int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 288int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
diff --git a/fs/buffer.c b/fs/buffer.c
index 838a9cf246bd..c7062c896d7c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1036,6 +1036,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1036static struct buffer_head * 1036static struct buffer_head *
1037__getblk_slow(struct block_device *bdev, sector_t block, int size) 1037__getblk_slow(struct block_device *bdev, sector_t block, int size)
1038{ 1038{
1039 int ret;
1040 struct buffer_head *bh;
1041
1039 /* Size must be multiple of hard sectorsize */ 1042 /* Size must be multiple of hard sectorsize */
1040 if (unlikely(size & (bdev_logical_block_size(bdev)-1) || 1043 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1041 (size < 512 || size > PAGE_SIZE))) { 1044 (size < 512 || size > PAGE_SIZE))) {
@@ -1048,20 +1051,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
1048 return NULL; 1051 return NULL;
1049 } 1052 }
1050 1053
1051 for (;;) { 1054retry:
1052 struct buffer_head * bh; 1055 bh = __find_get_block(bdev, block, size);
1053 int ret; 1056 if (bh)
1057 return bh;
1054 1058
1059 ret = grow_buffers(bdev, block, size);
1060 if (ret == 0) {
1061 free_more_memory();
1062 goto retry;
1063 } else if (ret > 0) {
1055 bh = __find_get_block(bdev, block, size); 1064 bh = __find_get_block(bdev, block, size);
1056 if (bh) 1065 if (bh)
1057 return bh; 1066 return bh;
1058
1059 ret = grow_buffers(bdev, block, size);
1060 if (ret < 0)
1061 return NULL;
1062 if (ret == 0)
1063 free_more_memory();
1064 } 1067 }
1068 return NULL;
1065} 1069}
1066 1070
1067/* 1071/*
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 78db68a5cf44..0ae86ddf2213 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1653,24 +1653,26 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1653 * If yes, we have encountered a double deliminator 1653 * If yes, we have encountered a double deliminator
1654 * reset the NULL character to the deliminator 1654 * reset the NULL character to the deliminator
1655 */ 1655 */
1656 if (tmp_end < end && tmp_end[1] == delim) 1656 if (tmp_end < end && tmp_end[1] == delim) {
1657 tmp_end[0] = delim; 1657 tmp_end[0] = delim;
1658 1658
1659 /* Keep iterating until we get to a single deliminator 1659 /* Keep iterating until we get to a single
1660 * OR the end 1660 * deliminator OR the end
1661 */ 1661 */
1662 while ((tmp_end = strchr(tmp_end, delim)) != NULL && 1662 while ((tmp_end = strchr(tmp_end, delim))
1663 (tmp_end[1] == delim)) { 1663 != NULL && (tmp_end[1] == delim)) {
1664 tmp_end = (char *) &tmp_end[2]; 1664 tmp_end = (char *) &tmp_end[2];
1665 } 1665 }
1666 1666
1667 /* Reset var options to point to next element */ 1667 /* Reset var options to point to next element */
1668 if (tmp_end) { 1668 if (tmp_end) {
1669 tmp_end[0] = '\0'; 1669 tmp_end[0] = '\0';
1670 options = (char *) &tmp_end[1]; 1670 options = (char *) &tmp_end[1];
1671 } else 1671 } else
1672 /* Reached the end of the mount option string */ 1672 /* Reached the end of the mount option
1673 options = end; 1673 * string */
1674 options = end;
1675 }
1674 1676
1675 /* Now build new password string */ 1677 /* Now build new password string */
1676 temp_len = strlen(value); 1678 temp_len = strlen(value);
@@ -3493,18 +3495,15 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3493 * MS-CIFS indicates that servers are only limited by the client's 3495 * MS-CIFS indicates that servers are only limited by the client's
3494 * bufsize for reads, testing against win98se shows that it throws 3496 * bufsize for reads, testing against win98se shows that it throws
3495 * INVALID_PARAMETER errors if you try to request too large a read. 3497 * INVALID_PARAMETER errors if you try to request too large a read.
3498 * OS/2 just sends back short reads.
3496 * 3499 *
3497 * If the server advertises a MaxBufferSize of less than one page, 3500 * If the server doesn't advertise CAP_LARGE_READ_X, then assume that
3498 * assume that it also can't satisfy reads larger than that either. 3501 * it can't handle a read request larger than its MaxBufferSize either.
3499 *
3500 * FIXME: Is there a better heuristic for this?
3501 */ 3502 */
3502 if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) 3503 if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP))
3503 defsize = CIFS_DEFAULT_IOSIZE; 3504 defsize = CIFS_DEFAULT_IOSIZE;
3504 else if (server->capabilities & CAP_LARGE_READ_X) 3505 else if (server->capabilities & CAP_LARGE_READ_X)
3505 defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; 3506 defsize = CIFS_DEFAULT_NON_POSIX_RSIZE;
3506 else if (server->maxBuf >= PAGE_CACHE_SIZE)
3507 defsize = CIFSMaxBufSize;
3508 else 3507 else
3509 defsize = server->maxBuf - sizeof(READ_RSP); 3508 defsize = server->maxBuf - sizeof(READ_RSP);
3510 3509
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 69f994a7d524..0dbe58a8b172 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
149 (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); 149 (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
150 if (!IS_ERR(*lower_file)) 150 if (!IS_ERR(*lower_file))
151 goto out; 151 goto out;
152 if (flags & O_RDONLY) { 152 if ((flags & O_ACCMODE) == O_RDONLY) {
153 rc = PTR_ERR((*lower_file)); 153 rc = PTR_ERR((*lower_file));
154 goto out; 154 goto out;
155 } 155 }
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 3a06f4043df4..c0038f6566d4 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
49 mutex_lock(&ecryptfs_daemon_hash_mux); 49 mutex_lock(&ecryptfs_daemon_hash_mux);
50 /* TODO: Just use file->private_data? */ 50 /* TODO: Just use file->private_data? */
51 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); 51 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
52 BUG_ON(rc || !daemon); 52 if (rc || !daemon) {
53 mutex_unlock(&ecryptfs_daemon_hash_mux);
54 return -EINVAL;
55 }
53 mutex_lock(&daemon->mux); 56 mutex_lock(&daemon->mux);
54 mutex_unlock(&ecryptfs_daemon_hash_mux); 57 mutex_unlock(&ecryptfs_daemon_hash_mux);
55 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { 58 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
@@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
122 goto out_unlock_daemon; 125 goto out_unlock_daemon;
123 } 126 }
124 daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; 127 daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
128 file->private_data = daemon;
125 atomic_inc(&ecryptfs_num_miscdev_opens); 129 atomic_inc(&ecryptfs_num_miscdev_opens);
126out_unlock_daemon: 130out_unlock_daemon:
127 mutex_unlock(&daemon->mux); 131 mutex_unlock(&daemon->mux);
@@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file)
152 156
153 mutex_lock(&ecryptfs_daemon_hash_mux); 157 mutex_lock(&ecryptfs_daemon_hash_mux);
154 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); 158 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
155 BUG_ON(rc || !daemon); 159 if (rc || !daemon)
160 daemon = file->private_data;
156 mutex_lock(&daemon->mux); 161 mutex_lock(&daemon->mux);
157 BUG_ON(daemon->pid != task_pid(current));
158 BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); 162 BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
159 daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; 163 daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
160 atomic_dec(&ecryptfs_num_miscdev_opens); 164 atomic_dec(&ecryptfs_num_miscdev_opens);
@@ -191,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
191 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, 195 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
192 u16 msg_flags, struct ecryptfs_daemon *daemon) 196 u16 msg_flags, struct ecryptfs_daemon *daemon)
193{ 197{
194 int rc = 0; 198 struct ecryptfs_message *msg;
195 199
196 mutex_lock(&msg_ctx->mux); 200 msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL);
197 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), 201 if (!msg) {
198 GFP_KERNEL);
199 if (!msg_ctx->msg) {
200 rc = -ENOMEM;
201 printk(KERN_ERR "%s: Out of memory whilst attempting " 202 printk(KERN_ERR "%s: Out of memory whilst attempting "
202 "to kmalloc(%zd, GFP_KERNEL)\n", __func__, 203 "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
203 (sizeof(*msg_ctx->msg) + data_size)); 204 (sizeof(*msg) + data_size));
204 goto out_unlock; 205 return -ENOMEM;
205 } 206 }
207
208 mutex_lock(&msg_ctx->mux);
209 msg_ctx->msg = msg;
206 msg_ctx->msg->index = msg_ctx->index; 210 msg_ctx->msg->index = msg_ctx->index;
207 msg_ctx->msg->data_len = data_size; 211 msg_ctx->msg->data_len = data_size;
208 msg_ctx->type = msg_type; 212 msg_ctx->type = msg_type;
209 memcpy(msg_ctx->msg->data, data, data_size); 213 memcpy(msg_ctx->msg->data, data, data_size);
210 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); 214 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
211 mutex_lock(&daemon->mux);
212 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); 215 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
216 mutex_unlock(&msg_ctx->mux);
217
218 mutex_lock(&daemon->mux);
213 daemon->num_queued_msg_ctx++; 219 daemon->num_queued_msg_ctx++;
214 wake_up_interruptible(&daemon->wait); 220 wake_up_interruptible(&daemon->wait);
215 mutex_unlock(&daemon->mux); 221 mutex_unlock(&daemon->mux);
216out_unlock: 222
217 mutex_unlock(&msg_ctx->mux); 223 return 0;
218 return rc;
219} 224}
220 225
221/* 226/*
@@ -269,8 +274,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
269 mutex_lock(&ecryptfs_daemon_hash_mux); 274 mutex_lock(&ecryptfs_daemon_hash_mux);
270 /* TODO: Just use file->private_data? */ 275 /* TODO: Just use file->private_data? */
271 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); 276 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
272 BUG_ON(rc || !daemon); 277 if (rc || !daemon) {
278 mutex_unlock(&ecryptfs_daemon_hash_mux);
279 return -EINVAL;
280 }
273 mutex_lock(&daemon->mux); 281 mutex_lock(&daemon->mux);
282 if (task_pid(current) != daemon->pid) {
283 mutex_unlock(&daemon->mux);
284 mutex_unlock(&ecryptfs_daemon_hash_mux);
285 return -EPERM;
286 }
274 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { 287 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
275 rc = 0; 288 rc = 0;
276 mutex_unlock(&ecryptfs_daemon_hash_mux); 289 mutex_unlock(&ecryptfs_daemon_hash_mux);
@@ -307,9 +320,6 @@ check_list:
307 * message from the queue; try again */ 320 * message from the queue; try again */
308 goto check_list; 321 goto check_list;
309 } 322 }
310 BUG_ON(euid != daemon->euid);
311 BUG_ON(current_user_ns() != daemon->user_ns);
312 BUG_ON(task_pid(current) != daemon->pid);
313 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, 323 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
314 struct ecryptfs_msg_ctx, daemon_out_list); 324 struct ecryptfs_msg_ctx, daemon_out_list);
315 BUG_ON(!msg_ctx); 325 BUG_ON(!msg_ctx);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a3d81ebf6d86..0038b32cb362 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -738,22 +738,21 @@ static int
738fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) 738fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent)
739{ 739{
740 int len = *lenp; 740 int len = *lenp;
741 u32 ipos_h, ipos_m, ipos_l; 741 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
742 loff_t i_pos;
742 743
743 if (len < 5) { 744 if (len < 5) {
744 *lenp = 5; 745 *lenp = 5;
745 return 255; /* no room */ 746 return 255; /* no room */
746 } 747 }
747 748
748 ipos_h = MSDOS_I(inode)->i_pos >> 8; 749 i_pos = fat_i_pos_read(sbi, inode);
749 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
750 ipos_l = (MSDOS_I(inode)->i_pos & 0x0f) << 28;
751 *lenp = 5; 750 *lenp = 5;
752 fh[0] = inode->i_ino; 751 fh[0] = inode->i_ino;
753 fh[1] = inode->i_generation; 752 fh[1] = inode->i_generation;
754 fh[2] = ipos_h; 753 fh[2] = i_pos >> 8;
755 fh[3] = ipos_m | MSDOS_I(inode)->i_logstart; 754 fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart;
756 fh[4] = ipos_l; 755 fh[4] = (i_pos & 0x0f) << 28;
757 if (parent) 756 if (parent)
758 fh[4] |= MSDOS_I(parent)->i_logstart; 757 fh[4] |= MSDOS_I(parent)->i_logstart;
759 return 3; 758 return 3;
diff --git a/fs/fifo.c b/fs/fifo.c
index b1a524d798e7..cf6f4345ceb0 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -14,7 +14,7 @@
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/pipe_fs_i.h> 15#include <linux/pipe_fs_i.h>
16 16
17static void wait_for_partner(struct inode* inode, unsigned int *cnt) 17static int wait_for_partner(struct inode* inode, unsigned int *cnt)
18{ 18{
19 int cur = *cnt; 19 int cur = *cnt;
20 20
@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
23 if (signal_pending(current)) 23 if (signal_pending(current))
24 break; 24 break;
25 } 25 }
26 return cur == *cnt ? -ERESTARTSYS : 0;
26} 27}
27 28
28static void wake_up_partner(struct inode* inode) 29static void wake_up_partner(struct inode* inode)
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
67 * seen a writer */ 68 * seen a writer */
68 filp->f_version = pipe->w_counter; 69 filp->f_version = pipe->w_counter;
69 } else { 70 } else {
70 wait_for_partner(inode, &pipe->w_counter); 71 if (wait_for_partner(inode, &pipe->w_counter))
71 if(signal_pending(current))
72 goto err_rd; 72 goto err_rd;
73 } 73 }
74 } 74 }
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
90 wake_up_partner(inode); 90 wake_up_partner(inode);
91 91
92 if (!pipe->readers) { 92 if (!pipe->readers) {
93 wait_for_partner(inode, &pipe->r_counter); 93 if (wait_for_partner(inode, &pipe->r_counter))
94 if (signal_pending(current))
95 goto err_wr; 94 goto err_wr;
96 } 95 }
97 break; 96 break;
diff --git a/fs/locks.c b/fs/locks.c
index 814c51d0de47..fce6238d52c1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1465,7 +1465,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1465 case F_WRLCK: 1465 case F_WRLCK:
1466 return generic_add_lease(filp, arg, flp); 1466 return generic_add_lease(filp, arg, flp);
1467 default: 1467 default:
1468 BUG(); 1468 return -EINVAL;
1469 } 1469 }
1470} 1470}
1471EXPORT_SYMBOL(generic_setlease); 1471EXPORT_SYMBOL(generic_setlease);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9a4cbfc85d81..48253372ab1d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -484,6 +484,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
484 484
485 list_for_each_entry_safe(req, tmp, &reqs, wb_list) { 485 list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
486 if (!nfs_pageio_add_request(&desc, req)) { 486 if (!nfs_pageio_add_request(&desc, req)) {
487 nfs_list_remove_request(req);
487 nfs_list_add_request(req, &failed); 488 nfs_list_add_request(req, &failed);
488 spin_lock(cinfo.lock); 489 spin_lock(cinfo.lock);
489 dreq->flags = 0; 490 dreq->flags = 0;
@@ -494,8 +495,11 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
494 } 495 }
495 nfs_pageio_complete(&desc); 496 nfs_pageio_complete(&desc);
496 497
497 while (!list_empty(&failed)) 498 while (!list_empty(&failed)) {
499 req = nfs_list_entry(failed.next);
500 nfs_list_remove_request(req);
498 nfs_unlock_and_release_request(req); 501 nfs_unlock_and_release_request(req);
502 }
499 503
500 if (put_dreq(dreq)) 504 if (put_dreq(dreq))
501 nfs_direct_write_complete(dreq, dreq->inode); 505 nfs_direct_write_complete(dreq, dreq->inode);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 906f09c7d842..06228192f64e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2860,6 +2860,8 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2860 2860
2861 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2861 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2862 2862
2863 mount_info->fill_super = nfs4_fill_super;
2864
2863 export_path = data->nfs_server.export_path; 2865 export_path = data->nfs_server.export_path;
2864 data->nfs_server.export_path = "/"; 2866 data->nfs_server.export_path = "/";
2865 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, 2867 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 81a4cd22f80b..4f7795fb5fc0 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -456,7 +456,7 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
456 stats->ls_gets++; 456 stats->ls_gets++;
457 stats->ls_total += ktime_to_ns(kt); 457 stats->ls_total += ktime_to_ns(kt);
458 /* overflow */ 458 /* overflow */
459 if (unlikely(stats->ls_gets) == 0) { 459 if (unlikely(stats->ls_gets == 0)) {
460 stats->ls_gets++; 460 stats->ls_gets++;
461 stats->ls_total = ktime_to_ns(kt); 461 stats->ls_total = ktime_to_ns(kt);
462 } 462 }
@@ -3932,6 +3932,8 @@ unqueue:
3932static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3932static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3933 struct ocfs2_lock_res *lockres) 3933 struct ocfs2_lock_res *lockres)
3934{ 3934{
3935 unsigned long flags;
3936
3935 assert_spin_locked(&lockres->l_lock); 3937 assert_spin_locked(&lockres->l_lock);
3936 3938
3937 if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3939 if (lockres->l_flags & OCFS2_LOCK_FREEING) {
@@ -3945,21 +3947,22 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3945 3947
3946 lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 3948 lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
3947 3949
3948 spin_lock(&osb->dc_task_lock); 3950 spin_lock_irqsave(&osb->dc_task_lock, flags);
3949 if (list_empty(&lockres->l_blocked_list)) { 3951 if (list_empty(&lockres->l_blocked_list)) {
3950 list_add_tail(&lockres->l_blocked_list, 3952 list_add_tail(&lockres->l_blocked_list,
3951 &osb->blocked_lock_list); 3953 &osb->blocked_lock_list);
3952 osb->blocked_lock_count++; 3954 osb->blocked_lock_count++;
3953 } 3955 }
3954 spin_unlock(&osb->dc_task_lock); 3956 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3955} 3957}
3956 3958
3957static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 3959static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
3958{ 3960{
3959 unsigned long processed; 3961 unsigned long processed;
3962 unsigned long flags;
3960 struct ocfs2_lock_res *lockres; 3963 struct ocfs2_lock_res *lockres;
3961 3964
3962 spin_lock(&osb->dc_task_lock); 3965 spin_lock_irqsave(&osb->dc_task_lock, flags);
3963 /* grab this early so we know to try again if a state change and 3966 /* grab this early so we know to try again if a state change and
3964 * wake happens part-way through our work */ 3967 * wake happens part-way through our work */
3965 osb->dc_work_sequence = osb->dc_wake_sequence; 3968 osb->dc_work_sequence = osb->dc_wake_sequence;
@@ -3972,38 +3975,40 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
3972 struct ocfs2_lock_res, l_blocked_list); 3975 struct ocfs2_lock_res, l_blocked_list);
3973 list_del_init(&lockres->l_blocked_list); 3976 list_del_init(&lockres->l_blocked_list);
3974 osb->blocked_lock_count--; 3977 osb->blocked_lock_count--;
3975 spin_unlock(&osb->dc_task_lock); 3978 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3976 3979
3977 BUG_ON(!processed); 3980 BUG_ON(!processed);
3978 processed--; 3981 processed--;
3979 3982
3980 ocfs2_process_blocked_lock(osb, lockres); 3983 ocfs2_process_blocked_lock(osb, lockres);
3981 3984
3982 spin_lock(&osb->dc_task_lock); 3985 spin_lock_irqsave(&osb->dc_task_lock, flags);
3983 } 3986 }
3984 spin_unlock(&osb->dc_task_lock); 3987 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3985} 3988}
3986 3989
3987static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 3990static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
3988{ 3991{
3989 int empty = 0; 3992 int empty = 0;
3993 unsigned long flags;
3990 3994
3991 spin_lock(&osb->dc_task_lock); 3995 spin_lock_irqsave(&osb->dc_task_lock, flags);
3992 if (list_empty(&osb->blocked_lock_list)) 3996 if (list_empty(&osb->blocked_lock_list))
3993 empty = 1; 3997 empty = 1;
3994 3998
3995 spin_unlock(&osb->dc_task_lock); 3999 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3996 return empty; 4000 return empty;
3997} 4001}
3998 4002
3999static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 4003static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
4000{ 4004{
4001 int should_wake = 0; 4005 int should_wake = 0;
4006 unsigned long flags;
4002 4007
4003 spin_lock(&osb->dc_task_lock); 4008 spin_lock_irqsave(&osb->dc_task_lock, flags);
4004 if (osb->dc_work_sequence != osb->dc_wake_sequence) 4009 if (osb->dc_work_sequence != osb->dc_wake_sequence)
4005 should_wake = 1; 4010 should_wake = 1;
4006 spin_unlock(&osb->dc_task_lock); 4011 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
4007 4012
4008 return should_wake; 4013 return should_wake;
4009} 4014}
@@ -4033,10 +4038,12 @@ static int ocfs2_downconvert_thread(void *arg)
4033 4038
4034void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 4039void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
4035{ 4040{
4036 spin_lock(&osb->dc_task_lock); 4041 unsigned long flags;
4042
4043 spin_lock_irqsave(&osb->dc_task_lock, flags);
4037 /* make sure the voting thread gets a swipe at whatever changes 4044 /* make sure the voting thread gets a swipe at whatever changes
4038 * the caller may have made to the voting state */ 4045 * the caller may have made to the voting state */
4039 osb->dc_wake_sequence++; 4046 osb->dc_wake_sequence++;
4040 spin_unlock(&osb->dc_task_lock); 4047 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
4041 wake_up(&osb->dc_event); 4048 wake_up(&osb->dc_event);
4042} 4049}
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2f5b92ef0e53..70b5863a2d64 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -923,8 +923,6 @@ out_unlock:
923 923
924 ocfs2_inode_unlock(inode, 0); 924 ocfs2_inode_unlock(inode, 0);
925out: 925out:
926 if (ret && ret != -ENXIO)
927 ret = -ENXIO;
928 return ret; 926 return ret;
929} 927}
930 928
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 061591a3ab08..7602783d7f41 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1950,7 +1950,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
1950 if (ret < 0) 1950 if (ret < 0)
1951 mlog_errno(ret); 1951 mlog_errno(ret);
1952 1952
1953 if (file->f_flags & O_SYNC) 1953 if (file && (file->f_flags & O_SYNC))
1954 handle->h_sync = 1; 1954 handle->h_sync = 1;
1955 1955
1956 ocfs2_commit_trans(osb, handle); 1956 ocfs2_commit_trans(osb, handle);
@@ -2422,8 +2422,10 @@ out_dio:
2422 unaligned_dio = 0; 2422 unaligned_dio = 0;
2423 } 2423 }
2424 2424
2425 if (unaligned_dio) 2425 if (unaligned_dio) {
2426 ocfs2_iocb_clear_unaligned_aio(iocb);
2426 atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); 2427 atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio);
2428 }
2427 2429
2428out: 2430out:
2429 if (rw_level != -1) 2431 if (rw_level != -1)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 92fcd575775a..0a86e302655f 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -399,8 +399,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
399 msecs_to_jiffies(oinfo->dqi_syncms)); 399 msecs_to_jiffies(oinfo->dqi_syncms));
400 400
401out_err: 401out_err:
402 if (status)
403 mlog_errno(status);
404 return status; 402 return status;
405out_unlock: 403out_unlock:
406 ocfs2_unlock_global_qf(oinfo, 0); 404 ocfs2_unlock_global_qf(oinfo, 0);
diff --git a/fs/open.c b/fs/open.c
index d6c79a0dffc7..1540632d8387 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -397,10 +397,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
397{ 397{
398 struct file *file; 398 struct file *file;
399 struct inode *inode; 399 struct inode *inode;
400 int error; 400 int error, fput_needed;
401 401
402 error = -EBADF; 402 error = -EBADF;
403 file = fget(fd); 403 file = fget_raw_light(fd, &fput_needed);
404 if (!file) 404 if (!file)
405 goto out; 405 goto out;
406 406
@@ -414,7 +414,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
414 if (!error) 414 if (!error)
415 set_fs_pwd(current->fs, &file->f_path); 415 set_fs_pwd(current->fs, &file->f_path);
416out_putf: 416out_putf:
417 fput(file); 417 fput_light(file, fput_needed);
418out: 418out:
419 return error; 419 return error;
420} 420}
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index fbb0b478a346..d5378d028589 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
110 110
111 /* prevent the page from being discarded on memory pressure */ 111 /* prevent the page from being discarded on memory pressure */
112 SetPageDirty(page); 112 SetPageDirty(page);
113 SetPageUptodate(page);
113 114
114 unlock_page(page); 115 unlock_page(page);
115 put_page(page); 116 put_page(page);
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9d1aeb7e2734..4f33c32affe3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1074,13 +1074,13 @@ restart:
1074 * If we couldn't get anything, give up. 1074 * If we couldn't get anything, give up.
1075 */ 1075 */
1076 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1076 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1077 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1078
1077 if (!forced++) { 1079 if (!forced++) {
1078 trace_xfs_alloc_near_busy(args); 1080 trace_xfs_alloc_near_busy(args);
1079 xfs_log_force(args->mp, XFS_LOG_SYNC); 1081 xfs_log_force(args->mp, XFS_LOG_SYNC);
1080 goto restart; 1082 goto restart;
1081 } 1083 }
1082
1083 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1084 trace_xfs_alloc_size_neither(args); 1084 trace_xfs_alloc_size_neither(args);
1085 args->agbno = NULLAGBLOCK; 1085 args->agbno = NULLAGBLOCK;
1086 return 0; 1086 return 0;
@@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker(
2434 current_restore_flags_nested(&pflags, PF_FSTRANS); 2434 current_restore_flags_nested(&pflags, PF_FSTRANS);
2435} 2435}
2436 2436
2437 2437/*
2438int /* error */ 2438 * Data allocation requests often come in with little stack to work on. Push
2439 * them off to a worker thread so there is lots of stack to use. Metadata
2440 * requests, OTOH, are generally from low stack usage paths, so avoid the
2441 * context switch overhead here.
2442 */
2443int
2439xfs_alloc_vextent( 2444xfs_alloc_vextent(
2440 xfs_alloc_arg_t *args) /* allocation argument structure */ 2445 struct xfs_alloc_arg *args)
2441{ 2446{
2442 DECLARE_COMPLETION_ONSTACK(done); 2447 DECLARE_COMPLETION_ONSTACK(done);
2443 2448
2449 if (!args->userdata)
2450 return __xfs_alloc_vextent(args);
2451
2452
2444 args->done = &done; 2453 args->done = &done;
2445 INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker); 2454 INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
2446 queue_work(xfs_alloc_wq, &args->work); 2455 queue_work(xfs_alloc_wq, &args->work);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a4beb421018a..269b35c084da 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -989,27 +989,6 @@ xfs_buf_ioerror_alert(
989 (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); 989 (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
990} 990}
991 991
992int
993xfs_bwrite(
994 struct xfs_buf *bp)
995{
996 int error;
997
998 ASSERT(xfs_buf_islocked(bp));
999
1000 bp->b_flags |= XBF_WRITE;
1001 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
1002
1003 xfs_bdstrat_cb(bp);
1004
1005 error = xfs_buf_iowait(bp);
1006 if (error) {
1007 xfs_force_shutdown(bp->b_target->bt_mount,
1008 SHUTDOWN_META_IO_ERROR);
1009 }
1010 return error;
1011}
1012
1013/* 992/*
1014 * Called when we want to stop a buffer from getting written or read. 993 * Called when we want to stop a buffer from getting written or read.
1015 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend 994 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
@@ -1079,14 +1058,7 @@ xfs_bioerror_relse(
1079 return EIO; 1058 return EIO;
1080} 1059}
1081 1060
1082 1061STATIC int
1083/*
1084 * All xfs metadata buffers except log state machine buffers
1085 * get this attached as their b_bdstrat callback function.
1086 * This is so that we can catch a buffer
1087 * after prematurely unpinning it to forcibly shutdown the filesystem.
1088 */
1089int
1090xfs_bdstrat_cb( 1062xfs_bdstrat_cb(
1091 struct xfs_buf *bp) 1063 struct xfs_buf *bp)
1092{ 1064{
@@ -1107,6 +1079,27 @@ xfs_bdstrat_cb(
1107 return 0; 1079 return 0;
1108} 1080}
1109 1081
1082int
1083xfs_bwrite(
1084 struct xfs_buf *bp)
1085{
1086 int error;
1087
1088 ASSERT(xfs_buf_islocked(bp));
1089
1090 bp->b_flags |= XBF_WRITE;
1091 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
1092
1093 xfs_bdstrat_cb(bp);
1094
1095 error = xfs_buf_iowait(bp);
1096 if (error) {
1097 xfs_force_shutdown(bp->b_target->bt_mount,
1098 SHUTDOWN_META_IO_ERROR);
1099 }
1100 return error;
1101}
1102
1110/* 1103/*
1111 * Wrapper around bdstrat so that we can stop data from going to disk in case 1104 * Wrapper around bdstrat so that we can stop data from going to disk in case
1112 * we are shutting down the filesystem. Typically user data goes thru this 1105 * we are shutting down the filesystem. Typically user data goes thru this
@@ -1243,7 +1236,7 @@ xfs_buf_iorequest(
1243 */ 1236 */
1244 atomic_set(&bp->b_io_remaining, 1); 1237 atomic_set(&bp->b_io_remaining, 1);
1245 _xfs_buf_ioapply(bp); 1238 _xfs_buf_ioapply(bp);
1246 _xfs_buf_ioend(bp, 0); 1239 _xfs_buf_ioend(bp, 1);
1247 1240
1248 xfs_buf_rele(bp); 1241 xfs_buf_rele(bp);
1249} 1242}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7f1d1392ce37..79344c48008e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
180extern int xfs_bwrite(struct xfs_buf *bp); 180extern int xfs_bwrite(struct xfs_buf *bp);
181 181
182extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); 182extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
183extern int xfs_bdstrat_cb(struct xfs_buf *);
184 183
185extern void xfs_buf_ioend(xfs_buf_t *, int); 184extern void xfs_buf_ioend(xfs_buf_t *, int);
186extern void xfs_buf_ioerror(xfs_buf_t *, int); 185extern void xfs_buf_ioerror(xfs_buf_t *, int);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 45df2b857d48..d9e451115f98 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks(
954 954
955 if (!XFS_BUF_ISSTALE(bp)) { 955 if (!XFS_BUF_ISSTALE(bp)) {
956 bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; 956 bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
957 xfs_bdstrat_cb(bp); 957 xfs_buf_iorequest(bp);
958 } else { 958 } else {
959 xfs_buf_relse(bp); 959 xfs_buf_relse(bp);
960 } 960 }