author     Linus Torvalds <torvalds@linux-foundation.org>  2012-08-29 14:36:22 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-08-29 14:36:22 -0400
commit     318e15101993c0fdc3f23f24ac61fc7769d27e68
tree       98e2805502dc83f64c632706aabe06391469df32 /fs
parent     a7ccbcf3307022c48810eebd99aa8dba84f13caf
parent     256dd1bb3750ac5ad49b40887c1691788dc44b33
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason:
 "I've split out the big send/receive update from my last pull request
  and now have just the fixes in my for-linus branch. The send/recv
  branch will wander over to linux-next shortly though.

  The largest patches in this pull are Josef's patches to fix DIO
  locking problems and his patch to fix a crash during balance. They
  are both well tested.

  The rest are smaller fixes that we've had queued. The last rc came
  out while I was hacking new and exciting ways to recover from a
  misplaced rm -rf on my dev box, so these missed rc3."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits)
  Btrfs: fix that repair code is spuriously executed for transid failures
  Btrfs: fix ordered extent leak when failing to start a transaction
  Btrfs: fix a dio write regression
  Btrfs: fix deadlock with freeze and sync V2
  Btrfs: revert checksum error statistic which can cause a BUG()
  Btrfs: remove superblock writing after fatal error
  Btrfs: allow delayed refs to be merged
  Btrfs: fix enospc problems when deleting a subvol
  Btrfs: fix wrong mtime and ctime when creating snapshots
  Btrfs: fix race in run_clustered_refs
  Btrfs: don't run __tree_mod_log_free_eb on leaves
  Btrfs: increase the size of the free space cache
  Btrfs: barrier before waitqueue_active
  Btrfs: fix deadlock in wait_for_more_refs
  btrfs: fix second lock in btrfs_delete_delayed_items()
  Btrfs: don't allocate a seperate csums array for direct reads
  Btrfs: do not strdup non existent strings
  Btrfs: do not use missing devices when showing devname
  Btrfs: fix that error value is changed by mistake
  Btrfs: lock extents as we map them in DIO
  ...
Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/backref.c          4
-rw-r--r--  fs/btrfs/compression.c      1
-rw-r--r--  fs/btrfs/ctree.c            9
-rw-r--r--  fs/btrfs/ctree.h            3
-rw-r--r--  fs/btrfs/delayed-inode.c   12
-rw-r--r--  fs/btrfs/delayed-ref.c    163
-rw-r--r--  fs/btrfs/delayed-ref.h      4
-rw-r--r--  fs/btrfs/disk-io.c         53
-rw-r--r--  fs/btrfs/disk-io.h          2
-rw-r--r--  fs/btrfs/extent-tree.c    123
-rw-r--r--  fs/btrfs/extent_io.c       17
-rw-r--r--  fs/btrfs/file-item.c        4
-rw-r--r--  fs/btrfs/inode.c          326
-rw-r--r--  fs/btrfs/ioctl.c            2
-rw-r--r--  fs/btrfs/locking.c          2
-rw-r--r--  fs/btrfs/qgroup.c          12
-rw-r--r--  fs/btrfs/root-tree.c        4
-rw-r--r--  fs/btrfs/super.c           15
-rw-r--r--  fs/btrfs/transaction.c      3
-rw-r--r--  fs/btrfs/volumes.c         33
-rw-r--r--  fs/btrfs/volumes.h          2
21 files changed, 418 insertions, 376 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a256f3b2a845..ff6475f409d6 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
         ret = extent_from_logical(fs_info, logical, path,
                                   &found_key);
         btrfs_release_path(path);
-        if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
-                ret = -EINVAL;
         if (ret < 0)
                 return ret;
+        if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+                return -EINVAL;
 
         extent_item_pos = logical - found_key.objectid;
         ret = iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 86eff48dab78..43d1c5a3a030 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace)
         btrfs_compress_op[idx]->free_workspace(workspace);
         atomic_dec(alloc_workspace);
 wake:
+        smp_mb();
         if (waitqueue_active(workspace_wait))
                 wake_up(workspace_wait);
 }
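
The smp_mb() added here (and in the similar disk-io.c hunk below) follows the kernel's lockless-wakeup idiom: waitqueue_active() peeks at the wait queue without taking its lock, so the waker needs a full barrier between making the wait condition true and checking for sleepers; it pairs with the barrier the waiter gets from setting its task state inside wait_event(). A minimal sketch of the two sides, with illustrative names rather than the btrfs ones:

    #include <linux/atomic.h>
    #include <linux/sched.h>
    #include <linux/wait.h>

    /* waker: publish the condition, then do the lockless waiter check */
    static void put_resource(atomic_t *in_use, wait_queue_head_t *wq)
    {
            atomic_dec(in_use);     /* condition becomes true */
            smp_mb();               /* order the store above before the
                                     * waitqueue_active() load; without it
                                     * the waker may see "no sleepers" while
                                     * the sleeper still sees the old
                                     * condition: a lost wakeup */
            if (waitqueue_active(wq))
                    wake_up(wq);
    }

    /* sleeper: wait_event() sets the task state (an implicit barrier)
     * before re-testing the condition, pairing with the smp_mb() above */
    static void get_resource(atomic_t *in_use, wait_queue_head_t *wq, int max)
    {
            wait_event(*wq, atomic_read(in_use) < max);
    }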
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f271ff..6d183f60d63a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
         spin_unlock(&fs_info->tree_mod_seq_lock);
 
         /*
-         * we removed the lowest blocker from the blocker list, so there may be
-         * more processible delayed refs.
-         */
-        wake_up(&fs_info->tree_mod_seq_wait);
-
-        /*
          * anything that's lower than the lowest existing (read: blocked)
          * sequence number can be removed from the tree.
          */
@@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
         u32 nritems;
         int ret;
 
+        if (btrfs_header_level(eb) == 0)
+                return;
+
         nritems = btrfs_header_nritems(eb);
         for (i = nritems - 1; i >= 0; i--) {
                 ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4bab807227ad..0d195b507660 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
         atomic_t tree_mod_seq;
         struct list_head tree_mod_seq_list;
         struct seq_list tree_mod_seq_elem;
-        wait_queue_head_t tree_mod_seq_wait;
 
         /* this protects tree_mod_log */
         rwlock_t tree_mod_log_lock;
@@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
                           struct bio *bio, u32 *dst);
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-                              struct bio *bio, u64 logical_offset, u32 *dst);
+                              struct bio *bio, u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 objectid, u64 pos,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 335605c8ceab..07d5eeb1e6f1 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 
         rb_erase(&delayed_item->rb_node, root);
         delayed_item->delayed_node->count--;
-        atomic_dec(&delayed_root->items);
-        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
+        if (atomic_dec_return(&delayed_root->items) <
+            BTRFS_DELAYED_BACKGROUND &&
             waitqueue_active(&delayed_root->wait))
                 wake_up(&delayed_root->wait);
 }
@@ -1028,9 +1028,10 @@ do_again:
                 btrfs_release_delayed_item(prev);
                 ret = 0;
                 btrfs_release_path(path);
-                if (curr)
+                if (curr) {
+                        mutex_unlock(&node->mutex);
                         goto do_again;
-                else
+                } else
                         goto delete_fail;
         }
 
@@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
         delayed_node->count--;
 
         delayed_root = delayed_node->root->fs_info->delayed_root;
-        atomic_dec(&delayed_root->items);
-        if (atomic_read(&delayed_root->items) <
+        if (atomic_dec_return(&delayed_root->items) <
             BTRFS_DELAYED_BACKGROUND &&
             waitqueue_active(&delayed_root->wait))
                 wake_up(&delayed_root->wait);
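
The atomic_dec_return() conversions in this file (and in disk-io.c and inode.c below) get the needed ordering for free: value-returning atomic RMW operations are fully ordered, so no separate smp_mb() is required before waitqueue_active(), and the threshold test sees the exact post-decrement value instead of a second atomic_read() that another CPU may already have changed. Before and after, straight from the hunk above:

    /* before: unordered atomic_dec(), then a separate racy re-read */
    atomic_dec(&delayed_root->items);
    if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
        waitqueue_active(&delayed_root->wait))
            wake_up(&delayed_root->wait);

    /* after: one fully ordered RMW whose return value is this CPU's own
     * result, safely comparable against the wakeup threshold */
    if (atomic_dec_return(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
        waitqueue_active(&delayed_root->wait))
            wake_up(&delayed_root->wait);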
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419ed01bb..ae9411773397 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -38,17 +38,14 @@
 static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
                           struct btrfs_delayed_tree_ref *ref1)
 {
-        if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
-                if (ref1->root < ref2->root)
-                        return -1;
-                if (ref1->root > ref2->root)
-                        return 1;
-        } else {
-                if (ref1->parent < ref2->parent)
-                        return -1;
-                if (ref1->parent > ref2->parent)
-                        return 1;
-        }
+        if (ref1->root < ref2->root)
+                return -1;
+        if (ref1->root > ref2->root)
+                return 1;
+        if (ref1->parent < ref2->parent)
+                return -1;
+        if (ref1->parent > ref2->parent)
+                return 1;
         return 0;
 }
 
@@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
  * type of the delayed backrefs and content of delayed backrefs.
  */
 static int comp_entry(struct btrfs_delayed_ref_node *ref2,
-                      struct btrfs_delayed_ref_node *ref1)
+                      struct btrfs_delayed_ref_node *ref1,
+                      bool compare_seq)
 {
         if (ref1->bytenr < ref2->bytenr)
                 return -1;
@@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
         if (ref1->type > ref2->type)
                 return 1;
         /* merging of sequenced refs is not allowed */
-        if (ref1->seq < ref2->seq)
-                return -1;
-        if (ref1->seq > ref2->seq)
-                return 1;
+        if (compare_seq) {
+                if (ref1->seq < ref2->seq)
+                        return -1;
+                if (ref1->seq > ref2->seq)
+                        return 1;
+        }
         if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
             ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
                 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
                 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                  rb_node);
 
-                cmp = comp_entry(entry, ins);
+                cmp = comp_entry(entry, ins, 1);
                 if (cmp < 0)
                         p = &(*p)->rb_left;
                 else if (cmp > 0)
@@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
         return 0;
 }
 
+static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
+                                    struct btrfs_delayed_ref_root *delayed_refs,
+                                    struct btrfs_delayed_ref_node *ref)
+{
+        rb_erase(&ref->rb_node, &delayed_refs->root);
+        ref->in_tree = 0;
+        btrfs_put_delayed_ref(ref);
+        delayed_refs->num_entries--;
+        if (trans->delayed_ref_updates)
+                trans->delayed_ref_updates--;
+}
+
+static int merge_ref(struct btrfs_trans_handle *trans,
+                     struct btrfs_delayed_ref_root *delayed_refs,
+                     struct btrfs_delayed_ref_node *ref, u64 seq)
+{
+        struct rb_node *node;
+        int merged = 0;
+        int mod = 0;
+        int done = 0;
+
+        node = rb_prev(&ref->rb_node);
+        while (node) {
+                struct btrfs_delayed_ref_node *next;
+
+                next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+                node = rb_prev(node);
+                if (next->bytenr != ref->bytenr)
+                        break;
+                if (seq && next->seq >= seq)
+                        break;
+                if (comp_entry(ref, next, 0))
+                        continue;
+
+                if (ref->action == next->action) {
+                        mod = next->ref_mod;
+                } else {
+                        if (ref->ref_mod < next->ref_mod) {
+                                struct btrfs_delayed_ref_node *tmp;
+
+                                tmp = ref;
+                                ref = next;
+                                next = tmp;
+                                done = 1;
+                        }
+                        mod = -next->ref_mod;
+                }
+
+                merged++;
+                drop_delayed_ref(trans, delayed_refs, next);
+                ref->ref_mod += mod;
+                if (ref->ref_mod == 0) {
+                        drop_delayed_ref(trans, delayed_refs, ref);
+                        break;
+                } else {
+                        /*
+                         * You can't have multiples of the same ref on a tree
+                         * block.
+                         */
+                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+                }
+
+                if (done)
+                        break;
+                node = rb_prev(&ref->rb_node);
+        }
+
+        return merged;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+                              struct btrfs_fs_info *fs_info,
+                              struct btrfs_delayed_ref_root *delayed_refs,
+                              struct btrfs_delayed_ref_head *head)
+{
+        struct rb_node *node;
+        u64 seq = 0;
+
+        spin_lock(&fs_info->tree_mod_seq_lock);
+        if (!list_empty(&fs_info->tree_mod_seq_list)) {
+                struct seq_list *elem;
+
+                elem = list_first_entry(&fs_info->tree_mod_seq_list,
+                                        struct seq_list, list);
+                seq = elem->seq;
+        }
+        spin_unlock(&fs_info->tree_mod_seq_lock);
+
+        node = rb_prev(&head->node.rb_node);
+        while (node) {
+                struct btrfs_delayed_ref_node *ref;
+
+                ref = rb_entry(node, struct btrfs_delayed_ref_node,
+                               rb_node);
+                if (ref->bytenr != head->node.bytenr)
+                        break;
+
+                /* We can't merge refs that are outside of our seq count */
+                if (seq && ref->seq >= seq)
+                        break;
+                if (merge_ref(trans, delayed_refs, ref, seq))
+                        node = rb_prev(&head->node.rb_node);
+                else
+                        node = rb_prev(node);
+        }
+}
+
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                             struct btrfs_delayed_ref_root *delayed_refs,
                             u64 seq)
@@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans,
          * every changing the extent allocation tree.
          */
         existing->ref_mod--;
-        if (existing->ref_mod == 0) {
-                rb_erase(&existing->rb_node,
-                         &delayed_refs->root);
-                existing->in_tree = 0;
-                btrfs_put_delayed_ref(existing);
-                delayed_refs->num_entries--;
-                if (trans->delayed_ref_updates)
-                        trans->delayed_ref_updates--;
-        } else {
+        if (existing->ref_mod == 0)
+                drop_delayed_ref(trans, delayed_refs, existing);
+        else
                 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                         existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
-        }
 } else {
         WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
         add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
                              num_bytes, parent, ref_root, level, action,
                              for_cow);
-        if (!need_ref_seq(for_cow, ref_root) &&
-            waitqueue_active(&fs_info->tree_mod_seq_wait))
-                wake_up(&fs_info->tree_mod_seq_wait);
         spin_unlock(&delayed_refs->lock);
         if (need_ref_seq(for_cow, ref_root))
                 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
                              num_bytes, parent, ref_root, owner, offset,
                              action, for_cow);
-        if (!need_ref_seq(for_cow, ref_root) &&
-            waitqueue_active(&fs_info->tree_mod_seq_wait))
-                wake_up(&fs_info->tree_mod_seq_wait);
         spin_unlock(&delayed_refs->lock);
         if (need_ref_seq(for_cow, ref_root))
                 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                              num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
                              extent_op->is_data);
 
-        if (waitqueue_active(&fs_info->tree_mod_seq_wait))
-                wake_up(&fs_info->tree_mod_seq_wait);
         spin_unlock(&delayed_refs->lock);
         return 0;
 }
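
The effect of merge_ref() on two mergeable refs (same bytenr, type and tree/data key, with seq deliberately ignored via comp_entry(..., 0)) is plain ref_mod arithmetic: equal actions accumulate, opposite actions cancel, and a net of zero removes both refs before they ever run. A standalone sketch of just that bookkeeping (hypothetical types, not the kernel structs; the kernel code additionally swaps the pair so the surviving ref keeps the dominant action):

    #include <stdio.h>

    enum ref_action { REF_ADD, REF_DROP };

    struct toy_ref {
            enum ref_action action;
            int ref_mod;
    };

    /* fold "next" into "ref", mirroring merge_ref()'s ref_mod math;
     * returns the surviving count (0 means both refs are dropped) */
    static int fold_refs(struct toy_ref *ref, const struct toy_ref *next)
    {
            int mod = (ref->action == next->action) ? next->ref_mod
                                                    : -next->ref_mod;
            ref->ref_mod += mod;
            return ref->ref_mod;
    }

    int main(void)
    {
            struct toy_ref add  = { REF_ADD,  1 };
            struct toy_ref drop = { REF_DROP, 1 };

            /* relocation's add-back followed by the pending drop nets to
             * zero, which is the case the balance crash fix relies on */
            printf("net ref_mod = %d\n", fold_refs(&add, &drop));   /* 0 */
            return 0;
    }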
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0d7c90c366b6..ab5300595847 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op);
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+                              struct btrfs_fs_info *fs_info,
+                              struct btrfs_delayed_ref_root *delayed_refs,
+                              struct btrfs_delayed_ref_head *head);
 
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62e0cafd6e25..22e98e04c2ea 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                 ret = read_extent_buffer_pages(io_tree, eb, start,
                                                WAIT_COMPLETE,
                                                btree_get_extent, mirror_num);
-                if (!ret && !verify_parent_transid(io_tree, eb,
-                                                   parent_transid, 0))
-                        break;
+                if (!ret) {
+                        if (!verify_parent_transid(io_tree, eb,
+                                                   parent_transid, 0))
+                                break;
+                        else
+                                ret = -EIO;
+                }
 
                 /*
                  * This buffer's crc is fine, but its contents are corrupted, so
@@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work)
         limit = btrfs_async_submit_limit(fs_info);
         limit = limit * 2 / 3;
 
-        atomic_dec(&fs_info->nr_async_submits);
-
-        if (atomic_read(&fs_info->nr_async_submits) < limit &&
+        if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
             waitqueue_active(&fs_info->async_submit_wait))
                 wake_up(&fs_info->async_submit_wait);
 
@@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb,
         fs_info->free_chunk_space = 0;
         fs_info->tree_mod_log = RB_ROOT;
 
-        init_waitqueue_head(&fs_info->tree_mod_seq_wait);
-
         /* readahead state */
         INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
         spin_lock_init(&fs_info->reada_lock);
@@ -2528,8 +2528,7 @@ retry_root_backup:
                 goto fail_trans_kthread;
 
         /* do not make disk changes in broken FS */
-        if (btrfs_super_log_root(disk_super) != 0 &&
-            !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
+        if (btrfs_super_log_root(disk_super) != 0) {
                 u64 bytenr = btrfs_super_log_root(disk_super);
 
                 if (fs_devices->rw_devices == 0) {
@@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root)
         /* clear out the rbtree of defraggable inodes */
         btrfs_run_defrag_inodes(fs_info);
 
-        /*
-         * Here come 2 situations when btrfs is broken to flip readonly:
-         *
-         * 1. when btrfs flips readonly somewhere else before
-         * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
-         * and btrfs will skip to write sb directly to keep
-         * ERROR state on disk.
-         *
-         * 2. when btrfs flips readonly just in btrfs_commit_super,
-         * and in such case, btrfs cannot write sb via btrfs_commit_super,
-         * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
-         * btrfs will cleanup all FS resources first and write sb then.
-         */
         if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                 ret = btrfs_commit_super(root);
                 if (ret)
                         printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
         }
 
-        if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-                ret = btrfs_error_commit_super(root);
-                if (ret)
-                        printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
-        }
+        if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+                btrfs_error_commit_super(root);
 
         btrfs_put_block_group_cache(fs_info);
 
@@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
         if (read_only)
                 return 0;
 
-        if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-                printk(KERN_WARNING "warning: mount fs with errors, "
-                       "running btrfsck is recommended\n");
-        }
-
         return 0;
 }
 
-int btrfs_error_commit_super(struct btrfs_root *root)
+void btrfs_error_commit_super(struct btrfs_root *root)
 {
-        int ret;
-
         mutex_lock(&root->fs_info->cleaner_mutex);
         btrfs_run_delayed_iputs(root);
         mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root)
 
         /* cleanup FS via transaction */
         btrfs_cleanup_transaction(root);
-
-        ret = write_ctree_super(NULL, root, 0);
-
-        return ret;
 }
 
 static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
@@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
                 /* FIXME: cleanup wait for commit */
                 t->in_commit = 1;
                 t->blocked = 1;
+                smp_mb();
                 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
                         wake_up(&root->fs_info->transaction_blocked_wait);
 
                 t->blocked = 0;
+                smp_mb();
                 if (waitqueue_active(&root->fs_info->transaction_wait))
                         wake_up(&root->fs_info->transaction_wait);
 
                 t->commit_done = 1;
+                smp_mb();
                 if (waitqueue_active(&t->commit_wait))
                         wake_up(&t->commit_wait);
 
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95e147eea239..c5b00a735fef 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, int max_mirrors);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
-int btrfs_error_commit_super(struct btrfs_root *root);
+void btrfs_error_commit_super(struct btrfs_root *root);
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
                                             u64 bytenr, u32 blocksize);
 struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153b7c47..ba58024d40d3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2252,6 +2252,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                 }
 
                 /*
+                 * We need to try and merge add/drops of the same ref since we
+                 * can run into issues with relocate dropping the implicit ref
+                 * and then it being added back again before the drop can
+                 * finish. If we merged anything we need to re-loop so we can
+                 * get a good ref.
+                 */
+                btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+                                         locked_ref);
+
+                /*
                  * locked_ref is the head node, so we have to go one
                  * node back for any delayed ref updates
                  */
@@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                 ref->in_tree = 0;
                 rb_erase(&ref->rb_node, &delayed_refs->root);
                 delayed_refs->num_entries--;
-                /*
-                 * we modified num_entries, but as we're currently running
-                 * delayed refs, skip
-                 *     wake_up(&delayed_refs->seq_wait);
-                 * here.
-                 */
+                if (locked_ref) {
+                        /*
+                         * when we play the delayed ref, also correct the
+                         * ref_mod on head
+                         */
+                        switch (ref->action) {
+                        case BTRFS_ADD_DELAYED_REF:
+                        case BTRFS_ADD_DELAYED_EXTENT:
+                                locked_ref->node.ref_mod -= ref->ref_mod;
+                                break;
+                        case BTRFS_DROP_DELAYED_REF:
+                                locked_ref->node.ref_mod += ref->ref_mod;
+                                break;
+                        default:
+                                WARN_ON(1);
+                        }
+                }
                 spin_unlock(&delayed_refs->lock);
 
                 ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2371,6 @@ next:
         return count;
 }
 
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
-                               struct btrfs_delayed_ref_root *delayed_refs,
-                               unsigned long num_refs,
-                               struct list_head *first_seq)
-{
-        spin_unlock(&delayed_refs->lock);
-        pr_debug("waiting for more refs (num %ld, first %p)\n",
-                 num_refs, first_seq);
-        wait_event(fs_info->tree_mod_seq_wait,
-                   num_refs != delayed_refs->num_entries ||
-                   fs_info->tree_mod_seq_list.next != first_seq);
-        pr_debug("done waiting for more refs (num %ld, first %p)\n",
-                 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
-        spin_lock(&delayed_refs->lock);
-}
-
 #ifdef SCRAMBLE_DELAYED_REFS
 /*
  * Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
         struct btrfs_delayed_ref_root *delayed_refs;
         struct btrfs_delayed_ref_node *ref;
         struct list_head cluster;
-        struct list_head *first_seq = NULL;
         int ret;
         u64 delayed_start;
         int run_all = count == (unsigned long)-1;
         int run_most = 0;
-        unsigned long num_refs = 0;
-        int consider_waiting;
+        int loops;
 
         /* We'll clean this up in btrfs_cleanup_transaction */
         if (trans->aborted)
@@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
         delayed_refs = &trans->transaction->delayed_refs;
         INIT_LIST_HEAD(&cluster);
 again:
-        consider_waiting = 0;
+        loops = 0;
         spin_lock(&delayed_refs->lock);
 
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2515,6 @@ again:
                 if (ret)
                         break;
 
-                if (delayed_start >= delayed_refs->run_delayed_start) {
-                        if (consider_waiting == 0) {
-                                /*
-                                 * btrfs_find_ref_cluster looped. let's do one
-                                 * more cycle. if we don't run any delayed ref
-                                 * during that cycle (because we can't because
-                                 * all of them are blocked) and if the number of
-                                 * refs doesn't change, we avoid busy waiting.
-                                 */
-                                consider_waiting = 1;
-                                num_refs = delayed_refs->num_entries;
-                                first_seq = root->fs_info->tree_mod_seq_list.next;
-                        } else {
-                                wait_for_more_refs(root->fs_info, delayed_refs,
-                                                   num_refs, first_seq);
-                                /*
-                                 * after waiting, things have changed. we
-                                 * dropped the lock and someone else might have
-                                 * run some refs, built new clusters and so on.
-                                 * therefore, we restart staleness detection.
-                                 */
-                                consider_waiting = 0;
-                        }
-                }
-
                 ret = run_clustered_refs(trans, root, &cluster);
                 if (ret < 0) {
                         spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2527,26 @@ again:
                 if (count == 0)
                         break;
 
-                if (ret || delayed_refs->run_delayed_start == 0) {
+                if (delayed_start >= delayed_refs->run_delayed_start) {
+                        if (loops == 0) {
+                                /*
+                                 * btrfs_find_ref_cluster looped. let's do one
+                                 * more cycle. if we don't run any delayed ref
+                                 * during that cycle (because we can't because
+                                 * all of them are blocked), bail out.
+                                 */
+                                loops = 1;
+                        } else {
+                                /*
+                                 * no runnable refs left, stop trying
+                                 */
+                                BUG_ON(run_all);
+                                break;
+                        }
+                }
+                if (ret) {
                         /* refs were run, let's reset staleness detection */
-                        consider_waiting = 0;
+                        loops = 0;
                 }
         }
 
2557 2552
@@ -3007,17 +3002,16 @@ again:
3007 } 3002 }
3008 spin_unlock(&block_group->lock); 3003 spin_unlock(&block_group->lock);
3009 3004
3010 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); 3005 /*
3006 * Try to preallocate enough space based on how big the block group is.
3007 * Keep in mind this has to include any pinned space which could end up
3008 * taking up quite a bit since it's not folded into the other space
3009 * cache.
3010 */
3011 num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
3011 if (!num_pages) 3012 if (!num_pages)
3012 num_pages = 1; 3013 num_pages = 1;
3013 3014
3014 /*
3015 * Just to make absolutely sure we have enough space, we're going to
3016 * preallocate 12 pages worth of space for each block group. In
3017 * practice we ought to use at most 8, but we need extra space so we can
3018 * add our header and have a terminator between the extents and the
3019 * bitmaps.
3020 */
3021 num_pages *= 16; 3015 num_pages *= 16;
3022 num_pages *= PAGE_CACHE_SIZE; 3016 num_pages *= PAGE_CACHE_SIZE;
3023 3017
@@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
         if (root->fs_info->quota_enabled) {
                 ret = btrfs_qgroup_reserve(root, num_bytes +
                                            nr_extents * root->leafsize);
-                if (ret)
+                if (ret) {
+                        mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
                         return ret;
+                }
         }
 
         ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
         rb_erase(&head->node.rb_node, &delayed_refs->root);
 
         delayed_refs->num_entries--;
-        smp_mb();
-        if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
-                wake_up(&root->fs_info->tree_mod_seq_wait);
 
         /*
          * we don't take a ref on the node because we're removing it from the
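
The cache_save_setup() hunk above is where "increase the size of the free space cache" lands: preallocation now scales per 256MiB of block group rather than per 1GiB, a 4x increase. Worked numbers, assuming 4KiB pages:

    old: 10GiB block group -> div64_u64(10GiB, 1GiB)   = 10
         10 * 16 pages * 4KiB                          = 640KiB of cache
    new: 10GiB block group -> div64_u64(10GiB, 256MiB) = 40
         40 * 16 pages * 4KiB                          = 2560KiB of cache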
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 45c81bb4ac82..4c878476bb91 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2330,23 +2330,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                         ret = tree->ops->readpage_end_io_hook(page, start, end,
                                                               state, mirror);
-                        if (ret) {
-                                /* no IO indicated but software detected errors
-                                 * in the block, either checksum errors or
-                                 * issues with the contents */
-                                struct btrfs_root *root =
-                                        BTRFS_I(page->mapping->host)->root;
-                                struct btrfs_device *device;
-
+                        if (ret)
                                 uptodate = 0;
-                                device = btrfs_find_device_for_logical(
-                                                root, start, mirror);
-                                if (device)
-                                        btrfs_dev_stat_inc_and_print(device,
-                                                BTRFS_DEV_STAT_CORRUPTION_ERRS);
-                        } else {
+                        else
                                 clean_io_failure(start, page);
-                        }
                 }
 
                 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b45b9de0c21d..857d93cd01dc 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 }
 
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-                              struct bio *bio, u64 offset, u32 *dst)
+                              struct bio *bio, u64 offset)
 {
-        return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
+        return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6e8f416773d4..ec154f954646 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1008,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
         nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
                 PAGE_CACHE_SHIFT;
 
-        atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
-        if (atomic_read(&root->fs_info->async_delalloc_pages) <
+        if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
             5 * 1024 * 1024 &&
             waitqueue_active(&root->fs_info->async_submit_wait))
                 wake_up(&root->fs_info->async_submit_wait);
@@ -1885,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                 trans = btrfs_join_transaction_nolock(root);
         else
                 trans = btrfs_join_transaction(root);
-        if (IS_ERR(trans))
-                return PTR_ERR(trans);
+        if (IS_ERR(trans)) {
+                ret = PTR_ERR(trans);
+                trans = NULL;
+                goto out;
+        }
         trans->block_rsv = &root->fs_info->delalloc_block_rsv;
         ret = btrfs_update_inode_fallback(trans, root, inode);
         if (ret) /* -ENOMEM or corruption */
@@ -3174,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
         btrfs_i_size_write(dir, dir->i_size - name_len * 2);
         inode_inc_iversion(dir);
         dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-        ret = btrfs_update_inode(trans, root, dir);
+        ret = btrfs_update_inode_fallback(trans, root, dir);
         if (ret)
                 btrfs_abort_transaction(trans, root, ret);
 out:
@@ -5774,18 +5775,112 @@ out:
         return ret;
 }
 
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+                              struct extent_state **cached_state, int writing)
+{
+        struct btrfs_ordered_extent *ordered;
+        int ret = 0;
+
+        while (1) {
+                lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                 0, cached_state);
+                /*
+                 * We're concerned with the entire range that we're going to be
+                 * doing DIO to, so we need to make sure theres no ordered
+                 * extents in this range.
+                 */
+                ordered = btrfs_lookup_ordered_range(inode, lockstart,
+                                                     lockend - lockstart + 1);
+
+                /*
+                 * We need to make sure there are no buffered pages in this
+                 * range either, we could have raced between the invalidate in
+                 * generic_file_direct_write and locking the extent. The
+                 * invalidate needs to happen so that reads after a write do not
+                 * get stale data.
+                 */
+                if (!ordered && (!writing ||
+                    !test_range_bit(&BTRFS_I(inode)->io_tree,
+                                    lockstart, lockend, EXTENT_UPTODATE, 0,
+                                    *cached_state)))
+                        break;
+
+                unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                     cached_state, GFP_NOFS);
+
+                if (ordered) {
+                        btrfs_start_ordered_extent(inode, ordered, 1);
+                        btrfs_put_ordered_extent(ordered);
+                } else {
+                        /* Screw you mmap */
+                        ret = filemap_write_and_wait_range(inode->i_mapping,
+                                                           lockstart,
+                                                           lockend);
+                        if (ret)
+                                break;
+
+                        /*
+                         * If we found a page that couldn't be invalidated just
+                         * fall back to buffered.
+                         */
+                        ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                        lockstart >> PAGE_CACHE_SHIFT,
+                                        lockend >> PAGE_CACHE_SHIFT);
+                        if (ret)
+                                break;
+                }
+
+                cond_resched();
+        }
+
+        return ret;
+}
+
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                    struct buffer_head *bh_result, int create)
 {
         struct extent_map *em;
         struct btrfs_root *root = BTRFS_I(inode)->root;
+        struct extent_state *cached_state = NULL;
         u64 start = iblock << inode->i_blkbits;
+        u64 lockstart, lockend;
         u64 len = bh_result->b_size;
         struct btrfs_trans_handle *trans;
+        int unlock_bits = EXTENT_LOCKED;
+        int ret;
+
+        if (create) {
+                ret = btrfs_delalloc_reserve_space(inode, len);
+                if (ret)
+                        return ret;
+                unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+        } else {
+                len = min_t(u64, len, root->sectorsize);
+        }
+
+        lockstart = start;
+        lockend = start + len - 1;
+
+        /*
+         * If this errors out it's because we couldn't invalidate pagecache for
+         * this range and we need to fallback to buffered.
+         */
+        if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+                return -ENOTBLK;
+
+        if (create) {
+                ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                     lockend, EXTENT_DELALLOC, NULL,
+                                     &cached_state, GFP_NOFS);
+                if (ret)
+                        goto unlock_err;
+        }
 
         em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-        if (IS_ERR(em))
-                return PTR_ERR(em);
+        if (IS_ERR(em)) {
+                ret = PTR_ERR(em);
+                goto unlock_err;
+        }
 
         /*
          * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5804,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
         if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
             em->block_start == EXTENT_MAP_INLINE) {
                 free_extent_map(em);
-                return -ENOTBLK;
+                ret = -ENOTBLK;
+                goto unlock_err;
         }
 
         /* Just a good old fashioned hole, return */
         if (!create && (em->block_start == EXTENT_MAP_HOLE ||
                         test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
                 free_extent_map(em);
-                /* DIO will do one hole at a time, so just unlock a sector */
-                unlock_extent(&BTRFS_I(inode)->io_tree, start,
-                              start + root->sectorsize - 1);
-                return 0;
+                ret = 0;
+                goto unlock_err;
         }
 
         /*
@@ -5827,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
          *
          */
         if (!create) {
-                len = em->len - (start - em->start);
-                goto map;
+                len = min(len, em->len - (start - em->start));
+                lockstart = start + len;
+                goto unlock;
         }
 
         if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5860,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                         btrfs_end_transaction(trans, root);
                         if (ret) {
                                 free_extent_map(em);
-                                return ret;
+                                goto unlock_err;
                         }
                         goto unlock;
                 }
@@ -5873,14 +5968,12 @@ must_cow:
          */
         len = bh_result->b_size;
         em = btrfs_new_extent_direct(inode, em, start, len);
-        if (IS_ERR(em))
-                return PTR_ERR(em);
+        if (IS_ERR(em)) {
+                ret = PTR_ERR(em);
+                goto unlock_err;
+        }
         len = min(len, em->len - (start - em->start));
 unlock:
-        clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-                         EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-                         0, NULL, GFP_NOFS);
-map:
         bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
                 inode->i_blkbits;
         bh_result->b_size = len;
@@ -5898,9 +5991,44 @@ map:
                 i_size_write(inode, start + len);
         }
 
+        /*
+         * In the case of write we need to clear and unlock the entire range,
+         * in the case of read we need to unlock only the end area that we
+         * aren't using if there is any left over space.
+         */
+        if (lockstart < lockend) {
+                if (create && len < lockend - lockstart) {
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                         lockstart + len - 1, unlock_bits, 1, 0,
+                                         &cached_state, GFP_NOFS);
+                        /*
+                         * Beside unlock, we also need to cleanup reserved space
+                         * for the left range by attaching EXTENT_DO_ACCOUNTING.
+                         */
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree,
+                                         lockstart + len, lockend,
+                                         unlock_bits | EXTENT_DO_ACCOUNTING,
+                                         1, 0, NULL, GFP_NOFS);
+                } else {
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                         lockend, unlock_bits, 1, 0,
+                                         &cached_state, GFP_NOFS);
+                }
+        } else {
+                free_extent_state(cached_state);
+        }
+
         free_extent_map(em);
 
         return 0;
+
+unlock_err:
+        if (create)
+                unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                         unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+        return ret;
 }
 
 struct btrfs_dio_private {
@@ -5908,7 +6036,6 @@ struct btrfs_dio_private {
         u64 logical_offset;
         u64 disk_bytenr;
         u64 bytes;
-        u32 *csums;
         void *private;
 
         /* number of bios pending for this dio */
@@ -5928,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
         struct inode *inode = dip->inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         u64 start;
-        u32 *private = dip->csums;
 
         start = dip->logical_offset;
         do {
@@ -5936,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                         struct page *page = bvec->bv_page;
                         char *kaddr;
                         u32 csum = ~(u32)0;
+                        u64 private = ~(u32)0;
                         unsigned long flags;
 
+                        if (get_state_private(&BTRFS_I(inode)->io_tree,
+                                              start, &private))
+                                goto failed;
                         local_irq_save(flags);
                         kaddr = kmap_atomic(page);
                         csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5947,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                         local_irq_restore(flags);
 
                         flush_dcache_page(bvec->bv_page);
-                        if (csum != *private) {
+                        if (csum != private) {
+failed:
                                 printk(KERN_ERR "btrfs csum failed ino %llu off"
                                        " %llu csum %u private %u\n",
                                        (unsigned long long)btrfs_ino(inode),
                                        (unsigned long long)start,
-                                       csum, *private);
+                                       csum, (unsigned)private);
                                 err = -EIO;
                         }
                 }
 
                 start += bvec->bv_len;
-                private++;
                 bvec++;
         } while (bvec <= bvec_end);
 
@@ -5966,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                       dip->logical_offset + dip->bytes - 1);
         bio->bi_private = dip->private;
 
-        kfree(dip->csums);
         kfree(dip);
 
         /* If we had a csum failure make sure to clear the uptodate flag */
@@ -6072,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                          int rw, u64 file_offset, int skip_sum,
-                                         u32 *csums, int async_submit)
+                                         int async_submit)
 {
         int write = rw & REQ_WRITE;
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6105,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                 if (ret)
                         goto err;
         } else if (!skip_sum) {
-                ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
-                                                file_offset, csums);
+                ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
                 if (ret)
                         goto err;
         }
@@ -6132,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
         u64 submit_len = 0;
         u64 map_length;
         int nr_pages = 0;
-        u32 *csums = dip->csums;
         int ret = 0;
         int async_submit = 0;
-        int write = rw & REQ_WRITE;
 
         map_length = orig_bio->bi_size;
         ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6171,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                         atomic_inc(&dip->pending_bios);
                         ret = __btrfs_submit_dio_bio(bio, inode, rw,
                                                      file_offset, skip_sum,
-                                                     csums, async_submit);
+                                                     async_submit);
                         if (ret) {
                                 bio_put(bio);
                                 atomic_dec(&dip->pending_bios);
                                 goto out_err;
                         }
 
-                        /* Write's use the ordered csums */
-                        if (!write && !skip_sum)
-                                csums = csums + nr_pages;
                         start_sector += submit_len >> 9;
                         file_offset += submit_len;
 
@@ -6210,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 
 submit:
         ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-                                     csums, async_submit);
+                                     async_submit);
         if (!ret)
                 return 0;
 
@@ -6246,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
                 ret = -ENOMEM;
                 goto free_ordered;
         }
-        dip->csums = NULL;
-
-        /* Write's use the ordered csum stuff, so we don't need dip->csums */
-        if (!write && !skip_sum) {
-                dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
-                if (!dip->csums) {
-                        kfree(dip);
-                        ret = -ENOMEM;
-                        goto free_ordered;
-                }
-        }
 
         dip->private = bio->bi_private;
         dip->inode = inode;
@@ -6341,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 out:
         return retval;
 }
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                         const struct iovec *iov, loff_t offset,
                         unsigned long nr_segs)
 {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
-        struct btrfs_ordered_extent *ordered;
-        struct extent_state *cached_state = NULL;
-        u64 lockstart, lockend;
-        ssize_t ret;
-        int writing = rw & WRITE;
-        int write_bits = 0;
-        size_t count = iov_length(iov, nr_segs);
 
         if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-                            offset, nr_segs)) {
+                            offset, nr_segs))
                 return 0;
-        }
-
-        lockstart = offset;
-        lockend = offset + count - 1;
-
-        if (writing) {
-                ret = btrfs_delalloc_reserve_space(inode, count);
-                if (ret)
-                        goto out;
-        }
-
-        while (1) {
-                lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                 0, &cached_state);
-                /*
-                 * We're concerned with the entire range that we're going to be
-                 * doing DIO to, so we need to make sure theres no ordered
-                 * extents in this range.
-                 */
-                ordered = btrfs_lookup_ordered_range(inode, lockstart,
-                                                     lockend - lockstart + 1);
-
-                /*
-                 * We need to make sure there are no buffered pages in this
-                 * range either, we could have raced between the invalidate in
-                 * generic_file_direct_write and locking the extent. The
-                 * invalidate needs to happen so that reads after a write do not
-                 * get stale data.
-                 */
-                if (!ordered && (!writing ||
-                    !test_range_bit(&BTRFS_I(inode)->io_tree,
-                                    lockstart, lockend, EXTENT_UPTODATE, 0,
-                                    cached_state)))
-                        break;
-
-                unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                     &cached_state, GFP_NOFS);
-
-                if (ordered) {
-                        btrfs_start_ordered_extent(inode, ordered, 1);
-                        btrfs_put_ordered_extent(ordered);
-                } else {
-                        /* Screw you mmap */
-                        ret = filemap_write_and_wait_range(file->f_mapping,
-                                                           lockstart,
-                                                           lockend);
-                        if (ret)
-                                goto out;
-
-                        /*
-                         * If we found a page that couldn't be invalidated just
-                         * fall back to buffered.
-                         */
-                        ret = invalidate_inode_pages2_range(file->f_mapping,
-                                        lockstart >> PAGE_CACHE_SHIFT,
-                                        lockend >> PAGE_CACHE_SHIFT);
-                        if (ret) {
-                                if (ret == -EBUSY)
-                                        ret = 0;
-                                goto out;
-                        }
-                }
-
-                cond_resched();
-        }
 
-        /*
-         * we don't use btrfs_set_extent_delalloc because we don't want
-         * the dirty or uptodate bits
-         */
-        if (writing) {
-                write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-                ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                     EXTENT_DELALLOC, NULL, &cached_state,
-                                     GFP_NOFS);
-                if (ret) {
-                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                         lockend, EXTENT_LOCKED | write_bits,
-                                         1, 0, &cached_state, GFP_NOFS);
-                        goto out;
-                }
-        }
-
-        free_extent_state(cached_state);
-        cached_state = NULL;
-
-        ret = __blockdev_direct_IO(rw, iocb, inode,
+        return __blockdev_direct_IO(rw, iocb, inode,
                    BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
                    iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
                    btrfs_submit_direct, 0);
-
-        if (ret < 0 && ret != -EIOCBQUEUED) {
-                clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-                              offset + iov_length(iov, nr_segs) - 1,
-                              EXTENT_LOCKED | write_bits, 1, 0,
-                              &cached_state, GFP_NOFS);
-        } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-                /*
-                 * We're falling back to buffered, unlock the section we didn't
-                 * do IO on.
-                 */
-                clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-                              offset + iov_length(iov, nr_segs) - 1,
-                              EXTENT_LOCKED | write_bits, 1, 0,
-                              &cached_state, GFP_NOFS);
-        }
-out:
-        free_extent_state(cached_state);
-        return ret;
 }
6471 6473
6472static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6474static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
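
Reviewer note: the hunk above removes btrfs_direct_IO's private
reserve/lock/retry loop; the extent-range locking now happens as the
blocks are mapped (btrfs_get_blocks_direct is handed straight to
__blockdev_direct_IO), leaving only the sanity check and the
__blockdev_direct_IO call here. Below is a minimal userspace sketch of
the retry idiom the removed loop implemented; every name in it is
invented for illustration and none of it is btrfs code.

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_mutex_t range_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t  range_cv   = PTHREAD_COND_INITIALIZER;
	static bool ordered_pending;	/* stand-in for a pending ordered extent */

	/*
	 * Take the range lock; while conflicting in-flight work exists,
	 * wait it out (pthread_cond_wait drops and retakes the lock, much
	 * as the old loop unlocked, waited on the ordered extent and
	 * relocked) and re-check before proceeding.
	 */
	static void lock_range_for_dio(void)
	{
		pthread_mutex_lock(&range_lock);
		while (ordered_pending)
			pthread_cond_wait(&range_cv, &range_lock);
		/* range is locked and quiescent: safe to map blocks */
		pthread_mutex_unlock(&range_lock);
	}

	int main(void)
	{
		lock_range_for_dio();
		printf("range quiescent, DIO could proceed\n");
		return 0;
	}
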
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7bb755677a22..9df50fa8a078 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 	uuid_le_gen(&new_uuid);
 	memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
 	root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
-	root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+	root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
 	root_item.ctime = root_item.otime;
 	btrfs_set_root_ctransid(&root_item, trans->transid);
 	btrfs_set_root_otransid(&root_item, trans->transid);
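
Reviewer note: btrfs stores the nanoseconds part of its on-disk
timestamps in a 32-bit little-endian field, so converting tv_nsec with
the 64-bit helper used the wrong width; the identical one-character fix
recurs in the root-tree.c and transaction.c hunks below. A userspace
analogue, with glibc's htole32/htole64 from <endian.h> standing in for
the kernel's cpu_to_le32/cpu_to_le64 and a deliberately simplified
struct layout (not the real on-disk format):

	#include <endian.h>
	#include <stdint.h>
	#include <stdio.h>

	struct timespec_le {		/* on-disk style: fixed width, LE */
		uint64_t sec;
		uint32_t nsec;
	} __attribute__((packed));

	int main(void)
	{
		struct timespec_le ts;

		ts.sec  = htole64((uint64_t)1346257482);
		ts.nsec = htole32((uint32_t)123456789); /* 32-bit field, 32-bit helper */
		printf("sec=%llu nsec=%u\n",
		       (unsigned long long)le64toh(ts.sec),
		       (unsigned)le32toh(ts.nsec));
		return 0;
	}
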
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index a44eff074805..2a1762c66041 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
 	if (eb->lock_nested) {
 		read_lock(&eb->lock);
-		if (&eb->lock_nested && current->pid == eb->lock_owner) {
+		if (eb->lock_nested && current->pid == eb->lock_owner) {
 			read_unlock(&eb->lock);
 			return;
 		}
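
Reviewer note: the old condition tested "&eb->lock_nested" -- the
*address* of the field, which is never NULL -- so the test reduced to
the pid comparison alone, whatever the flag held. A standalone
illustration with an invented struct (GCC and Clang diagnose this
pattern with -Waddress):

	#include <stdio.h>

	struct eb_like { int lock_nested; };

	int main(void)
	{
		struct eb_like eb = { .lock_nested = 0 };

		if (&eb.lock_nested)	/* always true: it is an address */
			printf("address test taken although lock_nested=0\n");
		if (eb.lock_nested)	/* the intended test of the value */
			printf("value test: not reached\n");
		return 0;
	}
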
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index bc424ae5a81a..38b42e7bc91d 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1364,13 +1364,17 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 	spin_lock(&fs_info->qgroup_lock);
 
 	dstgroup = add_qgroup_rb(fs_info, objectid);
-	if (!dstgroup)
+	if (IS_ERR(dstgroup)) {
+		ret = PTR_ERR(dstgroup);
 		goto unlock;
+	}
 
 	if (srcid) {
 		srcgroup = find_qgroup_rb(fs_info, srcid);
-		if (!srcgroup)
+		if (!srcgroup) {
+			ret = -EINVAL;
 			goto unlock;
+		}
 		dstgroup->rfer = srcgroup->rfer - level_size;
 		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
 		srcgroup->excl = level_size;
@@ -1379,8 +1383,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 		qgroup_dirty(fs_info, srcgroup);
 	}
 
-	if (!inherit)
+	if (!inherit) {
+		ret = -EINVAL;
 		goto unlock;
+	}
 
 	i_qgroups = (u64 *)(inherit + 1);
 	for (i = 0; i < inherit->num_qgroups; ++i) {
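
Reviewer note: both qgroup hunks enforce the same rule -- every early
"goto unlock" must assign ret first, or the function reaches the common
exit still carrying the 0 left over from an earlier successful step and
reports success for a failed inherit. A minimal sketch of the idiom,
all names invented:

	#include <errno.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Each early exit sets ret before jumping to the shared label,
	 * so the function can never fall through with a stale value. */
	static int do_inherit(void *dstgroup, void *srcgroup)
	{
		int ret = 0;

		if (dstgroup == NULL) {
			ret = -ENOMEM;
			goto unlock;
		}
		if (srcgroup == NULL) {
			ret = -EINVAL;
			goto unlock;
		}
		/* ... the real work would happen here ... */
	unlock:
		return ret;
	}

	int main(void)
	{
		int dummy;

		printf("missing source group -> %d\n", do_inherit(&dummy, NULL));
		return 0;
	}
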
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6bb465cca20f..10d8e4d88071 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
 	struct timespec ct = CURRENT_TIME;
 
 	spin_lock(&root->root_times_lock);
-	item->ctransid = trans->transid;
+	item->ctransid = cpu_to_le64(trans->transid);
 	item->ctime.sec = cpu_to_le64(ct.tv_sec);
-	item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
+	item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
 	spin_unlock(&root->root_times_lock);
 }
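
Reviewer note: besides the nsec width change, this hunk adds the
cpu_to_le64() that the raw ctransid assignment was missing -- a bug
that hides on x86 because the little-endian conversion is the identity
there, and only corrupts the on-disk field on big-endian hosts. A quick
userspace probe of that, with glibc's htole64 in place of cpu_to_le64:

	#include <endian.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t transid = 0x0123456789abcdefULL;

		printf("host value     : %016llx\n",
		       (unsigned long long)transid);
		printf("as le64 on disk: %016llx\n",
		       (unsigned long long)htole64(transid));
		printf("conversion is %s on this host\n",
		       htole64(transid) == transid ?
		       "the identity (little-endian)" : "a byte swap (big-endian)");
		return 0;
	}
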
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2eb24c477a3..83d6f9f9c220 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -838,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
 	struct btrfs_root *root = fs_info->tree_root;
-	int ret;
 
 	trace_btrfs_sync_fs(wait);
 
@@ -849,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 
 	btrfs_wait_ordered_extents(root, 0, 0);
 
-	trans = btrfs_start_transaction(root, 0);
+	spin_lock(&fs_info->trans_lock);
+	if (!fs_info->running_transaction) {
+		spin_unlock(&fs_info->trans_lock);
+		return 0;
+	}
+	spin_unlock(&fs_info->trans_lock);
+
+	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
-	ret = btrfs_commit_transaction(trans, root);
-	return ret;
+	return btrfs_commit_transaction(trans, root);
 }
 
 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
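
Reviewer note: btrfs_sync_fs no longer starts a transaction of its own
-- on a frozen filesystem that can block forever -- it returns early
when no transaction is running and otherwise only joins the one already
in flight before committing it. A sketch of that shape, all userspace
names invented:

	#include <pthread.h>
	#include <stddef.h>
	#include <stdio.h>

	static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct transaction { int users; } *running_transaction;

	/* sync must not start new work (under freeze that deadlocks); it
	 * may only attach to a transaction that is already in flight. */
	static int sync_fs(void)
	{
		struct transaction *trans;

		pthread_mutex_lock(&trans_lock);
		if (!running_transaction) {
			pthread_mutex_unlock(&trans_lock);
			return 0;	/* nothing dirty: nothing to do */
		}
		trans = running_transaction;	/* join, never create */
		trans->users++;
		pthread_mutex_unlock(&trans_lock);

		/* commit_transaction(trans) would go here */
		return 0;
	}

	int main(void)
	{
		printf("sync with no running transaction -> %d\n", sync_fs());
		return 0;
	}
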
@@ -1530,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 	while (cur_devices) {
 		head = &cur_devices->devices;
 		list_for_each_entry(dev, head, dev_list) {
+			if (dev->missing)
+				continue;
 			if (!first_dev || dev->devid < first_dev->devid)
 				first_dev = dev;
 		}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 17be3dedacba..27c26004e050 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
 	btrfs_i_size_write(parent_inode, parent_inode->i_size +
 					 dentry->d_name.len * 2);
+	parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
 	ret = btrfs_update_inode(trans, parent_root, parent_inode);
 	if (ret)
 		goto abort_trans_dput;
@@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	memcpy(new_root_item->parent_uuid, root->root_item.uuid,
 			BTRFS_UUID_SIZE);
 	new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
-	new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+	new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
 	btrfs_set_root_otransid(new_root_item, trans->transid);
 	memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
 	memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
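
Reviewer note: the added i_mtime/i_ctime line in the first hunk gives
snapshot creation the same semantics as any other directory-entry
creation: the parent directory's timestamps are bumped before the inode
is written back. That expectation is easy to check from userspace with
mkdir(2):

	#include <stdio.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		struct stat before, after;

		if (stat(".", &before) || mkdir("mtime-test-dir", 0755) ||
		    stat(".", &after))
			return 1;
		/* nanosecond comparison; some filesystems are coarser */
		printf("parent mtime %s\n",
		       (after.st_mtim.tv_sec > before.st_mtim.tv_sec ||
			(after.st_mtim.tv_sec == before.st_mtim.tv_sec &&
			 after.st_mtim.tv_nsec > before.st_mtim.tv_nsec)) ?
		       "advanced" : "unchanged");
		rmdir("mtime-test-dir");
		return 0;
	}
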
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e86ae04abe6a..88b969aeeb71 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -227,9 +227,8 @@ loop_lock:
 		cur = pending;
 		pending = pending->bi_next;
 		cur->bi_next = NULL;
-		atomic_dec(&fs_info->nr_async_bios);
 
-		if (atomic_read(&fs_info->nr_async_bios) < limit &&
+		if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
 		    waitqueue_active(&fs_info->async_submit_wait))
 			wake_up(&fs_info->async_submit_wait);
 
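
Reviewer note: a plain atomic_dec() followed by atomic_read() lets
another CPU move the counter between the two operations, and it also
lacks the full memory barrier that the unlocked waitqueue_active()
check wants; atomic_dec_return() gives both the post-decrement value in
one step and the implied barrier. A C11 analogue of the single-step
decrement-and-test using <stdatomic.h>, numbers illustrative:

	#include <stdatomic.h>
	#include <stdio.h>

	#define LIMIT 256

	static atomic_int nr_async_bios = 300;

	int main(void)
	{
		/*
		 * atomic_fetch_sub returns the value *before* the
		 * subtraction, so "- 1" yields the post-decrement value:
		 * the userspace spelling of atomic_dec_return().
		 */
		int after = atomic_fetch_sub(&nr_async_bios, 1) - 1;

		if (after < LIMIT)
			printf("would wake waiters at %d\n", after);
		else
			printf("still over limit at %d\n", after);
		return 0;
	}
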
@@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 		memcpy(new_device, device, sizeof(*new_device));
 
 		/* Safe because we are under uuid_mutex */
-		name = rcu_string_strdup(device->name->str, GFP_NOFS);
-		BUG_ON(device->name && !name); /* -ENOMEM */
-		rcu_assign_pointer(new_device->name, name);
+		if (device->name) {
+			name = rcu_string_strdup(device->name->str, GFP_NOFS);
+			BUG_ON(device->name && !name); /* -ENOMEM */
+			rcu_assign_pointer(new_device->name, name);
+		}
 		new_device->bdev = NULL;
 		new_device->writeable = 0;
 		new_device->in_fs_metadata = 0;
@@ -4605,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	return ret;
 }
 
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-						   u64 logical, int mirror_num)
-{
-	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
-	int ret;
-	u64 map_length = 0;
-	struct btrfs_bio *bbio = NULL;
-	struct btrfs_device *device;
-
-	BUG_ON(mirror_num == 0);
-	ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
-			      mirror_num);
-	if (ret) {
-		BUG_ON(bbio != NULL);
-		return NULL;
-	}
-	BUG_ON(mirror_num != bbio->mirror_num);
-	device = bbio->stripes[mirror_num - 1].dev;
-	kfree(bbio);
-	return device;
-}
-
 int btrfs_read_chunk_tree(struct btrfs_root *root)
 {
 	struct btrfs_path *path;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5479325987b3..53c06af92e8d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
 			 u64 *start, u64 *max_avail);
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-						   u64 logical, int mirror_num);
 void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
 int btrfs_get_dev_stats(struct btrfs_root *root,