Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/async-thread.c     |   15
-rw-r--r--  fs/btrfs/async-thread.h     |    4
-rw-r--r--  fs/btrfs/backref.c          |  122
-rw-r--r--  fs/btrfs/backref.h          |    5
-rw-r--r--  fs/btrfs/compression.c      |   38
-rw-r--r--  fs/btrfs/compression.h      |    2
-rw-r--r--  fs/btrfs/ctree.c            |  384
-rw-r--r--  fs/btrfs/ctree.h            |  169
-rw-r--r--  fs/btrfs/delayed-inode.c    |   33
-rw-r--r--  fs/btrfs/delayed-ref.c      |   33
-rw-r--r--  fs/btrfs/dir-item.c         |   10
-rw-r--r--  fs/btrfs/disk-io.c          |  649
-rw-r--r--  fs/btrfs/disk-io.h          |   10
-rw-r--r--  fs/btrfs/export.c           |    2
-rw-r--r--  fs/btrfs/extent-tree.c      |  737
-rw-r--r--  fs/btrfs/extent_io.c        | 1035
-rw-r--r--  fs/btrfs/extent_io.h        |   62
-rw-r--r--  fs/btrfs/file-item.c        |   57
-rw-r--r--  fs/btrfs/file.c             |   52
-rw-r--r--  fs/btrfs/free-space-cache.c |   15
-rw-r--r--  fs/btrfs/inode-item.c       |    6
-rw-r--r--  fs/btrfs/inode-map.c        |   25
-rw-r--r--  fs/btrfs/inode.c            |  457
-rw-r--r--  fs/btrfs/ioctl.c            |  194
-rw-r--r--  fs/btrfs/locking.c          |    6
-rw-r--r--  fs/btrfs/locking.h          |    4
-rw-r--r--  fs/btrfs/ordered-data.c     |   60
-rw-r--r--  fs/btrfs/ordered-data.h     |   24
-rw-r--r--  fs/btrfs/orphan.c           |    2
-rw-r--r--  fs/btrfs/reada.c            |   10
-rw-r--r--  fs/btrfs/relocation.c       |  130
-rw-r--r--  fs/btrfs/root-tree.c        |   25
-rw-r--r--  fs/btrfs/scrub.c            | 1407
-rw-r--r--  fs/btrfs/struct-funcs.c     |   53
-rw-r--r--  fs/btrfs/super.c            |  192
-rw-r--r--  fs/btrfs/transaction.c      |  213
-rw-r--r--  fs/btrfs/transaction.h      |    3
-rw-r--r--  fs/btrfs/tree-log.c         |   96
-rw-r--r--  fs/btrfs/tree-log.h         |    2
-rw-r--r--  fs/btrfs/volumes.c          |  240
-rw-r--r--  fs/btrfs/volumes.h          |    4
41 files changed, 4362 insertions(+), 2225 deletions(-)
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 0cc20b35c1c4..42704149b723 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -171,11 +171,11 @@ out:
 	spin_unlock_irqrestore(&workers->lock, flags);
 }
 
-static noinline int run_ordered_completions(struct btrfs_workers *workers,
+static noinline void run_ordered_completions(struct btrfs_workers *workers,
 					    struct btrfs_work *work)
 {
 	if (!workers->ordered)
-		return 0;
+		return;
 
 	set_bit(WORK_DONE_BIT, &work->flags);
 
@@ -213,7 +213,6 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
 	}
 
 	spin_unlock(&workers->order_lock);
-	return 0;
 }
 
 static void put_worker(struct btrfs_worker_thread *worker)
@@ -399,7 +398,7 @@ again:
 /*
  * this will wait for all the worker threads to shutdown
  */
-int btrfs_stop_workers(struct btrfs_workers *workers)
+void btrfs_stop_workers(struct btrfs_workers *workers)
 {
 	struct list_head *cur;
 	struct btrfs_worker_thread *worker;
@@ -427,7 +426,6 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
 		put_worker(worker);
 	}
 	spin_unlock_irq(&workers->lock);
-	return 0;
 }
 
 /*
@@ -615,14 +613,14 @@ found:
  * it was taken from. It is intended for use with long running work functions
  * that make some progress and want to give the cpu up for others.
  */
-int btrfs_requeue_work(struct btrfs_work *work)
+void btrfs_requeue_work(struct btrfs_work *work)
 {
 	struct btrfs_worker_thread *worker = work->worker;
 	unsigned long flags;
 	int wake = 0;
 
 	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
-		goto out;
+		return;
 
 	spin_lock_irqsave(&worker->lock, flags);
 	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
@@ -649,9 +647,6 @@ int btrfs_requeue_work(struct btrfs_work *work)
 	if (wake)
 		wake_up_process(worker->task);
 	spin_unlock_irqrestore(&worker->lock, flags);
-out:
-
-	return 0;
 }
 
 void btrfs_set_work_high_prio(struct btrfs_work *work)
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index f34cc31fa3c9..063698b90ce2 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -111,9 +111,9 @@ struct btrfs_workers {
 
 void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
 int btrfs_start_workers(struct btrfs_workers *workers);
-int btrfs_stop_workers(struct btrfs_workers *workers);
+void btrfs_stop_workers(struct btrfs_workers *workers);
 void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
 			struct btrfs_workers *async_starter);
-int btrfs_requeue_work(struct btrfs_work *work);
+void btrfs_requeue_work(struct btrfs_work *work);
 void btrfs_set_work_high_prio(struct btrfs_work *work);
 #endif
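The async-thread hunks above all apply one mechanical cleanup: functions whose only possible return value was 0 become void, and callers lose a dead error check. A minimal sketch of the pattern, with hypothetical names (worker_pool/drain_queue are illustrative, not btrfs code):

	/* before: the int return carries no information, every path yields 0 */
	static int stop_pool_old(struct worker_pool *pool)
	{
		drain_queue(pool);
		return 0;
	}

	/* after: void documents that stopping cannot fail and removes the
	 * temptation for callers to branch on a meaningless status */
	static void stop_pool(struct worker_pool *pool)
	{
		drain_queue(pool);
	}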
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0436c12da8c2..f4e90748940a 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -116,6 +116,7 @@ add_parent:
  * to a logical address
  */
 static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
+				  int search_commit_root,
 				  struct __prelim_ref *ref,
 				  struct ulist *parents)
 {
@@ -131,6 +132,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
+	path->search_commit_root = !!search_commit_root;
 
 	root_key.objectid = ref->root_id;
 	root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -188,6 +190,7 @@ out:
  * resolve all indirect backrefs from the list
  */
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+				   int search_commit_root,
 				   struct list_head *head)
 {
 	int err;
@@ -212,7 +215,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 			continue;
 		if (ref->count == 0)
 			continue;
-		err = __resolve_indirect_ref(fs_info, ref, parents);
+		err = __resolve_indirect_ref(fs_info, search_commit_root,
+					     ref, parents);
 		if (err) {
 			if (ret == 0)
 				ret = err;
@@ -586,6 +590,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_head *head;
 	int info_level = 0;
 	int ret;
+	int search_commit_root = (trans == BTRFS_BACKREF_SEARCH_COMMIT_ROOT);
 	struct list_head prefs_delayed;
 	struct list_head prefs;
 	struct __prelim_ref *ref;
@@ -600,6 +605,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
+	path->search_commit_root = !!search_commit_root;
 
 	/*
	 * grab both a lock on the path and a lock on the delayed ref head.
@@ -614,35 +620,39 @@ again:
 		goto out;
 	BUG_ON(ret == 0);
 
-	/*
-	 * look if there are updates for this ref queued and lock the head
-	 */
-	delayed_refs = &trans->transaction->delayed_refs;
-	spin_lock(&delayed_refs->lock);
-	head = btrfs_find_delayed_ref_head(trans, bytenr);
-	if (head) {
-		if (!mutex_trylock(&head->mutex)) {
-			atomic_inc(&head->node.refs);
-			spin_unlock(&delayed_refs->lock);
-
-			btrfs_release_path(path);
-
-			/*
-			 * Mutex was contended, block until it's
-			 * released and try again
-			 */
-			mutex_lock(&head->mutex);
-			mutex_unlock(&head->mutex);
-			btrfs_put_delayed_ref(&head->node);
-			goto again;
-		}
-		ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
-		if (ret) {
-			spin_unlock(&delayed_refs->lock);
-			goto out;
+	if (trans != BTRFS_BACKREF_SEARCH_COMMIT_ROOT) {
+		/*
+		 * look if there are updates for this ref queued and lock the
+		 * head
+		 */
+		delayed_refs = &trans->transaction->delayed_refs;
+		spin_lock(&delayed_refs->lock);
+		head = btrfs_find_delayed_ref_head(trans, bytenr);
+		if (head) {
+			if (!mutex_trylock(&head->mutex)) {
+				atomic_inc(&head->node.refs);
+				spin_unlock(&delayed_refs->lock);
+
+				btrfs_release_path(path);
+
+				/*
+				 * Mutex was contended, block until it's
+				 * released and try again
+				 */
+				mutex_lock(&head->mutex);
+				mutex_unlock(&head->mutex);
+				btrfs_put_delayed_ref(&head->node);
+				goto again;
+			}
+			ret = __add_delayed_refs(head, seq, &info_key,
						 &prefs_delayed);
+			if (ret) {
+				spin_unlock(&delayed_refs->lock);
+				goto out;
+			}
 		}
+		spin_unlock(&delayed_refs->lock);
 	}
-	spin_unlock(&delayed_refs->lock);
 
 	if (path->slots[0]) {
 		struct extent_buffer *leaf;
@@ -679,7 +689,7 @@ again:
 	if (ret)
 		goto out;
 
-	ret = __resolve_indirect_refs(fs_info, &prefs);
+	ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs);
 	if (ret)
 		goto out;
 
@@ -1074,8 +1084,7 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 	return 0;
 }
 
-static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
-				struct btrfs_path *path, u64 logical,
+static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical,
 				u64 orig_extent_item_objectid,
 				u64 extent_item_pos, u64 root,
 				iterate_extent_inodes_t *iterate, void *ctx)
@@ -1143,35 +1152,38 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
  * calls iterate() for every inode that references the extent identified by
  * the given parameters.
  * when the iterator function returns a non-zero value, iteration stops.
- * path is guaranteed to be in released state when iterate() is called.
  */
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
-				struct btrfs_path *path,
 				u64 extent_item_objectid, u64 extent_item_pos,
+				int search_commit_root,
 				iterate_extent_inodes_t *iterate, void *ctx)
 {
 	int ret;
 	struct list_head data_refs = LIST_HEAD_INIT(data_refs);
 	struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
 	struct btrfs_trans_handle *trans;
-	struct ulist *refs;
-	struct ulist *roots;
+	struct ulist *refs = NULL;
+	struct ulist *roots = NULL;
 	struct ulist_node *ref_node = NULL;
 	struct ulist_node *root_node = NULL;
 	struct seq_list seq_elem;
-	struct btrfs_delayed_ref_root *delayed_refs;
-
-	trans = btrfs_join_transaction(fs_info->extent_root);
-	if (IS_ERR(trans))
-		return PTR_ERR(trans);
+	struct btrfs_delayed_ref_root *delayed_refs = NULL;
 
 	pr_debug("resolving all inodes for extent %llu\n",
			extent_item_objectid);
 
-	delayed_refs = &trans->transaction->delayed_refs;
-	spin_lock(&delayed_refs->lock);
-	btrfs_get_delayed_seq(delayed_refs, &seq_elem);
-	spin_unlock(&delayed_refs->lock);
+	if (search_commit_root) {
+		trans = BTRFS_BACKREF_SEARCH_COMMIT_ROOT;
+	} else {
+		trans = btrfs_join_transaction(fs_info->extent_root);
+		if (IS_ERR(trans))
+			return PTR_ERR(trans);
+
+		delayed_refs = &trans->transaction->delayed_refs;
+		spin_lock(&delayed_refs->lock);
+		btrfs_get_delayed_seq(delayed_refs, &seq_elem);
+		spin_unlock(&delayed_refs->lock);
+	}
 
 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
					extent_item_pos, seq_elem.seq,
@@ -1188,7 +1200,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	while (!ret && (root_node = ulist_next(roots, root_node))) {
 		pr_debug("root %llu references leaf %llu\n",
				root_node->val, ref_node->val);
-		ret = iterate_leaf_refs(fs_info, path, ref_node->val,
+		ret = iterate_leaf_refs(fs_info, ref_node->val,
					extent_item_objectid,
					extent_item_pos, root_node->val,
					iterate, ctx);
@@ -1198,8 +1210,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	ulist_free(refs);
 	ulist_free(roots);
 out:
-	btrfs_put_delayed_seq(delayed_refs, &seq_elem);
-	btrfs_end_transaction(trans, fs_info->extent_root);
+	if (!search_commit_root) {
+		btrfs_put_delayed_seq(delayed_refs, &seq_elem);
+		btrfs_end_transaction(trans, fs_info->extent_root);
+	}
+
 	return ret;
 }
 
@@ -1210,6 +1225,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 	int ret;
 	u64 extent_item_pos;
 	struct btrfs_key found_key;
+	int search_commit_root = path->search_commit_root;
 
 	ret = extent_from_logical(fs_info, logical, path,
			&found_key);
@@ -1220,8 +1236,9 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 		return ret;
 
 	extent_item_pos = logical - found_key.objectid;
-	ret = iterate_extent_inodes(fs_info, path, found_key.objectid,
-					extent_item_pos, iterate, ctx);
+	ret = iterate_extent_inodes(fs_info, found_key.objectid,
					extent_item_pos, search_commit_root,
+					iterate, ctx);
 
 	return ret;
 }
@@ -1342,12 +1359,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
				inode_to_path, ipath);
 }
 
-/*
- * allocates space to return multiple file system paths for an inode.
- * total_bytes to allocate are passed, note that space usable for actual path
- * information will be total_bytes - sizeof(struct inode_fs_paths).
- * the returned pointer must be freed with free_ipath() in the end.
- */
 struct btrfs_data_container *init_data_container(u32 total_bytes)
 {
 	struct btrfs_data_container *data;
@@ -1403,5 +1414,6 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 
 void free_ipath(struct inode_fs_paths *ipath)
 {
+	kfree(ipath->fspath);
 	kfree(ipath);
 }
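The free_ipath() fix at the end of backref.c plugs a small leak: init_ipath() allocates the fspath buffer separately from the inode_fs_paths struct that owns it, so teardown has to release the member before the owner. The shape of the fix, with hypothetical names (kfree(NULL) is a safe no-op, so no NULL check is needed):

	struct path_collector {
		char *fspath;	/* allocated separately by the init helper */
	};

	static void free_collector(struct path_collector *pc)
	{
		kfree(pc->fspath);	/* member first ... */
		kfree(pc);		/* ... then the owning struct */
	}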
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index d00dfa9ca934..57ea2e959e4d 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -22,6 +22,8 @@
 #include "ioctl.h"
 #include "ulist.h"
 
+#define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0)
+
 struct inode_fs_paths {
 	struct btrfs_path *btrfs_path;
 	struct btrfs_root *fs_root;
@@ -44,9 +46,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
				u64 *out_root, u8 *out_level);
 
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
-				struct btrfs_path *path,
				u64 extent_item_objectid,
-				u64 extent_offset,
+				u64 extent_offset, int search_commit_root,
				iterate_extent_inodes_t *iterate, void *ctx);
 
 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
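BTRFS_BACKREF_SEARCH_COMMIT_ROOT, added above, is just a NULL btrfs_trans_handle pointer used as a sentinel: callers that want to resolve backrefs against the last committed tree pass it instead of a live transaction, and find_parent_nodes()/iterate_extent_inodes() compare against it to skip joining a transaction and locking delayed refs. A stripped-down sketch of the idiom (hypothetical names, not the btrfs code itself):

	struct trans_handle;	/* opaque; only its address is compared */

	/* a NULL handle means "no transaction: search committed state" */
	#define SEARCH_COMMIT_ROOT ((struct trans_handle *)0)

	static int resolve_refs(struct trans_handle *trans)
	{
		if (trans != SEARCH_COMMIT_ROOT) {
			/* live transaction: also merge pending (delayed) updates */
		}
		/* common path: walk the committed on-disk structures */
		return 0;
	}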
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b805afb37fa8..d286b40a5671 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -226,8 +226,8 @@ out:
  * Clear the writeback bits on all of the file
  * pages for a compressed write
  */
-static noinline int end_compressed_writeback(struct inode *inode, u64 start,
+static noinline void end_compressed_writeback(struct inode *inode, u64 start,
					      unsigned long ram_size)
 {
 	unsigned long index = start >> PAGE_CACHE_SHIFT;
 	unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
@@ -253,7 +253,6 @@ static noinline int end_compressed_writeback(struct inode *inode, u64 start,
 		index += ret;
 	}
 	/* the inode may be gone now */
-	return 0;
 }
 
 /*
@@ -392,16 +391,16 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
			 */
			atomic_inc(&cb->pending_bios);
			ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 
			if (!skip_sum) {
				ret = btrfs_csum_one_bio(root, inode, bio,
							 start, 1);
-				BUG_ON(ret);
+				BUG_ON(ret); /* -ENOMEM */
			}
 
			ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 
			bio_put(bio);
 
@@ -421,15 +420,15 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bio_get(bio);
 
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	if (!skip_sum) {
 		ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-		BUG_ON(ret);
+		BUG_ON(ret); /* -ENOMEM */
 	}
 
 	ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	bio_put(bio);
 	return 0;
@@ -497,7 +496,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
		 * sure they map to this compressed extent on disk.
		 */
		set_page_extent_mapped(page);
-		lock_extent(tree, last_offset, end, GFP_NOFS);
+		lock_extent(tree, last_offset, end);
		read_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, last_offset,
					   PAGE_CACHE_SIZE);
@@ -507,7 +506,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
		    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
			free_extent_map(em);
-			unlock_extent(tree, last_offset, end, GFP_NOFS);
+			unlock_extent(tree, last_offset, end);
			unlock_page(page);
			page_cache_release(page);
			break;
@@ -535,7 +534,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
			nr_pages++;
			page_cache_release(page);
		} else {
-			unlock_extent(tree, last_offset, end, GFP_NOFS);
+			unlock_extent(tree, last_offset, end);
			unlock_page(page);
			page_cache_release(page);
			break;
@@ -662,7 +661,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
			bio_get(comp_bio);
 
			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 
			/*
			 * inc the count before we submit the bio so
@@ -675,14 +674,14 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
				ret = btrfs_lookup_bio_sums(root, inode,
							    comp_bio, sums);
-				BUG_ON(ret);
+				BUG_ON(ret); /* -ENOMEM */
			}
			sums += (comp_bio->bi_size + root->sectorsize - 1) /
				root->sectorsize;
 
			ret = btrfs_map_bio(root, READ, comp_bio,
					    mirror_num, 0);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 
			bio_put(comp_bio);
 
@@ -698,15 +697,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio_get(comp_bio);
 
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 		ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
-		BUG_ON(ret);
+		BUG_ON(ret); /* -ENOMEM */
 	}
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	bio_put(comp_bio);
 	return 0;
@@ -734,7 +733,7 @@ struct btrfs_compress_op *btrfs_compress_op[] = {
 	&btrfs_lzo_compress,
 };
 
-int __init btrfs_init_compress(void)
+void __init btrfs_init_compress(void)
 {
 	int i;
 
@@ -744,7 +743,6 @@ int __init btrfs_init_compress(void)
		atomic_set(&comp_alloc_workspace[i], 0);
		init_waitqueue_head(&comp_workspace_wait[i]);
 	}
-	return 0;
 }
 
 /*
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index a12059f4f0fd..9afb0a62ae82 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,7 +19,7 @@
 #ifndef __BTRFS_COMPRESSION_
 #define __BTRFS_COMPRESSION_
 
-int btrfs_init_compress(void);
+void btrfs_init_compress(void);
 void btrfs_exit_compress(void);
 
 int btrfs_compress_pages(int type, struct address_space *mapping,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0639a555e16e..e801f226d7e0 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -36,7 +36,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);
 
 struct btrfs_path *btrfs_alloc_path(void)
@@ -156,10 +156,23 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
 
-	rcu_read_lock();
-	eb = rcu_dereference(root->node);
-	extent_buffer_get(eb);
-	rcu_read_unlock();
+	while (1) {
+		rcu_read_lock();
+		eb = rcu_dereference(root->node);
+
+		/*
+		 * RCU really hurts here, we could free up the root node because
+		 * it was cow'ed but we may not get the new root node yet so do
+		 * the inc_not_zero dance and if it doesn't work then
+		 * synchronize_rcu and try again.
+		 */
+		if (atomic_inc_not_zero(&eb->refs)) {
+			rcu_read_unlock();
+			break;
+		}
+		rcu_read_unlock();
+		synchronize_rcu();
+	}
 	return eb;
 }
 
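The btrfs_root_node() hunk above replaces a plain extent_buffer_get() with an atomic_inc_not_zero() loop. Under RCU, the pointer read from root->node can belong to a node whose reference count already hit zero because it was just COWed away; incrementing only if the count is still nonzero, and otherwise waiting out a grace period and rereading, guarantees the caller ends up holding a reference to a live root. A simplified sketch of the pattern, assuming kernel RCU/atomic primitives and a hypothetical refcounted object:

	#include <linux/rcupdate.h>
	#include <linux/atomic.h>

	struct node {
		atomic_t refs;
		/* ... payload ... */
	};

	static struct node *get_published_node(struct node __rcu **slot)
	{
		struct node *n;

		while (1) {
			rcu_read_lock();
			n = rcu_dereference(*slot);
			if (atomic_inc_not_zero(&n->refs)) {
				rcu_read_unlock();	/* ref taken: safe outside RCU */
				return n;
			}
			rcu_read_unlock();
			synchronize_rcu();	/* object died; wait for a replacement */
		}
	}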
@@ -331,8 +344,13 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 	if (btrfs_block_can_be_shared(root, buf)) {
 		ret = btrfs_lookup_extent_info(trans, root, buf->start,
					       buf->len, &refs, &flags);
-		BUG_ON(ret);
-		BUG_ON(refs == 0);
+		if (ret)
+			return ret;
+		if (refs == 0) {
+			ret = -EROFS;
+			btrfs_std_error(root->fs_info, ret);
+			return ret;
+		}
 	} else {
 		refs = 1;
 		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
@@ -351,14 +369,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
		    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
			ret = btrfs_inc_ref(trans, root, buf, 1, 1);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 
			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID) {
				ret = btrfs_dec_ref(trans, root, buf, 0, 1);
-				BUG_ON(ret);
+				BUG_ON(ret); /* -ENOMEM */
				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
-				BUG_ON(ret);
+				BUG_ON(ret); /* -ENOMEM */
			}
			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		} else {
@@ -368,14 +386,15 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0, 1);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
		}
		if (new_flags != 0) {
			ret = btrfs_set_disk_extent_flags(trans, root,
							  buf->start,
							  buf->len,
							  new_flags, 0);
-			BUG_ON(ret);
+			if (ret)
+				return ret;
		}
 	} else {
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
@@ -384,9 +403,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0, 1);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
			ret = btrfs_dec_ref(trans, root, buf, 1, 1);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
		}
		clean_tree_block(trans, root, buf);
		*last_ref = 1;
@@ -415,7 +434,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *cow;
-	int level;
+	int level, ret;
 	int last_ref = 0;
 	int unlock_orig = 0;
 	u64 parent_start;
@@ -467,7 +486,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);
 
-	update_ref_for_cow(trans, root, buf, cow, &last_ref);
+	ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
+		return ret;
+	}
 
 	if (root->ref_cows)
		btrfs_reloc_cow_block(trans, root, buf, cow);
@@ -504,7 +527,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	}
 	if (unlock_orig)
		btrfs_tree_unlock(buf);
-	free_extent_buffer(buf);
+	free_extent_buffer_stale(buf);
 	btrfs_mark_buffer_dirty(cow);
 	*cow_ret = cow;
 	return 0;
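The update_ref_for_cow()/__btrfs_cow_block() hunks above trade BUG_ON() crashes for propagated errors: an impossible reference count is reported as -EROFS via btrfs_std_error(), a failed COW aborts the transaction with btrfs_abort_transaction(), and the BUG_ON() calls that remain are annotated with the only failure they can actually see (-ENOMEM). A rough sketch of the propagate-and-abort shape, with hypothetical names rather than the btrfs implementation:

	static int cow_step(struct trans *trans, struct node *buf)
	{
		int ret;

		ret = update_refs(trans, buf);	/* may fail: -EIO, -ENOMEM, ... */
		if (ret) {
			/* poison the transaction and flip the fs read-only
			 * instead of oopsing the whole machine */
			abort_transaction(trans, ret);
			return ret;
		}
		return 0;
	}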
@@ -934,7 +957,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
		/* promote the child to a root */
		child = read_node_slot(root, mid, 0);
-		BUG_ON(!child);
+		if (!child) {
+			ret = -EROFS;
+			btrfs_std_error(root->fs_info, ret);
+			goto enospc;
+		}
+
		btrfs_tree_lock(child);
		btrfs_set_lock_blocking(child);
		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
@@ -959,7 +987,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
		/* once for the root ptr */
-		free_extent_buffer(mid);
+		free_extent_buffer_stale(mid);
		return 0;
 	}
 	if (btrfs_header_nritems(mid) >
@@ -1010,13 +1038,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		if (btrfs_header_nritems(right) == 0) {
			clean_tree_block(trans, root, right);
			btrfs_tree_unlock(right);
-			wret = del_ptr(trans, root, path, level + 1, pslot +
-				       1);
-			if (wret)
-				ret = wret;
+			del_ptr(trans, root, path, level + 1, pslot + 1);
			root_sub_used(root, right->len);
			btrfs_free_tree_block(trans, root, right, 0, 1, 0);
-			free_extent_buffer(right);
+			free_extent_buffer_stale(right);
			right = NULL;
		} else {
			struct btrfs_disk_key right_key;
@@ -1035,7 +1060,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
-		BUG_ON(!left);
+		if (!left) {
+			ret = -EROFS;
+			btrfs_std_error(root->fs_info, ret);
+			goto enospc;
+		}
		wret = balance_node_right(trans, root, mid, left);
		if (wret < 0) {
			ret = wret;
@@ -1051,12 +1080,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	if (btrfs_header_nritems(mid) == 0) {
		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
-		wret = del_ptr(trans, root, path, level + 1, pslot);
-		if (wret)
-			ret = wret;
+		del_ptr(trans, root, path, level + 1, pslot);
		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
-		free_extent_buffer(mid);
+		free_extent_buffer_stale(mid);
		mid = NULL;
 	} else {
		/* update the parent key to reflect our changes */
@@ -1382,7 +1409,8 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 * if lowest_unlock is 1, level 0 won't be unlocked
 */
 static noinline void unlock_up(struct btrfs_path *path, int level,
-			       int lowest_unlock)
+			       int lowest_unlock, int min_write_lock_level,
+			       int *write_lock_level)
 {
 	int i;
 	int skip_level = level;
@@ -1414,6 +1442,11 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
			btrfs_tree_unlock_rw(t, path->locks[i]);
			path->locks[i] = 0;
+			if (write_lock_level &&
+			    i > min_write_lock_level &&
+			    i <= *write_lock_level) {
+				*write_lock_level = i - 1;
+			}
		}
 	}
 }
@@ -1637,6 +1670,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	/* everything at write_lock_level or lower must be write locked */
 	int write_lock_level = 0;
 	u8 lowest_level = 0;
+	int min_write_lock_level;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
@@ -1664,6 +1698,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (cow && (p->keep_locks || p->lowest_level))
		write_lock_level = BTRFS_MAX_LEVEL;
 
+	min_write_lock_level = write_lock_level;
+
 again:
 	/*
	 * we try very hard to do read locks on the root
@@ -1795,7 +1831,8 @@ cow_done:
				goto again;
			}
 
-			unlock_up(p, level, lowest_unlock);
+			unlock_up(p, level, lowest_unlock,
+				  min_write_lock_level, &write_lock_level);
 
			if (level == lowest_level) {
				if (dec)
@@ -1857,7 +1894,8 @@ cow_done:
				}
			}
			if (!p->search_for_split)
-				unlock_up(p, level, lowest_unlock);
+				unlock_up(p, level, lowest_unlock,
+					  min_write_lock_level, &write_lock_level);
			goto done;
		}
 	}
@@ -1881,15 +1919,12 @@ done:
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 *
- * If this fails to write a tree block, it returns -1, but continues
- * fixing up the blocks in ram so the tree is consistent.
 */
-static int fixup_low_keys(struct btrfs_trans_handle *trans,
+static void fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
 {
 	int i;
-	int ret = 0;
 	struct extent_buffer *t;
 
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1902,7 +1937,6 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
		if (tslot != 0)
			break;
 	}
-	return ret;
 }
 
 /*
@@ -1911,9 +1945,9 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
 * This function isn't completely safe. It's the caller's responsibility
 * that the new key won't break the order
 */
-int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, struct btrfs_path *path,
-			    struct btrfs_key *new_key)
+void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, struct btrfs_path *path,
+			     struct btrfs_key *new_key)
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *eb;
@@ -1923,13 +1957,11 @@ int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
 	slot = path->slots[0];
 	if (slot > 0) {
		btrfs_item_key(eb, &disk_key, slot - 1);
-		if (comp_keys(&disk_key, new_key) >= 0)
-			return -1;
+		BUG_ON(comp_keys(&disk_key, new_key) >= 0);
 	}
 	if (slot < btrfs_header_nritems(eb) - 1) {
		btrfs_item_key(eb, &disk_key, slot + 1);
-		if (comp_keys(&disk_key, new_key) <= 0)
-			return -1;
+		BUG_ON(comp_keys(&disk_key, new_key) <= 0);
 	}
 
 	btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -1937,7 +1969,6 @@ int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(eb);
 	if (slot == 0)
		fixup_low_keys(trans, root, path, &disk_key, 1);
-	return 0;
 }
 
 /*
@@ -2140,12 +2171,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 *
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
- *
- * returns zero on success and < 0 on any error
 */
-static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
-		      *root, struct btrfs_path *path, struct btrfs_disk_key
-		      *key, u64 bytenr, int slot, int level)
+static void insert_ptr(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct btrfs_path *path,
+		       struct btrfs_disk_key *key, u64 bytenr,
+		       int slot, int level)
 {
 	struct extent_buffer *lower;
 	int nritems;
@@ -2155,8 +2185,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
 	lower = path->nodes[level];
 	nritems = btrfs_header_nritems(lower);
 	BUG_ON(slot > nritems);
-	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
-		BUG();
+	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
 	if (slot != nritems) {
		memmove_extent_buffer(lower,
			      btrfs_node_key_ptr_offset(slot + 1),
@@ -2169,7 +2198,6 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
 	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
 	btrfs_set_header_nritems(lower, nritems + 1);
 	btrfs_mark_buffer_dirty(lower);
-	return 0;
 }
 
 /*
@@ -2190,7 +2218,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	struct btrfs_disk_key disk_key;
 	int mid;
 	int ret;
-	int wret;
 	u32 c_nritems;
 
 	c = path->nodes[level];
@@ -2247,11 +2274,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(c);
 	btrfs_mark_buffer_dirty(split);
 
-	wret = insert_ptr(trans, root, path, &disk_key, split->start,
-			  path->slots[level + 1] + 1,
-			  level + 1);
-	if (wret)
-		ret = wret;
+	insert_ptr(trans, root, path, &disk_key, split->start,
+		   path->slots[level + 1] + 1, level + 1);
 
 	if (path->slots[level] >= mid) {
		path->slots[level] -= mid;
@@ -2320,6 +2344,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 {
 	struct extent_buffer *left = path->nodes[0];
 	struct extent_buffer *upper = path->nodes[1];
+	struct btrfs_map_token token;
 	struct btrfs_disk_key disk_key;
 	int slot;
 	u32 i;
@@ -2331,6 +2356,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	u32 data_end;
 	u32 this_item_size;
 
+	btrfs_init_map_token(&token);
+
 	if (empty)
		nr = 0;
 	else
@@ -2408,8 +2435,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	push_space = BTRFS_LEAF_DATA_SIZE(root);
 	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
-		push_space -= btrfs_item_size(right, item);
-		btrfs_set_item_offset(right, item, push_space);
+		push_space -= btrfs_token_item_size(right, item, &token);
+		btrfs_set_token_item_offset(right, item, push_space, &token);
 	}
 
 	left_nritems -= push_items;
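The *_token accessor conversions that start here batch metadata field accesses: the plain btrfs_item_offset()/btrfs_set_item_offset() helpers map the backing extent-buffer page on every call, while the token variants cache that mapping in a caller-initialized btrfs_map_token and reuse it across a loop, remapping only when an access crosses to a different page. A schematic sketch of the caching idea (hypothetical names, not the btrfs internals):

	struct map_token {
		void *kaddr;		/* cached kernel mapping of one page */
		unsigned long page_nr;	/* which page kaddr covers, if non-NULL */
	};

	static void token_init(struct map_token *tok)
	{
		tok->kaddr = NULL;	/* empty cache; first access fills it */
	}

	/* read a 32-bit field, remapping only on a page miss */
	static u32 read_field(struct buffer *eb, unsigned long off,
			      struct map_token *tok)
	{
		if (!tok->kaddr || tok->page_nr != off / PAGE_SIZE) {
			tok->page_nr = off / PAGE_SIZE;
			tok->kaddr = map_buffer_page(eb, tok->page_nr);
		}
		return *(u32 *)(tok->kaddr + off % PAGE_SIZE);
	}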
@@ -2537,9 +2564,11 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	u32 old_left_nritems;
 	u32 nr;
 	int ret = 0;
-	int wret;
 	u32 this_item_size;
 	u32 old_left_item_size;
+	struct btrfs_map_token token;
+
+	btrfs_init_map_token(&token);
 
 	if (empty)
		nr = min(right_nritems, max_slot);
@@ -2600,9 +2629,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
		item = btrfs_item_nr(left, i);
 
-		ioff = btrfs_item_offset(left, item);
-		btrfs_set_item_offset(left, item,
-		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
+		ioff = btrfs_token_item_offset(left, item, &token);
+		btrfs_set_token_item_offset(left, item,
+		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
+		      &token);
 	}
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
 
@@ -2632,8 +2662,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
 
-		push_space = push_space - btrfs_item_size(right, item);
-		btrfs_set_item_offset(right, item, push_space);
+		push_space = push_space - btrfs_token_item_size(right,
								item, &token);
+		btrfs_set_token_item_offset(right, item, push_space, &token);
 	}
 
 	btrfs_mark_buffer_dirty(left);
@@ -2643,9 +2674,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
		clean_tree_block(trans, root, right);
 
 	btrfs_item_key(right, &disk_key, 0);
-	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
-	if (wret)
-		ret = wret;
+	fixup_low_keys(trans, root, path, &disk_key, 1);
 
 	/* then fixup the leaf pointer in the path */
 	if (path->slots[0] < push_items) {
@@ -2716,7 +2745,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
			      path->nodes[1], slot - 1, &left);
 	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
-		ret = 1;
+		if (ret == -ENOSPC)
+			ret = 1;
		goto out;
 	}
 
@@ -2738,22 +2768,21 @@ out:
 /*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
- *
- * returns 0 if all went well and < 0 on failure.
 */
-static noinline int copy_for_split(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *root,
-			       struct btrfs_path *path,
-			       struct extent_buffer *l,
-			       struct extent_buffer *right,
-			       int slot, int mid, int nritems)
+static noinline void copy_for_split(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_path *path,
+			       struct extent_buffer *l,
+			       struct extent_buffer *right,
+			       int slot, int mid, int nritems)
 {
 	int data_copy_size;
 	int rt_data_off;
 	int i;
-	int ret = 0;
-	int wret;
 	struct btrfs_disk_key disk_key;
+	struct btrfs_map_token token;
+
+	btrfs_init_map_token(&token);
 
 	nritems = nritems - mid;
 	btrfs_set_header_nritems(right, nritems);
@@ -2775,17 +2804,15 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
		struct btrfs_item *item = btrfs_item_nr(right, i);
		u32 ioff;
 
-		ioff = btrfs_item_offset(right, item);
-		btrfs_set_item_offset(right, item, ioff + rt_data_off);
+		ioff = btrfs_token_item_offset(right, item, &token);
+		btrfs_set_token_item_offset(right, item,
+					    ioff + rt_data_off, &token);
 	}
 
 	btrfs_set_header_nritems(l, mid);
-	ret = 0;
 	btrfs_item_key(right, &disk_key, 0);
-	wret = insert_ptr(trans, root, path, &disk_key, right->start,
-			  path->slots[1] + 1, 1);
-	if (wret)
-		ret = wret;
+	insert_ptr(trans, root, path, &disk_key, right->start,
+		   path->slots[1] + 1, 1);
 
 	btrfs_mark_buffer_dirty(right);
 	btrfs_mark_buffer_dirty(l);
@@ -2803,8 +2830,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 	}
 
 	BUG_ON(path->slots[0] < 0);
-
-	return ret;
 }
 
 /*
@@ -2993,12 +3018,8 @@ again:
 	if (split == 0) {
		if (mid <= slot) {
			btrfs_set_header_nritems(right, 0);
-			wret = insert_ptr(trans, root, path,
-					  &disk_key, right->start,
-					  path->slots[1] + 1, 1);
-			if (wret)
-				ret = wret;
-
+			insert_ptr(trans, root, path, &disk_key, right->start,
+				   path->slots[1] + 1, 1);
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
@@ -3006,29 +3027,21 @@ again:
			path->slots[1] += 1;
		} else {
			btrfs_set_header_nritems(right, 0);
-			wret = insert_ptr(trans, root, path,
-					  &disk_key,
-					  right->start,
+			insert_ptr(trans, root, path, &disk_key, right->start,
					  path->slots[1], 1);
-			if (wret)
-				ret = wret;
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
-			if (path->slots[1] == 0) {
-				wret = fixup_low_keys(trans, root,
-						      path, &disk_key, 1);
-				if (wret)
-					ret = wret;
-			}
+			if (path->slots[1] == 0)
+				fixup_low_keys(trans, root, path,
+					       &disk_key, 1);
		}
		btrfs_mark_buffer_dirty(right);
		return ret;
 	}
 
-	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
-	BUG_ON(ret);
+	copy_for_split(trans, root, path, l, right, slot, mid, nritems);
 
 	if (split == 2) {
		BUG_ON(num_doubles != 0);
@@ -3036,7 +3049,7 @@ again:
		goto again;
 	}
 
-	return ret;
+	return 0;
 
 push_for_double:
 	push_for_double_split(trans, root, path, data_size);
@@ -3238,11 +3251,9 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
		return ret;
 
 	path->slots[0]++;
-	ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
-				     item_size, item_size +
-				     sizeof(struct btrfs_item), 1);
-	BUG_ON(ret);
-
+	setup_items_for_insert(trans, root, path, new_key, &item_size,
+			       item_size, item_size +
+			       sizeof(struct btrfs_item), 1);
 	leaf = path->nodes[0];
 	memcpy_extent_buffer(leaf,
			     btrfs_item_ptr_offset(leaf, path->slots[0]),
@@ -3257,10 +3268,10 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
 * off the end of the item or if we shift the item to chop bytes off
 * the front.
 */
-int btrfs_truncate_item(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root,
-			struct btrfs_path *path,
-			u32 new_size, int from_end)
+void btrfs_truncate_item(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 struct btrfs_path *path,
+			 u32 new_size, int from_end)
 {
 	int slot;
 	struct extent_buffer *leaf;
@@ -3271,13 +3282,16 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 	unsigned int old_size;
 	unsigned int size_diff;
 	int i;
+	struct btrfs_map_token token;
+
+	btrfs_init_map_token(&token);
 
 	leaf = path->nodes[0];
 	slot = path->slots[0];
 
 	old_size = btrfs_item_size_nr(leaf, slot);
 	if (old_size == new_size)
-		return 0;
+		return;
 
 	nritems = btrfs_header_nritems(leaf);
 	data_end = leaf_data_end(root, leaf);
@@ -3297,8 +3311,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
		u32 ioff;
		item = btrfs_item_nr(leaf, i);
 
-		ioff = btrfs_item_offset(leaf, item);
-		btrfs_set_item_offset(leaf, item, ioff + size_diff);
+		ioff = btrfs_token_item_offset(leaf, item, &token);
+		btrfs_set_token_item_offset(leaf, item,
+					    ioff + size_diff, &token);
 	}
 
 	/* shift the data */
@@ -3350,15 +3365,14 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
		btrfs_print_leaf(root, leaf);
		BUG();
 	}
-	return 0;
 }
 
 /*
 * make the item pointed to by the path bigger, data_size is the new size.
 */
-int btrfs_extend_item(struct btrfs_trans_handle *trans,
-		      struct btrfs_root *root, struct btrfs_path *path,
-		      u32 data_size)
+void btrfs_extend_item(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct btrfs_path *path,
+		       u32 data_size)
 {
 	int slot;
 	struct extent_buffer *leaf;
@@ -3368,6 +3382,9 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3368 unsigned int old_data; 3382 unsigned int old_data;
3369 unsigned int old_size; 3383 unsigned int old_size;
3370 int i; 3384 int i;
3385 struct btrfs_map_token token;
3386
3387 btrfs_init_map_token(&token);
3371 3388
3372 leaf = path->nodes[0]; 3389 leaf = path->nodes[0];
3373 3390
@@ -3397,8 +3414,9 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3397 u32 ioff; 3414 u32 ioff;
3398 item = btrfs_item_nr(leaf, i); 3415 item = btrfs_item_nr(leaf, i);
3399 3416
3400 ioff = btrfs_item_offset(leaf, item); 3417 ioff = btrfs_token_item_offset(leaf, item, &token);
3401 btrfs_set_item_offset(leaf, item, ioff - data_size); 3418 btrfs_set_token_item_offset(leaf, item,
3419 ioff - data_size, &token);
3402 } 3420 }
3403 3421
3404 /* shift the data */ 3422 /* shift the data */
@@ -3416,7 +3434,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3416 btrfs_print_leaf(root, leaf); 3434 btrfs_print_leaf(root, leaf);
3417 BUG(); 3435 BUG();
3418 } 3436 }
3419 return 0;
3420} 3437}
3421 3438
3422/* 3439/*
@@ -3441,6 +3458,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3441 unsigned int data_end; 3458 unsigned int data_end;
3442 struct btrfs_disk_key disk_key; 3459 struct btrfs_disk_key disk_key;
3443 struct btrfs_key found_key; 3460 struct btrfs_key found_key;
3461 struct btrfs_map_token token;
3462
3463 btrfs_init_map_token(&token);
3444 3464
3445 for (i = 0; i < nr; i++) { 3465 for (i = 0; i < nr; i++) {
3446 if (total_size + data_size[i] + sizeof(struct btrfs_item) > 3466 if (total_size + data_size[i] + sizeof(struct btrfs_item) >
@@ -3506,8 +3526,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3506 u32 ioff; 3526 u32 ioff;
3507 3527
3508 item = btrfs_item_nr(leaf, i); 3528 item = btrfs_item_nr(leaf, i);
3509 ioff = btrfs_item_offset(leaf, item); 3529 ioff = btrfs_token_item_offset(leaf, item, &token);
3510 btrfs_set_item_offset(leaf, item, ioff - total_data); 3530 btrfs_set_token_item_offset(leaf, item,
3531 ioff - total_data, &token);
3511 } 3532 }
3512 /* shift the items */ 3533 /* shift the items */
3513 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3534 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3534,9 +3555,10 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3534 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3555 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3535 btrfs_set_item_key(leaf, &disk_key, slot + i); 3556 btrfs_set_item_key(leaf, &disk_key, slot + i);
3536 item = btrfs_item_nr(leaf, slot + i); 3557 item = btrfs_item_nr(leaf, slot + i);
3537 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3558 btrfs_set_token_item_offset(leaf, item,
3559 data_end - data_size[i], &token);
3538 data_end -= data_size[i]; 3560 data_end -= data_size[i];
3539 btrfs_set_item_size(leaf, item, data_size[i]); 3561 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
3540 } 3562 }
3541 btrfs_set_header_nritems(leaf, nritems + nr); 3563 btrfs_set_header_nritems(leaf, nritems + nr);
3542 btrfs_mark_buffer_dirty(leaf); 3564 btrfs_mark_buffer_dirty(leaf);
@@ -3544,7 +3566,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3544 ret = 0; 3566 ret = 0;
3545 if (slot == 0) { 3567 if (slot == 0) {
3546 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3568 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3547 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3569 fixup_low_keys(trans, root, path, &disk_key, 1);
3548 } 3570 }
3549 3571
3550 if (btrfs_leaf_free_space(root, leaf) < 0) { 3572 if (btrfs_leaf_free_space(root, leaf) < 0) {
@@ -3562,19 +3584,21 @@ out:
3562 * to save stack depth by doing the bulk of the work in a function 3584 * to save stack depth by doing the bulk of the work in a function
3563 * that doesn't call btrfs_search_slot 3585 * that doesn't call btrfs_search_slot
3564 */ 3586 */
3565int setup_items_for_insert(struct btrfs_trans_handle *trans, 3587void setup_items_for_insert(struct btrfs_trans_handle *trans,
3566 struct btrfs_root *root, struct btrfs_path *path, 3588 struct btrfs_root *root, struct btrfs_path *path,
3567 struct btrfs_key *cpu_key, u32 *data_size, 3589 struct btrfs_key *cpu_key, u32 *data_size,
3568 u32 total_data, u32 total_size, int nr) 3590 u32 total_data, u32 total_size, int nr)
3569{ 3591{
3570 struct btrfs_item *item; 3592 struct btrfs_item *item;
3571 int i; 3593 int i;
3572 u32 nritems; 3594 u32 nritems;
3573 unsigned int data_end; 3595 unsigned int data_end;
3574 struct btrfs_disk_key disk_key; 3596 struct btrfs_disk_key disk_key;
3575 int ret;
3576 struct extent_buffer *leaf; 3597 struct extent_buffer *leaf;
3577 int slot; 3598 int slot;
3599 struct btrfs_map_token token;
3600
3601 btrfs_init_map_token(&token);
3578 3602
3579 leaf = path->nodes[0]; 3603 leaf = path->nodes[0];
3580 slot = path->slots[0]; 3604 slot = path->slots[0];
@@ -3606,8 +3630,9 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
3606 u32 ioff; 3630 u32 ioff;
3607 3631
3608 item = btrfs_item_nr(leaf, i); 3632 item = btrfs_item_nr(leaf, i);
3609 ioff = btrfs_item_offset(leaf, item); 3633 ioff = btrfs_token_item_offset(leaf, item, &token);
3610 btrfs_set_item_offset(leaf, item, ioff - total_data); 3634 btrfs_set_token_item_offset(leaf, item,
3635 ioff - total_data, &token);
3611 } 3636 }
3612 /* shift the items */ 3637 /* shift the items */
3613 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3638 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3626,17 +3651,17 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
3626 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3651 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3627 btrfs_set_item_key(leaf, &disk_key, slot + i); 3652 btrfs_set_item_key(leaf, &disk_key, slot + i);
3628 item = btrfs_item_nr(leaf, slot + i); 3653 item = btrfs_item_nr(leaf, slot + i);
3629 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3654 btrfs_set_token_item_offset(leaf, item,
3655 data_end - data_size[i], &token);
3630 data_end -= data_size[i]; 3656 data_end -= data_size[i];
3631 btrfs_set_item_size(leaf, item, data_size[i]); 3657 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
3632 } 3658 }
3633 3659
3634 btrfs_set_header_nritems(leaf, nritems + nr); 3660 btrfs_set_header_nritems(leaf, nritems + nr);
3635 3661
3636 ret = 0;
3637 if (slot == 0) { 3662 if (slot == 0) {
3638 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3663 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3639 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3664 fixup_low_keys(trans, root, path, &disk_key, 1);
3640 } 3665 }
3641 btrfs_unlock_up_safe(path, 1); 3666 btrfs_unlock_up_safe(path, 1);
3642 btrfs_mark_buffer_dirty(leaf); 3667 btrfs_mark_buffer_dirty(leaf);
@@ -3645,7 +3670,6 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
3645 btrfs_print_leaf(root, leaf); 3670 btrfs_print_leaf(root, leaf);
3646 BUG(); 3671 BUG();
3647 } 3672 }
3648 return ret;
3649} 3673}
3650 3674
3651/* 3675/*
@@ -3672,16 +3696,14 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3672 if (ret == 0) 3696 if (ret == 0)
3673 return -EEXIST; 3697 return -EEXIST;
3674 if (ret < 0) 3698 if (ret < 0)
3675 goto out; 3699 return ret;
3676 3700
3677 slot = path->slots[0]; 3701 slot = path->slots[0];
3678 BUG_ON(slot < 0); 3702 BUG_ON(slot < 0);
3679 3703
3680 ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, 3704 setup_items_for_insert(trans, root, path, cpu_key, data_size,
3681 total_data, total_size, nr); 3705 total_data, total_size, nr);
3682 3706 return 0;
3683out:
3684 return ret;
3685} 3707}
3686 3708
3687/* 3709/*
@@ -3717,13 +3739,11 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3717 * the tree should have been previously balanced so the deletion does not 3739 * the tree should have been previously balanced so the deletion does not
3718 * empty a node. 3740 * empty a node.
3719 */ 3741 */
3720static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3742static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3721 struct btrfs_path *path, int level, int slot) 3743 struct btrfs_path *path, int level, int slot)
3722{ 3744{
3723 struct extent_buffer *parent = path->nodes[level]; 3745 struct extent_buffer *parent = path->nodes[level];
3724 u32 nritems; 3746 u32 nritems;
3725 int ret = 0;
3726 int wret;
3727 3747
3728 nritems = btrfs_header_nritems(parent); 3748 nritems = btrfs_header_nritems(parent);
3729 if (slot != nritems - 1) { 3749 if (slot != nritems - 1) {
@@ -3743,12 +3763,9 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3743 struct btrfs_disk_key disk_key; 3763 struct btrfs_disk_key disk_key;
3744 3764
3745 btrfs_node_key(parent, &disk_key, 0); 3765 btrfs_node_key(parent, &disk_key, 0);
3746 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); 3766 fixup_low_keys(trans, root, path, &disk_key, level + 1);
3747 if (wret)
3748 ret = wret;
3749 } 3767 }
3750 btrfs_mark_buffer_dirty(parent); 3768 btrfs_mark_buffer_dirty(parent);
3751 return ret;
3752} 3769}
3753 3770
3754/* 3771/*
@@ -3761,17 +3778,13 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3761 * The path must have already been setup for deleting the leaf, including 3778 * The path must have already been setup for deleting the leaf, including
3762 * all the proper balancing. path->nodes[1] must be locked. 3779 * all the proper balancing. path->nodes[1] must be locked.
3763 */ 3780 */
3764static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3781static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
3765 struct btrfs_root *root, 3782 struct btrfs_root *root,
3766 struct btrfs_path *path, 3783 struct btrfs_path *path,
3767 struct extent_buffer *leaf) 3784 struct extent_buffer *leaf)
3768{ 3785{
3769 int ret;
3770
3771 WARN_ON(btrfs_header_generation(leaf) != trans->transid); 3786 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
3772 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3787 del_ptr(trans, root, path, 1, path->slots[1]);
3773 if (ret)
3774 return ret;
3775 3788
3776 /* 3789 /*
3777 * btrfs_free_extent is expensive, we want to make sure we 3790 * btrfs_free_extent is expensive, we want to make sure we
@@ -3781,8 +3794,9 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3781 3794
3782 root_sub_used(root, leaf->len); 3795 root_sub_used(root, leaf->len);
3783 3796
3797 extent_buffer_get(leaf);
3784 btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); 3798 btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
3785 return 0; 3799 free_extent_buffer_stale(leaf);
3786} 3800}
3787/* 3801/*
3788 * delete the item at the leaf level in path. If that empties 3802 * delete the item at the leaf level in path. If that empties
@@ -3799,6 +3813,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3799 int wret; 3813 int wret;
3800 int i; 3814 int i;
3801 u32 nritems; 3815 u32 nritems;
3816 struct btrfs_map_token token;
3817
3818 btrfs_init_map_token(&token);
3802 3819
3803 leaf = path->nodes[0]; 3820 leaf = path->nodes[0];
3804 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); 3821 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
@@ -3820,8 +3837,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3820 u32 ioff; 3837 u32 ioff;
3821 3838
3822 item = btrfs_item_nr(leaf, i); 3839 item = btrfs_item_nr(leaf, i);
3823 ioff = btrfs_item_offset(leaf, item); 3840 ioff = btrfs_token_item_offset(leaf, item, &token);
3824 btrfs_set_item_offset(leaf, item, ioff + dsize); 3841 btrfs_set_token_item_offset(leaf, item,
3842 ioff + dsize, &token);
3825 } 3843 }
3826 3844
3827 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), 3845 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
@@ -3839,8 +3857,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3839 } else { 3857 } else {
3840 btrfs_set_path_blocking(path); 3858 btrfs_set_path_blocking(path);
3841 clean_tree_block(trans, root, leaf); 3859 clean_tree_block(trans, root, leaf);
3842 ret = btrfs_del_leaf(trans, root, path, leaf); 3860 btrfs_del_leaf(trans, root, path, leaf);
3843 BUG_ON(ret);
3844 } 3861 }
3845 } else { 3862 } else {
3846 int used = leaf_space_used(leaf, 0, nritems); 3863 int used = leaf_space_used(leaf, 0, nritems);
@@ -3848,10 +3865,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3848 struct btrfs_disk_key disk_key; 3865 struct btrfs_disk_key disk_key;
3849 3866
3850 btrfs_item_key(leaf, &disk_key, 0); 3867 btrfs_item_key(leaf, &disk_key, 0);
3851 wret = fixup_low_keys(trans, root, path, 3868 fixup_low_keys(trans, root, path, &disk_key, 1);
3852 &disk_key, 1);
3853 if (wret)
3854 ret = wret;
3855 } 3869 }
3856 3870
3857 /* delete the leaf if it is mostly empty */ 3871 /* delete the leaf if it is mostly empty */
@@ -3879,9 +3893,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3879 3893
3880 if (btrfs_header_nritems(leaf) == 0) { 3894 if (btrfs_header_nritems(leaf) == 0) {
3881 path->slots[1] = slot; 3895 path->slots[1] = slot;
3882 ret = btrfs_del_leaf(trans, root, path, leaf); 3896 btrfs_del_leaf(trans, root, path, leaf);
3883 BUG_ON(ret);
3884 free_extent_buffer(leaf); 3897 free_extent_buffer(leaf);
3898 ret = 0;
3885 } else { 3899 } else {
3886 /* if we're still in the path, make sure 3900 /* if we're still in the path, make sure
3887 * we're dirty. Otherwise, one of the 3901 * we're dirty. Otherwise, one of the
@@ -4059,18 +4073,18 @@ find_next_key:
4059 path->slots[level] = slot; 4073 path->slots[level] = slot;
4060 if (level == path->lowest_level) { 4074 if (level == path->lowest_level) {
4061 ret = 0; 4075 ret = 0;
4062 unlock_up(path, level, 1); 4076 unlock_up(path, level, 1, 0, NULL);
4063 goto out; 4077 goto out;
4064 } 4078 }
4065 btrfs_set_path_blocking(path); 4079 btrfs_set_path_blocking(path);
4066 cur = read_node_slot(root, cur, slot); 4080 cur = read_node_slot(root, cur, slot);
4067 BUG_ON(!cur); 4081 BUG_ON(!cur); /* -ENOMEM */
4068 4082
4069 btrfs_tree_read_lock(cur); 4083 btrfs_tree_read_lock(cur);
4070 4084
4071 path->locks[level - 1] = BTRFS_READ_LOCK; 4085 path->locks[level - 1] = BTRFS_READ_LOCK;
4072 path->nodes[level - 1] = cur; 4086 path->nodes[level - 1] = cur;
4073 unlock_up(path, level, 1); 4087 unlock_up(path, level, 1, 0, NULL);
4074 btrfs_clear_path_blocking(path, NULL, 0); 4088 btrfs_clear_path_blocking(path, NULL, 0);
4075 } 4089 }
4076out: 4090out:
@@ -4306,7 +4320,7 @@ again:
4306 } 4320 }
4307 ret = 0; 4321 ret = 0;
4308done: 4322done:
4309 unlock_up(path, 0, 1); 4323 unlock_up(path, 0, 1, 0, NULL);
4310 path->leave_spinning = old_spinning; 4324 path->leave_spinning = old_spinning;
4311 if (!old_spinning) 4325 if (!old_spinning)
4312 btrfs_set_path_blocking(path); 4326 btrfs_set_path_blocking(path);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80b6486fd5e6..5b8ef8eb3521 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -48,6 +48,8 @@ struct btrfs_ordered_sum;
48 48
49#define BTRFS_MAGIC "_BHRfS_M" 49#define BTRFS_MAGIC "_BHRfS_M"
50 50
51#define BTRFS_MAX_MIRRORS 2
52
51#define BTRFS_MAX_LEVEL 8 53#define BTRFS_MAX_LEVEL 8
52 54
53#define BTRFS_COMPAT_EXTENT_TREE_V0 55#define BTRFS_COMPAT_EXTENT_TREE_V0
@@ -138,6 +140,12 @@ struct btrfs_ordered_sum;
138#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 140#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
139 141
140/* 142/*
143 * the max metadata block size. This limit is somewhat artificial,
144 * but the memmove costs go through the roof for larger blocks.
145 */
146#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
147
148/*
141 * we can actually store much bigger names, but let's not confuse the rest 149 * we can actually store much bigger names, but let's not confuse the rest
142 * of linux 150 * of linux
143 */ 151 */
@@ -461,6 +469,19 @@ struct btrfs_super_block {
461#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 469#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
462#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 470#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
463#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) 471#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
472/*
473 * some patches floated around with a second compression method;
474 * let's save that incompat bit here for when they do get in
475 * Note we don't actually support it, we're just reserving the
476 * number
477 */
478#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4)
479
480/*
481 * older kernels tried to do bigger metadata blocks, but the
482 * code was pretty buggy. Let's not let them try anymore.
483 */
484#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
464 485
465#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 486#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
466#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 487#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
@@ -468,6 +489,7 @@ struct btrfs_super_block {
468 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 489 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
469 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ 490 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
470 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ 491 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
492 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
471 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) 493 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
472 494
473/* 495/*
@@ -829,6 +851,21 @@ struct btrfs_csum_item {
829 */ 851 */
830#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) 852#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
831 853
854#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
855 BTRFS_AVAIL_ALLOC_BIT_SINGLE)
856
857static inline u64 chunk_to_extended(u64 flags)
858{
859 if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
860 flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;
861
862 return flags;
863}
864static inline u64 extended_to_chunk(u64 flags)
865{
866 return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
867}
868
832struct btrfs_block_group_item { 869struct btrfs_block_group_item {
833 __le64 used; 870 __le64 used;
834 __le64 chunk_objectid; 871 __le64 chunk_objectid;
@@ -1503,6 +1540,7 @@ struct btrfs_ioctl_defrag_range_args {
1503#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19) 1540#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
1504#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) 1541#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
1505#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) 1542#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
1543#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
1506 1544
1507#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1545#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1508#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1546#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1526,6 +1564,17 @@ struct btrfs_ioctl_defrag_range_args {
1526 1564
1527#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) 1565#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
1528 1566
1567struct btrfs_map_token {
1568 struct extent_buffer *eb;
1569 char *kaddr;
1570 unsigned long offset;
1571};
1572
1573static inline void btrfs_init_map_token(struct btrfs_map_token *token)
1574{
1575 memset(token, 0, sizeof(*token));
1576}
1577
1529/* some macros to generate set/get funcs for the struct fields. This 1578/* some macros to generate set/get funcs for the struct fields. This
1530 * assumes there is a lefoo_to_cpu for every type, so let's make a simple 1579 * assumes there is a lefoo_to_cpu for every type, so let's make a simple
1531 * one for u8: 1580 * one for u8:
@@ -1549,20 +1598,22 @@ struct btrfs_ioctl_defrag_range_args {
1549#ifndef BTRFS_SETGET_FUNCS 1598#ifndef BTRFS_SETGET_FUNCS
1550#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ 1599#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
1551u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ 1600u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
1601u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token); \
1602void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\
1552void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); 1603void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
1553#endif 1604#endif
1554 1605
1555#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ 1606#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
1556static inline u##bits btrfs_##name(struct extent_buffer *eb) \ 1607static inline u##bits btrfs_##name(struct extent_buffer *eb) \
1557{ \ 1608{ \
1558 type *p = page_address(eb->first_page); \ 1609 type *p = page_address(eb->pages[0]); \
1559 u##bits res = le##bits##_to_cpu(p->member); \ 1610 u##bits res = le##bits##_to_cpu(p->member); \
1560 return res; \ 1611 return res; \
1561} \ 1612} \
1562static inline void btrfs_set_##name(struct extent_buffer *eb, \ 1613static inline void btrfs_set_##name(struct extent_buffer *eb, \
1563 u##bits val) \ 1614 u##bits val) \
1564{ \ 1615{ \
1565 type *p = page_address(eb->first_page); \ 1616 type *p = page_address(eb->pages[0]); \
1566 p->member = cpu_to_le##bits(val); \ 1617 p->member = cpu_to_le##bits(val); \
1567} 1618}
1568 1619
@@ -2466,8 +2517,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2466 struct btrfs_root *root, 2517 struct btrfs_root *root,
2467 u64 num_bytes, u64 min_alloc_size, 2518 u64 num_bytes, u64 min_alloc_size,
2468 u64 empty_size, u64 hint_byte, 2519 u64 empty_size, u64 hint_byte,
2469 u64 search_end, struct btrfs_key *ins, 2520 struct btrfs_key *ins, u64 data);
2470 u64 data);
2471int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2521int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2472 struct extent_buffer *buf, int full_backref, int for_cow); 2522 struct extent_buffer *buf, int full_backref, int for_cow);
2473int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2523int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2484,8 +2534,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
2484int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 2534int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
2485int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 2535int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
2486 u64 start, u64 len); 2536 u64 start, u64 len);
2487int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 2537void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
2488 struct btrfs_root *root); 2538 struct btrfs_root *root);
2489int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 2539int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2490 struct btrfs_root *root); 2540 struct btrfs_root *root);
2491int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 2541int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2548,8 +2598,8 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
2548 u64 num_bytes); 2598 u64 num_bytes);
2549int btrfs_set_block_group_ro(struct btrfs_root *root, 2599int btrfs_set_block_group_ro(struct btrfs_root *root,
2550 struct btrfs_block_group_cache *cache); 2600 struct btrfs_block_group_cache *cache);
2551int btrfs_set_block_group_rw(struct btrfs_root *root, 2601void btrfs_set_block_group_rw(struct btrfs_root *root,
2552 struct btrfs_block_group_cache *cache); 2602 struct btrfs_block_group_cache *cache);
2553void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2603void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
2554u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); 2604u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
2555int btrfs_error_unpin_extent_range(struct btrfs_root *root, 2605int btrfs_error_unpin_extent_range(struct btrfs_root *root,
@@ -2568,9 +2618,9 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
2568int btrfs_previous_item(struct btrfs_root *root, 2618int btrfs_previous_item(struct btrfs_root *root,
2569 struct btrfs_path *path, u64 min_objectid, 2619 struct btrfs_path *path, u64 min_objectid,
2570 int type); 2620 int type);
2571int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 2621void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
2572 struct btrfs_root *root, struct btrfs_path *path, 2622 struct btrfs_root *root, struct btrfs_path *path,
2573 struct btrfs_key *new_key); 2623 struct btrfs_key *new_key);
2574struct extent_buffer *btrfs_root_node(struct btrfs_root *root); 2624struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
2575struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); 2625struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
2576int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 2626int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@@ -2590,12 +2640,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
2590 struct extent_buffer **cow_ret, u64 new_root_objectid); 2640 struct extent_buffer **cow_ret, u64 new_root_objectid);
2591int btrfs_block_can_be_shared(struct btrfs_root *root, 2641int btrfs_block_can_be_shared(struct btrfs_root *root,
2592 struct extent_buffer *buf); 2642 struct extent_buffer *buf);
2593int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root 2643void btrfs_extend_item(struct btrfs_trans_handle *trans,
2594 *root, struct btrfs_path *path, u32 data_size); 2644 struct btrfs_root *root, struct btrfs_path *path,
2595int btrfs_truncate_item(struct btrfs_trans_handle *trans, 2645 u32 data_size);
2596 struct btrfs_root *root, 2646void btrfs_truncate_item(struct btrfs_trans_handle *trans,
2597 struct btrfs_path *path, 2647 struct btrfs_root *root,
2598 u32 new_size, int from_end); 2648 struct btrfs_path *path,
2649 u32 new_size, int from_end);
2599int btrfs_split_item(struct btrfs_trans_handle *trans, 2650int btrfs_split_item(struct btrfs_trans_handle *trans,
2600 struct btrfs_root *root, 2651 struct btrfs_root *root,
2601 struct btrfs_path *path, 2652 struct btrfs_path *path,
@@ -2629,10 +2680,10 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
2629 return btrfs_del_items(trans, root, path, path->slots[0], 1); 2680 return btrfs_del_items(trans, root, path, path->slots[0], 1);
2630} 2681}
2631 2682
2632int setup_items_for_insert(struct btrfs_trans_handle *trans, 2683void setup_items_for_insert(struct btrfs_trans_handle *trans,
2633 struct btrfs_root *root, struct btrfs_path *path, 2684 struct btrfs_root *root, struct btrfs_path *path,
2634 struct btrfs_key *cpu_key, u32 *data_size, 2685 struct btrfs_key *cpu_key, u32 *data_size,
2635 u32 total_data, u32 total_size, int nr); 2686 u32 total_data, u32 total_size, int nr);
2636int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 2687int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2637 *root, struct btrfs_key *key, void *data, u32 data_size); 2688 *root, struct btrfs_key *key, void *data, u32 data_size);
2638int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 2689int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
@@ -2659,9 +2710,9 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
2659} 2710}
2660int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); 2711int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
2661int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 2712int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
2662void btrfs_drop_snapshot(struct btrfs_root *root, 2713int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
2663 struct btrfs_block_rsv *block_rsv, int update_ref, 2714 struct btrfs_block_rsv *block_rsv,
2664 int for_reloc); 2715 int update_ref, int for_reloc);
2665int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 2716int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2666 struct btrfs_root *root, 2717 struct btrfs_root *root,
2667 struct extent_buffer *node, 2718 struct extent_buffer *node,
@@ -2687,24 +2738,6 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
2687 kfree(fs_info->super_for_commit); 2738 kfree(fs_info->super_for_commit);
2688 kfree(fs_info); 2739 kfree(fs_info);
2689} 2740}
2690/**
2691 * profile_is_valid - tests whether a given profile is valid and reduced
2692 * @flags: profile to validate
2693 * @extended: if true @flags is treated as an extended profile
2694 */
2695static inline int profile_is_valid(u64 flags, int extended)
2696{
2697 u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
2698
2699 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
2700 if (extended)
2701 mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2702
2703 if (flags & mask)
2704 return 0;
2705 /* true if zero or exactly one bit set */
2706 return (flags & (~flags + 1)) == flags;
2707}
2708 2741
2709/* root-item.c */ 2742/* root-item.c */
2710int btrfs_find_root_ref(struct btrfs_root *tree_root, 2743int btrfs_find_root_ref(struct btrfs_root *tree_root,
@@ -2723,9 +2756,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2723int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root 2756int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
2724 *root, struct btrfs_key *key, struct btrfs_root_item 2757 *root, struct btrfs_key *key, struct btrfs_root_item
2725 *item); 2758 *item);
2726int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root 2759int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
2727 *root, struct btrfs_key *key, struct btrfs_root_item 2760 struct btrfs_root *root,
2728 *item); 2761 struct btrfs_key *key,
2762 struct btrfs_root_item *item);
2729int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct 2763int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
2730 btrfs_root_item *item, struct btrfs_key *key); 2764 btrfs_root_item *item, struct btrfs_key *key);
2731int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); 2765int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
@@ -2909,7 +2943,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root);
2909void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, 2943void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2910 struct btrfs_root *root); 2944 struct btrfs_root *root);
2911int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); 2945int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
2912int btrfs_invalidate_inodes(struct btrfs_root *root); 2946void btrfs_invalidate_inodes(struct btrfs_root *root);
2913void btrfs_add_delayed_iput(struct inode *inode); 2947void btrfs_add_delayed_iput(struct inode *inode);
2914void btrfs_run_delayed_iputs(struct btrfs_root *root); 2948void btrfs_run_delayed_iputs(struct btrfs_root *root);
2915int btrfs_prealloc_file_range(struct inode *inode, int mode, 2949int btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -2961,13 +2995,41 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
2961/* super.c */ 2995/* super.c */
2962int btrfs_parse_options(struct btrfs_root *root, char *options); 2996int btrfs_parse_options(struct btrfs_root *root, char *options);
2963int btrfs_sync_fs(struct super_block *sb, int wait); 2997int btrfs_sync_fs(struct super_block *sb, int wait);
2998void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...);
2964void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, 2999void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
2965 unsigned int line, int errno); 3000 unsigned int line, int errno, const char *fmt, ...);
3001
3002void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
3003 struct btrfs_root *root, const char *function,
3004 unsigned int line, int errno);
3005
3006#define btrfs_abort_transaction(trans, root, errno) \
3007do { \
3008 __btrfs_abort_transaction(trans, root, __func__, \
3009 __LINE__, errno); \
3010} while (0)
2966 3011
2967#define btrfs_std_error(fs_info, errno) \ 3012#define btrfs_std_error(fs_info, errno) \
2968do { \ 3013do { \
2969 if ((errno)) \ 3014 if ((errno)) \
2970 __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ 3015 __btrfs_std_error((fs_info), __func__, \
3016 __LINE__, (errno), NULL); \
3017} while (0)
3018
3019#define btrfs_error(fs_info, errno, fmt, args...) \
3020do { \
3021 __btrfs_std_error((fs_info), __func__, __LINE__, \
3022 (errno), fmt, ##args); \
3023} while (0)
3024
3025void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
3026 unsigned int line, int errno, const char *fmt, ...);
3027
3028#define btrfs_panic(fs_info, errno, fmt, args...) \
3029do { \
3030 struct btrfs_fs_info *_i = (fs_info); \
3031 __btrfs_panic(_i, __func__, __LINE__, errno, fmt, ##args); \
3032 BUG_ON(!(_i->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)); \
2971} while (0) 3033} while (0)
2972 3034
2973/* acl.c */ 3035/* acl.c */
@@ -3003,16 +3065,17 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
3003void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, 3065void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
3004 struct btrfs_pending_snapshot *pending, 3066 struct btrfs_pending_snapshot *pending,
3005 u64 *bytes_to_reserve); 3067 u64 *bytes_to_reserve);
3006void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, 3068int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
3007 struct btrfs_pending_snapshot *pending); 3069 struct btrfs_pending_snapshot *pending);
3008 3070
3009/* scrub.c */ 3071/* scrub.c */
3010int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, 3072int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
3011 struct btrfs_scrub_progress *progress, int readonly); 3073 struct btrfs_scrub_progress *progress, int readonly);
3012int btrfs_scrub_pause(struct btrfs_root *root); 3074void btrfs_scrub_pause(struct btrfs_root *root);
3013int btrfs_scrub_pause_super(struct btrfs_root *root); 3075void btrfs_scrub_pause_super(struct btrfs_root *root);
3014int btrfs_scrub_continue(struct btrfs_root *root); 3076void btrfs_scrub_continue(struct btrfs_root *root);
3015int btrfs_scrub_continue_super(struct btrfs_root *root); 3077void btrfs_scrub_continue_super(struct btrfs_root *root);
3078int __btrfs_scrub_cancel(struct btrfs_fs_info *info);
3016int btrfs_scrub_cancel(struct btrfs_root *root); 3079int btrfs_scrub_cancel(struct btrfs_root *root);
3017int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev); 3080int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
3018int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); 3081int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
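[Illustrative sketch, assuming a hypothetical callee do_tree_op(): how the error-handling declarations above are meant to be used. Inside a transaction, a fatal error aborts the transaction via btrfs_abort_transaction() instead of BUG_ON(ret); code running outside a transaction reports through btrfs_std_error()/btrfs_error() instead.]

	static int example_tree_update(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root)
	{
		int ret;

		ret = do_tree_op(trans, root);	/* hypothetical callee */
		if (ret) {
			/* marks the transaction aborted and logs errno */
			btrfs_abort_transaction(trans, root, ret);
			return ret;
		}
		return 0;
	}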
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fe4cd0f1cef1..03e3748d84d0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -115,6 +115,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
115 return NULL; 115 return NULL;
116} 116}
117 117
118/* Will return either the node or PTR_ERR(-ENOMEM) */
118static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( 119static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
119 struct inode *inode) 120 struct inode *inode)
120{ 121{
@@ -836,10 +837,8 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
836 btrfs_clear_path_blocking(path, NULL, 0); 837 btrfs_clear_path_blocking(path, NULL, 0);
837 838
838 /* insert the keys of the items */ 839 /* insert the keys of the items */
839 ret = setup_items_for_insert(trans, root, path, keys, data_size, 840 setup_items_for_insert(trans, root, path, keys, data_size,
840 total_data_size, total_size, nitems); 841 total_data_size, total_size, nitems);
841 if (ret)
842 goto error;
843 842
844 /* insert the dir index items */ 843 /* insert the dir index items */
845 slot = path->slots[0]; 844 slot = path->slots[0];
@@ -1108,16 +1107,25 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1108 return 0; 1107 return 0;
1109} 1108}
1110 1109
1111/* Called when committing the transaction. */ 1110/*
1111 * Called when committing the transaction.
1112 * Returns 0 on success.
1113 * Returns < 0 on error; the transaction is aborted and any outstanding
1114 * delayed items are cleaned up.
1115 */
1112int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, 1116int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1113 struct btrfs_root *root) 1117 struct btrfs_root *root)
1114{ 1118{
1119 struct btrfs_root *curr_root = root;
1115 struct btrfs_delayed_root *delayed_root; 1120 struct btrfs_delayed_root *delayed_root;
1116 struct btrfs_delayed_node *curr_node, *prev_node; 1121 struct btrfs_delayed_node *curr_node, *prev_node;
1117 struct btrfs_path *path; 1122 struct btrfs_path *path;
1118 struct btrfs_block_rsv *block_rsv; 1123 struct btrfs_block_rsv *block_rsv;
1119 int ret = 0; 1124 int ret = 0;
1120 1125
1126 if (trans->aborted)
1127 return -EIO;
1128
1121 path = btrfs_alloc_path(); 1129 path = btrfs_alloc_path();
1122 if (!path) 1130 if (!path)
1123 return -ENOMEM; 1131 return -ENOMEM;
@@ -1130,17 +1138,18 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1130 1138
1131 curr_node = btrfs_first_delayed_node(delayed_root); 1139 curr_node = btrfs_first_delayed_node(delayed_root);
1132 while (curr_node) { 1140 while (curr_node) {
1133 root = curr_node->root; 1141 curr_root = curr_node->root;
1134 ret = btrfs_insert_delayed_items(trans, path, root, 1142 ret = btrfs_insert_delayed_items(trans, path, curr_root,
1135 curr_node); 1143 curr_node);
1136 if (!ret) 1144 if (!ret)
1137 ret = btrfs_delete_delayed_items(trans, path, root, 1145 ret = btrfs_delete_delayed_items(trans, path,
1138 curr_node); 1146 curr_root, curr_node);
1139 if (!ret) 1147 if (!ret)
1140 ret = btrfs_update_delayed_inode(trans, root, path, 1148 ret = btrfs_update_delayed_inode(trans, curr_root,
1141 curr_node); 1149 path, curr_node);
1142 if (ret) { 1150 if (ret) {
1143 btrfs_release_delayed_node(curr_node); 1151 btrfs_release_delayed_node(curr_node);
1152 btrfs_abort_transaction(trans, root, ret);
1144 break; 1153 break;
1145 } 1154 }
1146 1155
@@ -1151,6 +1160,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1151 1160
1152 btrfs_free_path(path); 1161 btrfs_free_path(path);
1153 trans->block_rsv = block_rsv; 1162 trans->block_rsv = block_rsv;
1163
1154 return ret; 1164 return ret;
1155} 1165}
1156 1166
@@ -1371,6 +1381,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
1371 btrfs_wq_run_delayed_node(delayed_root, root, 0); 1381 btrfs_wq_run_delayed_node(delayed_root, root, 0);
1372} 1382}
1373 1383
1384/* Will return 0 or -ENOMEM */
1374int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, 1385int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1375 struct btrfs_root *root, const char *name, 1386 struct btrfs_root *root, const char *name,
1376 int name_len, struct inode *dir, 1387 int name_len, struct inode *dir,
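[Illustrative sketch of the early-bail check adopted in btrfs_run_delayed_items() above: once a transaction has been aborted, later work on it refuses to run and returns -EIO rather than touching the trees again. run_more_delayed_work() and do_delayed_work() are hypothetical names.]

	static int run_more_delayed_work(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root)
	{
		/* trans->aborted is set by btrfs_abort_transaction() */
		if (trans->aborted)
			return -EIO;

		return do_delayed_work(trans, root);	/* hypothetical callee */
	}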
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 66e4f29505a3..69f22e3ab3bc 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -420,7 +420,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
420 * this does all the dirty work in terms of maintaining the correct 420 * this does all the dirty work in terms of maintaining the correct
421 * overall modification count. 421 * overall modification count.
422 */ 422 */
423static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info, 423static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
424 struct btrfs_trans_handle *trans, 424 struct btrfs_trans_handle *trans,
425 struct btrfs_delayed_ref_node *ref, 425 struct btrfs_delayed_ref_node *ref,
426 u64 bytenr, u64 num_bytes, 426 u64 bytenr, u64 num_bytes,
@@ -487,20 +487,19 @@ static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info,
487 * we've updated the existing ref, free the newly 487 * we've updated the existing ref, free the newly
488 * allocated ref 488 * allocated ref
489 */ 489 */
490 kfree(ref); 490 kfree(head_ref);
491 } else { 491 } else {
492 delayed_refs->num_heads++; 492 delayed_refs->num_heads++;
493 delayed_refs->num_heads_ready++; 493 delayed_refs->num_heads_ready++;
494 delayed_refs->num_entries++; 494 delayed_refs->num_entries++;
495 trans->delayed_ref_updates++; 495 trans->delayed_ref_updates++;
496 } 496 }
497 return 0;
498} 497}
499 498
500/* 499/*
501 * helper to insert a delayed tree ref into the rbtree. 500 * helper to insert a delayed tree ref into the rbtree.
502 */ 501 */
503static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 502static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
504 struct btrfs_trans_handle *trans, 503 struct btrfs_trans_handle *trans,
505 struct btrfs_delayed_ref_node *ref, 504 struct btrfs_delayed_ref_node *ref,
506 u64 bytenr, u64 num_bytes, u64 parent, 505 u64 bytenr, u64 num_bytes, u64 parent,
@@ -549,18 +548,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
549 * we've updated the existing ref, free the newly 548 * we've updated the existing ref, free the newly
550 * allocated ref 549 * allocated ref
551 */ 550 */
552 kfree(ref); 551 kfree(full_ref);
553 } else { 552 } else {
554 delayed_refs->num_entries++; 553 delayed_refs->num_entries++;
555 trans->delayed_ref_updates++; 554 trans->delayed_ref_updates++;
556 } 555 }
557 return 0;
558} 556}
559 557
560/* 558/*
561 * helper to insert a delayed data ref into the rbtree. 559 * helper to insert a delayed data ref into the rbtree.
562 */ 560 */
563static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info, 561static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
564 struct btrfs_trans_handle *trans, 562 struct btrfs_trans_handle *trans,
565 struct btrfs_delayed_ref_node *ref, 563 struct btrfs_delayed_ref_node *ref,
566 u64 bytenr, u64 num_bytes, u64 parent, 564 u64 bytenr, u64 num_bytes, u64 parent,
@@ -611,12 +609,11 @@ static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info,
611 * we've updated the existing ref, free the newly 609 * we've updated the existing ref, free the newly
612 * allocated ref 610 * allocated ref
613 */ 611 */
614 kfree(ref); 612 kfree(full_ref);
615 } else { 613 } else {
616 delayed_refs->num_entries++; 614 delayed_refs->num_entries++;
617 trans->delayed_ref_updates++; 615 trans->delayed_ref_updates++;
618 } 616 }
619 return 0;
620} 617}
621 618
622/* 619/*
@@ -634,7 +631,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
634 struct btrfs_delayed_tree_ref *ref; 631 struct btrfs_delayed_tree_ref *ref;
635 struct btrfs_delayed_ref_head *head_ref; 632 struct btrfs_delayed_ref_head *head_ref;
636 struct btrfs_delayed_ref_root *delayed_refs; 633 struct btrfs_delayed_ref_root *delayed_refs;
637 int ret;
638 634
639 BUG_ON(extent_op && extent_op->is_data); 635 BUG_ON(extent_op && extent_op->is_data);
640 ref = kmalloc(sizeof(*ref), GFP_NOFS); 636 ref = kmalloc(sizeof(*ref), GFP_NOFS);
@@ -656,14 +652,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
656 * insert both the head node and the new ref without dropping 652 * insert both the head node and the new ref without dropping
657 * the spin lock 653 * the spin lock
658 */ 654 */
659 ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 655 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
660 num_bytes, action, 0); 656 num_bytes, action, 0);
661 BUG_ON(ret);
662 657
663 ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, 658 add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
664 num_bytes, parent, ref_root, level, action, 659 num_bytes, parent, ref_root, level, action,
665 for_cow); 660 for_cow);
666 BUG_ON(ret);
667 if (!need_ref_seq(for_cow, ref_root) && 661 if (!need_ref_seq(for_cow, ref_root) &&
668 waitqueue_active(&delayed_refs->seq_wait)) 662 waitqueue_active(&delayed_refs->seq_wait))
669 wake_up(&delayed_refs->seq_wait); 663 wake_up(&delayed_refs->seq_wait);
@@ -685,7 +679,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
685 struct btrfs_delayed_data_ref *ref; 679 struct btrfs_delayed_data_ref *ref;
686 struct btrfs_delayed_ref_head *head_ref; 680 struct btrfs_delayed_ref_head *head_ref;
687 struct btrfs_delayed_ref_root *delayed_refs; 681 struct btrfs_delayed_ref_root *delayed_refs;
688 int ret;
689 682
690 BUG_ON(extent_op && !extent_op->is_data); 683 BUG_ON(extent_op && !extent_op->is_data);
691 ref = kmalloc(sizeof(*ref), GFP_NOFS); 684 ref = kmalloc(sizeof(*ref), GFP_NOFS);
@@ -707,14 +700,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
707 * insert both the head node and the new ref without dropping 700 * insert both the head node and the new ref without dropping
708 * the spin lock 701 * the spin lock
709 */ 702 */
710 ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 703 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
711 num_bytes, action, 1); 704 num_bytes, action, 1);
712 BUG_ON(ret);
713 705
714 ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, 706 add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
715 num_bytes, parent, ref_root, owner, offset, 707 num_bytes, parent, ref_root, owner, offset,
716 action, for_cow); 708 action, for_cow);
717 BUG_ON(ret);
718 if (!need_ref_seq(for_cow, ref_root) && 709 if (!need_ref_seq(for_cow, ref_root) &&
719 waitqueue_active(&delayed_refs->seq_wait)) 710 waitqueue_active(&delayed_refs->seq_wait))
720 wake_up(&delayed_refs->seq_wait); 711 wake_up(&delayed_refs->seq_wait);
@@ -729,7 +720,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
729{ 720{
730 struct btrfs_delayed_ref_head *head_ref; 721 struct btrfs_delayed_ref_head *head_ref;
731 struct btrfs_delayed_ref_root *delayed_refs; 722 struct btrfs_delayed_ref_root *delayed_refs;
732 int ret;
733 723
734 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); 724 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
735 if (!head_ref) 725 if (!head_ref)
@@ -740,10 +730,9 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
740 delayed_refs = &trans->transaction->delayed_refs; 730 delayed_refs = &trans->transaction->delayed_refs;
741 spin_lock(&delayed_refs->lock); 731 spin_lock(&delayed_refs->lock);
742 732
743 ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 733 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
744 num_bytes, BTRFS_UPDATE_DELAYED_HEAD, 734 num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
745 extent_op->is_data); 735 extent_op->is_data);
746 BUG_ON(ret);
747 736
748 if (waitqueue_active(&delayed_refs->seq_wait)) 737 if (waitqueue_active(&delayed_refs->seq_wait))
749 wake_up(&delayed_refs->seq_wait); 738 wake_up(&delayed_refs->seq_wait);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 31d84e78129b..c1a074d0696f 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -49,9 +49,8 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
49 di = btrfs_match_dir_item_name(root, path, name, name_len); 49 di = btrfs_match_dir_item_name(root, path, name, name_len);
50 if (di) 50 if (di)
51 return ERR_PTR(-EEXIST); 51 return ERR_PTR(-EEXIST);
52 ret = btrfs_extend_item(trans, root, path, data_size); 52 btrfs_extend_item(trans, root, path, data_size);
53 } 53 } else if (ret < 0)
54 if (ret < 0)
55 return ERR_PTR(ret); 54 return ERR_PTR(ret);
56 WARN_ON(ret > 0); 55 WARN_ON(ret > 0);
57 leaf = path->nodes[0]; 56 leaf = path->nodes[0];
@@ -116,6 +115,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
116 * 'location' is the key to stuff into the directory item, 'type' is the 115 * 'location' is the key to stuff into the directory item, 'type' is the
117 * type of the inode we're pointing to, and 'index' is the sequence number 116 * type of the inode we're pointing to, and 'index' is the sequence number
118 * to use for the second index (if one is created). 117 * to use for the second index (if one is created).
118 * Will return 0 or -ENOMEM
119 */ 119 */
120int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root 120int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
121 *root, const char *name, int name_len, 121 *root, const char *name, int name_len,
@@ -383,8 +383,8 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
383 start = btrfs_item_ptr_offset(leaf, path->slots[0]); 383 start = btrfs_item_ptr_offset(leaf, path->slots[0]);
384 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 384 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
385 item_len - (ptr + sub_item_len - start)); 385 item_len - (ptr + sub_item_len - start));
386 ret = btrfs_truncate_item(trans, root, path, 386 btrfs_truncate_item(trans, root, path,
387 item_len - sub_item_len, 1); 387 item_len - sub_item_len, 1);
388 } 388 }
389 return ret; 389 return ret;
390} 390}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 534266fe505f..20196f411206 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -48,20 +48,19 @@
48static struct extent_io_ops btree_extent_io_ops; 48static struct extent_io_ops btree_extent_io_ops;
49static void end_workqueue_fn(struct btrfs_work *work); 49static void end_workqueue_fn(struct btrfs_work *work);
50static void free_fs_root(struct btrfs_root *root); 50static void free_fs_root(struct btrfs_root *root);
51static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 51static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
52 int read_only); 52 int read_only);
53static int btrfs_destroy_ordered_operations(struct btrfs_root *root); 53static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
54static int btrfs_destroy_ordered_extents(struct btrfs_root *root); 54static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
55static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 55static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
56 struct btrfs_root *root); 56 struct btrfs_root *root);
57static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); 57static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
58static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); 58static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
59static int btrfs_destroy_marked_extents(struct btrfs_root *root, 59static int btrfs_destroy_marked_extents(struct btrfs_root *root,
60 struct extent_io_tree *dirty_pages, 60 struct extent_io_tree *dirty_pages,
61 int mark); 61 int mark);
62static int btrfs_destroy_pinned_extent(struct btrfs_root *root, 62static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
63 struct extent_io_tree *pinned_extents); 63 struct extent_io_tree *pinned_extents);
64static int btrfs_cleanup_transaction(struct btrfs_root *root);
65 64
66/* 65/*
67 * end_io_wq structs are used to do processing in task context when an IO is 66 * end_io_wq structs are used to do processing in task context when an IO is
@@ -99,6 +98,7 @@ struct async_submit_bio {
99 */ 98 */
100 u64 bio_offset; 99 u64 bio_offset;
101 struct btrfs_work work; 100 struct btrfs_work work;
101 int error;
102}; 102};
103 103
104/* 104/*
@@ -332,8 +332,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
332 return 0; 332 return 0;
333 333
334 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, 334 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
335 0, &cached_state, GFP_NOFS); 335 0, &cached_state);
336 if (extent_buffer_uptodate(io_tree, eb, cached_state) && 336 if (extent_buffer_uptodate(eb) &&
337 btrfs_header_generation(eb) == parent_transid) { 337 btrfs_header_generation(eb) == parent_transid) {
338 ret = 0; 338 ret = 0;
339 goto out; 339 goto out;
@@ -344,7 +344,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
344 (unsigned long long)parent_transid, 344 (unsigned long long)parent_transid,
345 (unsigned long long)btrfs_header_generation(eb)); 345 (unsigned long long)btrfs_header_generation(eb));
346 ret = 1; 346 ret = 1;
347 clear_extent_buffer_uptodate(io_tree, eb, &cached_state); 347 clear_extent_buffer_uptodate(eb);
348out: 348out:
349 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, 349 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
350 &cached_state, GFP_NOFS); 350 &cached_state, GFP_NOFS);
@@ -360,9 +360,11 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
360 u64 start, u64 parent_transid) 360 u64 start, u64 parent_transid)
361{ 361{
362 struct extent_io_tree *io_tree; 362 struct extent_io_tree *io_tree;
363 int failed = 0;
363 int ret; 364 int ret;
364 int num_copies = 0; 365 int num_copies = 0;
365 int mirror_num = 0; 366 int mirror_num = 0;
367 int failed_mirror = 0;
366 368
367 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); 369 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
368 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 370 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
@@ -370,9 +372,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
370 ret = read_extent_buffer_pages(io_tree, eb, start, 372 ret = read_extent_buffer_pages(io_tree, eb, start,
371 WAIT_COMPLETE, 373 WAIT_COMPLETE,
372 btree_get_extent, mirror_num); 374 btree_get_extent, mirror_num);
373 if (!ret && 375 if (!ret && !verify_parent_transid(io_tree, eb, parent_transid))
374 !verify_parent_transid(io_tree, eb, parent_transid)) 376 break;
375 return ret;
376 377
377 /* 378 /*
378 * This buffer's crc is fine, but its contents are corrupted, so 379 * This buffer's crc is fine, but its contents are corrupted, so
@@ -380,18 +381,31 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
380 * any less wrong. 381 * any less wrong.
381 */ 382 */
382 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) 383 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
383 return ret; 384 break;
385
386 if (!failed_mirror) {
387 failed = 1;
388 printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror);
389 failed_mirror = eb->failed_mirror;
390 }
384 391
385 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, 392 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
386 eb->start, eb->len); 393 eb->start, eb->len);
387 if (num_copies == 1) 394 if (num_copies == 1)
388 return ret; 395 break;
389 396
390 mirror_num++; 397 mirror_num++;
398 if (mirror_num == failed_mirror)
399 mirror_num++;
400
391 if (mirror_num > num_copies) 401 if (mirror_num > num_copies)
392 return ret; 402 break;
393 } 403 }
394 return -EIO; 404
405 if (failed && !ret)
406 repair_eb_io_failure(root, eb, failed_mirror);
407
408 return ret;
395} 409}
396 410
397/* 411/*
@@ -404,50 +418,27 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
404 struct extent_io_tree *tree; 418 struct extent_io_tree *tree;
405 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 419 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
406 u64 found_start; 420 u64 found_start;
407 unsigned long len;
408 struct extent_buffer *eb; 421 struct extent_buffer *eb;
409 int ret;
410 422
411 tree = &BTRFS_I(page->mapping->host)->io_tree; 423 tree = &BTRFS_I(page->mapping->host)->io_tree;
412 424
413 if (page->private == EXTENT_PAGE_PRIVATE) { 425 eb = (struct extent_buffer *)page->private;
414 WARN_ON(1); 426 if (page != eb->pages[0])
415 goto out; 427 return 0;
416 }
417 if (!page->private) {
418 WARN_ON(1);
419 goto out;
420 }
421 len = page->private >> 2;
422 WARN_ON(len == 0);
423
424 eb = alloc_extent_buffer(tree, start, len, page);
425 if (eb == NULL) {
426 WARN_ON(1);
427 goto out;
428 }
429 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
430 btrfs_header_generation(eb));
431 BUG_ON(ret);
432 WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
433
434 found_start = btrfs_header_bytenr(eb); 428 found_start = btrfs_header_bytenr(eb);
435 if (found_start != start) { 429 if (found_start != start) {
436 WARN_ON(1); 430 WARN_ON(1);
437 goto err; 431 return 0;
438 } 432 }
439 if (eb->first_page != page) { 433 if (eb->pages[0] != page) {
440 WARN_ON(1); 434 WARN_ON(1);
441 goto err; 435 return 0;
442 } 436 }
443 if (!PageUptodate(page)) { 437 if (!PageUptodate(page)) {
444 WARN_ON(1); 438 WARN_ON(1);
445 goto err; 439 return 0;
446 } 440 }
447 csum_tree_block(root, eb, 0); 441 csum_tree_block(root, eb, 0);
448err:
449 free_extent_buffer(eb);
450out:
451 return 0; 442 return 0;
452} 443}
453 444
@@ -537,34 +528,74 @@ static noinline int check_leaf(struct btrfs_root *root,
537 return 0; 528 return 0;
538} 529}
539 530
531struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
532 struct page *page, int max_walk)
533{
534 struct extent_buffer *eb;
535 u64 start = page_offset(page);
536 u64 target = start;
537 u64 min_start;
538
539 if (start < max_walk)
540 min_start = 0;
541 else
542 min_start = start - max_walk;
543
544 while (start >= min_start) {
545 eb = find_extent_buffer(tree, start, 0);
546 if (eb) {
547 /*
548 * we found an extent buffer and it contains our page
549 * horray!
550 */
551 if (eb->start <= target &&
552 eb->start + eb->len > target)
553 return eb;
554
555 /* we found an extent buffer that wasn't for us */
556 free_extent_buffer(eb);
557 return NULL;
558 }
559 if (start == 0)
560 break;
561 start -= PAGE_CACHE_SIZE;
562 }
563 return NULL;
564}
565
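
find_eb_for_page() steps backwards through the page cache, one page at a time and at most max_walk bytes, and accepts a buffer only if its [start, start + len) range actually covers the target page. A self-contained sketch of that walk over an assumed one-entry cache (find_buffer() and the struct are illustrations, not btrfs code):

        #include <stdio.h>

        #define PAGE_SIZE 4096ULL

        struct buf { unsigned long long start, len; };

        /* hypothetical cache: one 16K buffer starting at 8K */
        static struct buf cache = { 8192, 16384 };

        static struct buf *find_buffer(unsigned long long start)
        {
                return start == cache.start ? &cache : NULL;
        }

        static struct buf *find_buf_for_page(unsigned long long target,
                                             unsigned long long max_walk)
        {
                unsigned long long start = target;
                unsigned long long min_start =
                        target < max_walk ? 0 : target - max_walk;

                while (start >= min_start) {
                        struct buf *b = find_buffer(start);
                        if (b)  /* take it only if it covers the target */
                                return (b->start <= target &&
                                        b->start + b->len > target) ? b : NULL;
                        if (start == 0)
                                break;
                        start -= PAGE_SIZE;
                }
                return NULL;
        }

        int main(void)
        {
                struct buf *b = find_buf_for_page(16384, 65536);
                if (b)
                        printf("buffer at %llu covers the page\n", b->start);
                return 0;
        }
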
540static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 566static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
541 struct extent_state *state) 567 struct extent_state *state)
542{ 568{
543 struct extent_io_tree *tree; 569 struct extent_io_tree *tree;
544 u64 found_start; 570 u64 found_start;
545 int found_level; 571 int found_level;
546 unsigned long len;
547 struct extent_buffer *eb; 572 struct extent_buffer *eb;
548 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 573 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
549 int ret = 0; 574 int ret = 0;
575 int reads_done;
550 576
551 tree = &BTRFS_I(page->mapping->host)->io_tree;
552 if (page->private == EXTENT_PAGE_PRIVATE)
553 goto out;
554 if (!page->private) 577 if (!page->private)
555 goto out; 578 goto out;
556 579
557 len = page->private >> 2; 580 tree = &BTRFS_I(page->mapping->host)->io_tree;
558 WARN_ON(len == 0); 581 eb = (struct extent_buffer *)page->private;
559 582
560 eb = alloc_extent_buffer(tree, start, len, page); 583 /* the pending IO might have been the only thing that kept this buffer
 561 if (eb == NULL) { 584 * in memory. Make sure we have a ref for all these other checks
585 */
586 extent_buffer_get(eb);
587
588 reads_done = atomic_dec_and_test(&eb->io_pages);
589 if (!reads_done)
590 goto err;
591
592 if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
562 ret = -EIO; 593 ret = -EIO;
563 goto out; 594 goto err;
564 } 595 }
565 596
566 found_start = btrfs_header_bytenr(eb); 597 found_start = btrfs_header_bytenr(eb);
567 if (found_start != start) { 598 if (found_start != eb->start) {
568 printk_ratelimited(KERN_INFO "btrfs bad tree block start " 599 printk_ratelimited(KERN_INFO "btrfs bad tree block start "
569 "%llu %llu\n", 600 "%llu %llu\n",
570 (unsigned long long)found_start, 601 (unsigned long long)found_start,
@@ -572,13 +603,6 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
572 ret = -EIO; 603 ret = -EIO;
573 goto err; 604 goto err;
574 } 605 }
575 if (eb->first_page != page) {
576 printk(KERN_INFO "btrfs bad first page %lu %lu\n",
577 eb->first_page->index, page->index);
578 WARN_ON(1);
579 ret = -EIO;
580 goto err;
581 }
582 if (check_tree_block_fsid(root, eb)) { 606 if (check_tree_block_fsid(root, eb)) {
583 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", 607 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
584 (unsigned long long)eb->start); 608 (unsigned long long)eb->start);
@@ -606,48 +630,31 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
606 ret = -EIO; 630 ret = -EIO;
607 } 631 }
608 632
609 end = min_t(u64, eb->len, PAGE_CACHE_SIZE); 633 if (!ret)
610 end = eb->start + end - 1; 634 set_extent_buffer_uptodate(eb);
611err: 635err:
612 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { 636 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
613 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); 637 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
614 btree_readahead_hook(root, eb, eb->start, ret); 638 btree_readahead_hook(root, eb, eb->start, ret);
615 } 639 }
616 640
641 if (ret)
642 clear_extent_buffer_uptodate(eb);
617 free_extent_buffer(eb); 643 free_extent_buffer(eb);
618out: 644out:
619 return ret; 645 return ret;
620} 646}
621 647
622static int btree_io_failed_hook(struct bio *failed_bio, 648static int btree_io_failed_hook(struct page *page, int failed_mirror)
623 struct page *page, u64 start, u64 end,
624 int mirror_num, struct extent_state *state)
625{ 649{
626 struct extent_io_tree *tree;
627 unsigned long len;
628 struct extent_buffer *eb; 650 struct extent_buffer *eb;
629 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 651 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
630 652
631 tree = &BTRFS_I(page->mapping->host)->io_tree; 653 eb = (struct extent_buffer *)page->private;
632 if (page->private == EXTENT_PAGE_PRIVATE) 654 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
633 goto out; 655 eb->failed_mirror = failed_mirror;
634 if (!page->private) 656 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
635 goto out;
636
637 len = page->private >> 2;
638 WARN_ON(len == 0);
639
640 eb = alloc_extent_buffer(tree, start, len, page);
641 if (eb == NULL)
642 goto out;
643
644 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
645 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
646 btree_readahead_hook(root, eb, eb->start, -EIO); 657 btree_readahead_hook(root, eb, eb->start, -EIO);
647 }
648 free_extent_buffer(eb);
649
650out:
651 return -EIO; /* we fixed nothing */ 658 return -EIO; /* we fixed nothing */
652} 659}
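
The read-completion hook above hinges on eb->io_pages: every per-page completion decrements the counter, and only the completion that drops it to zero validates or fails the whole buffer. A hedged C11 sketch of that last-finisher pattern (struct buffer and the messages are illustrative only):

        #include <stdatomic.h>
        #include <stdio.h>

        struct buffer {
                atomic_int io_pages;
        };

        static void page_read_done(struct buffer *b)
        {
                /* atomic_fetch_sub() returns the value before the decrement */
                if (atomic_fetch_sub(&b->io_pages, 1) == 1)
                        printf("last page completed: verify the whole buffer\n");
                else
                        printf("other pages still in flight\n");
        }

        int main(void)
        {
                struct buffer b;

                atomic_init(&b.io_pages, 2);
                page_read_done(&b);
                page_read_done(&b);
                return 0;
        }
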
653 660
@@ -719,11 +726,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
719static void run_one_async_start(struct btrfs_work *work) 726static void run_one_async_start(struct btrfs_work *work)
720{ 727{
721 struct async_submit_bio *async; 728 struct async_submit_bio *async;
729 int ret;
722 730
723 async = container_of(work, struct async_submit_bio, work); 731 async = container_of(work, struct async_submit_bio, work);
724 async->submit_bio_start(async->inode, async->rw, async->bio, 732 ret = async->submit_bio_start(async->inode, async->rw, async->bio,
725 async->mirror_num, async->bio_flags, 733 async->mirror_num, async->bio_flags,
726 async->bio_offset); 734 async->bio_offset);
735 if (ret)
736 async->error = ret;
727} 737}
728 738
729static void run_one_async_done(struct btrfs_work *work) 739static void run_one_async_done(struct btrfs_work *work)
@@ -744,6 +754,12 @@ static void run_one_async_done(struct btrfs_work *work)
744 waitqueue_active(&fs_info->async_submit_wait)) 754 waitqueue_active(&fs_info->async_submit_wait))
745 wake_up(&fs_info->async_submit_wait); 755 wake_up(&fs_info->async_submit_wait);
746 756
 757 /* If an error occurred we just want to clean up the bio and move on */
758 if (async->error) {
759 bio_endio(async->bio, async->error);
760 return;
761 }
762
747 async->submit_bio_done(async->inode, async->rw, async->bio, 763 async->submit_bio_done(async->inode, async->rw, async->bio,
748 async->mirror_num, async->bio_flags, 764 async->mirror_num, async->bio_flags,
749 async->bio_offset); 765 async->bio_offset);
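
run_one_async_start() now records the start hook's return value in async->error, and run_one_async_done() ends the bio early instead of submitting it further. A minimal sketch of that hand-off between the two work phases, assuming a toy struct async_req rather than the kernel structure:

        #include <stdio.h>

        struct async_req {
                int error;
        };

        /* stand-in for the csum phase; pretend it failed with -EIO */
        static int submit_start(struct async_req *req)
        {
                return -5;
        }

        static void run_start(struct async_req *req)
        {
                int ret = submit_start(req);

                if (ret)
                        req->error = ret;       /* remember it for the done phase */
        }

        static void run_done(struct async_req *req)
        {
                if (req->error) {
                        printf("ending request early: %d\n", req->error);
                        return;                 /* clean up, don't submit */
                }
                printf("submitting request\n");
        }

        int main(void)
        {
                struct async_req req = { 0 };

                run_start(&req);
                run_done(&req);
                return 0;
        }
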
@@ -785,6 +801,8 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
785 async->bio_flags = bio_flags; 801 async->bio_flags = bio_flags;
786 async->bio_offset = bio_offset; 802 async->bio_offset = bio_offset;
787 803
804 async->error = 0;
805
788 atomic_inc(&fs_info->nr_async_submits); 806 atomic_inc(&fs_info->nr_async_submits);
789 807
790 if (rw & REQ_SYNC) 808 if (rw & REQ_SYNC)
@@ -806,15 +824,18 @@ static int btree_csum_one_bio(struct bio *bio)
806 struct bio_vec *bvec = bio->bi_io_vec; 824 struct bio_vec *bvec = bio->bi_io_vec;
807 int bio_index = 0; 825 int bio_index = 0;
808 struct btrfs_root *root; 826 struct btrfs_root *root;
827 int ret = 0;
809 828
810 WARN_ON(bio->bi_vcnt <= 0); 829 WARN_ON(bio->bi_vcnt <= 0);
811 while (bio_index < bio->bi_vcnt) { 830 while (bio_index < bio->bi_vcnt) {
812 root = BTRFS_I(bvec->bv_page->mapping->host)->root; 831 root = BTRFS_I(bvec->bv_page->mapping->host)->root;
813 csum_dirty_buffer(root, bvec->bv_page); 832 ret = csum_dirty_buffer(root, bvec->bv_page);
833 if (ret)
834 break;
814 bio_index++; 835 bio_index++;
815 bvec++; 836 bvec++;
816 } 837 }
817 return 0; 838 return ret;
818} 839}
819 840
820static int __btree_submit_bio_start(struct inode *inode, int rw, 841static int __btree_submit_bio_start(struct inode *inode, int rw,
@@ -826,8 +847,7 @@ static int __btree_submit_bio_start(struct inode *inode, int rw,
826 * when we're called for a write, we're already in the async 847 * when we're called for a write, we're already in the async
827 * submission context. Just jump into btrfs_map_bio 848 * submission context. Just jump into btrfs_map_bio
828 */ 849 */
829 btree_csum_one_bio(bio); 850 return btree_csum_one_bio(bio);
830 return 0;
831} 851}
832 852
833static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, 853static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
@@ -847,15 +867,16 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
847{ 867{
848 int ret; 868 int ret;
849 869
850 ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
851 bio, 1);
852 BUG_ON(ret);
853
854 if (!(rw & REQ_WRITE)) { 870 if (!(rw & REQ_WRITE)) {
871
855 /* 872 /*
856 * called for a read, do the setup so that checksum validation 873 * called for a read, do the setup so that checksum validation
857 * can happen in the async kernel threads 874 * can happen in the async kernel threads
858 */ 875 */
876 ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
877 bio, 1);
878 if (ret)
879 return ret;
859 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 880 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
860 mirror_num, 0); 881 mirror_num, 0);
861 } 882 }
@@ -893,34 +914,6 @@ static int btree_migratepage(struct address_space *mapping,
893} 914}
894#endif 915#endif
895 916
896static int btree_writepage(struct page *page, struct writeback_control *wbc)
897{
898 struct extent_io_tree *tree;
899 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
900 struct extent_buffer *eb;
901 int was_dirty;
902
903 tree = &BTRFS_I(page->mapping->host)->io_tree;
904 if (!(current->flags & PF_MEMALLOC)) {
905 return extent_write_full_page(tree, page,
906 btree_get_extent, wbc);
907 }
908
909 redirty_page_for_writepage(wbc, page);
910 eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
911 WARN_ON(!eb);
912
913 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
914 if (!was_dirty) {
915 spin_lock(&root->fs_info->delalloc_lock);
916 root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
917 spin_unlock(&root->fs_info->delalloc_lock);
918 }
919 free_extent_buffer(eb);
920
921 unlock_page(page);
922 return 0;
923}
924 917
925static int btree_writepages(struct address_space *mapping, 918static int btree_writepages(struct address_space *mapping,
926 struct writeback_control *wbc) 919 struct writeback_control *wbc)
@@ -940,7 +933,7 @@ static int btree_writepages(struct address_space *mapping,
940 if (num_dirty < thresh) 933 if (num_dirty < thresh)
941 return 0; 934 return 0;
942 } 935 }
943 return extent_writepages(tree, mapping, btree_get_extent, wbc); 936 return btree_write_cache_pages(mapping, wbc);
944} 937}
945 938
946static int btree_readpage(struct file *file, struct page *page) 939static int btree_readpage(struct file *file, struct page *page)
@@ -952,16 +945,8 @@ static int btree_readpage(struct file *file, struct page *page)
952 945
953static int btree_releasepage(struct page *page, gfp_t gfp_flags) 946static int btree_releasepage(struct page *page, gfp_t gfp_flags)
954{ 947{
955 struct extent_io_tree *tree;
956 struct extent_map_tree *map;
957 int ret;
958
959 if (PageWriteback(page) || PageDirty(page)) 948 if (PageWriteback(page) || PageDirty(page))
960 return 0; 949 return 0;
961
962 tree = &BTRFS_I(page->mapping->host)->io_tree;
963 map = &BTRFS_I(page->mapping->host)->extent_tree;
964
965 /* 950 /*
966 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing 951 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
967 * slab allocation from alloc_extent_state down the callchain where 952 * slab allocation from alloc_extent_state down the callchain where
@@ -969,18 +954,7 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
969 */ 954 */
970 gfp_flags &= ~GFP_SLAB_BUG_MASK; 955 gfp_flags &= ~GFP_SLAB_BUG_MASK;
971 956
972 ret = try_release_extent_state(map, tree, page, gfp_flags); 957 return try_release_extent_buffer(page, gfp_flags);
973 if (!ret)
974 return 0;
975
976 ret = try_release_extent_buffer(tree, page);
977 if (ret == 1) {
978 ClearPagePrivate(page);
979 set_page_private(page, 0);
980 page_cache_release(page);
981 }
982
983 return ret;
984} 958}
985 959
986static void btree_invalidatepage(struct page *page, unsigned long offset) 960static void btree_invalidatepage(struct page *page, unsigned long offset)
@@ -998,15 +972,28 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
998 } 972 }
999} 973}
1000 974
975static int btree_set_page_dirty(struct page *page)
976{
977 struct extent_buffer *eb;
978
979 BUG_ON(!PagePrivate(page));
980 eb = (struct extent_buffer *)page->private;
981 BUG_ON(!eb);
982 BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
983 BUG_ON(!atomic_read(&eb->refs));
984 btrfs_assert_tree_locked(eb);
985 return __set_page_dirty_nobuffers(page);
986}
987
1001static const struct address_space_operations btree_aops = { 988static const struct address_space_operations btree_aops = {
1002 .readpage = btree_readpage, 989 .readpage = btree_readpage,
1003 .writepage = btree_writepage,
1004 .writepages = btree_writepages, 990 .writepages = btree_writepages,
1005 .releasepage = btree_releasepage, 991 .releasepage = btree_releasepage,
1006 .invalidatepage = btree_invalidatepage, 992 .invalidatepage = btree_invalidatepage,
1007#ifdef CONFIG_MIGRATION 993#ifdef CONFIG_MIGRATION
1008 .migratepage = btree_migratepage, 994 .migratepage = btree_migratepage,
1009#endif 995#endif
996 .set_page_dirty = btree_set_page_dirty,
1010}; 997};
1011 998
1012int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, 999int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1049,7 +1036,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
1049 if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { 1036 if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
1050 free_extent_buffer(buf); 1037 free_extent_buffer(buf);
1051 return -EIO; 1038 return -EIO;
1052 } else if (extent_buffer_uptodate(io_tree, buf, NULL)) { 1039 } else if (extent_buffer_uptodate(buf)) {
1053 *eb = buf; 1040 *eb = buf;
1054 } else { 1041 } else {
1055 free_extent_buffer(buf); 1042 free_extent_buffer(buf);
@@ -1074,20 +1061,20 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
1074 struct extent_buffer *eb; 1061 struct extent_buffer *eb;
1075 1062
1076 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 1063 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
1077 bytenr, blocksize, NULL); 1064 bytenr, blocksize);
1078 return eb; 1065 return eb;
1079} 1066}
1080 1067
1081 1068
1082int btrfs_write_tree_block(struct extent_buffer *buf) 1069int btrfs_write_tree_block(struct extent_buffer *buf)
1083{ 1070{
1084 return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, 1071 return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
1085 buf->start + buf->len - 1); 1072 buf->start + buf->len - 1);
1086} 1073}
1087 1074
1088int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) 1075int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
1089{ 1076{
1090 return filemap_fdatawait_range(buf->first_page->mapping, 1077 return filemap_fdatawait_range(buf->pages[0]->mapping,
1091 buf->start, buf->start + buf->len - 1); 1078 buf->start, buf->start + buf->len - 1);
1092} 1079}
1093 1080
@@ -1102,17 +1089,13 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1102 return NULL; 1089 return NULL;
1103 1090
1104 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 1091 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
1105
1106 if (ret == 0)
1107 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
1108 return buf; 1092 return buf;
1109 1093
1110} 1094}
1111 1095
1112int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1096void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1113 struct extent_buffer *buf) 1097 struct extent_buffer *buf)
1114{ 1098{
1115 struct inode *btree_inode = root->fs_info->btree_inode;
1116 if (btrfs_header_generation(buf) == 1099 if (btrfs_header_generation(buf) ==
1117 root->fs_info->running_transaction->transid) { 1100 root->fs_info->running_transaction->transid) {
1118 btrfs_assert_tree_locked(buf); 1101 btrfs_assert_tree_locked(buf);
@@ -1121,23 +1104,27 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1121 spin_lock(&root->fs_info->delalloc_lock); 1104 spin_lock(&root->fs_info->delalloc_lock);
1122 if (root->fs_info->dirty_metadata_bytes >= buf->len) 1105 if (root->fs_info->dirty_metadata_bytes >= buf->len)
1123 root->fs_info->dirty_metadata_bytes -= buf->len; 1106 root->fs_info->dirty_metadata_bytes -= buf->len;
1124 else 1107 else {
1125 WARN_ON(1); 1108 spin_unlock(&root->fs_info->delalloc_lock);
1109 btrfs_panic(root->fs_info, -EOVERFLOW,
1110 "Can't clear %lu bytes from "
 1111 "dirty_metadata_bytes (%lu)",
1112 buf->len,
1113 root->fs_info->dirty_metadata_bytes);
1114 }
1126 spin_unlock(&root->fs_info->delalloc_lock); 1115 spin_unlock(&root->fs_info->delalloc_lock);
1127 } 1116 }
1128 1117
1129 /* ugh, clear_extent_buffer_dirty needs to lock the page */ 1118 /* ugh, clear_extent_buffer_dirty needs to lock the page */
1130 btrfs_set_lock_blocking(buf); 1119 btrfs_set_lock_blocking(buf);
1131 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 1120 clear_extent_buffer_dirty(buf);
1132 buf);
1133 } 1121 }
1134 return 0;
1135} 1122}
1136 1123
1137static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, 1124static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1138 u32 stripesize, struct btrfs_root *root, 1125 u32 stripesize, struct btrfs_root *root,
1139 struct btrfs_fs_info *fs_info, 1126 struct btrfs_fs_info *fs_info,
1140 u64 objectid) 1127 u64 objectid)
1141{ 1128{
1142 root->node = NULL; 1129 root->node = NULL;
1143 root->commit_root = NULL; 1130 root->commit_root = NULL;
@@ -1189,13 +1176,12 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1189 root->defrag_running = 0; 1176 root->defrag_running = 0;
1190 root->root_key.objectid = objectid; 1177 root->root_key.objectid = objectid;
1191 root->anon_dev = 0; 1178 root->anon_dev = 0;
1192 return 0;
1193} 1179}
1194 1180
1195static int find_and_setup_root(struct btrfs_root *tree_root, 1181static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
1196 struct btrfs_fs_info *fs_info, 1182 struct btrfs_fs_info *fs_info,
1197 u64 objectid, 1183 u64 objectid,
1198 struct btrfs_root *root) 1184 struct btrfs_root *root)
1199{ 1185{
1200 int ret; 1186 int ret;
1201 u32 blocksize; 1187 u32 blocksize;
@@ -1208,7 +1194,8 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
1208 &root->root_item, &root->root_key); 1194 &root->root_item, &root->root_key);
1209 if (ret > 0) 1195 if (ret > 0)
1210 return -ENOENT; 1196 return -ENOENT;
1211 BUG_ON(ret); 1197 else if (ret < 0)
1198 return ret;
1212 1199
1213 generation = btrfs_root_generation(&root->root_item); 1200 generation = btrfs_root_generation(&root->root_item);
1214 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1201 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
@@ -1377,7 +1364,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1377 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1364 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1378 blocksize, generation); 1365 blocksize, generation);
1379 root->commit_root = btrfs_root_node(root); 1366 root->commit_root = btrfs_root_node(root);
1380 BUG_ON(!root->node); 1367 BUG_ON(!root->node); /* -ENOMEM */
1381out: 1368out:
1382 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1369 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
1383 root->ref_cows = 1; 1370 root->ref_cows = 1;
@@ -1513,41 +1500,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1513 return 0; 1500 return 0;
1514} 1501}
1515 1502
1516static int bio_ready_for_csum(struct bio *bio)
1517{
1518 u64 length = 0;
1519 u64 buf_len = 0;
1520 u64 start = 0;
1521 struct page *page;
1522 struct extent_io_tree *io_tree = NULL;
1523 struct bio_vec *bvec;
1524 int i;
1525 int ret;
1526
1527 bio_for_each_segment(bvec, bio, i) {
1528 page = bvec->bv_page;
1529 if (page->private == EXTENT_PAGE_PRIVATE) {
1530 length += bvec->bv_len;
1531 continue;
1532 }
1533 if (!page->private) {
1534 length += bvec->bv_len;
1535 continue;
1536 }
1537 length = bvec->bv_len;
1538 buf_len = page->private >> 2;
1539 start = page_offset(page) + bvec->bv_offset;
1540 io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1541 }
1542 /* are we fully contained in this bio? */
1543 if (buf_len <= length)
1544 return 1;
1545
1546 ret = extent_range_uptodate(io_tree, start + length,
1547 start + buf_len - 1);
1548 return ret;
1549}
1550
1551/* 1503/*
1552 * called by the kthread helper functions to finally call the bio end_io 1504 * called by the kthread helper functions to finally call the bio end_io
1553 * functions. This is where read checksum verification actually happens 1505 * functions. This is where read checksum verification actually happens
@@ -1563,17 +1515,6 @@ static void end_workqueue_fn(struct btrfs_work *work)
1563 bio = end_io_wq->bio; 1515 bio = end_io_wq->bio;
1564 fs_info = end_io_wq->info; 1516 fs_info = end_io_wq->info;
1565 1517
1566 /* metadata bio reads are special because the whole tree block must
1567 * be checksummed at once. This makes sure the entire block is in
1568 * ram and up to date before trying to verify things. For
1569 * blocksize <= pagesize, it is basically a noop
1570 */
1571 if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
1572 !bio_ready_for_csum(bio)) {
1573 btrfs_queue_worker(&fs_info->endio_meta_workers,
1574 &end_io_wq->work);
1575 return;
1576 }
1577 error = end_io_wq->error; 1518 error = end_io_wq->error;
1578 bio->bi_private = end_io_wq->private; 1519 bio->bi_private = end_io_wq->private;
1579 bio->bi_end_io = end_io_wq->end_io; 1520 bio->bi_end_io = end_io_wq->end_io;
@@ -1614,9 +1555,10 @@ static int transaction_kthread(void *arg)
1614 u64 transid; 1555 u64 transid;
1615 unsigned long now; 1556 unsigned long now;
1616 unsigned long delay; 1557 unsigned long delay;
1617 int ret; 1558 bool cannot_commit;
1618 1559
1619 do { 1560 do {
1561 cannot_commit = false;
1620 delay = HZ * 30; 1562 delay = HZ * 30;
1621 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1563 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1622 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1564 mutex_lock(&root->fs_info->transaction_kthread_mutex);
@@ -1638,11 +1580,14 @@ static int transaction_kthread(void *arg)
1638 transid = cur->transid; 1580 transid = cur->transid;
1639 spin_unlock(&root->fs_info->trans_lock); 1581 spin_unlock(&root->fs_info->trans_lock);
1640 1582
1583 /* If the file system is aborted, this will always fail. */
1641 trans = btrfs_join_transaction(root); 1584 trans = btrfs_join_transaction(root);
1642 BUG_ON(IS_ERR(trans)); 1585 if (IS_ERR(trans)) {
1586 cannot_commit = true;
1587 goto sleep;
1588 }
1643 if (transid == trans->transid) { 1589 if (transid == trans->transid) {
1644 ret = btrfs_commit_transaction(trans, root); 1590 btrfs_commit_transaction(trans, root);
1645 BUG_ON(ret);
1646 } else { 1591 } else {
1647 btrfs_end_transaction(trans, root); 1592 btrfs_end_transaction(trans, root);
1648 } 1593 }
@@ -1653,7 +1598,8 @@ sleep:
1653 if (!try_to_freeze()) { 1598 if (!try_to_freeze()) {
1654 set_current_state(TASK_INTERRUPTIBLE); 1599 set_current_state(TASK_INTERRUPTIBLE);
1655 if (!kthread_should_stop() && 1600 if (!kthread_should_stop() &&
1656 !btrfs_transaction_blocked(root->fs_info)) 1601 (!btrfs_transaction_blocked(root->fs_info) ||
1602 cannot_commit))
1657 schedule_timeout(delay); 1603 schedule_timeout(delay);
1658 __set_current_state(TASK_RUNNING); 1604 __set_current_state(TASK_RUNNING);
1659 } 1605 }
@@ -2042,6 +1988,7 @@ int open_ctree(struct super_block *sb,
2042 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); 1988 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
2043 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1989 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
2044 fs_info->btree_inode->i_mapping); 1990 fs_info->btree_inode->i_mapping);
1991 BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
2045 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); 1992 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
2046 1993
2047 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; 1994 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
@@ -2084,6 +2031,7 @@ int open_ctree(struct super_block *sb,
2084 __setup_root(4096, 4096, 4096, 4096, tree_root, 2031 __setup_root(4096, 4096, 4096, 4096, tree_root,
2085 fs_info, BTRFS_ROOT_TREE_OBJECTID); 2032 fs_info, BTRFS_ROOT_TREE_OBJECTID);
2086 2033
2034 invalidate_bdev(fs_devices->latest_bdev);
2087 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 2035 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
2088 if (!bh) { 2036 if (!bh) {
2089 err = -EINVAL; 2037 err = -EINVAL;
@@ -2104,7 +2052,12 @@ int open_ctree(struct super_block *sb,
2104 /* check FS state, whether FS is broken. */ 2052 /* check FS state, whether FS is broken. */
2105 fs_info->fs_state |= btrfs_super_flags(disk_super); 2053 fs_info->fs_state |= btrfs_super_flags(disk_super);
2106 2054
2107 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 2055 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
2056 if (ret) {
2057 printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
2058 err = ret;
2059 goto fail_alloc;
2060 }
2108 2061
2109 /* 2062 /*
2110 * run through our array of backup supers and setup 2063 * run through our array of backup supers and setup
@@ -2135,10 +2088,55 @@ int open_ctree(struct super_block *sb,
2135 goto fail_alloc; 2088 goto fail_alloc;
2136 } 2089 }
2137 2090
2091 if (btrfs_super_leafsize(disk_super) !=
2092 btrfs_super_nodesize(disk_super)) {
2093 printk(KERN_ERR "BTRFS: couldn't mount because metadata "
2094 "blocksizes don't match. node %d leaf %d\n",
2095 btrfs_super_nodesize(disk_super),
2096 btrfs_super_leafsize(disk_super));
2097 err = -EINVAL;
2098 goto fail_alloc;
2099 }
2100 if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
2101 printk(KERN_ERR "BTRFS: couldn't mount because metadata "
2102 "blocksize (%d) was too large\n",
2103 btrfs_super_leafsize(disk_super));
2104 err = -EINVAL;
2105 goto fail_alloc;
2106 }
2107
2138 features = btrfs_super_incompat_flags(disk_super); 2108 features = btrfs_super_incompat_flags(disk_super);
2139 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 2109 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
2140 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) 2110 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
2141 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2111 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2112
2113 /*
2114 * flag our filesystem as having big metadata blocks if
2115 * they are bigger than the page size
2116 */
2117 if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
2118 if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
2119 printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
2120 features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
2121 }
2122
2123 nodesize = btrfs_super_nodesize(disk_super);
2124 leafsize = btrfs_super_leafsize(disk_super);
2125 sectorsize = btrfs_super_sectorsize(disk_super);
2126 stripesize = btrfs_super_stripesize(disk_super);
2127
2128 /*
2129 * mixed block groups end up with duplicate but slightly offset
 2130 * extent buffers for the same range, which leads to corruption
2131 */
2132 if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
2133 (sectorsize != leafsize)) {
2134 printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes "
2135 "are not allowed for mixed block groups on %s\n",
2136 sb->s_id);
2137 goto fail_alloc;
2138 }
2139
2142 btrfs_set_super_incompat_flags(disk_super, features); 2140 btrfs_set_super_incompat_flags(disk_super, features);
2143 2141
2144 features = btrfs_super_compat_ro_flags(disk_super) & 2142 features = btrfs_super_compat_ro_flags(disk_super) &
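
The new checks reject superblocks whose node and leaf sizes disagree, exceed the largest mappable metadata block, or combine mixed block groups with unequal sector/leaf sizes. A standalone checker capturing the same rules; the 65536 constant matches BTRFS_MAX_METADATA_BLOCKSIZE in this series, but treat the helper itself as a sketch:

        #include <stdio.h>

        #define MAX_METADATA_BLOCKSIZE 65536u

        static int metadata_sizes_valid(unsigned nodesize, unsigned leafsize,
                                        unsigned sectorsize, int mixed_groups)
        {
                if (leafsize != nodesize)
                        return 0;       /* metadata blocksizes must match */
                if (leafsize > MAX_METADATA_BLOCKSIZE)
                        return 0;       /* too large to map as one buffer */
                if (mixed_groups && sectorsize != leafsize)
                        return 0;       /* mixed groups need equal sizes */
                return 1;
        }

        int main(void)
        {
                printf("%d\n", metadata_sizes_valid(16384, 16384, 4096, 0)); /* 1 */
                printf("%d\n", metadata_sizes_valid(16384, 16384, 4096, 1)); /* 0 */
                return 0;
        }
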
@@ -2242,10 +2240,6 @@ int open_ctree(struct super_block *sb,
2242 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 2240 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
2243 4 * 1024 * 1024 / PAGE_CACHE_SIZE); 2241 4 * 1024 * 1024 / PAGE_CACHE_SIZE);
2244 2242
2245 nodesize = btrfs_super_nodesize(disk_super);
2246 leafsize = btrfs_super_leafsize(disk_super);
2247 sectorsize = btrfs_super_sectorsize(disk_super);
2248 stripesize = btrfs_super_stripesize(disk_super);
2249 tree_root->nodesize = nodesize; 2243 tree_root->nodesize = nodesize;
2250 tree_root->leafsize = leafsize; 2244 tree_root->leafsize = leafsize;
2251 tree_root->sectorsize = sectorsize; 2245 tree_root->sectorsize = sectorsize;
@@ -2285,7 +2279,7 @@ int open_ctree(struct super_block *sb,
2285 chunk_root->node = read_tree_block(chunk_root, 2279 chunk_root->node = read_tree_block(chunk_root,
2286 btrfs_super_chunk_root(disk_super), 2280 btrfs_super_chunk_root(disk_super),
2287 blocksize, generation); 2281 blocksize, generation);
2288 BUG_ON(!chunk_root->node); 2282 BUG_ON(!chunk_root->node); /* -ENOMEM */
2289 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 2283 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
2290 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", 2284 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
2291 sb->s_id); 2285 sb->s_id);
@@ -2425,21 +2419,31 @@ retry_root_backup:
2425 log_tree_root->node = read_tree_block(tree_root, bytenr, 2419 log_tree_root->node = read_tree_block(tree_root, bytenr,
2426 blocksize, 2420 blocksize,
2427 generation + 1); 2421 generation + 1);
2422 /* returns with log_tree_root freed on success */
2428 ret = btrfs_recover_log_trees(log_tree_root); 2423 ret = btrfs_recover_log_trees(log_tree_root);
2429 BUG_ON(ret); 2424 if (ret) {
2425 btrfs_error(tree_root->fs_info, ret,
2426 "Failed to recover log tree");
2427 free_extent_buffer(log_tree_root->node);
2428 kfree(log_tree_root);
2429 goto fail_trans_kthread;
2430 }
2430 2431
2431 if (sb->s_flags & MS_RDONLY) { 2432 if (sb->s_flags & MS_RDONLY) {
2432 ret = btrfs_commit_super(tree_root); 2433 ret = btrfs_commit_super(tree_root);
2433 BUG_ON(ret); 2434 if (ret)
2435 goto fail_trans_kthread;
2434 } 2436 }
2435 } 2437 }
2436 2438
2437 ret = btrfs_find_orphan_roots(tree_root); 2439 ret = btrfs_find_orphan_roots(tree_root);
2438 BUG_ON(ret); 2440 if (ret)
2441 goto fail_trans_kthread;
2439 2442
2440 if (!(sb->s_flags & MS_RDONLY)) { 2443 if (!(sb->s_flags & MS_RDONLY)) {
2441 ret = btrfs_cleanup_fs_roots(fs_info); 2444 ret = btrfs_cleanup_fs_roots(fs_info);
2442 BUG_ON(ret); 2445 if (ret) {
2446 }
2443 2447
2444 ret = btrfs_recover_relocation(tree_root); 2448 ret = btrfs_recover_relocation(tree_root);
2445 if (ret < 0) { 2449 if (ret < 0) {
@@ -2859,6 +2863,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2859 if (total_errors > max_errors) { 2863 if (total_errors > max_errors) {
2860 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 2864 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
2861 total_errors); 2865 total_errors);
2866
2867 /* This shouldn't happen. FUA is masked off if unsupported */
2862 BUG(); 2868 BUG();
2863 } 2869 }
2864 2870
@@ -2875,9 +2881,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2875 } 2881 }
2876 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 2882 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2877 if (total_errors > max_errors) { 2883 if (total_errors > max_errors) {
2878 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 2884 btrfs_error(root->fs_info, -EIO,
2879 total_errors); 2885 "%d errors while writing supers", total_errors);
2880 BUG(); 2886 return -EIO;
2881 } 2887 }
2882 return 0; 2888 return 0;
2883} 2889}
@@ -2891,7 +2897,20 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
2891 return ret; 2897 return ret;
2892} 2898}
2893 2899
2894int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2900/* Kill all outstanding I/O */
2901void btrfs_abort_devices(struct btrfs_root *root)
2902{
2903 struct list_head *head;
2904 struct btrfs_device *dev;
2905 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2906 head = &root->fs_info->fs_devices->devices;
2907 list_for_each_entry_rcu(dev, head, dev_list) {
2908 blk_abort_queue(dev->bdev->bd_disk->queue);
2909 }
2910 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2911}
2912
2913void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2895{ 2914{
2896 spin_lock(&fs_info->fs_roots_radix_lock); 2915 spin_lock(&fs_info->fs_roots_radix_lock);
2897 radix_tree_delete(&fs_info->fs_roots_radix, 2916 radix_tree_delete(&fs_info->fs_roots_radix,
@@ -2904,7 +2923,6 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2904 __btrfs_remove_free_space_cache(root->free_ino_pinned); 2923 __btrfs_remove_free_space_cache(root->free_ino_pinned);
2905 __btrfs_remove_free_space_cache(root->free_ino_ctl); 2924 __btrfs_remove_free_space_cache(root->free_ino_ctl);
2906 free_fs_root(root); 2925 free_fs_root(root);
2907 return 0;
2908} 2926}
2909 2927
2910static void free_fs_root(struct btrfs_root *root) 2928static void free_fs_root(struct btrfs_root *root)
@@ -2921,7 +2939,7 @@ static void free_fs_root(struct btrfs_root *root)
2921 kfree(root); 2939 kfree(root);
2922} 2940}
2923 2941
2924static int del_fs_roots(struct btrfs_fs_info *fs_info) 2942static void del_fs_roots(struct btrfs_fs_info *fs_info)
2925{ 2943{
2926 int ret; 2944 int ret;
2927 struct btrfs_root *gang[8]; 2945 struct btrfs_root *gang[8];
@@ -2950,7 +2968,6 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info)
2950 for (i = 0; i < ret; i++) 2968 for (i = 0; i < ret; i++)
2951 btrfs_free_fs_root(fs_info, gang[i]); 2969 btrfs_free_fs_root(fs_info, gang[i]);
2952 } 2970 }
2953 return 0;
2954} 2971}
2955 2972
2956int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) 2973int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2999,14 +3016,21 @@ int btrfs_commit_super(struct btrfs_root *root)
2999 if (IS_ERR(trans)) 3016 if (IS_ERR(trans))
3000 return PTR_ERR(trans); 3017 return PTR_ERR(trans);
3001 ret = btrfs_commit_transaction(trans, root); 3018 ret = btrfs_commit_transaction(trans, root);
3002 BUG_ON(ret); 3019 if (ret)
3020 return ret;
3003 /* run commit again to drop the original snapshot */ 3021 /* run commit again to drop the original snapshot */
3004 trans = btrfs_join_transaction(root); 3022 trans = btrfs_join_transaction(root);
3005 if (IS_ERR(trans)) 3023 if (IS_ERR(trans))
3006 return PTR_ERR(trans); 3024 return PTR_ERR(trans);
3007 btrfs_commit_transaction(trans, root); 3025 ret = btrfs_commit_transaction(trans, root);
3026 if (ret)
3027 return ret;
3008 ret = btrfs_write_and_wait_transaction(NULL, root); 3028 ret = btrfs_write_and_wait_transaction(NULL, root);
3009 BUG_ON(ret); 3029 if (ret) {
3030 btrfs_error(root->fs_info, ret,
3031 "Failed to sync btree inode to disk.");
3032 return ret;
3033 }
3010 3034
3011 ret = write_ctree_super(NULL, root, 0); 3035 ret = write_ctree_super(NULL, root, 0);
3012 return ret; 3036 return ret;
@@ -3122,10 +3146,9 @@ int close_ctree(struct btrfs_root *root)
3122int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) 3146int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
3123{ 3147{
3124 int ret; 3148 int ret;
3125 struct inode *btree_inode = buf->first_page->mapping->host; 3149 struct inode *btree_inode = buf->pages[0]->mapping->host;
3126 3150
3127 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf, 3151 ret = extent_buffer_uptodate(buf);
3128 NULL);
3129 if (!ret) 3152 if (!ret)
3130 return ret; 3153 return ret;
3131 3154
@@ -3136,16 +3159,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
3136 3159
3137int btrfs_set_buffer_uptodate(struct extent_buffer *buf) 3160int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
3138{ 3161{
3139 struct inode *btree_inode = buf->first_page->mapping->host; 3162 return set_extent_buffer_uptodate(buf);
3140 return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
3141 buf);
3142} 3163}
3143 3164
3144void btrfs_mark_buffer_dirty(struct extent_buffer *buf) 3165void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3145{ 3166{
3146 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 3167 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3147 u64 transid = btrfs_header_generation(buf); 3168 u64 transid = btrfs_header_generation(buf);
3148 struct inode *btree_inode = root->fs_info->btree_inode;
3149 int was_dirty; 3169 int was_dirty;
3150 3170
3151 btrfs_assert_tree_locked(buf); 3171 btrfs_assert_tree_locked(buf);
@@ -3157,8 +3177,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3157 (unsigned long long)root->fs_info->generation); 3177 (unsigned long long)root->fs_info->generation);
3158 WARN_ON(1); 3178 WARN_ON(1);
3159 } 3179 }
3160 was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 3180 was_dirty = set_extent_buffer_dirty(buf);
3161 buf);
3162 if (!was_dirty) { 3181 if (!was_dirty) {
3163 spin_lock(&root->fs_info->delalloc_lock); 3182 spin_lock(&root->fs_info->delalloc_lock);
3164 root->fs_info->dirty_metadata_bytes += buf->len; 3183 root->fs_info->dirty_metadata_bytes += buf->len;
@@ -3212,12 +3231,8 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
3212 3231
3213int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) 3232int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
3214{ 3233{
3215 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 3234 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3216 int ret; 3235 return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
3217 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
3218 if (ret == 0)
3219 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
3220 return ret;
3221} 3236}
3222 3237
3223static int btree_lock_page_hook(struct page *page, void *data, 3238static int btree_lock_page_hook(struct page *page, void *data,
@@ -3225,17 +3240,21 @@ static int btree_lock_page_hook(struct page *page, void *data,
3225{ 3240{
3226 struct inode *inode = page->mapping->host; 3241 struct inode *inode = page->mapping->host;
3227 struct btrfs_root *root = BTRFS_I(inode)->root; 3242 struct btrfs_root *root = BTRFS_I(inode)->root;
3228 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3229 struct extent_buffer *eb; 3243 struct extent_buffer *eb;
3230 unsigned long len;
3231 u64 bytenr = page_offset(page);
3232 3244
3233 if (page->private == EXTENT_PAGE_PRIVATE) 3245 /*
3246 * We culled this eb but the page is still hanging out on the mapping,
3247 * carry on.
3248 */
3249 if (!PagePrivate(page))
3234 goto out; 3250 goto out;
3235 3251
3236 len = page->private >> 2; 3252 eb = (struct extent_buffer *)page->private;
3237 eb = find_extent_buffer(io_tree, bytenr, len); 3253 if (!eb) {
3238 if (!eb) 3254 WARN_ON(1);
3255 goto out;
3256 }
3257 if (page != eb->pages[0])
3239 goto out; 3258 goto out;
3240 3259
3241 if (!btrfs_try_tree_write_lock(eb)) { 3260 if (!btrfs_try_tree_write_lock(eb)) {
@@ -3254,7 +3273,6 @@ static int btree_lock_page_hook(struct page *page, void *data,
3254 } 3273 }
3255 3274
3256 btrfs_tree_unlock(eb); 3275 btrfs_tree_unlock(eb);
3257 free_extent_buffer(eb);
3258out: 3276out:
3259 if (!trylock_page(page)) { 3277 if (!trylock_page(page)) {
3260 flush_fn(data); 3278 flush_fn(data);
@@ -3263,15 +3281,23 @@ out:
3263 return 0; 3281 return 0;
3264} 3282}
3265 3283
3266static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 3284static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3267 int read_only) 3285 int read_only)
3268{ 3286{
3287 if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) {
3288 printk(KERN_ERR "btrfs: unsupported checksum algorithm\n");
3289 return -EINVAL;
3290 }
3291
3269 if (read_only) 3292 if (read_only)
3270 return; 3293 return 0;
3271 3294
3272 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 3295 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
3273 printk(KERN_WARNING "warning: mount fs with errors, " 3296 printk(KERN_WARNING "warning: mount fs with errors, "
3274 "running btrfsck is recommended\n"); 3297 "running btrfsck is recommended\n");
3298 }
3299
3300 return 0;
3275} 3301}
3276 3302
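
btrfs_check_super_valid() now refuses to mount when the on-disk checksum type does not index the table of known checksum sizes. The guard reduces to a bounds check, sketched here with an assumed one-entry table (crc32c was the only type in this era):

        #include <stdio.h>

        static const int csum_sizes[] = { 4 };  /* crc32c => 4 bytes */

        static int csum_type_valid(unsigned type)
        {
                return type < sizeof(csum_sizes) / sizeof(csum_sizes[0]);
        }

        int main(void)
        {
                printf("type 0: %s\n", csum_type_valid(0) ? "ok" : "unsupported");
                printf("type 7: %s\n", csum_type_valid(7) ? "ok" : "unsupported");
                return 0;
        }
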
3277int btrfs_error_commit_super(struct btrfs_root *root) 3303int btrfs_error_commit_super(struct btrfs_root *root)
@@ -3293,7 +3319,7 @@ int btrfs_error_commit_super(struct btrfs_root *root)
3293 return ret; 3319 return ret;
3294} 3320}
3295 3321
3296static int btrfs_destroy_ordered_operations(struct btrfs_root *root) 3322static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
3297{ 3323{
3298 struct btrfs_inode *btrfs_inode; 3324 struct btrfs_inode *btrfs_inode;
3299 struct list_head splice; 3325 struct list_head splice;
@@ -3315,11 +3341,9 @@ static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
3315 3341
3316 spin_unlock(&root->fs_info->ordered_extent_lock); 3342 spin_unlock(&root->fs_info->ordered_extent_lock);
3317 mutex_unlock(&root->fs_info->ordered_operations_mutex); 3343 mutex_unlock(&root->fs_info->ordered_operations_mutex);
3318
3319 return 0;
3320} 3344}
3321 3345
3322static int btrfs_destroy_ordered_extents(struct btrfs_root *root) 3346static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
3323{ 3347{
3324 struct list_head splice; 3348 struct list_head splice;
3325 struct btrfs_ordered_extent *ordered; 3349 struct btrfs_ordered_extent *ordered;
@@ -3351,12 +3375,10 @@ static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
3351 } 3375 }
3352 3376
3353 spin_unlock(&root->fs_info->ordered_extent_lock); 3377 spin_unlock(&root->fs_info->ordered_extent_lock);
3354
3355 return 0;
3356} 3378}
3357 3379
3358static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 3380int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3359 struct btrfs_root *root) 3381 struct btrfs_root *root)
3360{ 3382{
3361 struct rb_node *node; 3383 struct rb_node *node;
3362 struct btrfs_delayed_ref_root *delayed_refs; 3384 struct btrfs_delayed_ref_root *delayed_refs;
@@ -3365,6 +3387,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3365 3387
3366 delayed_refs = &trans->delayed_refs; 3388 delayed_refs = &trans->delayed_refs;
3367 3389
3390again:
3368 spin_lock(&delayed_refs->lock); 3391 spin_lock(&delayed_refs->lock);
3369 if (delayed_refs->num_entries == 0) { 3392 if (delayed_refs->num_entries == 0) {
3370 spin_unlock(&delayed_refs->lock); 3393 spin_unlock(&delayed_refs->lock);
@@ -3386,6 +3409,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3386 struct btrfs_delayed_ref_head *head; 3409 struct btrfs_delayed_ref_head *head;
3387 3410
3388 head = btrfs_delayed_node_to_head(ref); 3411 head = btrfs_delayed_node_to_head(ref);
3412 spin_unlock(&delayed_refs->lock);
3389 mutex_lock(&head->mutex); 3413 mutex_lock(&head->mutex);
3390 kfree(head->extent_op); 3414 kfree(head->extent_op);
3391 delayed_refs->num_heads--; 3415 delayed_refs->num_heads--;
@@ -3393,8 +3417,9 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3393 delayed_refs->num_heads_ready--; 3417 delayed_refs->num_heads_ready--;
3394 list_del_init(&head->cluster); 3418 list_del_init(&head->cluster);
3395 mutex_unlock(&head->mutex); 3419 mutex_unlock(&head->mutex);
3420 btrfs_put_delayed_ref(ref);
3421 goto again;
3396 } 3422 }
3397
3398 spin_unlock(&delayed_refs->lock); 3423 spin_unlock(&delayed_refs->lock);
3399 btrfs_put_delayed_ref(ref); 3424 btrfs_put_delayed_ref(ref);
3400 3425
@@ -3407,7 +3432,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3407 return ret; 3432 return ret;
3408} 3433}
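
The head-destruction path above cannot take the sleeping head->mutex while holding the delayed_refs->lock spinlock, so it drops the spinlock first and then restarts the whole scan with goto again, since the tree may have changed in between. The same drop-and-rescan shape in a small pthread sketch (illustrative only, not the kernel locking primitives):

        #include <pthread.h>
        #include <stdio.h>
        #include <stdlib.h>

        struct item {
                struct item *next;
                pthread_mutex_t mutex;  /* per-item sleeping lock */
        };

        static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
        static struct item *head;

        static void destroy_all(void)
        {
        again:
                pthread_mutex_lock(&list_lock);
                if (!head) {
                        pthread_mutex_unlock(&list_lock);
                        return;
                }
                struct item *it = head;
                head = it->next;
                /* drop the outer lock before taking the per-item mutex */
                pthread_mutex_unlock(&list_lock);
                pthread_mutex_lock(&it->mutex);
                pthread_mutex_unlock(&it->mutex);
                free(it);
                goto again;     /* the list may have changed; rescan */
        }

        int main(void)
        {
                struct item *it = calloc(1, sizeof(*it));

                pthread_mutex_init(&it->mutex, NULL);
                head = it;
                destroy_all();
                printf("all items destroyed\n");
                return 0;
        }
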
3409 3434
3410static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) 3435static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
3411{ 3436{
3412 struct btrfs_pending_snapshot *snapshot; 3437 struct btrfs_pending_snapshot *snapshot;
3413 struct list_head splice; 3438 struct list_head splice;
@@ -3425,11 +3450,9 @@ static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
3425 3450
3426 kfree(snapshot); 3451 kfree(snapshot);
3427 } 3452 }
3428
3429 return 0;
3430} 3453}
3431 3454
3432static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) 3455static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3433{ 3456{
3434 struct btrfs_inode *btrfs_inode; 3457 struct btrfs_inode *btrfs_inode;
3435 struct list_head splice; 3458 struct list_head splice;
@@ -3449,8 +3472,6 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3449 } 3472 }
3450 3473
3451 spin_unlock(&root->fs_info->delalloc_lock); 3474 spin_unlock(&root->fs_info->delalloc_lock);
3452
3453 return 0;
3454} 3475}
3455 3476
3456static int btrfs_destroy_marked_extents(struct btrfs_root *root, 3477static int btrfs_destroy_marked_extents(struct btrfs_root *root,
@@ -3541,13 +3562,43 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
3541 return 0; 3562 return 0;
3542} 3563}
3543 3564
3544static int btrfs_cleanup_transaction(struct btrfs_root *root) 3565void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3566 struct btrfs_root *root)
3567{
3568 btrfs_destroy_delayed_refs(cur_trans, root);
3569 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
3570 cur_trans->dirty_pages.dirty_bytes);
3571
3572 /* FIXME: cleanup wait for commit */
3573 cur_trans->in_commit = 1;
3574 cur_trans->blocked = 1;
3575 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
3576 wake_up(&root->fs_info->transaction_blocked_wait);
3577
3578 cur_trans->blocked = 0;
3579 if (waitqueue_active(&root->fs_info->transaction_wait))
3580 wake_up(&root->fs_info->transaction_wait);
3581
3582 cur_trans->commit_done = 1;
3583 if (waitqueue_active(&cur_trans->commit_wait))
3584 wake_up(&cur_trans->commit_wait);
3585
3586 btrfs_destroy_pending_snapshots(cur_trans);
3587
3588 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
3589 EXTENT_DIRTY);
3590
3591 /*
3592 memset(cur_trans, 0, sizeof(*cur_trans));
3593 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
3594 */
3595}
3596
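
btrfs_cleanup_one_transaction() walks an aborted transaction through the same externally visible states a real commit would (blocked, then unblocked, then committed), waking each wait queue in turn so sleeping tasks notice and bail out. A condition-variable analogue of those staged wakeups, where struct fake_trans is an assumption made for illustration:

        #include <pthread.h>
        #include <stdbool.h>

        struct fake_trans {
                pthread_mutex_t lock;
                pthread_cond_t blocked_wait;
                pthread_cond_t commit_wait;
                bool blocked;
                bool commit_done;
        };

        static void cleanup_one_transaction(struct fake_trans *t)
        {
                pthread_mutex_lock(&t->lock);
                t->blocked = false;
                pthread_cond_broadcast(&t->blocked_wait); /* release throttled starters */
                t->commit_done = true;
                pthread_cond_broadcast(&t->commit_wait);  /* release commit waiters */
                pthread_mutex_unlock(&t->lock);
        }

        int main(void)
        {
                struct fake_trans t = {
                        .lock = PTHREAD_MUTEX_INITIALIZER,
                        .blocked_wait = PTHREAD_COND_INITIALIZER,
                        .commit_wait = PTHREAD_COND_INITIALIZER,
                        .blocked = true,
                };

                cleanup_one_transaction(&t);
                return 0;
        }
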
3597int btrfs_cleanup_transaction(struct btrfs_root *root)
3545{ 3598{
3546 struct btrfs_transaction *t; 3599 struct btrfs_transaction *t;
3547 LIST_HEAD(list); 3600 LIST_HEAD(list);
3548 3601
3549 WARN_ON(1);
3550
3551 mutex_lock(&root->fs_info->transaction_kthread_mutex); 3602 mutex_lock(&root->fs_info->transaction_kthread_mutex);
3552 3603
3553 spin_lock(&root->fs_info->trans_lock); 3604 spin_lock(&root->fs_info->trans_lock);
@@ -3612,6 +3663,17 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3612 return 0; 3663 return 0;
3613} 3664}
3614 3665
3666static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
3667 u64 start, u64 end,
3668 struct extent_state *state)
3669{
3670 struct super_block *sb = page->mapping->host->i_sb;
3671 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
3672 btrfs_error(fs_info, -EIO,
3673 "Error occured while writing out btree at %llu", start);
3674 return -EIO;
3675}
3676
3615static struct extent_io_ops btree_extent_io_ops = { 3677static struct extent_io_ops btree_extent_io_ops = {
3616 .write_cache_pages_lock_hook = btree_lock_page_hook, 3678 .write_cache_pages_lock_hook = btree_lock_page_hook,
3617 .readpage_end_io_hook = btree_readpage_end_io_hook, 3679 .readpage_end_io_hook = btree_readpage_end_io_hook,
@@ -3619,4 +3681,5 @@ static struct extent_io_ops btree_extent_io_ops = {
3619 .submit_bio_hook = btree_submit_bio_hook, 3681 .submit_bio_hook = btree_submit_bio_hook,
3620 /* note we're sharing with inode.c for the merge bio hook */ 3682 /* note we're sharing with inode.c for the merge bio hook */
3621 .merge_bio_hook = btrfs_merge_bio_hook, 3683 .merge_bio_hook = btrfs_merge_bio_hook,
3684 .writepage_io_failed_hook = btree_writepage_io_failed_hook,
3622}; 3685};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index e4bc4741319b..a7ace1a2dd12 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -44,8 +44,8 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
44 int mirror_num, struct extent_buffer **eb); 44 int mirror_num, struct extent_buffer **eb);
45struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 45struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
46 u64 bytenr, u32 blocksize); 46 u64 bytenr, u32 blocksize);
47int clean_tree_block(struct btrfs_trans_handle *trans, 47void clean_tree_block(struct btrfs_trans_handle *trans,
48 struct btrfs_root *root, struct extent_buffer *buf); 48 struct btrfs_root *root, struct extent_buffer *buf);
49int open_ctree(struct super_block *sb, 49int open_ctree(struct super_block *sb,
50 struct btrfs_fs_devices *fs_devices, 50 struct btrfs_fs_devices *fs_devices,
51 char *options); 51 char *options);
@@ -64,7 +64,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
64int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); 64int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
65void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); 65void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
66void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); 66void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
67int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); 67void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
68void btrfs_mark_buffer_dirty(struct extent_buffer *buf); 68void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
69int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); 69int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
70int btrfs_set_buffer_uptodate(struct extent_buffer *buf); 70int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
@@ -85,6 +85,10 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
85 struct btrfs_fs_info *fs_info); 85 struct btrfs_fs_info *fs_info);
86int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 86int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
87 struct btrfs_root *root); 87 struct btrfs_root *root);
88int btrfs_cleanup_transaction(struct btrfs_root *root);
89void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
90 struct btrfs_root *root);
91void btrfs_abort_devices(struct btrfs_root *root);
88 92
89#ifdef CONFIG_DEBUG_LOCK_ALLOC 93#ifdef CONFIG_DEBUG_LOCK_ALLOC
90void btrfs_init_lockdep(void); 94void btrfs_init_lockdep(void);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 5f77166fd01c..e887ee62b6d4 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -193,7 +193,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
193 if (ret < 0) 193 if (ret < 0)
194 goto fail; 194 goto fail;
195 195
196 BUG_ON(ret == 0); 196 BUG_ON(ret == 0); /* Key with offset of -1 found */
197 if (path->slots[0] == 0) { 197 if (path->slots[0] == 0) {
198 ret = -ENOENT; 198 ret = -ENOENT;
199 goto fail; 199 goto fail;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 37e0a800d34e..a84420491c11 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -245,7 +245,7 @@ static int exclude_super_stripes(struct btrfs_root *root,
245 cache->bytes_super += stripe_len; 245 cache->bytes_super += stripe_len;
246 ret = add_excluded_extent(root, cache->key.objectid, 246 ret = add_excluded_extent(root, cache->key.objectid,
247 stripe_len); 247 stripe_len);
248 BUG_ON(ret); 248 BUG_ON(ret); /* -ENOMEM */
249 } 249 }
250 250
251 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 251 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -253,13 +253,13 @@ static int exclude_super_stripes(struct btrfs_root *root,
253 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 253 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
254 cache->key.objectid, bytenr, 254 cache->key.objectid, bytenr,
255 0, &logical, &nr, &stripe_len); 255 0, &logical, &nr, &stripe_len);
256 BUG_ON(ret); 256 BUG_ON(ret); /* -ENOMEM */
257 257
258 while (nr--) { 258 while (nr--) {
259 cache->bytes_super += stripe_len; 259 cache->bytes_super += stripe_len;
260 ret = add_excluded_extent(root, logical[nr], 260 ret = add_excluded_extent(root, logical[nr],
261 stripe_len); 261 stripe_len);
262 BUG_ON(ret); 262 BUG_ON(ret); /* -ENOMEM */
263 } 263 }
264 264
265 kfree(logical); 265 kfree(logical);
@@ -321,7 +321,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
321 total_added += size; 321 total_added += size;
322 ret = btrfs_add_free_space(block_group, start, 322 ret = btrfs_add_free_space(block_group, start,
323 size); 323 size);
324 BUG_ON(ret); 324 BUG_ON(ret); /* -ENOMEM or logic error */
325 start = extent_end + 1; 325 start = extent_end + 1;
326 } else { 326 } else {
327 break; 327 break;
@@ -332,7 +332,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
332 size = end - start; 332 size = end - start;
333 total_added += size; 333 total_added += size;
334 ret = btrfs_add_free_space(block_group, start, size); 334 ret = btrfs_add_free_space(block_group, start, size);
335 BUG_ON(ret); 335 BUG_ON(ret); /* -ENOMEM or logic error */
336 } 336 }
337 337
338 return total_added; 338 return total_added;
@@ -474,7 +474,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
474 int ret = 0; 474 int ret = 0;
475 475
476 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); 476 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
477 BUG_ON(!caching_ctl); 477 if (!caching_ctl)
478 return -ENOMEM;
478 479
479 INIT_LIST_HEAD(&caching_ctl->list); 480 INIT_LIST_HEAD(&caching_ctl->list);
480 mutex_init(&caching_ctl->mutex); 481 mutex_init(&caching_ctl->mutex);
@@ -982,7 +983,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
982 ret = btrfs_next_leaf(root, path); 983 ret = btrfs_next_leaf(root, path);
983 if (ret < 0) 984 if (ret < 0)
984 return ret; 985 return ret;
985 BUG_ON(ret > 0); 986 BUG_ON(ret > 0); /* Corruption */
986 leaf = path->nodes[0]; 987 leaf = path->nodes[0];
987 } 988 }
988 btrfs_item_key_to_cpu(leaf, &found_key, 989 btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1008,9 +1009,9 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
1008 new_size + extra_size, 1); 1009 new_size + extra_size, 1);
1009 if (ret < 0) 1010 if (ret < 0)
1010 return ret; 1011 return ret;
1011 BUG_ON(ret); 1012 BUG_ON(ret); /* Corruption */
1012 1013
1013 ret = btrfs_extend_item(trans, root, path, new_size); 1014 btrfs_extend_item(trans, root, path, new_size);
1014 1015
1015 leaf = path->nodes[0]; 1016 leaf = path->nodes[0];
1016 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1017 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1478,7 +1479,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1478 err = ret; 1479 err = ret;
1479 goto out; 1480 goto out;
1480 } 1481 }
1481 BUG_ON(ret); 1482 if (ret && !insert) {
1483 err = -ENOENT;
1484 goto out;
1485 }
1486 BUG_ON(ret); /* Corruption */
1482 1487
1483 leaf = path->nodes[0]; 1488 leaf = path->nodes[0];
1484 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1489 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
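The hunk above turns a failed lookup into -ENOENT when the caller did not ask to insert, keeping the corruption assert only for the insert path. A minimal user-space sketch of that decision, assuming the btrfs convention that a tree search returns 0 for found, >0 for not-found-but-positioned, and <0 on error (the helper name and -EIO stand-in are illustrative, not kernel API):

    #include <errno.h>
    #include <stdio.h>

    /*
     * Sketch: interpret a search result for an extent item. When the
     * caller intends to insert a reference, the extent item must already
     * exist, so "not found" indicates a corrupt tree; for a plain lookup
     * it is simply -ENOENT.
     */
    static int interpret_search(int search_ret, int insert)
    {
        if (search_ret < 0)
            return search_ret;  /* -ENOMEM/-EIO from the search itself */
        if (search_ret > 0 && !insert)
            return -ENOENT;     /* key absent; caller can handle it */
        if (search_ret > 0 && insert)
            return -EIO;        /* stand-in for the BUG_ON(ret) corruption case */
        return 0;               /* found */
    }

    int main(void)
    {
        printf("%d\n", interpret_search(1, 0));  /* -ENOENT */
        printf("%d\n", interpret_search(0, 1));  /* 0 */
        return 0;
    }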
@@ -1592,13 +1597,13 @@ out:
1592 * helper to add new inline back ref 1597 * helper to add new inline back ref
1593 */ 1598 */
1594static noinline_for_stack 1599static noinline_for_stack
1595int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1600void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1596 struct btrfs_root *root, 1601 struct btrfs_root *root,
1597 struct btrfs_path *path, 1602 struct btrfs_path *path,
1598 struct btrfs_extent_inline_ref *iref, 1603 struct btrfs_extent_inline_ref *iref,
1599 u64 parent, u64 root_objectid, 1604 u64 parent, u64 root_objectid,
1600 u64 owner, u64 offset, int refs_to_add, 1605 u64 owner, u64 offset, int refs_to_add,
1601 struct btrfs_delayed_extent_op *extent_op) 1606 struct btrfs_delayed_extent_op *extent_op)
1602{ 1607{
1603 struct extent_buffer *leaf; 1608 struct extent_buffer *leaf;
1604 struct btrfs_extent_item *ei; 1609 struct btrfs_extent_item *ei;
@@ -1608,7 +1613,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1608 u64 refs; 1613 u64 refs;
1609 int size; 1614 int size;
1610 int type; 1615 int type;
1611 int ret;
1612 1616
1613 leaf = path->nodes[0]; 1617 leaf = path->nodes[0];
1614 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1618 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1617,7 +1621,7 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1617 type = extent_ref_type(parent, owner); 1621 type = extent_ref_type(parent, owner);
1618 size = btrfs_extent_inline_ref_size(type); 1622 size = btrfs_extent_inline_ref_size(type);
1619 1623
1620 ret = btrfs_extend_item(trans, root, path, size); 1624 btrfs_extend_item(trans, root, path, size);
1621 1625
1622 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1626 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1623 refs = btrfs_extent_refs(leaf, ei); 1627 refs = btrfs_extent_refs(leaf, ei);
@@ -1652,7 +1656,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1652 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1656 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1653 } 1657 }
1654 btrfs_mark_buffer_dirty(leaf); 1658 btrfs_mark_buffer_dirty(leaf);
1655 return 0;
1656} 1659}
1657 1660
1658static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1661static int lookup_extent_backref(struct btrfs_trans_handle *trans,
@@ -1687,12 +1690,12 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1687 * helper to update/remove inline back ref 1690 * helper to update/remove inline back ref
1688 */ 1691 */
1689static noinline_for_stack 1692static noinline_for_stack
1690int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1693void update_inline_extent_backref(struct btrfs_trans_handle *trans,
1691 struct btrfs_root *root, 1694 struct btrfs_root *root,
1692 struct btrfs_path *path, 1695 struct btrfs_path *path,
1693 struct btrfs_extent_inline_ref *iref, 1696 struct btrfs_extent_inline_ref *iref,
1694 int refs_to_mod, 1697 int refs_to_mod,
1695 struct btrfs_delayed_extent_op *extent_op) 1698 struct btrfs_delayed_extent_op *extent_op)
1696{ 1699{
1697 struct extent_buffer *leaf; 1700 struct extent_buffer *leaf;
1698 struct btrfs_extent_item *ei; 1701 struct btrfs_extent_item *ei;
@@ -1703,7 +1706,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1703 u32 item_size; 1706 u32 item_size;
1704 int size; 1707 int size;
1705 int type; 1708 int type;
1706 int ret;
1707 u64 refs; 1709 u64 refs;
1708 1710
1709 leaf = path->nodes[0]; 1711 leaf = path->nodes[0];
@@ -1745,10 +1747,9 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1745 memmove_extent_buffer(leaf, ptr, ptr + size, 1747 memmove_extent_buffer(leaf, ptr, ptr + size,
1746 end - ptr - size); 1748 end - ptr - size);
1747 item_size -= size; 1749 item_size -= size;
1748 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1750 btrfs_truncate_item(trans, root, path, item_size, 1);
1749 } 1751 }
1750 btrfs_mark_buffer_dirty(leaf); 1752 btrfs_mark_buffer_dirty(leaf);
1751 return 0;
1752} 1753}
1753 1754
1754static noinline_for_stack 1755static noinline_for_stack
@@ -1768,13 +1769,13 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1768 root_objectid, owner, offset, 1); 1769 root_objectid, owner, offset, 1);
1769 if (ret == 0) { 1770 if (ret == 0) {
1770 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1771 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1771 ret = update_inline_extent_backref(trans, root, path, iref, 1772 update_inline_extent_backref(trans, root, path, iref,
1772 refs_to_add, extent_op); 1773 refs_to_add, extent_op);
1773 } else if (ret == -ENOENT) { 1774 } else if (ret == -ENOENT) {
1774 ret = setup_inline_extent_backref(trans, root, path, iref, 1775 setup_inline_extent_backref(trans, root, path, iref, parent,
1775 parent, root_objectid, 1776 root_objectid, owner, offset,
1776 owner, offset, refs_to_add, 1777 refs_to_add, extent_op);
1777 extent_op); 1778 ret = 0;
1778 } 1779 }
1779 return ret; 1780 return ret;
1780} 1781}
@@ -1804,12 +1805,12 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1804 struct btrfs_extent_inline_ref *iref, 1805 struct btrfs_extent_inline_ref *iref,
1805 int refs_to_drop, int is_data) 1806 int refs_to_drop, int is_data)
1806{ 1807{
1807 int ret; 1808 int ret = 0;
1808 1809
1809 BUG_ON(!is_data && refs_to_drop != 1); 1810 BUG_ON(!is_data && refs_to_drop != 1);
1810 if (iref) { 1811 if (iref) {
1811 ret = update_inline_extent_backref(trans, root, path, iref, 1812 update_inline_extent_backref(trans, root, path, iref,
1812 -refs_to_drop, NULL); 1813 -refs_to_drop, NULL);
1813 } else if (is_data) { 1814 } else if (is_data) {
1814 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1815 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1815 } else { 1816 } else {
@@ -1835,6 +1836,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1835 /* Tell the block device(s) that the sectors can be discarded */ 1836 /* Tell the block device(s) that the sectors can be discarded */
1836 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, 1837 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
1837 bytenr, &num_bytes, &bbio, 0); 1838 bytenr, &num_bytes, &bbio, 0);
 1839 /* The only error condition here is -ENOMEM */
1838 if (!ret) { 1840 if (!ret) {
1839 struct btrfs_bio_stripe *stripe = bbio->stripes; 1841 struct btrfs_bio_stripe *stripe = bbio->stripes;
1840 int i; 1842 int i;
@@ -1850,7 +1852,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1850 if (!ret) 1852 if (!ret)
1851 discarded_bytes += stripe->length; 1853 discarded_bytes += stripe->length;
1852 else if (ret != -EOPNOTSUPP) 1854 else if (ret != -EOPNOTSUPP)
 1853 break; 1855 break; /* Logic errors or -ENOMEM, or -EIO; it is not clear how -EIO could happen here */
1854 1856
1855 /* 1857 /*
1856 * Just in case we get back EOPNOTSUPP for some reason, 1858 * Just in case we get back EOPNOTSUPP for some reason,
@@ -1869,6 +1871,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1869 return ret; 1871 return ret;
1870} 1872}
1871 1873
1874/* Can return -ENOMEM */
1872int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1875int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1873 struct btrfs_root *root, 1876 struct btrfs_root *root,
1874 u64 bytenr, u64 num_bytes, u64 parent, 1877 u64 bytenr, u64 num_bytes, u64 parent,
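The discard hunk above treats -EOPNOTSUPP from an individual device as non-fatal, so one disk that cannot discard does not fail the whole extent; only real errors break the loop. A compilable sketch of that per-stripe policy (the struct and helper are stand-ins, not the kernel's btrfs_bio types):

    #include <errno.h>
    #include <stdio.h>

    struct stripe {
        long length;
        int supports_discard;
    };

    /* Stand-in for issuing REQ_DISCARD to one device. */
    static int discard_stripe(const struct stripe *s)
    {
        return s->supports_discard ? 0 : -EOPNOTSUPP;
    }

    /* Mirror of the loop above: count what was discarded, skip devices
     * that cannot discard, and stop only on a real error. */
    static int discard_all(const struct stripe *stripes, int n, long *discarded)
    {
        int i;

        *discarded = 0;
        for (i = 0; i < n; i++) {
            int err = discard_stripe(&stripes[i]);

            if (!err)
                *discarded += stripes[i].length;
            else if (err != -EOPNOTSUPP)
                return err;  /* logic error, -ENOMEM or -EIO */
        }
        return 0;
    }

    int main(void)
    {
        struct stripe s[2] = { { 4096, 1 }, { 4096, 0 } };
        long bytes;

        printf("ret=%d discarded=%ld\n", discard_all(s, 2, &bytes), bytes);
        return 0;  /* prints ret=0 discarded=4096 */
    }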
@@ -1944,7 +1947,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1944 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1947 ret = insert_extent_backref(trans, root->fs_info->extent_root,
1945 path, bytenr, parent, root_objectid, 1948 path, bytenr, parent, root_objectid,
1946 owner, offset, refs_to_add); 1949 owner, offset, refs_to_add);
1947 BUG_ON(ret); 1950 if (ret)
1951 btrfs_abort_transaction(trans, root, ret);
1948out: 1952out:
1949 btrfs_free_path(path); 1953 btrfs_free_path(path);
1950 return err; 1954 return err;
@@ -2031,6 +2035,9 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2031 int ret; 2035 int ret;
2032 int err = 0; 2036 int err = 0;
2033 2037
2038 if (trans->aborted)
2039 return 0;
2040
2034 path = btrfs_alloc_path(); 2041 path = btrfs_alloc_path();
2035 if (!path) 2042 if (!path)
2036 return -ENOMEM; 2043 return -ENOMEM;
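run_delayed_extent_op, and run_one_delayed_ref just below, now begin with an early return when the transaction is already aborted, so the delayed-ref machinery degrades to a no-op instead of tripping over half-finished state. A user-space model of the guard, with an illustrative trans handle rather than the kernel's:

    #include <stdio.h>

    struct trans_handle {
        int aborted;  /* latched once a fatal error is seen */
    };

    /* Model of the guard: after abort, helpers succeed vacuously and
     * the real cleanup is deferred to one central place. */
    static int run_op(struct trans_handle *trans, int simulated_err)
    {
        if (trans->aborted)
            return 0;
        if (simulated_err) {
            trans->aborted = 1;  /* models btrfs_abort_transaction() */
            return simulated_err;
        }
        return 0;
    }

    int main(void)
    {
        struct trans_handle trans = { 0 };

        printf("%d\n", run_op(&trans, -12));  /* -12: first failure aborts */
        printf("%d\n", run_op(&trans, 0));    /* 0: now a no-op */
        return 0;
    }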
@@ -2128,7 +2135,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2128 struct btrfs_delayed_extent_op *extent_op, 2135 struct btrfs_delayed_extent_op *extent_op,
2129 int insert_reserved) 2136 int insert_reserved)
2130{ 2137{
2131 int ret; 2138 int ret = 0;
2139
2140 if (trans->aborted)
2141 return 0;
2142
2132 if (btrfs_delayed_ref_is_head(node)) { 2143 if (btrfs_delayed_ref_is_head(node)) {
2133 struct btrfs_delayed_ref_head *head; 2144 struct btrfs_delayed_ref_head *head;
2134 /* 2145 /*
@@ -2146,11 +2157,10 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2146 ret = btrfs_del_csums(trans, root, 2157 ret = btrfs_del_csums(trans, root,
2147 node->bytenr, 2158 node->bytenr,
2148 node->num_bytes); 2159 node->num_bytes);
2149 BUG_ON(ret);
2150 } 2160 }
2151 } 2161 }
2152 mutex_unlock(&head->mutex); 2162 mutex_unlock(&head->mutex);
2153 return 0; 2163 return ret;
2154 } 2164 }
2155 2165
2156 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2166 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
@@ -2197,6 +2207,10 @@ again:
2197 return NULL; 2207 return NULL;
2198} 2208}
2199 2209
2210/*
2211 * Returns 0 on success or if called with an already aborted transaction.
2212 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
2213 */
2200static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2214static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2201 struct btrfs_root *root, 2215 struct btrfs_root *root,
2202 struct list_head *cluster) 2216 struct list_head *cluster)
@@ -2285,9 +2299,13 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2285 2299
2286 ret = run_delayed_extent_op(trans, root, 2300 ret = run_delayed_extent_op(trans, root,
2287 ref, extent_op); 2301 ref, extent_op);
2288 BUG_ON(ret);
2289 kfree(extent_op); 2302 kfree(extent_op);
2290 2303
2304 if (ret) {
2305 printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
2306 return ret;
2307 }
2308
2291 goto next; 2309 goto next;
2292 } 2310 }
2293 2311
@@ -2308,11 +2326,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2308 2326
2309 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2327 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2310 must_insert_reserved); 2328 must_insert_reserved);
2311 BUG_ON(ret);
2312 2329
2313 btrfs_put_delayed_ref(ref); 2330 btrfs_put_delayed_ref(ref);
2314 kfree(extent_op); 2331 kfree(extent_op);
2315 count++; 2332 count++;
2333
2334 if (ret) {
2335 printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
2336 return ret;
2337 }
2338
2316next: 2339next:
2317 do_chunk_alloc(trans, root->fs_info->extent_root, 2340 do_chunk_alloc(trans, root->fs_info->extent_root,
2318 2 * 1024 * 1024, 2341 2 * 1024 * 1024,
@@ -2347,6 +2370,9 @@ static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
2347 * 0, which means to process everything in the tree at the start 2370 * 0, which means to process everything in the tree at the start
2348 * of the run (but not newly added entries), or it can be some target 2371 * of the run (but not newly added entries), or it can be some target
2349 * number you'd like to process. 2372 * number you'd like to process.
2373 *
2374 * Returns 0 on success or if called with an aborted transaction
2375 * Returns <0 on error and aborts the transaction
2350 */ 2376 */
2351int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2377int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2352 struct btrfs_root *root, unsigned long count) 2378 struct btrfs_root *root, unsigned long count)
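The updated comment pins down the contract of btrfs_run_delayed_refs: a count of 0 means everything queued when the run started (excluding refs added during it), a positive count caps the progress, and errors abort the transaction. A small user-space model of the count handling (the queue here is a plain counter, not the kernel's rbtree of refs):

    #include <stdio.h>

    /* Model: count == 0 processes every entry present at the start of
     * the run, but not entries queued while running; a nonzero count
     * caps the number processed. */
    static unsigned long run_refs(unsigned long queued_at_start,
                                  unsigned long count)
    {
        unsigned long target = count ? count : queued_at_start;
        unsigned long done = 0;

        while (done < queued_at_start && done < target)
            done++;  /* "run" one delayed ref */
        return done;
    }

    int main(void)
    {
        printf("%lu\n", run_refs(8, 0));  /* 8: full run */
        printf("%lu\n", run_refs(8, 3));  /* 3: partial progress */
        return 0;
    }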
@@ -2362,6 +2388,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2362 unsigned long num_refs = 0; 2388 unsigned long num_refs = 0;
2363 int consider_waiting; 2389 int consider_waiting;
2364 2390
2391 /* We'll clean this up in btrfs_cleanup_transaction */
2392 if (trans->aborted)
2393 return 0;
2394
2365 if (root == root->fs_info->extent_root) 2395 if (root == root->fs_info->extent_root)
2366 root = root->fs_info->tree_root; 2396 root = root->fs_info->tree_root;
2367 2397
@@ -2419,7 +2449,11 @@ again:
2419 } 2449 }
2420 2450
2421 ret = run_clustered_refs(trans, root, &cluster); 2451 ret = run_clustered_refs(trans, root, &cluster);
2422 BUG_ON(ret < 0); 2452 if (ret < 0) {
2453 spin_unlock(&delayed_refs->lock);
2454 btrfs_abort_transaction(trans, root, ret);
2455 return ret;
2456 }
2423 2457
2424 count -= min_t(unsigned long, ret, count); 2458 count -= min_t(unsigned long, ret, count);
2425 2459
@@ -2584,7 +2618,7 @@ static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2584 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2618 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2585 if (ret < 0) 2619 if (ret < 0)
2586 goto out; 2620 goto out;
2587 BUG_ON(ret == 0); 2621 BUG_ON(ret == 0); /* Corruption */
2588 2622
2589 ret = -ENOENT; 2623 ret = -ENOENT;
2590 if (path->slots[0] == 0) 2624 if (path->slots[0] == 0)
@@ -2738,7 +2772,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2738 } 2772 }
2739 return 0; 2773 return 0;
2740fail: 2774fail:
2741 BUG();
2742 return ret; 2775 return ret;
2743} 2776}
2744 2777
@@ -2767,7 +2800,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
2767 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2800 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
2768 if (ret < 0) 2801 if (ret < 0)
2769 goto fail; 2802 goto fail;
2770 BUG_ON(ret); 2803 BUG_ON(ret); /* Corruption */
2771 2804
2772 leaf = path->nodes[0]; 2805 leaf = path->nodes[0];
2773 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2806 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -2775,8 +2808,10 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
2775 btrfs_mark_buffer_dirty(leaf); 2808 btrfs_mark_buffer_dirty(leaf);
2776 btrfs_release_path(path); 2809 btrfs_release_path(path);
2777fail: 2810fail:
2778 if (ret) 2811 if (ret) {
2812 btrfs_abort_transaction(trans, root, ret);
2779 return ret; 2813 return ret;
2814 }
2780 return 0; 2815 return 0;
2781 2816
2782} 2817}
@@ -2949,7 +2984,8 @@ again:
2949 if (last == 0) { 2984 if (last == 0) {
2950 err = btrfs_run_delayed_refs(trans, root, 2985 err = btrfs_run_delayed_refs(trans, root,
2951 (unsigned long)-1); 2986 (unsigned long)-1);
2952 BUG_ON(err); 2987 if (err) /* File system offline */
2988 goto out;
2953 } 2989 }
2954 2990
2955 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2991 cache = btrfs_lookup_first_block_group(root->fs_info, last);
@@ -2976,7 +3012,9 @@ again:
2976 last = cache->key.objectid + cache->key.offset; 3012 last = cache->key.objectid + cache->key.offset;
2977 3013
2978 err = write_one_cache_group(trans, root, path, cache); 3014 err = write_one_cache_group(trans, root, path, cache);
2979 BUG_ON(err); 3015 if (err) /* File system offline */
3016 goto out;
3017
2980 btrfs_put_block_group(cache); 3018 btrfs_put_block_group(cache);
2981 } 3019 }
2982 3020
@@ -2989,7 +3027,8 @@ again:
2989 if (last == 0) { 3027 if (last == 0) {
2990 err = btrfs_run_delayed_refs(trans, root, 3028 err = btrfs_run_delayed_refs(trans, root,
2991 (unsigned long)-1); 3029 (unsigned long)-1);
2992 BUG_ON(err); 3030 if (err) /* File system offline */
3031 goto out;
2993 } 3032 }
2994 3033
2995 cache = btrfs_lookup_first_block_group(root->fs_info, last); 3034 cache = btrfs_lookup_first_block_group(root->fs_info, last);
@@ -3014,20 +3053,21 @@ again:
3014 continue; 3053 continue;
3015 } 3054 }
3016 3055
3017 btrfs_write_out_cache(root, trans, cache, path); 3056 err = btrfs_write_out_cache(root, trans, cache, path);
3018 3057
3019 /* 3058 /*
3020 * If we didn't have an error then the cache state is still 3059 * If we didn't have an error then the cache state is still
3021 * NEED_WRITE, so we can set it to WRITTEN. 3060 * NEED_WRITE, so we can set it to WRITTEN.
3022 */ 3061 */
3023 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 3062 if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3024 cache->disk_cache_state = BTRFS_DC_WRITTEN; 3063 cache->disk_cache_state = BTRFS_DC_WRITTEN;
3025 last = cache->key.objectid + cache->key.offset; 3064 last = cache->key.objectid + cache->key.offset;
3026 btrfs_put_block_group(cache); 3065 btrfs_put_block_group(cache);
3027 } 3066 }
3067out:
3028 3068
3029 btrfs_free_path(path); 3069 btrfs_free_path(path);
3030 return 0; 3070 return err;
3031} 3071}
3032 3072
3033int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 3073int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -3098,11 +3138,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3098 3138
3099static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 3139static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3100{ 3140{
3101 u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; 3141 u64 extra_flags = chunk_to_extended(flags) &
3102 3142 BTRFS_EXTENDED_PROFILE_MASK;
3103 /* chunk -> extended profile */
3104 if (extra_flags == 0)
3105 extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
3106 3143
3107 if (flags & BTRFS_BLOCK_GROUP_DATA) 3144 if (flags & BTRFS_BLOCK_GROUP_DATA)
3108 fs_info->avail_data_alloc_bits |= extra_flags; 3145 fs_info->avail_data_alloc_bits |= extra_flags;
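This hunk swaps the open-coded single-profile fixup for chunk_to_extended(); the reduce path below makes the inverse trip through extended_to_chunk(). The point of the extended format is that "single" gets an explicit bit, whereas the on-disk chunk format encodes it as no profile bits at all. A sketch of the pair, assuming an illustrative bit layout (the real masks live in ctree.h):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative values; the kernel defines these in ctree.h. */
    #define PROFILE_MASK            0x7ULL  /* raid0/raid1/dup/raid10 bits */
    #define AVAIL_ALLOC_BIT_SINGLE  (1ULL << 48)

    /* chunk format -> extended: make "single" an explicit bit */
    static uint64_t chunk_to_extended(uint64_t flags)
    {
        if ((flags & PROFILE_MASK) == 0)
            flags |= AVAIL_ALLOC_BIT_SINGLE;
        return flags;
    }

    /* extended -> chunk format: drop the synthetic single bit */
    static uint64_t extended_to_chunk(uint64_t flags)
    {
        return flags & ~AVAIL_ALLOC_BIT_SINGLE;
    }

    int main(void)
    {
        uint64_t ext = chunk_to_extended(0);  /* a "single" chunk */

        printf("%llx\n", (unsigned long long)ext);
        printf("%llx\n", (unsigned long long)extended_to_chunk(ext));
        return 0;
    }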
@@ -3113,6 +3150,35 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3113} 3150}
3114 3151
3115/* 3152/*
3153 * returns target flags in extended format or 0 if restripe for this
3154 * chunk_type is not in progress
3155 */
3156static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3157{
3158 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3159 u64 target = 0;
3160
3161 BUG_ON(!mutex_is_locked(&fs_info->volume_mutex) &&
3162 !spin_is_locked(&fs_info->balance_lock));
3163
3164 if (!bctl)
3165 return 0;
3166
3167 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3168 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3169 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3170 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3171 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3172 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3173 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3174 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3175 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3176 }
3177
3178 return target;
3179}
3180
3181/*
3116 * @flags: available profiles in extended format (see ctree.h) 3182 * @flags: available profiles in extended format (see ctree.h)
3117 * 3183 *
3118 * Returns reduced profile in chunk format. If profile changing is in 3184 * Returns reduced profile in chunk format. If profile changing is in
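get_restripe_target() above centralizes the per-chunk-type convert check and must be called with the volume mutex or the balance lock held, hence the lock assertion at its top. A lock-free user-space model of the selection logic, with made-up flag values standing in for the block group and balance bits:

    #include <stdint.h>
    #include <stdio.h>

    #define BG_DATA       (1ULL << 0)
    #define BG_SYSTEM     (1ULL << 1)
    #define BG_METADATA   (1ULL << 2)
    #define ARGS_CONVERT  (1ULL << 8)

    struct balance_args { uint64_t flags; uint64_t target; };
    struct balance_ctl  { struct balance_args data, sys, meta; };

    /* Returns the conversion target for this chunk type, or 0 when no
     * restripe of that type is in progress (no bctl or no CONVERT). */
    static uint64_t restripe_target(const struct balance_ctl *bctl,
                                    uint64_t flags)
    {
        if (!bctl)
            return 0;
        if ((flags & BG_DATA) && (bctl->data.flags & ARGS_CONVERT))
            return BG_DATA | bctl->data.target;
        if ((flags & BG_SYSTEM) && (bctl->sys.flags & ARGS_CONVERT))
            return BG_SYSTEM | bctl->sys.target;
        if ((flags & BG_METADATA) && (bctl->meta.flags & ARGS_CONVERT))
            return BG_METADATA | bctl->meta.target;
        return 0;
    }

    int main(void)
    {
        struct balance_ctl bctl = {
            .data = { .flags = ARGS_CONVERT, .target = 1ULL << 4 },
        };

        printf("%llx\n", (unsigned long long)restripe_target(&bctl, BG_DATA));
        printf("%llx\n", (unsigned long long)restripe_target(&bctl, BG_METADATA));
        return 0;
    }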
@@ -3128,31 +3194,19 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3128 */ 3194 */
3129 u64 num_devices = root->fs_info->fs_devices->rw_devices + 3195 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3130 root->fs_info->fs_devices->missing_devices; 3196 root->fs_info->fs_devices->missing_devices;
3197 u64 target;
3131 3198
3132 /* pick restriper's target profile if it's available */ 3199 /*
3200 * see if restripe for this chunk_type is in progress, if so
3201 * try to reduce to the target profile
3202 */
3133 spin_lock(&root->fs_info->balance_lock); 3203 spin_lock(&root->fs_info->balance_lock);
3134 if (root->fs_info->balance_ctl) { 3204 target = get_restripe_target(root->fs_info, flags);
3135 struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; 3205 if (target) {
3136 u64 tgt = 0; 3206 /* pick target profile only if it's already available */
3137 3207 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
3138 if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
3139 (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3140 (flags & bctl->data.target)) {
3141 tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3142 } else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
3143 (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3144 (flags & bctl->sys.target)) {
3145 tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3146 } else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
3147 (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3148 (flags & bctl->meta.target)) {
3149 tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3150 }
3151
3152 if (tgt) {
3153 spin_unlock(&root->fs_info->balance_lock); 3208 spin_unlock(&root->fs_info->balance_lock);
3154 flags = tgt; 3209 return extended_to_chunk(target);
3155 goto out;
3156 } 3210 }
3157 } 3211 }
3158 spin_unlock(&root->fs_info->balance_lock); 3212 spin_unlock(&root->fs_info->balance_lock);
@@ -3180,10 +3234,7 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3180 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 3234 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
3181 } 3235 }
3182 3236
3183out: 3237 return extended_to_chunk(flags);
3184 /* extended -> chunk profile */
3185 flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
3186 return flags;
3187} 3238}
3188 3239
3189static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) 3240static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
@@ -3312,8 +3363,7 @@ commit_trans:
3312 } 3363 }
3313 data_sinfo->bytes_may_use += bytes; 3364 data_sinfo->bytes_may_use += bytes;
3314 trace_btrfs_space_reservation(root->fs_info, "space_info", 3365 trace_btrfs_space_reservation(root->fs_info, "space_info",
3315 (u64)(unsigned long)data_sinfo, 3366 data_sinfo->flags, bytes, 1);
3316 bytes, 1);
3317 spin_unlock(&data_sinfo->lock); 3367 spin_unlock(&data_sinfo->lock);
3318 3368
3319 return 0; 3369 return 0;
@@ -3334,8 +3384,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3334 spin_lock(&data_sinfo->lock); 3384 spin_lock(&data_sinfo->lock);
3335 data_sinfo->bytes_may_use -= bytes; 3385 data_sinfo->bytes_may_use -= bytes;
3336 trace_btrfs_space_reservation(root->fs_info, "space_info", 3386 trace_btrfs_space_reservation(root->fs_info, "space_info",
3337 (u64)(unsigned long)data_sinfo, 3387 data_sinfo->flags, bytes, 0);
3338 bytes, 0);
3339 spin_unlock(&data_sinfo->lock); 3388 spin_unlock(&data_sinfo->lock);
3340} 3389}
3341 3390
@@ -3396,6 +3445,50 @@ static int should_alloc_chunk(struct btrfs_root *root,
3396 return 1; 3445 return 1;
3397} 3446}
3398 3447
3448static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
3449{
3450 u64 num_dev;
3451
3452 if (type & BTRFS_BLOCK_GROUP_RAID10 ||
3453 type & BTRFS_BLOCK_GROUP_RAID0)
3454 num_dev = root->fs_info->fs_devices->rw_devices;
3455 else if (type & BTRFS_BLOCK_GROUP_RAID1)
3456 num_dev = 2;
3457 else
3458 num_dev = 1; /* DUP or single */
3459
 3460 /* metadata for updating devices and chunk tree */
3461 return btrfs_calc_trans_metadata_size(root, num_dev + 1);
3462}
3463
3464static void check_system_chunk(struct btrfs_trans_handle *trans,
3465 struct btrfs_root *root, u64 type)
3466{
3467 struct btrfs_space_info *info;
3468 u64 left;
3469 u64 thresh;
3470
3471 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3472 spin_lock(&info->lock);
3473 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
3474 info->bytes_reserved - info->bytes_readonly;
3475 spin_unlock(&info->lock);
3476
3477 thresh = get_system_chunk_thresh(root, type);
3478 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
 3479 printk(KERN_INFO "btrfs: left=%llu, need=%llu, flags=%llu\n",
3480 left, thresh, type);
3481 dump_space_info(info, 0, 0);
3482 }
3483
3484 if (left < thresh) {
3485 u64 flags;
3486
3487 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
3488 btrfs_alloc_chunk(trans, root, flags);
3489 }
3490}
3491
3399static int do_chunk_alloc(struct btrfs_trans_handle *trans, 3492static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3400 struct btrfs_root *extent_root, u64 alloc_bytes, 3493 struct btrfs_root *extent_root, u64 alloc_bytes,
3401 u64 flags, int force) 3494 u64 flags, int force)
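check_system_chunk() above guards against running out of SYSTEM space mid-allocation: it estimates the metadata needed to update one device item per affected device plus the chunk tree itself, and forces a new SYSTEM chunk when the free headroom falls below that. The arithmetic, modeled in user space (item_cost stands in for btrfs_calc_trans_metadata_size()):

    #include <stdint.h>
    #include <stdio.h>

    #define RAID0   (1u << 0)
    #define RAID1   (1u << 1)
    #define RAID10  (1u << 2)

    /* Worst-case number of devices a new chunk of this type touches,
     * plus one more reservation for the chunk tree update itself. */
    static uint64_t system_chunk_thresh(unsigned type, uint64_t rw_devices,
                                        uint64_t item_cost)
    {
        uint64_t num_dev;

        if (type & (RAID10 | RAID0))
            num_dev = rw_devices;  /* striped across all rw devices */
        else if (type & RAID1)
            num_dev = 2;
        else
            num_dev = 1;           /* DUP or single */

        return (num_dev + 1) * item_cost;
    }

    int main(void)
    {
        uint64_t thresh = system_chunk_thresh(RAID10, 6, 4096);
        uint64_t left = 16384;

        if (left < thresh)  /* would trigger a SYSTEM chunk allocation */
            printf("left=%llu, need=%llu\n",
                   (unsigned long long)left, (unsigned long long)thresh);
        return 0;
    }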
@@ -3405,15 +3498,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3405 int wait_for_alloc = 0; 3498 int wait_for_alloc = 0;
3406 int ret = 0; 3499 int ret = 0;
3407 3500
3408 BUG_ON(!profile_is_valid(flags, 0));
3409
3410 space_info = __find_space_info(extent_root->fs_info, flags); 3501 space_info = __find_space_info(extent_root->fs_info, flags);
3411 if (!space_info) { 3502 if (!space_info) {
3412 ret = update_space_info(extent_root->fs_info, flags, 3503 ret = update_space_info(extent_root->fs_info, flags,
3413 0, 0, &space_info); 3504 0, 0, &space_info);
3414 BUG_ON(ret); 3505 BUG_ON(ret); /* -ENOMEM */
3415 } 3506 }
3416 BUG_ON(!space_info); 3507 BUG_ON(!space_info); /* Logic error */
3417 3508
3418again: 3509again:
3419 spin_lock(&space_info->lock); 3510 spin_lock(&space_info->lock);
@@ -3468,6 +3559,12 @@ again:
3468 force_metadata_allocation(fs_info); 3559 force_metadata_allocation(fs_info);
3469 } 3560 }
3470 3561
3562 /*
3563 * Check if we have enough space in SYSTEM chunk because we may need
3564 * to update devices.
3565 */
3566 check_system_chunk(trans, extent_root, flags);
3567
3471 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3568 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3472 if (ret < 0 && ret != -ENOSPC) 3569 if (ret < 0 && ret != -ENOSPC)
3473 goto out; 3570 goto out;
@@ -3678,8 +3775,10 @@ again:
3678 ret = wait_event_interruptible(space_info->wait, 3775 ret = wait_event_interruptible(space_info->wait,
3679 !space_info->flush); 3776 !space_info->flush);
3680 /* Must have been interrupted, return */ 3777 /* Must have been interrupted, return */
3681 if (ret) 3778 if (ret) {
3779 printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__);
3682 return -EINTR; 3780 return -EINTR;
3781 }
3683 3782
3684 spin_lock(&space_info->lock); 3783 spin_lock(&space_info->lock);
3685 } 3784 }
@@ -3700,9 +3799,7 @@ again:
3700 if (used + orig_bytes <= space_info->total_bytes) { 3799 if (used + orig_bytes <= space_info->total_bytes) {
3701 space_info->bytes_may_use += orig_bytes; 3800 space_info->bytes_may_use += orig_bytes;
3702 trace_btrfs_space_reservation(root->fs_info, 3801 trace_btrfs_space_reservation(root->fs_info,
3703 "space_info", 3802 "space_info", space_info->flags, orig_bytes, 1);
3704 (u64)(unsigned long)space_info,
3705 orig_bytes, 1);
3706 ret = 0; 3803 ret = 0;
3707 } else { 3804 } else {
3708 /* 3805 /*
@@ -3771,9 +3868,7 @@ again:
3771 if (used + num_bytes < space_info->total_bytes + avail) { 3868 if (used + num_bytes < space_info->total_bytes + avail) {
3772 space_info->bytes_may_use += orig_bytes; 3869 space_info->bytes_may_use += orig_bytes;
3773 trace_btrfs_space_reservation(root->fs_info, 3870 trace_btrfs_space_reservation(root->fs_info,
3774 "space_info", 3871 "space_info", space_info->flags, orig_bytes, 1);
3775 (u64)(unsigned long)space_info,
3776 orig_bytes, 1);
3777 ret = 0; 3872 ret = 0;
3778 } else { 3873 } else {
3779 wait_ordered = true; 3874 wait_ordered = true;
@@ -3836,8 +3931,9 @@ out:
3836 return ret; 3931 return ret;
3837} 3932}
3838 3933
3839static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, 3934static struct btrfs_block_rsv *get_block_rsv(
3840 struct btrfs_root *root) 3935 const struct btrfs_trans_handle *trans,
3936 const struct btrfs_root *root)
3841{ 3937{
3842 struct btrfs_block_rsv *block_rsv = NULL; 3938 struct btrfs_block_rsv *block_rsv = NULL;
3843 3939
@@ -3918,8 +4014,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
3918 spin_lock(&space_info->lock); 4014 spin_lock(&space_info->lock);
3919 space_info->bytes_may_use -= num_bytes; 4015 space_info->bytes_may_use -= num_bytes;
3920 trace_btrfs_space_reservation(fs_info, "space_info", 4016 trace_btrfs_space_reservation(fs_info, "space_info",
3921 (u64)(unsigned long)space_info, 4017 space_info->flags, num_bytes, 0);
3922 num_bytes, 0);
3923 space_info->reservation_progress++; 4018 space_info->reservation_progress++;
3924 spin_unlock(&space_info->lock); 4019 spin_unlock(&space_info->lock);
3925 } 4020 }
@@ -4137,14 +4232,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4137 block_rsv->reserved += num_bytes; 4232 block_rsv->reserved += num_bytes;
4138 sinfo->bytes_may_use += num_bytes; 4233 sinfo->bytes_may_use += num_bytes;
4139 trace_btrfs_space_reservation(fs_info, "space_info", 4234 trace_btrfs_space_reservation(fs_info, "space_info",
4140 (u64)(unsigned long)sinfo, num_bytes, 1); 4235 sinfo->flags, num_bytes, 1);
4141 } 4236 }
4142 4237
4143 if (block_rsv->reserved >= block_rsv->size) { 4238 if (block_rsv->reserved >= block_rsv->size) {
4144 num_bytes = block_rsv->reserved - block_rsv->size; 4239 num_bytes = block_rsv->reserved - block_rsv->size;
4145 sinfo->bytes_may_use -= num_bytes; 4240 sinfo->bytes_may_use -= num_bytes;
4146 trace_btrfs_space_reservation(fs_info, "space_info", 4241 trace_btrfs_space_reservation(fs_info, "space_info",
4147 (u64)(unsigned long)sinfo, num_bytes, 0); 4242 sinfo->flags, num_bytes, 0);
4148 sinfo->reservation_progress++; 4243 sinfo->reservation_progress++;
4149 block_rsv->reserved = block_rsv->size; 4244 block_rsv->reserved = block_rsv->size;
4150 block_rsv->full = 1; 4245 block_rsv->full = 1;
@@ -4198,12 +4293,12 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
4198 return; 4293 return;
4199 4294
4200 trace_btrfs_space_reservation(root->fs_info, "transaction", 4295 trace_btrfs_space_reservation(root->fs_info, "transaction",
4201 (u64)(unsigned long)trans, 4296 trans->transid, trans->bytes_reserved, 0);
4202 trans->bytes_reserved, 0);
4203 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); 4297 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
4204 trans->bytes_reserved = 0; 4298 trans->bytes_reserved = 0;
4205} 4299}
4206 4300
4301/* Can only return 0 or -ENOSPC */
4207int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 4302int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
4208 struct inode *inode) 4303 struct inode *inode)
4209{ 4304{
@@ -4540,7 +4635,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4540 while (total) { 4635 while (total) {
4541 cache = btrfs_lookup_block_group(info, bytenr); 4636 cache = btrfs_lookup_block_group(info, bytenr);
4542 if (!cache) 4637 if (!cache)
4543 return -1; 4638 return -ENOENT;
4544 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | 4639 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
4545 BTRFS_BLOCK_GROUP_RAID1 | 4640 BTRFS_BLOCK_GROUP_RAID1 |
4546 BTRFS_BLOCK_GROUP_RAID10)) 4641 BTRFS_BLOCK_GROUP_RAID10))
@@ -4643,7 +4738,7 @@ int btrfs_pin_extent(struct btrfs_root *root,
4643 struct btrfs_block_group_cache *cache; 4738 struct btrfs_block_group_cache *cache;
4644 4739
4645 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 4740 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
4646 BUG_ON(!cache); 4741 BUG_ON(!cache); /* Logic error */
4647 4742
4648 pin_down_extent(root, cache, bytenr, num_bytes, reserved); 4743 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
4649 4744
@@ -4661,7 +4756,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
4661 struct btrfs_block_group_cache *cache; 4756 struct btrfs_block_group_cache *cache;
4662 4757
4663 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 4758 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
4664 BUG_ON(!cache); 4759 BUG_ON(!cache); /* Logic error */
4665 4760
4666 /* 4761 /*
4667 * pull in the free space cache (if any) so that our pin 4762 * pull in the free space cache (if any) so that our pin
@@ -4706,6 +4801,7 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4706{ 4801{
4707 struct btrfs_space_info *space_info = cache->space_info; 4802 struct btrfs_space_info *space_info = cache->space_info;
4708 int ret = 0; 4803 int ret = 0;
4804
4709 spin_lock(&space_info->lock); 4805 spin_lock(&space_info->lock);
4710 spin_lock(&cache->lock); 4806 spin_lock(&cache->lock);
4711 if (reserve != RESERVE_FREE) { 4807 if (reserve != RESERVE_FREE) {
@@ -4716,9 +4812,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4716 space_info->bytes_reserved += num_bytes; 4812 space_info->bytes_reserved += num_bytes;
4717 if (reserve == RESERVE_ALLOC) { 4813 if (reserve == RESERVE_ALLOC) {
4718 trace_btrfs_space_reservation(cache->fs_info, 4814 trace_btrfs_space_reservation(cache->fs_info,
4719 "space_info", 4815 "space_info", space_info->flags,
4720 (u64)(unsigned long)space_info, 4816 num_bytes, 0);
4721 num_bytes, 0);
4722 space_info->bytes_may_use -= num_bytes; 4817 space_info->bytes_may_use -= num_bytes;
4723 } 4818 }
4724 } 4819 }
@@ -4734,7 +4829,7 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4734 return ret; 4829 return ret;
4735} 4830}
4736 4831
4737int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 4832void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
4738 struct btrfs_root *root) 4833 struct btrfs_root *root)
4739{ 4834{
4740 struct btrfs_fs_info *fs_info = root->fs_info; 4835 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4764,7 +4859,6 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
4764 up_write(&fs_info->extent_commit_sem); 4859 up_write(&fs_info->extent_commit_sem);
4765 4860
4766 update_global_block_rsv(fs_info); 4861 update_global_block_rsv(fs_info);
4767 return 0;
4768} 4862}
4769 4863
4770static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 4864static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
@@ -4779,7 +4873,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
4779 if (cache) 4873 if (cache)
4780 btrfs_put_block_group(cache); 4874 btrfs_put_block_group(cache);
4781 cache = btrfs_lookup_block_group(fs_info, start); 4875 cache = btrfs_lookup_block_group(fs_info, start);
4782 BUG_ON(!cache); 4876 BUG_ON(!cache); /* Logic error */
4783 } 4877 }
4784 4878
4785 len = cache->key.objectid + cache->key.offset - start; 4879 len = cache->key.objectid + cache->key.offset - start;
@@ -4816,6 +4910,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4816 u64 end; 4910 u64 end;
4817 int ret; 4911 int ret;
4818 4912
4913 if (trans->aborted)
4914 return 0;
4915
4819 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4916 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
4820 unpin = &fs_info->freed_extents[1]; 4917 unpin = &fs_info->freed_extents[1];
4821 else 4918 else
@@ -4901,7 +4998,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4901 ret = remove_extent_backref(trans, extent_root, path, 4998 ret = remove_extent_backref(trans, extent_root, path,
4902 NULL, refs_to_drop, 4999 NULL, refs_to_drop,
4903 is_data); 5000 is_data);
4904 BUG_ON(ret); 5001 if (ret)
5002 goto abort;
4905 btrfs_release_path(path); 5003 btrfs_release_path(path);
4906 path->leave_spinning = 1; 5004 path->leave_spinning = 1;
4907 5005
@@ -4919,10 +5017,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4919 btrfs_print_leaf(extent_root, 5017 btrfs_print_leaf(extent_root,
4920 path->nodes[0]); 5018 path->nodes[0]);
4921 } 5019 }
4922 BUG_ON(ret); 5020 if (ret < 0)
5021 goto abort;
4923 extent_slot = path->slots[0]; 5022 extent_slot = path->slots[0];
4924 } 5023 }
4925 } else { 5024 } else if (ret == -ENOENT) {
4926 btrfs_print_leaf(extent_root, path->nodes[0]); 5025 btrfs_print_leaf(extent_root, path->nodes[0]);
4927 WARN_ON(1); 5026 WARN_ON(1);
4928 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 5027 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
@@ -4932,6 +5031,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4932 (unsigned long long)root_objectid, 5031 (unsigned long long)root_objectid,
4933 (unsigned long long)owner_objectid, 5032 (unsigned long long)owner_objectid,
4934 (unsigned long long)owner_offset); 5033 (unsigned long long)owner_offset);
5034 } else {
5035 goto abort;
4935 } 5036 }
4936 5037
4937 leaf = path->nodes[0]; 5038 leaf = path->nodes[0];
@@ -4941,7 +5042,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4941 BUG_ON(found_extent || extent_slot != path->slots[0]); 5042 BUG_ON(found_extent || extent_slot != path->slots[0]);
4942 ret = convert_extent_item_v0(trans, extent_root, path, 5043 ret = convert_extent_item_v0(trans, extent_root, path,
4943 owner_objectid, 0); 5044 owner_objectid, 0);
4944 BUG_ON(ret < 0); 5045 if (ret < 0)
5046 goto abort;
4945 5047
4946 btrfs_release_path(path); 5048 btrfs_release_path(path);
4947 path->leave_spinning = 1; 5049 path->leave_spinning = 1;
@@ -4958,7 +5060,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4958 (unsigned long long)bytenr); 5060 (unsigned long long)bytenr);
4959 btrfs_print_leaf(extent_root, path->nodes[0]); 5061 btrfs_print_leaf(extent_root, path->nodes[0]);
4960 } 5062 }
4961 BUG_ON(ret); 5063 if (ret < 0)
5064 goto abort;
4962 extent_slot = path->slots[0]; 5065 extent_slot = path->slots[0];
4963 leaf = path->nodes[0]; 5066 leaf = path->nodes[0];
4964 item_size = btrfs_item_size_nr(leaf, extent_slot); 5067 item_size = btrfs_item_size_nr(leaf, extent_slot);
@@ -4995,7 +5098,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4995 ret = remove_extent_backref(trans, extent_root, path, 5098 ret = remove_extent_backref(trans, extent_root, path,
4996 iref, refs_to_drop, 5099 iref, refs_to_drop,
4997 is_data); 5100 is_data);
4998 BUG_ON(ret); 5101 if (ret)
5102 goto abort;
4999 } 5103 }
5000 } else { 5104 } else {
5001 if (found_extent) { 5105 if (found_extent) {
@@ -5012,23 +5116,27 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5012 5116
5013 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 5117 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
5014 num_to_del); 5118 num_to_del);
5015 BUG_ON(ret); 5119 if (ret)
5120 goto abort;
5016 btrfs_release_path(path); 5121 btrfs_release_path(path);
5017 5122
5018 if (is_data) { 5123 if (is_data) {
5019 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 5124 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
5020 BUG_ON(ret); 5125 if (ret)
5021 } else { 5126 goto abort;
5022 invalidate_mapping_pages(info->btree_inode->i_mapping,
5023 bytenr >> PAGE_CACHE_SHIFT,
5024 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
5025 } 5127 }
5026 5128
5027 ret = update_block_group(trans, root, bytenr, num_bytes, 0); 5129 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
5028 BUG_ON(ret); 5130 if (ret)
5131 goto abort;
5029 } 5132 }
5133out:
5030 btrfs_free_path(path); 5134 btrfs_free_path(path);
5031 return ret; 5135 return ret;
5136
5137abort:
5138 btrfs_abort_transaction(trans, extent_root, ret);
5139 goto out;
5032} 5140}
5033 5141
5034/* 5142/*
@@ -5124,7 +5232,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
5124 parent, root->root_key.objectid, 5232 parent, root->root_key.objectid,
5125 btrfs_header_level(buf), 5233 btrfs_header_level(buf),
5126 BTRFS_DROP_DELAYED_REF, NULL, for_cow); 5234 BTRFS_DROP_DELAYED_REF, NULL, for_cow);
5127 BUG_ON(ret); 5235 BUG_ON(ret); /* -ENOMEM */
5128 } 5236 }
5129 5237
5130 if (!last_ref) 5238 if (!last_ref)
@@ -5158,6 +5266,7 @@ out:
5158 btrfs_put_block_group(cache); 5266 btrfs_put_block_group(cache);
5159} 5267}
5160 5268
5269/* Can return -ENOMEM */
5161int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, 5270int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5162 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 5271 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
5163 u64 owner, u64 offset, int for_cow) 5272 u64 owner, u64 offset, int for_cow)
@@ -5179,14 +5288,12 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5179 num_bytes, 5288 num_bytes,
5180 parent, root_objectid, (int)owner, 5289 parent, root_objectid, (int)owner,
5181 BTRFS_DROP_DELAYED_REF, NULL, for_cow); 5290 BTRFS_DROP_DELAYED_REF, NULL, for_cow);
5182 BUG_ON(ret);
5183 } else { 5291 } else {
5184 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 5292 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
5185 num_bytes, 5293 num_bytes,
5186 parent, root_objectid, owner, 5294 parent, root_objectid, owner,
5187 offset, BTRFS_DROP_DELAYED_REF, 5295 offset, BTRFS_DROP_DELAYED_REF,
5188 NULL, for_cow); 5296 NULL, for_cow);
5189 BUG_ON(ret);
5190 } 5297 }
5191 return ret; 5298 return ret;
5192} 5299}
@@ -5243,28 +5350,34 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
5243 return 0; 5350 return 0;
5244} 5351}
5245 5352
5246static int get_block_group_index(struct btrfs_block_group_cache *cache) 5353static int __get_block_group_index(u64 flags)
5247{ 5354{
5248 int index; 5355 int index;
5249 if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) 5356
5357 if (flags & BTRFS_BLOCK_GROUP_RAID10)
5250 index = 0; 5358 index = 0;
5251 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) 5359 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
5252 index = 1; 5360 index = 1;
5253 else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) 5361 else if (flags & BTRFS_BLOCK_GROUP_DUP)
5254 index = 2; 5362 index = 2;
5255 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) 5363 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
5256 index = 3; 5364 index = 3;
5257 else 5365 else
5258 index = 4; 5366 index = 4;
5367
5259 return index; 5368 return index;
5260} 5369}
5261 5370
5371static int get_block_group_index(struct btrfs_block_group_cache *cache)
5372{
5373 return __get_block_group_index(cache->flags);
5374}
5375
5262enum btrfs_loop_type { 5376enum btrfs_loop_type {
5263 LOOP_FIND_IDEAL = 0, 5377 LOOP_CACHING_NOWAIT = 0,
5264 LOOP_CACHING_NOWAIT = 1, 5378 LOOP_CACHING_WAIT = 1,
5265 LOOP_CACHING_WAIT = 2, 5379 LOOP_ALLOC_CHUNK = 2,
5266 LOOP_ALLOC_CHUNK = 3, 5380 LOOP_NO_EMPTY_SIZE = 3,
5267 LOOP_NO_EMPTY_SIZE = 4,
5268}; 5381};
5269 5382
5270/* 5383/*
@@ -5278,7 +5391,6 @@ enum btrfs_loop_type {
5278static noinline int find_free_extent(struct btrfs_trans_handle *trans, 5391static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5279 struct btrfs_root *orig_root, 5392 struct btrfs_root *orig_root,
5280 u64 num_bytes, u64 empty_size, 5393 u64 num_bytes, u64 empty_size,
5281 u64 search_start, u64 search_end,
5282 u64 hint_byte, struct btrfs_key *ins, 5394 u64 hint_byte, struct btrfs_key *ins,
5283 u64 data) 5395 u64 data)
5284{ 5396{
@@ -5287,6 +5399,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5287 struct btrfs_free_cluster *last_ptr = NULL; 5399 struct btrfs_free_cluster *last_ptr = NULL;
5288 struct btrfs_block_group_cache *block_group = NULL; 5400 struct btrfs_block_group_cache *block_group = NULL;
5289 struct btrfs_block_group_cache *used_block_group; 5401 struct btrfs_block_group_cache *used_block_group;
5402 u64 search_start = 0;
5290 int empty_cluster = 2 * 1024 * 1024; 5403 int empty_cluster = 2 * 1024 * 1024;
5291 int allowed_chunk_alloc = 0; 5404 int allowed_chunk_alloc = 0;
5292 int done_chunk_alloc = 0; 5405 int done_chunk_alloc = 0;
@@ -5300,8 +5413,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5300 bool failed_alloc = false; 5413 bool failed_alloc = false;
5301 bool use_cluster = true; 5414 bool use_cluster = true;
5302 bool have_caching_bg = false; 5415 bool have_caching_bg = false;
5303 u64 ideal_cache_percent = 0;
5304 u64 ideal_cache_offset = 0;
5305 5416
5306 WARN_ON(num_bytes < root->sectorsize); 5417 WARN_ON(num_bytes < root->sectorsize);
5307 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 5418 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -5351,7 +5462,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5351 empty_cluster = 0; 5462 empty_cluster = 0;
5352 5463
5353 if (search_start == hint_byte) { 5464 if (search_start == hint_byte) {
5354ideal_cache:
5355 block_group = btrfs_lookup_block_group(root->fs_info, 5465 block_group = btrfs_lookup_block_group(root->fs_info,
5356 search_start); 5466 search_start);
5357 used_block_group = block_group; 5467 used_block_group = block_group;
@@ -5363,8 +5473,7 @@ ideal_cache:
5363 * picked out then we don't care that the block group is cached. 5473 * picked out then we don't care that the block group is cached.
5364 */ 5474 */
5365 if (block_group && block_group_bits(block_group, data) && 5475 if (block_group && block_group_bits(block_group, data) &&
5366 (block_group->cached != BTRFS_CACHE_NO || 5476 block_group->cached != BTRFS_CACHE_NO) {
5367 search_start == ideal_cache_offset)) {
5368 down_read(&space_info->groups_sem); 5477 down_read(&space_info->groups_sem);
5369 if (list_empty(&block_group->list) || 5478 if (list_empty(&block_group->list) ||
5370 block_group->ro) { 5479 block_group->ro) {
@@ -5418,44 +5527,13 @@ search:
5418have_block_group: 5527have_block_group:
5419 cached = block_group_cache_done(block_group); 5528 cached = block_group_cache_done(block_group);
5420 if (unlikely(!cached)) { 5529 if (unlikely(!cached)) {
5421 u64 free_percent;
5422
5423 found_uncached_bg = true; 5530 found_uncached_bg = true;
5424 ret = cache_block_group(block_group, trans, 5531 ret = cache_block_group(block_group, trans,
5425 orig_root, 1); 5532 orig_root, 0);
5426 if (block_group->cached == BTRFS_CACHE_FINISHED) 5533 BUG_ON(ret < 0);
5427 goto alloc; 5534 ret = 0;
5428
5429 free_percent = btrfs_block_group_used(&block_group->item);
5430 free_percent *= 100;
5431 free_percent = div64_u64(free_percent,
5432 block_group->key.offset);
5433 free_percent = 100 - free_percent;
5434 if (free_percent > ideal_cache_percent &&
5435 likely(!block_group->ro)) {
5436 ideal_cache_offset = block_group->key.objectid;
5437 ideal_cache_percent = free_percent;
5438 }
5439
5440 /*
5441 * The caching workers are limited to 2 threads, so we
5442 * can queue as much work as we care to.
5443 */
5444 if (loop > LOOP_FIND_IDEAL) {
5445 ret = cache_block_group(block_group, trans,
5446 orig_root, 0);
5447 BUG_ON(ret);
5448 }
5449
5450 /*
5451 * If loop is set for cached only, try the next block
5452 * group.
5453 */
5454 if (loop == LOOP_FIND_IDEAL)
5455 goto loop;
5456 } 5535 }
5457 5536
5458alloc:
5459 if (unlikely(block_group->ro)) 5537 if (unlikely(block_group->ro))
5460 goto loop; 5538 goto loop;
5461 5539
@@ -5606,11 +5684,6 @@ unclustered_alloc:
5606 } 5684 }
5607checks: 5685checks:
5608 search_start = stripe_align(root, offset); 5686 search_start = stripe_align(root, offset);
5609 /* move on to the next group */
5610 if (search_start + num_bytes >= search_end) {
5611 btrfs_add_free_space(used_block_group, offset, num_bytes);
5612 goto loop;
5613 }
5614 5687
5615 /* move on to the next group */ 5688 /* move on to the next group */
5616 if (search_start + num_bytes > 5689 if (search_start + num_bytes >
@@ -5661,9 +5734,7 @@ loop:
5661 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) 5734 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
5662 goto search; 5735 goto search;
5663 5736
5664 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for 5737 /*
5665 * for them to make caching progress. Also
5666 * determine the best possible bg to cache
5667 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking 5738 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
5668 * caching kthreads as we move along 5739 * caching kthreads as we move along
5669 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 5740 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
@@ -5673,50 +5744,17 @@ loop:
5673 */ 5744 */
5674 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { 5745 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
5675 index = 0; 5746 index = 0;
5676 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
5677 found_uncached_bg = false;
5678 loop++;
5679 if (!ideal_cache_percent)
5680 goto search;
5681
5682 /*
5683 * 1 of the following 2 things have happened so far
5684 *
5685 * 1) We found an ideal block group for caching that
5686 * is mostly full and will cache quickly, so we might
5687 * as well wait for it.
5688 *
5689 * 2) We searched for cached only and we didn't find
5690 * anything, and we didn't start any caching kthreads
5691 * either, so chances are we will loop through and
5692 * start a couple caching kthreads, and then come back
5693 * around and just wait for them. This will be slower
5694 * because we will have 2 caching kthreads reading at
5695 * the same time when we could have just started one
5696 * and waited for it to get far enough to give us an
5697 * allocation, so go ahead and go to the wait caching
5698 * loop.
5699 */
5700 loop = LOOP_CACHING_WAIT;
5701 search_start = ideal_cache_offset;
5702 ideal_cache_percent = 0;
5703 goto ideal_cache;
5704 } else if (loop == LOOP_FIND_IDEAL) {
5705 /*
5706 * Didn't find a uncached bg, wait on anything we find
5707 * next.
5708 */
5709 loop = LOOP_CACHING_WAIT;
5710 goto search;
5711 }
5712
5713 loop++; 5747 loop++;
5714
5715 if (loop == LOOP_ALLOC_CHUNK) { 5748 if (loop == LOOP_ALLOC_CHUNK) {
5716 if (allowed_chunk_alloc) { 5749 if (allowed_chunk_alloc) {
5717 ret = do_chunk_alloc(trans, root, num_bytes + 5750 ret = do_chunk_alloc(trans, root, num_bytes +
5718 2 * 1024 * 1024, data, 5751 2 * 1024 * 1024, data,
5719 CHUNK_ALLOC_LIMITED); 5752 CHUNK_ALLOC_LIMITED);
5753 if (ret < 0) {
5754 btrfs_abort_transaction(trans,
5755 root, ret);
5756 goto out;
5757 }
5720 allowed_chunk_alloc = 0; 5758 allowed_chunk_alloc = 0;
5721 if (ret == 1) 5759 if (ret == 1)
5722 done_chunk_alloc = 1; 5760 done_chunk_alloc = 1;
@@ -5745,6 +5783,7 @@ loop:
5745 } else if (ins->objectid) { 5783 } else if (ins->objectid) {
5746 ret = 0; 5784 ret = 0;
5747 } 5785 }
5786out:
5748 5787
5749 return ret; 5788 return ret;
5750} 5789}
@@ -5798,12 +5837,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
5798 struct btrfs_root *root, 5837 struct btrfs_root *root,
5799 u64 num_bytes, u64 min_alloc_size, 5838 u64 num_bytes, u64 min_alloc_size,
5800 u64 empty_size, u64 hint_byte, 5839 u64 empty_size, u64 hint_byte,
5801 u64 search_end, struct btrfs_key *ins, 5840 struct btrfs_key *ins, u64 data)
5802 u64 data)
5803{ 5841{
5804 bool final_tried = false; 5842 bool final_tried = false;
5805 int ret; 5843 int ret;
5806 u64 search_start = 0;
5807 5844
5808 data = btrfs_get_alloc_profile(root, data); 5845 data = btrfs_get_alloc_profile(root, data);
5809again: 5846again:
@@ -5811,23 +5848,31 @@ again:
5811 * the only place that sets empty_size is btrfs_realloc_node, which 5848 * the only place that sets empty_size is btrfs_realloc_node, which
5812 * is not called recursively on allocations 5849 * is not called recursively on allocations
5813 */ 5850 */
5814 if (empty_size || root->ref_cows) 5851 if (empty_size || root->ref_cows) {
5815 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5852 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5816 num_bytes + 2 * 1024 * 1024, data, 5853 num_bytes + 2 * 1024 * 1024, data,
5817 CHUNK_ALLOC_NO_FORCE); 5854 CHUNK_ALLOC_NO_FORCE);
5855 if (ret < 0 && ret != -ENOSPC) {
5856 btrfs_abort_transaction(trans, root, ret);
5857 return ret;
5858 }
5859 }
5818 5860
5819 WARN_ON(num_bytes < root->sectorsize); 5861 WARN_ON(num_bytes < root->sectorsize);
5820 ret = find_free_extent(trans, root, num_bytes, empty_size, 5862 ret = find_free_extent(trans, root, num_bytes, empty_size,
5821 search_start, search_end, hint_byte, 5863 hint_byte, ins, data);
5822 ins, data);
5823 5864
5824 if (ret == -ENOSPC) { 5865 if (ret == -ENOSPC) {
5825 if (!final_tried) { 5866 if (!final_tried) {
5826 num_bytes = num_bytes >> 1; 5867 num_bytes = num_bytes >> 1;
5827 num_bytes = num_bytes & ~(root->sectorsize - 1); 5868 num_bytes = num_bytes & ~(root->sectorsize - 1);
5828 num_bytes = max(num_bytes, min_alloc_size); 5869 num_bytes = max(num_bytes, min_alloc_size);
5829 do_chunk_alloc(trans, root->fs_info->extent_root, 5870 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5830 num_bytes, data, CHUNK_ALLOC_FORCE); 5871 num_bytes, data, CHUNK_ALLOC_FORCE);
5872 if (ret < 0 && ret != -ENOSPC) {
5873 btrfs_abort_transaction(trans, root, ret);
5874 return ret;
5875 }
5831 if (num_bytes == min_alloc_size) 5876 if (num_bytes == min_alloc_size)
5832 final_tried = true; 5877 final_tried = true;
5833 goto again; 5878 goto again;
@@ -5838,7 +5883,8 @@ again:
5838 printk(KERN_ERR "btrfs allocation failed flags %llu, " 5883 printk(KERN_ERR "btrfs allocation failed flags %llu, "
5839 "wanted %llu\n", (unsigned long long)data, 5884 "wanted %llu\n", (unsigned long long)data,
5840 (unsigned long long)num_bytes); 5885 (unsigned long long)num_bytes);
5841 dump_space_info(sinfo, num_bytes, 1); 5886 if (sinfo)
5887 dump_space_info(sinfo, num_bytes, 1);
5842 } 5888 }
5843 } 5889 }
5844 5890
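On -ENOSPC the reservation path above now also checks the forced chunk allocation for hard failure, then retries with a halved, sector-aligned request clamped to min_alloc_size; reaching the minimum marks the final attempt. A compilable model of the shrink step:

    #include <stdint.h>
    #include <stdio.h>

    /* One -ENOSPC retry step: halve, align down to the sector size,
     * never drop below min_alloc_size, and flag the last attempt. */
    static uint64_t shrink_request(uint64_t num_bytes, uint64_t min_alloc_size,
                                   uint64_t sectorsize, int *final_tried)
    {
        num_bytes >>= 1;
        num_bytes &= ~(sectorsize - 1);
        if (num_bytes < min_alloc_size)
            num_bytes = min_alloc_size;
        if (num_bytes == min_alloc_size)
            *final_tried = 1;
        return num_bytes;
    }

    int main(void)
    {
        uint64_t n = 1024 * 1024;
        int final_tried = 0;

        while (!final_tried) {
            n = shrink_request(n, 64 * 1024, 4096, &final_tried);
            printf("retry with %llu\n", (unsigned long long)n);
        }
        return 0;  /* 524288, 262144, 131072, 65536, then stop */
    }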
@@ -5917,7 +5963,10 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5917 path->leave_spinning = 1; 5963 path->leave_spinning = 1;
5918 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5964 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
5919 ins, size); 5965 ins, size);
5920 BUG_ON(ret); 5966 if (ret) {
5967 btrfs_free_path(path);
5968 return ret;
5969 }
5921 5970
5922 leaf = path->nodes[0]; 5971 leaf = path->nodes[0];
5923 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5972 extent_item = btrfs_item_ptr(leaf, path->slots[0],
@@ -5947,7 +5996,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5947 btrfs_free_path(path); 5996 btrfs_free_path(path);
5948 5997
5949 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5998 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
5950 if (ret) { 5999 if (ret) { /* -ENOENT, logic error */
5951 printk(KERN_ERR "btrfs update block group failed for %llu " 6000 printk(KERN_ERR "btrfs update block group failed for %llu "
5952 "%llu\n", (unsigned long long)ins->objectid, 6001 "%llu\n", (unsigned long long)ins->objectid,
5953 (unsigned long long)ins->offset); 6002 (unsigned long long)ins->offset);
@@ -5978,7 +6027,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
5978 path->leave_spinning = 1; 6027 path->leave_spinning = 1;
5979 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 6028 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
5980 ins, size); 6029 ins, size);
5981 BUG_ON(ret); 6030 if (ret) {
6031 btrfs_free_path(path);
6032 return ret;
6033 }
5982 6034
5983 leaf = path->nodes[0]; 6035 leaf = path->nodes[0];
5984 extent_item = btrfs_item_ptr(leaf, path->slots[0], 6036 extent_item = btrfs_item_ptr(leaf, path->slots[0],
@@ -6008,7 +6060,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6008 btrfs_free_path(path); 6060 btrfs_free_path(path);
6009 6061
6010 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 6062 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
6011 if (ret) { 6063 if (ret) { /* -ENOENT, logic error */
6012 printk(KERN_ERR "btrfs update block group failed for %llu " 6064 printk(KERN_ERR "btrfs update block group failed for %llu "
6013 "%llu\n", (unsigned long long)ins->objectid, 6065 "%llu\n", (unsigned long long)ins->objectid,
6014 (unsigned long long)ins->offset); 6066 (unsigned long long)ins->offset);
@@ -6056,28 +6108,28 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
6056 if (!caching_ctl) { 6108 if (!caching_ctl) {
6057 BUG_ON(!block_group_cache_done(block_group)); 6109 BUG_ON(!block_group_cache_done(block_group));
6058 ret = btrfs_remove_free_space(block_group, start, num_bytes); 6110 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6059 BUG_ON(ret); 6111 BUG_ON(ret); /* -ENOMEM */
6060 } else { 6112 } else {
6061 mutex_lock(&caching_ctl->mutex); 6113 mutex_lock(&caching_ctl->mutex);
6062 6114
6063 if (start >= caching_ctl->progress) { 6115 if (start >= caching_ctl->progress) {
6064 ret = add_excluded_extent(root, start, num_bytes); 6116 ret = add_excluded_extent(root, start, num_bytes);
6065 BUG_ON(ret); 6117 BUG_ON(ret); /* -ENOMEM */
6066 } else if (start + num_bytes <= caching_ctl->progress) { 6118 } else if (start + num_bytes <= caching_ctl->progress) {
6067 ret = btrfs_remove_free_space(block_group, 6119 ret = btrfs_remove_free_space(block_group,
6068 start, num_bytes); 6120 start, num_bytes);
6069 BUG_ON(ret); 6121 BUG_ON(ret); /* -ENOMEM */
6070 } else { 6122 } else {
6071 num_bytes = caching_ctl->progress - start; 6123 num_bytes = caching_ctl->progress - start;
6072 ret = btrfs_remove_free_space(block_group, 6124 ret = btrfs_remove_free_space(block_group,
6073 start, num_bytes); 6125 start, num_bytes);
6074 BUG_ON(ret); 6126 BUG_ON(ret); /* -ENOMEM */
6075 6127
6076 start = caching_ctl->progress; 6128 start = caching_ctl->progress;
6077 num_bytes = ins->objectid + ins->offset - 6129 num_bytes = ins->objectid + ins->offset -
6078 caching_ctl->progress; 6130 caching_ctl->progress;
6079 ret = add_excluded_extent(root, start, num_bytes); 6131 ret = add_excluded_extent(root, start, num_bytes);
6080 BUG_ON(ret); 6132 BUG_ON(ret); /* -ENOMEM */
6081 } 6133 }
6082 6134
6083 mutex_unlock(&caching_ctl->mutex); 6135 mutex_unlock(&caching_ctl->mutex);
@@ -6086,7 +6138,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
6086 6138
6087 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 6139 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
6088 RESERVE_ALLOC_NO_ACCOUNT); 6140 RESERVE_ALLOC_NO_ACCOUNT);
6089 BUG_ON(ret); 6141 BUG_ON(ret); /* logic error */
6090 btrfs_put_block_group(block_group); 6142 btrfs_put_block_group(block_group);
6091 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 6143 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
6092 0, owner, offset, ins, 1); 6144 0, owner, offset, ins, 1);
@@ -6107,6 +6159,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
6107 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); 6159 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
6108 btrfs_tree_lock(buf); 6160 btrfs_tree_lock(buf);
6109 clean_tree_block(trans, root, buf); 6161 clean_tree_block(trans, root, buf);
6162 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
6110 6163
6111 btrfs_set_lock_blocking(buf); 6164 btrfs_set_lock_blocking(buf);
6112 btrfs_set_buffer_uptodate(buf); 6165 btrfs_set_buffer_uptodate(buf);
@@ -6214,7 +6267,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6214 return ERR_CAST(block_rsv); 6267 return ERR_CAST(block_rsv);
6215 6268
6216 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, 6269 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
6217 empty_size, hint, (u64)-1, &ins, 0); 6270 empty_size, hint, &ins, 0);
6218 if (ret) { 6271 if (ret) {
6219 unuse_block_rsv(root->fs_info, block_rsv, blocksize); 6272 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
6220 return ERR_PTR(ret); 6273 return ERR_PTR(ret);
@@ -6222,7 +6275,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6222 6275
6223 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 6276 buf = btrfs_init_new_buffer(trans, root, ins.objectid,
6224 blocksize, level); 6277 blocksize, level);
6225 BUG_ON(IS_ERR(buf)); 6278 BUG_ON(IS_ERR(buf)); /* -ENOMEM */
6226 6279
6227 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 6280 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
6228 if (parent == 0) 6281 if (parent == 0)
@@ -6234,7 +6287,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6234 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 6287 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
6235 struct btrfs_delayed_extent_op *extent_op; 6288 struct btrfs_delayed_extent_op *extent_op;
6236 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 6289 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
6237 BUG_ON(!extent_op); 6290 BUG_ON(!extent_op); /* -ENOMEM */
6238 if (key) 6291 if (key)
6239 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 6292 memcpy(&extent_op->key, key, sizeof(extent_op->key));
6240 else 6293 else
@@ -6249,7 +6302,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6249 ins.offset, parent, root_objectid, 6302 ins.offset, parent, root_objectid,
6250 level, BTRFS_ADD_DELAYED_EXTENT, 6303 level, BTRFS_ADD_DELAYED_EXTENT,
6251 extent_op, for_cow); 6304 extent_op, for_cow);
6252 BUG_ON(ret); 6305 BUG_ON(ret); /* -ENOMEM */
6253 } 6306 }
6254 return buf; 6307 return buf;
6255} 6308}
@@ -6319,7 +6372,9 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
6319 /* We don't lock the tree block, it's OK to be racy here */ 6372 /* We don't lock the tree block, it's OK to be racy here */
6320 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 6373 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
6321 &refs, &flags); 6374 &refs, &flags);
6322 BUG_ON(ret); 6375 /* We don't care about errors in readahead. */
6376 if (ret < 0)
6377 continue;
6323 BUG_ON(refs == 0); 6378 BUG_ON(refs == 0);
6324 6379
6325 if (wc->stage == DROP_REFERENCE) { 6380 if (wc->stage == DROP_REFERENCE) {
@@ -6386,7 +6441,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6386 eb->start, eb->len, 6441 eb->start, eb->len,
6387 &wc->refs[level], 6442 &wc->refs[level],
6388 &wc->flags[level]); 6443 &wc->flags[level]);
6389 BUG_ON(ret); 6444 BUG_ON(ret == -ENOMEM);
6445 if (ret)
6446 return ret;
6390 BUG_ON(wc->refs[level] == 0); 6447 BUG_ON(wc->refs[level] == 0);
6391 } 6448 }
6392 6449
@@ -6405,12 +6462,12 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6405 if (!(wc->flags[level] & flag)) { 6462 if (!(wc->flags[level] & flag)) {
6406 BUG_ON(!path->locks[level]); 6463 BUG_ON(!path->locks[level]);
6407 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); 6464 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
6408 BUG_ON(ret); 6465 BUG_ON(ret); /* -ENOMEM */
6409 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 6466 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
6410 BUG_ON(ret); 6467 BUG_ON(ret); /* -ENOMEM */
6411 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 6468 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
6412 eb->len, flag, 0); 6469 eb->len, flag, 0);
6413 BUG_ON(ret); 6470 BUG_ON(ret); /* -ENOMEM */
6414 wc->flags[level] |= flag; 6471 wc->flags[level] |= flag;
6415 } 6472 }
6416 6473
@@ -6482,7 +6539,11 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6482 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 6539 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
6483 &wc->refs[level - 1], 6540 &wc->refs[level - 1],
6484 &wc->flags[level - 1]); 6541 &wc->flags[level - 1]);
6485 BUG_ON(ret); 6542 if (ret < 0) {
6543 btrfs_tree_unlock(next);
6544 return ret;
6545 }
6546
6486 BUG_ON(wc->refs[level - 1] == 0); 6547 BUG_ON(wc->refs[level - 1] == 0);
6487 *lookup_info = 0; 6548 *lookup_info = 0;
6488 6549
@@ -6551,7 +6612,7 @@ skip:
6551 6612
6552 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 6613 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
6553 root->root_key.objectid, level - 1, 0, 0); 6614 root->root_key.objectid, level - 1, 0, 0);
6554 BUG_ON(ret); 6615 BUG_ON(ret); /* -ENOMEM */
6555 } 6616 }
6556 btrfs_tree_unlock(next); 6617 btrfs_tree_unlock(next);
6557 free_extent_buffer(next); 6618 free_extent_buffer(next);
@@ -6609,7 +6670,10 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6609 eb->start, eb->len, 6670 eb->start, eb->len,
6610 &wc->refs[level], 6671 &wc->refs[level],
6611 &wc->flags[level]); 6672 &wc->flags[level]);
6612 BUG_ON(ret); 6673 if (ret < 0) {
6674 btrfs_tree_unlock_rw(eb, path->locks[level]);
6675 return ret;
6676 }
6613 BUG_ON(wc->refs[level] == 0); 6677 BUG_ON(wc->refs[level] == 0);
6614 if (wc->refs[level] == 1) { 6678 if (wc->refs[level] == 1) {
6615 btrfs_tree_unlock_rw(eb, path->locks[level]); 6679 btrfs_tree_unlock_rw(eb, path->locks[level]);
@@ -6629,7 +6693,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6629 else 6693 else
6630 ret = btrfs_dec_ref(trans, root, eb, 0, 6694 ret = btrfs_dec_ref(trans, root, eb, 0,
6631 wc->for_reloc); 6695 wc->for_reloc);
6632 BUG_ON(ret); 6696 BUG_ON(ret); /* -ENOMEM */
6633 } 6697 }
6634 /* make block locked assertion in clean_tree_block happy */ 6698 /* make block locked assertion in clean_tree_block happy */
6635 if (!path->locks[level] && 6699 if (!path->locks[level] &&
@@ -6738,7 +6802,7 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
6738 * also make sure backrefs for the shared block and all lower level 6802 * also make sure backrefs for the shared block and all lower level
6739 * blocks are properly updated. 6803 * blocks are properly updated.
6740 */ 6804 */
6741void btrfs_drop_snapshot(struct btrfs_root *root, 6805int btrfs_drop_snapshot(struct btrfs_root *root,
6742 struct btrfs_block_rsv *block_rsv, int update_ref, 6806 struct btrfs_block_rsv *block_rsv, int update_ref,
6743 int for_reloc) 6807 int for_reloc)
6744{ 6808{
@@ -6766,7 +6830,10 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6766 } 6830 }
6767 6831
6768 trans = btrfs_start_transaction(tree_root, 0); 6832 trans = btrfs_start_transaction(tree_root, 0);
6769 BUG_ON(IS_ERR(trans)); 6833 if (IS_ERR(trans)) {
6834 err = PTR_ERR(trans);
6835 goto out_free;
6836 }
6770 6837
6771 if (block_rsv) 6838 if (block_rsv)
6772 trans->block_rsv = block_rsv; 6839 trans->block_rsv = block_rsv;
@@ -6791,7 +6858,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6791 path->lowest_level = 0; 6858 path->lowest_level = 0;
6792 if (ret < 0) { 6859 if (ret < 0) {
6793 err = ret; 6860 err = ret;
6794 goto out_free; 6861 goto out_end_trans;
6795 } 6862 }
6796 WARN_ON(ret > 0); 6863 WARN_ON(ret > 0);
6797 6864
@@ -6811,7 +6878,10 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6811 path->nodes[level]->len, 6878 path->nodes[level]->len,
6812 &wc->refs[level], 6879 &wc->refs[level],
6813 &wc->flags[level]); 6880 &wc->flags[level]);
6814 BUG_ON(ret); 6881 if (ret < 0) {
6882 err = ret;
6883 goto out_end_trans;
6884 }
6815 BUG_ON(wc->refs[level] == 0); 6885 BUG_ON(wc->refs[level] == 0);
6816 6886
6817 if (level == root_item->drop_level) 6887 if (level == root_item->drop_level)
@@ -6862,26 +6932,40 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6862 ret = btrfs_update_root(trans, tree_root, 6932 ret = btrfs_update_root(trans, tree_root,
6863 &root->root_key, 6933 &root->root_key,
6864 root_item); 6934 root_item);
6865 BUG_ON(ret); 6935 if (ret) {
6936 btrfs_abort_transaction(trans, tree_root, ret);
6937 err = ret;
6938 goto out_end_trans;
6939 }
6866 6940
6867 btrfs_end_transaction_throttle(trans, tree_root); 6941 btrfs_end_transaction_throttle(trans, tree_root);
6868 trans = btrfs_start_transaction(tree_root, 0); 6942 trans = btrfs_start_transaction(tree_root, 0);
6869 BUG_ON(IS_ERR(trans)); 6943 if (IS_ERR(trans)) {
6944 err = PTR_ERR(trans);
6945 goto out_free;
6946 }
6870 if (block_rsv) 6947 if (block_rsv)
6871 trans->block_rsv = block_rsv; 6948 trans->block_rsv = block_rsv;
6872 } 6949 }
6873 } 6950 }
6874 btrfs_release_path(path); 6951 btrfs_release_path(path);
6875 BUG_ON(err); 6952 if (err)
6953 goto out_end_trans;
6876 6954
6877 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6955 ret = btrfs_del_root(trans, tree_root, &root->root_key);
6878 BUG_ON(ret); 6956 if (ret) {
6957 btrfs_abort_transaction(trans, tree_root, ret);
6958 goto out_end_trans;
6959 }
6879 6960
6880 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 6961 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6881 ret = btrfs_find_last_root(tree_root, root->root_key.objectid, 6962 ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
6882 NULL, NULL); 6963 NULL, NULL);
6883 BUG_ON(ret < 0); 6964 if (ret < 0) {
6884 if (ret > 0) { 6965 btrfs_abort_transaction(trans, tree_root, ret);
6966 err = ret;
6967 goto out_end_trans;
6968 } else if (ret > 0) {
6885 /* if we fail to delete the orphan item this time 6969 /* if we fail to delete the orphan item this time
6886 * around, it'll get picked up the next time. 6970 * around, it'll get picked up the next time.
6887 * 6971 *
@@ -6899,14 +6983,15 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6899 free_extent_buffer(root->commit_root); 6983 free_extent_buffer(root->commit_root);
6900 kfree(root); 6984 kfree(root);
6901 } 6985 }
6902out_free: 6986out_end_trans:
6903 btrfs_end_transaction_throttle(trans, tree_root); 6987 btrfs_end_transaction_throttle(trans, tree_root);
6988out_free:
6904 kfree(wc); 6989 kfree(wc);
6905 btrfs_free_path(path); 6990 btrfs_free_path(path);
6906out: 6991out:
6907 if (err) 6992 if (err)
6908 btrfs_std_error(root->fs_info, err); 6993 btrfs_std_error(root->fs_info, err);
6909 return; 6994 return err;
6910} 6995}
6911 6996
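[Editor's note] Converting btrfs_drop_snapshot() from void to int is the largest change in the hunks above: every BUG_ON() on a transaction-time failure becomes an error that unwinds through the new out_end_trans label (which still closes the transaction) or the old out_free label (when no transaction is live). A toy sketch of that two-level unwind, with all names invented for illustration:

#include <stdio.h>
#include <stdlib.h>

struct trans { int id; };

static struct trans *start_trans(void) { return calloc(1, sizeof(struct trans)); }
static void end_trans(struct trans *t) { free(t); }

static int drop_snapshot(int fail_step)
{
	struct trans *trans;
	char *wc = malloc(64);		/* walk-control scratch state */
	int err = 0;

	if (!wc)
		return -1;

	trans = start_trans();
	if (!trans) {
		err = -1;
		goto out_free;		/* no transaction to end yet */
	}

	if (fail_step) {
		err = fail_step;	/* e.g. a root update failed */
		goto out_end_trans;	/* must still close the transaction */
	}

out_end_trans:
	end_trans(trans);
out_free:
	free(wc);
	return err;			/* callers finally see the failure */
}

int main(void)
{
	printf("%d %d\n", drop_snapshot(0), drop_snapshot(-5));
	return 0;
}
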
6912/* 6997/*
@@ -6983,31 +7068,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6983static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 7068static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
6984{ 7069{
6985 u64 num_devices; 7070 u64 num_devices;
6986 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 7071 u64 stripped;
6987 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
6988 7072
6989 if (root->fs_info->balance_ctl) { 7073 /*
 6990 struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; 7074 * if restripe for this chunk_type is on, pick target profile and
 6991 u64 tgt = 0; 7075 * return; otherwise do the usual balance
6992 7076 */
6993 /* pick restriper's target profile and return */ 7077 stripped = get_restripe_target(root->fs_info, flags);
6994 if (flags & BTRFS_BLOCK_GROUP_DATA && 7078 if (stripped)
6995 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { 7079 return extended_to_chunk(stripped);
6996 tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
6997 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
6998 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
6999 tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
7000 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
7001 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
7002 tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
7003 }
7004
7005 if (tgt) {
7006 /* extended -> chunk profile */
7007 tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
7008 return tgt;
7009 }
7010 }
7011 7080
7012 /* 7081 /*
7013 * we add in the count of missing devices because we want 7082 * we add in the count of missing devices because we want
@@ -7017,6 +7086,9 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7017 num_devices = root->fs_info->fs_devices->rw_devices + 7086 num_devices = root->fs_info->fs_devices->rw_devices +
7018 root->fs_info->fs_devices->missing_devices; 7087 root->fs_info->fs_devices->missing_devices;
7019 7088
7089 stripped = BTRFS_BLOCK_GROUP_RAID0 |
7090 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
7091
7020 if (num_devices == 1) { 7092 if (num_devices == 1) {
7021 stripped |= BTRFS_BLOCK_GROUP_DUP; 7093 stripped |= BTRFS_BLOCK_GROUP_DUP;
7022 stripped = flags & ~stripped; 7094 stripped = flags & ~stripped;
@@ -7029,7 +7101,6 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7029 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 7101 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
7030 BTRFS_BLOCK_GROUP_RAID10)) 7102 BTRFS_BLOCK_GROUP_RAID10))
7031 return stripped | BTRFS_BLOCK_GROUP_DUP; 7103 return stripped | BTRFS_BLOCK_GROUP_DUP;
7032 return flags;
7033 } else { 7104 } else {
7034 /* they already had raid on here, just return */ 7105 /* they already had raid on here, just return */
7035 if (flags & stripped) 7106 if (flags & stripped)
@@ -7042,9 +7113,9 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7042 if (flags & BTRFS_BLOCK_GROUP_DUP) 7113 if (flags & BTRFS_BLOCK_GROUP_DUP)
7043 return stripped | BTRFS_BLOCK_GROUP_RAID1; 7114 return stripped | BTRFS_BLOCK_GROUP_RAID1;
7044 7115
7045 /* turn single device chunks into raid0 */ 7116 /* this is drive concat, leave it alone */
7046 return stripped | BTRFS_BLOCK_GROUP_RAID0;
7047 } 7117 }
7118
7048 return flags; 7119 return flags;
7049} 7120}
7050 7121
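[Editor's note] The rewrite above replaces three copy-pasted per-chunk-type checks of the balance control with a single get_restripe_target() lookup plus extended_to_chunk(). A hedged userspace sketch of that consolidation; the flag values and struct layout below are illustrative, not the kernel's:

#include <stdio.h>
#include <stdint.h>

#define GROUP_DATA   (1u << 0)	/* illustrative flag values */
#define GROUP_SYSTEM (1u << 1)
#define GROUP_META   (1u << 2)
#define ARGS_CONVERT (1u << 8)

struct balance_args { uint32_t flags, target; };
struct balance_ctl  { struct balance_args data, sys, meta; };

/* one lookup instead of three open-coded per-type branches */
static uint32_t get_restripe_target(const struct balance_ctl *bctl,
				    uint32_t flags)
{
	if (!bctl)
		return 0;
	if ((flags & GROUP_DATA) && (bctl->data.flags & ARGS_CONVERT))
		return GROUP_DATA | bctl->data.target;
	if ((flags & GROUP_SYSTEM) && (bctl->sys.flags & ARGS_CONVERT))
		return GROUP_SYSTEM | bctl->sys.target;
	if ((flags & GROUP_META) && (bctl->meta.flags & ARGS_CONVERT))
		return GROUP_META | bctl->meta.target;
	return 0;	/* no restripe in progress for this type */
}

int main(void)
{
	struct balance_ctl bctl = {
		.data = { .flags = ARGS_CONVERT, .target = 0x10 },
	};
	printf("target 0x%x\n",
	       (unsigned)get_restripe_target(&bctl, GROUP_DATA));
	return 0;
}
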
@@ -7103,12 +7174,16 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
7103 BUG_ON(cache->ro); 7174 BUG_ON(cache->ro);
7104 7175
7105 trans = btrfs_join_transaction(root); 7176 trans = btrfs_join_transaction(root);
7106 BUG_ON(IS_ERR(trans)); 7177 if (IS_ERR(trans))
7178 return PTR_ERR(trans);
7107 7179
7108 alloc_flags = update_block_group_flags(root, cache->flags); 7180 alloc_flags = update_block_group_flags(root, cache->flags);
7109 if (alloc_flags != cache->flags) 7181 if (alloc_flags != cache->flags) {
7110 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 7182 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
7111 CHUNK_ALLOC_FORCE); 7183 CHUNK_ALLOC_FORCE);
7184 if (ret < 0)
7185 goto out;
7186 }
7112 7187
7113 ret = set_block_group_ro(cache, 0); 7188 ret = set_block_group_ro(cache, 0);
7114 if (!ret) 7189 if (!ret)
@@ -7188,7 +7263,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
7188 return free_bytes; 7263 return free_bytes;
7189} 7264}
7190 7265
7191int btrfs_set_block_group_rw(struct btrfs_root *root, 7266void btrfs_set_block_group_rw(struct btrfs_root *root,
7192 struct btrfs_block_group_cache *cache) 7267 struct btrfs_block_group_cache *cache)
7193{ 7268{
7194 struct btrfs_space_info *sinfo = cache->space_info; 7269 struct btrfs_space_info *sinfo = cache->space_info;
@@ -7204,7 +7279,6 @@ int btrfs_set_block_group_rw(struct btrfs_root *root,
7204 cache->ro = 0; 7279 cache->ro = 0;
7205 spin_unlock(&cache->lock); 7280 spin_unlock(&cache->lock);
7206 spin_unlock(&sinfo->lock); 7281 spin_unlock(&sinfo->lock);
7207 return 0;
7208} 7282}
7209 7283
7210/* 7284/*
@@ -7222,6 +7296,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7222 u64 min_free; 7296 u64 min_free;
7223 u64 dev_min = 1; 7297 u64 dev_min = 1;
7224 u64 dev_nr = 0; 7298 u64 dev_nr = 0;
7299 u64 target;
7225 int index; 7300 int index;
7226 int full = 0; 7301 int full = 0;
7227 int ret = 0; 7302 int ret = 0;
@@ -7262,13 +7337,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7262 /* 7337 /*
7263 * ok we don't have enough space, but maybe we have free space on our 7338 * ok we don't have enough space, but maybe we have free space on our
7264 * devices to allocate new chunks for relocation, so loop through our 7339 * devices to allocate new chunks for relocation, so loop through our
 7265 * alloc devices and guess if we have enough space. However, if we 7340 * alloc devices and guess if we have enough space. If this block
7266 * were marked as full, then we know there aren't enough chunks, and we 7341 * group is going to be restriped, run checks against the target
7267 * can just return. 7342 * profile instead of the current one.
7268 */ 7343 */
7269 ret = -1; 7344 ret = -1;
7270 if (full)
7271 goto out;
7272 7345
7273 /* 7346 /*
7274 * index: 7347 * index:
@@ -7278,7 +7351,20 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7278 * 3: raid0 7351 * 3: raid0
7279 * 4: single 7352 * 4: single
7280 */ 7353 */
7281 index = get_block_group_index(block_group); 7354 target = get_restripe_target(root->fs_info, block_group->flags);
7355 if (target) {
7356 index = __get_block_group_index(extended_to_chunk(target));
7357 } else {
7358 /*
7359 * this is just a balance, so if we were marked as full
7360 * we know there is no space for a new chunk
7361 */
7362 if (full)
7363 goto out;
7364
7365 index = get_block_group_index(block_group);
7366 }
7367
7282 if (index == 0) { 7368 if (index == 0) {
7283 dev_min = 4; 7369 dev_min = 4;
7284 /* Divide by 2 */ 7370 /* Divide by 2 */
@@ -7572,7 +7658,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7572 ret = update_space_info(info, cache->flags, found_key.offset, 7658 ret = update_space_info(info, cache->flags, found_key.offset,
7573 btrfs_block_group_used(&cache->item), 7659 btrfs_block_group_used(&cache->item),
7574 &space_info); 7660 &space_info);
7575 BUG_ON(ret); 7661 BUG_ON(ret); /* -ENOMEM */
7576 cache->space_info = space_info; 7662 cache->space_info = space_info;
7577 spin_lock(&cache->space_info->lock); 7663 spin_lock(&cache->space_info->lock);
7578 cache->space_info->bytes_readonly += cache->bytes_super; 7664 cache->space_info->bytes_readonly += cache->bytes_super;
@@ -7581,7 +7667,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7581 __link_block_group(space_info, cache); 7667 __link_block_group(space_info, cache);
7582 7668
7583 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7669 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7584 BUG_ON(ret); 7670 BUG_ON(ret); /* Logic error */
7585 7671
7586 set_avail_alloc_bits(root->fs_info, cache->flags); 7672 set_avail_alloc_bits(root->fs_info, cache->flags);
7587 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7673 if (btrfs_chunk_readonly(root, cache->key.objectid))
@@ -7663,7 +7749,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7663 7749
7664 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7750 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7665 &cache->space_info); 7751 &cache->space_info);
7666 BUG_ON(ret); 7752 BUG_ON(ret); /* -ENOMEM */
7667 update_global_block_rsv(root->fs_info); 7753 update_global_block_rsv(root->fs_info);
7668 7754
7669 spin_lock(&cache->space_info->lock); 7755 spin_lock(&cache->space_info->lock);
@@ -7673,11 +7759,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7673 __link_block_group(cache->space_info, cache); 7759 __link_block_group(cache->space_info, cache);
7674 7760
7675 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7761 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7676 BUG_ON(ret); 7762 BUG_ON(ret); /* Logic error */
7677 7763
7678 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7764 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
7679 sizeof(cache->item)); 7765 sizeof(cache->item));
7680 BUG_ON(ret); 7766 if (ret) {
7767 btrfs_abort_transaction(trans, extent_root, ret);
7768 return ret;
7769 }
7681 7770
7682 set_avail_alloc_bits(extent_root->fs_info, type); 7771 set_avail_alloc_bits(extent_root->fs_info, type);
7683 7772
@@ -7686,11 +7775,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7686 7775
7687static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 7776static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
7688{ 7777{
7689 u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; 7778 u64 extra_flags = chunk_to_extended(flags) &
7690 7779 BTRFS_EXTENDED_PROFILE_MASK;
7691 /* chunk -> extended profile */
7692 if (extra_flags == 0)
7693 extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
7694 7780
7695 if (flags & BTRFS_BLOCK_GROUP_DATA) 7781 if (flags & BTRFS_BLOCK_GROUP_DATA)
7696 fs_info->avail_data_alloc_bits &= ~extra_flags; 7782 fs_info->avail_data_alloc_bits &= ~extra_flags;
@@ -7758,7 +7844,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7758 inode = lookup_free_space_inode(tree_root, block_group, path); 7844 inode = lookup_free_space_inode(tree_root, block_group, path);
7759 if (!IS_ERR(inode)) { 7845 if (!IS_ERR(inode)) {
7760 ret = btrfs_orphan_add(trans, inode); 7846 ret = btrfs_orphan_add(trans, inode);
7761 BUG_ON(ret); 7847 if (ret) {
7848 btrfs_add_delayed_iput(inode);
7849 goto out;
7850 }
7762 clear_nlink(inode); 7851 clear_nlink(inode);
7763 /* One for the block groups ref */ 7852 /* One for the block groups ref */
7764 spin_lock(&block_group->lock); 7853 spin_lock(&block_group->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2862454bcdb3..8d904dd7ea9f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -19,6 +19,7 @@
19#include "btrfs_inode.h" 19#include "btrfs_inode.h"
20#include "volumes.h" 20#include "volumes.h"
21#include "check-integrity.h" 21#include "check-integrity.h"
22#include "locking.h"
22 23
23static struct kmem_cache *extent_state_cache; 24static struct kmem_cache *extent_state_cache;
24static struct kmem_cache *extent_buffer_cache; 25static struct kmem_cache *extent_buffer_cache;
@@ -53,6 +54,13 @@ struct extent_page_data {
53 unsigned int sync_io:1; 54 unsigned int sync_io:1;
54}; 55};
55 56
57static noinline void flush_write_bio(void *data);
58static inline struct btrfs_fs_info *
59tree_fs_info(struct extent_io_tree *tree)
60{
61 return btrfs_sb(tree->mapping->host->i_sb);
62}
63
56int __init extent_io_init(void) 64int __init extent_io_init(void)
57{ 65{
58 extent_state_cache = kmem_cache_create("extent_state", 66 extent_state_cache = kmem_cache_create("extent_state",
@@ -136,6 +144,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
136#endif 144#endif
137 atomic_set(&state->refs, 1); 145 atomic_set(&state->refs, 1);
138 init_waitqueue_head(&state->wq); 146 init_waitqueue_head(&state->wq);
147 trace_alloc_extent_state(state, mask, _RET_IP_);
139 return state; 148 return state;
140} 149}
141 150
@@ -153,6 +162,7 @@ void free_extent_state(struct extent_state *state)
153 list_del(&state->leak_list); 162 list_del(&state->leak_list);
154 spin_unlock_irqrestore(&leak_lock, flags); 163 spin_unlock_irqrestore(&leak_lock, flags);
155#endif 164#endif
165 trace_free_extent_state(state, _RET_IP_);
156 kmem_cache_free(extent_state_cache, state); 166 kmem_cache_free(extent_state_cache, state);
157 } 167 }
158} 168}
@@ -439,6 +449,13 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
439 return prealloc; 449 return prealloc;
440} 450}
441 451
452void extent_io_tree_panic(struct extent_io_tree *tree, int err)
453{
454 btrfs_panic(tree_fs_info(tree), err, "Locking error: "
455 "Extent tree was modified by another "
456 "thread while locked.");
457}
458
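[Editor's note] extent_io_tree_panic() centralizes the response to split/insert failures on a tree the caller already holds locked: such a failure means the tree changed underneath us, which is unrecoverable, so every call site below funnels into one panic helper instead of a scattered BUG_ON(err == -EEXIST). A small stand-alone sketch of the pattern; tree_panic() and split_state() are stand-ins:

#include <stdio.h>
#include <stdlib.h>

static void tree_panic(int err)
{
	fprintf(stderr,
		"extent tree modified by another thread while locked: %d\n",
		err);
	abort();	/* the kernel calls btrfs_panic() here */
}

/* pretend split of a locked extent state; 0 means success */
static int split_state(int simulated_err)
{
	return simulated_err;
}

int main(void)
{
	int err = split_state(0);

	if (err)		/* replaces BUG_ON(err == -EEXIST) */
		tree_panic(err);
	puts("split ok");
	return 0;
}
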
442/* 459/*
443 * clear some bits on a range in the tree. This may require splitting 460 * clear some bits on a range in the tree. This may require splitting
444 * or inserting elements in the tree, so the gfp mask is used to 461 * or inserting elements in the tree, so the gfp mask is used to
@@ -449,8 +466,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
449 * 466 *
450 * the range [start, end] is inclusive. 467 * the range [start, end] is inclusive.
451 * 468 *
452 * This takes the tree lock, and returns < 0 on error, > 0 if any of the 469 * This takes the tree lock, and returns 0 on success and < 0 on error.
453 * bits were already set, or zero if none of the bits were already set.
454 */ 470 */
455int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 471int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
456 int bits, int wake, int delete, 472 int bits, int wake, int delete,
@@ -464,7 +480,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
464 struct rb_node *node; 480 struct rb_node *node;
465 u64 last_end; 481 u64 last_end;
466 int err; 482 int err;
467 int set = 0;
468 int clear = 0; 483 int clear = 0;
469 484
470 if (delete) 485 if (delete)
@@ -542,12 +557,14 @@ hit_next:
542 prealloc = alloc_extent_state_atomic(prealloc); 557 prealloc = alloc_extent_state_atomic(prealloc);
543 BUG_ON(!prealloc); 558 BUG_ON(!prealloc);
544 err = split_state(tree, state, prealloc, start); 559 err = split_state(tree, state, prealloc, start);
545 BUG_ON(err == -EEXIST); 560 if (err)
561 extent_io_tree_panic(tree, err);
562
546 prealloc = NULL; 563 prealloc = NULL;
547 if (err) 564 if (err)
548 goto out; 565 goto out;
549 if (state->end <= end) { 566 if (state->end <= end) {
550 set |= clear_state_bit(tree, state, &bits, wake); 567 clear_state_bit(tree, state, &bits, wake);
551 if (last_end == (u64)-1) 568 if (last_end == (u64)-1)
552 goto out; 569 goto out;
553 start = last_end + 1; 570 start = last_end + 1;
@@ -564,17 +581,19 @@ hit_next:
564 prealloc = alloc_extent_state_atomic(prealloc); 581 prealloc = alloc_extent_state_atomic(prealloc);
565 BUG_ON(!prealloc); 582 BUG_ON(!prealloc);
566 err = split_state(tree, state, prealloc, end + 1); 583 err = split_state(tree, state, prealloc, end + 1);
567 BUG_ON(err == -EEXIST); 584 if (err)
585 extent_io_tree_panic(tree, err);
586
568 if (wake) 587 if (wake)
569 wake_up(&state->wq); 588 wake_up(&state->wq);
570 589
571 set |= clear_state_bit(tree, prealloc, &bits, wake); 590 clear_state_bit(tree, prealloc, &bits, wake);
572 591
573 prealloc = NULL; 592 prealloc = NULL;
574 goto out; 593 goto out;
575 } 594 }
576 595
577 set |= clear_state_bit(tree, state, &bits, wake); 596 clear_state_bit(tree, state, &bits, wake);
578next: 597next:
579 if (last_end == (u64)-1) 598 if (last_end == (u64)-1)
580 goto out; 599 goto out;
@@ -591,7 +610,7 @@ out:
591 if (prealloc) 610 if (prealloc)
592 free_extent_state(prealloc); 611 free_extent_state(prealloc);
593 612
594 return set; 613 return 0;
595 614
596search_again: 615search_again:
597 if (start > end) 616 if (start > end)
@@ -602,8 +621,8 @@ search_again:
602 goto again; 621 goto again;
603} 622}
604 623
605static int wait_on_state(struct extent_io_tree *tree, 624static void wait_on_state(struct extent_io_tree *tree,
606 struct extent_state *state) 625 struct extent_state *state)
607 __releases(tree->lock) 626 __releases(tree->lock)
608 __acquires(tree->lock) 627 __acquires(tree->lock)
609{ 628{
@@ -613,7 +632,6 @@ static int wait_on_state(struct extent_io_tree *tree,
613 schedule(); 632 schedule();
614 spin_lock(&tree->lock); 633 spin_lock(&tree->lock);
615 finish_wait(&state->wq, &wait); 634 finish_wait(&state->wq, &wait);
616 return 0;
617} 635}
618 636
619/* 637/*
@@ -621,7 +639,7 @@ static int wait_on_state(struct extent_io_tree *tree,
621 * The range [start, end] is inclusive. 639 * The range [start, end] is inclusive.
622 * The tree lock is taken by this function 640 * The tree lock is taken by this function
623 */ 641 */
624int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) 642void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
625{ 643{
626 struct extent_state *state; 644 struct extent_state *state;
627 struct rb_node *node; 645 struct rb_node *node;
@@ -658,7 +676,6 @@ again:
658 } 676 }
659out: 677out:
660 spin_unlock(&tree->lock); 678 spin_unlock(&tree->lock);
661 return 0;
662} 679}
663 680
664static void set_state_bits(struct extent_io_tree *tree, 681static void set_state_bits(struct extent_io_tree *tree,
@@ -706,9 +723,10 @@ static void uncache_state(struct extent_state **cached_ptr)
 706 * [start, end] is inclusive. This takes the tree lock. 723 * [start, end] is inclusive. This takes the tree lock.
707 */ 724 */
708 725
709int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 726static int __must_check
710 int bits, int exclusive_bits, u64 *failed_start, 727__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
711 struct extent_state **cached_state, gfp_t mask) 728 int bits, int exclusive_bits, u64 *failed_start,
729 struct extent_state **cached_state, gfp_t mask)
712{ 730{
713 struct extent_state *state; 731 struct extent_state *state;
714 struct extent_state *prealloc = NULL; 732 struct extent_state *prealloc = NULL;
@@ -742,8 +760,10 @@ again:
742 prealloc = alloc_extent_state_atomic(prealloc); 760 prealloc = alloc_extent_state_atomic(prealloc);
743 BUG_ON(!prealloc); 761 BUG_ON(!prealloc);
744 err = insert_state(tree, prealloc, start, end, &bits); 762 err = insert_state(tree, prealloc, start, end, &bits);
763 if (err)
764 extent_io_tree_panic(tree, err);
765
745 prealloc = NULL; 766 prealloc = NULL;
746 BUG_ON(err == -EEXIST);
747 goto out; 767 goto out;
748 } 768 }
749 state = rb_entry(node, struct extent_state, rb_node); 769 state = rb_entry(node, struct extent_state, rb_node);
@@ -809,7 +829,9 @@ hit_next:
809 prealloc = alloc_extent_state_atomic(prealloc); 829 prealloc = alloc_extent_state_atomic(prealloc);
810 BUG_ON(!prealloc); 830 BUG_ON(!prealloc);
811 err = split_state(tree, state, prealloc, start); 831 err = split_state(tree, state, prealloc, start);
812 BUG_ON(err == -EEXIST); 832 if (err)
833 extent_io_tree_panic(tree, err);
834
813 prealloc = NULL; 835 prealloc = NULL;
814 if (err) 836 if (err)
815 goto out; 837 goto out;
@@ -846,12 +868,9 @@ hit_next:
846 */ 868 */
847 err = insert_state(tree, prealloc, start, this_end, 869 err = insert_state(tree, prealloc, start, this_end,
848 &bits); 870 &bits);
849 BUG_ON(err == -EEXIST); 871 if (err)
850 if (err) { 872 extent_io_tree_panic(tree, err);
851 free_extent_state(prealloc); 873
852 prealloc = NULL;
853 goto out;
854 }
855 cache_state(prealloc, cached_state); 874 cache_state(prealloc, cached_state);
856 prealloc = NULL; 875 prealloc = NULL;
857 start = this_end + 1; 876 start = this_end + 1;
@@ -873,7 +892,8 @@ hit_next:
873 prealloc = alloc_extent_state_atomic(prealloc); 892 prealloc = alloc_extent_state_atomic(prealloc);
874 BUG_ON(!prealloc); 893 BUG_ON(!prealloc);
875 err = split_state(tree, state, prealloc, end + 1); 894 err = split_state(tree, state, prealloc, end + 1);
876 BUG_ON(err == -EEXIST); 895 if (err)
896 extent_io_tree_panic(tree, err);
877 897
878 set_state_bits(tree, prealloc, &bits); 898 set_state_bits(tree, prealloc, &bits);
879 cache_state(prealloc, cached_state); 899 cache_state(prealloc, cached_state);
@@ -900,6 +920,15 @@ search_again:
900 goto again; 920 goto again;
901} 921}
902 922
923int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
924 u64 *failed_start, struct extent_state **cached_state,
925 gfp_t mask)
926{
927 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
928 cached_state, mask);
929}
930
931
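[Editor's note] The wrapper above narrows the public API: exclusive locking via exclusive_bits is now reachable only through the static __set_extent_bit(), so outside callers cannot request it by accident. A sketch of the same narrowing with stub bodies; the signatures mirror the diff but everything else is invented:

#include <stdio.h>
#include <stdint.h>

static int __set_extent_bit(uint64_t start, uint64_t end, int bits,
			    int exclusive_bits, uint64_t *failed_start)
{
	(void)failed_start;
	printf("set [%llu, %llu] bits=%#x excl=%#x\n",
	       (unsigned long long)start, (unsigned long long)end,
	       bits, exclusive_bits);
	return 0;
}

/* public entry point: exclusive_bits is pinned to 0 */
static int set_extent_bit(uint64_t start, uint64_t end, int bits,
			  uint64_t *failed_start)
{
	return __set_extent_bit(start, end, bits, 0, failed_start);
}

int main(void)
{
	return set_extent_bit(0, 4095, 0x1, NULL);
}
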
903/** 932/**
904 * convert_extent - convert all bits in a given range from one bit to another 933 * convert_extent - convert all bits in a given range from one bit to another
905 * @tree: the io tree to search 934 * @tree: the io tree to search
@@ -946,7 +975,8 @@ again:
946 } 975 }
947 err = insert_state(tree, prealloc, start, end, &bits); 976 err = insert_state(tree, prealloc, start, end, &bits);
948 prealloc = NULL; 977 prealloc = NULL;
949 BUG_ON(err == -EEXIST); 978 if (err)
979 extent_io_tree_panic(tree, err);
950 goto out; 980 goto out;
951 } 981 }
952 state = rb_entry(node, struct extent_state, rb_node); 982 state = rb_entry(node, struct extent_state, rb_node);
@@ -1002,7 +1032,8 @@ hit_next:
1002 goto out; 1032 goto out;
1003 } 1033 }
1004 err = split_state(tree, state, prealloc, start); 1034 err = split_state(tree, state, prealloc, start);
1005 BUG_ON(err == -EEXIST); 1035 if (err)
1036 extent_io_tree_panic(tree, err);
1006 prealloc = NULL; 1037 prealloc = NULL;
1007 if (err) 1038 if (err)
1008 goto out; 1039 goto out;
@@ -1041,12 +1072,8 @@ hit_next:
1041 */ 1072 */
1042 err = insert_state(tree, prealloc, start, this_end, 1073 err = insert_state(tree, prealloc, start, this_end,
1043 &bits); 1074 &bits);
1044 BUG_ON(err == -EEXIST); 1075 if (err)
1045 if (err) { 1076 extent_io_tree_panic(tree, err);
1046 free_extent_state(prealloc);
1047 prealloc = NULL;
1048 goto out;
1049 }
1050 prealloc = NULL; 1077 prealloc = NULL;
1051 start = this_end + 1; 1078 start = this_end + 1;
1052 goto search_again; 1079 goto search_again;
@@ -1065,7 +1092,8 @@ hit_next:
1065 } 1092 }
1066 1093
1067 err = split_state(tree, state, prealloc, end + 1); 1094 err = split_state(tree, state, prealloc, end + 1);
1068 BUG_ON(err == -EEXIST); 1095 if (err)
1096 extent_io_tree_panic(tree, err);
1069 1097
1070 set_state_bits(tree, prealloc, &bits); 1098 set_state_bits(tree, prealloc, &bits);
1071 clear_state_bit(tree, prealloc, &clear_bits, 0); 1099 clear_state_bit(tree, prealloc, &clear_bits, 0);
@@ -1095,14 +1123,14 @@ search_again:
1095int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 1123int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1096 gfp_t mask) 1124 gfp_t mask)
1097{ 1125{
1098 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, 1126 return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
1099 NULL, mask); 1127 NULL, mask);
1100} 1128}
1101 1129
1102int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1130int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1103 int bits, gfp_t mask) 1131 int bits, gfp_t mask)
1104{ 1132{
1105 return set_extent_bit(tree, start, end, bits, 0, NULL, 1133 return set_extent_bit(tree, start, end, bits, NULL,
1106 NULL, mask); 1134 NULL, mask);
1107} 1135}
1108 1136
@@ -1117,7 +1145,7 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
1117{ 1145{
1118 return set_extent_bit(tree, start, end, 1146 return set_extent_bit(tree, start, end,
1119 EXTENT_DELALLOC | EXTENT_UPTODATE, 1147 EXTENT_DELALLOC | EXTENT_UPTODATE,
1120 0, NULL, cached_state, mask); 1148 NULL, cached_state, mask);
1121} 1149}
1122 1150
1123int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 1151int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1131,7 +1159,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1131int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 1159int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
1132 gfp_t mask) 1160 gfp_t mask)
1133{ 1161{
1134 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, 1162 return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
1135 NULL, mask); 1163 NULL, mask);
1136} 1164}
1137 1165
@@ -1139,7 +1167,7 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1139 struct extent_state **cached_state, gfp_t mask) 1167 struct extent_state **cached_state, gfp_t mask)
1140{ 1168{
1141 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 1169 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
1142 NULL, cached_state, mask); 1170 cached_state, mask);
1143} 1171}
1144 1172
1145static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 1173static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1155,42 +1183,40 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
1155 * us if waiting is desired. 1183 * us if waiting is desired.
1156 */ 1184 */
1157int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1185int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1158 int bits, struct extent_state **cached_state, gfp_t mask) 1186 int bits, struct extent_state **cached_state)
1159{ 1187{
1160 int err; 1188 int err;
1161 u64 failed_start; 1189 u64 failed_start;
1162 while (1) { 1190 while (1) {
1163 err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, 1191 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
1164 EXTENT_LOCKED, &failed_start, 1192 EXTENT_LOCKED, &failed_start,
1165 cached_state, mask); 1193 cached_state, GFP_NOFS);
1166 if (err == -EEXIST && (mask & __GFP_WAIT)) { 1194 if (err == -EEXIST) {
1167 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); 1195 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1168 start = failed_start; 1196 start = failed_start;
1169 } else { 1197 } else
1170 break; 1198 break;
1171 }
1172 WARN_ON(start > end); 1199 WARN_ON(start > end);
1173 } 1200 }
1174 return err; 1201 return err;
1175} 1202}
1176 1203
1177int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 1204int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1178{ 1205{
1179 return lock_extent_bits(tree, start, end, 0, NULL, mask); 1206 return lock_extent_bits(tree, start, end, 0, NULL);
1180} 1207}
1181 1208
1182int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1209int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1183 gfp_t mask)
1184{ 1210{
1185 int err; 1211 int err;
1186 u64 failed_start; 1212 u64 failed_start;
1187 1213
1188 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, 1214 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1189 &failed_start, NULL, mask); 1215 &failed_start, NULL, GFP_NOFS);
1190 if (err == -EEXIST) { 1216 if (err == -EEXIST) {
1191 if (failed_start > start) 1217 if (failed_start > start)
1192 clear_extent_bit(tree, start, failed_start - 1, 1218 clear_extent_bit(tree, start, failed_start - 1,
1193 EXTENT_LOCKED, 1, 0, NULL, mask); 1219 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
1194 return 0; 1220 return 0;
1195 } 1221 }
1196 return 1; 1222 return 1;
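
[Editor's note] lock_extent() and try_lock_extent() lose their gfp_t parameter in the hunk above; the allocation mode is pinned to GFP_NOFS inside extent_io.c, and the blocking variant simply waits and retries from failed_start on -EEXIST. A toy model of that retry loop, with all helpers as stand-ins:

#include <errno.h>
#include <stdio.h>
#include <stdint.h>

static int attempts;

/* pretend the first range we ask for is already locked */
static int try_set_locked(uint64_t start, uint64_t end, uint64_t *failed)
{
	(void)end;
	if (attempts++ == 0) {
		*failed = start;
		return -EEXIST;
	}
	return 0;
}

static void wait_unlocked(uint64_t start) { (void)start; /* would sleep */ }

static int lock_extent(uint64_t start, uint64_t end)
{
	uint64_t failed_start = 0;
	int err;

	while (1) {
		err = try_set_locked(start, end, &failed_start);
		if (err == -EEXIST) {
			wait_unlocked(failed_start);
			start = failed_start;	/* resume where we blocked */
		} else
			break;
	}
	return err;
}

int main(void)
{
	printf("lock_extent -> %d after %d tries\n",
	       lock_extent(0, 4095), attempts);
	return 0;
}
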
@@ -1203,10 +1229,10 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
1203 mask); 1229 mask);
1204} 1230}
1205 1231
1206int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 1232int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1207{ 1233{
1208 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, 1234 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1209 mask); 1235 GFP_NOFS);
1210} 1236}
1211 1237
1212/* 1238/*
@@ -1220,7 +1246,7 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1220 1246
1221 while (index <= end_index) { 1247 while (index <= end_index) {
1222 page = find_get_page(tree->mapping, index); 1248 page = find_get_page(tree->mapping, index);
1223 BUG_ON(!page); 1249 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1224 set_page_writeback(page); 1250 set_page_writeback(page);
1225 page_cache_release(page); 1251 page_cache_release(page);
1226 index++; 1252 index++;
@@ -1343,9 +1369,9 @@ out:
1343 return found; 1369 return found;
1344} 1370}
1345 1371
1346static noinline int __unlock_for_delalloc(struct inode *inode, 1372static noinline void __unlock_for_delalloc(struct inode *inode,
1347 struct page *locked_page, 1373 struct page *locked_page,
1348 u64 start, u64 end) 1374 u64 start, u64 end)
1349{ 1375{
1350 int ret; 1376 int ret;
1351 struct page *pages[16]; 1377 struct page *pages[16];
@@ -1355,7 +1381,7 @@ static noinline int __unlock_for_delalloc(struct inode *inode,
1355 int i; 1381 int i;
1356 1382
1357 if (index == locked_page->index && end_index == index) 1383 if (index == locked_page->index && end_index == index)
1358 return 0; 1384 return;
1359 1385
1360 while (nr_pages > 0) { 1386 while (nr_pages > 0) {
1361 ret = find_get_pages_contig(inode->i_mapping, index, 1387 ret = find_get_pages_contig(inode->i_mapping, index,
@@ -1370,7 +1396,6 @@ static noinline int __unlock_for_delalloc(struct inode *inode,
1370 index += ret; 1396 index += ret;
1371 cond_resched(); 1397 cond_resched();
1372 } 1398 }
1373 return 0;
1374} 1399}
1375 1400
1376static noinline int lock_delalloc_pages(struct inode *inode, 1401static noinline int lock_delalloc_pages(struct inode *inode,
@@ -1500,11 +1525,10 @@ again:
1500 goto out_failed; 1525 goto out_failed;
1501 } 1526 }
1502 } 1527 }
1503 BUG_ON(ret); 1528 BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
1504 1529
1505 /* step three, lock the state bits for the whole range */ 1530 /* step three, lock the state bits for the whole range */
1506 lock_extent_bits(tree, delalloc_start, delalloc_end, 1531 lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
1507 0, &cached_state, GFP_NOFS);
1508 1532
1509 /* then test to make sure it is all still delalloc */ 1533 /* then test to make sure it is all still delalloc */
1510 ret = test_range_bit(tree, delalloc_start, delalloc_end, 1534 ret = test_range_bit(tree, delalloc_start, delalloc_end,
@@ -1761,39 +1785,34 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1761 * helper function to set a given page up to date if all the 1785 * helper function to set a given page up to date if all the
1762 * extents in the tree for that page are up to date 1786 * extents in the tree for that page are up to date
1763 */ 1787 */
1764static int check_page_uptodate(struct extent_io_tree *tree, 1788static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1765 struct page *page)
1766{ 1789{
1767 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1790 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1768 u64 end = start + PAGE_CACHE_SIZE - 1; 1791 u64 end = start + PAGE_CACHE_SIZE - 1;
1769 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) 1792 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1770 SetPageUptodate(page); 1793 SetPageUptodate(page);
1771 return 0;
1772} 1794}
1773 1795
1774/* 1796/*
1775 * helper function to unlock a page if all the extents in the tree 1797 * helper function to unlock a page if all the extents in the tree
1776 * for that page are unlocked 1798 * for that page are unlocked
1777 */ 1799 */
1778static int check_page_locked(struct extent_io_tree *tree, 1800static void check_page_locked(struct extent_io_tree *tree, struct page *page)
1779 struct page *page)
1780{ 1801{
1781 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1802 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1782 u64 end = start + PAGE_CACHE_SIZE - 1; 1803 u64 end = start + PAGE_CACHE_SIZE - 1;
1783 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) 1804 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
1784 unlock_page(page); 1805 unlock_page(page);
1785 return 0;
1786} 1806}
1787 1807
1788/* 1808/*
1789 * helper function to end page writeback if all the extents 1809 * helper function to end page writeback if all the extents
1790 * in the tree for that page are done with writeback 1810 * in the tree for that page are done with writeback
1791 */ 1811 */
1792static int check_page_writeback(struct extent_io_tree *tree, 1812static void check_page_writeback(struct extent_io_tree *tree,
1793 struct page *page) 1813 struct page *page)
1794{ 1814{
1795 end_page_writeback(page); 1815 end_page_writeback(page);
1796 return 0;
1797} 1816}
1798 1817
1799/* 1818/*
@@ -1912,6 +1931,26 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
1912 return 0; 1931 return 0;
1913} 1932}
1914 1933
1934int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
1935 int mirror_num)
1936{
1937 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
1938 u64 start = eb->start;
1939 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
1940 int ret;
1941
1942 for (i = 0; i < num_pages; i++) {
1943 struct page *p = extent_buffer_page(eb, i);
1944 ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
1945 start, p, mirror_num);
1946 if (ret)
1947 break;
1948 start += PAGE_CACHE_SIZE;
1949 }
1950
1951 return ret;
1952}
1953
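[Editor's note] repair_eb_io_failure() above is new: it walks each page backing a metadata extent buffer and feeds it to repair_io_failure(), stopping at the first hard error. A simplified userspace sketch of the loop; the page-size arithmetic and repair callback below are stand-ins:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

static int repair_one_page(uint64_t logical, int mirror)
{
	printf("rewriting %llu from mirror %d\n",
	       (unsigned long long)logical, mirror);
	return 0;		/* 0 = rewritten from a good copy */
}

static int repair_eb(uint64_t start, uint32_t len, int mirror)
{
	unsigned long i, num_pages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
	int ret = 0;

	for (i = 0; i < num_pages; i++) {
		ret = repair_one_page(start, mirror);
		if (ret)
			break;	/* stop at the first hard error */
		start += PAGE_SIZE;
	}
	return ret;
}

int main(void)
{
	return repair_eb(16384, 16384, 2);
}
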
1915/* 1954/*
1916 * each time an IO finishes, we do a fast check in the IO failure tree 1955 * each time an IO finishes, we do a fast check in the IO failure tree
1917 * to see if we need to process or clean up an io_failure_record 1956 * to see if we need to process or clean up an io_failure_record
@@ -2258,6 +2297,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2258 u64 start; 2297 u64 start;
2259 u64 end; 2298 u64 end;
2260 int whole_page; 2299 int whole_page;
2300 int failed_mirror;
2261 int ret; 2301 int ret;
2262 2302
2263 if (err) 2303 if (err)
@@ -2304,9 +2344,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2304 else 2344 else
2305 clean_io_failure(start, page); 2345 clean_io_failure(start, page);
2306 } 2346 }
2307 if (!uptodate) { 2347
2308 int failed_mirror; 2348 if (!uptodate)
2309 failed_mirror = (int)(unsigned long)bio->bi_bdev; 2349 failed_mirror = (int)(unsigned long)bio->bi_bdev;
2350
2351 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
2352 ret = tree->ops->readpage_io_failed_hook(page, failed_mirror);
2353 if (!ret && !err &&
2354 test_bit(BIO_UPTODATE, &bio->bi_flags))
2355 uptodate = 1;
2356 } else if (!uptodate) {
2310 /* 2357 /*
2311 * The generic bio_readpage_error handles errors the 2358 * The generic bio_readpage_error handles errors the
2312 * following way: If possible, new read requests are 2359 * following way: If possible, new read requests are
@@ -2320,7 +2367,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2320 ret = bio_readpage_error(bio, page, start, end, 2367 ret = bio_readpage_error(bio, page, start, end,
2321 failed_mirror, NULL); 2368 failed_mirror, NULL);
2322 if (ret == 0) { 2369 if (ret == 0) {
2323error_handled:
2324 uptodate = 2370 uptodate =
2325 test_bit(BIO_UPTODATE, &bio->bi_flags); 2371 test_bit(BIO_UPTODATE, &bio->bi_flags);
2326 if (err) 2372 if (err)
@@ -2328,16 +2374,9 @@ error_handled:
2328 uncache_state(&cached); 2374 uncache_state(&cached);
2329 continue; 2375 continue;
2330 } 2376 }
2331 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2332 ret = tree->ops->readpage_io_failed_hook(
2333 bio, page, start, end,
2334 failed_mirror, state);
2335 if (ret == 0)
2336 goto error_handled;
2337 }
2338 } 2377 }
2339 2378
2340 if (uptodate) { 2379 if (uptodate && tree->track_uptodate) {
2341 set_extent_uptodate(tree, start, end, &cached, 2380 set_extent_uptodate(tree, start, end, &cached,
2342 GFP_ATOMIC); 2381 GFP_ATOMIC);
2343 } 2382 }
@@ -2386,8 +2425,12 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2386 return bio; 2425 return bio;
2387} 2426}
2388 2427
2389static int submit_one_bio(int rw, struct bio *bio, int mirror_num, 2428/*
2390 unsigned long bio_flags) 2429 * Since writes are async, they will only return -ENOMEM.
2430 * Reads can return the full range of I/O error conditions.
2431 */
2432static int __must_check submit_one_bio(int rw, struct bio *bio,
2433 int mirror_num, unsigned long bio_flags)
2391{ 2434{
2392 int ret = 0; 2435 int ret = 0;
2393 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 2436 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -2413,6 +2456,19 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
2413 return ret; 2456 return ret;
2414} 2457}
2415 2458
2459static int merge_bio(struct extent_io_tree *tree, struct page *page,
2460 unsigned long offset, size_t size, struct bio *bio,
2461 unsigned long bio_flags)
2462{
2463 int ret = 0;
2464 if (tree->ops && tree->ops->merge_bio_hook)
2465 ret = tree->ops->merge_bio_hook(page, offset, size, bio,
2466 bio_flags);
2467 BUG_ON(ret < 0);
2468 return ret;
2469
2470}
2471
2416static int submit_extent_page(int rw, struct extent_io_tree *tree, 2472static int submit_extent_page(int rw, struct extent_io_tree *tree,
2417 struct page *page, sector_t sector, 2473 struct page *page, sector_t sector,
2418 size_t size, unsigned long offset, 2474 size_t size, unsigned long offset,
@@ -2441,12 +2497,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2441 sector; 2497 sector;
2442 2498
2443 if (prev_bio_flags != bio_flags || !contig || 2499 if (prev_bio_flags != bio_flags || !contig ||
2444 (tree->ops && tree->ops->merge_bio_hook && 2500 merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
2445 tree->ops->merge_bio_hook(page, offset, page_size, bio,
2446 bio_flags)) ||
2447 bio_add_page(bio, page, page_size, offset) < page_size) { 2501 bio_add_page(bio, page, page_size, offset) < page_size) {
2448 ret = submit_one_bio(rw, bio, mirror_num, 2502 ret = submit_one_bio(rw, bio, mirror_num,
2449 prev_bio_flags); 2503 prev_bio_flags);
2504 if (ret < 0)
2505 return ret;
2450 bio = NULL; 2506 bio = NULL;
2451 } else { 2507 } else {
2452 return 0; 2508 return 0;
@@ -2473,25 +2529,31 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2473 return ret; 2529 return ret;
2474} 2530}
2475 2531
2476void set_page_extent_mapped(struct page *page) 2532void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
2477{ 2533{
2478 if (!PagePrivate(page)) { 2534 if (!PagePrivate(page)) {
2479 SetPagePrivate(page); 2535 SetPagePrivate(page);
2480 page_cache_get(page); 2536 page_cache_get(page);
2481 set_page_private(page, EXTENT_PAGE_PRIVATE); 2537 set_page_private(page, (unsigned long)eb);
2538 } else {
2539 WARN_ON(page->private != (unsigned long)eb);
2482 } 2540 }
2483} 2541}
2484 2542
2485static void set_page_extent_head(struct page *page, unsigned long len) 2543void set_page_extent_mapped(struct page *page)
2486{ 2544{
2487 WARN_ON(!PagePrivate(page)); 2545 if (!PagePrivate(page)) {
2488 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 2546 SetPagePrivate(page);
2547 page_cache_get(page);
2548 set_page_private(page, EXTENT_PAGE_PRIVATE);
2549 }
2489} 2550}
2490 2551
2491/* 2552/*
2492 * basic readpage implementation. Locked extent state structs are inserted 2553 * basic readpage implementation. Locked extent state structs are inserted
2493 * into the tree that are removed when the IO is done (by the end_io 2554 * into the tree that are removed when the IO is done (by the end_io
2494 * handlers) 2555 * handlers)
2556 * XXX JDM: This needs looking at to ensure proper page locking
2495 */ 2557 */
2496static int __extent_read_full_page(struct extent_io_tree *tree, 2558static int __extent_read_full_page(struct extent_io_tree *tree,
2497 struct page *page, 2559 struct page *page,
@@ -2531,11 +2593,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2531 2593
2532 end = page_end; 2594 end = page_end;
2533 while (1) { 2595 while (1) {
2534 lock_extent(tree, start, end, GFP_NOFS); 2596 lock_extent(tree, start, end);
2535 ordered = btrfs_lookup_ordered_extent(inode, start); 2597 ordered = btrfs_lookup_ordered_extent(inode, start);
2536 if (!ordered) 2598 if (!ordered)
2537 break; 2599 break;
2538 unlock_extent(tree, start, end, GFP_NOFS); 2600 unlock_extent(tree, start, end);
2539 btrfs_start_ordered_extent(inode, ordered, 1); 2601 btrfs_start_ordered_extent(inode, ordered, 1);
2540 btrfs_put_ordered_extent(ordered); 2602 btrfs_put_ordered_extent(ordered);
2541 } 2603 }
@@ -2572,7 +2634,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2572 end - cur + 1, 0); 2634 end - cur + 1, 0);
2573 if (IS_ERR_OR_NULL(em)) { 2635 if (IS_ERR_OR_NULL(em)) {
2574 SetPageError(page); 2636 SetPageError(page);
2575 unlock_extent(tree, cur, end, GFP_NOFS); 2637 unlock_extent(tree, cur, end);
2576 break; 2638 break;
2577 } 2639 }
2578 extent_offset = cur - em->start; 2640 extent_offset = cur - em->start;
@@ -2624,7 +2686,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2624 if (test_range_bit(tree, cur, cur_end, 2686 if (test_range_bit(tree, cur, cur_end,
2625 EXTENT_UPTODATE, 1, NULL)) { 2687 EXTENT_UPTODATE, 1, NULL)) {
2626 check_page_uptodate(tree, page); 2688 check_page_uptodate(tree, page);
2627 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2689 unlock_extent(tree, cur, cur + iosize - 1);
2628 cur = cur + iosize; 2690 cur = cur + iosize;
2629 pg_offset += iosize; 2691 pg_offset += iosize;
2630 continue; 2692 continue;
@@ -2634,7 +2696,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2634 */ 2696 */
2635 if (block_start == EXTENT_MAP_INLINE) { 2697 if (block_start == EXTENT_MAP_INLINE) {
2636 SetPageError(page); 2698 SetPageError(page);
2637 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2699 unlock_extent(tree, cur, cur + iosize - 1);
2638 cur = cur + iosize; 2700 cur = cur + iosize;
2639 pg_offset += iosize; 2701 pg_offset += iosize;
2640 continue; 2702 continue;
@@ -2654,6 +2716,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2654 end_bio_extent_readpage, mirror_num, 2716 end_bio_extent_readpage, mirror_num,
2655 *bio_flags, 2717 *bio_flags,
2656 this_bio_flag); 2718 this_bio_flag);
2719 BUG_ON(ret == -ENOMEM);
2657 nr++; 2720 nr++;
2658 *bio_flags = this_bio_flag; 2721 *bio_flags = this_bio_flag;
2659 } 2722 }
@@ -2795,7 +2858,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2795 delalloc_end, 2858 delalloc_end,
2796 &page_started, 2859 &page_started,
2797 &nr_written); 2860 &nr_written);
2798 BUG_ON(ret); 2861 /* File system has been set read-only */
2862 if (ret) {
2863 SetPageError(page);
2864 goto done;
2865 }
2799 /* 2866 /*
2800 * delalloc_end is already one less than the total 2867 * delalloc_end is already one less than the total
2801 * length, so we don't subtract one from 2868 * length, so we don't subtract one from
@@ -2968,6 +3035,275 @@ done_unlocked:
2968 return 0; 3035 return 0;
2969} 3036}
2970 3037
3038static int eb_wait(void *word)
3039{
3040 io_schedule();
3041 return 0;
3042}
3043
3044static void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3045{
3046 wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
3047 TASK_UNINTERRUPTIBLE);
3048}
3049
3050static int lock_extent_buffer_for_io(struct extent_buffer *eb,
3051 struct btrfs_fs_info *fs_info,
3052 struct extent_page_data *epd)
3053{
3054 unsigned long i, num_pages;
3055 int flush = 0;
3056 int ret = 0;
3057
3058 if (!btrfs_try_tree_write_lock(eb)) {
3059 flush = 1;
3060 flush_write_bio(epd);
3061 btrfs_tree_lock(eb);
3062 }
3063
3064 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3065 btrfs_tree_unlock(eb);
3066 if (!epd->sync_io)
3067 return 0;
3068 if (!flush) {
3069 flush_write_bio(epd);
3070 flush = 1;
3071 }
3072 while (1) {
3073 wait_on_extent_buffer_writeback(eb);
3074 btrfs_tree_lock(eb);
3075 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3076 break;
3077 btrfs_tree_unlock(eb);
3078 }
3079 }
3080
3081 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3082 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3083 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3084 spin_lock(&fs_info->delalloc_lock);
3085 if (fs_info->dirty_metadata_bytes >= eb->len)
3086 fs_info->dirty_metadata_bytes -= eb->len;
3087 else
3088 WARN_ON(1);
3089 spin_unlock(&fs_info->delalloc_lock);
3090 ret = 1;
3091 }
3092
3093 btrfs_tree_unlock(eb);
3094
3095 if (!ret)
3096 return ret;
3097
3098 num_pages = num_extent_pages(eb->start, eb->len);
3099 for (i = 0; i < num_pages; i++) {
3100 struct page *p = extent_buffer_page(eb, i);
3101
3102 if (!trylock_page(p)) {
3103 if (!flush) {
3104 flush_write_bio(epd);
3105 flush = 1;
3106 }
3107 lock_page(p);
3108 }
3109 }
3110
3111 return ret;
3112}
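
Besides the lock/flush dance, lock_extent_buffer_for_io() above moves the dirty_metadata_bytes accounting under delalloc_lock and guards the subtraction against underflow, warning instead of wrapping. A trivial standalone illustration of that guard, with a plain global standing in for the fs_info counter:

#include <stdio.h>

static unsigned long dirty_metadata_bytes = 8192;

static void account_clean(unsigned long len)
{
        if (dirty_metadata_bytes >= len)
                dirty_metadata_bytes -= len;
        else
                fprintf(stderr, "WARN: metadata accounting underflow\n");
}

int main(void)
{
        account_clean(4096);
        account_clean(8192);    /* triggers the warning path */
        printf("dirty_metadata_bytes = %lu\n", dirty_metadata_bytes);
        return 0;
}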
3113
3114static void end_extent_buffer_writeback(struct extent_buffer *eb)
3115{
3116 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3117 smp_mb__after_clear_bit();
3118 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3119}
3120
3121static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
3122{
3123 int uptodate = err == 0;
3124 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
3125 struct extent_buffer *eb;
3126 int done;
3127
3128 do {
3129 struct page *page = bvec->bv_page;
3130
3131 bvec--;
3132 eb = (struct extent_buffer *)page->private;
3133 BUG_ON(!eb);
3134 done = atomic_dec_and_test(&eb->io_pages);
3135
3136 if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
3137 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
3138 ClearPageUptodate(page);
3139 SetPageError(page);
3140 }
3141
3142 end_page_writeback(page);
3143
3144 if (!done)
3145 continue;
3146
3147 end_extent_buffer_writeback(eb);
3148 } while (bvec >= bio->bi_io_vec);
3149
3150 bio_put(bio);
3151
3152}
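
The end_io handler walks the bio_vec array backwards and relies on an atomic per-buffer page counter: every completed page decrements io_pages, and only the completion that drops it to zero ends the buffer's writeback and wakes waiters. A compilable C11 model of that countdown; eb_model and end_one_page are illustrative stand-ins, not the kernel API:

#include <stdatomic.h>
#include <stdio.h>

struct eb_model { atomic_int io_pages; };

static void end_one_page(struct eb_model *eb, int page_nr)
{
        /* atomic_fetch_sub returns the value before the subtraction */
        int remaining = atomic_fetch_sub(&eb->io_pages, 1) - 1;

        printf("page %d done, %d still in flight\n", page_nr, remaining);
        if (remaining == 0)
                puts("last page: clear WRITEBACK bit, wake waiters");
}

int main(void)
{
        struct eb_model eb = { .io_pages = 4 };

        for (int i = 3; i >= 0; i--)    /* bio_vecs are walked backwards */
                end_one_page(&eb, i);
        return 0;
}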
3153
3154static int write_one_eb(struct extent_buffer *eb,
3155 struct btrfs_fs_info *fs_info,
3156 struct writeback_control *wbc,
3157 struct extent_page_data *epd)
3158{
3159 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3160 u64 offset = eb->start;
3161 unsigned long i, num_pages;
3162 int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
3163 int ret;
3164
3165 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
3166 num_pages = num_extent_pages(eb->start, eb->len);
3167 atomic_set(&eb->io_pages, num_pages);
3168 for (i = 0; i < num_pages; i++) {
3169 struct page *p = extent_buffer_page(eb, i);
3170
3171 clear_page_dirty_for_io(p);
3172 set_page_writeback(p);
3173 ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
3174 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
3175 -1, end_bio_extent_buffer_writepage,
3176 0, 0, 0);
3177 if (ret) {
3178 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
3179 SetPageError(p);
3180 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3181 end_extent_buffer_writeback(eb);
3182 ret = -EIO;
3183 break;
3184 }
3185 offset += PAGE_CACHE_SIZE;
3186 update_nr_written(p, wbc, 1);
3187 unlock_page(p);
3188 }
3189
3190 if (unlikely(ret)) {
3191 for (; i < num_pages; i++) {
3192 struct page *p = extent_buffer_page(eb, i);
3193 unlock_page(p);
3194 }
3195 }
3196
3197 return ret;
3198}
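
write_one_eb() pre-charges io_pages with the full page count, so on a submission failure it has to subtract the pages that will never receive an end_io callback; atomic_sub_and_test() also catches the case where the already-submitted pages completed in the meantime. A sketch of that arithmetic with plain integers, assuming a hypothetical failure at page 2:

#include <stdio.h>

int main(void)
{
        int num_pages = 4;
        int io_pages = num_pages;       /* pre-charged before submission */

        for (int i = 0; i < num_pages; i++) {
                int submit_ok = (i != 2);   /* pretend page 2 fails */

                if (!submit_ok) {
                        /* pages i..num_pages-1 will never see end_io:
                         * remove their contribution from the countdown */
                        io_pages -= num_pages - i;
                        printf("%d submissions still in flight\n", io_pages);
                        break;
                }
                printf("submitted page %d\n", i);
        }
        return 0;
}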
3199
3200int btree_write_cache_pages(struct address_space *mapping,
3201 struct writeback_control *wbc)
3202{
3203 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3204 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3205 struct extent_buffer *eb, *prev_eb = NULL;
3206 struct extent_page_data epd = {
3207 .bio = NULL,
3208 .tree = tree,
3209 .extent_locked = 0,
3210 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3211 };
3212 int ret = 0;
3213 int done = 0;
3214 int nr_to_write_done = 0;
3215 struct pagevec pvec;
3216 int nr_pages;
3217 pgoff_t index;
3218 pgoff_t end; /* Inclusive */
3219 int scanned = 0;
3220 int tag;
3221
3222 pagevec_init(&pvec, 0);
3223 if (wbc->range_cyclic) {
3224 index = mapping->writeback_index; /* Start from prev offset */
3225 end = -1;
3226 } else {
3227 index = wbc->range_start >> PAGE_CACHE_SHIFT;
3228 end = wbc->range_end >> PAGE_CACHE_SHIFT;
3229 scanned = 1;
3230 }
3231 if (wbc->sync_mode == WB_SYNC_ALL)
3232 tag = PAGECACHE_TAG_TOWRITE;
3233 else
3234 tag = PAGECACHE_TAG_DIRTY;
3235retry:
3236 if (wbc->sync_mode == WB_SYNC_ALL)
3237 tag_pages_for_writeback(mapping, index, end);
3238 while (!done && !nr_to_write_done && (index <= end) &&
3239 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3240 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3241 unsigned i;
3242
3243 scanned = 1;
3244 for (i = 0; i < nr_pages; i++) {
3245 struct page *page = pvec.pages[i];
3246
3247 if (!PagePrivate(page))
3248 continue;
3249
3250 if (!wbc->range_cyclic && page->index > end) {
3251 done = 1;
3252 break;
3253 }
3254
3255 eb = (struct extent_buffer *)page->private;
3256 if (!eb) {
3257 WARN_ON(1);
3258 continue;
3259 }
3260
3261 if (eb == prev_eb)
3262 continue;
3263
3264 if (!atomic_inc_not_zero(&eb->refs)) {
3265 WARN_ON(1);
3266 continue;
3267 }
3268
3269 prev_eb = eb;
3270 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3271 if (!ret) {
3272 free_extent_buffer(eb);
3273 continue;
3274 }
3275
3276 ret = write_one_eb(eb, fs_info, wbc, &epd);
3277 if (ret) {
3278 done = 1;
3279 free_extent_buffer(eb);
3280 break;
3281 }
3282 free_extent_buffer(eb);
3283
3284 /*
3285 * the filesystem may choose to bump up nr_to_write.
3286 * We have to make sure to honor the new nr_to_write
3287 * at any time
3288 */
3289 nr_to_write_done = wbc->nr_to_write <= 0;
3290 }
3291 pagevec_release(&pvec);
3292 cond_resched();
3293 }
3294 if (!scanned && !done) {
3295 /*
3296 * We hit the last page and there is more work to be done: wrap
3297 * back to the start of the file
3298 */
3299 scanned = 1;
3300 index = 0;
3301 goto retry;
3302 }
3303 flush_write_bio(&epd);
3304 return ret;
3305}
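
btree_write_cache_pages() inherits the range_cyclic behaviour of write_cache_pages(): resume from the remembered writeback index, and if the tail of the mapping yields no work, wrap once back to index 0, exactly as the comment above describes. A toy loop showing just the wrap, with a dirty[] array standing in for the tagged page-cache lookup (all names here are invented):

#include <stdio.h>

#define NPAGES 8

int main(void)
{
        int dirty[NPAGES] = { [1] = 1 };    /* dirty page below the start */
        int index = 5;                      /* resume point (range_cyclic) */
        int scanned = 0, done = 0;

retry:
        while (!done && index < NPAGES) {
                if (dirty[index]) {
                        scanned = 1;
                        dirty[index] = 0;
                        printf("wrote page %d\n", index);
                }
                index++;
        }
        if (!scanned && !done) {
                /* hit the end without finding work: wrap back once */
                scanned = 1;
                index = 0;
                goto retry;
        }
        return 0;
}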
3306
2971/** 3307/**
2972 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. 3308 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
2973 * @mapping: address space structure to write 3309 * @mapping: address space structure to write
@@ -3099,10 +3435,14 @@ retry:
3099static void flush_epd_write_bio(struct extent_page_data *epd) 3435static void flush_epd_write_bio(struct extent_page_data *epd)
3100{ 3436{
3101 if (epd->bio) { 3437 if (epd->bio) {
3438 int rw = WRITE;
3439 int ret;
3440
3102 if (epd->sync_io) 3441 if (epd->sync_io)
3103 submit_one_bio(WRITE_SYNC, epd->bio, 0, 0); 3442 rw = WRITE_SYNC;
3104 else 3443
3105 submit_one_bio(WRITE, epd->bio, 0, 0); 3444 ret = submit_one_bio(rw, epd->bio, 0, 0);
3445 BUG_ON(ret < 0); /* -ENOMEM */
3106 epd->bio = NULL; 3446 epd->bio = NULL;
3107 } 3447 }
3108} 3448}
@@ -3219,7 +3559,7 @@ int extent_readpages(struct extent_io_tree *tree,
3219 } 3559 }
3220 BUG_ON(!list_empty(pages)); 3560 BUG_ON(!list_empty(pages));
3221 if (bio) 3561 if (bio)
3222 submit_one_bio(READ, bio, 0, bio_flags); 3562 return submit_one_bio(READ, bio, 0, bio_flags);
3223 return 0; 3563 return 0;
3224} 3564}
3225 3565
@@ -3240,7 +3580,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
3240 if (start > end) 3580 if (start > end)
3241 return 0; 3581 return 0;
3242 3582
3243 lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS); 3583 lock_extent_bits(tree, start, end, 0, &cached_state);
3244 wait_on_page_writeback(page); 3584 wait_on_page_writeback(page);
3245 clear_extent_bit(tree, start, end, 3585 clear_extent_bit(tree, start, end,
3246 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 3586 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -3454,7 +3794,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3454 } 3794 }
3455 3795
3456 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3796 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
3457 &cached_state, GFP_NOFS); 3797 &cached_state);
3458 3798
3459 em = get_extent_skip_holes(inode, start, last_for_get_extent, 3799 em = get_extent_skip_holes(inode, start, last_for_get_extent,
3460 get_extent); 3800 get_extent);
@@ -3548,26 +3888,7 @@ out:
3548inline struct page *extent_buffer_page(struct extent_buffer *eb, 3888inline struct page *extent_buffer_page(struct extent_buffer *eb,
3549 unsigned long i) 3889 unsigned long i)
3550{ 3890{
3551 struct page *p; 3891 return eb->pages[i];
3552 struct address_space *mapping;
3553
3554 if (i == 0)
3555 return eb->first_page;
3556 i += eb->start >> PAGE_CACHE_SHIFT;
3557 mapping = eb->first_page->mapping;
3558 if (!mapping)
3559 return NULL;
3560
3561 /*
3562 * extent_buffer_page is only called after pinning the page
3563 * by increasing the reference count. So we know the page must
3564 * be in the radix tree.
3565 */
3566 rcu_read_lock();
3567 p = radix_tree_lookup(&mapping->page_tree, i);
3568 rcu_read_unlock();
3569
3570 return p;
3571} 3892}
3572 3893
3573inline unsigned long num_extent_pages(u64 start, u64 len) 3894inline unsigned long num_extent_pages(u64 start, u64 len)
@@ -3576,6 +3897,19 @@ inline unsigned long num_extent_pages(u64 start, u64 len)
3576 (start >> PAGE_CACHE_SHIFT); 3897 (start >> PAGE_CACHE_SHIFT);
3577} 3898}
3578 3899
3900static void __free_extent_buffer(struct extent_buffer *eb)
3901{
3902#if LEAK_DEBUG
3903 unsigned long flags;
3904 spin_lock_irqsave(&leak_lock, flags);
3905 list_del(&eb->leak_list);
3906 spin_unlock_irqrestore(&leak_lock, flags);
3907#endif
3908 if (eb->pages && eb->pages != eb->inline_pages)
3909 kfree(eb->pages);
3910 kmem_cache_free(extent_buffer_cache, eb);
3911}
3912
3579static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 3913static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3580 u64 start, 3914 u64 start,
3581 unsigned long len, 3915 unsigned long len,
@@ -3591,6 +3925,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3591 return NULL; 3925 return NULL;
3592 eb->start = start; 3926 eb->start = start;
3593 eb->len = len; 3927 eb->len = len;
3928 eb->tree = tree;
3594 rwlock_init(&eb->lock); 3929 rwlock_init(&eb->lock);
3595 atomic_set(&eb->write_locks, 0); 3930 atomic_set(&eb->write_locks, 0);
3596 atomic_set(&eb->read_locks, 0); 3931 atomic_set(&eb->read_locks, 0);
@@ -3607,20 +3942,32 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3607 list_add(&eb->leak_list, &buffers); 3942 list_add(&eb->leak_list, &buffers);
3608 spin_unlock_irqrestore(&leak_lock, flags); 3943 spin_unlock_irqrestore(&leak_lock, flags);
3609#endif 3944#endif
3945 spin_lock_init(&eb->refs_lock);
3610 atomic_set(&eb->refs, 1); 3946 atomic_set(&eb->refs, 1);
3947 atomic_set(&eb->io_pages, 0);
3948
3949 if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) {
3950 struct page **pages;
3951 int num_pages = (len + PAGE_CACHE_SIZE - 1) >>
3952 PAGE_CACHE_SHIFT;
3953 pages = kzalloc(num_pages * sizeof(struct page *), mask);
3954 if (!pages) {
3955 __free_extent_buffer(eb);
3956 return NULL;
3957 }
3958 eb->pages = pages;
3959 } else {
3960 eb->pages = eb->inline_pages;
3961 }
3611 3962
3612 return eb; 3963 return eb;
3613} 3964}
3614 3965
3615static void __free_extent_buffer(struct extent_buffer *eb) 3966static int extent_buffer_under_io(struct extent_buffer *eb)
3616{ 3967{
3617#if LEAK_DEBUG 3968 return (atomic_read(&eb->io_pages) ||
3618 unsigned long flags; 3969 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
3619 spin_lock_irqsave(&leak_lock, flags); 3970 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
3620 list_del(&eb->leak_list);
3621 spin_unlock_irqrestore(&leak_lock, flags);
3622#endif
3623 kmem_cache_free(extent_buffer_cache, eb);
3624} 3971}
3625 3972
3626/* 3973/*
@@ -3632,8 +3979,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3632 unsigned long index; 3979 unsigned long index;
3633 struct page *page; 3980 struct page *page;
3634 3981
3635 if (!eb->first_page) 3982 BUG_ON(extent_buffer_under_io(eb));
3636 return;
3637 3983
3638 index = num_extent_pages(eb->start, eb->len); 3984 index = num_extent_pages(eb->start, eb->len);
3639 if (start_idx >= index) 3985 if (start_idx >= index)
@@ -3642,8 +3988,34 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3642 do { 3988 do {
3643 index--; 3989 index--;
3644 page = extent_buffer_page(eb, index); 3990 page = extent_buffer_page(eb, index);
3645 if (page) 3991 if (page) {
3992 spin_lock(&page->mapping->private_lock);
3993 /*
3994 * We do this since we'll remove the pages after we've
3995 * removed the eb from the radix tree, so we could race
3996 * and have this page now attached to the new eb. So
3997 * only clear page_private if it's still connected to
3998 * this eb.
3999 */
4000 if (PagePrivate(page) &&
4001 page->private == (unsigned long)eb) {
4002 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4003 BUG_ON(PageDirty(page));
4004 BUG_ON(PageWriteback(page));
4005 /*
4006 * We need to make sure we haven't been attached
4007 * to a new eb.
4008 */
4009 ClearPagePrivate(page);
4010 set_page_private(page, 0);
4011 /* One for the page private */
4012 page_cache_release(page);
4013 }
4014 spin_unlock(&page->mapping->private_lock);
4015
4016 /* One for when we alloced the page */
3646 page_cache_release(page); 4017 page_cache_release(page);
4018 }
3647 } while (index != start_idx); 4019 } while (index != start_idx);
3648} 4020}
3649 4021
@@ -3656,9 +4028,50 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3656 __free_extent_buffer(eb); 4028 __free_extent_buffer(eb);
3657} 4029}
3658 4030
4031static void check_buffer_tree_ref(struct extent_buffer *eb)
4032{
4033 /* the ref bit is tricky. We have to make sure it is set
4034 * if we have the buffer dirty. Otherwise the
4035 * code to free a buffer can end up dropping a dirty
4036 * page
4037 *
4038 * Once the ref bit is set, it won't go away while the
4039 * buffer is dirty or in writeback, and it also won't
4040 * go away while we have the reference count on the
4041 * eb bumped.
4042 *
4043 * We can't just set the ref bit without bumping the
4044 * ref on the eb because free_extent_buffer might
4045 * see the ref bit and try to clear it. If this happens
4046 * free_extent_buffer might end up dropping our original
4047 * ref by mistake and freeing the page before we are able
4048 * to add one more ref.
4049 *
4050 * So bump the ref count first, then set the bit. If someone
4051 * beat us to it, drop the ref we added.
4052 */
4053 if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
4054 atomic_inc(&eb->refs);
4055 if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4056 atomic_dec(&eb->refs);
4057 }
4058}
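
check_buffer_tree_ref() depends on a strict ordering, spelled out in the comment above: bump the reference first, then test-and-set the TREE_REF bit, and drop the extra reference if another thread set the bit in the window. A C11 stdatomic model of that ordering, where atomic_exchange plays the role of test_and_set_bit (eb_model and take_tree_ref are stand-ins, not kernel names):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct eb_model {
        atomic_int refs;
        atomic_bool tree_ref;   /* stands in for EXTENT_BUFFER_TREE_REF */
};

static void take_tree_ref(struct eb_model *eb)
{
        if (!atomic_load(&eb->tree_ref)) {
                atomic_fetch_add(&eb->refs, 1);
                /* exchange returns the previous value: true means another
                 * thread set the bit between our test and our set, so the
                 * ref we just added is a duplicate and must be dropped */
                if (atomic_exchange(&eb->tree_ref, true))
                        atomic_fetch_sub(&eb->refs, 1);
        }
}

int main(void)
{
        struct eb_model eb = { .refs = 1, .tree_ref = false };

        take_tree_ref(&eb);
        take_tree_ref(&eb);     /* second call is a no-op */
        printf("refs = %d (expect 2)\n", atomic_load(&eb.refs));
        return 0;
}

Doing it the other way around, bit first and reference second, would open a window where free_extent_buffer() sees the bit, clears it, and drops a reference the setter never managed to add.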
4059
4060static void mark_extent_buffer_accessed(struct extent_buffer *eb)
4061{
4062 unsigned long num_pages, i;
4063
4064 check_buffer_tree_ref(eb);
4065
4066 num_pages = num_extent_pages(eb->start, eb->len);
4067 for (i = 0; i < num_pages; i++) {
4068 struct page *p = extent_buffer_page(eb, i);
4069 mark_page_accessed(p);
4070 }
4071}
4072
3659struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 4073struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3660 u64 start, unsigned long len, 4074 u64 start, unsigned long len)
3661 struct page *page0)
3662{ 4075{
3663 unsigned long num_pages = num_extent_pages(start, len); 4076 unsigned long num_pages = num_extent_pages(start, len);
3664 unsigned long i; 4077 unsigned long i;
@@ -3674,7 +4087,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3674 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4087 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3675 if (eb && atomic_inc_not_zero(&eb->refs)) { 4088 if (eb && atomic_inc_not_zero(&eb->refs)) {
3676 rcu_read_unlock(); 4089 rcu_read_unlock();
3677 mark_page_accessed(eb->first_page); 4090 mark_extent_buffer_accessed(eb);
3678 return eb; 4091 return eb;
3679 } 4092 }
3680 rcu_read_unlock(); 4093 rcu_read_unlock();
@@ -3683,32 +4096,43 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3683 if (!eb) 4096 if (!eb)
3684 return NULL; 4097 return NULL;
3685 4098
3686 if (page0) { 4099 for (i = 0; i < num_pages; i++, index++) {
3687 eb->first_page = page0;
3688 i = 1;
3689 index++;
3690 page_cache_get(page0);
3691 mark_page_accessed(page0);
3692 set_page_extent_mapped(page0);
3693 set_page_extent_head(page0, len);
3694 uptodate = PageUptodate(page0);
3695 } else {
3696 i = 0;
3697 }
3698 for (; i < num_pages; i++, index++) {
3699 p = find_or_create_page(mapping, index, GFP_NOFS); 4100 p = find_or_create_page(mapping, index, GFP_NOFS);
3700 if (!p) { 4101 if (!p) {
3701 WARN_ON(1); 4102 WARN_ON(1);
3702 goto free_eb; 4103 goto free_eb;
3703 } 4104 }
3704 set_page_extent_mapped(p); 4105
3705 mark_page_accessed(p); 4106 spin_lock(&mapping->private_lock);
3706 if (i == 0) { 4107 if (PagePrivate(p)) {
3707 eb->first_page = p; 4108 /*
3708 set_page_extent_head(p, len); 4110 * and attached one so let's see if we can get a ref on
3709 } else { 4110 * and attached one so lets see if we can get a ref on
3710 set_page_private(p, EXTENT_PAGE_PRIVATE); 4111 * the existing eb, and if we can we know it's good and
4112 * we can just return that one, else we know we can just
4113 * overwrite page->private.
4114 */
4115 exists = (struct extent_buffer *)p->private;
4116 if (atomic_inc_not_zero(&exists->refs)) {
4117 spin_unlock(&mapping->private_lock);
4118 unlock_page(p);
4119 mark_extent_buffer_accessed(exists);
4120 goto free_eb;
4121 }
4122
4123 /*
4124 * Do this so attach doesn't complain and we need to
4125 * drop the ref the old guy had.
4126 */
4127 ClearPagePrivate(p);
4128 WARN_ON(PageDirty(p));
4129 page_cache_release(p);
3711 } 4130 }
4131 attach_extent_buffer_page(eb, p);
4132 spin_unlock(&mapping->private_lock);
4133 WARN_ON(PageDirty(p));
4134 mark_page_accessed(p);
4135 eb->pages[i] = p;
3712 if (!PageUptodate(p)) 4136 if (!PageUptodate(p))
3713 uptodate = 0; 4137 uptodate = 0;
3714 4138
@@ -3716,12 +4140,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3716 * see below about how we avoid a nasty race with release page 4140 * see below about how we avoid a nasty race with release page
3717 * and why we unlock later 4141 * and why we unlock later
3718 */ 4142 */
3719 if (i != 0)
3720 unlock_page(p);
3721 } 4143 }
3722 if (uptodate) 4144 if (uptodate)
3723 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 4145 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3724 4146again:
3725 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); 4147 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
3726 if (ret) 4148 if (ret)
3727 goto free_eb; 4149 goto free_eb;
@@ -3731,14 +4153,21 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3731 if (ret == -EEXIST) { 4153 if (ret == -EEXIST) {
3732 exists = radix_tree_lookup(&tree->buffer, 4154 exists = radix_tree_lookup(&tree->buffer,
3733 start >> PAGE_CACHE_SHIFT); 4155 start >> PAGE_CACHE_SHIFT);
3734 /* add one reference for the caller */ 4156 if (!atomic_inc_not_zero(&exists->refs)) {
3735 atomic_inc(&exists->refs); 4157 spin_unlock(&tree->buffer_lock);
4158 radix_tree_preload_end();
4159 exists = NULL;
4160 goto again;
4161 }
3736 spin_unlock(&tree->buffer_lock); 4162 spin_unlock(&tree->buffer_lock);
3737 radix_tree_preload_end(); 4163 radix_tree_preload_end();
4164 mark_extent_buffer_accessed(exists);
3738 goto free_eb; 4165 goto free_eb;
3739 } 4166 }
3740 /* add one reference for the tree */ 4167 /* add one reference for the tree */
3741 atomic_inc(&eb->refs); 4168 spin_lock(&eb->refs_lock);
4169 check_buffer_tree_ref(eb);
4170 spin_unlock(&eb->refs_lock);
3742 spin_unlock(&tree->buffer_lock); 4171 spin_unlock(&tree->buffer_lock);
3743 radix_tree_preload_end(); 4172 radix_tree_preload_end();
3744 4173
@@ -3751,15 +4180,20 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3751 * after the extent buffer is in the radix tree so 4180 * after the extent buffer is in the radix tree so
3752 * it doesn't get lost 4181 * it doesn't get lost
3753 */ 4182 */
3754 set_page_extent_mapped(eb->first_page); 4183 SetPageChecked(eb->pages[0]);
3755 set_page_extent_head(eb->first_page, eb->len); 4184 for (i = 1; i < num_pages; i++) {
3756 if (!page0) 4185 p = extent_buffer_page(eb, i);
3757 unlock_page(eb->first_page); 4186 ClearPageChecked(p);
4187 unlock_page(p);
4188 }
4189 unlock_page(eb->pages[0]);
3758 return eb; 4190 return eb;
3759 4191
3760free_eb: 4192free_eb:
3761 if (eb->first_page && !page0) 4193 for (i = 0; i < num_pages; i++) {
3762 unlock_page(eb->first_page); 4194 if (eb->pages[i])
4195 unlock_page(eb->pages[i]);
4196 }
3763 4197
3764 if (!atomic_dec_and_test(&eb->refs)) 4198 if (!atomic_dec_and_test(&eb->refs))
3765 return exists; 4199 return exists;
@@ -3776,7 +4210,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3776 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4210 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3777 if (eb && atomic_inc_not_zero(&eb->refs)) { 4211 if (eb && atomic_inc_not_zero(&eb->refs)) {
3778 rcu_read_unlock(); 4212 rcu_read_unlock();
3779 mark_page_accessed(eb->first_page); 4213 mark_extent_buffer_accessed(eb);
3780 return eb; 4214 return eb;
3781 } 4215 }
3782 rcu_read_unlock(); 4216 rcu_read_unlock();
@@ -3784,19 +4218,71 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3784 return NULL; 4218 return NULL;
3785} 4219}
3786 4220
4221static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
4222{
4223 struct extent_buffer *eb =
4224 container_of(head, struct extent_buffer, rcu_head);
4225
4226 __free_extent_buffer(eb);
4227}
4228
4229/* Expects to have eb->refs_lock already held */
4230static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
4231{
4232 WARN_ON(atomic_read(&eb->refs) == 0);
4233 if (atomic_dec_and_test(&eb->refs)) {
4234 struct extent_io_tree *tree = eb->tree;
4235
4236 spin_unlock(&eb->refs_lock);
4237
4238 spin_lock(&tree->buffer_lock);
4239 radix_tree_delete(&tree->buffer,
4240 eb->start >> PAGE_CACHE_SHIFT);
4241 spin_unlock(&tree->buffer_lock);
4242
4243 /* Should be safe to release our pages at this point */
4244 btrfs_release_extent_buffer_page(eb, 0);
4245
4246 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4247 return;
4248 }
4249 spin_unlock(&eb->refs_lock);
4250}
4251
3787void free_extent_buffer(struct extent_buffer *eb) 4252void free_extent_buffer(struct extent_buffer *eb)
3788{ 4253{
3789 if (!eb) 4254 if (!eb)
3790 return; 4255 return;
3791 4256
3792 if (!atomic_dec_and_test(&eb->refs)) 4257 spin_lock(&eb->refs_lock);
4258 if (atomic_read(&eb->refs) == 2 &&
4259 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
4260 !extent_buffer_under_io(eb) &&
4261 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4262 atomic_dec(&eb->refs);
4263
4264 /*
4265 * I know this is terrible, but it's temporary until we stop tracking
4266 * the uptodate bits and such for the extent buffers.
4267 */
4268 release_extent_buffer(eb, GFP_ATOMIC);
4269}
4270
4271void free_extent_buffer_stale(struct extent_buffer *eb)
4272{
4273 if (!eb)
3793 return; 4274 return;
3794 4275
3795 WARN_ON(1); 4276 spin_lock(&eb->refs_lock);
4277 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
4278
4279 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
4280 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4281 atomic_dec(&eb->refs);
4282 release_extent_buffer(eb, GFP_NOFS);
3796} 4283}
3797 4284
3798int clear_extent_buffer_dirty(struct extent_io_tree *tree, 4285void clear_extent_buffer_dirty(struct extent_buffer *eb)
3799 struct extent_buffer *eb)
3800{ 4286{
3801 unsigned long i; 4287 unsigned long i;
3802 unsigned long num_pages; 4288 unsigned long num_pages;
@@ -3812,10 +4298,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3812 lock_page(page); 4298 lock_page(page);
3813 WARN_ON(!PagePrivate(page)); 4299 WARN_ON(!PagePrivate(page));
3814 4300
3815 set_page_extent_mapped(page);
3816 if (i == 0)
3817 set_page_extent_head(page, eb->len);
3818
3819 clear_page_dirty_for_io(page); 4301 clear_page_dirty_for_io(page);
3820 spin_lock_irq(&page->mapping->tree_lock); 4302 spin_lock_irq(&page->mapping->tree_lock);
3821 if (!PageDirty(page)) { 4303 if (!PageDirty(page)) {
@@ -3827,24 +4309,29 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3827 ClearPageError(page); 4309 ClearPageError(page);
3828 unlock_page(page); 4310 unlock_page(page);
3829 } 4311 }
3830 return 0; 4312 WARN_ON(atomic_read(&eb->refs) == 0);
3831} 4313}
3832 4314
3833int set_extent_buffer_dirty(struct extent_io_tree *tree, 4315int set_extent_buffer_dirty(struct extent_buffer *eb)
3834 struct extent_buffer *eb)
3835{ 4316{
3836 unsigned long i; 4317 unsigned long i;
3837 unsigned long num_pages; 4318 unsigned long num_pages;
3838 int was_dirty = 0; 4319 int was_dirty = 0;
3839 4320
4321 check_buffer_tree_ref(eb);
4322
3840 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 4323 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
4324
3841 num_pages = num_extent_pages(eb->start, eb->len); 4325 num_pages = num_extent_pages(eb->start, eb->len);
4326 WARN_ON(atomic_read(&eb->refs) == 0);
4327 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
4328
3842 for (i = 0; i < num_pages; i++) 4329 for (i = 0; i < num_pages; i++)
3843 __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); 4330 set_page_dirty(extent_buffer_page(eb, i));
3844 return was_dirty; 4331 return was_dirty;
3845} 4332}
3846 4333
3847static int __eb_straddles_pages(u64 start, u64 len) 4334static int range_straddles_pages(u64 start, u64 len)
3848{ 4335{
3849 if (len < PAGE_CACHE_SIZE) 4336 if (len < PAGE_CACHE_SIZE)
3850 return 1; 4337 return 1;
@@ -3855,25 +4342,14 @@ static int __eb_straddles_pages(u64 start, u64 len)
3855 return 0; 4342 return 0;
3856} 4343}
3857 4344
3858static int eb_straddles_pages(struct extent_buffer *eb) 4345int clear_extent_buffer_uptodate(struct extent_buffer *eb)
3859{
3860 return __eb_straddles_pages(eb->start, eb->len);
3861}
3862
3863int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3864 struct extent_buffer *eb,
3865 struct extent_state **cached_state)
3866{ 4346{
3867 unsigned long i; 4347 unsigned long i;
3868 struct page *page; 4348 struct page *page;
3869 unsigned long num_pages; 4349 unsigned long num_pages;
3870 4350
3871 num_pages = num_extent_pages(eb->start, eb->len);
3872 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 4351 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3873 4352 num_pages = num_extent_pages(eb->start, eb->len);
3874 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3875 cached_state, GFP_NOFS);
3876
3877 for (i = 0; i < num_pages; i++) { 4353 for (i = 0; i < num_pages; i++) {
3878 page = extent_buffer_page(eb, i); 4354 page = extent_buffer_page(eb, i);
3879 if (page) 4355 if (page)
@@ -3882,27 +4358,16 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3882 return 0; 4358 return 0;
3883} 4359}
3884 4360
3885int set_extent_buffer_uptodate(struct extent_io_tree *tree, 4361int set_extent_buffer_uptodate(struct extent_buffer *eb)
3886 struct extent_buffer *eb)
3887{ 4362{
3888 unsigned long i; 4363 unsigned long i;
3889 struct page *page; 4364 struct page *page;
3890 unsigned long num_pages; 4365 unsigned long num_pages;
3891 4366
4367 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3892 num_pages = num_extent_pages(eb->start, eb->len); 4368 num_pages = num_extent_pages(eb->start, eb->len);
3893
3894 if (eb_straddles_pages(eb)) {
3895 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3896 NULL, GFP_NOFS);
3897 }
3898 for (i = 0; i < num_pages; i++) { 4369 for (i = 0; i < num_pages; i++) {
3899 page = extent_buffer_page(eb, i); 4370 page = extent_buffer_page(eb, i);
3900 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3901 ((i == num_pages - 1) &&
3902 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3903 check_page_uptodate(tree, page);
3904 continue;
3905 }
3906 SetPageUptodate(page); 4371 SetPageUptodate(page);
3907 } 4372 }
3908 return 0; 4373 return 0;
@@ -3917,7 +4382,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3917 int uptodate; 4382 int uptodate;
3918 unsigned long index; 4383 unsigned long index;
3919 4384
3920 if (__eb_straddles_pages(start, end - start + 1)) { 4385 if (range_straddles_pages(start, end - start + 1)) {
3921 ret = test_range_bit(tree, start, end, 4386 ret = test_range_bit(tree, start, end,
3922 EXTENT_UPTODATE, 1, NULL); 4387 EXTENT_UPTODATE, 1, NULL);
3923 if (ret) 4388 if (ret)
@@ -3939,35 +4404,9 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3939 return pg_uptodate; 4404 return pg_uptodate;
3940} 4405}
3941 4406
3942int extent_buffer_uptodate(struct extent_io_tree *tree, 4407int extent_buffer_uptodate(struct extent_buffer *eb)
3943 struct extent_buffer *eb,
3944 struct extent_state *cached_state)
3945{ 4408{
3946 int ret = 0; 4409 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3947 unsigned long num_pages;
3948 unsigned long i;
3949 struct page *page;
3950 int pg_uptodate = 1;
3951
3952 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
3953 return 1;
3954
3955 if (eb_straddles_pages(eb)) {
3956 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3957 EXTENT_UPTODATE, 1, cached_state);
3958 if (ret)
3959 return ret;
3960 }
3961
3962 num_pages = num_extent_pages(eb->start, eb->len);
3963 for (i = 0; i < num_pages; i++) {
3964 page = extent_buffer_page(eb, i);
3965 if (!PageUptodate(page)) {
3966 pg_uptodate = 0;
3967 break;
3968 }
3969 }
3970 return pg_uptodate;
3971} 4410}
3972 4411
3973int read_extent_buffer_pages(struct extent_io_tree *tree, 4412int read_extent_buffer_pages(struct extent_io_tree *tree,
@@ -3981,21 +4420,14 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3981 int ret = 0; 4420 int ret = 0;
3982 int locked_pages = 0; 4421 int locked_pages = 0;
3983 int all_uptodate = 1; 4422 int all_uptodate = 1;
3984 int inc_all_pages = 0;
3985 unsigned long num_pages; 4423 unsigned long num_pages;
4424 unsigned long num_reads = 0;
3986 struct bio *bio = NULL; 4425 struct bio *bio = NULL;
3987 unsigned long bio_flags = 0; 4426 unsigned long bio_flags = 0;
3988 4427
3989 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 4428 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
3990 return 0; 4429 return 0;
3991 4430
3992 if (eb_straddles_pages(eb)) {
3993 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3994 EXTENT_UPTODATE, 1, NULL)) {
3995 return 0;
3996 }
3997 }
3998
3999 if (start) { 4431 if (start) {
4000 WARN_ON(start < eb->start); 4432 WARN_ON(start < eb->start);
4001 start_i = (start >> PAGE_CACHE_SHIFT) - 4433 start_i = (start >> PAGE_CACHE_SHIFT) -
@@ -4014,8 +4446,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4014 lock_page(page); 4446 lock_page(page);
4015 } 4447 }
4016 locked_pages++; 4448 locked_pages++;
4017 if (!PageUptodate(page)) 4449 if (!PageUptodate(page)) {
4450 num_reads++;
4018 all_uptodate = 0; 4451 all_uptodate = 0;
4452 }
4019 } 4453 }
4020 if (all_uptodate) { 4454 if (all_uptodate) {
4021 if (start_i == 0) 4455 if (start_i == 0)
@@ -4023,20 +4457,12 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4023 goto unlock_exit; 4457 goto unlock_exit;
4024 } 4458 }
4025 4459
4460 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
4461 eb->failed_mirror = 0;
4462 atomic_set(&eb->io_pages, num_reads);
4026 for (i = start_i; i < num_pages; i++) { 4463 for (i = start_i; i < num_pages; i++) {
4027 page = extent_buffer_page(eb, i); 4464 page = extent_buffer_page(eb, i);
4028
4029 WARN_ON(!PagePrivate(page));
4030
4031 set_page_extent_mapped(page);
4032 if (i == 0)
4033 set_page_extent_head(page, eb->len);
4034
4035 if (inc_all_pages)
4036 page_cache_get(page);
4037 if (!PageUptodate(page)) { 4465 if (!PageUptodate(page)) {
4038 if (start_i == 0)
4039 inc_all_pages = 1;
4040 ClearPageError(page); 4466 ClearPageError(page);
4041 err = __extent_read_full_page(tree, page, 4467 err = __extent_read_full_page(tree, page,
4042 get_extent, &bio, 4468 get_extent, &bio,
@@ -4048,8 +4474,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4048 } 4474 }
4049 } 4475 }
4050 4476
4051 if (bio) 4477 if (bio) {
4052 submit_one_bio(READ, bio, mirror_num, bio_flags); 4478 err = submit_one_bio(READ, bio, mirror_num, bio_flags);
4479 if (err)
4480 return err;
4481 }
4053 4482
4054 if (ret || wait != WAIT_COMPLETE) 4483 if (ret || wait != WAIT_COMPLETE)
4055 return ret; 4484 return ret;
@@ -4061,8 +4490,6 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4061 ret = -EIO; 4490 ret = -EIO;
4062 } 4491 }
4063 4492
4064 if (!ret)
4065 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
4066 return ret; 4493 return ret;
4067 4494
4068unlock_exit: 4495unlock_exit:
@@ -4304,15 +4731,20 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
4304{ 4731{
4305 char *dst_kaddr = page_address(dst_page); 4732 char *dst_kaddr = page_address(dst_page);
4306 char *src_kaddr; 4733 char *src_kaddr;
4734 int must_memmove = 0;
4307 4735
4308 if (dst_page != src_page) { 4736 if (dst_page != src_page) {
4309 src_kaddr = page_address(src_page); 4737 src_kaddr = page_address(src_page);
4310 } else { 4738 } else {
4311 src_kaddr = dst_kaddr; 4739 src_kaddr = dst_kaddr;
4312 BUG_ON(areas_overlap(src_off, dst_off, len)); 4740 if (areas_overlap(src_off, dst_off, len))
4741 must_memmove = 1;
4313 } 4742 }
4314 4743
4315 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 4744 if (must_memmove)
4745 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
4746 else
4747 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
4316} 4748}
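
copy_pages() now detects same-page overlap and switches to memmove(), because memcpy() over overlapping ranges is undefined behaviour; the old code simply BUG()ed on overlap instead of handling it. A two-line demonstration in plain C:

#include <string.h>
#include <stdio.h>

int main(void)
{
        char buf[16] = "abcdefgh";

        /* dst [2..9] overlaps src [0..7]: must use memmove() */
        memmove(buf + 2, buf, 8);
        printf("%s\n", buf);    /* prints "ababcdefgh" */
        return 0;
}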
4317 4749
4318void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 4750void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
@@ -4382,7 +4814,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4382 "len %lu len %lu\n", dst_offset, len, dst->len); 4814 "len %lu len %lu\n", dst_offset, len, dst->len);
4383 BUG_ON(1); 4815 BUG_ON(1);
4384 } 4816 }
4385 if (!areas_overlap(src_offset, dst_offset, len)) { 4817 if (dst_offset < src_offset) {
4386 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 4818 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
4387 return; 4819 return;
4388 } 4820 }
@@ -4408,47 +4840,48 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4408 } 4840 }
4409} 4841}
4410 4842
4411static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) 4843int try_release_extent_buffer(struct page *page, gfp_t mask)
4412{ 4844{
4413 struct extent_buffer *eb =
4414 container_of(head, struct extent_buffer, rcu_head);
4415
4416 btrfs_release_extent_buffer(eb);
4417}
4418
4419int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
4420{
4421 u64 start = page_offset(page);
4422 struct extent_buffer *eb; 4845 struct extent_buffer *eb;
4423 int ret = 1;
4424 4846
4425 spin_lock(&tree->buffer_lock); 4847 /*
4426 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4848 * We need to make sure nobody is attaching this page to an eb right
4427 if (!eb) { 4849 * now.
4428 spin_unlock(&tree->buffer_lock); 4850 */
4429 return ret; 4851 spin_lock(&page->mapping->private_lock);
4852 if (!PagePrivate(page)) {
4853 spin_unlock(&page->mapping->private_lock);
4854 return 1;
4430 } 4855 }
4431 4856
4432 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 4857 eb = (struct extent_buffer *)page->private;
4433 ret = 0; 4858 BUG_ON(!eb);
4434 goto out;
4435 }
4436 4859
4437 /* 4860 /*
4438 * set @eb->refs to 0 if it is already 1, and then release the @eb. 4861 * This is a little awful but should be ok, we need to make sure that
4439 * Or go back. 4862 * the eb doesn't disappear out from under us while we're looking at
4863 * this page.
4440 */ 4864 */
4441 if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) { 4865 spin_lock(&eb->refs_lock);
4442 ret = 0; 4866 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
4443 goto out; 4867 spin_unlock(&eb->refs_lock);
4868 spin_unlock(&page->mapping->private_lock);
4869 return 0;
4444 } 4870 }
4871 spin_unlock(&page->mapping->private_lock);
4445 4872
4446 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4873 if ((mask & GFP_NOFS) == GFP_NOFS)
4447out: 4874 mask = GFP_NOFS;
4448 spin_unlock(&tree->buffer_lock);
4449 4875
4450 /* at this point we can safely release the extent buffer */ 4876 /*
4451 if (atomic_read(&eb->refs) == 0) 4877 * If tree ref isn't set then we know the ref on this eb is a real ref,
4452 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); 4878 * so just return, this page will likely be freed soon anyway.
4453 return ret; 4879 */
4880 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
4881 spin_unlock(&eb->refs_lock);
4882 return 0;
4883 }
4884 release_extent_buffer(eb, mask);
4885
4886 return 1;
4454} 4887}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index cecc3518c121..faf10eb57f75 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -35,6 +35,10 @@
35#define EXTENT_BUFFER_DIRTY 2 35#define EXTENT_BUFFER_DIRTY 2
36#define EXTENT_BUFFER_CORRUPT 3 36#define EXTENT_BUFFER_CORRUPT 3
37#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ 37#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */
38#define EXTENT_BUFFER_TREE_REF 5
39#define EXTENT_BUFFER_STALE 6
40#define EXTENT_BUFFER_WRITEBACK 7
41#define EXTENT_BUFFER_IOERR 8
38 42
39/* these are flags for extent_clear_unlock_delalloc */ 43/* these are flags for extent_clear_unlock_delalloc */
40#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 44#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
@@ -54,6 +58,7 @@
54#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 58#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
55 59
56struct extent_state; 60struct extent_state;
61struct btrfs_root;
57 62
58typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, 63typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
59 struct bio *bio, int mirror_num, 64 struct bio *bio, int mirror_num,
@@ -69,9 +74,7 @@ struct extent_io_ops {
69 size_t size, struct bio *bio, 74 size_t size, struct bio *bio,
70 unsigned long bio_flags); 75 unsigned long bio_flags);
71 int (*readpage_io_hook)(struct page *page, u64 start, u64 end); 76 int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
72 int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, 77 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
73 u64 start, u64 end, int failed_mirror,
74 struct extent_state *state);
75 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, 78 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
76 u64 start, u64 end, 79 u64 start, u64 end,
77 struct extent_state *state); 80 struct extent_state *state);
@@ -97,6 +100,7 @@ struct extent_io_tree {
97 struct radix_tree_root buffer; 100 struct radix_tree_root buffer;
98 struct address_space *mapping; 101 struct address_space *mapping;
99 u64 dirty_bytes; 102 u64 dirty_bytes;
103 int track_uptodate;
100 spinlock_t lock; 104 spinlock_t lock;
101 spinlock_t buffer_lock; 105 spinlock_t buffer_lock;
102 struct extent_io_ops *ops; 106 struct extent_io_ops *ops;
@@ -119,16 +123,21 @@ struct extent_state {
119 struct list_head leak_list; 123 struct list_head leak_list;
120}; 124};
121 125
126#define INLINE_EXTENT_BUFFER_PAGES 16
127#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)
122struct extent_buffer { 128struct extent_buffer {
123 u64 start; 129 u64 start;
124 unsigned long len; 130 unsigned long len;
125 unsigned long map_start; 131 unsigned long map_start;
126 unsigned long map_len; 132 unsigned long map_len;
127 struct page *first_page;
128 unsigned long bflags; 133 unsigned long bflags;
134 struct extent_io_tree *tree;
135 spinlock_t refs_lock;
136 atomic_t refs;
137 atomic_t io_pages;
138 int failed_mirror;
129 struct list_head leak_list; 139 struct list_head leak_list;
130 struct rcu_head rcu_head; 140 struct rcu_head rcu_head;
131 atomic_t refs;
132 pid_t lock_owner; 141 pid_t lock_owner;
133 142
134 /* count of read lock holders on the extent buffer */ 143 /* count of read lock holders on the extent buffer */
@@ -152,6 +161,9 @@ struct extent_buffer {
152 * to unlock 161 * to unlock
153 */ 162 */
154 wait_queue_head_t read_lock_wq; 163 wait_queue_head_t read_lock_wq;
164 wait_queue_head_t lock_wq;
165 struct page *inline_pages[INLINE_EXTENT_BUFFER_PAGES];
166 struct page **pages;
155}; 167};
156 168
157static inline void extent_set_compress_type(unsigned long *bio_flags, 169static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -178,18 +190,17 @@ void extent_io_tree_init(struct extent_io_tree *tree,
178int try_release_extent_mapping(struct extent_map_tree *map, 190int try_release_extent_mapping(struct extent_map_tree *map,
179 struct extent_io_tree *tree, struct page *page, 191 struct extent_io_tree *tree, struct page *page,
180 gfp_t mask); 192 gfp_t mask);
181int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page); 193int try_release_extent_buffer(struct page *page, gfp_t mask);
182int try_release_extent_state(struct extent_map_tree *map, 194int try_release_extent_state(struct extent_map_tree *map,
183 struct extent_io_tree *tree, struct page *page, 195 struct extent_io_tree *tree, struct page *page,
184 gfp_t mask); 196 gfp_t mask);
185int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 197int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
186int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 198int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
187 int bits, struct extent_state **cached, gfp_t mask); 199 int bits, struct extent_state **cached);
188int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 200int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
189int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, 201int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
190 struct extent_state **cached, gfp_t mask); 202 struct extent_state **cached, gfp_t mask);
191int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 203int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
192 gfp_t mask);
193int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 204int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
194 get_extent_t *get_extent, int mirror_num); 205 get_extent_t *get_extent, int mirror_num);
195int __init extent_io_init(void); 206int __init extent_io_init(void);
@@ -210,7 +221,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
210int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 221int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
211 int bits, gfp_t mask); 222 int bits, gfp_t mask);
212int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 223int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
213 int bits, int exclusive_bits, u64 *failed_start, 224 int bits, u64 *failed_start,
214 struct extent_state **cached_state, gfp_t mask); 225 struct extent_state **cached_state, gfp_t mask);
215int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 226int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
216 struct extent_state **cached_state, gfp_t mask); 227 struct extent_state **cached_state, gfp_t mask);
@@ -240,6 +251,8 @@ int extent_writepages(struct extent_io_tree *tree,
240 struct address_space *mapping, 251 struct address_space *mapping,
241 get_extent_t *get_extent, 252 get_extent_t *get_extent,
242 struct writeback_control *wbc); 253 struct writeback_control *wbc);
254int btree_write_cache_pages(struct address_space *mapping,
255 struct writeback_control *wbc);
243int extent_readpages(struct extent_io_tree *tree, 256int extent_readpages(struct extent_io_tree *tree,
244 struct address_space *mapping, 257 struct address_space *mapping,
245 struct list_head *pages, unsigned nr_pages, 258 struct list_head *pages, unsigned nr_pages,
@@ -251,11 +264,11 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
251void set_page_extent_mapped(struct page *page); 264void set_page_extent_mapped(struct page *page);
252 265
253struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 266struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
254 u64 start, unsigned long len, 267 u64 start, unsigned long len);
255 struct page *page0);
256struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 268struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
257 u64 start, unsigned long len); 269 u64 start, unsigned long len);
258void free_extent_buffer(struct extent_buffer *eb); 270void free_extent_buffer(struct extent_buffer *eb);
271void free_extent_buffer_stale(struct extent_buffer *eb);
259#define WAIT_NONE 0 272#define WAIT_NONE 0
260#define WAIT_COMPLETE 1 273#define WAIT_COMPLETE 1
261#define WAIT_PAGE_LOCK 2 274#define WAIT_PAGE_LOCK 2
@@ -287,19 +300,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
287 unsigned long src_offset, unsigned long len); 300 unsigned long src_offset, unsigned long len);
288void memset_extent_buffer(struct extent_buffer *eb, char c, 301void memset_extent_buffer(struct extent_buffer *eb, char c,
289 unsigned long start, unsigned long len); 302 unsigned long start, unsigned long len);
290int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); 303void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
291int clear_extent_buffer_dirty(struct extent_io_tree *tree, 304void clear_extent_buffer_dirty(struct extent_buffer *eb);
292 struct extent_buffer *eb); 305int set_extent_buffer_dirty(struct extent_buffer *eb);
293int set_extent_buffer_dirty(struct extent_io_tree *tree, 306int set_extent_buffer_uptodate(struct extent_buffer *eb);
294 struct extent_buffer *eb); 307int clear_extent_buffer_uptodate(struct extent_buffer *eb);
295int set_extent_buffer_uptodate(struct extent_io_tree *tree, 308int extent_buffer_uptodate(struct extent_buffer *eb);
296 struct extent_buffer *eb);
297int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
298 struct extent_buffer *eb,
299 struct extent_state **cached_state);
300int extent_buffer_uptodate(struct extent_io_tree *tree,
301 struct extent_buffer *eb,
302 struct extent_state *cached_state);
303int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, 309int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
304 unsigned long min_len, char **map, 310 unsigned long min_len, char **map,
305 unsigned long *map_start, 311 unsigned long *map_start,
@@ -320,4 +326,6 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
320 u64 length, u64 logical, struct page *page, 326 u64 length, u64 logical, struct page *page,
321 int mirror_num); 327 int mirror_num);
322int end_extent_writepage(struct page *page, int err, u64 start, u64 end); 328int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
329int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
330 int mirror_num);
323#endif 331#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 078b4fd54500..5d158d320233 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -25,10 +25,12 @@
25#include "transaction.h" 25#include "transaction.h"
26#include "print-tree.h" 26#include "print-tree.h"
27 27
28#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ 28#define __MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
29 sizeof(struct btrfs_item) * 2) / \ 29 sizeof(struct btrfs_item) * 2) / \
30 size) - 1)) 30 size) - 1))
31 31
32#define MAX_CSUM_ITEMS(r, size) (min(__MAX_CSUM_ITEMS(r, size), PAGE_CACHE_SIZE))
33
32#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ 34#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
33 sizeof(struct btrfs_ordered_sum)) / \ 35 sizeof(struct btrfs_ordered_sum)) / \
34 sizeof(struct btrfs_sector_sum) * \ 36 sizeof(struct btrfs_sector_sum) * \
@@ -59,7 +61,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
59 sizeof(*item)); 61 sizeof(*item));
60 if (ret < 0) 62 if (ret < 0)
61 goto out; 63 goto out;
62 BUG_ON(ret); 64 BUG_ON(ret); /* Can't happen */
63 leaf = path->nodes[0]; 65 leaf = path->nodes[0];
64 item = btrfs_item_ptr(leaf, path->slots[0], 66 item = btrfs_item_ptr(leaf, path->slots[0],
65 struct btrfs_file_extent_item); 67 struct btrfs_file_extent_item);
@@ -284,6 +286,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
284 struct btrfs_ordered_sum *sums; 286 struct btrfs_ordered_sum *sums;
285 struct btrfs_sector_sum *sector_sum; 287 struct btrfs_sector_sum *sector_sum;
286 struct btrfs_csum_item *item; 288 struct btrfs_csum_item *item;
289 LIST_HEAD(tmplist);
287 unsigned long offset; 290 unsigned long offset;
288 int ret; 291 int ret;
289 size_t size; 292 size_t size;
@@ -358,7 +361,10 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
358 MAX_ORDERED_SUM_BYTES(root)); 361 MAX_ORDERED_SUM_BYTES(root));
359 sums = kzalloc(btrfs_ordered_sum_size(root, size), 362 sums = kzalloc(btrfs_ordered_sum_size(root, size),
360 GFP_NOFS); 363 GFP_NOFS);
361 BUG_ON(!sums); 364 if (!sums) {
365 ret = -ENOMEM;
366 goto fail;
367 }
362 368
363 sector_sum = sums->sums; 369 sector_sum = sums->sums;
364 sums->bytenr = start; 370 sums->bytenr = start;
@@ -380,12 +386,19 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
380 offset += csum_size; 386 offset += csum_size;
381 sector_sum++; 387 sector_sum++;
382 } 388 }
383 list_add_tail(&sums->list, list); 389 list_add_tail(&sums->list, &tmplist);
384 } 390 }
385 path->slots[0]++; 391 path->slots[0]++;
386 } 392 }
387 ret = 0; 393 ret = 0;
388fail: 394fail:
395 while (ret < 0 && !list_empty(&tmplist)) {
396 sums = list_first_entry(&tmplist, struct btrfs_ordered_sum, list);
397 list_del(&sums->list);
398 kfree(sums);
399 }
400 list_splice_tail(&tmplist, list);
401
389 btrfs_free_path(path); 402 btrfs_free_path(path);
390 return ret; 403 return ret;
391} 404}
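
The error path added to btrfs_lookup_csums_range() uses a common kernel idiom: accumulate allocations on a local tmplist, free everything on failure, and splice onto the caller's list only on success, so the caller never sees a half-built list. A minimal singly-linked userspace model of the same pattern (struct node and build() are invented here):

#include <stdlib.h>
#include <stdio.h>

struct node { struct node *next; int payload; };

static int build(struct node **out, int n, int fail_at)
{
        struct node *tmp = NULL;

        for (int i = 0; i < n; i++) {
                struct node *nd = (i == fail_at) ? NULL
                                                 : malloc(sizeof(*nd));
                if (!nd) {
                        while (tmp) {           /* free partial results */
                                struct node *dead = tmp;
                                tmp = tmp->next;
                                free(dead);
                        }
                        return -1;              /* caller's list untouched */
                }
                nd->payload = i;
                nd->next = tmp;
                tmp = nd;
        }
        *out = tmp;                             /* "splice" on success */
        return 0;
}

int main(void)
{
        struct node *list = NULL;

        printf("fail path: %d\n", build(&list, 4, 2));  /* -1, list NULL */
        printf("ok path:   %d\n", build(&list, 4, -1)); /*  0, list set  */
        return 0;       /* sketch leaks the final list; a real caller frees it */
}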
@@ -420,7 +433,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
420 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 433 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
421 434
422 ordered = btrfs_lookup_ordered_extent(inode, offset); 435 ordered = btrfs_lookup_ordered_extent(inode, offset);
423 BUG_ON(!ordered); 436 BUG_ON(!ordered); /* Logic error */
424 sums->bytenr = ordered->start; 437 sums->bytenr = ordered->start;
425 438
426 while (bio_index < bio->bi_vcnt) { 439 while (bio_index < bio->bi_vcnt) {
@@ -439,11 +452,11 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
439 452
440 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), 453 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
441 GFP_NOFS); 454 GFP_NOFS);
442 BUG_ON(!sums); 455 BUG_ON(!sums); /* -ENOMEM */
443 sector_sum = sums->sums; 456 sector_sum = sums->sums;
444 sums->len = bytes_left; 457 sums->len = bytes_left;
445 ordered = btrfs_lookup_ordered_extent(inode, offset); 458 ordered = btrfs_lookup_ordered_extent(inode, offset);
446 BUG_ON(!ordered); 459 BUG_ON(!ordered); /* Logic error */
447 sums->bytenr = ordered->start; 460 sums->bytenr = ordered->start;
448 } 461 }
449 462
@@ -483,18 +496,17 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
483 * This calls btrfs_truncate_item with the correct args based on the 496 * This calls btrfs_truncate_item with the correct args based on the
484 * overlap, and fixes up the key as required. 497 * overlap, and fixes up the key as required.
485 */ 498 */
486static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, 499static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
487 struct btrfs_root *root, 500 struct btrfs_root *root,
488 struct btrfs_path *path, 501 struct btrfs_path *path,
489 struct btrfs_key *key, 502 struct btrfs_key *key,
490 u64 bytenr, u64 len) 503 u64 bytenr, u64 len)
491{ 504{
492 struct extent_buffer *leaf; 505 struct extent_buffer *leaf;
493 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 506 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
494 u64 csum_end; 507 u64 csum_end;
495 u64 end_byte = bytenr + len; 508 u64 end_byte = bytenr + len;
496 u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; 509 u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits;
497 int ret;
498 510
499 leaf = path->nodes[0]; 511 leaf = path->nodes[0];
500 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; 512 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
@@ -510,7 +522,7 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
510 */ 522 */
511 u32 new_size = (bytenr - key->offset) >> blocksize_bits; 523 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
512 new_size *= csum_size; 524 new_size *= csum_size;
513 ret = btrfs_truncate_item(trans, root, path, new_size, 1); 525 btrfs_truncate_item(trans, root, path, new_size, 1);
514 } else if (key->offset >= bytenr && csum_end > end_byte && 526 } else if (key->offset >= bytenr && csum_end > end_byte &&
515 end_byte > key->offset) { 527 end_byte > key->offset) {
516 /* 528 /*
@@ -522,15 +534,13 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
522 u32 new_size = (csum_end - end_byte) >> blocksize_bits; 534 u32 new_size = (csum_end - end_byte) >> blocksize_bits;
523 new_size *= csum_size; 535 new_size *= csum_size;
524 536
525 ret = btrfs_truncate_item(trans, root, path, new_size, 0); 537 btrfs_truncate_item(trans, root, path, new_size, 0);
526 538
527 key->offset = end_byte; 539 key->offset = end_byte;
528 ret = btrfs_set_item_key_safe(trans, root, path, key); 540 btrfs_set_item_key_safe(trans, root, path, key);
529 BUG_ON(ret);
530 } else { 541 } else {
531 BUG(); 542 BUG();
532 } 543 }
533 return 0;
534} 544}
535 545
536/* 546/*
@@ -635,13 +645,14 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
635 * item changed size or key 645 * item changed size or key
636 */ 646 */
637 ret = btrfs_split_item(trans, root, path, &key, offset); 647 ret = btrfs_split_item(trans, root, path, &key, offset);
638 BUG_ON(ret && ret != -EAGAIN); 648 if (ret && ret != -EAGAIN) {
649 btrfs_abort_transaction(trans, root, ret);
650 goto out;
651 }
639 652
640 key.offset = end_byte - 1; 653 key.offset = end_byte - 1;
641 } else { 654 } else {
642 ret = truncate_one_csum(trans, root, path, 655 truncate_one_csum(trans, root, path, &key, bytenr, len);
643 &key, bytenr, len);
644 BUG_ON(ret);
645 if (key.offset < bytenr) 656 if (key.offset < bytenr)
646 break; 657 break;
647 } 658 }
@@ -772,7 +783,7 @@ again:
772 if (diff != csum_size) 783 if (diff != csum_size)
773 goto insert; 784 goto insert;
774 785
775 ret = btrfs_extend_item(trans, root, path, diff); 786 btrfs_extend_item(trans, root, path, diff);
776 goto csum; 787 goto csum;
777 } 788 }
778 789
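
The file-item.c hunks above extend the same interface cleanup seen earlier in the series: once a helper has no remaining failure modes, its int return type goes away together with the BUG_ON(ret) at every call site. A minimal sketch of the conversion; struct item and shift_bytes() are hypothetical stand-ins for the real item bookkeeping, which cannot fail:

#include <stdint.h>

struct item { uint32_t size; };

static void shift_bytes(struct item *it, uint32_t new_size)
{
        it->size = new_size;    /* stand-in for the real byte shuffling */
}

/* Before: the return value was always 0, yet every caller checked it. */
static int truncate_item_old(struct item *it, uint32_t new_size)
{
        shift_bytes(it, new_size);
        return 0;
}

/* After: the void signature documents that failure is impossible. */
static void truncate_item_new(struct item *it, uint32_t new_size)
{
        shift_bytes(it, new_size);
}

This is why the btrfs_truncate_item() and btrfs_extend_item() calls in the hunks above and below no longer assign to ret.
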
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e8d06b6b9194..d83260d7498f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -452,7 +452,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
452 split = alloc_extent_map(); 452 split = alloc_extent_map();
453 if (!split2) 453 if (!split2)
454 split2 = alloc_extent_map(); 454 split2 = alloc_extent_map();
455 BUG_ON(!split || !split2); 455 BUG_ON(!split || !split2); /* -ENOMEM */
456 456
457 write_lock(&em_tree->lock); 457 write_lock(&em_tree->lock);
458 em = lookup_extent_mapping(em_tree, start, len); 458 em = lookup_extent_mapping(em_tree, start, len);
@@ -494,7 +494,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
494 split->flags = flags; 494 split->flags = flags;
495 split->compress_type = em->compress_type; 495 split->compress_type = em->compress_type;
496 ret = add_extent_mapping(em_tree, split); 496 ret = add_extent_mapping(em_tree, split);
497 BUG_ON(ret); 497 BUG_ON(ret); /* Logic error */
498 free_extent_map(split); 498 free_extent_map(split);
499 split = split2; 499 split = split2;
500 split2 = NULL; 500 split2 = NULL;
@@ -520,7 +520,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
520 } 520 }
521 521
522 ret = add_extent_mapping(em_tree, split); 522 ret = add_extent_mapping(em_tree, split);
523 BUG_ON(ret); 523 BUG_ON(ret); /* Logic error */
524 free_extent_map(split); 524 free_extent_map(split);
525 split = NULL; 525 split = NULL;
526 } 526 }
@@ -679,7 +679,7 @@ next_slot:
679 root->root_key.objectid, 679 root->root_key.objectid,
680 new_key.objectid, 680 new_key.objectid,
681 start - extent_offset, 0); 681 start - extent_offset, 0);
682 BUG_ON(ret); 682 BUG_ON(ret); /* -ENOMEM */
683 *hint_byte = disk_bytenr; 683 *hint_byte = disk_bytenr;
684 } 684 }
685 key.offset = start; 685 key.offset = start;
@@ -754,7 +754,7 @@ next_slot:
754 root->root_key.objectid, 754 root->root_key.objectid,
755 key.objectid, key.offset - 755 key.objectid, key.offset -
756 extent_offset, 0); 756 extent_offset, 0);
757 BUG_ON(ret); 757 BUG_ON(ret); /* -ENOMEM */
758 inode_sub_bytes(inode, 758 inode_sub_bytes(inode,
759 extent_end - key.offset); 759 extent_end - key.offset);
760 *hint_byte = disk_bytenr; 760 *hint_byte = disk_bytenr;
@@ -770,7 +770,10 @@ next_slot:
770 770
771 ret = btrfs_del_items(trans, root, path, del_slot, 771 ret = btrfs_del_items(trans, root, path, del_slot,
772 del_nr); 772 del_nr);
773 BUG_ON(ret); 773 if (ret) {
774 btrfs_abort_transaction(trans, root, ret);
775 goto out;
776 }
774 777
775 del_nr = 0; 778 del_nr = 0;
776 del_slot = 0; 779 del_slot = 0;
@@ -782,11 +785,13 @@ next_slot:
782 BUG_ON(1); 785 BUG_ON(1);
783 } 786 }
784 787
785 if (del_nr > 0) { 788 if (!ret && del_nr > 0) {
786 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 789 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
787 BUG_ON(ret); 790 if (ret)
791 btrfs_abort_transaction(trans, root, ret);
788 } 792 }
789 793
794out:
790 btrfs_free_path(path); 795 btrfs_free_path(path);
791 return ret; 796 return ret;
792} 797}
@@ -944,7 +949,10 @@ again:
944 btrfs_release_path(path); 949 btrfs_release_path(path);
945 goto again; 950 goto again;
946 } 951 }
947 BUG_ON(ret < 0); 952 if (ret < 0) {
953 btrfs_abort_transaction(trans, root, ret);
954 goto out;
955 }
948 956
949 leaf = path->nodes[0]; 957 leaf = path->nodes[0];
950 fi = btrfs_item_ptr(leaf, path->slots[0] - 1, 958 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
@@ -963,7 +971,7 @@ again:
963 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 971 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
964 root->root_key.objectid, 972 root->root_key.objectid,
965 ino, orig_offset, 0); 973 ino, orig_offset, 0);
966 BUG_ON(ret); 974 BUG_ON(ret); /* -ENOMEM */
967 975
968 if (split == start) { 976 if (split == start) {
969 key.offset = start; 977 key.offset = start;
@@ -990,7 +998,7 @@ again:
990 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 998 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
991 0, root->root_key.objectid, 999 0, root->root_key.objectid,
992 ino, orig_offset, 0); 1000 ino, orig_offset, 0);
993 BUG_ON(ret); 1001 BUG_ON(ret); /* -ENOMEM */
994 } 1002 }
995 other_start = 0; 1003 other_start = 0;
996 other_end = start; 1004 other_end = start;
@@ -1007,7 +1015,7 @@ again:
1007 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1015 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1008 0, root->root_key.objectid, 1016 0, root->root_key.objectid,
1009 ino, orig_offset, 0); 1017 ino, orig_offset, 0);
1010 BUG_ON(ret); 1018 BUG_ON(ret); /* -ENOMEM */
1011 } 1019 }
1012 if (del_nr == 0) { 1020 if (del_nr == 0) {
1013 fi = btrfs_item_ptr(leaf, path->slots[0], 1021 fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -1025,7 +1033,10 @@ again:
1025 btrfs_mark_buffer_dirty(leaf); 1033 btrfs_mark_buffer_dirty(leaf);
1026 1034
1027 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 1035 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
1028 BUG_ON(ret); 1036 if (ret < 0) {
1037 btrfs_abort_transaction(trans, root, ret);
1038 goto out;
1039 }
1029 } 1040 }
1030out: 1041out:
1031 btrfs_free_path(path); 1042 btrfs_free_path(path);
@@ -1105,8 +1116,7 @@ again:
1105 if (start_pos < inode->i_size) { 1116 if (start_pos < inode->i_size) {
1106 struct btrfs_ordered_extent *ordered; 1117 struct btrfs_ordered_extent *ordered;
1107 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1118 lock_extent_bits(&BTRFS_I(inode)->io_tree,
1108 start_pos, last_pos - 1, 0, &cached_state, 1119 start_pos, last_pos - 1, 0, &cached_state);
1109 GFP_NOFS);
1110 ordered = btrfs_lookup_first_ordered_extent(inode, 1120 ordered = btrfs_lookup_first_ordered_extent(inode,
1111 last_pos - 1); 1121 last_pos - 1);
1112 if (ordered && 1122 if (ordered &&
@@ -1638,7 +1648,7 @@ static long btrfs_fallocate(struct file *file, int mode,
1638 * transaction 1648 * transaction
1639 */ 1649 */
1640 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, 1650 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
1641 locked_end, 0, &cached_state, GFP_NOFS); 1651 locked_end, 0, &cached_state);
1642 ordered = btrfs_lookup_first_ordered_extent(inode, 1652 ordered = btrfs_lookup_first_ordered_extent(inode,
1643 alloc_end - 1); 1653 alloc_end - 1);
1644 if (ordered && 1654 if (ordered &&
@@ -1667,7 +1677,13 @@ static long btrfs_fallocate(struct file *file, int mode,
1667 1677
1668 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 1678 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
1669 alloc_end - cur_offset, 0); 1679 alloc_end - cur_offset, 0);
1670 BUG_ON(IS_ERR_OR_NULL(em)); 1680 if (IS_ERR_OR_NULL(em)) {
1681 if (!em)
1682 ret = -ENOMEM;
1683 else
1684 ret = PTR_ERR(em);
1685 break;
1686 }
1671 last_byte = min(extent_map_end(em), alloc_end); 1687 last_byte = min(extent_map_end(em), alloc_end);
1672 actual_end = min_t(u64, extent_map_end(em), offset + len); 1688 actual_end = min_t(u64, extent_map_end(em), offset + len);
1673 last_byte = (last_byte + mask) & ~mask; 1689 last_byte = (last_byte + mask) & ~mask;
@@ -1737,7 +1753,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
1737 return -ENXIO; 1753 return -ENXIO;
1738 1754
1739 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, 1755 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
1740 &cached_state, GFP_NOFS); 1756 &cached_state);
1741 1757
1742 /* 1758 /*
1743 * Delalloc is such a pain. If we have a hole and we have pending 1759 * Delalloc is such a pain. If we have a hole and we have pending
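
Two treatments alternate through the file.c hunks above: failures a running system can realistically hit (chiefly from btrfs_del_items()) gain an abort-and-unwind path, while each BUG_ON() that stays behind is annotated with the only condition that could trip it (/* -ENOMEM */, /* Logic error */). A sketch of the first shape; delete_range() and update_meta() are hypothetical stand-ins for the tree operations, and btrfs_abort_transaction() is the real helper, declared here only to keep the sketch self-contained:

struct btrfs_trans_handle;
struct btrfs_root;

void btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, int error);

int delete_range(struct btrfs_trans_handle *trans);    /* hypothetical */
int update_meta(struct btrfs_trans_handle *trans);     /* hypothetical */

static int modify_tree_sketch(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root)
{
        int ret;

        ret = delete_range(trans);
        if (ret) {
                /* Poison the transaction instead of crashing the box:
                 * the fs flips read-only and the commit is refused. */
                btrfs_abort_transaction(trans, root, ret);
                goto out;
        }

        ret = update_meta(trans);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
out:
        return ret;
}
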
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index b02e379b14c7..e88330d3df52 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -230,11 +230,13 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
230 230
231 if (ret) { 231 if (ret) {
232 trans->block_rsv = rsv; 232 trans->block_rsv = rsv;
233 WARN_ON(1); 233 btrfs_abort_transaction(trans, root, ret);
234 return ret; 234 return ret;
235 } 235 }
236 236
237 ret = btrfs_update_inode(trans, root, inode); 237 ret = btrfs_update_inode(trans, root, inode);
238 if (ret)
239 btrfs_abort_transaction(trans, root, ret);
238 trans->block_rsv = rsv; 240 trans->block_rsv = rsv;
239 241
240 return ret; 242 return ret;
@@ -869,7 +871,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
869 io_ctl_prepare_pages(&io_ctl, inode, 0); 871 io_ctl_prepare_pages(&io_ctl, inode, 0);
870 872
871 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 873 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
872 0, &cached_state, GFP_NOFS); 874 0, &cached_state);
873 875
874 node = rb_first(&ctl->free_space_offset); 876 node = rb_first(&ctl->free_space_offset);
875 if (!node && cluster) { 877 if (!node && cluster) {
@@ -1948,14 +1950,14 @@ again:
1948 */ 1950 */
1949 ret = btrfs_add_free_space(block_group, old_start, 1951 ret = btrfs_add_free_space(block_group, old_start,
1950 offset - old_start); 1952 offset - old_start);
1951 WARN_ON(ret); 1953 WARN_ON(ret); /* -ENOMEM */
1952 goto out; 1954 goto out;
1953 } 1955 }
1954 1956
1955 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1957 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1956 if (ret == -EAGAIN) 1958 if (ret == -EAGAIN)
1957 goto again; 1959 goto again;
1958 BUG_ON(ret); 1960 BUG_ON(ret); /* logic error */
1959out_lock: 1961out_lock:
1960 spin_unlock(&ctl->tree_lock); 1962 spin_unlock(&ctl->tree_lock);
1961out: 1963out:
@@ -2346,7 +2348,7 @@ again:
2346 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2348 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2347 ret = tree_insert_offset(&cluster->root, entry->offset, 2349 ret = tree_insert_offset(&cluster->root, entry->offset,
2348 &entry->offset_index, 1); 2350 &entry->offset_index, 1);
2349 BUG_ON(ret); 2351 BUG_ON(ret); /* -EEXIST; Logic error */
2350 2352
2351 trace_btrfs_setup_cluster(block_group, cluster, 2353 trace_btrfs_setup_cluster(block_group, cluster,
2352 total_found * block_group->sectorsize, 1); 2354 total_found * block_group->sectorsize, 1);
@@ -2439,7 +2441,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2439 ret = tree_insert_offset(&cluster->root, entry->offset, 2441 ret = tree_insert_offset(&cluster->root, entry->offset,
2440 &entry->offset_index, 0); 2442 &entry->offset_index, 0);
2441 total_size += entry->bytes; 2443 total_size += entry->bytes;
2442 BUG_ON(ret); 2444 BUG_ON(ret); /* -EEXIST; Logic error */
2443 } while (node && entry != last); 2445 } while (node && entry != last);
2444 2446
2445 cluster->max_size = max_extent; 2447 cluster->max_size = max_extent;
@@ -2830,6 +2832,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2830 int ret; 2832 int ret;
2831 2833
2832 ret = search_bitmap(ctl, entry, &offset, &count); 2834 ret = search_bitmap(ctl, entry, &offset, &count);
2835 /* Logic error; Should be empty if it can't find anything */
2833 BUG_ON(ret); 2836 BUG_ON(ret);
2834 2837
2835 ino = offset; 2838 ino = offset;
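
An independent API change also runs through this file, file.c above, and inode.c below: lock_extent_bits() and lock_extent() lose their trailing GFP_NOFS argument, the mask being fixed inside the implementation since no caller ever passed anything else. Sketched against simplified types (the old prototype is reconstructed from the call sites shown here):

#include <stdint.h>

struct extent_io_tree;
struct extent_state;

/* Old shape: the final mask was GFP_NOFS at every call site. */
int lock_extent_bits_old(struct extent_io_tree *tree, uint64_t start,
                         uint64_t end, int bits, struct extent_state **cached,
                         unsigned int gfp_mask);

/* New shape: one argument fewer, identical behaviour. */
int lock_extent_bits(struct extent_io_tree *tree, uint64_t start,
                     uint64_t end, int bits, struct extent_state **cached);

/* Call sites shrink accordingly:
 *     lock_extent_bits(tree, start, end, 0, &cached, GFP_NOFS);  before
 *     lock_extent_bits(tree, start, end, 0, &cached);            after
 */
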
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index baa74f3db691..a13cf1a96c73 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -19,6 +19,7 @@
19#include "ctree.h" 19#include "ctree.h"
20#include "disk-io.h" 20#include "disk-io.h"
21#include "transaction.h" 21#include "transaction.h"
22#include "print-tree.h"
22 23
23static int find_name_in_backref(struct btrfs_path *path, const char *name, 24static int find_name_in_backref(struct btrfs_path *path, const char *name,
24 int name_len, struct btrfs_inode_ref **ref_ret) 25 int name_len, struct btrfs_inode_ref **ref_ret)
@@ -128,13 +129,14 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
128 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); 129 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
129 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 130 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
130 item_size - (ptr + sub_item_len - item_start)); 131 item_size - (ptr + sub_item_len - item_start));
131 ret = btrfs_truncate_item(trans, root, path, 132 btrfs_truncate_item(trans, root, path,
132 item_size - sub_item_len, 1); 133 item_size - sub_item_len, 1);
133out: 134out:
134 btrfs_free_path(path); 135 btrfs_free_path(path);
135 return ret; 136 return ret;
136} 137}
137 138
139/* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
138int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, 140int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
139 struct btrfs_root *root, 141 struct btrfs_root *root,
140 const char *name, int name_len, 142 const char *name, int name_len,
@@ -165,7 +167,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
165 goto out; 167 goto out;
166 168
167 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 169 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
168 ret = btrfs_extend_item(trans, root, path, ins_len); 170 btrfs_extend_item(trans, root, path, ins_len);
169 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 171 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
170 struct btrfs_inode_ref); 172 struct btrfs_inode_ref);
171 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); 173 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
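
btrfs_insert_inode_ref() handles -EEXIST from the item search by growing the existing INODE_REF item in place: btrfs_extend_item() (now void as well) enlarges it by ins_len and the new ref is written at the old item size. The same append-in-place idea on a plain heap buffer, as a loose analogy only; in btrfs the bytes live in an extent_buffer, not the heap:

#include <stdlib.h>
#include <string.h>

struct item {
        size_t size;
        unsigned char *data;
};

/* Analog of btrfs_extend_item(): grow the item, keep existing bytes.
 * Sketch only: a real version must handle allocation failure. */
static void extend_item(struct item *it, size_t ins_len)
{
        it->data = realloc(it->data, it->size + ins_len);
        it->size += ins_len;
}

/* Append a new record exactly where the old item ended. */
static void append_record(struct item *it, const void *rec, size_t rec_len)
{
        size_t old_size = it->size;

        extend_item(it, rec_len);
        memcpy(it->data + old_size, rec, rec_len);
}
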
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index ee15d88b33d2..b1a1c929ba80 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -178,7 +178,7 @@ static void start_caching(struct btrfs_root *root)
178 178
179 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", 179 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
180 root->root_key.objectid); 180 root->root_key.objectid);
181 BUG_ON(IS_ERR(tsk)); 181 BUG_ON(IS_ERR(tsk)); /* -ENOMEM */
182} 182}
183 183
184int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) 184int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
@@ -271,7 +271,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
271 break; 271 break;
272 272
273 info = rb_entry(n, struct btrfs_free_space, offset_index); 273 info = rb_entry(n, struct btrfs_free_space, offset_index);
274 BUG_ON(info->bitmap); 274 BUG_ON(info->bitmap); /* Logic error */
275 275
276 if (info->offset > root->cache_progress) 276 if (info->offset > root->cache_progress)
277 goto free; 277 goto free;
@@ -439,17 +439,16 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
439 if (ret) 439 if (ret)
440 goto out; 440 goto out;
441 trace_btrfs_space_reservation(root->fs_info, "ino_cache", 441 trace_btrfs_space_reservation(root->fs_info, "ino_cache",
442 (u64)(unsigned long)trans, 442 trans->transid, trans->bytes_reserved, 1);
443 trans->bytes_reserved, 1);
444again: 443again:
445 inode = lookup_free_ino_inode(root, path); 444 inode = lookup_free_ino_inode(root, path);
446 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 445 if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
447 ret = PTR_ERR(inode); 446 ret = PTR_ERR(inode);
448 goto out_release; 447 goto out_release;
449 } 448 }
450 449
451 if (IS_ERR(inode)) { 450 if (IS_ERR(inode)) {
452 BUG_ON(retry); 451 BUG_ON(retry); /* Logic error */
453 retry = true; 452 retry = true;
454 453
455 ret = create_free_ino_inode(root, trans, path); 454 ret = create_free_ino_inode(root, trans, path);
@@ -460,12 +459,17 @@ again:
460 459
461 BTRFS_I(inode)->generation = 0; 460 BTRFS_I(inode)->generation = 0;
462 ret = btrfs_update_inode(trans, root, inode); 461 ret = btrfs_update_inode(trans, root, inode);
463 WARN_ON(ret); 462 if (ret) {
463 btrfs_abort_transaction(trans, root, ret);
464 goto out_put;
465 }
464 466
465 if (i_size_read(inode) > 0) { 467 if (i_size_read(inode) > 0) {
466 ret = btrfs_truncate_free_space_cache(root, trans, path, inode); 468 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
467 if (ret) 469 if (ret) {
470 btrfs_abort_transaction(trans, root, ret);
468 goto out_put; 471 goto out_put;
472 }
469 } 473 }
470 474
471 spin_lock(&root->cache_lock); 475 spin_lock(&root->cache_lock);
@@ -502,8 +506,7 @@ out_put:
502 iput(inode); 506 iput(inode);
503out_release: 507out_release:
504 trace_btrfs_space_reservation(root->fs_info, "ino_cache", 508 trace_btrfs_space_reservation(root->fs_info, "ino_cache",
505 (u64)(unsigned long)trans, 509 trans->transid, trans->bytes_reserved, 0);
506 trans->bytes_reserved, 0);
507 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); 510 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
508out: 511out:
509 trans->block_rsv = rsv; 512 trans->block_rsv = rsv;
@@ -532,7 +535,7 @@ static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
532 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 535 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
533 if (ret < 0) 536 if (ret < 0)
534 goto error; 537 goto error;
535 BUG_ON(ret == 0); 538 BUG_ON(ret == 0); /* Corruption */
536 if (path->slots[0] > 0) { 539 if (path->slots[0] > 0) {
537 slot = path->slots[0] - 1; 540 slot = path->slots[0] - 1;
538 l = path->nodes[0]; 541 l = path->nodes[0];
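
The tracepoint change in btrfs_save_ino_cache() is about event pairing: the reserve and release events must carry the same key for a consumer to match them, and trans->transid is stable and meaningful where the old (u64)(unsigned long)trans cast merely leaked a kernel pointer. Roughly, with trace_event() as a hypothetical stand-in for trace_btrfs_space_reservation():

#include <stdint.h>

struct trans {
        uint64_t transid;
        uint64_t bytes_reserved;
};

void trace_event(const char *type, uint64_t key, uint64_t bytes, int reserve);

static void save_cache_sketch(struct trans *t)
{
        trace_event("ino_cache", t->transid, t->bytes_reserved, 1);
        /* ... write out the cache ... */
        trace_event("ino_cache", t->transid, t->bytes_reserved, 0);
}
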
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3a0b5c1f9d31..115bc05e42b0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -150,7 +150,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
150 inode_add_bytes(inode, size); 150 inode_add_bytes(inode, size);
151 ret = btrfs_insert_empty_item(trans, root, path, &key, 151 ret = btrfs_insert_empty_item(trans, root, path, &key,
152 datasize); 152 datasize);
153 BUG_ON(ret);
154 if (ret) { 153 if (ret) {
155 err = ret; 154 err = ret;
156 goto fail; 155 goto fail;
@@ -206,9 +205,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
206 * could end up racing with unlink. 205 * could end up racing with unlink.
207 */ 206 */
208 BTRFS_I(inode)->disk_i_size = inode->i_size; 207 BTRFS_I(inode)->disk_i_size = inode->i_size;
209 btrfs_update_inode(trans, root, inode); 208 ret = btrfs_update_inode(trans, root, inode);
210 209
211 return 0; 210 return ret;
212fail: 211fail:
213 btrfs_free_path(path); 212 btrfs_free_path(path);
214 return err; 213 return err;
@@ -250,14 +249,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
250 249
251 ret = btrfs_drop_extents(trans, inode, start, aligned_end, 250 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
252 &hint_byte, 1); 251 &hint_byte, 1);
253 BUG_ON(ret); 252 if (ret)
253 return ret;
254 254
255 if (isize > actual_end) 255 if (isize > actual_end)
256 inline_len = min_t(u64, isize, actual_end); 256 inline_len = min_t(u64, isize, actual_end);
257 ret = insert_inline_extent(trans, root, inode, start, 257 ret = insert_inline_extent(trans, root, inode, start,
258 inline_len, compressed_size, 258 inline_len, compressed_size,
259 compress_type, compressed_pages); 259 compress_type, compressed_pages);
260 BUG_ON(ret); 260 if (ret) {
261 btrfs_abort_transaction(trans, root, ret);
262 return ret;
263 }
261 btrfs_delalloc_release_metadata(inode, end + 1 - start); 264 btrfs_delalloc_release_metadata(inode, end + 1 - start);
262 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 265 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
263 return 0; 266 return 0;
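
Note the asymmetry in cow_file_range_inline() above: an error from btrfs_drop_extents() is returned as-is for the caller to handle, but once insert_inline_extent() fails the transaction already contains the drop, and letting it commit would persist a half-applied change, so it is aborted on the spot. The rule of thumb, sketched with hypothetical step helpers:

struct btrfs_trans_handle;
struct btrfs_root;

void btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, int error);

int first_step(struct btrfs_trans_handle *trans);       /* hypothetical */
int second_step(struct btrfs_trans_handle *trans);      /* hypothetical */

static int two_step_sketch(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root)
{
        int ret;

        ret = first_step(trans);
        if (ret)
                return ret;     /* caller decides; tree not yet half-modified */

        ret = second_step(trans);
        if (ret) {
                /* first_step() already changed the tree: abort so the
                 * partial modification can never reach disk. */
                btrfs_abort_transaction(trans, root, ret);
                return ret;
        }
        return 0;
}
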
@@ -293,7 +296,7 @@ static noinline int add_async_extent(struct async_cow *cow,
293 struct async_extent *async_extent; 296 struct async_extent *async_extent;
294 297
295 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); 298 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
296 BUG_ON(!async_extent); 299 BUG_ON(!async_extent); /* -ENOMEM */
297 async_extent->start = start; 300 async_extent->start = start;
298 async_extent->ram_size = ram_size; 301 async_extent->ram_size = ram_size;
299 async_extent->compressed_size = compressed_size; 302 async_extent->compressed_size = compressed_size;
@@ -344,8 +347,9 @@ static noinline int compress_file_range(struct inode *inode,
344 int will_compress; 347 int will_compress;
345 int compress_type = root->fs_info->compress_type; 348 int compress_type = root->fs_info->compress_type;
346 349
347 /* if this is a small write inside eof, kick off a defrag */ 350 /* if this is a small write inside eof, kick off a defrag */
348 if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024) 351 if ((end - start + 1) < 16 * 1024 &&
352 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
349 btrfs_add_inode_defrag(NULL, inode); 353 btrfs_add_inode_defrag(NULL, inode);
350 354
351 actual_end = min_t(u64, isize, end + 1); 355 actual_end = min_t(u64, isize, end + 1);
@@ -433,7 +437,11 @@ again:
433cont: 437cont:
434 if (start == 0) { 438 if (start == 0) {
435 trans = btrfs_join_transaction(root); 439 trans = btrfs_join_transaction(root);
436 BUG_ON(IS_ERR(trans)); 440 if (IS_ERR(trans)) {
441 ret = PTR_ERR(trans);
442 trans = NULL;
443 goto cleanup_and_out;
444 }
437 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 445 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
438 446
439 /* lets try to make an inline extent */ 447 /* lets try to make an inline extent */
@@ -450,11 +458,11 @@ cont:
450 total_compressed, 458 total_compressed,
451 compress_type, pages); 459 compress_type, pages);
452 } 460 }
453 if (ret == 0) { 461 if (ret <= 0) {
454 /* 462 /*
455 * inline extent creation worked, we don't need 463 * inline extent creation worked or returned error,
456 * to create any more async work items. Unlock 464 * we don't need to create any more async work items.
457 * and free up our temp pages. 465 * Unlock and free up our temp pages.
458 */ 466 */
459 extent_clear_unlock_delalloc(inode, 467 extent_clear_unlock_delalloc(inode,
460 &BTRFS_I(inode)->io_tree, 468 &BTRFS_I(inode)->io_tree,
@@ -547,7 +555,7 @@ cleanup_and_bail_uncompressed:
547 } 555 }
548 556
549out: 557out:
550 return 0; 558 return ret;
551 559
552free_pages_out: 560free_pages_out:
553 for (i = 0; i < nr_pages_ret; i++) { 561 for (i = 0; i < nr_pages_ret; i++) {
@@ -557,6 +565,20 @@ free_pages_out:
557 kfree(pages); 565 kfree(pages);
558 566
559 goto out; 567 goto out;
568
569cleanup_and_out:
570 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
571 start, end, NULL,
572 EXTENT_CLEAR_UNLOCK_PAGE |
573 EXTENT_CLEAR_DIRTY |
574 EXTENT_CLEAR_DELALLOC |
575 EXTENT_SET_WRITEBACK |
576 EXTENT_END_WRITEBACK);
577 if (!trans || IS_ERR(trans))
578 btrfs_error(root->fs_info, ret, "Failed to join transaction");
579 else
580 btrfs_abort_transaction(trans, root, ret);
581 goto free_pages_out;
560} 582}
561 583
562/* 584/*
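
The new cleanup_and_out: label above also shows the fallback for when there is no live handle to abort: if btrfs_join_transaction() itself failed, the error is raised against the whole filesystem with btrfs_error(). In outline, with the kernel helpers declared as stand-ins so the sketch is self-contained:

struct btrfs_fs_info;
struct btrfs_trans_handle;
struct btrfs_root;

void btrfs_error(struct btrfs_fs_info *fs_info, int error, const char *fmt, ...);
void btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, int error);
long IS_ERR(const void *ptr);   /* stand-in for the <linux/err.h> macro */

static void report_failure(struct btrfs_fs_info *fs_info,
                           struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, int ret)
{
        if (!trans || IS_ERR(trans))
                btrfs_error(fs_info, ret, "Failed to join transaction");
        else
                btrfs_abort_transaction(trans, root, ret);
}
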
@@ -597,7 +619,7 @@ retry:
597 619
598 lock_extent(io_tree, async_extent->start, 620 lock_extent(io_tree, async_extent->start,
599 async_extent->start + 621 async_extent->start +
600 async_extent->ram_size - 1, GFP_NOFS); 622 async_extent->ram_size - 1);
601 623
602 /* allocate blocks */ 624 /* allocate blocks */
603 ret = cow_file_range(inode, async_cow->locked_page, 625 ret = cow_file_range(inode, async_cow->locked_page,
@@ -606,6 +628,8 @@ retry:
606 async_extent->ram_size - 1, 628 async_extent->ram_size - 1,
607 &page_started, &nr_written, 0); 629 &page_started, &nr_written, 0);
608 630
631 /* JDM XXX */
632
609 /* 633 /*
610 * if page_started, cow_file_range inserted an 634 * if page_started, cow_file_range inserted an
611 * inline extent and took care of all the unlocking 635 * inline extent and took care of all the unlocking
@@ -625,18 +649,21 @@ retry:
625 } 649 }
626 650
627 lock_extent(io_tree, async_extent->start, 651 lock_extent(io_tree, async_extent->start,
628 async_extent->start + async_extent->ram_size - 1, 652 async_extent->start + async_extent->ram_size - 1);
629 GFP_NOFS);
630 653
631 trans = btrfs_join_transaction(root); 654 trans = btrfs_join_transaction(root);
632 BUG_ON(IS_ERR(trans)); 655 if (IS_ERR(trans)) {
633 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 656 ret = PTR_ERR(trans);
634 ret = btrfs_reserve_extent(trans, root, 657 } else {
658 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
659 ret = btrfs_reserve_extent(trans, root,
635 async_extent->compressed_size, 660 async_extent->compressed_size,
636 async_extent->compressed_size, 661 async_extent->compressed_size,
637 0, alloc_hint, 662 0, alloc_hint, &ins, 1);
638 (u64)-1, &ins, 1); 663 if (ret)
639 btrfs_end_transaction(trans, root); 664 btrfs_abort_transaction(trans, root, ret);
665 btrfs_end_transaction(trans, root);
666 }
640 667
641 if (ret) { 668 if (ret) {
642 int i; 669 int i;
@@ -649,8 +676,10 @@ retry:
649 async_extent->pages = NULL; 676 async_extent->pages = NULL;
650 unlock_extent(io_tree, async_extent->start, 677 unlock_extent(io_tree, async_extent->start,
651 async_extent->start + 678 async_extent->start +
652 async_extent->ram_size - 1, GFP_NOFS); 679 async_extent->ram_size - 1);
653 goto retry; 680 if (ret == -ENOSPC)
681 goto retry;
682 goto out_free; /* JDM: Requeue? */
654 } 683 }
655 684
656 /* 685 /*
@@ -662,7 +691,7 @@ retry:
662 async_extent->ram_size - 1, 0); 691 async_extent->ram_size - 1, 0);
663 692
664 em = alloc_extent_map(); 693 em = alloc_extent_map();
665 BUG_ON(!em); 694 BUG_ON(!em); /* -ENOMEM */
666 em->start = async_extent->start; 695 em->start = async_extent->start;
667 em->len = async_extent->ram_size; 696 em->len = async_extent->ram_size;
668 em->orig_start = em->start; 697 em->orig_start = em->start;
@@ -694,7 +723,7 @@ retry:
694 ins.offset, 723 ins.offset,
695 BTRFS_ORDERED_COMPRESSED, 724 BTRFS_ORDERED_COMPRESSED,
696 async_extent->compress_type); 725 async_extent->compress_type);
697 BUG_ON(ret); 726 BUG_ON(ret); /* -ENOMEM */
698 727
699 /* 728 /*
700 * clear dirty, set writeback and unlock the pages. 729 * clear dirty, set writeback and unlock the pages.
@@ -716,13 +745,17 @@ retry:
716 ins.offset, async_extent->pages, 745 ins.offset, async_extent->pages,
717 async_extent->nr_pages); 746 async_extent->nr_pages);
718 747
719 BUG_ON(ret); 748 BUG_ON(ret); /* -ENOMEM */
720 alloc_hint = ins.objectid + ins.offset; 749 alloc_hint = ins.objectid + ins.offset;
721 kfree(async_extent); 750 kfree(async_extent);
722 cond_resched(); 751 cond_resched();
723 } 752 }
724 753 ret = 0;
725 return 0; 754out:
755 return ret;
756out_free:
757 kfree(async_extent);
758 goto out;
726} 759}
727 760
728static u64 get_extent_allocation_hint(struct inode *inode, u64 start, 761static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
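
The retry logic in the async extent path above is error-code sensitive: only -ENOSPC from the reservation loops back (the compressed pages were just freed, so the retry falls through to the uncompressed path), while any other error frees the async extent and bails, as the JDM requeue comment flags. The shape, with reserve_space() and release_pages() as hypothetical stand-ins:

#include <errno.h>

int reserve_space(void);        /* hypothetical; returns 0 or -errno */
void release_pages(void);       /* hypothetical */

static int alloc_with_retry_sketch(void)
{
        int ret;
retry:
        ret = reserve_space();
        if (ret) {
                release_pages();
                if (ret == -ENOSPC)
                        goto retry;     /* in the real code the retry takes
                                         * the uncompressed path, so it does
                                         * not simply spin here */
                return ret;             /* anything else is fatal here */
        }
        return 0;
}
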
@@ -791,7 +824,18 @@ static noinline int cow_file_range(struct inode *inode,
791 824
792 BUG_ON(btrfs_is_free_space_inode(root, inode)); 825 BUG_ON(btrfs_is_free_space_inode(root, inode));
793 trans = btrfs_join_transaction(root); 826 trans = btrfs_join_transaction(root);
794 BUG_ON(IS_ERR(trans)); 827 if (IS_ERR(trans)) {
828 extent_clear_unlock_delalloc(inode,
829 &BTRFS_I(inode)->io_tree,
830 start, end, NULL,
831 EXTENT_CLEAR_UNLOCK_PAGE |
832 EXTENT_CLEAR_UNLOCK |
833 EXTENT_CLEAR_DELALLOC |
834 EXTENT_CLEAR_DIRTY |
835 EXTENT_SET_WRITEBACK |
836 EXTENT_END_WRITEBACK);
837 return PTR_ERR(trans);
838 }
795 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 839 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
796 840
797 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 841 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@ -800,7 +844,8 @@ static noinline int cow_file_range(struct inode *inode,
800 ret = 0; 844 ret = 0;
801 845
802 /* if this is a small write inside eof, kick off defrag */ 846 /* if this is a small write inside eof, kick off defrag */
803 if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024) 847 if (num_bytes < 64 * 1024 &&
848 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
804 btrfs_add_inode_defrag(trans, inode); 849 btrfs_add_inode_defrag(trans, inode);
805 850
806 if (start == 0) { 851 if (start == 0) {
@@ -821,8 +866,10 @@ static noinline int cow_file_range(struct inode *inode,
821 *nr_written = *nr_written + 866 *nr_written = *nr_written +
822 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 867 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
823 *page_started = 1; 868 *page_started = 1;
824 ret = 0;
825 goto out; 869 goto out;
870 } else if (ret < 0) {
871 btrfs_abort_transaction(trans, root, ret);
872 goto out_unlock;
826 } 873 }
827 } 874 }
828 875
@@ -838,11 +885,14 @@ static noinline int cow_file_range(struct inode *inode,
838 cur_alloc_size = disk_num_bytes; 885 cur_alloc_size = disk_num_bytes;
839 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 886 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
840 root->sectorsize, 0, alloc_hint, 887 root->sectorsize, 0, alloc_hint,
841 (u64)-1, &ins, 1); 888 &ins, 1);
842 BUG_ON(ret); 889 if (ret < 0) {
890 btrfs_abort_transaction(trans, root, ret);
891 goto out_unlock;
892 }
843 893
844 em = alloc_extent_map(); 894 em = alloc_extent_map();
845 BUG_ON(!em); 895 BUG_ON(!em); /* -ENOMEM */
846 em->start = start; 896 em->start = start;
847 em->orig_start = em->start; 897 em->orig_start = em->start;
848 ram_size = ins.offset; 898 ram_size = ins.offset;
@@ -868,13 +918,16 @@ static noinline int cow_file_range(struct inode *inode,
868 cur_alloc_size = ins.offset; 918 cur_alloc_size = ins.offset;
869 ret = btrfs_add_ordered_extent(inode, start, ins.objectid, 919 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
870 ram_size, cur_alloc_size, 0); 920 ram_size, cur_alloc_size, 0);
871 BUG_ON(ret); 921 BUG_ON(ret); /* -ENOMEM */
872 922
873 if (root->root_key.objectid == 923 if (root->root_key.objectid ==
874 BTRFS_DATA_RELOC_TREE_OBJECTID) { 924 BTRFS_DATA_RELOC_TREE_OBJECTID) {
875 ret = btrfs_reloc_clone_csums(inode, start, 925 ret = btrfs_reloc_clone_csums(inode, start,
876 cur_alloc_size); 926 cur_alloc_size);
877 BUG_ON(ret); 927 if (ret) {
928 btrfs_abort_transaction(trans, root, ret);
929 goto out_unlock;
930 }
878 } 931 }
879 932
880 if (disk_num_bytes < cur_alloc_size) 933 if (disk_num_bytes < cur_alloc_size)
@@ -899,11 +952,23 @@ static noinline int cow_file_range(struct inode *inode,
899 alloc_hint = ins.objectid + ins.offset; 952 alloc_hint = ins.objectid + ins.offset;
900 start += cur_alloc_size; 953 start += cur_alloc_size;
901 } 954 }
902out:
903 ret = 0; 955 ret = 0;
956out:
904 btrfs_end_transaction(trans, root); 957 btrfs_end_transaction(trans, root);
905 958
906 return ret; 959 return ret;
960out_unlock:
961 extent_clear_unlock_delalloc(inode,
962 &BTRFS_I(inode)->io_tree,
963 start, end, NULL,
964 EXTENT_CLEAR_UNLOCK_PAGE |
965 EXTENT_CLEAR_UNLOCK |
966 EXTENT_CLEAR_DELALLOC |
967 EXTENT_CLEAR_DIRTY |
968 EXTENT_SET_WRITEBACK |
969 EXTENT_END_WRITEBACK);
970
971 goto out;
907} 972}
908 973
909/* 974/*
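
Every new error exit from cow_file_range() has to undo the page state the function owns, which is what the out_unlock: label above does: clear the delalloc and dirty bits across the whole remaining range, cycle writeback so waiters are released, and unlock the pages. Schematically; the EXTENT_CLEAR_*/EXTENT_SET_* names are the real flags, but the bit values and the composite macro here are illustrative only (the code spells the mask out inline):

#define EXTENT_CLEAR_UNLOCK_PAGE  (1 << 0)  /* unlock each page */
#define EXTENT_CLEAR_UNLOCK       (1 << 1)  /* unlock the extent range */
#define EXTENT_CLEAR_DELALLOC     (1 << 2)  /* drop delalloc accounting */
#define EXTENT_CLEAR_DIRTY        (1 << 3)  /* these pages won't be written */
#define EXTENT_SET_WRITEBACK      (1 << 4)  /* start writeback... */
#define EXTENT_END_WRITEBACK      (1 << 5)  /* ...and end it, waking waiters */

#define COW_ERROR_CLEANUP                                       \
        (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK |       \
         EXTENT_CLEAR_DELALLOC | EXTENT_CLEAR_DIRTY |           \
         EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK)
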
@@ -969,7 +1034,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
969 1, 0, NULL, GFP_NOFS); 1034 1, 0, NULL, GFP_NOFS);
970 while (start < end) { 1035 while (start < end) {
971 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); 1036 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
972 BUG_ON(!async_cow); 1037 BUG_ON(!async_cow); /* -ENOMEM */
973 async_cow->inode = inode; 1038 async_cow->inode = inode;
974 async_cow->root = root; 1039 async_cow->root = root;
975 async_cow->locked_page = locked_page; 1040 async_cow->locked_page = locked_page;
@@ -1060,7 +1125,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1060 u64 disk_bytenr; 1125 u64 disk_bytenr;
1061 u64 num_bytes; 1126 u64 num_bytes;
1062 int extent_type; 1127 int extent_type;
1063 int ret; 1128 int ret, err;
1064 int type; 1129 int type;
1065 int nocow; 1130 int nocow;
1066 int check_prev = 1; 1131 int check_prev = 1;
@@ -1078,7 +1143,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1078 else 1143 else
1079 trans = btrfs_join_transaction(root); 1144 trans = btrfs_join_transaction(root);
1080 1145
1081 BUG_ON(IS_ERR(trans)); 1146 if (IS_ERR(trans)) {
1147 btrfs_free_path(path);
1148 return PTR_ERR(trans);
1149 }
1150
1082 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1151 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1083 1152
1084 cow_start = (u64)-1; 1153 cow_start = (u64)-1;
@@ -1086,7 +1155,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1086 while (1) { 1155 while (1) {
1087 ret = btrfs_lookup_file_extent(trans, root, path, ino, 1156 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1088 cur_offset, 0); 1157 cur_offset, 0);
1089 BUG_ON(ret < 0); 1158 if (ret < 0) {
1159 btrfs_abort_transaction(trans, root, ret);
1160 goto error;
1161 }
1090 if (ret > 0 && path->slots[0] > 0 && check_prev) { 1162 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1091 leaf = path->nodes[0]; 1163 leaf = path->nodes[0];
1092 btrfs_item_key_to_cpu(leaf, &found_key, 1164 btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1100,8 +1172,10 @@ next_slot:
1100 leaf = path->nodes[0]; 1172 leaf = path->nodes[0];
1101 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 1173 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1102 ret = btrfs_next_leaf(root, path); 1174 ret = btrfs_next_leaf(root, path);
1103 if (ret < 0) 1175 if (ret < 0) {
1104 BUG_ON(1); 1176 btrfs_abort_transaction(trans, root, ret);
1177 goto error;
1178 }
1105 if (ret > 0) 1179 if (ret > 0)
1106 break; 1180 break;
1107 leaf = path->nodes[0]; 1181 leaf = path->nodes[0];
@@ -1189,7 +1263,10 @@ out_check:
1189 ret = cow_file_range(inode, locked_page, cow_start, 1263 ret = cow_file_range(inode, locked_page, cow_start,
1190 found_key.offset - 1, page_started, 1264 found_key.offset - 1, page_started,
1191 nr_written, 1); 1265 nr_written, 1);
1192 BUG_ON(ret); 1266 if (ret) {
1267 btrfs_abort_transaction(trans, root, ret);
1268 goto error;
1269 }
1193 cow_start = (u64)-1; 1270 cow_start = (u64)-1;
1194 } 1271 }
1195 1272
@@ -1198,7 +1275,7 @@ out_check:
1198 struct extent_map_tree *em_tree; 1275 struct extent_map_tree *em_tree;
1199 em_tree = &BTRFS_I(inode)->extent_tree; 1276 em_tree = &BTRFS_I(inode)->extent_tree;
1200 em = alloc_extent_map(); 1277 em = alloc_extent_map();
1201 BUG_ON(!em); 1278 BUG_ON(!em); /* -ENOMEM */
1202 em->start = cur_offset; 1279 em->start = cur_offset;
1203 em->orig_start = em->start; 1280 em->orig_start = em->start;
1204 em->len = num_bytes; 1281 em->len = num_bytes;
@@ -1224,13 +1301,16 @@ out_check:
1224 1301
1225 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, 1302 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
1226 num_bytes, num_bytes, type); 1303 num_bytes, num_bytes, type);
1227 BUG_ON(ret); 1304 BUG_ON(ret); /* -ENOMEM */
1228 1305
1229 if (root->root_key.objectid == 1306 if (root->root_key.objectid ==
1230 BTRFS_DATA_RELOC_TREE_OBJECTID) { 1307 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1231 ret = btrfs_reloc_clone_csums(inode, cur_offset, 1308 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1232 num_bytes); 1309 num_bytes);
1233 BUG_ON(ret); 1310 if (ret) {
1311 btrfs_abort_transaction(trans, root, ret);
1312 goto error;
1313 }
1234 } 1314 }
1235 1315
1236 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1316 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
@@ -1249,18 +1329,23 @@ out_check:
1249 if (cow_start != (u64)-1) { 1329 if (cow_start != (u64)-1) {
1250 ret = cow_file_range(inode, locked_page, cow_start, end, 1330 ret = cow_file_range(inode, locked_page, cow_start, end,
1251 page_started, nr_written, 1); 1331 page_started, nr_written, 1);
1252 BUG_ON(ret); 1332 if (ret) {
1333 btrfs_abort_transaction(trans, root, ret);
1334 goto error;
1335 }
1253 } 1336 }
1254 1337
1338error:
1255 if (nolock) { 1339 if (nolock) {
1256 ret = btrfs_end_transaction_nolock(trans, root); 1340 err = btrfs_end_transaction_nolock(trans, root);
1257 BUG_ON(ret);
1258 } else { 1341 } else {
1259 ret = btrfs_end_transaction(trans, root); 1342 err = btrfs_end_transaction(trans, root);
1260 BUG_ON(ret);
1261 } 1343 }
1344 if (!ret)
1345 ret = err;
1346
1262 btrfs_free_path(path); 1347 btrfs_free_path(path);
1263 return 0; 1348 return ret;
1264} 1349}
1265 1350
1266/* 1351/*
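
When the unwind path itself can fail, run_delalloc_nocow() now preserves whichever error came first: the result of ending the transaction goes into a separate err, which only replaces ret if ret was still zero. The idiom in isolation, with hypothetical helpers:

int do_work(void);              /* hypothetical */
int finish_up(void);            /* hypothetical cleanup that can also fail */

static int first_error_wins(void)
{
        int ret, err;

        ret = do_work();

        err = finish_up();      /* runs on success and failure alike */
        if (!ret)
                ret = err;      /* report the first failure, not the last */

        return ret;
}
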
@@ -1425,10 +1510,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1425 map_length = length; 1510 map_length = length;
1426 ret = btrfs_map_block(map_tree, READ, logical, 1511 ret = btrfs_map_block(map_tree, READ, logical,
1427 &map_length, NULL, 0); 1512 &map_length, NULL, 0);
1428 1513 /* Will always return 0 or 1 with map_multi == NULL */
1514 BUG_ON(ret < 0);
1429 if (map_length < length + size) 1515 if (map_length < length + size)
1430 return 1; 1516 return 1;
1431 return ret; 1517 return 0;
1432} 1518}
1433 1519
1434/* 1520/*
@@ -1448,7 +1534,7 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1448 int ret = 0; 1534 int ret = 0;
1449 1535
1450 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0); 1536 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1451 BUG_ON(ret); 1537 BUG_ON(ret); /* -ENOMEM */
1452 return 0; 1538 return 0;
1453} 1539}
1454 1540
@@ -1479,14 +1565,16 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1479 struct btrfs_root *root = BTRFS_I(inode)->root; 1565 struct btrfs_root *root = BTRFS_I(inode)->root;
1480 int ret = 0; 1566 int ret = 0;
1481 int skip_sum; 1567 int skip_sum;
1568 int metadata = 0;
1482 1569
1483 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1570 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1484 1571
1485 if (btrfs_is_free_space_inode(root, inode)) 1572 if (btrfs_is_free_space_inode(root, inode))
1486 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); 1573 metadata = 2;
1487 else 1574
1488 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1575 ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
1489 BUG_ON(ret); 1576 if (ret)
1577 return ret;
1490 1578
1491 if (!(rw & REQ_WRITE)) { 1579 if (!(rw & REQ_WRITE)) {
1492 if (bio_flags & EXTENT_BIO_COMPRESSED) { 1580 if (bio_flags & EXTENT_BIO_COMPRESSED) {
@@ -1571,7 +1659,7 @@ again:
1571 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; 1659 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
1572 1660
1573 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0, 1661 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
1574 &cached_state, GFP_NOFS); 1662 &cached_state);
1575 1663
1576 /* already ordered? We're done */ 1664 /* already ordered? We're done */
1577 if (PagePrivate2(page)) 1665 if (PagePrivate2(page))
@@ -1675,13 +1763,15 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1675 */ 1763 */
1676 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, 1764 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1677 &hint, 0); 1765 &hint, 0);
1678 BUG_ON(ret); 1766 if (ret)
1767 goto out;
1679 1768
1680 ins.objectid = btrfs_ino(inode); 1769 ins.objectid = btrfs_ino(inode);
1681 ins.offset = file_pos; 1770 ins.offset = file_pos;
1682 ins.type = BTRFS_EXTENT_DATA_KEY; 1771 ins.type = BTRFS_EXTENT_DATA_KEY;
1683 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); 1772 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
1684 BUG_ON(ret); 1773 if (ret)
1774 goto out;
1685 leaf = path->nodes[0]; 1775 leaf = path->nodes[0];
1686 fi = btrfs_item_ptr(leaf, path->slots[0], 1776 fi = btrfs_item_ptr(leaf, path->slots[0],
1687 struct btrfs_file_extent_item); 1777 struct btrfs_file_extent_item);
@@ -1709,10 +1799,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1709 ret = btrfs_alloc_reserved_file_extent(trans, root, 1799 ret = btrfs_alloc_reserved_file_extent(trans, root,
1710 root->root_key.objectid, 1800 root->root_key.objectid,
1711 btrfs_ino(inode), file_pos, &ins); 1801 btrfs_ino(inode), file_pos, &ins);
1712 BUG_ON(ret); 1802out:
1713 btrfs_free_path(path); 1803 btrfs_free_path(path);
1714 1804
1715 return 0; 1805 return ret;
1716} 1806}
1717 1807
1718/* 1808/*
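
insert_reserved_file_extent() shows the mechanical half of the conversion: the tail BUG_ON(ret) calls collapse into early jumps to a single out: label that frees the path and propagates ret, the standard kernel single-exit shape. Reduced to a skeleton with hypothetical helpers:

#include <errno.h>
#include <stddef.h>

struct path;
struct path *alloc_path(void);          /* hypothetical */
void free_path(struct path *p);         /* hypothetical */
int step_one(struct path *p);           /* hypothetical */
int step_two(struct path *p);           /* hypothetical */

static int single_exit_sketch(void)
{
        struct path *path = alloc_path();
        int ret;

        if (!path)
                return -ENOMEM;

        ret = step_one(path);
        if (ret)
                goto out;
        ret = step_two(path);
out:
        free_path(path);        /* exactly one cleanup site */
        return ret;
}
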
@@ -1740,35 +1830,41 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1740 end - start + 1); 1830 end - start + 1);
1741 if (!ret) 1831 if (!ret)
1742 return 0; 1832 return 0;
1743 BUG_ON(!ordered_extent); 1833 BUG_ON(!ordered_extent); /* Logic error */
1744 1834
1745 nolock = btrfs_is_free_space_inode(root, inode); 1835 nolock = btrfs_is_free_space_inode(root, inode);
1746 1836
1747 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1837 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1748 BUG_ON(!list_empty(&ordered_extent->list)); 1838 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
1749 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1839 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1750 if (!ret) { 1840 if (!ret) {
1751 if (nolock) 1841 if (nolock)
1752 trans = btrfs_join_transaction_nolock(root); 1842 trans = btrfs_join_transaction_nolock(root);
1753 else 1843 else
1754 trans = btrfs_join_transaction(root); 1844 trans = btrfs_join_transaction(root);
1755 BUG_ON(IS_ERR(trans)); 1845 if (IS_ERR(trans))
1846 return PTR_ERR(trans);
1756 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1847 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1757 ret = btrfs_update_inode_fallback(trans, root, inode); 1848 ret = btrfs_update_inode_fallback(trans, root, inode);
1758 BUG_ON(ret); 1849 if (ret) /* -ENOMEM or corruption */
1850 btrfs_abort_transaction(trans, root, ret);
1759 } 1851 }
1760 goto out; 1852 goto out;
1761 } 1853 }
1762 1854
1763 lock_extent_bits(io_tree, ordered_extent->file_offset, 1855 lock_extent_bits(io_tree, ordered_extent->file_offset,
1764 ordered_extent->file_offset + ordered_extent->len - 1, 1856 ordered_extent->file_offset + ordered_extent->len - 1,
1765 0, &cached_state, GFP_NOFS); 1857 0, &cached_state);
1766 1858
1767 if (nolock) 1859 if (nolock)
1768 trans = btrfs_join_transaction_nolock(root); 1860 trans = btrfs_join_transaction_nolock(root);
1769 else 1861 else
1770 trans = btrfs_join_transaction(root); 1862 trans = btrfs_join_transaction(root);
1771 BUG_ON(IS_ERR(trans)); 1863 if (IS_ERR(trans)) {
1864 ret = PTR_ERR(trans);
1865 trans = NULL;
1866 goto out_unlock;
1867 }
1772 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1868 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1773 1869
1774 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1870 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -1779,7 +1875,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1779 ordered_extent->file_offset, 1875 ordered_extent->file_offset,
1780 ordered_extent->file_offset + 1876 ordered_extent->file_offset +
1781 ordered_extent->len); 1877 ordered_extent->len);
1782 BUG_ON(ret);
1783 } else { 1878 } else {
1784 BUG_ON(root == root->fs_info->tree_root); 1879 BUG_ON(root == root->fs_info->tree_root);
1785 ret = insert_reserved_file_extent(trans, inode, 1880 ret = insert_reserved_file_extent(trans, inode,
@@ -1793,11 +1888,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1793 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1888 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1794 ordered_extent->file_offset, 1889 ordered_extent->file_offset,
1795 ordered_extent->len); 1890 ordered_extent->len);
1796 BUG_ON(ret);
1797 } 1891 }
1798 unlock_extent_cached(io_tree, ordered_extent->file_offset, 1892 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1799 ordered_extent->file_offset + 1893 ordered_extent->file_offset +
1800 ordered_extent->len - 1, &cached_state, GFP_NOFS); 1894 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1895 if (ret < 0) {
1896 btrfs_abort_transaction(trans, root, ret);
1897 goto out;
1898 }
1801 1899
1802 add_pending_csums(trans, inode, ordered_extent->file_offset, 1900 add_pending_csums(trans, inode, ordered_extent->file_offset,
1803 &ordered_extent->list); 1901 &ordered_extent->list);
@@ -1805,7 +1903,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1805 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1903 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1806 if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1904 if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1807 ret = btrfs_update_inode_fallback(trans, root, inode); 1905 ret = btrfs_update_inode_fallback(trans, root, inode);
1808 BUG_ON(ret); 1906 if (ret) { /* -ENOMEM or corruption */
1907 btrfs_abort_transaction(trans, root, ret);
1908 goto out;
1909 }
1809 } 1910 }
1810 ret = 0; 1911 ret = 0;
1811out: 1912out:
@@ -1824,6 +1925,11 @@ out:
1824 btrfs_put_ordered_extent(ordered_extent); 1925 btrfs_put_ordered_extent(ordered_extent);
1825 1926
1826 return 0; 1927 return 0;
1928out_unlock:
1929 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1930 ordered_extent->file_offset +
1931 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1932 goto out;
1827} 1933}
1828 1934
1829static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, 1935static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
@@ -1905,6 +2011,8 @@ struct delayed_iput {
1905 struct inode *inode; 2011 struct inode *inode;
1906}; 2012};
1907 2013
2014/* JDM: If this is fs-wide, why can't we add a pointer to
2015 * btrfs_inode instead and avoid the allocation? */
1908void btrfs_add_delayed_iput(struct inode *inode) 2016void btrfs_add_delayed_iput(struct inode *inode)
1909{ 2017{
1910 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; 2018 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
@@ -2051,20 +2159,27 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2051 /* grab metadata reservation from transaction handle */ 2159 /* grab metadata reservation from transaction handle */
2052 if (reserve) { 2160 if (reserve) {
2053 ret = btrfs_orphan_reserve_metadata(trans, inode); 2161 ret = btrfs_orphan_reserve_metadata(trans, inode);
2054 BUG_ON(ret); 2162 BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */
2055 } 2163 }
2056 2164
2057 /* insert an orphan item to track this unlinked/truncated file */ 2165 /* insert an orphan item to track this unlinked/truncated file */
2058 if (insert >= 1) { 2166 if (insert >= 1) {
2059 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2167 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2060 BUG_ON(ret && ret != -EEXIST); 2168 if (ret && ret != -EEXIST) {
2169 btrfs_abort_transaction(trans, root, ret);
2170 return ret;
2171 }
2172 ret = 0;
2061 } 2173 }
2062 2174
2063 /* insert an orphan item to track subvolume contains orphan files */ 2175 /* insert an orphan item to track subvolume contains orphan files */
2064 if (insert >= 2) { 2176 if (insert >= 2) {
2065 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, 2177 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
2066 root->root_key.objectid); 2178 root->root_key.objectid);
2067 BUG_ON(ret); 2179 if (ret && ret != -EEXIST) {
2180 btrfs_abort_transaction(trans, root, ret);
2181 return ret;
2182 }
2068 } 2183 }
2069 return 0; 2184 return 0;
2070} 2185}
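
Orphan items are deliberately idempotent: the inode (or the subvolume marker) may already have one, so btrfs_orphan_add() above treats -EEXIST as success and resets ret to 0, while any other error aborts the transaction. Isolated, with insert_item() as a hypothetical stand-in:

#include <errno.h>

int insert_item(void);  /* hypothetical; returns 0, -EEXIST, or another -errno */

static int insert_idempotent_sketch(void)
{
        int ret;

        ret = insert_item();
        if (ret && ret != -EEXIST)
                return ret;     /* a real failure for the caller to handle */

        return 0;               /* already present is fine here */
}
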
@@ -2094,7 +2209,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2094 2209
2095 if (trans && delete_item) { 2210 if (trans && delete_item) {
2096 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); 2211 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
2097 BUG_ON(ret); 2212 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
2098 } 2213 }
2099 2214
2100 if (release_rsv) 2215 if (release_rsv)
@@ -2228,7 +2343,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2228 } 2343 }
2229 ret = btrfs_del_orphan_item(trans, root, 2344 ret = btrfs_del_orphan_item(trans, root,
2230 found_key.objectid); 2345 found_key.objectid);
2231 BUG_ON(ret); 2346 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
2232 btrfs_end_transaction(trans, root); 2347 btrfs_end_transaction(trans, root);
2233 continue; 2348 continue;
2234 } 2349 }
@@ -2610,16 +2725,22 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2610 printk(KERN_INFO "btrfs failed to delete reference to %.*s, " 2725 printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
2611 "inode %llu parent %llu\n", name_len, name, 2726 "inode %llu parent %llu\n", name_len, name,
2612 (unsigned long long)ino, (unsigned long long)dir_ino); 2727 (unsigned long long)ino, (unsigned long long)dir_ino);
2728 btrfs_abort_transaction(trans, root, ret);
2613 goto err; 2729 goto err;
2614 } 2730 }
2615 2731
2616 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); 2732 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
2617 if (ret) 2733 if (ret) {
2734 btrfs_abort_transaction(trans, root, ret);
2618 goto err; 2735 goto err;
2736 }
2619 2737
2620 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, 2738 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
2621 inode, dir_ino); 2739 inode, dir_ino);
2622 BUG_ON(ret != 0 && ret != -ENOENT); 2740 if (ret != 0 && ret != -ENOENT) {
2741 btrfs_abort_transaction(trans, root, ret);
2742 goto err;
2743 }
2623 2744
2624 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2745 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
2625 dir, index); 2746 dir, index);
@@ -2777,7 +2898,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2777 err = ret; 2898 err = ret;
2778 goto out; 2899 goto out;
2779 } 2900 }
2780 BUG_ON(ret == 0); 2901 BUG_ON(ret == 0); /* Corruption */
2781 if (check_path_shared(root, path)) 2902 if (check_path_shared(root, path))
2782 goto out; 2903 goto out;
2783 btrfs_release_path(path); 2904 btrfs_release_path(path);
@@ -2810,7 +2931,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2810 err = PTR_ERR(ref); 2931 err = PTR_ERR(ref);
2811 goto out; 2932 goto out;
2812 } 2933 }
2813 BUG_ON(!ref); 2934 BUG_ON(!ref); /* Logic error */
2814 if (check_path_shared(root, path)) 2935 if (check_path_shared(root, path))
2815 goto out; 2936 goto out;
2816 index = btrfs_inode_ref_index(path->nodes[0], ref); 2937 index = btrfs_inode_ref_index(path->nodes[0], ref);
@@ -2917,23 +3038,42 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2917 3038
2918 di = btrfs_lookup_dir_item(trans, root, path, dir_ino, 3039 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2919 name, name_len, -1); 3040 name, name_len, -1);
2920 BUG_ON(IS_ERR_OR_NULL(di)); 3041 if (IS_ERR_OR_NULL(di)) {
3042 if (!di)
3043 ret = -ENOENT;
3044 else
3045 ret = PTR_ERR(di);
3046 goto out;
3047 }
2921 3048
2922 leaf = path->nodes[0]; 3049 leaf = path->nodes[0];
2923 btrfs_dir_item_key_to_cpu(leaf, di, &key); 3050 btrfs_dir_item_key_to_cpu(leaf, di, &key);
2924 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); 3051 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
2925 ret = btrfs_delete_one_dir_name(trans, root, path, di); 3052 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2926 BUG_ON(ret); 3053 if (ret) {
3054 btrfs_abort_transaction(trans, root, ret);
3055 goto out;
3056 }
2927 btrfs_release_path(path); 3057 btrfs_release_path(path);
2928 3058
2929 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, 3059 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
2930 objectid, root->root_key.objectid, 3060 objectid, root->root_key.objectid,
2931 dir_ino, &index, name, name_len); 3061 dir_ino, &index, name, name_len);
2932 if (ret < 0) { 3062 if (ret < 0) {
2933 BUG_ON(ret != -ENOENT); 3063 if (ret != -ENOENT) {
3064 btrfs_abort_transaction(trans, root, ret);
3065 goto out;
3066 }
2934 di = btrfs_search_dir_index_item(root, path, dir_ino, 3067 di = btrfs_search_dir_index_item(root, path, dir_ino,
2935 name, name_len); 3068 name, name_len);
2936 BUG_ON(IS_ERR_OR_NULL(di)); 3069 if (IS_ERR_OR_NULL(di)) {
3070 if (!di)
3071 ret = -ENOENT;
3072 else
3073 ret = PTR_ERR(di);
3074 btrfs_abort_transaction(trans, root, ret);
3075 goto out;
3076 }
2937 3077
2938 leaf = path->nodes[0]; 3078 leaf = path->nodes[0];
2939 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3079 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
@@ -2943,15 +3083,19 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2943 btrfs_release_path(path); 3083 btrfs_release_path(path);
2944 3084
2945 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); 3085 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
2946 BUG_ON(ret); 3086 if (ret) {
3087 btrfs_abort_transaction(trans, root, ret);
3088 goto out;
3089 }
2947 3090
2948 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3091 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
2949 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3092 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2950 ret = btrfs_update_inode(trans, root, dir); 3093 ret = btrfs_update_inode(trans, root, dir);
2951 BUG_ON(ret); 3094 if (ret)
2952 3095 btrfs_abort_transaction(trans, root, ret);
3096out:
2953 btrfs_free_path(path); 3097 btrfs_free_path(path);
2954 return 0; 3098 return ret;
2955} 3099}
2956 3100
2957static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) 3101static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
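
btrfs_unlink_subvol() repeats a small unpacking dance wherever a lookup can return either NULL (not found) or an ERR_PTR (hard failure): NULL maps to -ENOENT, an ERR_PTR to its embedded code. The fallocate hunk earlier does the same for btrfs_get_extent(). A compact freestanding version, with local stand-ins for the <linux/err.h> helpers:

#include <errno.h>

/* Stand-ins for IS_ERR_OR_NULL()/PTR_ERR(): an ERR_PTR encodes a
 * negative errno in the top page of the address space. */
static inline int is_err_or_null(const void *p)
{
        return !p || (unsigned long)p >= (unsigned long)-4095;
}

static inline int ptr_err(const void *p)
{
        return (int)(long)p;
}

static int unpack_lookup(void *di)
{
        if (is_err_or_null(di))
                return di ? ptr_err(di) : -ENOENT;
        return 0;       /* di is a valid pointer */
}
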
@@ -3161,8 +3305,8 @@ search_again:
3161 } 3305 }
3162 size = 3306 size =
3163 btrfs_file_extent_calc_inline_size(size); 3307 btrfs_file_extent_calc_inline_size(size);
3164 ret = btrfs_truncate_item(trans, root, path, 3308 btrfs_truncate_item(trans, root, path,
3165 size, 1); 3309 size, 1);
3166 } else if (root->ref_cows) { 3310 } else if (root->ref_cows) {
3167 inode_sub_bytes(inode, item_end + 1 - 3311 inode_sub_bytes(inode, item_end + 1 -
3168 found_key.offset); 3312 found_key.offset);
@@ -3210,7 +3354,11 @@ delete:
3210 ret = btrfs_del_items(trans, root, path, 3354 ret = btrfs_del_items(trans, root, path,
3211 pending_del_slot, 3355 pending_del_slot,
3212 pending_del_nr); 3356 pending_del_nr);
3213 BUG_ON(ret); 3357 if (ret) {
3358 btrfs_abort_transaction(trans,
3359 root, ret);
3360 goto error;
3361 }
3214 pending_del_nr = 0; 3362 pending_del_nr = 0;
3215 } 3363 }
3216 btrfs_release_path(path); 3364 btrfs_release_path(path);
@@ -3223,8 +3371,10 @@ out:
3223 if (pending_del_nr) { 3371 if (pending_del_nr) {
3224 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3372 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3225 pending_del_nr); 3373 pending_del_nr);
3226 BUG_ON(ret); 3374 if (ret)
3375 btrfs_abort_transaction(trans, root, ret);
3227 } 3376 }
3377error:
3228 btrfs_free_path(path); 3378 btrfs_free_path(path);
3229 return err; 3379 return err;
3230} 3380}
@@ -3282,8 +3432,7 @@ again:
3282 } 3432 }
3283 wait_on_page_writeback(page); 3433 wait_on_page_writeback(page);
3284 3434
3285 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, 3435 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
3286 GFP_NOFS);
3287 set_page_extent_mapped(page); 3436 set_page_extent_mapped(page);
3288 3437
3289 ordered = btrfs_lookup_ordered_extent(inode, page_start); 3438 ordered = btrfs_lookup_ordered_extent(inode, page_start);
@@ -3359,7 +3508,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3359 btrfs_wait_ordered_range(inode, hole_start, 3508 btrfs_wait_ordered_range(inode, hole_start,
3360 block_end - hole_start); 3509 block_end - hole_start);
3361 lock_extent_bits(io_tree, hole_start, block_end - 1, 0, 3510 lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
3362 &cached_state, GFP_NOFS); 3511 &cached_state);
3363 ordered = btrfs_lookup_ordered_extent(inode, hole_start); 3512 ordered = btrfs_lookup_ordered_extent(inode, hole_start);
3364 if (!ordered) 3513 if (!ordered)
3365 break; 3514 break;
@@ -3372,7 +3521,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3372 while (1) { 3521 while (1) {
3373 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3522 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
3374 block_end - cur_offset, 0); 3523 block_end - cur_offset, 0);
3375 BUG_ON(IS_ERR_OR_NULL(em)); 3524 if (IS_ERR(em)) {
3525 err = PTR_ERR(em);
3526 break;
3527 }
3376 last_byte = min(extent_map_end(em), block_end); 3528 last_byte = min(extent_map_end(em), block_end);
3377 last_byte = (last_byte + mask) & ~mask; 3529 last_byte = (last_byte + mask) & ~mask;
3378 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3530 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3389,7 +3541,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3389 cur_offset + hole_size, 3541 cur_offset + hole_size,
3390 &hint_byte, 1); 3542 &hint_byte, 1);
3391 if (err) { 3543 if (err) {
3392 btrfs_update_inode(trans, root, inode); 3544 btrfs_abort_transaction(trans, root, err);
3393 btrfs_end_transaction(trans, root); 3545 btrfs_end_transaction(trans, root);
3394 break; 3546 break;
3395 } 3547 }
@@ -3399,7 +3551,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3399 0, hole_size, 0, hole_size, 3551 0, hole_size, 0, hole_size,
3400 0, 0, 0); 3552 0, 0, 0);
3401 if (err) { 3553 if (err) {
3402 btrfs_update_inode(trans, root, inode); 3554 btrfs_abort_transaction(trans, root, err);
3403 btrfs_end_transaction(trans, root); 3555 btrfs_end_transaction(trans, root);
3404 break; 3556 break;
3405 } 3557 }
@@ -3779,7 +3931,7 @@ static void inode_tree_del(struct inode *inode)
3779 } 3931 }
3780} 3932}
3781 3933
3782int btrfs_invalidate_inodes(struct btrfs_root *root) 3934void btrfs_invalidate_inodes(struct btrfs_root *root)
3783{ 3935{
3784 struct rb_node *node; 3936 struct rb_node *node;
3785 struct rb_node *prev; 3937 struct rb_node *prev;
@@ -3839,7 +3991,6 @@ again:
3839 node = rb_next(node); 3991 node = rb_next(node);
3840 } 3992 }
3841 spin_unlock(&root->inode_lock); 3993 spin_unlock(&root->inode_lock);
3842 return 0;
3843} 3994}
3844 3995
3845static int btrfs_init_locked_inode(struct inode *inode, void *p) 3996static int btrfs_init_locked_inode(struct inode *inode, void *p)
@@ -4581,18 +4732,26 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4581 parent_ino, index); 4732 parent_ino, index);
4582 } 4733 }
4583 4734
4584 if (ret == 0) { 4735 /* Nothing to clean up yet */
4585 ret = btrfs_insert_dir_item(trans, root, name, name_len, 4736 if (ret)
4586 parent_inode, &key, 4737 return ret;
4587 btrfs_inode_type(inode), index);
4588 if (ret)
4589 goto fail_dir_item;
4590 4738
4591 btrfs_i_size_write(parent_inode, parent_inode->i_size + 4739 ret = btrfs_insert_dir_item(trans, root, name, name_len,
4592 name_len * 2); 4740 parent_inode, &key,
4593 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; 4741 btrfs_inode_type(inode), index);
4594 ret = btrfs_update_inode(trans, root, parent_inode); 4742 if (ret == -EEXIST)
4743 goto fail_dir_item;
4744 else if (ret) {
4745 btrfs_abort_transaction(trans, root, ret);
4746 return ret;
4595 } 4747 }
4748
4749 btrfs_i_size_write(parent_inode, parent_inode->i_size +
4750 name_len * 2);
4751 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
4752 ret = btrfs_update_inode(trans, root, parent_inode);
4753 if (ret)
4754 btrfs_abort_transaction(trans, root, ret);
4596 return ret; 4755 return ret;
4597 4756
4598fail_dir_item: 4757fail_dir_item:
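btrfs_add_link() is flattened into early returns, and its failures now get triaged: -EEXIST is recoverable and only undoes the local directory bookkeeping at fail_dir_item, while any other error aborts the transaction outright. A sketch of that triage; insert_dir_item and abort_transaction are hypothetical stand-ins:

    #include <stdio.h>
    #include <errno.h>

    static void abort_transaction(int err)
    {
            fprintf(stderr, "fatal: abort transaction (%d)\n", err);
    }

    /* hypothetical stand-in for btrfs_insert_dir_item() */
    static int insert_dir_item(int outcome)
    {
            return outcome;
    }

    static int add_link(int outcome)
    {
            int ret = insert_dir_item(outcome);

            if (ret == -EEXIST)
                    goto fail_dir_item;     /* recoverable: undo local state */
            else if (ret) {
                    abort_transaction(ret); /* anything else poisons the fs */
                    return ret;
            }
            printf("link added\n");
            return 0;

    fail_dir_item:
            printf("rolling back directory index\n");
            return ret;
    }

    int main(void)
    {
            add_link(0);
            add_link(-EEXIST);
            add_link(-EIO);
            return 0;
    }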
@@ -4806,7 +4965,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4806 } else { 4965 } else {
4807 struct dentry *parent = dentry->d_parent; 4966 struct dentry *parent = dentry->d_parent;
4808 err = btrfs_update_inode(trans, root, inode); 4967 err = btrfs_update_inode(trans, root, inode);
4809 BUG_ON(err); 4968 if (err)
4969 goto fail;
4810 d_instantiate(dentry, inode); 4970 d_instantiate(dentry, inode);
4811 btrfs_log_new_name(trans, inode, NULL, parent); 4971 btrfs_log_new_name(trans, inode, NULL, parent);
4812 } 4972 }
@@ -5137,7 +5297,7 @@ again:
5137 ret = uncompress_inline(path, inode, page, 5297 ret = uncompress_inline(path, inode, page,
5138 pg_offset, 5298 pg_offset,
5139 extent_offset, item); 5299 extent_offset, item);
5140 BUG_ON(ret); 5300 BUG_ON(ret); /* -ENOMEM */
5141 } else { 5301 } else {
5142 map = kmap(page); 5302 map = kmap(page);
5143 read_extent_buffer(leaf, map + pg_offset, ptr, 5303 read_extent_buffer(leaf, map + pg_offset, ptr,
@@ -5252,6 +5412,7 @@ out:
5252 free_extent_map(em); 5412 free_extent_map(em);
5253 return ERR_PTR(err); 5413 return ERR_PTR(err);
5254 } 5414 }
5415 BUG_ON(!em); /* Error is always set */
5255 return em; 5416 return em;
5256} 5417}
5257 5418
@@ -5414,7 +5575,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5414 5575
5415 alloc_hint = get_extent_allocation_hint(inode, start, len); 5576 alloc_hint = get_extent_allocation_hint(inode, start, len);
5416 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, 5577 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
5417 alloc_hint, (u64)-1, &ins, 1); 5578 alloc_hint, &ins, 1);
5418 if (ret) { 5579 if (ret) {
5419 em = ERR_PTR(ret); 5580 em = ERR_PTR(ret);
5420 goto out; 5581 goto out;
@@ -5602,7 +5763,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5602 free_extent_map(em); 5763 free_extent_map(em);
5603 /* DIO will do one hole at a time, so just unlock a sector */ 5764 /* DIO will do one hole at a time, so just unlock a sector */
5604 unlock_extent(&BTRFS_I(inode)->io_tree, start, 5765 unlock_extent(&BTRFS_I(inode)->io_tree, start,
5605 start + root->sectorsize - 1, GFP_NOFS); 5766 start + root->sectorsize - 1);
5606 return 0; 5767 return 0;
5607 } 5768 }
5608 5769
@@ -5743,7 +5904,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5743 } while (bvec <= bvec_end); 5904 } while (bvec <= bvec_end);
5744 5905
5745 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 5906 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
5746 dip->logical_offset + dip->bytes - 1, GFP_NOFS); 5907 dip->logical_offset + dip->bytes - 1);
5747 bio->bi_private = dip->private; 5908 bio->bi_private = dip->private;
5748 5909
5749 kfree(dip->csums); 5910 kfree(dip->csums);
@@ -5794,7 +5955,7 @@ again:
5794 5955
5795 lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5956 lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5796 ordered->file_offset + ordered->len - 1, 0, 5957 ordered->file_offset + ordered->len - 1, 0,
5797 &cached_state, GFP_NOFS); 5958 &cached_state);
5798 5959
5799 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { 5960 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
5800 ret = btrfs_mark_extent_written(trans, inode, 5961 ret = btrfs_mark_extent_written(trans, inode,
@@ -5868,7 +6029,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
5868 int ret; 6029 int ret;
5869 struct btrfs_root *root = BTRFS_I(inode)->root; 6030 struct btrfs_root *root = BTRFS_I(inode)->root;
5870 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1); 6031 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
5871 BUG_ON(ret); 6032 BUG_ON(ret); /* -ENOMEM */
5872 return 0; 6033 return 0;
5873} 6034}
5874 6035
@@ -6209,7 +6370,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6209 6370
6210 while (1) { 6371 while (1) {
6211 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6372 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6212 0, &cached_state, GFP_NOFS); 6373 0, &cached_state);
6213 /* 6374 /*
6214 * We're concerned with the entire range that we're going to be 6375 * We're concerned with the entire range that we're going to be
6215 * doing DIO to, so we need to make sure there's no ordered 6376
@@ -6233,7 +6394,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6233 if (writing) { 6394 if (writing) {
6234 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; 6395 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
6235 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6396 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6236 EXTENT_DELALLOC, 0, NULL, &cached_state, 6397 EXTENT_DELALLOC, NULL, &cached_state,
6237 GFP_NOFS); 6398 GFP_NOFS);
6238 if (ret) { 6399 if (ret) {
6239 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 6400 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
@@ -6363,8 +6524,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6363 btrfs_releasepage(page, GFP_NOFS); 6524 btrfs_releasepage(page, GFP_NOFS);
6364 return; 6525 return;
6365 } 6526 }
6366 lock_extent_bits(tree, page_start, page_end, 0, &cached_state, 6527 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
6367 GFP_NOFS);
6368 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 6528 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
6369 page_offset(page)); 6529 page_offset(page));
6370 if (ordered) { 6530 if (ordered) {
@@ -6386,8 +6546,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6386 } 6546 }
6387 btrfs_put_ordered_extent(ordered); 6547 btrfs_put_ordered_extent(ordered);
6388 cached_state = NULL; 6548 cached_state = NULL;
6389 lock_extent_bits(tree, page_start, page_end, 0, &cached_state, 6549 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
6390 GFP_NOFS);
6391 } 6550 }
6392 clear_extent_bit(tree, page_start, page_end, 6551 clear_extent_bit(tree, page_start, page_end,
6393 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 6552 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -6462,8 +6621,7 @@ again:
6462 } 6621 }
6463 wait_on_page_writeback(page); 6622 wait_on_page_writeback(page);
6464 6623
6465 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, 6624 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
6466 GFP_NOFS);
6467 set_page_extent_mapped(page); 6625 set_page_extent_mapped(page);
6468 6626
6469 /* 6627 /*
@@ -6737,10 +6895,9 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6737 btrfs_i_size_write(inode, 0); 6895 btrfs_i_size_write(inode, 0);
6738 6896
6739 err = btrfs_update_inode(trans, new_root, inode); 6897 err = btrfs_update_inode(trans, new_root, inode);
6740 BUG_ON(err);
6741 6898
6742 iput(inode); 6899 iput(inode);
6743 return 0; 6900 return err;
6744} 6901}
6745 6902
6746struct inode *btrfs_alloc_inode(struct super_block *sb) 6903struct inode *btrfs_alloc_inode(struct super_block *sb)
@@ -6783,6 +6940,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6783 extent_map_tree_init(&ei->extent_tree); 6940 extent_map_tree_init(&ei->extent_tree);
6784 extent_io_tree_init(&ei->io_tree, &inode->i_data); 6941 extent_io_tree_init(&ei->io_tree, &inode->i_data);
6785 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); 6942 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
6943 ei->io_tree.track_uptodate = 1;
6944 ei->io_failure_tree.track_uptodate = 1;
6786 mutex_init(&ei->log_mutex); 6945 mutex_init(&ei->log_mutex);
6787 mutex_init(&ei->delalloc_mutex); 6946 mutex_init(&ei->delalloc_mutex);
6788 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6947 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -7072,7 +7231,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7072 if (!ret) 7231 if (!ret)
7073 ret = btrfs_update_inode(trans, root, old_inode); 7232 ret = btrfs_update_inode(trans, root, old_inode);
7074 } 7233 }
7075 BUG_ON(ret); 7234 if (ret) {
7235 btrfs_abort_transaction(trans, root, ret);
7236 goto out_fail;
7237 }
7076 7238
7077 if (new_inode) { 7239 if (new_inode) {
7078 new_inode->i_ctime = CURRENT_TIME; 7240 new_inode->i_ctime = CURRENT_TIME;
@@ -7090,11 +7252,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7090 new_dentry->d_name.name, 7252 new_dentry->d_name.name,
7091 new_dentry->d_name.len); 7253 new_dentry->d_name.len);
7092 } 7254 }
7093 BUG_ON(ret); 7255 if (!ret && new_inode->i_nlink == 0) {
7094 if (new_inode->i_nlink == 0) {
7095 ret = btrfs_orphan_add(trans, new_dentry->d_inode); 7256 ret = btrfs_orphan_add(trans, new_dentry->d_inode);
7096 BUG_ON(ret); 7257 BUG_ON(ret);
7097 } 7258 }
7259 if (ret) {
7260 btrfs_abort_transaction(trans, root, ret);
7261 goto out_fail;
7262 }
7098 } 7263 }
7099 7264
7100 fixup_inode_flags(new_dir, old_inode); 7265 fixup_inode_flags(new_dir, old_inode);
@@ -7102,7 +7267,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7102 ret = btrfs_add_link(trans, new_dir, old_inode, 7267 ret = btrfs_add_link(trans, new_dir, old_inode,
7103 new_dentry->d_name.name, 7268 new_dentry->d_name.name,
7104 new_dentry->d_name.len, 0, index); 7269 new_dentry->d_name.len, 0, index);
7105 BUG_ON(ret); 7270 if (ret) {
7271 btrfs_abort_transaction(trans, root, ret);
7272 goto out_fail;
7273 }
7106 7274
7107 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { 7275 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
7108 struct dentry *parent = new_dentry->d_parent; 7276 struct dentry *parent = new_dentry->d_parent;
@@ -7315,7 +7483,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7315 } 7483 }
7316 7484
7317 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 7485 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
7318 0, *alloc_hint, (u64)-1, &ins, 1); 7486 0, *alloc_hint, &ins, 1);
7319 if (ret) { 7487 if (ret) {
7320 if (own_trans) 7488 if (own_trans)
7321 btrfs_end_transaction(trans, root); 7489 btrfs_end_transaction(trans, root);
@@ -7327,7 +7495,12 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7327 ins.offset, ins.offset, 7495 ins.offset, ins.offset,
7328 ins.offset, 0, 0, 0, 7496 ins.offset, 0, 0, 0,
7329 BTRFS_FILE_EXTENT_PREALLOC); 7497 BTRFS_FILE_EXTENT_PREALLOC);
7330 BUG_ON(ret); 7498 if (ret) {
7499 btrfs_abort_transaction(trans, root, ret);
7500 if (own_trans)
7501 btrfs_end_transaction(trans, root);
7502 break;
7503 }
7331 btrfs_drop_extent_cache(inode, cur_offset, 7504 btrfs_drop_extent_cache(inode, cur_offset,
7332 cur_offset + ins.offset -1, 0); 7505 cur_offset + ins.offset -1, 0);
7333 7506
@@ -7349,7 +7522,13 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7349 } 7522 }
7350 7523
7351 ret = btrfs_update_inode(trans, root, inode); 7524 ret = btrfs_update_inode(trans, root, inode);
7352 BUG_ON(ret); 7525
7526 if (ret) {
7527 btrfs_abort_transaction(trans, root, ret);
7528 if (own_trans)
7529 btrfs_end_transaction(trans, root);
7530 break;
7531 }
7353 7532
7354 if (own_trans) 7533 if (own_trans)
7355 btrfs_end_transaction(trans, root); 7534 btrfs_end_transaction(trans, root);
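Both new break paths in __btrfs_prealloc_file_range() have to honor own_trans: the function may be handed an existing transaction by its caller, and must only end a transaction it started itself. A compilable sketch of that ownership rule, with made-up names:

    #include <stdbool.h>
    #include <stdio.h>

    struct trans { int live; };

    static void end_transaction(struct trans *t)
    {
            t->live = 0;
            printf("transaction ended\n");
    }

    /* hypothetical: caller may pass an existing transaction handle */
    static int prealloc_range(struct trans *caller_trans, int fail)
    {
            struct trans local = { 1 };
            struct trans *trans = caller_trans ? caller_trans : &local;
            bool own_trans = (caller_trans == NULL);
            int ret = fail ? -28 : 0;       /* pretend -ENOSPC */

            if (ret) {
                    if (own_trans)          /* only end what we started */
                            end_transaction(trans);
                    return ret;
            }
            if (own_trans)
                    end_transaction(trans);
            return 0;
    }

    int main(void)
    {
            struct trans outer = { 1 };
            prealloc_range(NULL, 1);        /* we own it: ended on error */
            prealloc_range(&outer, 1);      /* caller owns it: left alone */
            return 0;
    }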
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d8b54715c2de..18cc23d164a8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -425,22 +425,37 @@ static noinline int create_subvol(struct btrfs_root *root,
425 425
426 key.offset = (u64)-1; 426 key.offset = (u64)-1;
427 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); 427 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
428 BUG_ON(IS_ERR(new_root)); 428 if (IS_ERR(new_root)) {
429 btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
430 ret = PTR_ERR(new_root);
431 goto fail;
432 }
429 433
430 btrfs_record_root_in_trans(trans, new_root); 434 btrfs_record_root_in_trans(trans, new_root);
431 435
432 ret = btrfs_create_subvol_root(trans, new_root, new_dirid); 436 ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
437 if (ret) {
438 /* We potentially lose an unused inode item here */
439 btrfs_abort_transaction(trans, root, ret);
440 goto fail;
441 }
442
433 /* 443 /*
434 * insert the directory item 444 * insert the directory item
435 */ 445 */
436 ret = btrfs_set_inode_index(dir, &index); 446 ret = btrfs_set_inode_index(dir, &index);
437 BUG_ON(ret); 447 if (ret) {
448 btrfs_abort_transaction(trans, root, ret);
449 goto fail;
450 }
438 451
439 ret = btrfs_insert_dir_item(trans, root, 452 ret = btrfs_insert_dir_item(trans, root,
440 name, namelen, dir, &key, 453 name, namelen, dir, &key,
441 BTRFS_FT_DIR, index); 454 BTRFS_FT_DIR, index);
442 if (ret) 455 if (ret) {
456 btrfs_abort_transaction(trans, root, ret);
443 goto fail; 457 goto fail;
458 }
444 459
445 btrfs_i_size_write(dir, dir->i_size + namelen * 2); 460 btrfs_i_size_write(dir, dir->i_size + namelen * 2);
446 ret = btrfs_update_inode(trans, root, dir); 461 ret = btrfs_update_inode(trans, root, dir);
@@ -769,6 +784,31 @@ none:
769 return -ENOENT; 784 return -ENOENT;
770} 785}
771 786
787/*
788 * Validity check of prev em and next em:
789 * 1) no prev/next em
790 * 2) prev/next em is a hole/inline extent
791 */
792static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
793{
794 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
795 struct extent_map *prev = NULL, *next = NULL;
796 int ret = 0;
797
798 read_lock(&em_tree->lock);
799 prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1);
800 next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
801 read_unlock(&em_tree->lock);
802
803 if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) &&
804 (!next || next->block_start >= EXTENT_MAP_LAST_BYTE))
805 ret = 1;
806 free_extent_map(prev);
807 free_extent_map(next);
808
809 return ret;
810}
811
772static int should_defrag_range(struct inode *inode, u64 start, u64 len, 812static int should_defrag_range(struct inode *inode, u64 start, u64 len,
773 int thresh, u64 *last_len, u64 *skip, 813 int thresh, u64 *last_len, u64 *skip,
774 u64 *defrag_end) 814 u64 *defrag_end)
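check_adjacent_extents(), added above, looks up the extent maps on either side of the candidate; if each neighbor is missing or is a hole/inline extent, i.e. block_start >= EXTENT_MAP_LAST_BYTE, defragging this extent cannot merge it with anything, so should_defrag_range() now skips it. A userspace sketch of the predicate, with plain pointers standing in for the rbtree lookups and LAST_BYTE as a made-up sentinel:

    #include <stdio.h>

    /* made-up sentinel playing the role of EXTENT_MAP_LAST_BYTE */
    #define LAST_BYTE ((unsigned long long)-4096)

    struct em { unsigned long long start, len, block_start; };

    /* 1 if em has no real (on-disk) neighbor on either side */
    static int nothing_to_merge(const struct em *prev, const struct em *next)
    {
            return (!prev || prev->block_start >= LAST_BYTE) &&
                   (!next || next->block_start >= LAST_BYTE);
    }

    int main(void)
    {
            struct em hole = { 0, 4096, LAST_BYTE };        /* hole/inline */
            struct em real = { 8192, 4096, 1 << 20 };       /* real extent */

            printf("%d\n", nothing_to_merge(&hole, NULL));  /* 1: skip defrag */
            printf("%d\n", nothing_to_merge(&hole, &real)); /* 0: worth it */
            return 0;
    }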
@@ -797,17 +837,25 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
797 837
798 if (!em) { 838 if (!em) {
799 /* get the big lock and read metadata off disk */ 839 /* get the big lock and read metadata off disk */
800 lock_extent(io_tree, start, start + len - 1, GFP_NOFS); 840 lock_extent(io_tree, start, start + len - 1);
801 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 841 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
802 unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); 842 unlock_extent(io_tree, start, start + len - 1);
803 843
804 if (IS_ERR(em)) 844 if (IS_ERR(em))
805 return 0; 845 return 0;
806 } 846 }
807 847
808 /* this will cover holes, and inline extents */ 848 /* this will cover holes, and inline extents */
809 if (em->block_start >= EXTENT_MAP_LAST_BYTE) 849 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
810 ret = 0; 850 ret = 0;
851 goto out;
852 }
853
854 /* If we have nothing to merge with, just skip. */
855 if (check_adjacent_extents(inode, em)) {
856 ret = 0;
857 goto out;
858 }
811 859
812 /* 860 /*
813 * we hit a real extent, if it is big don't bother defragging it again 861 * we hit a real extent, if it is big don't bother defragging it again
@@ -815,6 +863,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
815 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) 863 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
816 ret = 0; 864 ret = 0;
817 865
866out:
818 /* 867 /*
819 * last_len ends up being a counter of how many bytes we've defragged. 868 * last_len ends up being a counter of how many bytes we've defragged.
820 * every time we choose not to defrag an extent, we reset *last_len 869 * every time we choose not to defrag an extent, we reset *last_len
@@ -856,6 +905,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
856 u64 isize = i_size_read(inode); 905 u64 isize = i_size_read(inode);
857 u64 page_start; 906 u64 page_start;
858 u64 page_end; 907 u64 page_end;
908 u64 page_cnt;
859 int ret; 909 int ret;
860 int i; 910 int i;
861 int i_done; 911 int i_done;
@@ -864,19 +914,21 @@ static int cluster_pages_for_defrag(struct inode *inode,
864 struct extent_io_tree *tree; 914 struct extent_io_tree *tree;
865 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 915 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
866 916
867 if (isize == 0)
868 return 0;
869 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 917 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
918 if (!isize || start_index > file_end)
919 return 0;
920
921 page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
870 922
871 ret = btrfs_delalloc_reserve_space(inode, 923 ret = btrfs_delalloc_reserve_space(inode,
872 num_pages << PAGE_CACHE_SHIFT); 924 page_cnt << PAGE_CACHE_SHIFT);
873 if (ret) 925 if (ret)
874 return ret; 926 return ret;
875 i_done = 0; 927 i_done = 0;
876 tree = &BTRFS_I(inode)->io_tree; 928 tree = &BTRFS_I(inode)->io_tree;
877 929
878 /* step one, lock all the pages */ 930 /* step one, lock all the pages */
879 for (i = 0; i < num_pages; i++) { 931 for (i = 0; i < page_cnt; i++) {
880 struct page *page; 932 struct page *page;
881again: 933again:
882 page = find_or_create_page(inode->i_mapping, 934 page = find_or_create_page(inode->i_mapping,
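The per-page EOF test that used to live inside the locking loop (deleted further down) is replaced by computing page_cnt once up front: clamp the requested cluster to the file's last page before reserving delalloc space, so the reservation and every later release cover the same page count. The arithmetic, runnable as-is:

    #include <stdio.h>

    #define PAGE_SHIFT 12

    static unsigned long long clamp_pages(unsigned long long isize,
                                          unsigned long long start_index,
                                          unsigned long long num_pages)
    {
            unsigned long long file_end;

            if (!isize)
                    return 0;
            file_end = (isize - 1) >> PAGE_SHIFT;   /* index of last page */
            if (start_index > file_end)
                    return 0;                       /* cluster is past EOF */
            if (num_pages > file_end - start_index + 1)
                    num_pages = file_end - start_index + 1;
            return num_pages;
    }

    int main(void)
    {
            /* 5 pages of data, a 16-page cluster starting at page 3 */
            printf("%llu\n", clamp_pages(5ULL << PAGE_SHIFT, 3, 16)); /* 2 */
            return 0;
    }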
@@ -887,10 +939,10 @@ again:
887 page_start = page_offset(page); 939 page_start = page_offset(page);
888 page_end = page_start + PAGE_CACHE_SIZE - 1; 940 page_end = page_start + PAGE_CACHE_SIZE - 1;
889 while (1) { 941 while (1) {
890 lock_extent(tree, page_start, page_end, GFP_NOFS); 942 lock_extent(tree, page_start, page_end);
891 ordered = btrfs_lookup_ordered_extent(inode, 943 ordered = btrfs_lookup_ordered_extent(inode,
892 page_start); 944 page_start);
893 unlock_extent(tree, page_start, page_end, GFP_NOFS); 945 unlock_extent(tree, page_start, page_end);
894 if (!ordered) 946 if (!ordered)
895 break; 947 break;
896 948
@@ -898,6 +950,15 @@ again:
898 btrfs_start_ordered_extent(inode, ordered, 1); 950 btrfs_start_ordered_extent(inode, ordered, 1);
899 btrfs_put_ordered_extent(ordered); 951 btrfs_put_ordered_extent(ordered);
900 lock_page(page); 952 lock_page(page);
953 /*
954 * we unlocked the page above, so we need check if
955 * it was released or not.
956 */
957 if (page->mapping != inode->i_mapping) {
958 unlock_page(page);
959 page_cache_release(page);
960 goto again;
961 }
901 } 962 }
902 963
903 if (!PageUptodate(page)) { 964 if (!PageUptodate(page)) {
@@ -911,15 +972,6 @@ again:
911 } 972 }
912 } 973 }
913 974
914 isize = i_size_read(inode);
915 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
916 if (!isize || page->index > file_end) {
917 /* whoops, we blew past eof, skip this page */
918 unlock_page(page);
919 page_cache_release(page);
920 break;
921 }
922
923 if (page->mapping != inode->i_mapping) { 975 if (page->mapping != inode->i_mapping) {
924 unlock_page(page); 976 unlock_page(page);
925 page_cache_release(page); 977 page_cache_release(page);
@@ -946,19 +998,18 @@ again:
946 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; 998 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
947 999
948 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1000 lock_extent_bits(&BTRFS_I(inode)->io_tree,
949 page_start, page_end - 1, 0, &cached_state, 1001 page_start, page_end - 1, 0, &cached_state);
950 GFP_NOFS);
951 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, 1002 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
952 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 1003 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
953 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, 1004 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
954 GFP_NOFS); 1005 GFP_NOFS);
955 1006
956 if (i_done != num_pages) { 1007 if (i_done != page_cnt) {
957 spin_lock(&BTRFS_I(inode)->lock); 1008 spin_lock(&BTRFS_I(inode)->lock);
958 BTRFS_I(inode)->outstanding_extents++; 1009 BTRFS_I(inode)->outstanding_extents++;
959 spin_unlock(&BTRFS_I(inode)->lock); 1010 spin_unlock(&BTRFS_I(inode)->lock);
960 btrfs_delalloc_release_space(inode, 1011 btrfs_delalloc_release_space(inode,
961 (num_pages - i_done) << PAGE_CACHE_SHIFT); 1012 (page_cnt - i_done) << PAGE_CACHE_SHIFT);
962 } 1013 }
963 1014
964 1015
@@ -983,7 +1034,7 @@ out:
983 unlock_page(pages[i]); 1034 unlock_page(pages[i]);
984 page_cache_release(pages[i]); 1035 page_cache_release(pages[i]);
985 } 1036 }
986 btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); 1037 btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT);
987 return ret; 1038 return ret;
988 1039
989} 1040}
@@ -1089,12 +1140,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1089 if (!(inode->i_sb->s_flags & MS_ACTIVE)) 1140 if (!(inode->i_sb->s_flags & MS_ACTIVE))
1090 break; 1141 break;
1091 1142
1092 if (!newer_than && 1143 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1093 !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1144 PAGE_CACHE_SIZE, extent_thresh,
1094 PAGE_CACHE_SIZE, 1145 &last_len, &skip, &defrag_end)) {
1095 extent_thresh,
1096 &last_len, &skip,
1097 &defrag_end)) {
1098 unsigned long next; 1146 unsigned long next;
1099 /* 1147 /*
1100 * the should_defrag function tells us how much to skip 1148 * the should_defrag function tells us how much to skip
@@ -1123,17 +1171,24 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1123 ra_index += max_cluster; 1171 ra_index += max_cluster;
1124 } 1172 }
1125 1173
1174 mutex_lock(&inode->i_mutex);
1126 ret = cluster_pages_for_defrag(inode, pages, i, cluster); 1175 ret = cluster_pages_for_defrag(inode, pages, i, cluster);
1127 if (ret < 0) 1176 if (ret < 0) {
1177 mutex_unlock(&inode->i_mutex);
1128 goto out_ra; 1178 goto out_ra;
1179 }
1129 1180
1130 defrag_count += ret; 1181 defrag_count += ret;
1131 balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); 1182 balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
1183 mutex_unlock(&inode->i_mutex);
1132 1184
1133 if (newer_than) { 1185 if (newer_than) {
1134 if (newer_off == (u64)-1) 1186 if (newer_off == (u64)-1)
1135 break; 1187 break;
1136 1188
1189 if (ret > 0)
1190 i += ret;
1191
1137 newer_off = max(newer_off + 1, 1192 newer_off = max(newer_off + 1,
1138 (u64)i << PAGE_CACHE_SHIFT); 1193 (u64)i << PAGE_CACHE_SHIFT);
1139 1194
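cluster_pages_for_defrag() now runs under i_mutex and returns how many pages it actually processed; in newer_than mode the caller first advances its page index by that count (i += ret) and only then recomputes newer_off, so the next scan starts past the cluster just written. A small sketch of the index advance, assuming a 4K page (PAGE_SHIFT = 12):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    static unsigned long long max_u64(unsigned long long a, unsigned long long b)
    {
            return a > b ? a : b;
    }

    int main(void)
    {
            unsigned long i = 10;           /* current page index */
            int ret = 4;                    /* pages the cluster pass defragged */
            unsigned long long newer_off = 0;

            if (ret > 0)
                    i += ret;               /* skip what was just written */
            newer_off = max_u64(newer_off + 1,
                                (unsigned long long)i << PAGE_SHIFT);
            printf("next scan from byte %llu (page %lu)\n", newer_off, i);
            return 0;
    }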
@@ -1966,7 +2021,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1966 dest->root_key.objectid, 2021 dest->root_key.objectid,
1967 dentry->d_name.name, 2022 dentry->d_name.name,
1968 dentry->d_name.len); 2023 dentry->d_name.len);
1969 BUG_ON(ret); 2024 if (ret) {
2025 err = ret;
2026 btrfs_abort_transaction(trans, root, ret);
2027 goto out_end_trans;
2028 }
1970 2029
1971 btrfs_record_root_in_trans(trans, dest); 2030 btrfs_record_root_in_trans(trans, dest);
1972 2031
@@ -1979,11 +2038,16 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1979 ret = btrfs_insert_orphan_item(trans, 2038 ret = btrfs_insert_orphan_item(trans,
1980 root->fs_info->tree_root, 2039 root->fs_info->tree_root,
1981 dest->root_key.objectid); 2040 dest->root_key.objectid);
1982 BUG_ON(ret); 2041 if (ret) {
2042 btrfs_abort_transaction(trans, root, ret);
2043 err = ret;
2044 goto out_end_trans;
2045 }
1983 } 2046 }
1984 2047out_end_trans:
1985 ret = btrfs_end_transaction(trans, root); 2048 ret = btrfs_end_transaction(trans, root);
1986 BUG_ON(ret); 2049 if (ret && !err)
2050 err = ret;
1987 inode->i_flags |= S_DEAD; 2051 inode->i_flags |= S_DEAD;
1988out_up_write: 2052out_up_write:
1989 up_write(&root->fs_info->subvol_sem); 2053 up_write(&root->fs_info->subvol_sem);
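The new out_end_trans label introduces the err/ret split this series uses throughout: the first failure is latched into err, and btrfs_end_transaction()'s own return value is only promoted into err when nothing failed earlier. A runnable distillation:

    #include <stdio.h>

    static int end_transaction(int outcome) { return outcome; }

    static int snap_destroy(int unlink_ret, int end_ret)
    {
            int err = 0;
            int ret = unlink_ret;

            if (ret) {
                    err = ret;              /* first failure wins */
                    goto out_end_trans;
            }
    out_end_trans:
            ret = end_transaction(end_ret);
            if (ret && !err)                /* never mask an earlier error */
                    err = ret;
            return err;
    }

    int main(void)
    {
            printf("%d\n", snap_destroy(-5, -30));  /* -5: first error kept */
            printf("%d\n", snap_destroy(0, -30));   /* -30 */
            return 0;
    }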
@@ -2326,13 +2390,13 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2326 another, and lock file content */ 2390 another, and lock file content */
2327 while (1) { 2391 while (1) {
2328 struct btrfs_ordered_extent *ordered; 2392 struct btrfs_ordered_extent *ordered;
2329 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2393 lock_extent(&BTRFS_I(src)->io_tree, off, off+len);
2330 ordered = btrfs_lookup_first_ordered_extent(src, off+len); 2394 ordered = btrfs_lookup_first_ordered_extent(src, off+len);
2331 if (!ordered && 2395 if (!ordered &&
2332 !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, 2396 !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
2333 EXTENT_DELALLOC, 0, NULL)) 2397 EXTENT_DELALLOC, 0, NULL))
2334 break; 2398 break;
2335 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2399 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
2336 if (ordered) 2400 if (ordered)
2337 btrfs_put_ordered_extent(ordered); 2401 btrfs_put_ordered_extent(ordered);
2338 btrfs_wait_ordered_range(src, off, len); 2402 btrfs_wait_ordered_range(src, off, len);
@@ -2447,11 +2511,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2447 new_key.offset, 2511 new_key.offset,
2448 new_key.offset + datal, 2512 new_key.offset + datal,
2449 &hint_byte, 1); 2513 &hint_byte, 1);
2450 BUG_ON(ret); 2514 if (ret) {
2515 btrfs_abort_transaction(trans, root,
2516 ret);
2517 btrfs_end_transaction(trans, root);
2518 goto out;
2519 }
2451 2520
2452 ret = btrfs_insert_empty_item(trans, root, path, 2521 ret = btrfs_insert_empty_item(trans, root, path,
2453 &new_key, size); 2522 &new_key, size);
2454 BUG_ON(ret); 2523 if (ret) {
2524 btrfs_abort_transaction(trans, root,
2525 ret);
2526 btrfs_end_transaction(trans, root);
2527 goto out;
2528 }
2455 2529
2456 leaf = path->nodes[0]; 2530 leaf = path->nodes[0];
2457 slot = path->slots[0]; 2531 slot = path->slots[0];
@@ -2478,7 +2552,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2478 btrfs_ino(inode), 2552 btrfs_ino(inode),
2479 new_key.offset - datao, 2553 new_key.offset - datao,
2480 0); 2554 0);
2481 BUG_ON(ret); 2555 if (ret) {
2556 btrfs_abort_transaction(trans,
2557 root,
2558 ret);
2559 btrfs_end_transaction(trans,
2560 root);
2561 goto out;
2562
2563 }
2482 } 2564 }
2483 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 2565 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
2484 u64 skip = 0; 2566 u64 skip = 0;
@@ -2503,11 +2585,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2503 new_key.offset, 2585 new_key.offset,
2504 new_key.offset + datal, 2586 new_key.offset + datal,
2505 &hint_byte, 1); 2587 &hint_byte, 1);
2506 BUG_ON(ret); 2588 if (ret) {
2589 btrfs_abort_transaction(trans, root,
2590 ret);
2591 btrfs_end_transaction(trans, root);
2592 goto out;
2593 }
2507 2594
2508 ret = btrfs_insert_empty_item(trans, root, path, 2595 ret = btrfs_insert_empty_item(trans, root, path,
2509 &new_key, size); 2596 &new_key, size);
2510 BUG_ON(ret); 2597 if (ret) {
2598 btrfs_abort_transaction(trans, root,
2599 ret);
2600 btrfs_end_transaction(trans, root);
2601 goto out;
2602 }
2511 2603
2512 if (skip) { 2604 if (skip) {
2513 u32 start = 2605 u32 start =
@@ -2541,8 +2633,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2541 btrfs_i_size_write(inode, endoff); 2633 btrfs_i_size_write(inode, endoff);
2542 2634
2543 ret = btrfs_update_inode(trans, root, inode); 2635 ret = btrfs_update_inode(trans, root, inode);
2544 BUG_ON(ret); 2636 if (ret) {
2545 btrfs_end_transaction(trans, root); 2637 btrfs_abort_transaction(trans, root, ret);
2638 btrfs_end_transaction(trans, root);
2639 goto out;
2640 }
2641 ret = btrfs_end_transaction(trans, root);
2546 } 2642 }
2547next: 2643next:
2548 btrfs_release_path(path); 2644 btrfs_release_path(path);
@@ -2551,7 +2647,7 @@ next:
2551 ret = 0; 2647 ret = 0;
2552out: 2648out:
2553 btrfs_release_path(path); 2649 btrfs_release_path(path);
2554 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2650 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
2555out_unlock: 2651out_unlock:
2556 mutex_unlock(&src->i_mutex); 2652 mutex_unlock(&src->i_mutex);
2557 mutex_unlock(&inode->i_mutex); 2653 mutex_unlock(&inode->i_mutex);
@@ -3066,8 +3162,8 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
3066 goto out; 3162 goto out;
3067 3163
3068 extent_item_pos = loi->logical - key.objectid; 3164 extent_item_pos = loi->logical - key.objectid;
3069 ret = iterate_extent_inodes(root->fs_info, path, key.objectid, 3165 ret = iterate_extent_inodes(root->fs_info, key.objectid,
3070 extent_item_pos, build_ino_list, 3166 extent_item_pos, 0, build_ino_list,
3071 inodes); 3167 inodes);
3072 3168
3073 if (ret < 0) 3169 if (ret < 0)
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5e178d8f7167..272f911203ff 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -208,7 +208,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
208 * take a spinning write lock. This will wait for both 208 * take a spinning write lock. This will wait for both
209 * blocking readers or writers 209 * blocking readers or writers
210 */ 210 */
211int btrfs_tree_lock(struct extent_buffer *eb) 211void btrfs_tree_lock(struct extent_buffer *eb)
212{ 212{
213again: 213again:
214 wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); 214 wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
@@ -230,13 +230,12 @@ again:
230 atomic_inc(&eb->spinning_writers); 230 atomic_inc(&eb->spinning_writers);
231 atomic_inc(&eb->write_locks); 231 atomic_inc(&eb->write_locks);
232 eb->lock_owner = current->pid; 232 eb->lock_owner = current->pid;
233 return 0;
234} 233}
235 234
236/* 235/*
237 * drop a spinning or a blocking write lock. 236 * drop a spinning or a blocking write lock.
238 */ 237 */
239int btrfs_tree_unlock(struct extent_buffer *eb) 238void btrfs_tree_unlock(struct extent_buffer *eb)
240{ 239{
241 int blockers = atomic_read(&eb->blocking_writers); 240 int blockers = atomic_read(&eb->blocking_writers);
242 241
@@ -255,7 +254,6 @@ int btrfs_tree_unlock(struct extent_buffer *eb)
255 atomic_dec(&eb->spinning_writers); 254 atomic_dec(&eb->spinning_writers);
256 write_unlock(&eb->lock); 255 write_unlock(&eb->lock);
257 } 256 }
258 return 0;
259} 257}
260 258
261void btrfs_assert_tree_locked(struct extent_buffer *eb) 259void btrfs_assert_tree_locked(struct extent_buffer *eb)
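btrfs_tree_lock() and btrfs_tree_unlock() could only ever return 0, so they become void (the header change follows below); int returns survive only where a real failure can be produced. A before/after sketch of why the void signature reads better; tree_unlock_old and tree_unlock are invented names:

    #include <stdio.h>

    /* before: the return value carried no information */
    static int tree_unlock_old(void)
    {
            /* ... drop the lock ... */
            return 0;
    }

    /* after: the signature says what the old one could not */
    static void tree_unlock(void)
    {
            /* ... drop the lock ... */
    }

    int main(void)
    {
            if (tree_unlock_old())  /* dead branch a reader must reason away */
                    fprintf(stderr, "unreachable\n");
            tree_unlock();          /* nothing to check, nothing to misread */
            return 0;
    }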
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 17247ddb81a0..ca52681e5f40 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -24,8 +24,8 @@
24#define BTRFS_WRITE_LOCK_BLOCKING 3 24#define BTRFS_WRITE_LOCK_BLOCKING 3
25#define BTRFS_READ_LOCK_BLOCKING 4 25#define BTRFS_READ_LOCK_BLOCKING 4
26 26
27int btrfs_tree_lock(struct extent_buffer *eb); 27void btrfs_tree_lock(struct extent_buffer *eb);
28int btrfs_tree_unlock(struct extent_buffer *eb); 28void btrfs_tree_unlock(struct extent_buffer *eb);
29int btrfs_try_spin_lock(struct extent_buffer *eb); 29int btrfs_try_spin_lock(struct extent_buffer *eb);
30 30
31void btrfs_tree_read_lock(struct extent_buffer *eb); 31void btrfs_tree_read_lock(struct extent_buffer *eb);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a1c940425307..bbf6d0d9aebe 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -59,6 +59,14 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
59 return NULL; 59 return NULL;
60} 60}
61 61
62static void ordered_data_tree_panic(struct inode *inode, int errno,
63 u64 offset)
64{
65 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
66 btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
67 "%llu\n", (unsigned long long)offset);
68}
69
62/* 70/*
63 * look for a given offset in the tree, and if it can't be found return the 71 * look for a given offset in the tree, and if it can't be found return the
64 * first lesser offset 72 * first lesser offset
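tree_insert() hands back the node already occupying a slot when keys collide, so the BUG_ON(node) below turns into a call to the new ordered_data_tree_panic() helper, which at least reports the offending file offset before dying. A userspace sketch with a linear scan standing in for the rbtree:

    #include <stdio.h>
    #include <stdlib.h>

    #define NENTRIES 8
    static unsigned long long entries[NENTRIES];
    static int nentries;

    /* returns a pointer to the clashing entry, or NULL on success */
    static unsigned long long *tree_insert(unsigned long long offset)
    {
            for (int i = 0; i < nentries; i++)
                    if (entries[i] == offset)
                            return &entries[i];
            entries[nentries++] = offset;
            return NULL;
    }

    static void ordered_data_tree_panic(unsigned long long offset)
    {
            fprintf(stderr, "Inconsistency in ordered tree at offset %llu\n",
                    offset);
            abort();        /* userspace stand-in for btrfs_panic() */
    }

    int main(void)
    {
            if (tree_insert(4096))
                    ordered_data_tree_panic(4096);
            if (tree_insert(4096))          /* duplicate: panics */
                    ordered_data_tree_panic(4096);
            return 0;
    }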
@@ -207,7 +215,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
207 spin_lock(&tree->lock); 215 spin_lock(&tree->lock);
208 node = tree_insert(&tree->tree, file_offset, 216 node = tree_insert(&tree->tree, file_offset,
209 &entry->rb_node); 217 &entry->rb_node);
210 BUG_ON(node); 218 if (node)
219 ordered_data_tree_panic(inode, -EEXIST, file_offset);
211 spin_unlock(&tree->lock); 220 spin_unlock(&tree->lock);
212 221
213 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 222 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
@@ -215,7 +224,6 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
215 &BTRFS_I(inode)->root->fs_info->ordered_extents); 224 &BTRFS_I(inode)->root->fs_info->ordered_extents);
216 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 225 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
217 226
218 BUG_ON(node);
219 return 0; 227 return 0;
220} 228}
221 229
@@ -249,9 +257,9 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
249 * when an ordered extent is finished. If the list covers more than one 257 * when an ordered extent is finished. If the list covers more than one
250 * ordered extent, it is split across multiples. 258 * ordered extent, it is split across multiples.
251 */ 259 */
252int btrfs_add_ordered_sum(struct inode *inode, 260void btrfs_add_ordered_sum(struct inode *inode,
253 struct btrfs_ordered_extent *entry, 261 struct btrfs_ordered_extent *entry,
254 struct btrfs_ordered_sum *sum) 262 struct btrfs_ordered_sum *sum)
255{ 263{
256 struct btrfs_ordered_inode_tree *tree; 264 struct btrfs_ordered_inode_tree *tree;
257 265
@@ -259,7 +267,6 @@ int btrfs_add_ordered_sum(struct inode *inode,
259 spin_lock(&tree->lock); 267 spin_lock(&tree->lock);
260 list_add_tail(&sum->list, &entry->list); 268 list_add_tail(&sum->list, &entry->list);
261 spin_unlock(&tree->lock); 269 spin_unlock(&tree->lock);
262 return 0;
263} 270}
264 271
265/* 272/*
@@ -384,7 +391,7 @@ out:
384 * used to drop a reference on an ordered extent. This will free 391 * used to drop a reference on an ordered extent. This will free
385 * the extent if the last reference is dropped 392 * the extent if the last reference is dropped
386 */ 393 */
387int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) 394void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
388{ 395{
389 struct list_head *cur; 396 struct list_head *cur;
390 struct btrfs_ordered_sum *sum; 397 struct btrfs_ordered_sum *sum;
@@ -400,7 +407,6 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
400 } 407 }
401 kfree(entry); 408 kfree(entry);
402 } 409 }
403 return 0;
404} 410}
405 411
406/* 412/*
@@ -408,8 +414,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
408 * and you must wake_up entry->wait. You must hold the tree lock 414 * and you must wake_up entry->wait. You must hold the tree lock
409 * while you call this function. 415 * while you call this function.
410 */ 416 */
411static int __btrfs_remove_ordered_extent(struct inode *inode, 417static void __btrfs_remove_ordered_extent(struct inode *inode,
412 struct btrfs_ordered_extent *entry) 418 struct btrfs_ordered_extent *entry)
413{ 419{
414 struct btrfs_ordered_inode_tree *tree; 420 struct btrfs_ordered_inode_tree *tree;
415 struct btrfs_root *root = BTRFS_I(inode)->root; 421 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -436,35 +442,30 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
436 list_del_init(&BTRFS_I(inode)->ordered_operations); 442 list_del_init(&BTRFS_I(inode)->ordered_operations);
437 } 443 }
438 spin_unlock(&root->fs_info->ordered_extent_lock); 444 spin_unlock(&root->fs_info->ordered_extent_lock);
439
440 return 0;
441} 445}
442 446
443/* 447/*
444 * remove an ordered extent from the tree. No references are dropped 448 * remove an ordered extent from the tree. No references are dropped
445 * but any waiters are woken. 449 * but any waiters are woken.
446 */ 450 */
447int btrfs_remove_ordered_extent(struct inode *inode, 451void btrfs_remove_ordered_extent(struct inode *inode,
448 struct btrfs_ordered_extent *entry) 452 struct btrfs_ordered_extent *entry)
449{ 453{
450 struct btrfs_ordered_inode_tree *tree; 454 struct btrfs_ordered_inode_tree *tree;
451 int ret;
452 455
453 tree = &BTRFS_I(inode)->ordered_tree; 456 tree = &BTRFS_I(inode)->ordered_tree;
454 spin_lock(&tree->lock); 457 spin_lock(&tree->lock);
455 ret = __btrfs_remove_ordered_extent(inode, entry); 458 __btrfs_remove_ordered_extent(inode, entry);
456 spin_unlock(&tree->lock); 459 spin_unlock(&tree->lock);
457 wake_up(&entry->wait); 460 wake_up(&entry->wait);
458
459 return ret;
460} 461}
461 462
462/* 463/*
463 * wait for all the ordered extents in a root. This is done when balancing 464 * wait for all the ordered extents in a root. This is done when balancing
464 * space between drives. 465 * space between drives.
465 */ 466 */
466int btrfs_wait_ordered_extents(struct btrfs_root *root, 467void btrfs_wait_ordered_extents(struct btrfs_root *root,
467 int nocow_only, int delay_iput) 468 int nocow_only, int delay_iput)
468{ 469{
469 struct list_head splice; 470 struct list_head splice;
470 struct list_head *cur; 471 struct list_head *cur;
@@ -512,7 +513,6 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root,
512 spin_lock(&root->fs_info->ordered_extent_lock); 513 spin_lock(&root->fs_info->ordered_extent_lock);
513 } 514 }
514 spin_unlock(&root->fs_info->ordered_extent_lock); 515 spin_unlock(&root->fs_info->ordered_extent_lock);
515 return 0;
516} 516}
517 517
518/* 518/*
@@ -525,7 +525,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root,
525 * extra check to make sure the ordered operation list really is empty 525 * extra check to make sure the ordered operation list really is empty
526 * before we return 526 * before we return
527 */ 527 */
528int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) 528void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
529{ 529{
530 struct btrfs_inode *btrfs_inode; 530 struct btrfs_inode *btrfs_inode;
531 struct inode *inode; 531 struct inode *inode;
@@ -573,8 +573,6 @@ again:
573 573
574 spin_unlock(&root->fs_info->ordered_extent_lock); 574 spin_unlock(&root->fs_info->ordered_extent_lock);
575 mutex_unlock(&root->fs_info->ordered_operations_mutex); 575 mutex_unlock(&root->fs_info->ordered_operations_mutex);
576
577 return 0;
578} 576}
579 577
580/* 578/*
@@ -609,7 +607,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
609/* 607/*
610 * Used to wait on ordered extents across a large range of bytes. 608 * Used to wait on ordered extents across a large range of bytes.
611 */ 609 */
612int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 610void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
613{ 611{
614 u64 end; 612 u64 end;
615 u64 orig_end; 613 u64 orig_end;
@@ -664,7 +662,6 @@ again:
664 schedule_timeout(1); 662 schedule_timeout(1);
665 goto again; 663 goto again;
666 } 664 }
667 return 0;
668} 665}
669 666
670/* 667/*
@@ -948,9 +945,8 @@ out:
948 * If trans is not null, we'll do a friendly check for a transaction that 945 * If trans is not null, we'll do a friendly check for a transaction that
949 * is already flushing things and force the IO down ourselves. 946 * is already flushing things and force the IO down ourselves.
950 */ 947 */
951int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 948void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
952 struct btrfs_root *root, 949 struct btrfs_root *root, struct inode *inode)
953 struct inode *inode)
954{ 950{
955 u64 last_mod; 951 u64 last_mod;
956 952
@@ -961,7 +957,7 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
961 * commit, we can safely return without doing anything 957 * commit, we can safely return without doing anything
962 */ 958 */
963 if (last_mod < root->fs_info->last_trans_committed) 959 if (last_mod < root->fs_info->last_trans_committed)
964 return 0; 960 return;
965 961
966 /* 962 /*
967 * the transaction is already committing. Just start the IO and 963 * the transaction is already committing. Just start the IO and
@@ -969,7 +965,7 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
969 */ 965 */
970 if (trans && root->fs_info->running_transaction->blocked) { 966 if (trans && root->fs_info->running_transaction->blocked) {
971 btrfs_wait_ordered_range(inode, 0, (u64)-1); 967 btrfs_wait_ordered_range(inode, 0, (u64)-1);
972 return 0; 968 return;
973 } 969 }
974 970
975 spin_lock(&root->fs_info->ordered_extent_lock); 971 spin_lock(&root->fs_info->ordered_extent_lock);
@@ -978,6 +974,4 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
978 &root->fs_info->ordered_operations); 974 &root->fs_info->ordered_operations);
979 } 975 }
980 spin_unlock(&root->fs_info->ordered_extent_lock); 976 spin_unlock(&root->fs_info->ordered_extent_lock);
981
982 return 0;
983} 977}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index ff1f69aa1883..c355ad4dc1a6 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -138,8 +138,8 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
138 t->last = NULL; 138 t->last = NULL;
139} 139}
140 140
141int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); 141void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
142int btrfs_remove_ordered_extent(struct inode *inode, 142void btrfs_remove_ordered_extent(struct inode *inode,
143 struct btrfs_ordered_extent *entry); 143 struct btrfs_ordered_extent *entry);
144int btrfs_dec_test_ordered_pending(struct inode *inode, 144int btrfs_dec_test_ordered_pending(struct inode *inode,
145 struct btrfs_ordered_extent **cached, 145 struct btrfs_ordered_extent **cached,
@@ -154,14 +154,14 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
154int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, 154int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
155 u64 start, u64 len, u64 disk_len, 155 u64 start, u64 len, u64 disk_len,
156 int type, int compress_type); 156 int type, int compress_type);
157int btrfs_add_ordered_sum(struct inode *inode, 157void btrfs_add_ordered_sum(struct inode *inode,
158 struct btrfs_ordered_extent *entry, 158 struct btrfs_ordered_extent *entry,
159 struct btrfs_ordered_sum *sum); 159 struct btrfs_ordered_sum *sum);
160struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, 160struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
161 u64 file_offset); 161 u64 file_offset);
162void btrfs_start_ordered_extent(struct inode *inode, 162void btrfs_start_ordered_extent(struct inode *inode,
163 struct btrfs_ordered_extent *entry, int wait); 163 struct btrfs_ordered_extent *entry, int wait);
164int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 164void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
165struct btrfs_ordered_extent * 165struct btrfs_ordered_extent *
166btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 166btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
167struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, 167struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
@@ -170,10 +170,10 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
170int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 170int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
171 struct btrfs_ordered_extent *ordered); 171 struct btrfs_ordered_extent *ordered);
172int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 172int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
173int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 173void btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
174int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 174void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
175 struct btrfs_root *root, 175 struct btrfs_root *root,
176 struct inode *inode); 176 struct inode *inode);
177int btrfs_wait_ordered_extents(struct btrfs_root *root, 177void btrfs_wait_ordered_extents(struct btrfs_root *root,
178 int nocow_only, int delay_iput); 178 int nocow_only, int delay_iput);
179#endif 179#endif
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index f8be250963a0..24cad1695af7 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -58,7 +58,7 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
59 if (ret < 0) 59 if (ret < 0)
60 goto out; 60 goto out;
61 if (ret) { 61 if (ret) { /* JDM: Really? */
62 ret = -ENOENT; 62 ret = -ENOENT;
63 goto out; 63 goto out;
64 } 64 }
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 22db04550f6a..dc5d33146fdb 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -54,7 +54,6 @@
54 * than the 2 started one after another. 54 * than the 2 started one after another.
55 */ 55 */
56 56
57#define MAX_MIRRORS 2
58#define MAX_IN_FLIGHT 6 57#define MAX_IN_FLIGHT 6
59 58
60struct reada_extctl { 59struct reada_extctl {
@@ -71,7 +70,7 @@ struct reada_extent {
71 struct list_head extctl; 70 struct list_head extctl;
72 struct kref refcnt; 71 struct kref refcnt;
73 spinlock_t lock; 72 spinlock_t lock;
74 struct reada_zone *zones[MAX_MIRRORS]; 73 struct reada_zone *zones[BTRFS_MAX_MIRRORS];
75 int nzones; 74 int nzones;
76 struct btrfs_device *scheduled_for; 75 struct btrfs_device *scheduled_for;
77}; 76};
@@ -84,7 +83,8 @@ struct reada_zone {
84 spinlock_t lock; 83 spinlock_t lock;
85 int locked; 84 int locked;
86 struct btrfs_device *device; 85 struct btrfs_device *device;
87 struct btrfs_device *devs[MAX_MIRRORS]; /* full list, incl self */ 86 struct btrfs_device *devs[BTRFS_MAX_MIRRORS]; /* full list, incl
87 * self */
88 int ndevs; 88 int ndevs;
89 struct kref refcnt; 89 struct kref refcnt;
90}; 90};
@@ -365,9 +365,9 @@ again:
365 if (ret || !bbio || length < blocksize) 365 if (ret || !bbio || length < blocksize)
366 goto error; 366 goto error;
367 367
368 if (bbio->num_stripes > MAX_MIRRORS) { 368 if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
369 printk(KERN_ERR "btrfs readahead: more than %d copies not " 369 printk(KERN_ERR "btrfs readahead: more than %d copies not "
370 "supported", MAX_MIRRORS); 370 "supported", BTRFS_MAX_MIRRORS);
371 goto error; 371 goto error;
372 } 372 }
373 373
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8c1aae2c845d..017281dbb2a7 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -326,6 +326,19 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
326 return NULL; 326 return NULL;
327} 327}
328 328
329void backref_tree_panic(struct rb_node *rb_node, int errno,
330 u64 bytenr)
331{
332
333 struct btrfs_fs_info *fs_info = NULL;
334 struct backref_node *bnode = rb_entry(rb_node, struct backref_node,
335 rb_node);
336 if (bnode->root)
337 fs_info = bnode->root->fs_info;
338 btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
339 "found at offset %llu\n", (unsigned long long)bytenr);
340}
341
329/* 342/*
330 * walk up backref nodes until reach node presents tree root 343 * walk up backref nodes until reach node presents tree root
331 */ 344 */
@@ -452,7 +465,8 @@ static void update_backref_node(struct backref_cache *cache,
452 rb_erase(&node->rb_node, &cache->rb_root); 465 rb_erase(&node->rb_node, &cache->rb_root);
453 node->bytenr = bytenr; 466 node->bytenr = bytenr;
454 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); 467 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
455 BUG_ON(rb_node); 468 if (rb_node)
469 backref_tree_panic(rb_node, -EEXIST, bytenr);
456} 470}
457 471
458/* 472/*
@@ -999,7 +1013,8 @@ next:
999 if (!cowonly) { 1013 if (!cowonly) {
1000 rb_node = tree_insert(&cache->rb_root, node->bytenr, 1014 rb_node = tree_insert(&cache->rb_root, node->bytenr,
1001 &node->rb_node); 1015 &node->rb_node);
1002 BUG_ON(rb_node); 1016 if (rb_node)
1017 backref_tree_panic(rb_node, -EEXIST, node->bytenr);
1003 list_add_tail(&node->lower, &cache->leaves); 1018 list_add_tail(&node->lower, &cache->leaves);
1004 } 1019 }
1005 1020
@@ -1034,7 +1049,9 @@ next:
1034 if (!cowonly) { 1049 if (!cowonly) {
1035 rb_node = tree_insert(&cache->rb_root, upper->bytenr, 1050 rb_node = tree_insert(&cache->rb_root, upper->bytenr,
1036 &upper->rb_node); 1051 &upper->rb_node);
1037 BUG_ON(rb_node); 1052 if (rb_node)
1053 backref_tree_panic(rb_node, -EEXIST,
1054 upper->bytenr);
1038 } 1055 }
1039 1056
1040 list_add_tail(&edge->list[UPPER], &upper->lower); 1057 list_add_tail(&edge->list[UPPER], &upper->lower);
@@ -1180,7 +1197,8 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
1180 1197
1181 rb_node = tree_insert(&cache->rb_root, new_node->bytenr, 1198 rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
1182 &new_node->rb_node); 1199 &new_node->rb_node);
1183 BUG_ON(rb_node); 1200 if (rb_node)
1201 backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);
1184 1202
1185 if (!new_node->lowest) { 1203 if (!new_node->lowest) {
1186 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { 1204 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
@@ -1203,14 +1221,15 @@ fail:
1203/* 1221/*
1204 * helper to add 'address of tree root -> reloc tree' mapping 1222 * helper to add 'address of tree root -> reloc tree' mapping
1205 */ 1223 */
1206static int __add_reloc_root(struct btrfs_root *root) 1224static int __must_check __add_reloc_root(struct btrfs_root *root)
1207{ 1225{
1208 struct rb_node *rb_node; 1226 struct rb_node *rb_node;
1209 struct mapping_node *node; 1227 struct mapping_node *node;
1210 struct reloc_control *rc = root->fs_info->reloc_ctl; 1228 struct reloc_control *rc = root->fs_info->reloc_ctl;
1211 1229
1212 node = kmalloc(sizeof(*node), GFP_NOFS); 1230 node = kmalloc(sizeof(*node), GFP_NOFS);
1213 BUG_ON(!node); 1231 if (!node)
1232 return -ENOMEM;
1214 1233
1215 node->bytenr = root->node->start; 1234 node->bytenr = root->node->start;
1216 node->data = root; 1235 node->data = root;
@@ -1219,7 +1238,12 @@ static int __add_reloc_root(struct btrfs_root *root)
1219 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1238 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1220 node->bytenr, &node->rb_node); 1239 node->bytenr, &node->rb_node);
1221 spin_unlock(&rc->reloc_root_tree.lock); 1240 spin_unlock(&rc->reloc_root_tree.lock);
1222 BUG_ON(rb_node); 1241 if (rb_node) {
1242 kfree(node);
1243 btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
1244 "for start=%llu while inserting into relocation "
1245 "tree\n");
1246 }
1223 1247
1224 list_add_tail(&root->root_list, &rc->reloc_roots); 1248 list_add_tail(&root->root_list, &rc->reloc_roots);
1225 return 0; 1249 return 0;
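Since __add_reloc_root() can now fail with -ENOMEM, it is annotated __must_check (GCC's warn_unused_result), which turns a caller that drops the return value into a compile-time warning; btrfs_init_reloc_root() below consumes it with BUG_ON(ret < 0) for now. A compilable illustration:

    #include <stdio.h>
    #include <stdlib.h>

    #define __must_check __attribute__((warn_unused_result))

    static __must_check int add_reloc_root(void)
    {
            void *node = malloc(32);

            if (!node)
                    return -12;             /* -ENOMEM */
            free(node);
            return 0;
    }

    int main(void)
    {
            int ret = add_reloc_root();     /* dropping ret warns under -Wall */

            if (ret < 0) {
                    fprintf(stderr, "add_reloc_root: %d\n", ret);
                    return 1;
            }
            return 0;
    }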
@@ -1252,7 +1276,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
1252 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1276 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1253 node->bytenr, &node->rb_node); 1277 node->bytenr, &node->rb_node);
1254 spin_unlock(&rc->reloc_root_tree.lock); 1278 spin_unlock(&rc->reloc_root_tree.lock);
1255 BUG_ON(rb_node); 1279 if (rb_node)
1280 backref_tree_panic(rb_node, -EEXIST, node->bytenr);
1256 } else { 1281 } else {
1257 list_del_init(&root->root_list); 1282 list_del_init(&root->root_list);
1258 kfree(node); 1283 kfree(node);
@@ -1334,6 +1359,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1334 struct btrfs_root *reloc_root; 1359 struct btrfs_root *reloc_root;
1335 struct reloc_control *rc = root->fs_info->reloc_ctl; 1360 struct reloc_control *rc = root->fs_info->reloc_ctl;
1336 int clear_rsv = 0; 1361 int clear_rsv = 0;
1362 int ret;
1337 1363
1338 if (root->reloc_root) { 1364 if (root->reloc_root) {
1339 reloc_root = root->reloc_root; 1365 reloc_root = root->reloc_root;
@@ -1353,7 +1379,8 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1353 if (clear_rsv) 1379 if (clear_rsv)
1354 trans->block_rsv = NULL; 1380 trans->block_rsv = NULL;
1355 1381
1356 __add_reloc_root(reloc_root); 1382 ret = __add_reloc_root(reloc_root);
1383 BUG_ON(ret < 0);
1357 root->reloc_root = reloc_root; 1384 root->reloc_root = reloc_root;
1358 return 0; 1385 return 0;
1359} 1386}
@@ -1577,15 +1604,14 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1577 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1604 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1578 end--; 1605 end--;
1579 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 1606 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1580 key.offset, end, 1607 key.offset, end);
1581 GFP_NOFS);
1582 if (!ret) 1608 if (!ret)
1583 continue; 1609 continue;
1584 1610
1585 btrfs_drop_extent_cache(inode, key.offset, end, 1611 btrfs_drop_extent_cache(inode, key.offset, end,
1586 1); 1612 1);
1587 unlock_extent(&BTRFS_I(inode)->io_tree, 1613 unlock_extent(&BTRFS_I(inode)->io_tree,
1588 key.offset, end, GFP_NOFS); 1614 key.offset, end);
1589 } 1615 }
1590 } 1616 }
1591 1617
@@ -1956,9 +1982,9 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1956 } 1982 }
1957 1983
1958 /* the lock_extent waits for readpage to complete */ 1984 /* the lock_extent waits for readpage to complete */
1959 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1985 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
1960 btrfs_drop_extent_cache(inode, start, end, 1); 1986 btrfs_drop_extent_cache(inode, start, end, 1);
1961 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1987 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
1962 } 1988 }
1963 return 0; 1989 return 0;
1964} 1990}
@@ -2246,7 +2272,8 @@ again:
2246 } else { 2272 } else {
2247 list_del_init(&reloc_root->root_list); 2273 list_del_init(&reloc_root->root_list);
2248 } 2274 }
2249 btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); 2275 ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
2276 BUG_ON(ret < 0);
2250 } 2277 }
2251 2278
2252 if (found) { 2279 if (found) {
@@ -2862,12 +2889,12 @@ int prealloc_file_extent_cluster(struct inode *inode,
2862 else 2889 else
2863 end = cluster->end - offset; 2890 end = cluster->end - offset;
2864 2891
2865 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2892 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2866 num_bytes = end + 1 - start; 2893 num_bytes = end + 1 - start;
2867 ret = btrfs_prealloc_file_range(inode, 0, start, 2894 ret = btrfs_prealloc_file_range(inode, 0, start,
2868 num_bytes, num_bytes, 2895 num_bytes, num_bytes,
2869 end + 1, &alloc_hint); 2896 end + 1, &alloc_hint);
2870 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2897 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2871 if (ret) 2898 if (ret)
2872 break; 2899 break;
2873 nr++; 2900 nr++;
@@ -2899,7 +2926,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2899 em->bdev = root->fs_info->fs_devices->latest_bdev; 2926 em->bdev = root->fs_info->fs_devices->latest_bdev;
2900 set_bit(EXTENT_FLAG_PINNED, &em->flags); 2927 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2901 2928
2902 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2929 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2903 while (1) { 2930 while (1) {
2904 write_lock(&em_tree->lock); 2931 write_lock(&em_tree->lock);
2905 ret = add_extent_mapping(em_tree, em); 2932 ret = add_extent_mapping(em_tree, em);
@@ -2910,7 +2937,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2910 } 2937 }
2911 btrfs_drop_extent_cache(inode, start, end, 0); 2938 btrfs_drop_extent_cache(inode, start, end, 0);
2912 } 2939 }
2913 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2940 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2914 return ret; 2941 return ret;
2915} 2942}
2916 2943
@@ -2990,8 +3017,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
2990 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 3017 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2991 page_end = page_start + PAGE_CACHE_SIZE - 1; 3018 page_end = page_start + PAGE_CACHE_SIZE - 1;
2992 3019
2993 lock_extent(&BTRFS_I(inode)->io_tree, 3020 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
2994 page_start, page_end, GFP_NOFS);
2995 3021
2996 set_page_extent_mapped(page); 3022 set_page_extent_mapped(page);
2997 3023
@@ -3007,7 +3033,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
3007 set_page_dirty(page); 3033 set_page_dirty(page);
3008 3034
3009 unlock_extent(&BTRFS_I(inode)->io_tree, 3035 unlock_extent(&BTRFS_I(inode)->io_tree,
3010 page_start, page_end, GFP_NOFS); 3036 page_start, page_end);
3011 unlock_page(page); 3037 unlock_page(page);
3012 page_cache_release(page); 3038 page_cache_release(page);
3013 3039
@@ -3154,7 +3180,8 @@ static int add_tree_block(struct reloc_control *rc,
3154 block->key_ready = 0; 3180 block->key_ready = 0;
3155 3181
3156 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); 3182 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
3157 BUG_ON(rb_node); 3183 if (rb_node)
3184 backref_tree_panic(rb_node, -EEXIST, block->bytenr);
3158 3185
3159 return 0; 3186 return 0;
3160} 3187}
@@ -3426,7 +3453,9 @@ static int find_data_references(struct reloc_control *rc,
3426 block->key_ready = 1; 3453 block->key_ready = 1;
3427 rb_node = tree_insert(blocks, block->bytenr, 3454 rb_node = tree_insert(blocks, block->bytenr,
3428 &block->rb_node); 3455 &block->rb_node);
3429 BUG_ON(rb_node); 3456 if (rb_node)
3457 backref_tree_panic(rb_node, -EEXIST,
3458 block->bytenr);
3430 } 3459 }
3431 if (counted) 3460 if (counted)
3432 added = 1; 3461 added = 1;
@@ -4073,10 +4102,11 @@ out:
4073static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) 4102static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4074{ 4103{
4075 struct btrfs_trans_handle *trans; 4104 struct btrfs_trans_handle *trans;
4076 int ret; 4105 int ret, err;
4077 4106
4078 trans = btrfs_start_transaction(root->fs_info->tree_root, 0); 4107 trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
4079 BUG_ON(IS_ERR(trans)); 4108 if (IS_ERR(trans))
4109 return PTR_ERR(trans);
4080 4110
4081 memset(&root->root_item.drop_progress, 0, 4111 memset(&root->root_item.drop_progress, 0,
4082 sizeof(root->root_item.drop_progress)); 4112 sizeof(root->root_item.drop_progress));
@@ -4084,11 +4114,11 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4084 btrfs_set_root_refs(&root->root_item, 0); 4114 btrfs_set_root_refs(&root->root_item, 0);
4085 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4115 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4086 &root->root_key, &root->root_item); 4116 &root->root_key, &root->root_item);
4087 BUG_ON(ret);
4088 4117
4089 ret = btrfs_end_transaction(trans, root->fs_info->tree_root); 4118 err = btrfs_end_transaction(trans, root->fs_info->tree_root);
4090 BUG_ON(ret); 4119 if (err)
4091 return 0; 4120 return err;
4121 return ret;
4092} 4122}
4093 4123
4094/* 4124/*
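
The ret/err split in mark_garbage_root() above follows a common error-propagation shape: the update result must not be clobbered by the cleanup step, yet a failure to end the transaction still has to be reported. A minimal sketch of the same shape (do_update stands in for btrfs_update_root and is illustrative only):

	static int update_then_end(struct btrfs_trans_handle *trans,
				   struct btrfs_root *tree_root)
	{
		int ret, err;

		ret = do_update(trans, tree_root);	/* keep first result */
		err = btrfs_end_transaction(trans, tree_root);
		if (err)		/* cleanup failure takes precedence */
			return err;
		return ret;		/* otherwise report the update result */
	}
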
@@ -4156,7 +4186,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4156 err = ret; 4186 err = ret;
4157 goto out; 4187 goto out;
4158 } 4188 }
4159 mark_garbage_root(reloc_root); 4189 ret = mark_garbage_root(reloc_root);
4190 if (ret < 0) {
4191 err = ret;
4192 goto out;
4193 }
4160 } 4194 }
4161 } 4195 }
4162 4196
@@ -4202,13 +4236,19 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4202 4236
4203 fs_root = read_fs_root(root->fs_info, 4237 fs_root = read_fs_root(root->fs_info,
4204 reloc_root->root_key.offset); 4238 reloc_root->root_key.offset);
4205 BUG_ON(IS_ERR(fs_root)); 4239 if (IS_ERR(fs_root)) {
4240 err = PTR_ERR(fs_root);
4241 goto out_free;
4242 }
4206 4243
4207 __add_reloc_root(reloc_root); 4244 err = __add_reloc_root(reloc_root);
4245 BUG_ON(err < 0); /* -ENOMEM or logic error */
4208 fs_root->reloc_root = reloc_root; 4246 fs_root->reloc_root = reloc_root;
4209 } 4247 }
4210 4248
4211 btrfs_commit_transaction(trans, rc->extent_root); 4249 err = btrfs_commit_transaction(trans, rc->extent_root);
4250 if (err)
4251 goto out_free;
4212 4252
4213 merge_reloc_roots(rc); 4253 merge_reloc_roots(rc);
4214 4254
@@ -4218,7 +4258,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4218 if (IS_ERR(trans)) 4258 if (IS_ERR(trans))
4219 err = PTR_ERR(trans); 4259 err = PTR_ERR(trans);
4220 else 4260 else
4221 btrfs_commit_transaction(trans, rc->extent_root); 4261 err = btrfs_commit_transaction(trans, rc->extent_root);
4222out_free: 4262out_free:
4223 kfree(rc); 4263 kfree(rc);
4224out: 4264out:
@@ -4267,6 +4307,8 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4267 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 4307 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
4268 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 4308 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
4269 disk_bytenr + len - 1, &list, 0); 4309 disk_bytenr + len - 1, &list, 0);
4310 if (ret)
4311 goto out;
4270 4312
4271 while (!list_empty(&list)) { 4313 while (!list_empty(&list)) {
4272 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4314 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
@@ -4284,6 +4326,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4284 4326
4285 btrfs_add_ordered_sum(inode, ordered, sums); 4327 btrfs_add_ordered_sum(inode, ordered, sums);
4286 } 4328 }
4329out:
4287 btrfs_put_ordered_extent(ordered); 4330 btrfs_put_ordered_extent(ordered);
4288 return ret; 4331 return ret;
4289} 4332}
@@ -4380,7 +4423,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
4380 * called after snapshot is created. migrate block reservation 4423 * called after snapshot is created. migrate block reservation
4381 * and create reloc root for the newly created snapshot 4424 * and create reloc root for the newly created snapshot
4382 */ 4425 */
4383void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, 4426int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4384 struct btrfs_pending_snapshot *pending) 4427 struct btrfs_pending_snapshot *pending)
4385{ 4428{
4386 struct btrfs_root *root = pending->root; 4429 struct btrfs_root *root = pending->root;
@@ -4390,7 +4433,7 @@ void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4390 int ret; 4433 int ret;
4391 4434
4392 if (!root->reloc_root) 4435 if (!root->reloc_root)
4393 return; 4436 return 0;
4394 4437
4395 rc = root->fs_info->reloc_ctl; 4438 rc = root->fs_info->reloc_ctl;
4396 rc->merging_rsv_size += rc->nodes_relocated; 4439 rc->merging_rsv_size += rc->nodes_relocated;
@@ -4399,18 +4442,21 @@ void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4399 ret = btrfs_block_rsv_migrate(&pending->block_rsv, 4442 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4400 rc->block_rsv, 4443 rc->block_rsv,
4401 rc->nodes_relocated); 4444 rc->nodes_relocated);
4402 BUG_ON(ret); 4445 if (ret)
4446 return ret;
4403 } 4447 }
4404 4448
4405 new_root = pending->snap; 4449 new_root = pending->snap;
4406 reloc_root = create_reloc_root(trans, root->reloc_root, 4450 reloc_root = create_reloc_root(trans, root->reloc_root,
4407 new_root->root_key.objectid); 4451 new_root->root_key.objectid);
4452 if (IS_ERR(reloc_root))
4453 return PTR_ERR(reloc_root);
4408 4454
4409 __add_reloc_root(reloc_root); 4455 ret = __add_reloc_root(reloc_root);
4456 BUG_ON(ret < 0);
4410 new_root->reloc_root = reloc_root; 4457 new_root->reloc_root = reloc_root;
4411 4458
4412 if (rc->create_reloc_tree) { 4459 if (rc->create_reloc_tree)
4413 ret = clone_backref_node(trans, rc, root, reloc_root); 4460 ret = clone_backref_node(trans, rc, root, reloc_root);
4414 BUG_ON(ret); 4461 return ret;
4415 }
4416} 4462}
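
With btrfs_reloc_post_snapshot() now returning int, the snapshot-creation path can react to a failed reservation migration or reloc-root creation instead of relying on BUG_ON() inside the helper. A sketch of the assumed call site (the actual caller lives in transaction.c and is not part of this diff):

	ret = btrfs_reloc_post_snapshot(trans, pending);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
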
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index f4099904565a..24fb8ce4e071 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -93,10 +93,14 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
93 unsigned long ptr; 93 unsigned long ptr;
94 94
95 path = btrfs_alloc_path(); 95 path = btrfs_alloc_path();
96 BUG_ON(!path); 96 if (!path)
97 return -ENOMEM;
98
97 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 99 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
98 if (ret < 0) 100 if (ret < 0) {
101 btrfs_abort_transaction(trans, root, ret);
99 goto out; 102 goto out;
103 }
100 104
101 if (ret != 0) { 105 if (ret != 0) {
102 btrfs_print_leaf(root, path->nodes[0]); 106 btrfs_print_leaf(root, path->nodes[0]);
@@ -116,13 +120,10 @@ out:
116 return ret; 120 return ret;
117} 121}
118 122
119int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root 123int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
120 *root, struct btrfs_key *key, struct btrfs_root_item 124 struct btrfs_key *key, struct btrfs_root_item *item)
121 *item)
122{ 125{
123 int ret; 126 return btrfs_insert_item(trans, root, key, item, sizeof(*item));
124 ret = btrfs_insert_item(trans, root, key, item, sizeof(*item));
125 return ret;
126} 127}
127 128
128/* 129/*
@@ -384,6 +385,8 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
384 * 385 *
385 * For a back ref the root_id is the id of the subvol or snapshot and 386 * For a back ref the root_id is the id of the subvol or snapshot and
386 * ref_id is the id of the tree referencing it. 387 * ref_id is the id of the tree referencing it.
388 *
389 * Will return 0, -ENOMEM, or anything from the CoW path
387 */ 390 */
388int btrfs_add_root_ref(struct btrfs_trans_handle *trans, 391int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
389 struct btrfs_root *tree_root, 392 struct btrfs_root *tree_root,
@@ -407,7 +410,11 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
407again: 410again:
408 ret = btrfs_insert_empty_item(trans, tree_root, path, &key, 411 ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
409 sizeof(*ref) + name_len); 412 sizeof(*ref) + name_len);
410 BUG_ON(ret); 413 if (ret) {
414 btrfs_abort_transaction(trans, tree_root, ret);
415 btrfs_free_path(path);
416 return ret;
417 }
411 418
412 leaf = path->nodes[0]; 419 leaf = path->nodes[0];
413 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 420 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
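
Both root-tree.c hunks replace a BUG_ON() with the transaction-abort idiom this series introduces: once a modification can fail with the transaction already dirty, the error is recorded via btrfs_abort_transaction() and then propagated instead of crashing the box. The shape, reduced to a sketch (the surrounding function is illustrative only):

	static int modify_root_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_key *key,
				    struct btrfs_path *path)
	{
		int ret;

		ret = btrfs_search_slot(trans, root, key, path, 0, 1);
		if (ret < 0) {
			/* forces read-only; the error is not lost */
			btrfs_abort_transaction(trans, root, ret);
			return ret;
		}
		/* ... update the item found at path->slots[0] ... */
		return 0;
	}
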
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 390e7102b0ff..90acc82046c3 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -36,37 +36,30 @@
36 * Future enhancements: 36 * Future enhancements:
37 * - In case an unrepairable extent is encountered, track which files are 37 * - In case an unrepairable extent is encountered, track which files are
38 * affected and report them 38 * affected and report them
39 * - In case of a read error on files with nodatasum, map the file and read
40 * the extent to trigger a writeback of the good copy
41 * - track and record media errors, throw out bad devices 39 * - track and record media errors, throw out bad devices
42 * - add a mode to also read unallocated space 40 * - add a mode to also read unallocated space
43 */ 41 */
44 42
45struct scrub_bio; 43struct scrub_block;
46struct scrub_page;
47struct scrub_dev; 44struct scrub_dev;
48static void scrub_bio_end_io(struct bio *bio, int err);
49static void scrub_checksum(struct btrfs_work *work);
50static int scrub_checksum_data(struct scrub_dev *sdev,
51 struct scrub_page *spag, void *buffer);
52static int scrub_checksum_tree_block(struct scrub_dev *sdev,
53 struct scrub_page *spag, u64 logical,
54 void *buffer);
55static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
56static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
57static void scrub_fixup_end_io(struct bio *bio, int err);
58static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
59 struct page *page);
60static void scrub_fixup(struct scrub_bio *sbio, int ix);
61 45
62#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ 46#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */
63#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ 47#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */
48#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */
64 49
65struct scrub_page { 50struct scrub_page {
51 struct scrub_block *sblock;
52 struct page *page;
53 struct block_device *bdev;
66 u64 flags; /* extent flags */ 54 u64 flags; /* extent flags */
67 u64 generation; 55 u64 generation;
68 int mirror_num; 56 u64 logical;
69 int have_csum; 57 u64 physical;
58 struct {
59 unsigned int mirror_num:8;
60 unsigned int have_csum:1;
61 unsigned int io_error:1;
62 };
70 u8 csum[BTRFS_CSUM_SIZE]; 63 u8 csum[BTRFS_CSUM_SIZE];
71}; 64};
72 65
@@ -77,12 +70,25 @@ struct scrub_bio {
77 int err; 70 int err;
78 u64 logical; 71 u64 logical;
79 u64 physical; 72 u64 physical;
80 struct scrub_page spag[SCRUB_PAGES_PER_BIO]; 73 struct scrub_page *pagev[SCRUB_PAGES_PER_BIO];
81 u64 count; 74 int page_count;
82 int next_free; 75 int next_free;
83 struct btrfs_work work; 76 struct btrfs_work work;
84}; 77};
85 78
79struct scrub_block {
80 struct scrub_page pagev[SCRUB_MAX_PAGES_PER_BLOCK];
81 int page_count;
82 atomic_t outstanding_pages;
83 atomic_t ref_count; /* free mem on transition to zero */
84 struct scrub_dev *sdev;
85 struct {
86 unsigned int header_error:1;
87 unsigned int checksum_error:1;
88 unsigned int no_io_error_seen:1;
89 };
90};
91
86struct scrub_dev { 92struct scrub_dev {
87 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV]; 93 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
88 struct btrfs_device *dev; 94 struct btrfs_device *dev;
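
The reworked structures nest as follows: a scrub_block owns up to SCRUB_MAX_PAGES_PER_BLOCK scrub_pages, each page carries a back pointer to its block, and blocks are refcounted so a bio completion and the block itself can drop pages independently. A sketch of the invariant a walker can rely on for blocks built by the regular scrub path (the recheck-path blocks leave sblock unset, as a later comment notes; the helper is illustrative, not from the patch):

	static void for_each_sblock_page(struct scrub_block *sblock)
	{
		int i;

		for (i = 0; i < sblock->page_count; i++) {
			struct scrub_page *spage = sblock->pagev + i;

			/* back pointer set when the block was filled */
			BUG_ON(spage->sblock != sblock);
			/* ... read, verify or rewrite spage->page ... */
		}
	}
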
@@ -96,6 +102,10 @@ struct scrub_dev {
96 struct list_head csum_list; 102 struct list_head csum_list;
97 atomic_t cancel_req; 103 atomic_t cancel_req;
98 int readonly; 104 int readonly;
105 int pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */
106 u32 sectorsize;
107 u32 nodesize;
108 u32 leafsize;
99 /* 109 /*
100 * statistics 110 * statistics
101 */ 111 */
@@ -124,6 +134,43 @@ struct scrub_warning {
124 int scratch_bufsize; 134 int scratch_bufsize;
125}; 135};
126 136
137
138static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
139static int scrub_setup_recheck_block(struct scrub_dev *sdev,
140 struct btrfs_mapping_tree *map_tree,
141 u64 length, u64 logical,
142 struct scrub_block *sblock);
143static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
144 struct scrub_block *sblock, int is_metadata,
145 int have_csum, u8 *csum, u64 generation,
146 u16 csum_size);
147static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
148 struct scrub_block *sblock,
149 int is_metadata, int have_csum,
150 const u8 *csum, u64 generation,
151 u16 csum_size);
152static void scrub_complete_bio_end_io(struct bio *bio, int err);
153static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
154 struct scrub_block *sblock_good,
155 int force_write);
156static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
157 struct scrub_block *sblock_good,
158 int page_num, int force_write);
159static int scrub_checksum_data(struct scrub_block *sblock);
160static int scrub_checksum_tree_block(struct scrub_block *sblock);
161static int scrub_checksum_super(struct scrub_block *sblock);
162static void scrub_block_get(struct scrub_block *sblock);
163static void scrub_block_put(struct scrub_block *sblock);
164static int scrub_add_page_to_bio(struct scrub_dev *sdev,
165 struct scrub_page *spage);
166static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
167 u64 physical, u64 flags, u64 gen, int mirror_num,
168 u8 *csum, int force);
169static void scrub_bio_end_io(struct bio *bio, int err);
170static void scrub_bio_end_io_worker(struct btrfs_work *work);
171static void scrub_block_complete(struct scrub_block *sblock);
172
173
127static void scrub_free_csums(struct scrub_dev *sdev) 174static void scrub_free_csums(struct scrub_dev *sdev)
128{ 175{
129 while (!list_empty(&sdev->csum_list)) { 176 while (!list_empty(&sdev->csum_list)) {
@@ -135,23 +182,6 @@ static void scrub_free_csums(struct scrub_dev *sdev)
135 } 182 }
136} 183}
137 184
138static void scrub_free_bio(struct bio *bio)
139{
140 int i;
141 struct page *last_page = NULL;
142
143 if (!bio)
144 return;
145
146 for (i = 0; i < bio->bi_vcnt; ++i) {
147 if (bio->bi_io_vec[i].bv_page == last_page)
148 continue;
149 last_page = bio->bi_io_vec[i].bv_page;
150 __free_page(last_page);
151 }
152 bio_put(bio);
153}
154
155static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) 185static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
156{ 186{
157 int i; 187 int i;
@@ -159,13 +189,23 @@ static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
159 if (!sdev) 189 if (!sdev)
160 return; 190 return;
161 191
192 /* this can happen when scrub is cancelled */
193 if (sdev->curr != -1) {
194 struct scrub_bio *sbio = sdev->bios[sdev->curr];
195
196 for (i = 0; i < sbio->page_count; i++) {
197 BUG_ON(!sbio->pagev[i]);
198 BUG_ON(!sbio->pagev[i]->page);
199 scrub_block_put(sbio->pagev[i]->sblock);
200 }
201 bio_put(sbio->bio);
202 }
203
162 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 204 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
163 struct scrub_bio *sbio = sdev->bios[i]; 205 struct scrub_bio *sbio = sdev->bios[i];
164 206
165 if (!sbio) 207 if (!sbio)
166 break; 208 break;
167
168 scrub_free_bio(sbio->bio);
169 kfree(sbio); 209 kfree(sbio);
170 } 210 }
171 211
@@ -179,11 +219,16 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
179 struct scrub_dev *sdev; 219 struct scrub_dev *sdev;
180 int i; 220 int i;
181 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 221 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
222 int pages_per_bio;
182 223
224 pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO,
225 bio_get_nr_vecs(dev->bdev));
183 sdev = kzalloc(sizeof(*sdev), GFP_NOFS); 226 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
184 if (!sdev) 227 if (!sdev)
185 goto nomem; 228 goto nomem;
186 sdev->dev = dev; 229 sdev->dev = dev;
230 sdev->pages_per_bio = pages_per_bio;
231 sdev->curr = -1;
187 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 232 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
188 struct scrub_bio *sbio; 233 struct scrub_bio *sbio;
189 234
@@ -194,8 +239,8 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
194 239
195 sbio->index = i; 240 sbio->index = i;
196 sbio->sdev = sdev; 241 sbio->sdev = sdev;
197 sbio->count = 0; 242 sbio->page_count = 0;
198 sbio->work.func = scrub_checksum; 243 sbio->work.func = scrub_bio_end_io_worker;
199 244
200 if (i != SCRUB_BIOS_PER_DEV-1) 245 if (i != SCRUB_BIOS_PER_DEV-1)
201 sdev->bios[i]->next_free = i + 1; 246 sdev->bios[i]->next_free = i + 1;
@@ -203,7 +248,9 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
203 sdev->bios[i]->next_free = -1; 248 sdev->bios[i]->next_free = -1;
204 } 249 }
205 sdev->first_free = 0; 250 sdev->first_free = 0;
206 sdev->curr = -1; 251 sdev->nodesize = dev->dev_root->nodesize;
252 sdev->leafsize = dev->dev_root->leafsize;
253 sdev->sectorsize = dev->dev_root->sectorsize;
207 atomic_set(&sdev->in_flight, 0); 254 atomic_set(&sdev->in_flight, 0);
208 atomic_set(&sdev->fixup_cnt, 0); 255 atomic_set(&sdev->fixup_cnt, 0);
209 atomic_set(&sdev->cancel_req, 0); 256 atomic_set(&sdev->cancel_req, 0);
@@ -294,10 +341,9 @@ err:
294 return 0; 341 return 0;
295} 342}
296 343
297static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, 344static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
298 int ix)
299{ 345{
300 struct btrfs_device *dev = sbio->sdev->dev; 346 struct btrfs_device *dev = sblock->sdev->dev;
301 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 347 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
302 struct btrfs_path *path; 348 struct btrfs_path *path;
303 struct btrfs_key found_key; 349 struct btrfs_key found_key;
@@ -316,8 +362,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
316 362
317 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); 363 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
318 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); 364 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
319 swarn.sector = (sbio->physical + ix * PAGE_SIZE) >> 9; 365 BUG_ON(sblock->page_count < 1);
320 swarn.logical = sbio->logical + ix * PAGE_SIZE; 366 swarn.sector = (sblock->pagev[0].physical) >> 9;
367 swarn.logical = sblock->pagev[0].logical;
321 swarn.errstr = errstr; 368 swarn.errstr = errstr;
322 swarn.dev = dev; 369 swarn.dev = dev;
323 swarn.msg_bufsize = bufsize; 370 swarn.msg_bufsize = bufsize;
@@ -342,7 +389,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
342 do { 389 do {
343 ret = tree_backref_for_extent(&ptr, eb, ei, item_size, 390 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
344 &ref_root, &ref_level); 391 &ref_root, &ref_level);
345 printk(KERN_WARNING "%s at logical %llu on dev %s, " 392 printk(KERN_WARNING
393 "btrfs: %s at logical %llu on dev %s, "
346 "sector %llu: metadata %s (level %d) in tree " 394 "sector %llu: metadata %s (level %d) in tree "
347 "%llu\n", errstr, swarn.logical, dev->name, 395 "%llu\n", errstr, swarn.logical, dev->name,
348 (unsigned long long)swarn.sector, 396 (unsigned long long)swarn.sector,
@@ -352,8 +400,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
352 } while (ret != 1); 400 } while (ret != 1);
353 } else { 401 } else {
354 swarn.path = path; 402 swarn.path = path;
355 iterate_extent_inodes(fs_info, path, found_key.objectid, 403 iterate_extent_inodes(fs_info, found_key.objectid,
356 extent_item_pos, 404 extent_item_pos, 1,
357 scrub_print_warning_inode, &swarn); 405 scrub_print_warning_inode, &swarn);
358 } 406 }
359 407
@@ -531,9 +579,9 @@ out:
531 spin_lock(&sdev->stat_lock); 579 spin_lock(&sdev->stat_lock);
532 ++sdev->stat.uncorrectable_errors; 580 ++sdev->stat.uncorrectable_errors;
533 spin_unlock(&sdev->stat_lock); 581 spin_unlock(&sdev->stat_lock);
534 printk_ratelimited(KERN_ERR "btrfs: unable to fixup " 582 printk_ratelimited(KERN_ERR
535 "(nodatasum) error at logical %llu\n", 583 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
536 fixup->logical); 584 (unsigned long long)fixup->logical, sdev->dev->name);
537 } 585 }
538 586
539 btrfs_free_path(path); 587 btrfs_free_path(path);
@@ -550,91 +598,168 @@ out:
550} 598}
551 599
552/* 600/*
553 * scrub_recheck_error gets called when either verification of the page 601 * scrub_handle_errored_block gets called when either verification of the
554 * failed or the bio failed to read, e.g. with EIO. In the latter case, 602 * pages failed or the bio failed to read, e.g. with EIO. In the latter
555 * recheck_error gets called for every page in the bio, even though only 603 * case, this function handles all pages in the bio, even though only one
556 * one may be bad 604 * may be bad.
605 * The goal of this function is to repair the errored block by using the
606 * contents of one of the mirrors.
557 */ 607 */
558static int scrub_recheck_error(struct scrub_bio *sbio, int ix) 608static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
559{ 609{
560 struct scrub_dev *sdev = sbio->sdev; 610 struct scrub_dev *sdev = sblock_to_check->sdev;
561 u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9; 611 struct btrfs_fs_info *fs_info;
612 u64 length;
613 u64 logical;
614 u64 generation;
615 unsigned int failed_mirror_index;
616 unsigned int is_metadata;
617 unsigned int have_csum;
618 u8 *csum;
619 struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
620 struct scrub_block *sblock_bad;
621 int ret;
622 int mirror_index;
623 int page_num;
624 int success;
562 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, 625 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
563 DEFAULT_RATELIMIT_BURST); 626 DEFAULT_RATELIMIT_BURST);
627
628 BUG_ON(sblock_to_check->page_count < 1);
629 fs_info = sdev->dev->dev_root->fs_info;
630 length = sblock_to_check->page_count * PAGE_SIZE;
631 logical = sblock_to_check->pagev[0].logical;
632 generation = sblock_to_check->pagev[0].generation;
633 BUG_ON(sblock_to_check->pagev[0].mirror_num < 1);
634 failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1;
635 is_metadata = !(sblock_to_check->pagev[0].flags &
636 BTRFS_EXTENT_FLAG_DATA);
637 have_csum = sblock_to_check->pagev[0].have_csum;
638 csum = sblock_to_check->pagev[0].csum;
564 639
565 if (sbio->err) { 640 /*
 566 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector, 641 * read all mirrors one after the other. This includes
 567 sbio->bio->bi_io_vec[ix].bv_page) == 0) { 642 * re-reading the extent or metadata block that failed (the
 568 if (scrub_fixup_check(sbio, ix) == 0) 643 * reason this fixup code is called), one more time, page by
 569 return 0; 644 * page, in order to know which pages caused I/O errors and
 570 } 645 * which ones are good (for all mirrors).
 571 if (__ratelimit(&_rs)) 646 * The goal is to handle the situation where more than one
572 scrub_print_warning("i/o error", sbio, ix); 647 * mirror contains I/O errors, but the errors do not
573 } else { 648 * overlap, i.e. the data can be repaired by selecting the
574 if (__ratelimit(&_rs)) 649 * pages from those mirrors without I/O error on the
575 scrub_print_warning("checksum error", sbio, ix); 650 * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
651 * would be that mirror #1 has an I/O error on the first page,
652 * the second page is good, and mirror #2 has an I/O error on
653 * the second page, but the first page is good.
654 * Then the first page of the first mirror can be repaired by
655 * taking the first page of the second mirror, and the
656 * second page of the second mirror can be repaired by
657 * copying the contents of the 2nd page of the 1st mirror.
658 * One more note: if the pages of one mirror contain I/O
659 * errors, the checksum cannot be verified. In order to get
660 * the best data for repairing, the first attempt is to find
661 * a mirror without I/O errors and with a validated checksum.
 662 * Only if this is not possible are the pages picked from
663 * mirrors with I/O errors without considering the checksum.
664 * If the latter is the case, at the end, the checksum of the
665 * repaired area is verified in order to correctly maintain
666 * the statistics.
667 */
668
669 sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
670 sizeof(*sblocks_for_recheck),
671 GFP_NOFS);
672 if (!sblocks_for_recheck) {
673 spin_lock(&sdev->stat_lock);
674 sdev->stat.malloc_errors++;
675 sdev->stat.read_errors++;
676 sdev->stat.uncorrectable_errors++;
677 spin_unlock(&sdev->stat_lock);
678 goto out;
576 } 679 }
577 680
578 spin_lock(&sdev->stat_lock); 681 /* setup the context, map the logical blocks and alloc the pages */
579 ++sdev->stat.read_errors; 682 ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length,
580 spin_unlock(&sdev->stat_lock); 683 logical, sblocks_for_recheck);
684 if (ret) {
685 spin_lock(&sdev->stat_lock);
686 sdev->stat.read_errors++;
687 sdev->stat.uncorrectable_errors++;
688 spin_unlock(&sdev->stat_lock);
689 goto out;
690 }
691 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
692 sblock_bad = sblocks_for_recheck + failed_mirror_index;
581 693
582 scrub_fixup(sbio, ix); 694 /* build and submit the bios for the failed mirror, check checksums */
583 return 1; 695 ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
584} 696 csum, generation, sdev->csum_size);
697 if (ret) {
698 spin_lock(&sdev->stat_lock);
699 sdev->stat.read_errors++;
700 sdev->stat.uncorrectable_errors++;
701 spin_unlock(&sdev->stat_lock);
702 goto out;
703 }
585 704
586static int scrub_fixup_check(struct scrub_bio *sbio, int ix) 705 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
587{ 706 sblock_bad->no_io_error_seen) {
588 int ret = 1; 707 /*
589 struct page *page; 708 * the error disappeared after reading page by page, or
590 void *buffer; 709 * the area was part of a huge bio and other parts of the
591 u64 flags = sbio->spag[ix].flags; 710 * bio caused I/O errors, or the block layer merged several
711 * read requests into one and the error is caused by a
 712 * different bio (usually one of the latter two cases is
713 * the cause)
714 */
715 spin_lock(&sdev->stat_lock);
716 sdev->stat.unverified_errors++;
717 spin_unlock(&sdev->stat_lock);
592 718
593 page = sbio->bio->bi_io_vec[ix].bv_page; 719 goto out;
594 buffer = kmap_atomic(page);
595 if (flags & BTRFS_EXTENT_FLAG_DATA) {
596 ret = scrub_checksum_data(sbio->sdev,
597 sbio->spag + ix, buffer);
598 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
599 ret = scrub_checksum_tree_block(sbio->sdev,
600 sbio->spag + ix,
601 sbio->logical + ix * PAGE_SIZE,
602 buffer);
603 } else {
604 WARN_ON(1);
605 } 720 }
606 kunmap_atomic(buffer);
607 721
608 return ret; 722 if (!sblock_bad->no_io_error_seen) {
609} 723 spin_lock(&sdev->stat_lock);
724 sdev->stat.read_errors++;
725 spin_unlock(&sdev->stat_lock);
726 if (__ratelimit(&_rs))
727 scrub_print_warning("i/o error", sblock_to_check);
728 } else if (sblock_bad->checksum_error) {
729 spin_lock(&sdev->stat_lock);
730 sdev->stat.csum_errors++;
731 spin_unlock(&sdev->stat_lock);
732 if (__ratelimit(&_rs))
733 scrub_print_warning("checksum error", sblock_to_check);
734 } else if (sblock_bad->header_error) {
735 spin_lock(&sdev->stat_lock);
736 sdev->stat.verify_errors++;
737 spin_unlock(&sdev->stat_lock);
738 if (__ratelimit(&_rs))
739 scrub_print_warning("checksum/header error",
740 sblock_to_check);
741 }
610 742
611static void scrub_fixup_end_io(struct bio *bio, int err) 743 if (sdev->readonly)
612{ 744 goto did_not_correct_error;
613 complete((struct completion *)bio->bi_private);
614}
615 745
616static void scrub_fixup(struct scrub_bio *sbio, int ix) 746 if (!is_metadata && !have_csum) {
617{ 747 struct scrub_fixup_nodatasum *fixup_nodatasum;
618 struct scrub_dev *sdev = sbio->sdev; 748
619 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 749 /*
620 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 750 * !is_metadata and !have_csum, this means that the data
621 struct btrfs_bio *bbio = NULL; 751 * might not be COW'ed, that it might be modified
622 struct scrub_fixup_nodatasum *fixup; 752 * concurrently. The general strategy to work on the
623 u64 logical = sbio->logical + ix * PAGE_SIZE; 753 * commit root does not help in the case when COW is not
624 u64 length; 754 * used.
625 int i; 755 */
626 int ret; 756 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
627 DECLARE_COMPLETION_ONSTACK(complete); 757 if (!fixup_nodatasum)
628 758 goto did_not_correct_error;
629 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && 759 fixup_nodatasum->sdev = sdev;
630 (sbio->spag[ix].have_csum == 0)) { 760 fixup_nodatasum->logical = logical;
631 fixup = kzalloc(sizeof(*fixup), GFP_NOFS); 761 fixup_nodatasum->root = fs_info->extent_root;
632 if (!fixup) 762 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
633 goto uncorrectable;
634 fixup->sdev = sdev;
635 fixup->logical = logical;
636 fixup->root = fs_info->extent_root;
637 fixup->mirror_num = sbio->spag[ix].mirror_num;
638 /* 763 /*
639 * increment scrubs_running to prevent cancel requests from 764 * increment scrubs_running to prevent cancel requests from
640 * completing as long as a fixup worker is running. we must also 765 * completing as long as a fixup worker is running. we must also
@@ -649,235 +774,528 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
649 atomic_inc(&fs_info->scrubs_paused); 774 atomic_inc(&fs_info->scrubs_paused);
650 mutex_unlock(&fs_info->scrub_lock); 775 mutex_unlock(&fs_info->scrub_lock);
651 atomic_inc(&sdev->fixup_cnt); 776 atomic_inc(&sdev->fixup_cnt);
652 fixup->work.func = scrub_fixup_nodatasum; 777 fixup_nodatasum->work.func = scrub_fixup_nodatasum;
653 btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work); 778 btrfs_queue_worker(&fs_info->scrub_workers,
654 return; 779 &fixup_nodatasum->work);
780 goto out;
655 } 781 }
656 782
657 length = PAGE_SIZE; 783 /*
658 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, 784 * now build and submit the bios for the other mirrors, check
659 &bbio, 0); 785 * checksums
660 if (ret || !bbio || length < PAGE_SIZE) { 786 */
661 printk(KERN_ERR 787 for (mirror_index = 0;
662 "scrub_fixup: btrfs_map_block failed us for %llu\n", 788 mirror_index < BTRFS_MAX_MIRRORS &&
663 (unsigned long long)logical); 789 sblocks_for_recheck[mirror_index].page_count > 0;
664 WARN_ON(1); 790 mirror_index++) {
665 kfree(bbio); 791 if (mirror_index == failed_mirror_index)
666 return; 792 continue;
793
794 /* build and submit the bios, check checksums */
795 ret = scrub_recheck_block(fs_info,
796 sblocks_for_recheck + mirror_index,
797 is_metadata, have_csum, csum,
798 generation, sdev->csum_size);
799 if (ret)
800 goto did_not_correct_error;
667 } 801 }
668 802
669 if (bbio->num_stripes == 1) 803 /*
670 /* there aren't any replicas */ 804 * first try to pick the mirror which is completely without I/O
671 goto uncorrectable; 805 * errors and also does not have a checksum error.
806 * If one is found, and if a checksum is present, the full block
807 * that is known to contain an error is rewritten. Afterwards
808 * the block is known to be corrected.
809 * If a mirror is found which is completely correct, and no
810 * checksum is present, only those pages are rewritten that had
811 * an I/O error in the block to be repaired, since it cannot be
812 * determined, which copy of the other pages is better (and it
813 * could happen otherwise that a correct page would be
814 * overwritten by a bad one).
815 */
816 for (mirror_index = 0;
817 mirror_index < BTRFS_MAX_MIRRORS &&
818 sblocks_for_recheck[mirror_index].page_count > 0;
819 mirror_index++) {
820 struct scrub_block *sblock_other = sblocks_for_recheck +
821 mirror_index;
822
823 if (!sblock_other->header_error &&
824 !sblock_other->checksum_error &&
825 sblock_other->no_io_error_seen) {
826 int force_write = is_metadata || have_csum;
827
828 ret = scrub_repair_block_from_good_copy(sblock_bad,
829 sblock_other,
830 force_write);
831 if (0 == ret)
832 goto corrected_error;
833 }
834 }
672 835
673 /* 836 /*
674 * first find a good copy 837 * in case of I/O errors in the area that is supposed to be
838 * repaired, continue by picking good copies of those pages.
839 * Select the good pages from mirrors to rewrite bad pages from
840 * the area to fix. Afterwards verify the checksum of the block
841 * that is supposed to be repaired. This verification step is
 842 * only done for the purpose of the statistics and for the
 843 * final scrub report, to show whether errors remain.
844 * A perfect algorithm could make use of the checksum and try
845 * all possible combinations of pages from the different mirrors
846 * until the checksum verification succeeds. For example, when
847 * the 2nd page of mirror #1 faces I/O errors, and the 2nd page
848 * of mirror #2 is readable but the final checksum test fails,
 849 * then the 2nd page of mirror #3 could be tried, to see whether
 850 * the final checksum now succeeds. But this would be a rare
 851 * exception and is therefore not implemented. At least the good
 852 * copy is never overwritten.
853 * A more useful improvement would be to pick the sectors
854 * without I/O error based on sector sizes (512 bytes on legacy
 855 * disks) instead of on PAGE_SIZE. Then maybe 512 bytes of one
 856 * mirror could be repaired by taking 512 bytes of a different
857 * mirror, even if other 512 byte sectors in the same PAGE_SIZE
858 * area are unreadable.
675 */ 859 */
676 for (i = 0; i < bbio->num_stripes; ++i) {
677 if (i + 1 == sbio->spag[ix].mirror_num)
678 continue;
679 860
680 if (scrub_fixup_io(READ, bbio->stripes[i].dev->bdev, 861 /* can only fix I/O errors from here on */
681 bbio->stripes[i].physical >> 9, 862 if (sblock_bad->no_io_error_seen)
682 sbio->bio->bi_io_vec[ix].bv_page)) { 863 goto did_not_correct_error;
683 /* I/O-error, this is not a good copy */ 864
865 success = 1;
866 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
867 struct scrub_page *page_bad = sblock_bad->pagev + page_num;
868
869 if (!page_bad->io_error)
684 continue; 870 continue;
871
872 for (mirror_index = 0;
873 mirror_index < BTRFS_MAX_MIRRORS &&
874 sblocks_for_recheck[mirror_index].page_count > 0;
875 mirror_index++) {
876 struct scrub_block *sblock_other = sblocks_for_recheck +
877 mirror_index;
878 struct scrub_page *page_other = sblock_other->pagev +
879 page_num;
880
881 if (!page_other->io_error) {
882 ret = scrub_repair_page_from_good_copy(
883 sblock_bad, sblock_other, page_num, 0);
884 if (0 == ret) {
885 page_bad->io_error = 0;
886 break; /* succeeded for this page */
887 }
888 }
685 } 889 }
686 890
687 if (scrub_fixup_check(sbio, ix) == 0) 891 if (page_bad->io_error) {
688 break; 892 /* did not find a mirror to copy the page from */
893 success = 0;
894 }
689 } 895 }
690 if (i == bbio->num_stripes)
691 goto uncorrectable;
692 896
693 if (!sdev->readonly) { 897 if (success) {
694 /* 898 if (is_metadata || have_csum) {
695 * bi_io_vec[ix].bv_page now contains good data, write it back 899 /*
696 */ 900 * need to verify the checksum now that all
697 if (scrub_fixup_io(WRITE, sdev->dev->bdev, 901 * sectors on disk are repaired (the write
698 (sbio->physical + ix * PAGE_SIZE) >> 9, 902 * request for data to be repaired is on its way).
699 sbio->bio->bi_io_vec[ix].bv_page)) { 903 * Just be lazy and use scrub_recheck_block()
700 /* I/O-error, writeback failed, give up */ 904 * which re-reads the data before the checksum
701 goto uncorrectable; 905 * is verified, but most likely the data comes out
906 * of the page cache.
907 */
908 ret = scrub_recheck_block(fs_info, sblock_bad,
909 is_metadata, have_csum, csum,
910 generation, sdev->csum_size);
911 if (!ret && !sblock_bad->header_error &&
912 !sblock_bad->checksum_error &&
913 sblock_bad->no_io_error_seen)
914 goto corrected_error;
915 else
916 goto did_not_correct_error;
917 } else {
918corrected_error:
919 spin_lock(&sdev->stat_lock);
920 sdev->stat.corrected_errors++;
921 spin_unlock(&sdev->stat_lock);
922 printk_ratelimited(KERN_ERR
923 "btrfs: fixed up error at logical %llu on dev %s\n",
924 (unsigned long long)logical, sdev->dev->name);
702 } 925 }
926 } else {
927did_not_correct_error:
928 spin_lock(&sdev->stat_lock);
929 sdev->stat.uncorrectable_errors++;
930 spin_unlock(&sdev->stat_lock);
931 printk_ratelimited(KERN_ERR
932 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
933 (unsigned long long)logical, sdev->dev->name);
703 } 934 }
704 935
705 kfree(bbio); 936out:
706 spin_lock(&sdev->stat_lock); 937 if (sblocks_for_recheck) {
707 ++sdev->stat.corrected_errors; 938 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
708 spin_unlock(&sdev->stat_lock); 939 mirror_index++) {
940 struct scrub_block *sblock = sblocks_for_recheck +
941 mirror_index;
942 int page_index;
943
944 for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO;
945 page_index++)
946 if (sblock->pagev[page_index].page)
947 __free_page(
948 sblock->pagev[page_index].page);
949 }
950 kfree(sblocks_for_recheck);
951 }
709 952
710 printk_ratelimited(KERN_ERR "btrfs: fixed up error at logical %llu\n", 953 return 0;
711 (unsigned long long)logical); 954}
712 return;
713 955
714uncorrectable: 956static int scrub_setup_recheck_block(struct scrub_dev *sdev,
715 kfree(bbio); 957 struct btrfs_mapping_tree *map_tree,
716 spin_lock(&sdev->stat_lock); 958 u64 length, u64 logical,
717 ++sdev->stat.uncorrectable_errors; 959 struct scrub_block *sblocks_for_recheck)
718 spin_unlock(&sdev->stat_lock); 960{
961 int page_index;
962 int mirror_index;
963 int ret;
964
965 /*
966 * note: the three members sdev, ref_count and outstanding_pages
967 * are not used (and not set) in the blocks that are used for
968 * the recheck procedure
969 */
970
971 page_index = 0;
972 while (length > 0) {
973 u64 sublen = min_t(u64, length, PAGE_SIZE);
974 u64 mapped_length = sublen;
975 struct btrfs_bio *bbio = NULL;
976
977 /*
978 * with a length of PAGE_SIZE, each returned stripe
979 * represents one mirror
980 */
981 ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length,
982 &bbio, 0);
983 if (ret || !bbio || mapped_length < sublen) {
984 kfree(bbio);
985 return -EIO;
986 }
719 987
720 printk_ratelimited(KERN_ERR "btrfs: unable to fixup (regular) error at " 988 BUG_ON(page_index >= SCRUB_PAGES_PER_BIO);
721 "logical %llu\n", (unsigned long long)logical); 989 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
990 mirror_index++) {
991 struct scrub_block *sblock;
992 struct scrub_page *page;
993
994 if (mirror_index >= BTRFS_MAX_MIRRORS)
995 continue;
996
997 sblock = sblocks_for_recheck + mirror_index;
998 page = sblock->pagev + page_index;
999 page->logical = logical;
1000 page->physical = bbio->stripes[mirror_index].physical;
1001 page->bdev = bbio->stripes[mirror_index].dev->bdev;
1002 page->mirror_num = mirror_index + 1;
1003 page->page = alloc_page(GFP_NOFS);
1004 if (!page->page) {
1005 spin_lock(&sdev->stat_lock);
1006 sdev->stat.malloc_errors++;
1007 spin_unlock(&sdev->stat_lock);
1008 return -ENOMEM;
1009 }
1010 sblock->page_count++;
1011 }
1012 kfree(bbio);
1013 length -= sublen;
1014 logical += sublen;
1015 page_index++;
1016 }
1017
1018 return 0;
722} 1019}
723 1020
724static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, 1021/*
725 struct page *page) 1022 * this function will check the on disk data for checksum errors, header
1023 * errors and read I/O errors. If any I/O errors happen, the exact pages
1024 * which are errored are marked as being bad. The goal is to enable scrub
1025 * to take those pages that are not errored from all the mirrors so that
1026 * the pages that are errored in the just handled mirror can be repaired.
1027 */
1028static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
1029 struct scrub_block *sblock, int is_metadata,
1030 int have_csum, u8 *csum, u64 generation,
1031 u16 csum_size)
726{ 1032{
727 struct bio *bio = NULL; 1033 int page_num;
728 int ret; 1034
729 DECLARE_COMPLETION_ONSTACK(complete); 1035 sblock->no_io_error_seen = 1;
1036 sblock->header_error = 0;
1037 sblock->checksum_error = 0;
1038
1039 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1040 struct bio *bio;
1041 int ret;
1042 struct scrub_page *page = sblock->pagev + page_num;
1043 DECLARE_COMPLETION_ONSTACK(complete);
1044
1045 BUG_ON(!page->page);
1046 bio = bio_alloc(GFP_NOFS, 1);
1047 bio->bi_bdev = page->bdev;
1048 bio->bi_sector = page->physical >> 9;
1049 bio->bi_end_io = scrub_complete_bio_end_io;
1050 bio->bi_private = &complete;
1051
1052 ret = bio_add_page(bio, page->page, PAGE_SIZE, 0);
1053 if (PAGE_SIZE != ret) {
1054 bio_put(bio);
1055 return -EIO;
1056 }
1057 btrfsic_submit_bio(READ, bio);
730 1058
731 bio = bio_alloc(GFP_NOFS, 1); 1059 /* this will also unplug the queue */
732 bio->bi_bdev = bdev; 1060 wait_for_completion(&complete);
733 bio->bi_sector = sector;
734 bio_add_page(bio, page, PAGE_SIZE, 0);
735 bio->bi_end_io = scrub_fixup_end_io;
736 bio->bi_private = &complete;
737 btrfsic_submit_bio(rw, bio);
738 1061
739 /* this will also unplug the queue */ 1062 page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
740 wait_for_completion(&complete); 1063 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1064 sblock->no_io_error_seen = 0;
1065 bio_put(bio);
1066 }
741 1067
742 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); 1068 if (sblock->no_io_error_seen)
743 bio_put(bio); 1069 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
744 return ret; 1070 have_csum, csum, generation,
1071 csum_size);
1072
1073 return 0;
745} 1074}
746 1075
747static void scrub_bio_end_io(struct bio *bio, int err) 1076static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1077 struct scrub_block *sblock,
1078 int is_metadata, int have_csum,
1079 const u8 *csum, u64 generation,
1080 u16 csum_size)
748{ 1081{
749 struct scrub_bio *sbio = bio->bi_private; 1082 int page_num;
750 struct scrub_dev *sdev = sbio->sdev; 1083 u8 calculated_csum[BTRFS_CSUM_SIZE];
751 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 1084 u32 crc = ~(u32)0;
1085 struct btrfs_root *root = fs_info->extent_root;
1086 void *mapped_buffer;
1087
1088 BUG_ON(!sblock->pagev[0].page);
1089 if (is_metadata) {
1090 struct btrfs_header *h;
1091
1092 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
1093 h = (struct btrfs_header *)mapped_buffer;
1094
1095 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
1096 generation != le64_to_cpu(h->generation) ||
1097 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1098 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1099 BTRFS_UUID_SIZE))
1100 sblock->header_error = 1;
1101 csum = h->csum;
1102 } else {
1103 if (!have_csum)
1104 return;
752 1105
753 sbio->err = err; 1106 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
754 sbio->bio = bio; 1107 }
755 1108
756 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); 1109 for (page_num = 0;;) {
1110 if (page_num == 0 && is_metadata)
1111 crc = btrfs_csum_data(root,
1112 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1113 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1114 else
1115 crc = btrfs_csum_data(root, mapped_buffer, crc,
1116 PAGE_SIZE);
1117
1118 kunmap_atomic(mapped_buffer);
1119 page_num++;
1120 if (page_num >= sblock->page_count)
1121 break;
1122 BUG_ON(!sblock->pagev[page_num].page);
1123
1124 mapped_buffer = kmap_atomic(sblock->pagev[page_num].page);
1125 }
1126
1127 btrfs_csum_final(crc, calculated_csum);
1128 if (memcmp(calculated_csum, csum, csum_size))
1129 sblock->checksum_error = 1;
757} 1130}
758 1131
759static void scrub_checksum(struct btrfs_work *work) 1132static void scrub_complete_bio_end_io(struct bio *bio, int err)
760{ 1133{
761 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); 1134 complete((struct completion *)bio->bi_private);
762 struct scrub_dev *sdev = sbio->sdev; 1135}
763 struct page *page;
764 void *buffer;
765 int i;
766 u64 flags;
767 u64 logical;
768 int ret;
769 1136
770 if (sbio->err) { 1137static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
771 ret = 0; 1138 struct scrub_block *sblock_good,
772 for (i = 0; i < sbio->count; ++i) 1139 int force_write)
773 ret |= scrub_recheck_error(sbio, i); 1140{
774 if (!ret) { 1141 int page_num;
775 spin_lock(&sdev->stat_lock); 1142 int ret = 0;
776 ++sdev->stat.unverified_errors;
777 spin_unlock(&sdev->stat_lock);
778 }
779 1143
780 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); 1144 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
781 sbio->bio->bi_flags |= 1 << BIO_UPTODATE; 1145 int ret_sub;
782 sbio->bio->bi_phys_segments = 0;
783 sbio->bio->bi_idx = 0;
784 1146
785 for (i = 0; i < sbio->count; i++) { 1147 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
786 struct bio_vec *bi; 1148 sblock_good,
787 bi = &sbio->bio->bi_io_vec[i]; 1149 page_num,
788 bi->bv_offset = 0; 1150 force_write);
789 bi->bv_len = PAGE_SIZE; 1151 if (ret_sub)
790 } 1152 ret = ret_sub;
791 goto out;
792 } 1153 }
793 for (i = 0; i < sbio->count; ++i) { 1154
794 page = sbio->bio->bi_io_vec[i].bv_page; 1155 return ret;
795 buffer = kmap_atomic(page); 1156}
796 flags = sbio->spag[i].flags; 1157
797 logical = sbio->logical + i * PAGE_SIZE; 1158static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
798 ret = 0; 1159 struct scrub_block *sblock_good,
799 if (flags & BTRFS_EXTENT_FLAG_DATA) { 1160 int page_num, int force_write)
800 ret = scrub_checksum_data(sdev, sbio->spag + i, buffer); 1161{
801 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1162 struct scrub_page *page_bad = sblock_bad->pagev + page_num;
802 ret = scrub_checksum_tree_block(sdev, sbio->spag + i, 1163 struct scrub_page *page_good = sblock_good->pagev + page_num;
803 logical, buffer); 1164
804 } else if (flags & BTRFS_EXTENT_FLAG_SUPER) { 1165 BUG_ON(sblock_bad->pagev[page_num].page == NULL);
805 BUG_ON(i); 1166 BUG_ON(sblock_good->pagev[page_num].page == NULL);
806 (void)scrub_checksum_super(sbio, buffer); 1167 if (force_write || sblock_bad->header_error ||
807 } else { 1168 sblock_bad->checksum_error || page_bad->io_error) {
808 WARN_ON(1); 1169 struct bio *bio;
809 } 1170 int ret;
810 kunmap_atomic(buffer); 1171 DECLARE_COMPLETION_ONSTACK(complete);
811 if (ret) { 1172
812 ret = scrub_recheck_error(sbio, i); 1173 bio = bio_alloc(GFP_NOFS, 1);
813 if (!ret) { 1174 bio->bi_bdev = page_bad->bdev;
814 spin_lock(&sdev->stat_lock); 1175 bio->bi_sector = page_bad->physical >> 9;
815 ++sdev->stat.unverified_errors; 1176 bio->bi_end_io = scrub_complete_bio_end_io;
816 spin_unlock(&sdev->stat_lock); 1177 bio->bi_private = &complete;
817 } 1178
1179 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1180 if (PAGE_SIZE != ret) {
1181 bio_put(bio);
1182 return -EIO;
818 } 1183 }
1184 btrfsic_submit_bio(WRITE, bio);
1185
1186 /* this will also unplug the queue */
1187 wait_for_completion(&complete);
1188 bio_put(bio);
819 } 1189 }
820 1190
821out: 1191 return 0;
822 scrub_free_bio(sbio->bio); 1192}
823 sbio->bio = NULL; 1193
824 spin_lock(&sdev->list_lock); 1194static void scrub_checksum(struct scrub_block *sblock)
825 sbio->next_free = sdev->first_free; 1195{
826 sdev->first_free = sbio->index; 1196 u64 flags;
827 spin_unlock(&sdev->list_lock); 1197 int ret;
828 atomic_dec(&sdev->in_flight); 1198
829 wake_up(&sdev->list_wait); 1199 BUG_ON(sblock->page_count < 1);
1200 flags = sblock->pagev[0].flags;
1201 ret = 0;
1202 if (flags & BTRFS_EXTENT_FLAG_DATA)
1203 ret = scrub_checksum_data(sblock);
1204 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1205 ret = scrub_checksum_tree_block(sblock);
1206 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1207 (void)scrub_checksum_super(sblock);
1208 else
1209 WARN_ON(1);
1210 if (ret)
1211 scrub_handle_errored_block(sblock);
830} 1212}
831 1213
832static int scrub_checksum_data(struct scrub_dev *sdev, 1214static int scrub_checksum_data(struct scrub_block *sblock)
833 struct scrub_page *spag, void *buffer)
834{ 1215{
1216 struct scrub_dev *sdev = sblock->sdev;
835 u8 csum[BTRFS_CSUM_SIZE]; 1217 u8 csum[BTRFS_CSUM_SIZE];
1218 u8 *on_disk_csum;
1219 struct page *page;
1220 void *buffer;
836 u32 crc = ~(u32)0; 1221 u32 crc = ~(u32)0;
837 int fail = 0; 1222 int fail = 0;
838 struct btrfs_root *root = sdev->dev->dev_root; 1223 struct btrfs_root *root = sdev->dev->dev_root;
1224 u64 len;
1225 int index;
839 1226
840 if (!spag->have_csum) 1227 BUG_ON(sblock->page_count < 1);
1228 if (!sblock->pagev[0].have_csum)
841 return 0; 1229 return 0;
842 1230
843 crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE); 1231 on_disk_csum = sblock->pagev[0].csum;
1232 page = sblock->pagev[0].page;
1233 buffer = kmap_atomic(page);
1234
1235 len = sdev->sectorsize;
1236 index = 0;
1237 for (;;) {
1238 u64 l = min_t(u64, len, PAGE_SIZE);
1239
1240 crc = btrfs_csum_data(root, buffer, crc, l);
1241 kunmap_atomic(buffer);
1242 len -= l;
1243 if (len == 0)
1244 break;
1245 index++;
1246 BUG_ON(index >= sblock->page_count);
1247 BUG_ON(!sblock->pagev[index].page);
1248 page = sblock->pagev[index].page;
1249 buffer = kmap_atomic(page);
1250 }
1251
844 btrfs_csum_final(crc, csum); 1252 btrfs_csum_final(crc, csum);
845 if (memcmp(csum, spag->csum, sdev->csum_size)) 1253 if (memcmp(csum, on_disk_csum, sdev->csum_size))
846 fail = 1; 1254 fail = 1;
847 1255
848 spin_lock(&sdev->stat_lock); 1256 if (fail) {
849 ++sdev->stat.data_extents_scrubbed; 1257 spin_lock(&sdev->stat_lock);
850 sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
851 if (fail)
852 ++sdev->stat.csum_errors; 1258 ++sdev->stat.csum_errors;
853 spin_unlock(&sdev->stat_lock); 1259 spin_unlock(&sdev->stat_lock);
1260 }
854 1261
855 return fail; 1262 return fail;
856} 1263}
857 1264
858static int scrub_checksum_tree_block(struct scrub_dev *sdev, 1265static int scrub_checksum_tree_block(struct scrub_block *sblock)
859 struct scrub_page *spag, u64 logical,
860 void *buffer)
861{ 1266{
1267 struct scrub_dev *sdev = sblock->sdev;
862 struct btrfs_header *h; 1268 struct btrfs_header *h;
863 struct btrfs_root *root = sdev->dev->dev_root; 1269 struct btrfs_root *root = sdev->dev->dev_root;
864 struct btrfs_fs_info *fs_info = root->fs_info; 1270 struct btrfs_fs_info *fs_info = root->fs_info;
865 u8 csum[BTRFS_CSUM_SIZE]; 1271 u8 calculated_csum[BTRFS_CSUM_SIZE];
1272 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1273 struct page *page;
1274 void *mapped_buffer;
1275 u64 mapped_size;
1276 void *p;
866 u32 crc = ~(u32)0; 1277 u32 crc = ~(u32)0;
867 int fail = 0; 1278 int fail = 0;
868 int crc_fail = 0; 1279 int crc_fail = 0;
1280 u64 len;
1281 int index;
1282
1283 BUG_ON(sblock->page_count < 1);
1284 page = sblock->pagev[0].page;
1285 mapped_buffer = kmap_atomic(page);
1286 h = (struct btrfs_header *)mapped_buffer;
1287 memcpy(on_disk_csum, h->csum, sdev->csum_size);
869 1288
870 /* 1289 /*
871 * we don't use the getter functions here, as we 1290 * we don't use the getter functions here, as we
872 * a) don't have an extent buffer and 1291 * a) don't have an extent buffer and
873 * b) the page is already kmapped 1292 * b) the page is already kmapped
874 */ 1293 */
875 h = (struct btrfs_header *)buffer;
876 1294
877 if (logical != le64_to_cpu(h->bytenr)) 1295 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr))
878 ++fail; 1296 ++fail;
879 1297
880 if (spag->generation != le64_to_cpu(h->generation)) 1298 if (sblock->pagev[0].generation != le64_to_cpu(h->generation))
881 ++fail; 1299 ++fail;
882 1300
883 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1301 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
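
The repair strategy spelled out in the long comments inside scrub_handle_errored_block() earlier in this hunk condenses to the control flow below. This is a sketch with assumed helpers (recheck, is_clean, copy_block, copy_page), not the function itself; the real code also updates statistics and hands the nodatasum case to a fixup worker.

	static int recheck(struct scrub_block *b);	/* assumed helpers */
	static int is_clean(struct scrub_block *b);
	static int copy_block(struct scrub_block *d, struct scrub_block *s);
	static int copy_page(struct scrub_block *d, struct scrub_block *s, int p);

	static int repair_block_sketch(struct scrub_block *bad,
				       struct scrub_block *mirrors,
				       int nr_mirrors)
	{
		int m, p;

		for (m = 0; m < nr_mirrors; m++)  /* 1. per-page re-read */
			recheck(&mirrors[m]);

		for (m = 0; m < nr_mirrors; m++)  /* 2. fully intact mirror? */
			if (is_clean(&mirrors[m]))
				return copy_block(bad, &mirrors[m]);

		for (p = 0; p < bad->page_count; p++) {	/* 3. page by page */
			if (!bad->pagev[p].io_error)
				continue;
			for (m = 0; m < nr_mirrors; m++)
				if (!mirrors[m].pagev[p].io_error &&
				    copy_page(bad, &mirrors[m], p) == 0)
					break;
		}

		return recheck(bad);	/* 4. re-verify for truthful stats */
	}
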
@@ -887,51 +1305,99 @@ static int scrub_checksum_tree_block(struct scrub_dev *sdev,
887 BTRFS_UUID_SIZE)) 1305 BTRFS_UUID_SIZE))
888 ++fail; 1306 ++fail;
889 1307
890 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, 1308 BUG_ON(sdev->nodesize != sdev->leafsize);
891 PAGE_SIZE - BTRFS_CSUM_SIZE); 1309 len = sdev->nodesize - BTRFS_CSUM_SIZE;
892 btrfs_csum_final(crc, csum); 1310 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
893 if (memcmp(csum, h->csum, sdev->csum_size)) 1311 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1312 index = 0;
1313 for (;;) {
1314 u64 l = min_t(u64, len, mapped_size);
1315
1316 crc = btrfs_csum_data(root, p, crc, l);
1317 kunmap_atomic(mapped_buffer);
1318 len -= l;
1319 if (len == 0)
1320 break;
1321 index++;
1322 BUG_ON(index >= sblock->page_count);
1323 BUG_ON(!sblock->pagev[index].page);
1324 page = sblock->pagev[index].page;
1325 mapped_buffer = kmap_atomic(page);
1326 mapped_size = PAGE_SIZE;
1327 p = mapped_buffer;
1328 }
1329
1330 btrfs_csum_final(crc, calculated_csum);
1331 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
894 ++crc_fail; 1332 ++crc_fail;
895 1333
896 spin_lock(&sdev->stat_lock); 1334 if (crc_fail || fail) {
897 ++sdev->stat.tree_extents_scrubbed; 1335 spin_lock(&sdev->stat_lock);
898 sdev->stat.tree_bytes_scrubbed += PAGE_SIZE; 1336 if (crc_fail)
899 if (crc_fail) 1337 ++sdev->stat.csum_errors;
900 ++sdev->stat.csum_errors; 1338 if (fail)
901 if (fail) 1339 ++sdev->stat.verify_errors;
902 ++sdev->stat.verify_errors; 1340 spin_unlock(&sdev->stat_lock);
903 spin_unlock(&sdev->stat_lock); 1341 }
904 1342
905 return fail || crc_fail; 1343 return fail || crc_fail;
906} 1344}
907 1345
908static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) 1346static int scrub_checksum_super(struct scrub_block *sblock)
909{ 1347{
910 struct btrfs_super_block *s; 1348 struct btrfs_super_block *s;
911 u64 logical; 1349 struct scrub_dev *sdev = sblock->sdev;
912 struct scrub_dev *sdev = sbio->sdev;
913 struct btrfs_root *root = sdev->dev->dev_root; 1350 struct btrfs_root *root = sdev->dev->dev_root;
914 struct btrfs_fs_info *fs_info = root->fs_info; 1351 struct btrfs_fs_info *fs_info = root->fs_info;
915 u8 csum[BTRFS_CSUM_SIZE]; 1352 u8 calculated_csum[BTRFS_CSUM_SIZE];
1353 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1354 struct page *page;
1355 void *mapped_buffer;
1356 u64 mapped_size;
1357 void *p;
916 u32 crc = ~(u32)0; 1358 u32 crc = ~(u32)0;
917 int fail = 0; 1359 int fail = 0;
1360 u64 len;
1361 int index;
918 1362
919 s = (struct btrfs_super_block *)buffer; 1363 BUG_ON(sblock->page_count < 1);
920 logical = sbio->logical; 1364 page = sblock->pagev[0].page;
1365 mapped_buffer = kmap_atomic(page);
1366 s = (struct btrfs_super_block *)mapped_buffer;
1367 memcpy(on_disk_csum, s->csum, sdev->csum_size);
921 1368
922 if (logical != le64_to_cpu(s->bytenr)) 1369 if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
923 ++fail; 1370 ++fail;
924 1371
925 if (sbio->spag[0].generation != le64_to_cpu(s->generation)) 1372 if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
926 ++fail; 1373 ++fail;
927 1374
928 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1375 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
929 ++fail; 1376 ++fail;
930 1377
931 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, 1378 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
932 PAGE_SIZE - BTRFS_CSUM_SIZE); 1379 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
933 btrfs_csum_final(crc, csum); 1380 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
934 if (memcmp(csum, s->csum, sbio->sdev->csum_size)) 1381 index = 0;
1382 for (;;) {
1383 u64 l = min_t(u64, len, mapped_size);
1384
1385 crc = btrfs_csum_data(root, p, crc, l);
1386 kunmap_atomic(mapped_buffer);
1387 len -= l;
1388 if (len == 0)
1389 break;
1390 index++;
1391 BUG_ON(index >= sblock->page_count);
1392 BUG_ON(!sblock->pagev[index].page);
1393 page = sblock->pagev[index].page;
1394 mapped_buffer = kmap_atomic(page);
1395 mapped_size = PAGE_SIZE;
1396 p = mapped_buffer;
1397 }
1398
1399 btrfs_csum_final(crc, calculated_csum);
1400 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
935 ++fail; 1401 ++fail;
936 1402
937 if (fail) { 1403 if (fail) {
@@ -948,29 +1414,42 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
948 return fail; 1414 return fail;
949} 1415}
950 1416
951static int scrub_submit(struct scrub_dev *sdev) 1417static void scrub_block_get(struct scrub_block *sblock)
1418{
1419 atomic_inc(&sblock->ref_count);
1420}
1421
1422static void scrub_block_put(struct scrub_block *sblock)
1423{
1424 if (atomic_dec_and_test(&sblock->ref_count)) {
1425 int i;
1426
1427 for (i = 0; i < sblock->page_count; i++)
1428 if (sblock->pagev[i].page)
1429 __free_page(sblock->pagev[i].page);
1430 kfree(sblock);
1431 }
1432}
1433
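scrub_block_get()/scrub_block_put() above encode the lifetime rule for the new scrub_block: the creating function holds one reference and every page in flight holds another, so whichever context drops the last reference frees the pages and the block itself. A compressed walkthrough of where the patch takes and drops those references (the wrapper function is purely illustrative):

static void scrub_block_lifetime_sketch(struct scrub_block *sblock)
{
	int i;

	/* scrub_pages(): one creator reference, set right after kzalloc() */
	atomic_set(&sblock->ref_count, 1);

	/* scrub_add_page_to_bio(): +1 reference per page put in flight */
	for (i = 0; i < sblock->page_count; i++) {
		scrub_block_get(sblock);
		atomic_inc(&sblock->outstanding_pages);
	}

	/*
	 * scrub_bio_end_io_worker(): one scrub_block_put() per completed
	 * page; the completion that zeroes outstanding_pages calls
	 * scrub_block_complete() before dropping its reference.
	 */

	/* scrub_pages() again: the creator reference is dropped last */
	scrub_block_put(sblock);
}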
1434static void scrub_submit(struct scrub_dev *sdev)
952{ 1435{
953 struct scrub_bio *sbio; 1436 struct scrub_bio *sbio;
954 1437
955 if (sdev->curr == -1) 1438 if (sdev->curr == -1)
956 return 0; 1439 return;
957 1440
958 sbio = sdev->bios[sdev->curr]; 1441 sbio = sdev->bios[sdev->curr];
959 sbio->err = 0;
960 sdev->curr = -1; 1442 sdev->curr = -1;
961 atomic_inc(&sdev->in_flight); 1443 atomic_inc(&sdev->in_flight);
962 1444
963 btrfsic_submit_bio(READ, sbio->bio); 1445 btrfsic_submit_bio(READ, sbio->bio);
964
965 return 0;
966} 1446}
967 1447
968static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, 1448static int scrub_add_page_to_bio(struct scrub_dev *sdev,
969 u64 physical, u64 flags, u64 gen, int mirror_num, 1449 struct scrub_page *spage)
970 u8 *csum, int force)
971{ 1450{
1451 struct scrub_block *sblock = spage->sblock;
972 struct scrub_bio *sbio; 1452 struct scrub_bio *sbio;
973 struct page *page;
974 int ret; 1453 int ret;
975 1454
976again: 1455again:
@@ -983,7 +1462,7 @@ again:
983 if (sdev->curr != -1) { 1462 if (sdev->curr != -1) {
984 sdev->first_free = sdev->bios[sdev->curr]->next_free; 1463 sdev->first_free = sdev->bios[sdev->curr]->next_free;
985 sdev->bios[sdev->curr]->next_free = -1; 1464 sdev->bios[sdev->curr]->next_free = -1;
986 sdev->bios[sdev->curr]->count = 0; 1465 sdev->bios[sdev->curr]->page_count = 0;
987 spin_unlock(&sdev->list_lock); 1466 spin_unlock(&sdev->list_lock);
988 } else { 1467 } else {
989 spin_unlock(&sdev->list_lock); 1468 spin_unlock(&sdev->list_lock);
@@ -991,62 +1470,200 @@ again:
991 } 1470 }
992 } 1471 }
993 sbio = sdev->bios[sdev->curr]; 1472 sbio = sdev->bios[sdev->curr];
994 if (sbio->count == 0) { 1473 if (sbio->page_count == 0) {
995 struct bio *bio; 1474 struct bio *bio;
996 1475
997 sbio->physical = physical; 1476 sbio->physical = spage->physical;
998 sbio->logical = logical; 1477 sbio->logical = spage->logical;
999 bio = bio_alloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); 1478 bio = sbio->bio;
1000 if (!bio) 1479 if (!bio) {
1001 return -ENOMEM; 1480 bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio);
1481 if (!bio)
1482 return -ENOMEM;
1483 sbio->bio = bio;
1484 }
1002 1485
1003 bio->bi_private = sbio; 1486 bio->bi_private = sbio;
1004 bio->bi_end_io = scrub_bio_end_io; 1487 bio->bi_end_io = scrub_bio_end_io;
1005 bio->bi_bdev = sdev->dev->bdev; 1488 bio->bi_bdev = sdev->dev->bdev;
1006 bio->bi_sector = sbio->physical >> 9; 1489 bio->bi_sector = spage->physical >> 9;
1007 sbio->err = 0; 1490 sbio->err = 0;
1008 sbio->bio = bio; 1491 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1009 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || 1492 spage->physical ||
1010 sbio->logical + sbio->count * PAGE_SIZE != logical) { 1493 sbio->logical + sbio->page_count * PAGE_SIZE !=
1011 ret = scrub_submit(sdev); 1494 spage->logical) {
1012 if (ret) 1495 scrub_submit(sdev);
1013 return ret;
1014 goto again; 1496 goto again;
1015 } 1497 }
1016 sbio->spag[sbio->count].flags = flags;
1017 sbio->spag[sbio->count].generation = gen;
1018 sbio->spag[sbio->count].have_csum = 0;
1019 sbio->spag[sbio->count].mirror_num = mirror_num;
1020
1021 page = alloc_page(GFP_NOFS);
1022 if (!page)
1023 return -ENOMEM;
1024 1498
1025 ret = bio_add_page(sbio->bio, page, PAGE_SIZE, 0); 1499 sbio->pagev[sbio->page_count] = spage;
1026 if (!ret) { 1500 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1027 __free_page(page); 1501 if (ret != PAGE_SIZE) {
1028 ret = scrub_submit(sdev); 1502 if (sbio->page_count < 1) {
1029 if (ret) 1503 bio_put(sbio->bio);
1030 return ret; 1504 sbio->bio = NULL;
1505 return -EIO;
1506 }
1507 scrub_submit(sdev);
1031 goto again; 1508 goto again;
1032 } 1509 }
1033 1510
1034 if (csum) { 1511 scrub_block_get(sblock); /* one for the added page */
1035 sbio->spag[sbio->count].have_csum = 1; 1512 atomic_inc(&sblock->outstanding_pages);
1036 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); 1513 sbio->page_count++;
1514 if (sbio->page_count == sdev->pages_per_bio)
1515 scrub_submit(sdev);
1516
1517 return 0;
1518}
1519
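scrub_add_page_to_bio() only appends a page when it extends the current bio contiguously in both the physical and the logical address space; any discontinuity forces a submit and a fresh bio. The merge test from the function above, pulled out as a sketch (the helper name is illustrative):

static bool sbio_page_is_contiguous(struct scrub_bio *sbio,
				    struct scrub_page *spage)
{
	u64 off = (u64)sbio->page_count * PAGE_SIZE;

	return sbio->physical + off == spage->physical &&
	       sbio->logical + off == spage->logical;
}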
1520static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
1521 u64 physical, u64 flags, u64 gen, int mirror_num,
1522 u8 *csum, int force)
1523{
1524 struct scrub_block *sblock;
1525 int index;
1526
1527 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
1528 if (!sblock) {
1529 spin_lock(&sdev->stat_lock);
1530 sdev->stat.malloc_errors++;
1531 spin_unlock(&sdev->stat_lock);
1532 return -ENOMEM;
1037 } 1533 }
1038 ++sbio->count; 1534
1039 if (sbio->count == SCRUB_PAGES_PER_BIO || force) { 1535 /* one ref inside this function, plus one for each page later on */
1536 atomic_set(&sblock->ref_count, 1);
1537 sblock->sdev = sdev;
1538 sblock->no_io_error_seen = 1;
1539
1540 for (index = 0; len > 0; index++) {
1541 struct scrub_page *spage = sblock->pagev + index;
1542 u64 l = min_t(u64, len, PAGE_SIZE);
1543
1544 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
1545 spage->page = alloc_page(GFP_NOFS);
1546 if (!spage->page) {
1547 spin_lock(&sdev->stat_lock);
1548 sdev->stat.malloc_errors++;
1549 spin_unlock(&sdev->stat_lock);
1550 while (index > 0) {
1551 index--;
1552 __free_page(sblock->pagev[index].page);
1553 }
1554 kfree(sblock);
1555 return -ENOMEM;
1556 }
1557 spage->sblock = sblock;
1558 spage->bdev = sdev->dev->bdev;
1559 spage->flags = flags;
1560 spage->generation = gen;
1561 spage->logical = logical;
1562 spage->physical = physical;
1563 spage->mirror_num = mirror_num;
1564 if (csum) {
1565 spage->have_csum = 1;
1566 memcpy(spage->csum, csum, sdev->csum_size);
1567 } else {
1568 spage->have_csum = 0;
1569 }
1570 sblock->page_count++;
1571 len -= l;
1572 logical += l;
1573 physical += l;
1574 }
1575
1576 BUG_ON(sblock->page_count == 0);
1577 for (index = 0; index < sblock->page_count; index++) {
1578 struct scrub_page *spage = sblock->pagev + index;
1040 int ret; 1579 int ret;
1041 1580
1042 ret = scrub_submit(sdev); 1581 ret = scrub_add_page_to_bio(sdev, spage);
1043 if (ret) 1582 if (ret) {
1583 scrub_block_put(sblock);
1044 return ret; 1584 return ret;
1585 }
1045 } 1586 }
1046 1587
1588 if (force)
1589 scrub_submit(sdev);
1590
1591 /* last one frees, either here or in bio completion for last page */
1592 scrub_block_put(sblock);
1047 return 0; 1593 return 0;
1048} 1594}
1049 1595
1596static void scrub_bio_end_io(struct bio *bio, int err)
1597{
1598 struct scrub_bio *sbio = bio->bi_private;
1599 struct scrub_dev *sdev = sbio->sdev;
1600 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
1601
1602 sbio->err = err;
1603 sbio->bio = bio;
1604
1605 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
1606}
1607
1608static void scrub_bio_end_io_worker(struct btrfs_work *work)
1609{
1610 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1611 struct scrub_dev *sdev = sbio->sdev;
1612 int i;
1613
1614 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO);
1615 if (sbio->err) {
1616 for (i = 0; i < sbio->page_count; i++) {
1617 struct scrub_page *spage = sbio->pagev[i];
1618
1619 spage->io_error = 1;
1620 spage->sblock->no_io_error_seen = 0;
1621 }
1622 }
1623
1624 /* now complete the scrub_block items that have all pages completed */
1625 for (i = 0; i < sbio->page_count; i++) {
1626 struct scrub_page *spage = sbio->pagev[i];
1627 struct scrub_block *sblock = spage->sblock;
1628
1629 if (atomic_dec_and_test(&sblock->outstanding_pages))
1630 scrub_block_complete(sblock);
1631 scrub_block_put(sblock);
1632 }
1633
1634 if (sbio->err) {
1635 /* what is this good for??? */
1636 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
1637 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
1638 sbio->bio->bi_phys_segments = 0;
1639 sbio->bio->bi_idx = 0;
1640
1641 for (i = 0; i < sbio->page_count; i++) {
1642 struct bio_vec *bi;
1643 bi = &sbio->bio->bi_io_vec[i];
1644 bi->bv_offset = 0;
1645 bi->bv_len = PAGE_SIZE;
1646 }
1647 }
1648
1649 bio_put(sbio->bio);
1650 sbio->bio = NULL;
1651 spin_lock(&sdev->list_lock);
1652 sbio->next_free = sdev->first_free;
1653 sdev->first_free = sbio->index;
1654 spin_unlock(&sdev->list_lock);
1655 atomic_dec(&sdev->in_flight);
1656 wake_up(&sdev->list_wait);
1657}
1658
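The worker finishes by pushing its scrub_bio back onto a free list threaded through the fixed bios[] array via the first_free/next_free indices; scrub_add_page_to_bio() pops from the same list under list_lock. The pop side as a sketch (the helper name is illustrative):

static struct scrub_bio *sbio_pop_free_sketch(struct scrub_dev *sdev)
{
	struct scrub_bio *sbio = NULL;

	spin_lock(&sdev->list_lock);
	if (sdev->first_free != -1) {
		sdev->curr = sdev->first_free;
		sbio = sdev->bios[sdev->curr];
		sdev->first_free = sbio->next_free;
		sbio->next_free = -1;
		sbio->page_count = 0;
	}
	spin_unlock(&sdev->list_lock);
	return sbio;
}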
1659static void scrub_block_complete(struct scrub_block *sblock)
1660{
1661 if (!sblock->no_io_error_seen)
1662 scrub_handle_errored_block(sblock);
1663 else
1664 scrub_checksum(sblock);
1665}
1666
1050static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, 1667static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
1051 u8 *csum) 1668 u8 *csum)
1052{ 1669{
@@ -1054,7 +1671,6 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
1054 int ret = 0; 1671 int ret = 0;
1055 unsigned long i; 1672 unsigned long i;
1056 unsigned long num_sectors; 1673 unsigned long num_sectors;
1057 u32 sectorsize = sdev->dev->dev_root->sectorsize;
1058 1674
1059 while (!list_empty(&sdev->csum_list)) { 1675 while (!list_empty(&sdev->csum_list)) {
1060 sum = list_first_entry(&sdev->csum_list, 1676 sum = list_first_entry(&sdev->csum_list,
@@ -1072,7 +1688,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
1072 if (!sum) 1688 if (!sum)
1073 return 0; 1689 return 0;
1074 1690
1075 num_sectors = sum->len / sectorsize; 1691 num_sectors = sum->len / sdev->sectorsize;
1076 for (i = 0; i < num_sectors; ++i) { 1692 for (i = 0; i < num_sectors; ++i) {
1077 if (sum->sums[i].bytenr == logical) { 1693 if (sum->sums[i].bytenr == logical) {
1078 memcpy(csum, &sum->sums[i].sum, sdev->csum_size); 1694 memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
@@ -1093,9 +1709,28 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
1093{ 1709{
1094 int ret; 1710 int ret;
1095 u8 csum[BTRFS_CSUM_SIZE]; 1711 u8 csum[BTRFS_CSUM_SIZE];
1712 u32 blocksize;
1713
1714 if (flags & BTRFS_EXTENT_FLAG_DATA) {
1715 blocksize = sdev->sectorsize;
1716 spin_lock(&sdev->stat_lock);
1717 sdev->stat.data_extents_scrubbed++;
1718 sdev->stat.data_bytes_scrubbed += len;
1719 spin_unlock(&sdev->stat_lock);
1720 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1721 BUG_ON(sdev->nodesize != sdev->leafsize);
1722 blocksize = sdev->nodesize;
1723 spin_lock(&sdev->stat_lock);
1724 sdev->stat.tree_extents_scrubbed++;
1725 sdev->stat.tree_bytes_scrubbed += len;
1726 spin_unlock(&sdev->stat_lock);
1727 } else {
1728 blocksize = sdev->sectorsize;
1729 BUG_ON(1);
1730 }
1096 1731
1097 while (len) { 1732 while (len) {
1098 u64 l = min_t(u64, len, PAGE_SIZE); 1733 u64 l = min_t(u64, len, blocksize);
1099 int have_csum = 0; 1734 int have_csum = 0;
1100 1735
1101 if (flags & BTRFS_EXTENT_FLAG_DATA) { 1736 if (flags & BTRFS_EXTENT_FLAG_DATA) {
@@ -1104,8 +1739,8 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
1104 if (have_csum == 0) 1739 if (have_csum == 0)
1105 ++sdev->stat.no_csum; 1740 ++sdev->stat.no_csum;
1106 } 1741 }
1107 ret = scrub_page(sdev, logical, l, physical, flags, gen, 1742 ret = scrub_pages(sdev, logical, l, physical, flags, gen,
1108 mirror_num, have_csum ? csum : NULL, 0); 1743 mirror_num, have_csum ? csum : NULL, 0);
1109 if (ret) 1744 if (ret)
1110 return ret; 1745 return ret;
1111 len -= l; 1746 len -= l;
@@ -1170,6 +1805,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
1170 if (!path) 1805 if (!path)
1171 return -ENOMEM; 1806 return -ENOMEM;
1172 1807
1808 /*
1809 * work on commit root. The related disk blocks are static as
 1810 * long as COW is applied. This means it is safe to rewrite
1811 * them to repair disk errors without any race conditions
1812 */
1173 path->search_commit_root = 1; 1813 path->search_commit_root = 1;
1174 path->skip_locking = 1; 1814 path->skip_locking = 1;
1175 1815
@@ -1516,15 +2156,18 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1516 struct btrfs_device *device = sdev->dev; 2156 struct btrfs_device *device = sdev->dev;
1517 struct btrfs_root *root = device->dev_root; 2157 struct btrfs_root *root = device->dev_root;
1518 2158
2159 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
2160 return -EIO;
2161
1519 gen = root->fs_info->last_trans_committed; 2162 gen = root->fs_info->last_trans_committed;
1520 2163
1521 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 2164 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1522 bytenr = btrfs_sb_offset(i); 2165 bytenr = btrfs_sb_offset(i);
1523 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) 2166 if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
1524 break; 2167 break;
1525 2168
1526 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr, 2169 ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
1527 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1); 2170 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1528 if (ret) 2171 if (ret)
1529 return ret; 2172 return ret;
1530 } 2173 }
@@ -1583,10 +2226,30 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1583 /* 2226 /*
1584 * check some assumptions 2227 * check some assumptions
1585 */ 2228 */
1586 if (root->sectorsize != PAGE_SIZE || 2229 if (root->nodesize != root->leafsize) {
1587 root->sectorsize != root->leafsize || 2230 printk(KERN_ERR
1588 root->sectorsize != root->nodesize) { 2231 "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
1589 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n"); 2232 root->nodesize, root->leafsize);
2233 return -EINVAL;
2234 }
2235
2236 if (root->nodesize > BTRFS_STRIPE_LEN) {
2237 /*
2238 * in this case scrub is unable to calculate the checksum
 2239 * the way it is currently implemented. Do not handle this
2240 * situation at all because it won't ever happen.
2241 */
2242 printk(KERN_ERR
2243 "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
2244 root->nodesize, BTRFS_STRIPE_LEN);
2245 return -EINVAL;
2246 }
2247
2248 if (root->sectorsize != PAGE_SIZE) {
2249 /* not supported for data w/o checksums */
2250 printk(KERN_ERR
2251 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
2252 root->sectorsize, (unsigned long long)PAGE_SIZE);
1590 return -EINVAL; 2253 return -EINVAL;
1591 } 2254 }
1592 2255
@@ -1656,7 +2319,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1656 return ret; 2319 return ret;
1657} 2320}
1658 2321
1659int btrfs_scrub_pause(struct btrfs_root *root) 2322void btrfs_scrub_pause(struct btrfs_root *root)
1660{ 2323{
1661 struct btrfs_fs_info *fs_info = root->fs_info; 2324 struct btrfs_fs_info *fs_info = root->fs_info;
1662 2325
@@ -1671,34 +2334,28 @@ int btrfs_scrub_pause(struct btrfs_root *root)
1671 mutex_lock(&fs_info->scrub_lock); 2334 mutex_lock(&fs_info->scrub_lock);
1672 } 2335 }
1673 mutex_unlock(&fs_info->scrub_lock); 2336 mutex_unlock(&fs_info->scrub_lock);
1674
1675 return 0;
1676} 2337}
1677 2338
1678int btrfs_scrub_continue(struct btrfs_root *root) 2339void btrfs_scrub_continue(struct btrfs_root *root)
1679{ 2340{
1680 struct btrfs_fs_info *fs_info = root->fs_info; 2341 struct btrfs_fs_info *fs_info = root->fs_info;
1681 2342
1682 atomic_dec(&fs_info->scrub_pause_req); 2343 atomic_dec(&fs_info->scrub_pause_req);
1683 wake_up(&fs_info->scrub_pause_wait); 2344 wake_up(&fs_info->scrub_pause_wait);
1684 return 0;
1685} 2345}
1686 2346
1687int btrfs_scrub_pause_super(struct btrfs_root *root) 2347void btrfs_scrub_pause_super(struct btrfs_root *root)
1688{ 2348{
1689 down_write(&root->fs_info->scrub_super_lock); 2349 down_write(&root->fs_info->scrub_super_lock);
1690 return 0;
1691} 2350}
1692 2351
1693int btrfs_scrub_continue_super(struct btrfs_root *root) 2352void btrfs_scrub_continue_super(struct btrfs_root *root)
1694{ 2353{
1695 up_write(&root->fs_info->scrub_super_lock); 2354 up_write(&root->fs_info->scrub_super_lock);
1696 return 0;
1697} 2355}
1698 2356
1699int btrfs_scrub_cancel(struct btrfs_root *root) 2357int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
1700{ 2358{
1701 struct btrfs_fs_info *fs_info = root->fs_info;
1702 2359
1703 mutex_lock(&fs_info->scrub_lock); 2360 mutex_lock(&fs_info->scrub_lock);
1704 if (!atomic_read(&fs_info->scrubs_running)) { 2361 if (!atomic_read(&fs_info->scrubs_running)) {
@@ -1719,6 +2376,11 @@ int btrfs_scrub_cancel(struct btrfs_root *root)
1719 return 0; 2376 return 0;
1720} 2377}
1721 2378
2379int btrfs_scrub_cancel(struct btrfs_root *root)
2380{
2381 return __btrfs_scrub_cancel(root->fs_info);
2382}
2383
1722int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) 2384int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1723{ 2385{
1724 struct btrfs_fs_info *fs_info = root->fs_info; 2386 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1741,6 +2403,7 @@ int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1741 2403
1742 return 0; 2404 return 0;
1743} 2405}
2406
1744int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) 2407int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1745{ 2408{
1746 struct btrfs_fs_info *fs_info = root->fs_info; 2409 struct btrfs_fs_info *fs_info = root->fs_info;
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index bc1f6ad18442..c6ffa5812419 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -44,8 +44,9 @@
44#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ 44#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
45u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ 45u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
46void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \ 46void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \
47u##bits btrfs_##name(struct extent_buffer *eb, \ 47void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \
48 type *s) \ 48u##bits btrfs_token_##name(struct extent_buffer *eb, \
49 type *s, struct btrfs_map_token *token) \
49{ \ 50{ \
50 unsigned long part_offset = (unsigned long)s; \ 51 unsigned long part_offset = (unsigned long)s; \
51 unsigned long offset = part_offset + offsetof(type, member); \ 52 unsigned long offset = part_offset + offsetof(type, member); \
@@ -54,9 +55,18 @@ u##bits btrfs_##name(struct extent_buffer *eb, \
54 char *kaddr; \ 55 char *kaddr; \
55 unsigned long map_start; \ 56 unsigned long map_start; \
56 unsigned long map_len; \ 57 unsigned long map_len; \
58 unsigned long mem_len = sizeof(((type *)0)->member); \
57 u##bits res; \ 59 u##bits res; \
60 if (token && token->kaddr && token->offset <= offset && \
61 token->eb == eb && \
62 (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
63 kaddr = token->kaddr; \
64 p = (type *)(kaddr + part_offset - token->offset); \
65 res = le##bits##_to_cpu(p->member); \
66 return res; \
67 } \
58 err = map_private_extent_buffer(eb, offset, \ 68 err = map_private_extent_buffer(eb, offset, \
59 sizeof(((type *)0)->member), \ 69 mem_len, \
60 &kaddr, &map_start, &map_len); \ 70 &kaddr, &map_start, &map_len); \
61 if (err) { \ 71 if (err) { \
62 __le##bits leres; \ 72 __le##bits leres; \
@@ -65,10 +75,15 @@ u##bits btrfs_##name(struct extent_buffer *eb, \
65 } \ 75 } \
66 p = (type *)(kaddr + part_offset - map_start); \ 76 p = (type *)(kaddr + part_offset - map_start); \
67 res = le##bits##_to_cpu(p->member); \ 77 res = le##bits##_to_cpu(p->member); \
78 if (token) { \
79 token->kaddr = kaddr; \
80 token->offset = map_start; \
81 token->eb = eb; \
82 } \
68 return res; \ 83 return res; \
69} \ 84} \
70void btrfs_set_##name(struct extent_buffer *eb, \ 85void btrfs_set_token_##name(struct extent_buffer *eb, \
71 type *s, u##bits val) \ 86 type *s, u##bits val, struct btrfs_map_token *token) \
72{ \ 87{ \
73 unsigned long part_offset = (unsigned long)s; \ 88 unsigned long part_offset = (unsigned long)s; \
74 unsigned long offset = part_offset + offsetof(type, member); \ 89 unsigned long offset = part_offset + offsetof(type, member); \
@@ -77,8 +92,17 @@ void btrfs_set_##name(struct extent_buffer *eb, \
77 char *kaddr; \ 92 char *kaddr; \
78 unsigned long map_start; \ 93 unsigned long map_start; \
79 unsigned long map_len; \ 94 unsigned long map_len; \
95 unsigned long mem_len = sizeof(((type *)0)->member); \
96 if (token && token->kaddr && token->offset <= offset && \
97 token->eb == eb && \
98 (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
99 kaddr = token->kaddr; \
100 p = (type *)(kaddr + part_offset - token->offset); \
101 p->member = cpu_to_le##bits(val); \
102 return; \
103 } \
80 err = map_private_extent_buffer(eb, offset, \ 104 err = map_private_extent_buffer(eb, offset, \
81 sizeof(((type *)0)->member), \ 105 mem_len, \
82 &kaddr, &map_start, &map_len); \ 106 &kaddr, &map_start, &map_len); \
83 if (err) { \ 107 if (err) { \
84 __le##bits val2; \ 108 __le##bits val2; \
@@ -88,7 +112,22 @@ void btrfs_set_##name(struct extent_buffer *eb, \
88 } \ 112 } \
89 p = (type *)(kaddr + part_offset - map_start); \ 113 p = (type *)(kaddr + part_offset - map_start); \
90 p->member = cpu_to_le##bits(val); \ 114 p->member = cpu_to_le##bits(val); \
91} 115 if (token) { \
116 token->kaddr = kaddr; \
117 token->offset = map_start; \
118 token->eb = eb; \
119 } \
120} \
121void btrfs_set_##name(struct extent_buffer *eb, \
122 type *s, u##bits val) \
123{ \
124 btrfs_set_token_##name(eb, s, val, NULL); \
125} \
126u##bits btrfs_##name(struct extent_buffer *eb, \
127 type *s) \
128{ \
129 return btrfs_token_##name(eb, s, NULL); \
130} \
92 131
93#include "ctree.h" 132#include "ctree.h"
94 133
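The btrfs_token_* variants generated above let a caller that touches several fields of one on-disk item reuse a single page mapping instead of going through map_private_extent_buffer() once per field: the first access fills the token with kaddr/offset/eb, and the fast path merely checks that the next member still lies inside the cached page of the same extent buffer. A usage sketch (the surrounding function is invented; btrfs_token_inode_size and btrfs_token_inode_mode are the accessors this macro generates for the existing inode_size/inode_mode getters):

static void read_inode_fields_sketch(struct extent_buffer *eb,
				     struct btrfs_inode_item *ii)
{
	struct btrfs_map_token token;
	u64 size;
	u32 mode;

	memset(&token, 0, sizeof(token));	/* empty cache */

	/* first call maps the page and fills the token ... */
	size = btrfs_token_inode_size(eb, ii, &token);
	/* ... later calls reuse token->kaddr without remapping */
	mode = btrfs_token_inode_mode(eb, ii, &token);

	(void)size;
	(void)mode;
}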
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 81df3fec6a6d..8d5d380f7bdb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -76,6 +76,9 @@ static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
76 case -EROFS: 76 case -EROFS:
77 errstr = "Readonly filesystem"; 77 errstr = "Readonly filesystem";
78 break; 78 break;
79 case -EEXIST:
80 errstr = "Object already exists";
81 break;
79 default: 82 default:
80 if (nbuf) { 83 if (nbuf) {
81 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 84 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
@@ -116,6 +119,8 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
116 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 119 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
117 sb->s_flags |= MS_RDONLY; 120 sb->s_flags |= MS_RDONLY;
118 printk(KERN_INFO "btrfs is forced readonly\n"); 121 printk(KERN_INFO "btrfs is forced readonly\n");
122 __btrfs_scrub_cancel(fs_info);
123// WARN_ON(1);
119 } 124 }
120} 125}
121 126
@@ -124,25 +129,132 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
 124 * invokes the appropriate error response. 129
125 */ 130 */
126void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, 131void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
127 unsigned int line, int errno) 132 unsigned int line, int errno, const char *fmt, ...)
128{ 133{
129 struct super_block *sb = fs_info->sb; 134 struct super_block *sb = fs_info->sb;
130 char nbuf[16]; 135 char nbuf[16];
131 const char *errstr; 136 const char *errstr;
137 va_list args;
138 va_start(args, fmt);
132 139
133 /* 140 /*
134 * Special case: if the error is EROFS, and we're already 141 * Special case: if the error is EROFS, and we're already
135 * under MS_RDONLY, then it is safe here. 142 * under MS_RDONLY, then it is safe here.
136 */ 143 */
137 if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) 144 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
145 return;
146
147 errstr = btrfs_decode_error(fs_info, errno, nbuf);
148 if (fmt) {
149 struct va_format vaf = {
150 .fmt = fmt,
151 .va = &args,
152 };
153
154 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n",
155 sb->s_id, function, line, errstr, &vaf);
156 } else {
157 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
158 sb->s_id, function, line, errstr);
159 }
160
161 /* Don't go through full error handling during mount */
162 if (sb->s_flags & MS_BORN) {
163 save_error_info(fs_info);
164 btrfs_handle_error(fs_info);
165 }
166 va_end(args);
167}
168
169const char *logtypes[] = {
170 "emergency",
171 "alert",
172 "critical",
173 "error",
174 "warning",
175 "notice",
176 "info",
177 "debug",
178};
179
180void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
181{
182 struct super_block *sb = fs_info->sb;
183 char lvl[4];
184 struct va_format vaf;
185 va_list args;
186 const char *type = logtypes[4];
187
188 va_start(args, fmt);
189
190 if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') {
 191 strncpy(lvl, fmt, 3); lvl[3] = '\0';
 192 type = logtypes[fmt[1] - '0'];
 193 fmt += 3;
194 } else
195 *lvl = '\0';
196
197 vaf.fmt = fmt;
198 vaf.va = &args;
199 printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf);
200}
201
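btrfs_printk() accepts an optional printk-style level prefix: "<0>" through "<7>" select the matching logtypes[] entry, and a message without a prefix is labeled with the default, "warning". Two call sketches (fs_info and devid assumed in scope; the first message and the device name in the comment are invented, the second message appears later in this patch):

/* "<6>" selects "info"; this prints
 * "<6>BTRFS info (device sda1): scrub started on devid 1" */
btrfs_printk(fs_info, "<6>scrub started on devid %llu\n",
	     (unsigned long long)devid);

/* no prefix: labeled with the default type, "warning" */
btrfs_printk(fs_info, "Aborting unused transaction.\n");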
202/*
203 * We only mark the transaction aborted and then set the file system read-only.
204 * This will prevent new transactions from starting or trying to join this
205 * one.
206 *
207 * This means that error recovery at the call site is limited to freeing
208 * any local memory allocations and passing the error code up without
209 * further cleanup. The transaction should complete as it normally would
210 * in the call path but will return -EIO.
211 *
212 * We'll complete the cleanup in btrfs_end_transaction and
213 * btrfs_commit_transaction.
214 */
215void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
216 struct btrfs_root *root, const char *function,
217 unsigned int line, int errno)
218{
219 WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted");
220 trans->aborted = errno;
221 /* Nothing used. The other threads that have joined this
222 * transaction may be able to continue. */
223 if (!trans->blocks_used) {
224 btrfs_printk(root->fs_info, "Aborting unused transaction.\n");
138 return; 225 return;
226 }
227 trans->transaction->aborted = errno;
228 __btrfs_std_error(root->fs_info, function, line, errno, NULL);
229}
230/*
231 * __btrfs_panic decodes unexpected, fatal errors from the caller,
232 * issues an alert, and either panics or BUGs, depending on mount options.
233 */
234void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
235 unsigned int line, int errno, const char *fmt, ...)
236{
237 char nbuf[16];
238 char *s_id = "<unknown>";
239 const char *errstr;
240 struct va_format vaf = { .fmt = fmt };
241 va_list args;
139 242
140 errstr = btrfs_decode_error(fs_info, errno, nbuf); 243 if (fs_info)
141 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", 244 s_id = fs_info->sb->s_id;
142 sb->s_id, function, line, errstr);
143 save_error_info(fs_info);
144 245
145 btrfs_handle_error(fs_info); 246 va_start(args, fmt);
247 vaf.va = &args;
248
249 errstr = btrfs_decode_error(fs_info, errno, nbuf);
 250 if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
251 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
252 s_id, function, line, &vaf, errstr);
253
254 printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
255 s_id, function, line, &vaf, errstr);
256 va_end(args);
257 /* Caller calls BUG() */
146} 258}
147 259
148static void btrfs_put_super(struct super_block *sb) 260static void btrfs_put_super(struct super_block *sb)
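__btrfs_abort_transaction() is normally reached through a btrfs_abort_transaction() wrapper that supplies __func__ and __LINE__, as the transaction.c hunks below show. The call-site shape the comment above describes, as a sketch (the function around the call is invented):

static int update_root_sketch(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root)
{
	int ret;

	ret = btrfs_update_root(trans, root->fs_info->tree_root,
				&root->root_key, &root->root_item);
	if (ret) {
		/* poison the whole transaction; callers only free
		 * local state and pass the error up */
		btrfs_abort_transaction(trans, root, ret);
		return ret;
	}
	return 0;
}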
@@ -166,7 +278,7 @@ enum {
166 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, 278 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
167 Opt_no_space_cache, Opt_recovery, Opt_skip_balance, 279 Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
168 Opt_check_integrity, Opt_check_integrity_including_extent_data, 280 Opt_check_integrity, Opt_check_integrity_including_extent_data,
169 Opt_check_integrity_print_mask, 281 Opt_check_integrity_print_mask, Opt_fatal_errors,
170 Opt_err, 282 Opt_err,
171}; 283};
172 284
@@ -206,12 +318,14 @@ static match_table_t tokens = {
206 {Opt_check_integrity, "check_int"}, 318 {Opt_check_integrity, "check_int"},
207 {Opt_check_integrity_including_extent_data, "check_int_data"}, 319 {Opt_check_integrity_including_extent_data, "check_int_data"},
208 {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, 320 {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
321 {Opt_fatal_errors, "fatal_errors=%s"},
209 {Opt_err, NULL}, 322 {Opt_err, NULL},
210}; 323};
211 324
212/* 325/*
213 * Regular mount options parser. Everything that is needed only when 326 * Regular mount options parser. Everything that is needed only when
214 * reading in a new superblock is parsed here. 327 * reading in a new superblock is parsed here.
328 * XXX JDM: This needs to be cleaned up for remount.
215 */ 329 */
216int btrfs_parse_options(struct btrfs_root *root, char *options) 330int btrfs_parse_options(struct btrfs_root *root, char *options)
217{ 331{
@@ -438,6 +552,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
438 ret = -EINVAL; 552 ret = -EINVAL;
439 goto out; 553 goto out;
440#endif 554#endif
555 case Opt_fatal_errors:
556 if (strcmp(args[0].from, "panic") == 0)
557 btrfs_set_opt(info->mount_opt,
558 PANIC_ON_FATAL_ERROR);
559 else if (strcmp(args[0].from, "bug") == 0)
560 btrfs_clear_opt(info->mount_opt,
561 PANIC_ON_FATAL_ERROR);
562 else {
563 ret = -EINVAL;
564 goto out;
565 }
566 break;
441 case Opt_err: 567 case Opt_err:
442 printk(KERN_INFO "btrfs: unrecognized mount option " 568 printk(KERN_INFO "btrfs: unrecognized mount option "
443 "'%s'\n", p); 569 "'%s'\n", p);
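With the new Opt_fatal_errors token the policy is chosen at mount time, e.g. mount -o fatal_errors=panic /dev/sdb /mnt (device path illustrative) makes __btrfs_panic() take the panic() branch, while fatal_errors=bug restores the default BUG() behavior; the next hunk makes an active panic setting visible through btrfs_show_options().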
@@ -762,6 +888,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
762 seq_puts(seq, ",inode_cache"); 888 seq_puts(seq, ",inode_cache");
763 if (btrfs_test_opt(root, SKIP_BALANCE)) 889 if (btrfs_test_opt(root, SKIP_BALANCE))
764 seq_puts(seq, ",skip_balance"); 890 seq_puts(seq, ",skip_balance");
891 if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
892 seq_puts(seq, ",fatal_errors=panic");
765 return 0; 893 return 0;
766} 894}
767 895
@@ -995,11 +1123,20 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
995{ 1123{
996 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1124 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
997 struct btrfs_root *root = fs_info->tree_root; 1125 struct btrfs_root *root = fs_info->tree_root;
1126 unsigned old_flags = sb->s_flags;
1127 unsigned long old_opts = fs_info->mount_opt;
1128 unsigned long old_compress_type = fs_info->compress_type;
1129 u64 old_max_inline = fs_info->max_inline;
1130 u64 old_alloc_start = fs_info->alloc_start;
1131 int old_thread_pool_size = fs_info->thread_pool_size;
1132 unsigned int old_metadata_ratio = fs_info->metadata_ratio;
998 int ret; 1133 int ret;
999 1134
1000 ret = btrfs_parse_options(root, data); 1135 ret = btrfs_parse_options(root, data);
1001 if (ret) 1136 if (ret) {
1002 return -EINVAL; 1137 ret = -EINVAL;
1138 goto restore;
1139 }
1003 1140
1004 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1141 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1005 return 0; 1142 return 0;
@@ -1007,26 +1144,44 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1007 if (*flags & MS_RDONLY) { 1144 if (*flags & MS_RDONLY) {
1008 sb->s_flags |= MS_RDONLY; 1145 sb->s_flags |= MS_RDONLY;
1009 1146
1010 ret = btrfs_commit_super(root); 1147 ret = btrfs_commit_super(root);
1011 WARN_ON(ret); 1148 if (ret)
1149 goto restore;
1012 } else { 1150 } else {
 1013 if (fs_info->fs_devices->rw_devices == 0) 1151 if (fs_info->fs_devices->rw_devices == 0) {
 1014 return -EACCES; 1152 ret = -EACCES; goto restore;
 1153 }
1015 1154
 1016 if (btrfs_super_log_root(fs_info->super_copy) != 0) 1155 if (btrfs_super_log_root(fs_info->super_copy) != 0) {
 1017 return -EINVAL; 1156 ret = -EINVAL; goto restore;
 1157 }
1018 1158
1019 ret = btrfs_cleanup_fs_roots(fs_info); 1159 ret = btrfs_cleanup_fs_roots(fs_info);
1020 WARN_ON(ret); 1160 if (ret)
1161 goto restore;
1021 1162
1022 /* recover relocation */ 1163 /* recover relocation */
1023 ret = btrfs_recover_relocation(root); 1164 ret = btrfs_recover_relocation(root);
1024 WARN_ON(ret); 1165 if (ret)
1166 goto restore;
1025 1167
1026 sb->s_flags &= ~MS_RDONLY; 1168 sb->s_flags &= ~MS_RDONLY;
1027 } 1169 }
1028 1170
1029 return 0; 1171 return 0;
1172
1173restore:
1174 /* We've hit an error - don't reset MS_RDONLY */
1175 if (sb->s_flags & MS_RDONLY)
1176 old_flags |= MS_RDONLY;
1177 sb->s_flags = old_flags;
1178 fs_info->mount_opt = old_opts;
1179 fs_info->compress_type = old_compress_type;
1180 fs_info->max_inline = old_max_inline;
1181 fs_info->alloc_start = old_alloc_start;
1182 fs_info->thread_pool_size = old_thread_pool_size;
1183 fs_info->metadata_ratio = old_metadata_ratio;
1184 return ret;
1030} 1185}
1031 1186
1032/* Used to sort the devices by max_avail(descending sort) */ 1187/* Used to sort the devices by max_avail(descending sort) */
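The reworked btrfs_remount() follows a snapshot-and-rollback discipline: every tunable is captured before btrfs_parse_options() can modify fs_info, and each failure jumps to restore:, which puts all of them back so a failed remount leaves the previous configuration intact. The skeleton, trimmed to two of the saved fields (the function name is illustrative):

static int remount_rollback_sketch(struct btrfs_fs_info *fs_info,
				   struct btrfs_root *root, char *data)
{
	unsigned long old_opts = fs_info->mount_opt;
	u64 old_max_inline = fs_info->max_inline;
	int ret;

	ret = btrfs_parse_options(root, data);
	if (ret) {
		ret = -EINVAL;
		goto restore;
	}
	return 0;

restore:
	/* parsing may already have modified these */
	fs_info->mount_opt = old_opts;
	fs_info->max_inline = old_max_inline;
	return ret;
}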
@@ -1356,9 +1511,7 @@ static int __init init_btrfs_fs(void)
1356 if (err) 1511 if (err)
1357 return err; 1512 return err;
1358 1513
1359 err = btrfs_init_compress(); 1514 btrfs_init_compress();
1360 if (err)
1361 goto free_sysfs;
1362 1515
1363 err = btrfs_init_cachep(); 1516 err = btrfs_init_cachep();
1364 if (err) 1517 if (err)
@@ -1384,6 +1537,8 @@ static int __init init_btrfs_fs(void)
1384 if (err) 1537 if (err)
1385 goto unregister_ioctl; 1538 goto unregister_ioctl;
1386 1539
1540 btrfs_init_lockdep();
1541
1387 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); 1542 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
1388 return 0; 1543 return 0;
1389 1544
@@ -1399,7 +1554,6 @@ free_cachep:
1399 btrfs_destroy_cachep(); 1554 btrfs_destroy_cachep();
1400free_compress: 1555free_compress:
1401 btrfs_exit_compress(); 1556 btrfs_exit_compress();
1402free_sysfs:
1403 btrfs_exit_sysfs(); 1557 btrfs_exit_sysfs();
1404 return err; 1558 return err;
1405} 1559}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 04b77e3ceb7a..8da29e8e4de1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -31,7 +31,7 @@
31 31
32#define BTRFS_ROOT_TRANS_TAG 0 32#define BTRFS_ROOT_TRANS_TAG 0
33 33
34static noinline void put_transaction(struct btrfs_transaction *transaction) 34void put_transaction(struct btrfs_transaction *transaction)
35{ 35{
36 WARN_ON(atomic_read(&transaction->use_count) == 0); 36 WARN_ON(atomic_read(&transaction->use_count) == 0);
37 if (atomic_dec_and_test(&transaction->use_count)) { 37 if (atomic_dec_and_test(&transaction->use_count)) {
@@ -58,6 +58,12 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
58 58
59 spin_lock(&root->fs_info->trans_lock); 59 spin_lock(&root->fs_info->trans_lock);
60loop: 60loop:
61 /* The file system has been taken offline. No new transactions. */
62 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
63 spin_unlock(&root->fs_info->trans_lock);
64 return -EROFS;
65 }
66
61 if (root->fs_info->trans_no_join) { 67 if (root->fs_info->trans_no_join) {
62 if (!nofail) { 68 if (!nofail) {
63 spin_unlock(&root->fs_info->trans_lock); 69 spin_unlock(&root->fs_info->trans_lock);
@@ -67,6 +73,8 @@ loop:
67 73
68 cur_trans = root->fs_info->running_transaction; 74 cur_trans = root->fs_info->running_transaction;
69 if (cur_trans) { 75 if (cur_trans) {
 76 if (cur_trans->aborted) {
 77 spin_unlock(&root->fs_info->trans_lock); return cur_trans->aborted; }
70 atomic_inc(&cur_trans->use_count); 78 atomic_inc(&cur_trans->use_count);
71 atomic_inc(&cur_trans->num_writers); 79 atomic_inc(&cur_trans->num_writers);
72 cur_trans->num_joined++; 80 cur_trans->num_joined++;
@@ -123,6 +131,7 @@ loop:
123 root->fs_info->generation++; 131 root->fs_info->generation++;
124 cur_trans->transid = root->fs_info->generation; 132 cur_trans->transid = root->fs_info->generation;
125 root->fs_info->running_transaction = cur_trans; 133 root->fs_info->running_transaction = cur_trans;
134 cur_trans->aborted = 0;
126 spin_unlock(&root->fs_info->trans_lock); 135 spin_unlock(&root->fs_info->trans_lock);
127 136
128 return 0; 137 return 0;
@@ -318,6 +327,7 @@ again:
318 h->use_count = 1; 327 h->use_count = 1;
319 h->block_rsv = NULL; 328 h->block_rsv = NULL;
320 h->orig_rsv = NULL; 329 h->orig_rsv = NULL;
330 h->aborted = 0;
321 331
322 smp_mb(); 332 smp_mb();
323 if (cur_trans->blocked && may_wait_transaction(root, type)) { 333 if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -327,8 +337,7 @@ again:
327 337
328 if (num_bytes) { 338 if (num_bytes) {
329 trace_btrfs_space_reservation(root->fs_info, "transaction", 339 trace_btrfs_space_reservation(root->fs_info, "transaction",
330 (u64)(unsigned long)h, 340 h->transid, num_bytes, 1);
331 num_bytes, 1);
332 h->block_rsv = &root->fs_info->trans_block_rsv; 341 h->block_rsv = &root->fs_info->trans_block_rsv;
333 h->bytes_reserved = num_bytes; 342 h->bytes_reserved = num_bytes;
334 } 343 }
@@ -440,6 +449,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
440 struct btrfs_transaction *cur_trans = trans->transaction; 449 struct btrfs_transaction *cur_trans = trans->transaction;
441 struct btrfs_block_rsv *rsv = trans->block_rsv; 450 struct btrfs_block_rsv *rsv = trans->block_rsv;
442 int updates; 451 int updates;
452 int err;
443 453
444 smp_mb(); 454 smp_mb();
445 if (cur_trans->blocked || cur_trans->delayed_refs.flushing) 455 if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
@@ -453,8 +463,11 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
453 463
454 updates = trans->delayed_ref_updates; 464 updates = trans->delayed_ref_updates;
455 trans->delayed_ref_updates = 0; 465 trans->delayed_ref_updates = 0;
456 if (updates) 466 if (updates) {
457 btrfs_run_delayed_refs(trans, root, updates); 467 err = btrfs_run_delayed_refs(trans, root, updates);
468 if (err) /* Error code will also eval true */
469 return err;
470 }
458 471
459 trans->block_rsv = rsv; 472 trans->block_rsv = rsv;
460 473
@@ -525,6 +538,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
525 if (throttle) 538 if (throttle)
526 btrfs_run_delayed_iputs(root); 539 btrfs_run_delayed_iputs(root);
527 540
541 if (trans->aborted ||
542 root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
543 return -EIO;
544 }
545
528 return 0; 546 return 0;
529} 547}
530 548
@@ -690,11 +708,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
690 ret = btrfs_update_root(trans, tree_root, 708 ret = btrfs_update_root(trans, tree_root,
691 &root->root_key, 709 &root->root_key,
692 &root->root_item); 710 &root->root_item);
693 BUG_ON(ret); 711 if (ret)
712 return ret;
694 713
695 old_root_used = btrfs_root_used(&root->root_item); 714 old_root_used = btrfs_root_used(&root->root_item);
696 ret = btrfs_write_dirty_block_groups(trans, root); 715 ret = btrfs_write_dirty_block_groups(trans, root);
697 BUG_ON(ret); 716 if (ret)
717 return ret;
698 } 718 }
699 719
700 if (root != root->fs_info->extent_root) 720 if (root != root->fs_info->extent_root)
@@ -705,6 +725,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
705 725
706/* 726/*
707 * update all the cowonly tree roots on disk 727 * update all the cowonly tree roots on disk
728 *
729 * The error handling in this function may not be obvious. Any of the
730 * failures will cause the file system to go offline. We still need
731 * to clean up the delayed refs.
708 */ 732 */
709static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, 733static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
710 struct btrfs_root *root) 734 struct btrfs_root *root)
@@ -715,22 +739,30 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
715 int ret; 739 int ret;
716 740
717 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 741 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
718 BUG_ON(ret); 742 if (ret)
743 return ret;
719 744
720 eb = btrfs_lock_root_node(fs_info->tree_root); 745 eb = btrfs_lock_root_node(fs_info->tree_root);
721 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); 746 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
747 0, &eb);
722 btrfs_tree_unlock(eb); 748 btrfs_tree_unlock(eb);
723 free_extent_buffer(eb); 749 free_extent_buffer(eb);
724 750
751 if (ret)
752 return ret;
753
725 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 754 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
726 BUG_ON(ret); 755 if (ret)
756 return ret;
727 757
728 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 758 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
729 next = fs_info->dirty_cowonly_roots.next; 759 next = fs_info->dirty_cowonly_roots.next;
730 list_del_init(next); 760 list_del_init(next);
731 root = list_entry(next, struct btrfs_root, dirty_list); 761 root = list_entry(next, struct btrfs_root, dirty_list);
732 762
733 update_cowonly_root(trans, root); 763 ret = update_cowonly_root(trans, root);
764 if (ret)
765 return ret;
734 } 766 }
735 767
736 down_write(&fs_info->extent_commit_sem); 768 down_write(&fs_info->extent_commit_sem);
@@ -874,7 +906,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
874 906
875 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 907 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
876 if (!new_root_item) { 908 if (!new_root_item) {
877 pending->error = -ENOMEM; 909 ret = pending->error = -ENOMEM;
878 goto fail; 910 goto fail;
879 } 911 }
880 912
@@ -911,21 +943,24 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
911 * insert the directory item 943 * insert the directory item
912 */ 944 */
913 ret = btrfs_set_inode_index(parent_inode, &index); 945 ret = btrfs_set_inode_index(parent_inode, &index);
914 BUG_ON(ret); 946 BUG_ON(ret); /* -ENOMEM */
915 ret = btrfs_insert_dir_item(trans, parent_root, 947 ret = btrfs_insert_dir_item(trans, parent_root,
916 dentry->d_name.name, dentry->d_name.len, 948 dentry->d_name.name, dentry->d_name.len,
917 parent_inode, &key, 949 parent_inode, &key,
918 BTRFS_FT_DIR, index); 950 BTRFS_FT_DIR, index);
919 if (ret) { 951 if (ret == -EEXIST) {
920 pending->error = -EEXIST; 952 pending->error = -EEXIST;
921 dput(parent); 953 dput(parent);
922 goto fail; 954 goto fail;
955 } else if (ret) {
956 goto abort_trans_dput;
923 } 957 }
924 958
925 btrfs_i_size_write(parent_inode, parent_inode->i_size + 959 btrfs_i_size_write(parent_inode, parent_inode->i_size +
926 dentry->d_name.len * 2); 960 dentry->d_name.len * 2);
927 ret = btrfs_update_inode(trans, parent_root, parent_inode); 961 ret = btrfs_update_inode(trans, parent_root, parent_inode);
928 BUG_ON(ret); 962 if (ret)
963 goto abort_trans_dput;
929 964
930 /* 965 /*
931 * pull in the delayed directory update 966 * pull in the delayed directory update
@@ -934,7 +969,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
934 * snapshot 969 * snapshot
935 */ 970 */
936 ret = btrfs_run_delayed_items(trans, root); 971 ret = btrfs_run_delayed_items(trans, root);
937 BUG_ON(ret); 972 if (ret) { /* Transaction aborted */
973 dput(parent);
974 goto fail;
975 }
938 976
939 record_root_in_trans(trans, root); 977 record_root_in_trans(trans, root);
940 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 978 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
@@ -949,12 +987,21 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
949 btrfs_set_root_flags(new_root_item, root_flags); 987 btrfs_set_root_flags(new_root_item, root_flags);
950 988
951 old = btrfs_lock_root_node(root); 989 old = btrfs_lock_root_node(root);
952 btrfs_cow_block(trans, root, old, NULL, 0, &old); 990 ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
991 if (ret) {
992 btrfs_tree_unlock(old);
993 free_extent_buffer(old);
994 goto abort_trans_dput;
995 }
996
953 btrfs_set_lock_blocking(old); 997 btrfs_set_lock_blocking(old);
954 998
955 btrfs_copy_root(trans, root, old, &tmp, objectid); 999 ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
1000 /* clean up in any case */
956 btrfs_tree_unlock(old); 1001 btrfs_tree_unlock(old);
957 free_extent_buffer(old); 1002 free_extent_buffer(old);
1003 if (ret)
1004 goto abort_trans_dput;
958 1005
959 /* see comments in should_cow_block() */ 1006 /* see comments in should_cow_block() */
960 root->force_cow = 1; 1007 root->force_cow = 1;
@@ -966,7 +1013,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
966 ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); 1013 ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
967 btrfs_tree_unlock(tmp); 1014 btrfs_tree_unlock(tmp);
968 free_extent_buffer(tmp); 1015 free_extent_buffer(tmp);
969 BUG_ON(ret); 1016 if (ret)
1017 goto abort_trans_dput;
970 1018
971 /* 1019 /*
972 * insert root back/forward references 1020 * insert root back/forward references
@@ -975,19 +1023,32 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
975 parent_root->root_key.objectid, 1023 parent_root->root_key.objectid,
976 btrfs_ino(parent_inode), index, 1024 btrfs_ino(parent_inode), index,
977 dentry->d_name.name, dentry->d_name.len); 1025 dentry->d_name.name, dentry->d_name.len);
978 BUG_ON(ret);
979 dput(parent); 1026 dput(parent);
1027 if (ret)
1028 goto fail;
980 1029
981 key.offset = (u64)-1; 1030 key.offset = (u64)-1;
982 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); 1031 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
983 BUG_ON(IS_ERR(pending->snap)); 1032 if (IS_ERR(pending->snap)) {
1033 ret = PTR_ERR(pending->snap);
1034 goto abort_trans;
1035 }
984 1036
985 btrfs_reloc_post_snapshot(trans, pending); 1037 ret = btrfs_reloc_post_snapshot(trans, pending);
1038 if (ret)
1039 goto abort_trans;
1040 ret = 0;
986fail: 1041fail:
987 kfree(new_root_item); 1042 kfree(new_root_item);
988 trans->block_rsv = rsv; 1043 trans->block_rsv = rsv;
989 btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); 1044 btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
990 return 0; 1045 return ret;
1046
1047abort_trans_dput:
1048 dput(parent);
1049abort_trans:
1050 btrfs_abort_transaction(trans, root, ret);
1051 goto fail;
991} 1052}
992 1053
993/* 1054/*
@@ -1124,6 +1185,33 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1124 return 0; 1185 return 0;
1125} 1186}
1126 1187
1188
1189static void cleanup_transaction(struct btrfs_trans_handle *trans,
1190 struct btrfs_root *root)
1191{
1192 struct btrfs_transaction *cur_trans = trans->transaction;
1193
1194 WARN_ON(trans->use_count > 1);
1195
1196 spin_lock(&root->fs_info->trans_lock);
1197 list_del_init(&cur_trans->list);
1198 spin_unlock(&root->fs_info->trans_lock);
1199
1200 btrfs_cleanup_one_transaction(trans->transaction, root);
1201
1202 put_transaction(cur_trans);
1203 put_transaction(cur_trans);
1204
1205 trace_btrfs_transaction_commit(root);
1206
1207 btrfs_scrub_continue(root);
1208
1209 if (current->journal_info == trans)
1210 current->journal_info = NULL;
1211
1212 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1213}
1214
1127/* 1215/*
1128 * btrfs_transaction state sequence: 1216 * btrfs_transaction state sequence:
1129 * in_commit = 0, blocked = 0 (initial) 1217 * in_commit = 0, blocked = 0 (initial)
@@ -1135,10 +1223,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1135 struct btrfs_root *root) 1223 struct btrfs_root *root)
1136{ 1224{
1137 unsigned long joined = 0; 1225 unsigned long joined = 0;
1138 struct btrfs_transaction *cur_trans; 1226 struct btrfs_transaction *cur_trans = trans->transaction;
1139 struct btrfs_transaction *prev_trans = NULL; 1227 struct btrfs_transaction *prev_trans = NULL;
1140 DEFINE_WAIT(wait); 1228 DEFINE_WAIT(wait);
1141 int ret; 1229 int ret = -EIO;
1142 int should_grow = 0; 1230 int should_grow = 0;
1143 unsigned long now = get_seconds(); 1231 unsigned long now = get_seconds();
1144 int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); 1232 int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
@@ -1148,13 +1236,18 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1148 btrfs_trans_release_metadata(trans, root); 1236 btrfs_trans_release_metadata(trans, root);
1149 trans->block_rsv = NULL; 1237 trans->block_rsv = NULL;
1150 1238
1239 if (cur_trans->aborted)
1240 goto cleanup_transaction;
1241
1151 /* make a pass through all the delayed refs we have so far 1242 /* make a pass through all the delayed refs we have so far
1152 * any runnings procs may add more while we are here 1243 * any runnings procs may add more while we are here
1153 */ 1244 */
1154 ret = btrfs_run_delayed_refs(trans, root, 0); 1245 ret = btrfs_run_delayed_refs(trans, root, 0);
1155 BUG_ON(ret); 1246 if (ret)
1247 goto cleanup_transaction;
1156 1248
1157 cur_trans = trans->transaction; 1249 cur_trans = trans->transaction;
1250
1158 /* 1251 /*
1159 * set the flushing flag so procs in this transaction have to 1252 * set the flushing flag so procs in this transaction have to
1160 * start sending their work down. 1253 * start sending their work down.
@@ -1162,19 +1255,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1162 cur_trans->delayed_refs.flushing = 1; 1255 cur_trans->delayed_refs.flushing = 1;
1163 1256
1164 ret = btrfs_run_delayed_refs(trans, root, 0); 1257 ret = btrfs_run_delayed_refs(trans, root, 0);
1165 BUG_ON(ret); 1258 if (ret)
1259 goto cleanup_transaction;
1166 1260
1167 spin_lock(&cur_trans->commit_lock); 1261 spin_lock(&cur_trans->commit_lock);
1168 if (cur_trans->in_commit) { 1262 if (cur_trans->in_commit) {
1169 spin_unlock(&cur_trans->commit_lock); 1263 spin_unlock(&cur_trans->commit_lock);
1170 atomic_inc(&cur_trans->use_count); 1264 atomic_inc(&cur_trans->use_count);
1171 btrfs_end_transaction(trans, root); 1265 ret = btrfs_end_transaction(trans, root);
1172 1266
1173 wait_for_commit(root, cur_trans); 1267 wait_for_commit(root, cur_trans);
1174 1268
1175 put_transaction(cur_trans); 1269 put_transaction(cur_trans);
1176 1270
1177 return 0; 1271 return ret;
1178 } 1272 }
1179 1273
1180 trans->transaction->in_commit = 1; 1274 trans->transaction->in_commit = 1;
@@ -1214,12 +1308,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1214 1308
1215 if (flush_on_commit || snap_pending) { 1309 if (flush_on_commit || snap_pending) {
1216 btrfs_start_delalloc_inodes(root, 1); 1310 btrfs_start_delalloc_inodes(root, 1);
1217 ret = btrfs_wait_ordered_extents(root, 0, 1); 1311 btrfs_wait_ordered_extents(root, 0, 1);
1218 BUG_ON(ret);
1219 } 1312 }
1220 1313
1221 ret = btrfs_run_delayed_items(trans, root); 1314 ret = btrfs_run_delayed_items(trans, root);
1222 BUG_ON(ret); 1315 if (ret)
1316 goto cleanup_transaction;
1223 1317
1224 /* 1318 /*
1225 * rename don't use btrfs_join_transaction, so, once we 1319 * rename don't use btrfs_join_transaction, so, once we
@@ -1261,13 +1355,22 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1261 mutex_lock(&root->fs_info->reloc_mutex); 1355 mutex_lock(&root->fs_info->reloc_mutex);
1262 1356
1263 ret = btrfs_run_delayed_items(trans, root); 1357 ret = btrfs_run_delayed_items(trans, root);
1264 BUG_ON(ret); 1358 if (ret) {
1359 mutex_unlock(&root->fs_info->reloc_mutex);
1360 goto cleanup_transaction;
1361 }
1265 1362
1266 ret = create_pending_snapshots(trans, root->fs_info); 1363 ret = create_pending_snapshots(trans, root->fs_info);
1267 BUG_ON(ret); 1364 if (ret) {
1365 mutex_unlock(&root->fs_info->reloc_mutex);
1366 goto cleanup_transaction;
1367 }
1268 1368
1269 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1369 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1270 BUG_ON(ret); 1370 if (ret) {
1371 mutex_unlock(&root->fs_info->reloc_mutex);
1372 goto cleanup_transaction;
1373 }
1271 1374
1272 /* 1375 /*
1273 * make sure none of the code above managed to slip in a 1376 * make sure none of the code above managed to slip in a
@@ -1294,7 +1397,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1294 mutex_lock(&root->fs_info->tree_log_mutex); 1397 mutex_lock(&root->fs_info->tree_log_mutex);
1295 1398
1296 ret = commit_fs_roots(trans, root); 1399 ret = commit_fs_roots(trans, root);
1297 BUG_ON(ret); 1400 if (ret) {
1401 mutex_unlock(&root->fs_info->tree_log_mutex);
1402 goto cleanup_transaction;
1403 }
1298 1404
1299 /* commit_fs_roots gets rid of all the tree log roots, it is now 1405 /* commit_fs_roots gets rid of all the tree log roots, it is now
1300 * safe to free the root of tree log roots 1406 * safe to free the root of tree log roots
@@ -1302,7 +1408,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1302 btrfs_free_log_root_tree(trans, root->fs_info); 1408 btrfs_free_log_root_tree(trans, root->fs_info);
1303 1409
1304 ret = commit_cowonly_roots(trans, root); 1410 ret = commit_cowonly_roots(trans, root);
1305 BUG_ON(ret); 1411 if (ret) {
1412 mutex_unlock(&root->fs_info->tree_log_mutex);
1413 goto cleanup_transaction;
1414 }
1306 1415
1307 btrfs_prepare_extent_commit(trans, root); 1416 btrfs_prepare_extent_commit(trans, root);
1308 1417
@@ -1336,8 +1445,18 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1336 wake_up(&root->fs_info->transaction_wait); 1445 wake_up(&root->fs_info->transaction_wait);
1337 1446
1338 ret = btrfs_write_and_wait_transaction(trans, root); 1447 ret = btrfs_write_and_wait_transaction(trans, root);
1339 BUG_ON(ret); 1448 if (ret) {
1340 write_ctree_super(trans, root, 0); 1449 btrfs_error(root->fs_info, ret,
1450 "Error while writing out transaction.");
1451 mutex_unlock(&root->fs_info->tree_log_mutex);
1452 goto cleanup_transaction;
1453 }
1454
1455 ret = write_ctree_super(trans, root, 0);
1456 if (ret) {
1457 mutex_unlock(&root->fs_info->tree_log_mutex);
1458 goto cleanup_transaction;
1459 }
1341 1460
1342 /* 1461 /*
1343 * the super is written, we can safely allow the tree-loggers 1462 * the super is written, we can safely allow the tree-loggers
@@ -1373,6 +1492,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1373 btrfs_run_delayed_iputs(root); 1492 btrfs_run_delayed_iputs(root);
1374 1493
1375 return ret; 1494 return ret;
1495
1496cleanup_transaction:
1497 btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
1498// WARN_ON(1);
1499 if (current->journal_info == trans)
1500 current->journal_info = NULL;
1501 cleanup_transaction(trans, root);
1502
1503 return ret;
1376} 1504}
1377 1505
1378/* 1506/*
@@ -1388,6 +1516,8 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1388 spin_unlock(&fs_info->trans_lock); 1516 spin_unlock(&fs_info->trans_lock);
1389 1517
1390 while (!list_empty(&list)) { 1518 while (!list_empty(&list)) {
1519 int ret;
1520
1391 root = list_entry(list.next, struct btrfs_root, root_list); 1521 root = list_entry(list.next, struct btrfs_root, root_list);
1392 list_del(&root->root_list); 1522 list_del(&root->root_list);
1393 1523
@@ -1395,9 +1525,10 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1395 1525
1396 if (btrfs_header_backref_rev(root->node) < 1526 if (btrfs_header_backref_rev(root->node) <
1397 BTRFS_MIXED_BACKREF_REV) 1527 BTRFS_MIXED_BACKREF_REV)
1398 btrfs_drop_snapshot(root, NULL, 0, 0); 1528 ret = btrfs_drop_snapshot(root, NULL, 0, 0);
1399 else 1529 else
1400 btrfs_drop_snapshot(root, NULL, 1, 0); 1530 ret = btrfs_drop_snapshot(root, NULL, 1, 0);
1531 BUG_ON(ret < 0);
1401 } 1532 }
1402 return 0; 1533 return 0;
1403} 1534}
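
The commit-path conversions above all follow one shape: each BUG_ON(ret) on a recoverable failure becomes an early check that releases whatever lock is held and funnels into the single cleanup_transaction label. A minimal user-space sketch of that goto-cleanup idiom, with hypothetical do_step_one()/do_step_two() standing in for the kernel calls:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the kernel steps; each returns 0 or -errno. */
static int do_step_one(void) { return 0; }
static int do_step_two(void) { return -5; /* simulate -EIO */ }

static int commit_like_sequence(void)
{
        int ret;

        ret = do_step_one();
        if (ret)
                goto cleanup;   /* was: BUG_ON(ret) */

        ret = do_step_two();
        if (ret)
                goto cleanup;

        return 0;

cleanup:
        /* single exit point: undo partial state, report, propagate */
        fprintf(stderr, "aborting sequence: error %d\n", ret);
        return ret;
}

int main(void)
{
        return commit_like_sequence() ? EXIT_FAILURE : EXIT_SUCCESS;
}
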
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 02564e6230ac..fe27379e368b 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -43,6 +43,7 @@ struct btrfs_transaction {
43 wait_queue_head_t commit_wait; 43 wait_queue_head_t commit_wait;
44 struct list_head pending_snapshots; 44 struct list_head pending_snapshots;
45 struct btrfs_delayed_ref_root delayed_refs; 45 struct btrfs_delayed_ref_root delayed_refs;
46 int aborted;
46}; 47};
47 48
48struct btrfs_trans_handle { 49struct btrfs_trans_handle {
@@ -55,6 +56,7 @@ struct btrfs_trans_handle {
55 struct btrfs_transaction *transaction; 56 struct btrfs_transaction *transaction;
56 struct btrfs_block_rsv *block_rsv; 57 struct btrfs_block_rsv *block_rsv;
57 struct btrfs_block_rsv *orig_rsv; 58 struct btrfs_block_rsv *orig_rsv;
59 int aborted;
58}; 60};
59 61
60struct btrfs_pending_snapshot { 62struct btrfs_pending_snapshot {
@@ -114,4 +116,5 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
114 struct extent_io_tree *dirty_pages, int mark); 116 struct extent_io_tree *dirty_pages, int mark);
115int btrfs_transaction_blocked(struct btrfs_fs_info *info); 117int btrfs_transaction_blocked(struct btrfs_fs_info *info);
116int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 118int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
119void put_transaction(struct btrfs_transaction *transaction);
117#endif 120#endif
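
The aborted fields added here give both the handle and the long-lived transaction a sticky error state that can be set once at the point of failure and observed by later paths. A schematic of the idea in plain C (hypothetical types, not the kernel implementation):

#include <errno.h>
#include <stdio.h>

struct transaction {
        int aborted;    /* 0 = healthy, else first -errno seen */
};

/* Record the first failure; later callers observe it and refuse to commit. */
static void abort_transaction(struct transaction *t, int err)
{
        if (!t->aborted)
                t->aborted = err;
}

static int commit_transaction(struct transaction *t)
{
        if (t->aborted) {
                fprintf(stderr, "skipping commit of aborted transaction (%d)\n",
                        t->aborted);
                return t->aborted;
        }
        return 0;
}

int main(void)
{
        struct transaction t = { 0 };

        abort_transaction(&t, -ENOSPC);
        return commit_transaction(&t) ? 1 : 0;
}
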
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 966cc74f5d6c..d017283ae6f5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -212,14 +212,13 @@ int btrfs_pin_log_trans(struct btrfs_root *root)
212 * indicate we're done making changes to the log tree 212 * indicate we're done making changes to the log tree
213 * and wake up anyone waiting to do a sync 213 * and wake up anyone waiting to do a sync
214 */ 214 */
215int btrfs_end_log_trans(struct btrfs_root *root) 215void btrfs_end_log_trans(struct btrfs_root *root)
216{ 216{
217 if (atomic_dec_and_test(&root->log_writers)) { 217 if (atomic_dec_and_test(&root->log_writers)) {
218 smp_mb(); 218 smp_mb();
219 if (waitqueue_active(&root->log_writer_wait)) 219 if (waitqueue_active(&root->log_writer_wait))
220 wake_up(&root->log_writer_wait); 220 wake_up(&root->log_writer_wait);
221 } 221 }
222 return 0;
223} 222}
224 223
225 224
@@ -378,12 +377,11 @@ insert:
378 u32 found_size; 377 u32 found_size;
379 found_size = btrfs_item_size_nr(path->nodes[0], 378 found_size = btrfs_item_size_nr(path->nodes[0],
380 path->slots[0]); 379 path->slots[0]);
381 if (found_size > item_size) { 380 if (found_size > item_size)
382 btrfs_truncate_item(trans, root, path, item_size, 1); 381 btrfs_truncate_item(trans, root, path, item_size, 1);
383 } else if (found_size < item_size) { 382 else if (found_size < item_size)
384 ret = btrfs_extend_item(trans, root, path, 383 btrfs_extend_item(trans, root, path,
385 item_size - found_size); 384 item_size - found_size);
386 }
387 } else if (ret) { 385 } else if (ret) {
388 return ret; 386 return ret;
389 } 387 }
@@ -1763,7 +1761,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1763 BTRFS_TREE_LOG_OBJECTID); 1761 BTRFS_TREE_LOG_OBJECTID);
1764 ret = btrfs_free_and_pin_reserved_extent(root, 1762 ret = btrfs_free_and_pin_reserved_extent(root,
1765 bytenr, blocksize); 1763 bytenr, blocksize);
1766 BUG_ON(ret); 1764 BUG_ON(ret); /* -ENOMEM or logic errors */
1767 } 1765 }
1768 free_extent_buffer(next); 1766 free_extent_buffer(next);
1769 continue; 1767 continue;
@@ -1871,20 +1869,26 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1871 wret = walk_down_log_tree(trans, log, path, &level, wc); 1869 wret = walk_down_log_tree(trans, log, path, &level, wc);
1872 if (wret > 0) 1870 if (wret > 0)
1873 break; 1871 break;
1874 if (wret < 0) 1872 if (wret < 0) {
1875 ret = wret; 1873 ret = wret;
1874 goto out;
1875 }
1876 1876
1877 wret = walk_up_log_tree(trans, log, path, &level, wc); 1877 wret = walk_up_log_tree(trans, log, path, &level, wc);
1878 if (wret > 0) 1878 if (wret > 0)
1879 break; 1879 break;
1880 if (wret < 0) 1880 if (wret < 0) {
1881 ret = wret; 1881 ret = wret;
1882 goto out;
1883 }
1882 } 1884 }
1883 1885
1884 /* was the root node processed? if not, catch it here */ 1886 /* was the root node processed? if not, catch it here */
1885 if (path->nodes[orig_level]) { 1887 if (path->nodes[orig_level]) {
1886 wc->process_func(log, path->nodes[orig_level], wc, 1888 ret = wc->process_func(log, path->nodes[orig_level], wc,
1887 btrfs_header_generation(path->nodes[orig_level])); 1889 btrfs_header_generation(path->nodes[orig_level]));
1890 if (ret)
1891 goto out;
1888 if (wc->free) { 1892 if (wc->free) {
1889 struct extent_buffer *next; 1893 struct extent_buffer *next;
1890 1894
@@ -1900,10 +1904,11 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1900 BTRFS_TREE_LOG_OBJECTID); 1904 BTRFS_TREE_LOG_OBJECTID);
1901 ret = btrfs_free_and_pin_reserved_extent(log, next->start, 1905 ret = btrfs_free_and_pin_reserved_extent(log, next->start,
1902 next->len); 1906 next->len);
1903 BUG_ON(ret); 1907 BUG_ON(ret); /* -ENOMEM or logic errors */
1904 } 1908 }
1905 } 1909 }
1906 1910
1911out:
1907 for (i = 0; i <= orig_level; i++) { 1912 for (i = 0; i <= orig_level; i++) {
1908 if (path->nodes[i]) { 1913 if (path->nodes[i]) {
1909 free_extent_buffer(path->nodes[i]); 1914 free_extent_buffer(path->nodes[i]);
@@ -1963,8 +1968,8 @@ static int wait_log_commit(struct btrfs_trans_handle *trans,
1963 return 0; 1968 return 0;
1964} 1969}
1965 1970
1966static int wait_for_writer(struct btrfs_trans_handle *trans, 1971static void wait_for_writer(struct btrfs_trans_handle *trans,
1967 struct btrfs_root *root) 1972 struct btrfs_root *root)
1968{ 1973{
1969 DEFINE_WAIT(wait); 1974 DEFINE_WAIT(wait);
1970 while (root->fs_info->last_trans_log_full_commit != 1975 while (root->fs_info->last_trans_log_full_commit !=
@@ -1978,7 +1983,6 @@ static int wait_for_writer(struct btrfs_trans_handle *trans,
1978 mutex_lock(&root->log_mutex); 1983 mutex_lock(&root->log_mutex);
1979 finish_wait(&root->log_writer_wait, &wait); 1984 finish_wait(&root->log_writer_wait, &wait);
1980 } 1985 }
1981 return 0;
1982} 1986}
1983 1987
1984/* 1988/*
@@ -2046,7 +2050,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2046 * wait for them until later. 2050 * wait for them until later.
2047 */ 2051 */
2048 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); 2052 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2049 BUG_ON(ret); 2053 if (ret) {
2054 btrfs_abort_transaction(trans, root, ret);
2055 mutex_unlock(&root->log_mutex);
2056 goto out;
2057 }
2050 2058
2051 btrfs_set_root_node(&log->root_item, log->node); 2059 btrfs_set_root_node(&log->root_item, log->node);
2052 2060
@@ -2077,7 +2085,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2077 } 2085 }
2078 2086
2079 if (ret) { 2087 if (ret) {
2080 BUG_ON(ret != -ENOSPC); 2088 if (ret != -ENOSPC) {
2089 btrfs_abort_transaction(trans, root, ret);
2090 mutex_unlock(&log_root_tree->log_mutex);
2091 goto out;
2092 }
2081 root->fs_info->last_trans_log_full_commit = trans->transid; 2093 root->fs_info->last_trans_log_full_commit = trans->transid;
2082 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2094 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2083 mutex_unlock(&log_root_tree->log_mutex); 2095 mutex_unlock(&log_root_tree->log_mutex);
@@ -2117,7 +2129,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2117 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2129 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2118 &log_root_tree->dirty_log_pages, 2130 &log_root_tree->dirty_log_pages,
2119 EXTENT_DIRTY | EXTENT_NEW); 2131 EXTENT_DIRTY | EXTENT_NEW);
2120 BUG_ON(ret); 2132 if (ret) {
2133 btrfs_abort_transaction(trans, root, ret);
2134 mutex_unlock(&log_root_tree->log_mutex);
2135 goto out_wake_log_root;
2136 }
2121 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2137 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2122 2138
2123 btrfs_set_super_log_root(root->fs_info->super_for_commit, 2139 btrfs_set_super_log_root(root->fs_info->super_for_commit,
@@ -2326,7 +2342,9 @@ out_unlock:
2326 if (ret == -ENOSPC) { 2342 if (ret == -ENOSPC) {
2327 root->fs_info->last_trans_log_full_commit = trans->transid; 2343 root->fs_info->last_trans_log_full_commit = trans->transid;
2328 ret = 0; 2344 ret = 0;
2329 } 2345 } else if (ret < 0)
2346 btrfs_abort_transaction(trans, root, ret);
2347
2330 btrfs_end_log_trans(root); 2348 btrfs_end_log_trans(root);
2331 2349
2332 return err; 2350 return err;
@@ -2357,7 +2375,8 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2357 if (ret == -ENOSPC) { 2375 if (ret == -ENOSPC) {
2358 root->fs_info->last_trans_log_full_commit = trans->transid; 2376 root->fs_info->last_trans_log_full_commit = trans->transid;
2359 ret = 0; 2377 ret = 0;
2360 } 2378 } else if (ret < 0 && ret != -ENOENT)
2379 btrfs_abort_transaction(trans, root, ret);
2361 btrfs_end_log_trans(root); 2380 btrfs_end_log_trans(root);
2362 2381
2363 return ret; 2382 return ret;
@@ -3169,13 +3188,20 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3169 fs_info->log_root_recovering = 1; 3188 fs_info->log_root_recovering = 1;
3170 3189
3171 trans = btrfs_start_transaction(fs_info->tree_root, 0); 3190 trans = btrfs_start_transaction(fs_info->tree_root, 0);
3172 BUG_ON(IS_ERR(trans)); 3191 if (IS_ERR(trans)) {
3192 ret = PTR_ERR(trans);
3193 goto error;
3194 }
3173 3195
3174 wc.trans = trans; 3196 wc.trans = trans;
3175 wc.pin = 1; 3197 wc.pin = 1;
3176 3198
3177 ret = walk_log_tree(trans, log_root_tree, &wc); 3199 ret = walk_log_tree(trans, log_root_tree, &wc);
3178 BUG_ON(ret); 3200 if (ret) {
3201 btrfs_error(fs_info, ret, "Failed to pin buffers while "
3202 "recovering log root tree.");
3203 goto error;
3204 }
3179 3205
3180again: 3206again:
3181 key.objectid = BTRFS_TREE_LOG_OBJECTID; 3207 key.objectid = BTRFS_TREE_LOG_OBJECTID;
@@ -3184,8 +3210,12 @@ again:
3184 3210
3185 while (1) { 3211 while (1) {
3186 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); 3212 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
3187 if (ret < 0) 3213
3188 break; 3214 if (ret < 0) {
3215 btrfs_error(fs_info, ret,
3216 "Couldn't find tree log root.");
3217 goto error;
3218 }
3189 if (ret > 0) { 3219 if (ret > 0) {
3190 if (path->slots[0] == 0) 3220 if (path->slots[0] == 0)
3191 break; 3221 break;
@@ -3199,14 +3229,24 @@ again:
3199 3229
3200 log = btrfs_read_fs_root_no_radix(log_root_tree, 3230 log = btrfs_read_fs_root_no_radix(log_root_tree,
3201 &found_key); 3231 &found_key);
3202 BUG_ON(IS_ERR(log)); 3232 if (IS_ERR(log)) {
3233 ret = PTR_ERR(log);
3234 btrfs_error(fs_info, ret,
3235 "Couldn't read tree log root.");
3236 goto error;
3237 }
3203 3238
3204 tmp_key.objectid = found_key.offset; 3239 tmp_key.objectid = found_key.offset;
3205 tmp_key.type = BTRFS_ROOT_ITEM_KEY; 3240 tmp_key.type = BTRFS_ROOT_ITEM_KEY;
3206 tmp_key.offset = (u64)-1; 3241 tmp_key.offset = (u64)-1;
3207 3242
3208 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 3243 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
3209 BUG_ON(IS_ERR_OR_NULL(wc.replay_dest)); 3244 if (IS_ERR(wc.replay_dest)) {
3245 ret = PTR_ERR(wc.replay_dest);
3246 btrfs_error(fs_info, ret, "Couldn't read target root "
3247 "for tree log recovery.");
3248 goto error;
3249 }
3210 3250
3211 wc.replay_dest->log_root = log; 3251 wc.replay_dest->log_root = log;
3212 btrfs_record_root_in_trans(trans, wc.replay_dest); 3252 btrfs_record_root_in_trans(trans, wc.replay_dest);
@@ -3254,6 +3294,10 @@ again:
3254 3294
3255 kfree(log_root_tree); 3295 kfree(log_root_tree);
3256 return 0; 3296 return 0;
3297
3298error:
3299 btrfs_free_path(path);
3300 return ret;
3257} 3301}
3258 3302
3259/* 3303/*
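
A recurring policy in the tree-log changes above: -ENOSPC is survivable and merely forces the next full transaction commit (via last_trans_log_full_commit), while any other negative error aborts the transaction. A condensed user-space sketch of that decision, with hypothetical helpers:

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-ins for the two recovery strategies. */
static void force_full_commit(void) { puts("falling back to full commit"); }
static void abort_transaction(int err) { printf("aborting: %d\n", err); }

static int handle_log_error(int ret)
{
        if (ret == -ENOSPC) {
                /* log tree out of space: recoverable, commit everything */
                force_full_commit();
                return 0;
        }
        if (ret < 0)
                abort_transaction(ret);  /* anything else is fatal */
        return ret;
}

int main(void)
{
        handle_log_error(-ENOSPC);
        return handle_log_error(0);
}
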
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 2270ac58d746..862ac813f6b8 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -38,7 +38,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 38 struct btrfs_root *root,
39 const char *name, int name_len, 39 const char *name, int name_len,
40 struct inode *inode, u64 dirid); 40 struct inode *inode, u64 dirid);
41int btrfs_end_log_trans(struct btrfs_root *root); 41void btrfs_end_log_trans(struct btrfs_root *root);
42int btrfs_pin_log_trans(struct btrfs_root *root); 42int btrfs_pin_log_trans(struct btrfs_root *root);
43int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 43int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
44 struct btrfs_root *root, struct inode *inode, 44 struct btrfs_root *root, struct inode *inode,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ef41f285a475..a872b48be0ae 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -67,7 +67,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
67 kfree(fs_devices); 67 kfree(fs_devices);
68} 68}
69 69
70int btrfs_cleanup_fs_uuids(void) 70void btrfs_cleanup_fs_uuids(void)
71{ 71{
72 struct btrfs_fs_devices *fs_devices; 72 struct btrfs_fs_devices *fs_devices;
73 73
@@ -77,7 +77,6 @@ int btrfs_cleanup_fs_uuids(void)
77 list_del(&fs_devices->list); 77 list_del(&fs_devices->list);
78 free_fs_devices(fs_devices); 78 free_fs_devices(fs_devices);
79 } 79 }
80 return 0;
81} 80}
82 81
83static noinline struct btrfs_device *__find_device(struct list_head *head, 82static noinline struct btrfs_device *__find_device(struct list_head *head,
@@ -130,7 +129,7 @@ static void requeue_list(struct btrfs_pending_bios *pending_bios,
130 * the list if the block device is congested. This way, multiple devices 129 * the list if the block device is congested. This way, multiple devices
131 * can make progress from a single worker thread. 130 * can make progress from a single worker thread.
132 */ 131 */
133static noinline int run_scheduled_bios(struct btrfs_device *device) 132static noinline void run_scheduled_bios(struct btrfs_device *device)
134{ 133{
135 struct bio *pending; 134 struct bio *pending;
136 struct backing_dev_info *bdi; 135 struct backing_dev_info *bdi;
@@ -316,7 +315,6 @@ loop_lock:
316 315
317done: 316done:
318 blk_finish_plug(&plug); 317 blk_finish_plug(&plug);
319 return 0;
320} 318}
321 319
322static void pending_bios_fn(struct btrfs_work *work) 320static void pending_bios_fn(struct btrfs_work *work)
@@ -455,7 +453,7 @@ error:
455 return ERR_PTR(-ENOMEM); 453 return ERR_PTR(-ENOMEM);
456} 454}
457 455
458int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) 456void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
459{ 457{
460 struct btrfs_device *device, *next; 458 struct btrfs_device *device, *next;
461 459
@@ -503,7 +501,6 @@ again:
503 fs_devices->latest_trans = latest_transid; 501 fs_devices->latest_trans = latest_transid;
504 502
505 mutex_unlock(&uuid_mutex); 503 mutex_unlock(&uuid_mutex);
506 return 0;
507} 504}
508 505
509static void __free_device(struct work_struct *work) 506static void __free_device(struct work_struct *work)
@@ -552,10 +549,10 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
552 fs_devices->num_can_discard--; 549 fs_devices->num_can_discard--;
553 550
554 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 551 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
555 BUG_ON(!new_device); 552 BUG_ON(!new_device); /* -ENOMEM */
556 memcpy(new_device, device, sizeof(*new_device)); 553 memcpy(new_device, device, sizeof(*new_device));
557 new_device->name = kstrdup(device->name, GFP_NOFS); 554 new_device->name = kstrdup(device->name, GFP_NOFS);
558 BUG_ON(device->name && !new_device->name); 555 BUG_ON(device->name && !new_device->name); /* -ENOMEM */
559 new_device->bdev = NULL; 556 new_device->bdev = NULL;
560 new_device->writeable = 0; 557 new_device->writeable = 0;
561 new_device->in_fs_metadata = 0; 558 new_device->in_fs_metadata = 0;
@@ -625,6 +622,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
625 printk(KERN_INFO "open %s failed\n", device->name); 622 printk(KERN_INFO "open %s failed\n", device->name);
626 goto error; 623 goto error;
627 } 624 }
625 filemap_write_and_wait(bdev->bd_inode->i_mapping);
626 invalidate_bdev(bdev);
628 set_blocksize(bdev, 4096); 627 set_blocksize(bdev, 4096);
629 628
630 bh = btrfs_read_dev_super(bdev); 629 bh = btrfs_read_dev_super(bdev);
@@ -1039,8 +1038,10 @@ again:
1039 leaf = path->nodes[0]; 1038 leaf = path->nodes[0];
1040 extent = btrfs_item_ptr(leaf, path->slots[0], 1039 extent = btrfs_item_ptr(leaf, path->slots[0],
1041 struct btrfs_dev_extent); 1040 struct btrfs_dev_extent);
1041 } else {
1042 btrfs_error(root->fs_info, ret, "Slot search failed");
1043 goto out;
1042 } 1044 }
1043 BUG_ON(ret);
1044 1045
1045 if (device->bytes_used > 0) { 1046 if (device->bytes_used > 0) {
1046 u64 len = btrfs_dev_extent_length(leaf, extent); 1047 u64 len = btrfs_dev_extent_length(leaf, extent);
@@ -1050,7 +1051,10 @@ again:
1050 spin_unlock(&root->fs_info->free_chunk_lock); 1051 spin_unlock(&root->fs_info->free_chunk_lock);
1051 } 1052 }
1052 ret = btrfs_del_item(trans, root, path); 1053 ret = btrfs_del_item(trans, root, path);
1053 1054 if (ret) {
1055 btrfs_error(root->fs_info, ret,
1056 "Failed to remove dev extent item");
1057 }
1054out: 1058out:
1055 btrfs_free_path(path); 1059 btrfs_free_path(path);
1056 return ret; 1060 return ret;
@@ -1078,7 +1082,8 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1078 key.type = BTRFS_DEV_EXTENT_KEY; 1082 key.type = BTRFS_DEV_EXTENT_KEY;
1079 ret = btrfs_insert_empty_item(trans, root, path, &key, 1083 ret = btrfs_insert_empty_item(trans, root, path, &key,
1080 sizeof(*extent)); 1084 sizeof(*extent));
1081 BUG_ON(ret); 1085 if (ret)
1086 goto out;
1082 1087
1083 leaf = path->nodes[0]; 1088 leaf = path->nodes[0];
1084 extent = btrfs_item_ptr(leaf, path->slots[0], 1089 extent = btrfs_item_ptr(leaf, path->slots[0],
@@ -1093,6 +1098,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1093 1098
1094 btrfs_set_dev_extent_length(leaf, extent, num_bytes); 1099 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1095 btrfs_mark_buffer_dirty(leaf); 1100 btrfs_mark_buffer_dirty(leaf);
1101out:
1096 btrfs_free_path(path); 1102 btrfs_free_path(path);
1097 return ret; 1103 return ret;
1098} 1104}
@@ -1118,7 +1124,7 @@ static noinline int find_next_chunk(struct btrfs_root *root,
1118 if (ret < 0) 1124 if (ret < 0)
1119 goto error; 1125 goto error;
1120 1126
1121 BUG_ON(ret == 0); 1127 BUG_ON(ret == 0); /* Corruption */
1122 1128
1123 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); 1129 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
1124 if (ret) { 1130 if (ret) {
@@ -1162,7 +1168,7 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
1162 if (ret < 0) 1168 if (ret < 0)
1163 goto error; 1169 goto error;
1164 1170
1165 BUG_ON(ret == 0); 1171 BUG_ON(ret == 0); /* Corruption */
1166 1172
1167 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, 1173 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
1168 BTRFS_DEV_ITEM_KEY); 1174 BTRFS_DEV_ITEM_KEY);
@@ -1350,6 +1356,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1350 } 1356 }
1351 1357
1352 set_blocksize(bdev, 4096); 1358 set_blocksize(bdev, 4096);
1359 invalidate_bdev(bdev);
1353 bh = btrfs_read_dev_super(bdev); 1360 bh = btrfs_read_dev_super(bdev);
1354 if (!bh) { 1361 if (!bh) {
1355 ret = -EINVAL; 1362 ret = -EINVAL;
@@ -1596,7 +1603,7 @@ next_slot:
1596 (unsigned long)btrfs_device_fsid(dev_item), 1603 (unsigned long)btrfs_device_fsid(dev_item),
1597 BTRFS_UUID_SIZE); 1604 BTRFS_UUID_SIZE);
1598 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); 1605 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
1599 BUG_ON(!device); 1606 BUG_ON(!device); /* Logic error */
1600 1607
1601 if (device->fs_devices->seeding) { 1608 if (device->fs_devices->seeding) {
1602 btrfs_set_device_generation(leaf, dev_item, 1609 btrfs_set_device_generation(leaf, dev_item,
@@ -1706,7 +1713,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1706 if (seeding_dev) { 1713 if (seeding_dev) {
1707 sb->s_flags &= ~MS_RDONLY; 1714 sb->s_flags &= ~MS_RDONLY;
1708 ret = btrfs_prepare_sprout(root); 1715 ret = btrfs_prepare_sprout(root);
1709 BUG_ON(ret); 1716 BUG_ON(ret); /* -ENOMEM */
1710 } 1717 }
1711 1718
1712 device->fs_devices = root->fs_info->fs_devices; 1719 device->fs_devices = root->fs_info->fs_devices;
@@ -1744,11 +1751,15 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1744 1751
1745 if (seeding_dev) { 1752 if (seeding_dev) {
1746 ret = init_first_rw_device(trans, root, device); 1753 ret = init_first_rw_device(trans, root, device);
1747 BUG_ON(ret); 1754 if (ret)
1755 goto error_trans;
1748 ret = btrfs_finish_sprout(trans, root); 1756 ret = btrfs_finish_sprout(trans, root);
1749 BUG_ON(ret); 1757 if (ret)
1758 goto error_trans;
1750 } else { 1759 } else {
1751 ret = btrfs_add_device(trans, root, device); 1760 ret = btrfs_add_device(trans, root, device);
1761 if (ret)
1762 goto error_trans;
1752 } 1763 }
1753 1764
1754 /* 1765 /*
@@ -1758,17 +1769,31 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1758 btrfs_clear_space_info_full(root->fs_info); 1769 btrfs_clear_space_info_full(root->fs_info);
1759 1770
1760 unlock_chunks(root); 1771 unlock_chunks(root);
1761 btrfs_commit_transaction(trans, root); 1772 ret = btrfs_commit_transaction(trans, root);
1762 1773
1763 if (seeding_dev) { 1774 if (seeding_dev) {
1764 mutex_unlock(&uuid_mutex); 1775 mutex_unlock(&uuid_mutex);
1765 up_write(&sb->s_umount); 1776 up_write(&sb->s_umount);
1766 1777
1778 if (ret) /* transaction commit */
1779 return ret;
1780
1767 ret = btrfs_relocate_sys_chunks(root); 1781 ret = btrfs_relocate_sys_chunks(root);
1768 BUG_ON(ret); 1782 if (ret < 0)
1783 btrfs_error(root->fs_info, ret,
1784 "Failed to relocate sys chunks after "
1785 "device initialization. This can be fixed "
1786 "using the \"btrfs balance\" command.");
1769 } 1787 }
1770 1788
1771 return ret; 1789 return ret;
1790
1791error_trans:
1792 unlock_chunks(root);
1793 btrfs_abort_transaction(trans, root, ret);
1794 btrfs_end_transaction(trans, root);
1795 kfree(device->name);
1796 kfree(device);
1772error: 1797error:
1773 blkdev_put(bdev, FMODE_EXCL); 1798 blkdev_put(bdev, FMODE_EXCL);
1774 if (seeding_dev) { 1799 if (seeding_dev) {
@@ -1876,10 +1901,20 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1876 key.type = BTRFS_CHUNK_ITEM_KEY; 1901 key.type = BTRFS_CHUNK_ITEM_KEY;
1877 1902
1878 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1903 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1879 BUG_ON(ret); 1904 if (ret < 0)
1905 goto out;
1906 else if (ret > 0) { /* Logic error or corruption */
1907 btrfs_error(root->fs_info, -ENOENT,
1908 "Failed lookup while freeing chunk.");
1909 ret = -ENOENT;
1910 goto out;
1911 }
1880 1912
1881 ret = btrfs_del_item(trans, root, path); 1913 ret = btrfs_del_item(trans, root, path);
1882 1914 if (ret < 0)
1915 btrfs_error(root->fs_info, ret,
1916 "Failed to delete chunk item.");
1917out:
1883 btrfs_free_path(path); 1918 btrfs_free_path(path);
1884 return ret; 1919 return ret;
1885} 1920}
@@ -2041,7 +2076,7 @@ again:
2041 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); 2076 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
2042 if (ret < 0) 2077 if (ret < 0)
2043 goto error; 2078 goto error;
2044 BUG_ON(ret == 0); 2079 BUG_ON(ret == 0); /* Corruption */
2045 2080
2046 ret = btrfs_previous_item(chunk_root, path, key.objectid, 2081 ret = btrfs_previous_item(chunk_root, path, key.objectid,
2047 key.type); 2082 key.type);
@@ -2250,15 +2285,13 @@ static void unset_balance_control(struct btrfs_fs_info *fs_info)
2250 * Balance filters. Return 1 if chunk should be filtered out 2285 * Balance filters. Return 1 if chunk should be filtered out
2251 * (should not be balanced). 2286 * (should not be balanced).
2252 */ 2287 */
2253static int chunk_profiles_filter(u64 chunk_profile, 2288static int chunk_profiles_filter(u64 chunk_type,
2254 struct btrfs_balance_args *bargs) 2289 struct btrfs_balance_args *bargs)
2255{ 2290{
2256 chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; 2291 chunk_type = chunk_to_extended(chunk_type) &
2257 2292 BTRFS_EXTENDED_PROFILE_MASK;
2258 if (chunk_profile == 0)
2259 chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2260 2293
2261 if (bargs->profiles & chunk_profile) 2294 if (bargs->profiles & chunk_type)
2262 return 0; 2295 return 0;
2263 2296
2264 return 1; 2297 return 1;
@@ -2365,18 +2398,16 @@ static int chunk_vrange_filter(struct extent_buffer *leaf,
2365 return 1; 2398 return 1;
2366} 2399}
2367 2400
2368static int chunk_soft_convert_filter(u64 chunk_profile, 2401static int chunk_soft_convert_filter(u64 chunk_type,
2369 struct btrfs_balance_args *bargs) 2402 struct btrfs_balance_args *bargs)
2370{ 2403{
2371 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT)) 2404 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
2372 return 0; 2405 return 0;
2373 2406
2374 chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; 2407 chunk_type = chunk_to_extended(chunk_type) &
2408 BTRFS_EXTENDED_PROFILE_MASK;
2375 2409
2376 if (chunk_profile == 0) 2410 if (bargs->target == chunk_type)
2377 chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2378
2379 if (bargs->target & chunk_profile)
2380 return 1; 2411 return 1;
2381 2412
2382 return 0; 2413 return 0;
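
Both filters now normalize through chunk_to_extended() and mask with BTRFS_EXTENDED_PROFILE_MASK, subsuming the open-coded special case removed above, where a chunk with no profile bits (a "single" chunk) was mapped to BTRFS_AVAIL_ALLOC_BIT_SINGLE. A self-contained sketch of that normalization using stand-in bit values (the real masks live in the btrfs headers):

#include <assert.h>
#include <stdint.h>

/* Stand-in bit values for illustration only. */
#define PROFILE_RAID0          (1ULL << 0)
#define PROFILE_RAID1          (1ULL << 1)
#define PROFILE_DUP            (1ULL << 2)
#define PROFILE_MASK           (PROFILE_RAID0 | PROFILE_RAID1 | PROFILE_DUP)
#define AVAIL_ALLOC_BIT_SINGLE (1ULL << 3)   /* synthetic "single" bit */
#define EXTENDED_PROFILE_MASK  (PROFILE_MASK | AVAIL_ALLOC_BIT_SINGLE)

/* Sketch of the normalization: no profile bits set means "single". */
static uint64_t to_extended(uint64_t flags)
{
        if ((flags & PROFILE_MASK) == 0)
                flags |= AVAIL_ALLOC_BIT_SINGLE;
        return flags;
}

int main(void)
{
        /* a chunk with no profile bits now matches a "single" filter */
        assert((to_extended(0) & EXTENDED_PROFILE_MASK) ==
               AVAIL_ALLOC_BIT_SINGLE);
        /* a raid1 chunk is left untouched */
        assert(to_extended(PROFILE_RAID1) == PROFILE_RAID1);
        return 0;
}
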
@@ -2602,6 +2633,30 @@ error:
2602 return ret; 2633 return ret;
2603} 2634}
2604 2635
2636/**
2637 * alloc_profile_is_valid - see if a given profile is valid and reduced
2638 * @flags: profile to validate
2639 * @extended: if true @flags is treated as an extended profile
2640 */
2641static int alloc_profile_is_valid(u64 flags, int extended)
2642{
2643 u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
2644 BTRFS_BLOCK_GROUP_PROFILE_MASK);
2645
2646 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
2647
2648 /* 1) check that all other bits are zeroed */
2649 if (flags & ~mask)
2650 return 0;
2651
2652 /* 2) see if profile is reduced */
2653 if (flags == 0)
2654 return !extended; /* "0" is valid for usual profiles */
2655
2656 /* true if exactly one bit set */
2657 return (flags & (flags - 1)) == 0;
2658}
2659
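The final test relies on the classic x & (x - 1) trick: subtracting one clears the lowest set bit, so the AND is zero exactly when at most one bit is set. A quick standalone check:

#include <assert.h>
#include <stdint.h>

/* Zero when flags has at most one bit set -- the test used above after
 * the type bits have been masked off. */
static int exactly_one_or_zero_bits(uint64_t flags)
{
        return (flags & (flags - 1)) == 0;
}

int main(void)
{
        assert(exactly_one_or_zero_bits(0x0));   /* no bits */
        assert(exactly_one_or_zero_bits(0x8));   /* one bit */
        assert(!exactly_one_or_zero_bits(0x6));  /* two bits: 0b110 */
        return 0;
}
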
2605static inline int balance_need_close(struct btrfs_fs_info *fs_info) 2660static inline int balance_need_close(struct btrfs_fs_info *fs_info)
2606{ 2661{
2607 /* cancel requested || normal exit path */ 2662 /* cancel requested || normal exit path */
@@ -2630,6 +2685,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2630{ 2685{
2631 struct btrfs_fs_info *fs_info = bctl->fs_info; 2686 struct btrfs_fs_info *fs_info = bctl->fs_info;
2632 u64 allowed; 2687 u64 allowed;
2688 int mixed = 0;
2633 int ret; 2689 int ret;
2634 2690
2635 if (btrfs_fs_closing(fs_info) || 2691 if (btrfs_fs_closing(fs_info) ||
@@ -2639,13 +2695,16 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2639 goto out; 2695 goto out;
2640 } 2696 }
2641 2697
2698 allowed = btrfs_super_incompat_flags(fs_info->super_copy);
2699 if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
2700 mixed = 1;
2701
2642 /* 2702 /*
2643 * In case of mixed groups both data and meta should be picked, 2703 * In case of mixed groups both data and meta should be picked,
2644 * and identical options should be given for both of them. 2704 * and identical options should be given for both of them.
2645 */ 2705 */
2646 allowed = btrfs_super_incompat_flags(fs_info->super_copy); 2706 allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
2647 if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && 2707 if (mixed && (bctl->flags & allowed)) {
2648 (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) {
2649 if (!(bctl->flags & BTRFS_BALANCE_DATA) || 2708 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
2650 !(bctl->flags & BTRFS_BALANCE_METADATA) || 2709 !(bctl->flags & BTRFS_BALANCE_METADATA) ||
2651 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { 2710 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
@@ -2656,14 +2715,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2656 } 2715 }
2657 } 2716 }
2658 2717
2659 /*
2660 * Profile changing sanity checks. Skip them if a simple
2661 * balance is requested.
2662 */
2663 if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) &
2664 BTRFS_BALANCE_ARGS_CONVERT))
2665 goto do_balance;
2666
2667 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; 2718 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2668 if (fs_info->fs_devices->num_devices == 1) 2719 if (fs_info->fs_devices->num_devices == 1)
2669 allowed |= BTRFS_BLOCK_GROUP_DUP; 2720 allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -2673,24 +2724,27 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2673 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | 2724 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
2674 BTRFS_BLOCK_GROUP_RAID10); 2725 BTRFS_BLOCK_GROUP_RAID10);
2675 2726
2676 if (!profile_is_valid(bctl->data.target, 1) || 2727 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2677 bctl->data.target & ~allowed) { 2728 (!alloc_profile_is_valid(bctl->data.target, 1) ||
2729 (bctl->data.target & ~allowed))) {
2678 printk(KERN_ERR "btrfs: unable to start balance with target " 2730 printk(KERN_ERR "btrfs: unable to start balance with target "
2679 "data profile %llu\n", 2731 "data profile %llu\n",
2680 (unsigned long long)bctl->data.target); 2732 (unsigned long long)bctl->data.target);
2681 ret = -EINVAL; 2733 ret = -EINVAL;
2682 goto out; 2734 goto out;
2683 } 2735 }
2684 if (!profile_is_valid(bctl->meta.target, 1) || 2736 if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2685 bctl->meta.target & ~allowed) { 2737 (!alloc_profile_is_valid(bctl->meta.target, 1) ||
2738 (bctl->meta.target & ~allowed))) {
2686 printk(KERN_ERR "btrfs: unable to start balance with target " 2739 printk(KERN_ERR "btrfs: unable to start balance with target "
2687 "metadata profile %llu\n", 2740 "metadata profile %llu\n",
2688 (unsigned long long)bctl->meta.target); 2741 (unsigned long long)bctl->meta.target);
2689 ret = -EINVAL; 2742 ret = -EINVAL;
2690 goto out; 2743 goto out;
2691 } 2744 }
2692 if (!profile_is_valid(bctl->sys.target, 1) || 2745 if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2693 bctl->sys.target & ~allowed) { 2746 (!alloc_profile_is_valid(bctl->sys.target, 1) ||
2747 (bctl->sys.target & ~allowed))) {
2694 printk(KERN_ERR "btrfs: unable to start balance with target " 2748 printk(KERN_ERR "btrfs: unable to start balance with target "
2695 "system profile %llu\n", 2749 "system profile %llu\n",
2696 (unsigned long long)bctl->sys.target); 2750 (unsigned long long)bctl->sys.target);
@@ -2698,7 +2752,9 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2698 goto out; 2752 goto out;
2699 } 2753 }
2700 2754
2701 if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) { 2755 /* allow dup'ed data chunks only in mixed mode */
2756 if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2757 (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
2702 printk(KERN_ERR "btrfs: dup for data is not allowed\n"); 2758 printk(KERN_ERR "btrfs: dup for data is not allowed\n");
2703 ret = -EINVAL; 2759 ret = -EINVAL;
2704 goto out; 2760 goto out;
@@ -2724,7 +2780,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2724 } 2780 }
2725 } 2781 }
2726 2782
2727do_balance:
2728 ret = insert_balance_item(fs_info->tree_root, bctl); 2783 ret = insert_balance_item(fs_info->tree_root, bctl);
2729 if (ret && ret != -EEXIST) 2784 if (ret && ret != -EEXIST)
2730 goto out; 2785 goto out;
@@ -2967,7 +3022,7 @@ again:
2967 key.offset = (u64)-1; 3022 key.offset = (u64)-1;
2968 key.type = BTRFS_DEV_EXTENT_KEY; 3023 key.type = BTRFS_DEV_EXTENT_KEY;
2969 3024
2970 while (1) { 3025 do {
2971 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3026 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2972 if (ret < 0) 3027 if (ret < 0)
2973 goto done; 3028 goto done;
@@ -3009,8 +3064,7 @@ again:
3009 goto done; 3064 goto done;
3010 if (ret == -ENOSPC) 3065 if (ret == -ENOSPC)
3011 failed++; 3066 failed++;
3012 key.offset -= 1; 3067 } while (key.offset-- > 0);
3013 }
3014 3068
3015 if (failed && !retried) { 3069 if (failed && !retried) {
3016 failed = 0; 3070 failed = 0;
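
The shrink loop is rewritten as a do/while whose condition tests key.offset before the post-decrement takes effect, so an unsigned offset still terminates the loop at zero rather than wrapping first. A compact demonstration of those semantics:

#include <stdio.h>

int main(void)
{
        unsigned long long offset = 3;
        int passes = 0;

        do {
                passes++;               /* body runs for offset 3, 2, 1, 0 */
        } while (offset-- > 0);

        /* the final test sees 0, fails, and only then wraps offset */
        printf("passes=%d final=%llu\n", passes, offset);  /* passes=4 */
        return 0;
}
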
@@ -3128,11 +3182,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3128 int i; 3182 int i;
3129 int j; 3183 int j;
3130 3184
3131 if ((type & BTRFS_BLOCK_GROUP_RAID1) && 3185 BUG_ON(!alloc_profile_is_valid(type, 0));
3132 (type & BTRFS_BLOCK_GROUP_DUP)) {
3133 WARN_ON(1);
3134 type &= ~BTRFS_BLOCK_GROUP_DUP;
3135 }
3136 3186
3137 if (list_empty(&fs_devices->alloc_list)) 3187 if (list_empty(&fs_devices->alloc_list))
3138 return -ENOSPC; 3188 return -ENOSPC;
@@ -3328,13 +3378,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3328 write_lock(&em_tree->lock); 3378 write_lock(&em_tree->lock);
3329 ret = add_extent_mapping(em_tree, em); 3379 ret = add_extent_mapping(em_tree, em);
3330 write_unlock(&em_tree->lock); 3380 write_unlock(&em_tree->lock);
3331 BUG_ON(ret);
3332 free_extent_map(em); 3381 free_extent_map(em);
3382 if (ret)
3383 goto error;
3333 3384
3334 ret = btrfs_make_block_group(trans, extent_root, 0, type, 3385 ret = btrfs_make_block_group(trans, extent_root, 0, type,
3335 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 3386 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
3336 start, num_bytes); 3387 start, num_bytes);
3337 BUG_ON(ret); 3388 if (ret)
3389 goto error;
3338 3390
3339 for (i = 0; i < map->num_stripes; ++i) { 3391 for (i = 0; i < map->num_stripes; ++i) {
3340 struct btrfs_device *device; 3392 struct btrfs_device *device;
@@ -3347,7 +3399,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3347 info->chunk_root->root_key.objectid, 3399 info->chunk_root->root_key.objectid,
3348 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 3400 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
3349 start, dev_offset, stripe_size); 3401 start, dev_offset, stripe_size);
3350 BUG_ON(ret); 3402 if (ret) {
3403 btrfs_abort_transaction(trans, extent_root, ret);
3404 goto error;
3405 }
3351 } 3406 }
3352 3407
3353 kfree(devices_info); 3408 kfree(devices_info);
@@ -3383,7 +3438,8 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
3383 device = map->stripes[index].dev; 3438 device = map->stripes[index].dev;
3384 device->bytes_used += stripe_size; 3439 device->bytes_used += stripe_size;
3385 ret = btrfs_update_device(trans, device); 3440 ret = btrfs_update_device(trans, device);
3386 BUG_ON(ret); 3441 if (ret)
3442 goto out_free;
3387 index++; 3443 index++;
3388 } 3444 }
3389 3445
@@ -3420,16 +3476,19 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
3420 key.offset = chunk_offset; 3476 key.offset = chunk_offset;
3421 3477
3422 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); 3478 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
3423 BUG_ON(ret);
3424 3479
3425 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { 3480 if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
3481 /*
3482 * TODO: Cleanup of inserted chunk root in case of
3483 * failure.
3484 */
3426 ret = btrfs_add_system_chunk(chunk_root, &key, chunk, 3485 ret = btrfs_add_system_chunk(chunk_root, &key, chunk,
3427 item_size); 3486 item_size);
3428 BUG_ON(ret);
3429 } 3487 }
3430 3488
3489out_free:
3431 kfree(chunk); 3490 kfree(chunk);
3432 return 0; 3491 return ret;
3433} 3492}
3434 3493
3435/* 3494/*
@@ -3461,7 +3520,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3461 3520
3462 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, 3521 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
3463 chunk_size, stripe_size); 3522 chunk_size, stripe_size);
3464 BUG_ON(ret); 3523 if (ret)
3524 return ret;
3465 return 0; 3525 return 0;
3466} 3526}
3467 3527
@@ -3493,7 +3553,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
3493 3553
3494 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, 3554 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
3495 &stripe_size, chunk_offset, alloc_profile); 3555 &stripe_size, chunk_offset, alloc_profile);
3496 BUG_ON(ret); 3556 if (ret)
3557 return ret;
3497 3558
3498 sys_chunk_offset = chunk_offset + chunk_size; 3559 sys_chunk_offset = chunk_offset + chunk_size;
3499 3560
@@ -3504,10 +3565,12 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
3504 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, 3565 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
3505 &sys_chunk_size, &sys_stripe_size, 3566 &sys_chunk_size, &sys_stripe_size,
3506 sys_chunk_offset, alloc_profile); 3567 sys_chunk_offset, alloc_profile);
3507 BUG_ON(ret); 3568 if (ret)
3569 goto abort;
3508 3570
3509 ret = btrfs_add_device(trans, fs_info->chunk_root, device); 3571 ret = btrfs_add_device(trans, fs_info->chunk_root, device);
3510 BUG_ON(ret); 3572 if (ret)
3573 goto abort;
3511 3574
3512 /* 3575 /*
3513 * Modifying chunk tree needs allocating new blocks from both 3576 * Modifying chunk tree needs allocating new blocks from both
@@ -3517,13 +3580,20 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
3517 */ 3580 */
3518 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, 3581 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
3519 chunk_size, stripe_size); 3582 chunk_size, stripe_size);
3520 BUG_ON(ret); 3583 if (ret)
3584 goto abort;
3521 3585
3522 ret = __finish_chunk_alloc(trans, extent_root, sys_map, 3586 ret = __finish_chunk_alloc(trans, extent_root, sys_map,
3523 sys_chunk_offset, sys_chunk_size, 3587 sys_chunk_offset, sys_chunk_size,
3524 sys_stripe_size); 3588 sys_stripe_size);
3525 BUG_ON(ret); 3589 if (ret)
3590 goto abort;
3591
3526 return 0; 3592 return 0;
3593
3594abort:
3595 btrfs_abort_transaction(trans, root, ret);
3596 return ret;
3527} 3597}
3528 3598
3529int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) 3599int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
@@ -3874,7 +3944,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
3874 do_div(length, map->num_stripes); 3944 do_div(length, map->num_stripes);
3875 3945
3876 buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); 3946 buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
3877 BUG_ON(!buf); 3947 BUG_ON(!buf); /* -ENOMEM */
3878 3948
3879 for (i = 0; i < map->num_stripes; i++) { 3949 for (i = 0; i < map->num_stripes; i++) {
3880 if (devid && map->stripes[i].dev->devid != devid) 3950 if (devid && map->stripes[i].dev->devid != devid)
@@ -3967,7 +4037,7 @@ struct async_sched {
3967 * This will add one bio to the pending list for a device and make sure 4037 * This will add one bio to the pending list for a device and make sure
3968 * the work struct is scheduled. 4038 * the work struct is scheduled.
3969 */ 4039 */
3970static noinline int schedule_bio(struct btrfs_root *root, 4040static noinline void schedule_bio(struct btrfs_root *root,
3971 struct btrfs_device *device, 4041 struct btrfs_device *device,
3972 int rw, struct bio *bio) 4042 int rw, struct bio *bio)
3973{ 4043{
@@ -3979,7 +4049,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
3979 bio_get(bio); 4049 bio_get(bio);
3980 btrfsic_submit_bio(rw, bio); 4050 btrfsic_submit_bio(rw, bio);
3981 bio_put(bio); 4051 bio_put(bio);
3982 return 0; 4052 return;
3983 } 4053 }
3984 4054
3985 /* 4055 /*
@@ -4013,7 +4083,6 @@ static noinline int schedule_bio(struct btrfs_root *root,
4013 if (should_queue) 4083 if (should_queue)
4014 btrfs_queue_worker(&root->fs_info->submit_workers, 4084 btrfs_queue_worker(&root->fs_info->submit_workers,
4015 &device->work); 4085 &device->work);
4016 return 0;
4017} 4086}
4018 4087
4019int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, 4088int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
@@ -4036,7 +4105,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4036 4105
4037 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio, 4106 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio,
4038 mirror_num); 4107 mirror_num);
4039 BUG_ON(ret); 4108 if (ret) /* -ENOMEM */
4109 return ret;
4040 4110
4041 total_devs = bbio->num_stripes; 4111 total_devs = bbio->num_stripes;
4042 if (map_length < length) { 4112 if (map_length < length) {
@@ -4055,7 +4125,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4055 while (dev_nr < total_devs) { 4125 while (dev_nr < total_devs) {
4056 if (dev_nr < total_devs - 1) { 4126 if (dev_nr < total_devs - 1) {
4057 bio = bio_clone(first_bio, GFP_NOFS); 4127 bio = bio_clone(first_bio, GFP_NOFS);
4058 BUG_ON(!bio); 4128 BUG_ON(!bio); /* -ENOMEM */
4059 } else { 4129 } else {
4060 bio = first_bio; 4130 bio = first_bio;
4061 } 4131 }
@@ -4209,13 +4279,13 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
4209 write_lock(&map_tree->map_tree.lock); 4279 write_lock(&map_tree->map_tree.lock);
4210 ret = add_extent_mapping(&map_tree->map_tree, em); 4280 ret = add_extent_mapping(&map_tree->map_tree, em);
4211 write_unlock(&map_tree->map_tree.lock); 4281 write_unlock(&map_tree->map_tree.lock);
4212 BUG_ON(ret); 4282 BUG_ON(ret); /* Tree corruption */
4213 free_extent_map(em); 4283 free_extent_map(em);
4214 4284
4215 return 0; 4285 return 0;
4216} 4286}
4217 4287
4218static int fill_device_from_item(struct extent_buffer *leaf, 4288static void fill_device_from_item(struct extent_buffer *leaf,
4219 struct btrfs_dev_item *dev_item, 4289 struct btrfs_dev_item *dev_item,
4220 struct btrfs_device *device) 4290 struct btrfs_device *device)
4221{ 4291{
@@ -4232,8 +4302,6 @@ static int fill_device_from_item(struct extent_buffer *leaf,
4232 4302
4233 ptr = (unsigned long)btrfs_device_uuid(dev_item); 4303 ptr = (unsigned long)btrfs_device_uuid(dev_item);
4234 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 4304 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
4235
4236 return 0;
4237} 4305}
4238 4306
4239static int open_seed_devices(struct btrfs_root *root, u8 *fsid) 4307static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
@@ -4384,7 +4452,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
4384 * to silence the warning eg. on PowerPC 64. 4452 * to silence the warning eg. on PowerPC 64.
4385 */ 4453 */
4386 if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE) 4454 if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
4387 SetPageUptodate(sb->first_page); 4455 SetPageUptodate(sb->pages[0]);
4388 4456
4389 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); 4457 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
4390 array_size = btrfs_super_sys_array_size(super_copy); 4458 array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 19ac95048b88..bb6b03f97aaa 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -260,12 +260,12 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
260int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 260int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
261 struct btrfs_fs_devices **fs_devices_ret); 261 struct btrfs_fs_devices **fs_devices_ret);
262int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); 262int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
263int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices); 263void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
264int btrfs_add_device(struct btrfs_trans_handle *trans, 264int btrfs_add_device(struct btrfs_trans_handle *trans,
265 struct btrfs_root *root, 265 struct btrfs_root *root,
266 struct btrfs_device *device); 266 struct btrfs_device *device);
267int btrfs_rm_device(struct btrfs_root *root, char *device_path); 267int btrfs_rm_device(struct btrfs_root *root, char *device_path);
268int btrfs_cleanup_fs_uuids(void); 268void btrfs_cleanup_fs_uuids(void);
269int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); 269int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
270int btrfs_grow_device(struct btrfs_trans_handle *trans, 270int btrfs_grow_device(struct btrfs_trans_handle *trans,
271 struct btrfs_device *device, u64 new_size); 271 struct btrfs_device *device, u64 new_size);