author	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-30 15:44:29 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-30 15:44:29 -0400
commit	9613bebb223dea3179c265dc31e1bb41ae39f321 (patch)
tree	39bf883573d23775a53be3172323c0237fef5630
parent	40380f1c7841a5dcbf0b20f0b6da11969211ef77 (diff)
parent	bc3f116fec194f1d7329b160c266fe16b9266a1e (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes and features from Chris Mason:
 "We've merged in the error handling patches from SuSE.  These are
  already shipping in the sles kernel, and they give btrfs the ability
  to abort transactions and go readonly on errors.  It involves a lot
  of churn as they clarify BUG_ONs, and remove the ones we now properly
  deal with.

  Josef reworked the way our metadata interacts with the page cache.
  page->private now points to the btrfs extent_buffer object, which
  makes everything faster.  He changed it so we write a whole extent
  buffer at a time instead of allowing individual pages to go down,
  which will be important for the raid5/6 code (for the 3.5 merge
  window ;)

  Josef also made us more aggressive about dropping pages for metadata
  blocks that were freed due to COW.  Overall, our metadata caching is
  much faster now.

  We've integrated my patch for metadata bigger than the page size.
  This allows metadata blocks up to 64KB in size.  In practice 16K and
  32K seem to work best.  For workloads with lots of metadata, this
  cuts down the size of the extent allocation tree dramatically and
  fragments much less.

  Scrub was updated to support the larger block sizes, which ended up
  being a fairly large change (thanks Stefan Behrens).

  We also have an assortment of fixes and updates, especially to the
  balancing code (Ilya Dryomov), the back ref walker (Jan Schmidt) and
  the defragging code (Liu Bo)."

Fixed up trivial conflicts in fs/btrfs/scrub.c that were just due to
removal of the second argument to k[un]map_atomic() in commit
7ac687d9e047.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (75 commits)
  Btrfs: update the checks for mixed block groups with big metadata blocks
  Btrfs: update to the right index of defragment
  Btrfs: do not bother to defrag an extent if it is a big real extent
  Btrfs: add a check to decide if we should defrag the range
  Btrfs: fix recursive defragment with autodefrag option
  Btrfs: fix the mismatch of page->mapping
  Btrfs: fix race between direct io and autodefrag
  Btrfs: fix deadlock during allocating chunks
  Btrfs: show useful info in space reservation tracepoint
  Btrfs: don't use crc items bigger than 4KB
  Btrfs: flush out and clean up any block device pages during mount
  btrfs: disallow unequal data/metadata blocksize for mixed block groups
  Btrfs: enhance superblock sanity checks
  Btrfs: change scrub to support big blocks
  Btrfs: minor cleanup in scrub
  Btrfs: introduce common define for max number of mirrors
  Btrfs: fix infinite loop in btrfs_shrink_device()
  Btrfs: fix memory leak in resolver code
  Btrfs: allow dup for data chunks in mixed mode
  Btrfs: validate target profiles only if we are going to use them
  ...
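The error-handling work is the most visible pattern in the diff below: functions that used to BUG_ON() every failure now either return void (when they genuinely cannot fail) or propagate the error and abort the running transaction, flipping the filesystem read-only instead of crashing. A minimal sketch of the caller-side pattern, using update_ref_for_cow() and btrfs_abort_transaction() as they appear in the __btrfs_cow_block() hunk further down; the wrapper function itself is hypothetical:

	/*
	 * Illustrative only: the abort-on-error pattern this merge
	 * introduces.  update_ref_for_cow() and
	 * btrfs_abort_transaction() are real functions from this diff;
	 * this wrapper is not.
	 */
	static int cow_block_sketch(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct extent_buffer *buf,
				    struct extent_buffer *cow)
	{
		int last_ref = 0;
		int ret;

		ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
		if (ret) {
			/*
			 * Mark the transaction aborted; the filesystem
			 * goes read-only rather than hitting a BUG_ON().
			 */
			btrfs_abort_transaction(trans, root, ret);
			return ret;
		}
		return 0;
	}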
-rw-r--r--	fs/btrfs/async-thread.c	15
-rw-r--r--	fs/btrfs/async-thread.h	4
-rw-r--r--	fs/btrfs/backref.c	122
-rw-r--r--	fs/btrfs/backref.h	5
-rw-r--r--	fs/btrfs/compression.c	38
-rw-r--r--	fs/btrfs/compression.h	2
-rw-r--r--	fs/btrfs/ctree.c	384
-rw-r--r--	fs/btrfs/ctree.h	169
-rw-r--r--	fs/btrfs/delayed-inode.c	33
-rw-r--r--	fs/btrfs/delayed-ref.c	33
-rw-r--r--	fs/btrfs/dir-item.c	10
-rw-r--r--	fs/btrfs/disk-io.c	649
-rw-r--r--	fs/btrfs/disk-io.h	10
-rw-r--r--	fs/btrfs/export.c	2
-rw-r--r--	fs/btrfs/extent-tree.c	737
-rw-r--r--	fs/btrfs/extent_io.c	1035
-rw-r--r--	fs/btrfs/extent_io.h	62
-rw-r--r--	fs/btrfs/file-item.c	57
-rw-r--r--	fs/btrfs/file.c	52
-rw-r--r--	fs/btrfs/free-space-cache.c	15
-rw-r--r--	fs/btrfs/inode-item.c	6
-rw-r--r--	fs/btrfs/inode-map.c	25
-rw-r--r--	fs/btrfs/inode.c	457
-rw-r--r--	fs/btrfs/ioctl.c	194
-rw-r--r--	fs/btrfs/locking.c	6
-rw-r--r--	fs/btrfs/locking.h	4
-rw-r--r--	fs/btrfs/ordered-data.c	60
-rw-r--r--	fs/btrfs/ordered-data.h	24
-rw-r--r--	fs/btrfs/orphan.c	2
-rw-r--r--	fs/btrfs/reada.c	10
-rw-r--r--	fs/btrfs/relocation.c	130
-rw-r--r--	fs/btrfs/root-tree.c	25
-rw-r--r--	fs/btrfs/scrub.c	1407
-rw-r--r--	fs/btrfs/struct-funcs.c	53
-rw-r--r--	fs/btrfs/super.c	192
-rw-r--r--	fs/btrfs/transaction.c	213
-rw-r--r--	fs/btrfs/transaction.h	3
-rw-r--r--	fs/btrfs/tree-log.c	96
-rw-r--r--	fs/btrfs/tree-log.h	2
-rw-r--r--	fs/btrfs/volumes.c	240
-rw-r--r--	fs/btrfs/volumes.h	4
-rw-r--r--	include/trace/events/btrfs.h	44
42 files changed, 4406 insertions, 2225 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 0cc20b35c1c4..42704149b723 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -171,11 +171,11 @@ out:
 	spin_unlock_irqrestore(&workers->lock, flags);
 }
 
-static noinline int run_ordered_completions(struct btrfs_workers *workers,
+static noinline void run_ordered_completions(struct btrfs_workers *workers,
 					    struct btrfs_work *work)
 {
 	if (!workers->ordered)
-		return 0;
+		return;
 
 	set_bit(WORK_DONE_BIT, &work->flags);
 
@@ -213,7 +213,6 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
 	}
 
 	spin_unlock(&workers->order_lock);
-	return 0;
 }
 
 static void put_worker(struct btrfs_worker_thread *worker)
@@ -399,7 +398,7 @@ again:
 /*
  * this will wait for all the worker threads to shutdown
  */
-int btrfs_stop_workers(struct btrfs_workers *workers)
+void btrfs_stop_workers(struct btrfs_workers *workers)
 {
 	struct list_head *cur;
 	struct btrfs_worker_thread *worker;
@@ -427,7 +426,6 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
 		put_worker(worker);
 	}
 	spin_unlock_irq(&workers->lock);
-	return 0;
 }
 
 /*
@@ -615,14 +613,14 @@ found:
  * it was taken from. It is intended for use with long running work functions
  * that make some progress and want to give the cpu up for others.
  */
-int btrfs_requeue_work(struct btrfs_work *work)
+void btrfs_requeue_work(struct btrfs_work *work)
 {
 	struct btrfs_worker_thread *worker = work->worker;
 	unsigned long flags;
 	int wake = 0;
 
 	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
-		goto out;
+		return;
 
 	spin_lock_irqsave(&worker->lock, flags);
 	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
@@ -649,9 +647,6 @@ int btrfs_requeue_work(struct btrfs_work *work)
 	if (wake)
 		wake_up_process(worker->task);
 	spin_unlock_irqrestore(&worker->lock, flags);
-out:
-
-	return 0;
 }
 
 void btrfs_set_work_high_prio(struct btrfs_work *work)
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index f34cc31fa3c9..063698b90ce2 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -111,9 +111,9 @@ struct btrfs_workers {
 
 void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
 int btrfs_start_workers(struct btrfs_workers *workers);
-int btrfs_stop_workers(struct btrfs_workers *workers);
+void btrfs_stop_workers(struct btrfs_workers *workers);
 void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
 			struct btrfs_workers *async_starter);
-int btrfs_requeue_work(struct btrfs_work *work);
+void btrfs_requeue_work(struct btrfs_work *work);
 void btrfs_set_work_high_prio(struct btrfs_work *work);
 #endif
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0436c12da8c2..f4e90748940a 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -116,6 +116,7 @@ add_parent:
  * to a logical address
  */
 static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
+					int search_commit_root,
 					struct __prelim_ref *ref,
 					struct ulist *parents)
 {
@@ -131,6 +132,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
+	path->search_commit_root = !!search_commit_root;
 
 	root_key.objectid = ref->root_id;
 	root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -188,6 +190,7 @@ out:
  * resolve all indirect backrefs from the list
  */
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+				   int search_commit_root,
 				   struct list_head *head)
 {
 	int err;
@@ -212,7 +215,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 			continue;
 		if (ref->count == 0)
 			continue;
-		err = __resolve_indirect_ref(fs_info, ref, parents);
+		err = __resolve_indirect_ref(fs_info, search_commit_root,
+					     ref, parents);
 		if (err) {
 			if (ret == 0)
 				ret = err;
@@ -586,6 +590,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_head *head;
 	int info_level = 0;
 	int ret;
+	int search_commit_root = (trans == BTRFS_BACKREF_SEARCH_COMMIT_ROOT);
 	struct list_head prefs_delayed;
 	struct list_head prefs;
 	struct __prelim_ref *ref;
@@ -600,6 +605,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
+	path->search_commit_root = !!search_commit_root;
 
 	/*
	 * grab both a lock on the path and a lock on the delayed ref head.
@@ -614,35 +620,39 @@ again:
 		goto out;
 	BUG_ON(ret == 0);
 
-	/*
-	 * look if there are updates for this ref queued and lock the head
-	 */
-	delayed_refs = &trans->transaction->delayed_refs;
-	spin_lock(&delayed_refs->lock);
-	head = btrfs_find_delayed_ref_head(trans, bytenr);
-	if (head) {
-		if (!mutex_trylock(&head->mutex)) {
-			atomic_inc(&head->node.refs);
-			spin_unlock(&delayed_refs->lock);
-
-			btrfs_release_path(path);
-
-			/*
-			 * Mutex was contended, block until it's
-			 * released and try again
-			 */
-			mutex_lock(&head->mutex);
-			mutex_unlock(&head->mutex);
-			btrfs_put_delayed_ref(&head->node);
-			goto again;
-		}
-		ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
-		if (ret) {
-			spin_unlock(&delayed_refs->lock);
-			goto out;
-		}
-	}
-	spin_unlock(&delayed_refs->lock);
+	if (trans != BTRFS_BACKREF_SEARCH_COMMIT_ROOT) {
+		/*
+		 * look if there are updates for this ref queued and lock the
+		 * head
+		 */
+		delayed_refs = &trans->transaction->delayed_refs;
+		spin_lock(&delayed_refs->lock);
+		head = btrfs_find_delayed_ref_head(trans, bytenr);
+		if (head) {
+			if (!mutex_trylock(&head->mutex)) {
+				atomic_inc(&head->node.refs);
+				spin_unlock(&delayed_refs->lock);
+
+				btrfs_release_path(path);
+
+				/*
+				 * Mutex was contended, block until it's
+				 * released and try again
+				 */
+				mutex_lock(&head->mutex);
+				mutex_unlock(&head->mutex);
+				btrfs_put_delayed_ref(&head->node);
+				goto again;
+			}
+			ret = __add_delayed_refs(head, seq, &info_key,
+						 &prefs_delayed);
+			if (ret) {
+				spin_unlock(&delayed_refs->lock);
+				goto out;
+			}
+		}
+		spin_unlock(&delayed_refs->lock);
+	}
 
 	if (path->slots[0]) {
 		struct extent_buffer *leaf;
@@ -679,7 +689,7 @@ again:
 	if (ret)
 		goto out;
 
-	ret = __resolve_indirect_refs(fs_info, &prefs);
+	ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs);
 	if (ret)
 		goto out;
 
@@ -1074,8 +1084,7 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 	return 0;
 }
 
-static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
-			     struct btrfs_path *path, u64 logical,
+static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical,
 			     u64 orig_extent_item_objectid,
 			     u64 extent_item_pos, u64 root,
 			     iterate_extent_inodes_t *iterate, void *ctx)
@@ -1143,35 +1152,38 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
  * calls iterate() for every inode that references the extent identified by
  * the given parameters.
  * when the iterator function returns a non-zero value, iteration stops.
- * path is guaranteed to be in released state when iterate() is called.
  */
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
-			  struct btrfs_path *path,
 			  u64 extent_item_objectid, u64 extent_item_pos,
+			  int search_commit_root,
 			  iterate_extent_inodes_t *iterate, void *ctx)
 {
 	int ret;
 	struct list_head data_refs = LIST_HEAD_INIT(data_refs);
 	struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
 	struct btrfs_trans_handle *trans;
-	struct ulist *refs;
-	struct ulist *roots;
+	struct ulist *refs = NULL;
+	struct ulist *roots = NULL;
 	struct ulist_node *ref_node = NULL;
 	struct ulist_node *root_node = NULL;
 	struct seq_list seq_elem;
-	struct btrfs_delayed_ref_root *delayed_refs;
-
-	trans = btrfs_join_transaction(fs_info->extent_root);
-	if (IS_ERR(trans))
-		return PTR_ERR(trans);
+	struct btrfs_delayed_ref_root *delayed_refs = NULL;
 
 	pr_debug("resolving all inodes for extent %llu\n",
 			extent_item_objectid);
 
-	delayed_refs = &trans->transaction->delayed_refs;
-	spin_lock(&delayed_refs->lock);
-	btrfs_get_delayed_seq(delayed_refs, &seq_elem);
-	spin_unlock(&delayed_refs->lock);
+	if (search_commit_root) {
+		trans = BTRFS_BACKREF_SEARCH_COMMIT_ROOT;
+	} else {
+		trans = btrfs_join_transaction(fs_info->extent_root);
+		if (IS_ERR(trans))
+			return PTR_ERR(trans);
+
+		delayed_refs = &trans->transaction->delayed_refs;
+		spin_lock(&delayed_refs->lock);
+		btrfs_get_delayed_seq(delayed_refs, &seq_elem);
+		spin_unlock(&delayed_refs->lock);
+	}
 
 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
 				   extent_item_pos, seq_elem.seq,
@@ -1188,7 +1200,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	while (!ret && (root_node = ulist_next(roots, root_node))) {
 		pr_debug("root %llu references leaf %llu\n",
 				root_node->val, ref_node->val);
-		ret = iterate_leaf_refs(fs_info, path, ref_node->val,
+		ret = iterate_leaf_refs(fs_info, ref_node->val,
 					extent_item_objectid,
 					extent_item_pos, root_node->val,
 					iterate, ctx);
@@ -1198,8 +1210,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 	ulist_free(refs);
 	ulist_free(roots);
 out:
-	btrfs_put_delayed_seq(delayed_refs, &seq_elem);
-	btrfs_end_transaction(trans, fs_info->extent_root);
+	if (!search_commit_root) {
+		btrfs_put_delayed_seq(delayed_refs, &seq_elem);
+		btrfs_end_transaction(trans, fs_info->extent_root);
+	}
+
 	return ret;
 }
 
@@ -1210,6 +1225,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 	int ret;
 	u64 extent_item_pos;
 	struct btrfs_key found_key;
+	int search_commit_root = path->search_commit_root;
 
 	ret = extent_from_logical(fs_info, logical, path,
 				  &found_key);
@@ -1220,8 +1236,9 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 		return ret;
 
 	extent_item_pos = logical - found_key.objectid;
-	ret = iterate_extent_inodes(fs_info, path, found_key.objectid,
-					extent_item_pos, iterate, ctx);
+	ret = iterate_extent_inodes(fs_info, found_key.objectid,
					extent_item_pos, search_commit_root,
+					iterate, ctx);
 
 	return ret;
 }
@@ -1342,12 +1359,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
 					     inode_to_path, ipath);
 }
 
-/*
- * allocates space to return multiple file system paths for an inode.
- * total_bytes to allocate are passed, note that space usable for actual path
- * information will be total_bytes - sizeof(struct inode_fs_paths).
- * the returned pointer must be freed with free_ipath() in the end.
- */
 struct btrfs_data_container *init_data_container(u32 total_bytes)
 {
 	struct btrfs_data_container *data;
@@ -1403,5 +1414,6 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 
 void free_ipath(struct inode_fs_paths *ipath)
 {
+	kfree(ipath->fspath);
 	kfree(ipath);
 }
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index d00dfa9ca934..57ea2e959e4d 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -22,6 +22,8 @@
 #include "ioctl.h"
 #include "ulist.h"
 
+#define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0)
+
 struct inode_fs_paths {
 	struct btrfs_path *btrfs_path;
 	struct btrfs_root *fs_root;
@@ -44,9 +46,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 			    u64 *out_root, u8 *out_level);
 
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
-			  struct btrfs_path *path,
 			  u64 extent_item_objectid,
-			  u64 extent_offset,
+			  u64 extent_offset, int search_commit_root,
 			  iterate_extent_inodes_t *iterate, void *ctx);
 
 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
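With the struct btrfs_path parameter gone, a caller that wants to resolve references without joining a transaction just passes search_commit_root = 1; iterate_extent_inodes() substitutes the BTRFS_BACKREF_SEARCH_COMMIT_ROOT sentinel for the transaction handle internally. A hypothetical caller, assuming the iterate_extent_inodes_t callback takes (inum, offset, root, ctx) as declared elsewhere in backref.h:

	/* Hypothetical: count every inode reference to one extent. */
	static int count_one_ref(u64 inum, u64 offset, u64 root, void *ctx)
	{
		(*(u64 *)ctx)++;
		return 0;	/* non-zero would stop the iteration */
	}

	static int count_extent_refs(struct btrfs_fs_info *fs_info,
				     u64 extent_item_objectid,
				     u64 extent_offset, u64 *count)
	{
		*count = 0;
		/* search_commit_root = 1: no transaction is joined */
		return iterate_extent_inodes(fs_info, extent_item_objectid,
					     extent_offset, 1,
					     count_one_ref, count);
	}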
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b805afb37fa8..d286b40a5671 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -226,8 +226,8 @@ out:
 * Clear the writeback bits on all of the file
 * pages for a compressed write
 */
-static noinline int end_compressed_writeback(struct inode *inode, u64 start,
+static noinline void end_compressed_writeback(struct inode *inode, u64 start,
					     unsigned long ram_size)
 {
 	unsigned long index = start >> PAGE_CACHE_SHIFT;
 	unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
@@ -253,7 +253,6 @@ static noinline int end_compressed_writeback(struct inode *inode, u64 start,
 		index += ret;
 	}
 	/* the inode may be gone now */
-	return 0;
 }
 
 /*
@@ -392,16 +391,16 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
	 */
 	atomic_inc(&cb->pending_bios);
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	if (!skip_sum) {
 		ret = btrfs_csum_one_bio(root, inode, bio,
 					 start, 1);
-		BUG_ON(ret);
+		BUG_ON(ret); /* -ENOMEM */
 	}
 
 	ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	bio_put(bio);
 
@@ -421,15 +420,15 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bio_get(bio);
 
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	if (!skip_sum) {
 		ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-		BUG_ON(ret);
+		BUG_ON(ret); /* -ENOMEM */
 	}
 
 	ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	bio_put(bio);
 	return 0;
@@ -497,7 +496,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
	 * sure they map to this compressed extent on disk.
	 */
 	set_page_extent_mapped(page);
-	lock_extent(tree, last_offset, end, GFP_NOFS);
+	lock_extent(tree, last_offset, end);
 	read_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, last_offset,
 				   PAGE_CACHE_SIZE);
@@ -507,7 +506,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 	    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
 	    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
 		free_extent_map(em);
-		unlock_extent(tree, last_offset, end, GFP_NOFS);
+		unlock_extent(tree, last_offset, end);
 		unlock_page(page);
 		page_cache_release(page);
 		break;
@@ -535,7 +534,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		nr_pages++;
 		page_cache_release(page);
 	} else {
-		unlock_extent(tree, last_offset, end, GFP_NOFS);
+		unlock_extent(tree, last_offset, end);
 		unlock_page(page);
 		page_cache_release(page);
 		break;
@@ -662,7 +661,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			bio_get(comp_bio);
 
 			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 
 			/*
			 * inc the count before we submit the bio so
@@ -675,14 +674,14 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 				ret = btrfs_lookup_bio_sums(root, inode,
 							comp_bio, sums);
-				BUG_ON(ret);
+				BUG_ON(ret); /* -ENOMEM */
 			}
 			sums += (comp_bio->bi_size + root->sectorsize - 1) /
 				root->sectorsize;
 
 			ret = btrfs_map_bio(root, READ, comp_bio,
 					    mirror_num, 0);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 
 			bio_put(comp_bio);
 
@@ -698,15 +697,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio_get(comp_bio);
 
 	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 		ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
-		BUG_ON(ret);
+		BUG_ON(ret); /* -ENOMEM */
 	}
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
-	BUG_ON(ret);
+	BUG_ON(ret); /* -ENOMEM */
 
 	bio_put(comp_bio);
 	return 0;
@@ -734,7 +733,7 @@ struct btrfs_compress_op *btrfs_compress_op[] = {
 	&btrfs_lzo_compress,
 };
 
-int __init btrfs_init_compress(void)
+void __init btrfs_init_compress(void)
 {
 	int i;
 
@@ -744,7 +743,6 @@ int __init btrfs_init_compress(void)
 		atomic_set(&comp_alloc_workspace[i], 0);
 		init_waitqueue_head(&comp_workspace_wait[i]);
 	}
-	return 0;
 }
 
 /*
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index a12059f4f0fd..9afb0a62ae82 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,7 +19,7 @@
 #ifndef __BTRFS_COMPRESSION_
 #define __BTRFS_COMPRESSION_
 
-int btrfs_init_compress(void);
+void btrfs_init_compress(void);
 void btrfs_exit_compress(void);
 
 int btrfs_compress_pages(int type, struct address_space *mapping,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0639a555e16e..e801f226d7e0 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -36,7 +36,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      struct extent_buffer *dst_buf,
 			      struct extent_buffer *src_buf);
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		   struct btrfs_path *path, int level, int slot);
 
 struct btrfs_path *btrfs_alloc_path(void)
@@ -156,10 +156,23 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
 	struct extent_buffer *eb;
 
-	rcu_read_lock();
-	eb = rcu_dereference(root->node);
-	extent_buffer_get(eb);
-	rcu_read_unlock();
+	while (1) {
+		rcu_read_lock();
+		eb = rcu_dereference(root->node);
+
+		/*
+		 * RCU really hurts here, we could free up the root node because
+		 * it was cow'ed but we may not get the new root node yet so do
+		 * the inc_not_zero dance and if it doesn't work then
+		 * synchronize_rcu and try again.
+		 */
+		if (atomic_inc_not_zero(&eb->refs)) {
+			rcu_read_unlock();
+			break;
+		}
+		rcu_read_unlock();
+		synchronize_rcu();
+	}
 	return eb;
 }
 
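The loop above is the usual RCU idiom for taking a reference on an object that can be freed under you: rcu_read_lock() keeps the memory from being reclaimed, atomic_inc_not_zero() refuses to revive an object whose refcount has already hit zero, and synchronize_rcu() waits out the updater before the pointer is reloaded. Stripped of the btrfs specifics, a sketch of the same idea (the struct and helper are illustrative, not btrfs API; the slot is assumed always populated, as root->node is):

	struct node {
		atomic_t refs;	/* freed when this drops to zero */
	};

	static struct node *grab_node(struct node __rcu **slot)
	{
		struct node *n;

		while (1) {
			rcu_read_lock();
			n = rcu_dereference(*slot);
			/* a 0 -> 1 transition is refused: object is dying */
			if (atomic_inc_not_zero(&n->refs)) {
				rcu_read_unlock();
				return n;
			}
			rcu_read_unlock();
			/* wait for the writer to publish a new node, retry */
			synchronize_rcu();
		}
	}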
@@ -331,8 +344,13 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 	if (btrfs_block_can_be_shared(root, buf)) {
 		ret = btrfs_lookup_extent_info(trans, root, buf->start,
 					       buf->len, &refs, &flags);
-		BUG_ON(ret);
-		BUG_ON(refs == 0);
+		if (ret)
+			return ret;
+		if (refs == 0) {
+			ret = -EROFS;
+			btrfs_std_error(root->fs_info, ret);
+			return ret;
+		}
 	} else {
 		refs = 1;
 		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
@@ -351,14 +369,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 	     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
 	    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
 			ret = btrfs_inc_ref(trans, root, buf, 1, 1);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 
 			if (root->root_key.objectid ==
 			    BTRFS_TREE_RELOC_OBJECTID) {
 				ret = btrfs_dec_ref(trans, root, buf, 0, 1);
-				BUG_ON(ret);
+				BUG_ON(ret); /* -ENOMEM */
 				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
-				BUG_ON(ret);
+				BUG_ON(ret); /* -ENOMEM */
 			}
 			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
 		} else {
@@ -368,14 +386,15 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
 			else
 				ret = btrfs_inc_ref(trans, root, cow, 0, 1);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 		}
 		if (new_flags != 0) {
 			ret = btrfs_set_disk_extent_flags(trans, root,
 							  buf->start,
 							  buf->len,
 							  new_flags, 0);
-			BUG_ON(ret);
+			if (ret)
+				return ret;
 		}
 	} else {
 		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
@@ -384,9 +403,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
 			else
 				ret = btrfs_inc_ref(trans, root, cow, 0, 1);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 			ret = btrfs_dec_ref(trans, root, buf, 1, 1);
-			BUG_ON(ret);
+			BUG_ON(ret); /* -ENOMEM */
 		}
 		clean_tree_block(trans, root, buf);
 		*last_ref = 1;
@@ -415,7 +434,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *cow;
-	int level;
+	int level, ret;
 	int last_ref = 0;
 	int unlock_orig = 0;
 	u64 parent_start;
@@ -467,7 +486,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			    (unsigned long)btrfs_header_fsid(cow),
 			    BTRFS_FSID_SIZE);
 
-	update_ref_for_cow(trans, root, buf, cow, &last_ref);
+	ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
+		return ret;
+	}
 
 	if (root->ref_cows)
 		btrfs_reloc_cow_block(trans, root, buf, cow);
@@ -504,7 +527,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	}
 	if (unlock_orig)
 		btrfs_tree_unlock(buf);
-	free_extent_buffer(buf);
+	free_extent_buffer_stale(buf);
 	btrfs_mark_buffer_dirty(cow);
 	*cow_ret = cow;
 	return 0;
@@ -934,7 +957,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
 		/* promote the child to a root */
 		child = read_node_slot(root, mid, 0);
-		BUG_ON(!child);
+		if (!child) {
+			ret = -EROFS;
+			btrfs_std_error(root->fs_info, ret);
+			goto enospc;
+		}
+
 		btrfs_tree_lock(child);
 		btrfs_set_lock_blocking(child);
 		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
@@ -959,7 +987,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		root_sub_used(root, mid->len);
 		btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
 		/* once for the root ptr */
-		free_extent_buffer(mid);
+		free_extent_buffer_stale(mid);
 		return 0;
 	}
 	if (btrfs_header_nritems(mid) >
@@ -1010,13 +1038,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		if (btrfs_header_nritems(right) == 0) {
 			clean_tree_block(trans, root, right);
 			btrfs_tree_unlock(right);
-			wret = del_ptr(trans, root, path, level + 1, pslot +
-				       1);
-			if (wret)
-				ret = wret;
+			del_ptr(trans, root, path, level + 1, pslot + 1);
 			root_sub_used(root, right->len);
 			btrfs_free_tree_block(trans, root, right, 0, 1, 0);
-			free_extent_buffer(right);
+			free_extent_buffer_stale(right);
 			right = NULL;
 		} else {
 			struct btrfs_disk_key right_key;
@@ -1035,7 +1060,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
-		BUG_ON(!left);
+		if (!left) {
+			ret = -EROFS;
+			btrfs_std_error(root->fs_info, ret);
+			goto enospc;
+		}
 		wret = balance_node_right(trans, root, mid, left);
 		if (wret < 0) {
 			ret = wret;
@@ -1051,12 +1080,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		if (btrfs_header_nritems(mid) == 0) {
 			clean_tree_block(trans, root, mid);
 			btrfs_tree_unlock(mid);
-			wret = del_ptr(trans, root, path, level + 1, pslot);
-			if (wret)
-				ret = wret;
+			del_ptr(trans, root, path, level + 1, pslot);
 			root_sub_used(root, mid->len);
 			btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
-			free_extent_buffer(mid);
+			free_extent_buffer_stale(mid);
 			mid = NULL;
 		} else {
 			/* update the parent key to reflect our changes */
@@ -1382,7 +1409,8 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 * if lowest_unlock is 1, level 0 won't be unlocked
 */
 static noinline void unlock_up(struct btrfs_path *path, int level,
-			       int lowest_unlock)
+			       int lowest_unlock, int min_write_lock_level,
+			       int *write_lock_level)
 {
 	int i;
 	int skip_level = level;
@@ -1414,6 +1442,11 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
 			btrfs_tree_unlock_rw(t, path->locks[i]);
 			path->locks[i] = 0;
+			if (write_lock_level &&
+			    i > min_write_lock_level &&
+			    i <= *write_lock_level) {
+				*write_lock_level = i - 1;
+			}
 		}
 	}
 }
@@ -1637,6 +1670,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	/* everything at write_lock_level or lower must be write locked */
 	int write_lock_level = 0;
 	u8 lowest_level = 0;
+	int min_write_lock_level;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
@@ -1664,6 +1698,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (cow && (p->keep_locks || p->lowest_level))
 		write_lock_level = BTRFS_MAX_LEVEL;
 
+	min_write_lock_level = write_lock_level;
+
 again:
 	/*
	 * we try very hard to do read locks on the root
@@ -1795,7 +1831,8 @@ cow_done:
 				goto again;
 			}
 
-			unlock_up(p, level, lowest_unlock);
+			unlock_up(p, level, lowest_unlock,
+				  min_write_lock_level, &write_lock_level);
 
 			if (level == lowest_level) {
 				if (dec)
@@ -1857,7 +1894,8 @@ cow_done:
 				}
 			}
 			if (!p->search_for_split)
-				unlock_up(p, level, lowest_unlock);
+				unlock_up(p, level, lowest_unlock,
+					  min_write_lock_level, &write_lock_level);
 			goto done;
 		}
 	}
@@ -1881,15 +1919,12 @@ done:
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 *
- * If this fails to write a tree block, it returns -1, but continues
- * fixing up the blocks in ram so the tree is consistent.
 */
-static int fixup_low_keys(struct btrfs_trans_handle *trans,
+static void fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
 {
 	int i;
-	int ret = 0;
 	struct extent_buffer *t;
 
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1902,7 +1937,6 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
 		if (tslot != 0)
 			break;
 	}
-	return ret;
 }
 
 /*
@@ -1911,9 +1945,9 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
 * This function isn't completely safe. It's the caller's responsibility
 * that the new key won't break the order
 */
-int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *new_key)
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *eb;
@@ -1923,13 +1957,11 @@ int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
 	slot = path->slots[0];
 	if (slot > 0) {
 		btrfs_item_key(eb, &disk_key, slot - 1);
-		if (comp_keys(&disk_key, new_key) >= 0)
-			return -1;
+		BUG_ON(comp_keys(&disk_key, new_key) >= 0);
 	}
 	if (slot < btrfs_header_nritems(eb) - 1) {
 		btrfs_item_key(eb, &disk_key, slot + 1);
-		if (comp_keys(&disk_key, new_key) <= 0)
-			return -1;
+		BUG_ON(comp_keys(&disk_key, new_key) <= 0);
 	}
 
 	btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -1937,7 +1969,6 @@ int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(eb);
 	if (slot == 0)
 		fixup_low_keys(trans, root, path, &disk_key, 1);
-	return 0;
 }
 
 /*
@@ -2140,12 +2171,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 *
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
- *
- * returns zero on success and < 0 on any error
 */
-static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
-		      *root, struct btrfs_path *path, struct btrfs_disk_key
-		      *key, u64 bytenr, int slot, int level)
+static void insert_ptr(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct btrfs_path *path,
+		       struct btrfs_disk_key *key, u64 bytenr,
+		       int slot, int level)
 {
 	struct extent_buffer *lower;
 	int nritems;
@@ -2155,8 +2185,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
 	lower = path->nodes[level];
 	nritems = btrfs_header_nritems(lower);
 	BUG_ON(slot > nritems);
-	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
-		BUG();
+	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
 	if (slot != nritems) {
 		memmove_extent_buffer(lower,
 			      btrfs_node_key_ptr_offset(slot + 1),
@@ -2169,7 +2198,6 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
 	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
 	btrfs_set_header_nritems(lower, nritems + 1);
 	btrfs_mark_buffer_dirty(lower);
-	return 0;
 }
 
 /*
@@ -2190,7 +2218,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	struct btrfs_disk_key disk_key;
 	int mid;
 	int ret;
-	int wret;
 	u32 c_nritems;
 
 	c = path->nodes[level];
@@ -2247,11 +2274,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(c);
 	btrfs_mark_buffer_dirty(split);
 
-	wret = insert_ptr(trans, root, path, &disk_key, split->start,
-			  path->slots[level + 1] + 1,
-			  level + 1);
-	if (wret)
-		ret = wret;
+	insert_ptr(trans, root, path, &disk_key, split->start,
+		   path->slots[level + 1] + 1, level + 1);
 
 	if (path->slots[level] >= mid) {
 		path->slots[level] -= mid;
@@ -2320,6 +2344,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 {
 	struct extent_buffer *left = path->nodes[0];
 	struct extent_buffer *upper = path->nodes[1];
+	struct btrfs_map_token token;
 	struct btrfs_disk_key disk_key;
 	int slot;
 	u32 i;
@@ -2331,6 +2356,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	u32 data_end;
 	u32 this_item_size;
 
+	btrfs_init_map_token(&token);
+
 	if (empty)
 		nr = 0;
 	else
@@ -2408,8 +2435,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	push_space = BTRFS_LEAF_DATA_SIZE(root);
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
-		push_space -= btrfs_item_size(right, item);
-		btrfs_set_item_offset(right, item, push_space);
+		push_space -= btrfs_token_item_size(right, item, &token);
+		btrfs_set_token_item_offset(right, item, push_space, &token);
 	}
 
 	left_nritems -= push_items;
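The btrfs_map_token that starts appearing here pairs with the new token-taking accessors (btrfs_token_item_size(), btrfs_token_item_offset(), btrfs_set_token_item_offset(); the struct-funcs.c entry in the diffstat carries their definitions). As used in these hunks, the token apparently caches mapping state between accessor calls on the same extent buffer, so a loop that touches many item headers does not redo the page mapping every time. The caller-side shape, roughly as the hunks below use it; the loop itself is illustrative:

	/* Illustrative loop in the shape the following hunks use. */
	static void shift_item_offsets(struct extent_buffer *leaf,
				       u32 nritems, u32 shift)
	{
		struct btrfs_map_token token;
		u32 i;

		btrfs_init_map_token(&token);
		for (i = 0; i < nritems; i++) {
			struct btrfs_item *item = btrfs_item_nr(leaf, i);
			u32 ioff = btrfs_token_item_offset(leaf, item, &token);

			/* the token reuses its cached mapping here */
			btrfs_set_token_item_offset(leaf, item,
						    ioff + shift, &token);
		}
	}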
@@ -2537,9 +2564,11 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	u32 old_left_nritems;
 	u32 nr;
 	int ret = 0;
-	int wret;
 	u32 this_item_size;
 	u32 old_left_item_size;
+	struct btrfs_map_token token;
+
+	btrfs_init_map_token(&token);
 
 	if (empty)
 		nr = min(right_nritems, max_slot);
@@ -2600,9 +2629,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
 		item = btrfs_item_nr(left, i);
 
-		ioff = btrfs_item_offset(left, item);
-		btrfs_set_item_offset(left, item,
-		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
+		ioff = btrfs_token_item_offset(left, item, &token);
+		btrfs_set_token_item_offset(left, item,
+		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
+		      &token);
 	}
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
 
@@ -2632,8 +2662,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
 
-		push_space = push_space - btrfs_item_size(right, item);
-		btrfs_set_item_offset(right, item, push_space);
+		push_space = push_space - btrfs_token_item_size(right,
+								item, &token);
+		btrfs_set_token_item_offset(right, item, push_space, &token);
 	}
 
 	btrfs_mark_buffer_dirty(left);
@@ -2643,9 +2674,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		clean_tree_block(trans, root, right);
 
 	btrfs_item_key(right, &disk_key, 0);
-	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
-	if (wret)
-		ret = wret;
+	fixup_low_keys(trans, root, path, &disk_key, 1);
 
 	/* then fixup the leaf pointer in the path */
 	if (path->slots[0] < push_items) {
@@ -2716,7 +2745,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 			      path->nodes[1], slot - 1, &left);
 	if (ret) {
 		/* we hit -ENOSPC, but it isn't fatal here */
-		ret = 1;
+		if (ret == -ENOSPC)
+			ret = 1;
 		goto out;
 	}
 
@@ -2738,22 +2768,21 @@ out:
 /*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
- *
- * returns 0 if all went well and < 0 on failure.
 */
-static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+static noinline void copy_for_split(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct extent_buffer *l,
			       struct extent_buffer *right,
			       int slot, int mid, int nritems)
 {
 	int data_copy_size;
 	int rt_data_off;
 	int i;
-	int ret = 0;
-	int wret;
 	struct btrfs_disk_key disk_key;
+	struct btrfs_map_token token;
+
+	btrfs_init_map_token(&token);
 
 	nritems = nritems - mid;
 	btrfs_set_header_nritems(right, nritems);
@@ -2775,17 +2804,15 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 		struct btrfs_item *item = btrfs_item_nr(right, i);
 		u32 ioff;
 
-		ioff = btrfs_item_offset(right, item);
-		btrfs_set_item_offset(right, item, ioff + rt_data_off);
+		ioff = btrfs_token_item_offset(right, item, &token);
+		btrfs_set_token_item_offset(right, item,
+					    ioff + rt_data_off, &token);
 	}
 
 	btrfs_set_header_nritems(l, mid);
-	ret = 0;
 	btrfs_item_key(right, &disk_key, 0);
-	wret = insert_ptr(trans, root, path, &disk_key, right->start,
-			  path->slots[1] + 1, 1);
-	if (wret)
-		ret = wret;
+	insert_ptr(trans, root, path, &disk_key, right->start,
+		   path->slots[1] + 1, 1);
 
 	btrfs_mark_buffer_dirty(right);
 	btrfs_mark_buffer_dirty(l);
@@ -2803,8 +2830,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 	}
 
 	BUG_ON(path->slots[0] < 0);
-
-	return ret;
 }
 
 /*
@@ -2993,12 +3018,8 @@ again:
 	if (split == 0) {
 		if (mid <= slot) {
 			btrfs_set_header_nritems(right, 0);
-			wret = insert_ptr(trans, root, path,
-					  &disk_key, right->start,
-					  path->slots[1] + 1, 1);
-			if (wret)
-				ret = wret;
-
+			insert_ptr(trans, root, path, &disk_key, right->start,
+				   path->slots[1] + 1, 1);
 			btrfs_tree_unlock(path->nodes[0]);
 			free_extent_buffer(path->nodes[0]);
 			path->nodes[0] = right;
@@ -3006,29 +3027,21 @@ again:
 			path->slots[1] += 1;
 		} else {
 			btrfs_set_header_nritems(right, 0);
-			wret = insert_ptr(trans, root, path,
-					  &disk_key,
-					  right->start,
-					  path->slots[1], 1);
-			if (wret)
-				ret = wret;
+			insert_ptr(trans, root, path, &disk_key, right->start,
+				   path->slots[1], 1);
 			btrfs_tree_unlock(path->nodes[0]);
 			free_extent_buffer(path->nodes[0]);
 			path->nodes[0] = right;
 			path->slots[0] = 0;
-			if (path->slots[1] == 0) {
-				wret = fixup_low_keys(trans, root,
-						path, &disk_key, 1);
-				if (wret)
-					ret = wret;
-			}
+			if (path->slots[1] == 0)
+				fixup_low_keys(trans, root, path,
+					       &disk_key, 1);
 		}
 		btrfs_mark_buffer_dirty(right);
 		return ret;
 	}
 
-	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
-	BUG_ON(ret);
+	copy_for_split(trans, root, path, l, right, slot, mid, nritems);
 
 	if (split == 2) {
 		BUG_ON(num_doubles != 0);
@@ -3036,7 +3049,7 @@ again:
 		goto again;
 	}
 
-	return ret;
+	return 0;
 
 push_for_double:
 	push_for_double_split(trans, root, path, data_size);
@@ -3238,11 +3251,9 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
 		return ret;
 
 	path->slots[0]++;
-	ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
-				     item_size, item_size +
-				     sizeof(struct btrfs_item), 1);
-	BUG_ON(ret);
-
+	setup_items_for_insert(trans, root, path, new_key, &item_size,
			       item_size, item_size +
			       sizeof(struct btrfs_item), 1);
 	leaf = path->nodes[0];
 	memcpy_extent_buffer(leaf,
 			     btrfs_item_ptr_offset(leaf, path->slots[0]),
@@ -3257,10 +3268,10 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
 * off the end of the item or if we shift the item to chop bytes off
 * the front.
 */
-int btrfs_truncate_item(struct btrfs_trans_handle *trans,
+void btrfs_truncate_item(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *path,
			u32 new_size, int from_end)
 {
 	int slot;
 	struct extent_buffer *leaf;
@@ -3271,13 +3282,16 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 	unsigned int old_size;
 	unsigned int size_diff;
 	int i;
+	struct btrfs_map_token token;
+
+	btrfs_init_map_token(&token);
 
 	leaf = path->nodes[0];
 	slot = path->slots[0];
 
 	old_size = btrfs_item_size_nr(leaf, slot);
 	if (old_size == new_size)
-		return 0;
+		return;
 
 	nritems = btrfs_header_nritems(leaf);
 	data_end = leaf_data_end(root, leaf);
@@ -3297,8 +3311,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		ioff = btrfs_item_offset(leaf, item);
-		btrfs_set_item_offset(leaf, item, ioff + size_diff);
+		ioff = btrfs_token_item_offset(leaf, item, &token);
+		btrfs_set_token_item_offset(leaf, item,
+					    ioff + size_diff, &token);
 	}
 
 	/* shift the data */
@@ -3350,15 +3365,14 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 		btrfs_print_leaf(root, leaf);
 		BUG();
 	}
-	return 0;
 }
 
 /*
 * make the item pointed to by the path bigger, data_size is the new size.
 */
-int btrfs_extend_item(struct btrfs_trans_handle *trans,
+void btrfs_extend_item(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      u32 data_size)
 {
 	int slot;
 	struct extent_buffer *leaf;
@@ -3368,6 +3382,9 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 	unsigned int old_data;
 	unsigned int old_size;
 	int i;
3385 struct btrfs_map_token token;
3386
3387 btrfs_init_map_token(&token);
3371 3388
3372 leaf = path->nodes[0]; 3389 leaf = path->nodes[0];
3373 3390
@@ -3397,8 +3414,9 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3397 u32 ioff; 3414 u32 ioff;
3398 item = btrfs_item_nr(leaf, i); 3415 item = btrfs_item_nr(leaf, i);
3399 3416
3400 ioff = btrfs_item_offset(leaf, item); 3417 ioff = btrfs_token_item_offset(leaf, item, &token);
3401 btrfs_set_item_offset(leaf, item, ioff - data_size); 3418 btrfs_set_token_item_offset(leaf, item,
3419 ioff - data_size, &token);
3402 } 3420 }
3403 3421
3404 /* shift the data */ 3422 /* shift the data */
@@ -3416,7 +3434,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3416 btrfs_print_leaf(root, leaf); 3434 btrfs_print_leaf(root, leaf);
3417 BUG(); 3435 BUG();
3418 } 3436 }
3419 return 0;
3420} 3437}
3421 3438
3422/* 3439/*
@@ -3441,6 +3458,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3441 unsigned int data_end; 3458 unsigned int data_end;
3442 struct btrfs_disk_key disk_key; 3459 struct btrfs_disk_key disk_key;
3443 struct btrfs_key found_key; 3460 struct btrfs_key found_key;
3461 struct btrfs_map_token token;
3462
3463 btrfs_init_map_token(&token);
3444 3464
3445 for (i = 0; i < nr; i++) { 3465 for (i = 0; i < nr; i++) {
3446 if (total_size + data_size[i] + sizeof(struct btrfs_item) > 3466 if (total_size + data_size[i] + sizeof(struct btrfs_item) >
@@ -3506,8 +3526,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3506 u32 ioff; 3526 u32 ioff;
3507 3527
3508 item = btrfs_item_nr(leaf, i); 3528 item = btrfs_item_nr(leaf, i);
3509 ioff = btrfs_item_offset(leaf, item); 3529 ioff = btrfs_token_item_offset(leaf, item, &token);
3510 btrfs_set_item_offset(leaf, item, ioff - total_data); 3530 btrfs_set_token_item_offset(leaf, item,
3531 ioff - total_data, &token);
3511 } 3532 }
3512 /* shift the items */ 3533 /* shift the items */
3513 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3534 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3534,9 +3555,10 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3534 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3555 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3535 btrfs_set_item_key(leaf, &disk_key, slot + i); 3556 btrfs_set_item_key(leaf, &disk_key, slot + i);
3536 item = btrfs_item_nr(leaf, slot + i); 3557 item = btrfs_item_nr(leaf, slot + i);
3537 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3558 btrfs_set_token_item_offset(leaf, item,
3559 data_end - data_size[i], &token);
3538 data_end -= data_size[i]; 3560 data_end -= data_size[i];
3539 btrfs_set_item_size(leaf, item, data_size[i]); 3561 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
3540 } 3562 }
3541 btrfs_set_header_nritems(leaf, nritems + nr); 3563 btrfs_set_header_nritems(leaf, nritems + nr);
3542 btrfs_mark_buffer_dirty(leaf); 3564 btrfs_mark_buffer_dirty(leaf);
@@ -3544,7 +3566,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3544 ret = 0; 3566 ret = 0;
3545 if (slot == 0) { 3567 if (slot == 0) {
3546 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3568 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3547 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3569 fixup_low_keys(trans, root, path, &disk_key, 1);
3548 } 3570 }
3549 3571
3550 if (btrfs_leaf_free_space(root, leaf) < 0) { 3572 if (btrfs_leaf_free_space(root, leaf) < 0) {
@@ -3562,19 +3584,21 @@ out:
3562 * to save stack depth by doing the bulk of the work in a function 3584 * to save stack depth by doing the bulk of the work in a function
3563 * that doesn't call btrfs_search_slot 3585 * that doesn't call btrfs_search_slot
3564 */ 3586 */
3565int setup_items_for_insert(struct btrfs_trans_handle *trans, 3587void setup_items_for_insert(struct btrfs_trans_handle *trans,
3566 struct btrfs_root *root, struct btrfs_path *path, 3588 struct btrfs_root *root, struct btrfs_path *path,
3567 struct btrfs_key *cpu_key, u32 *data_size, 3589 struct btrfs_key *cpu_key, u32 *data_size,
3568 u32 total_data, u32 total_size, int nr) 3590 u32 total_data, u32 total_size, int nr)
3569{ 3591{
3570 struct btrfs_item *item; 3592 struct btrfs_item *item;
3571 int i; 3593 int i;
3572 u32 nritems; 3594 u32 nritems;
3573 unsigned int data_end; 3595 unsigned int data_end;
3574 struct btrfs_disk_key disk_key; 3596 struct btrfs_disk_key disk_key;
3575 int ret;
3576 struct extent_buffer *leaf; 3597 struct extent_buffer *leaf;
3577 int slot; 3598 int slot;
3599 struct btrfs_map_token token;
3600
3601 btrfs_init_map_token(&token);
3578 3602
3579 leaf = path->nodes[0]; 3603 leaf = path->nodes[0];
3580 slot = path->slots[0]; 3604 slot = path->slots[0];
@@ -3606,8 +3630,9 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
3606 u32 ioff; 3630 u32 ioff;
3607 3631
3608 item = btrfs_item_nr(leaf, i); 3632 item = btrfs_item_nr(leaf, i);
3609 ioff = btrfs_item_offset(leaf, item); 3633 ioff = btrfs_token_item_offset(leaf, item, &token);
3610 btrfs_set_item_offset(leaf, item, ioff - total_data); 3634 btrfs_set_token_item_offset(leaf, item,
3635 ioff - total_data, &token);
3611 } 3636 }
3612 /* shift the items */ 3637 /* shift the items */
3613 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3638 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3626,17 +3651,17 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
3626 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3651 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3627 btrfs_set_item_key(leaf, &disk_key, slot + i); 3652 btrfs_set_item_key(leaf, &disk_key, slot + i);
3628 item = btrfs_item_nr(leaf, slot + i); 3653 item = btrfs_item_nr(leaf, slot + i);
3629 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3654 btrfs_set_token_item_offset(leaf, item,
3655 data_end - data_size[i], &token);
3630 data_end -= data_size[i]; 3656 data_end -= data_size[i];
3631 btrfs_set_item_size(leaf, item, data_size[i]); 3657 btrfs_set_token_item_size(leaf, item, data_size[i], &token);
3632 } 3658 }
3633 3659
3634 btrfs_set_header_nritems(leaf, nritems + nr); 3660 btrfs_set_header_nritems(leaf, nritems + nr);
3635 3661
3636 ret = 0;
3637 if (slot == 0) { 3662 if (slot == 0) {
3638 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3663 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3639 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3664 fixup_low_keys(trans, root, path, &disk_key, 1);
3640 } 3665 }
3641 btrfs_unlock_up_safe(path, 1); 3666 btrfs_unlock_up_safe(path, 1);
3642 btrfs_mark_buffer_dirty(leaf); 3667 btrfs_mark_buffer_dirty(leaf);
@@ -3645,7 +3670,6 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
3645 btrfs_print_leaf(root, leaf); 3670 btrfs_print_leaf(root, leaf);
3646 BUG(); 3671 BUG();
3647 } 3672 }
3648 return ret;
3649} 3673}
3650 3674
3651/* 3675/*
@@ -3672,16 +3696,14 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3672 if (ret == 0) 3696 if (ret == 0)
3673 return -EEXIST; 3697 return -EEXIST;
3674 if (ret < 0) 3698 if (ret < 0)
3675 goto out; 3699 return ret;
3676 3700
3677 slot = path->slots[0]; 3701 slot = path->slots[0];
3678 BUG_ON(slot < 0); 3702 BUG_ON(slot < 0);
3679 3703
3680 ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, 3704 setup_items_for_insert(trans, root, path, cpu_key, data_size,
3681 total_data, total_size, nr); 3705 total_data, total_size, nr);
3682 3706 return 0;
3683out:
3684 return ret;
3685} 3707}
3686 3708
3687/* 3709/*
@@ -3717,13 +3739,11 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3717 * the tree should have been previously balanced so the deletion does not 3739 * the tree should have been previously balanced so the deletion does not
3718 * empty a node. 3740 * empty a node.
3719 */ 3741 */
3720static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3742static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3721 struct btrfs_path *path, int level, int slot) 3743 struct btrfs_path *path, int level, int slot)
3722{ 3744{
3723 struct extent_buffer *parent = path->nodes[level]; 3745 struct extent_buffer *parent = path->nodes[level];
3724 u32 nritems; 3746 u32 nritems;
3725 int ret = 0;
3726 int wret;
3727 3747
3728 nritems = btrfs_header_nritems(parent); 3748 nritems = btrfs_header_nritems(parent);
3729 if (slot != nritems - 1) { 3749 if (slot != nritems - 1) {
@@ -3743,12 +3763,9 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3743 struct btrfs_disk_key disk_key; 3763 struct btrfs_disk_key disk_key;
3744 3764
3745 btrfs_node_key(parent, &disk_key, 0); 3765 btrfs_node_key(parent, &disk_key, 0);
3746 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); 3766 fixup_low_keys(trans, root, path, &disk_key, level + 1);
3747 if (wret)
3748 ret = wret;
3749 } 3767 }
3750 btrfs_mark_buffer_dirty(parent); 3768 btrfs_mark_buffer_dirty(parent);
3751 return ret;
3752} 3769}
3753 3770
3754/* 3771/*
@@ -3761,17 +3778,13 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3761 * The path must have already been setup for deleting the leaf, including 3778 * The path must have already been setup for deleting the leaf, including
3762 * all the proper balancing. path->nodes[1] must be locked. 3779 * all the proper balancing. path->nodes[1] must be locked.
3763 */ 3780 */
3764static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3781static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
3765 struct btrfs_root *root, 3782 struct btrfs_root *root,
3766 struct btrfs_path *path, 3783 struct btrfs_path *path,
3767 struct extent_buffer *leaf) 3784 struct extent_buffer *leaf)
3768{ 3785{
3769 int ret;
3770
3771 WARN_ON(btrfs_header_generation(leaf) != trans->transid); 3786 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
3772 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3787 del_ptr(trans, root, path, 1, path->slots[1]);
3773 if (ret)
3774 return ret;
3775 3788
3776 /* 3789 /*
3777 * btrfs_free_extent is expensive, we want to make sure we 3790 * btrfs_free_extent is expensive, we want to make sure we
@@ -3781,8 +3794,9 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3781 3794
3782 root_sub_used(root, leaf->len); 3795 root_sub_used(root, leaf->len);
3783 3796
3797 extent_buffer_get(leaf);
3784 btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); 3798 btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
3785 return 0; 3799 free_extent_buffer_stale(leaf);
3786} 3800}
3787/* 3801/*
3788 * delete the item at the leaf level in path. If that empties 3802 * delete the item at the leaf level in path. If that empties
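The two lines added to btrfs_del_leaf() implement a pin-then-release pattern: take an extra reference on the leaf before btrfs_free_tree_block(), which can drop the tree's own reference, so the buffer can still be marked stale (and its pages dropped from cache) afterwards. A minimal userspace sketch of that dance, with every name invented for the illustration:

#include <stdio.h>
#include <stdlib.h>

struct buf { int refs; int stale; };

static void buf_get(struct buf *b) { b->refs++; }

static void buf_put(struct buf *b)
{
	if (--b->refs == 0) {
		printf("buffer released\n");
		free(b);
	}
}

/* stands in for btrfs_free_tree_block(): drops the tree's reference */
static void free_tree_block(struct buf *b) { buf_put(b); }

int main(void)
{
	struct buf *leaf = calloc(1, sizeof(*leaf));

	leaf->refs = 1;		/* the tree's reference */
	buf_get(leaf);		/* pin: extent_buffer_get() analogue */
	free_tree_block(leaf);	/* tree lets go; our pin keeps it alive */
	leaf->stale = 1;	/* safe to touch only because of the pin */
	buf_put(leaf);		/* free_extent_buffer_stale() analogue */
	return 0;
}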
@@ -3799,6 +3813,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3799 int wret; 3813 int wret;
3800 int i; 3814 int i;
3801 u32 nritems; 3815 u32 nritems;
3816 struct btrfs_map_token token;
3817
3818 btrfs_init_map_token(&token);
3802 3819
3803 leaf = path->nodes[0]; 3820 leaf = path->nodes[0];
3804 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); 3821 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
@@ -3820,8 +3837,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3820 u32 ioff; 3837 u32 ioff;
3821 3838
3822 item = btrfs_item_nr(leaf, i); 3839 item = btrfs_item_nr(leaf, i);
3823 ioff = btrfs_item_offset(leaf, item); 3840 ioff = btrfs_token_item_offset(leaf, item, &token);
3824 btrfs_set_item_offset(leaf, item, ioff + dsize); 3841 btrfs_set_token_item_offset(leaf, item,
3842 ioff + dsize, &token);
3825 } 3843 }
3826 3844
3827 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), 3845 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
@@ -3839,8 +3857,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3839 } else { 3857 } else {
3840 btrfs_set_path_blocking(path); 3858 btrfs_set_path_blocking(path);
3841 clean_tree_block(trans, root, leaf); 3859 clean_tree_block(trans, root, leaf);
3842 ret = btrfs_del_leaf(trans, root, path, leaf); 3860 btrfs_del_leaf(trans, root, path, leaf);
3843 BUG_ON(ret);
3844 } 3861 }
3845 } else { 3862 } else {
3846 int used = leaf_space_used(leaf, 0, nritems); 3863 int used = leaf_space_used(leaf, 0, nritems);
@@ -3848,10 +3865,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3848 struct btrfs_disk_key disk_key; 3865 struct btrfs_disk_key disk_key;
3849 3866
3850 btrfs_item_key(leaf, &disk_key, 0); 3867 btrfs_item_key(leaf, &disk_key, 0);
3851 wret = fixup_low_keys(trans, root, path, 3868 fixup_low_keys(trans, root, path, &disk_key, 1);
3852 &disk_key, 1);
3853 if (wret)
3854 ret = wret;
3855 } 3869 }
3856 3870
3857 /* delete the leaf if it is mostly empty */ 3871 /* delete the leaf if it is mostly empty */
@@ -3879,9 +3893,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3879 3893
3880 if (btrfs_header_nritems(leaf) == 0) { 3894 if (btrfs_header_nritems(leaf) == 0) {
3881 path->slots[1] = slot; 3895 path->slots[1] = slot;
3882 ret = btrfs_del_leaf(trans, root, path, leaf); 3896 btrfs_del_leaf(trans, root, path, leaf);
3883 BUG_ON(ret);
3884 free_extent_buffer(leaf); 3897 free_extent_buffer(leaf);
3898 ret = 0;
3885 } else { 3899 } else {
3886 /* if we're still in the path, make sure 3900 /* if we're still in the path, make sure
3887 * we're dirty. Otherwise, one of the 3901 * we're dirty. Otherwise, one of the
@@ -4059,18 +4073,18 @@ find_next_key:
4059 path->slots[level] = slot; 4073 path->slots[level] = slot;
4060 if (level == path->lowest_level) { 4074 if (level == path->lowest_level) {
4061 ret = 0; 4075 ret = 0;
4062 unlock_up(path, level, 1); 4076 unlock_up(path, level, 1, 0, NULL);
4063 goto out; 4077 goto out;
4064 } 4078 }
4065 btrfs_set_path_blocking(path); 4079 btrfs_set_path_blocking(path);
4066 cur = read_node_slot(root, cur, slot); 4080 cur = read_node_slot(root, cur, slot);
4067 BUG_ON(!cur); 4081 BUG_ON(!cur); /* -ENOMEM */
4068 4082
4069 btrfs_tree_read_lock(cur); 4083 btrfs_tree_read_lock(cur);
4070 4084
4071 path->locks[level - 1] = BTRFS_READ_LOCK; 4085 path->locks[level - 1] = BTRFS_READ_LOCK;
4072 path->nodes[level - 1] = cur; 4086 path->nodes[level - 1] = cur;
4073 unlock_up(path, level, 1); 4087 unlock_up(path, level, 1, 0, NULL);
4074 btrfs_clear_path_blocking(path, NULL, 0); 4088 btrfs_clear_path_blocking(path, NULL, 0);
4075 } 4089 }
4076out: 4090out:
@@ -4306,7 +4320,7 @@ again:
4306 } 4320 }
4307 ret = 0; 4321 ret = 0;
4308done: 4322done:
4309 unlock_up(path, 0, 1); 4323 unlock_up(path, 0, 1, 0, NULL);
4310 path->leave_spinning = old_spinning; 4324 path->leave_spinning = old_spinning;
4311 if (!old_spinning) 4325 if (!old_spinning)
4312 btrfs_set_path_blocking(path); 4326 btrfs_set_path_blocking(path);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80b6486fd5e6..5b8ef8eb3521 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -48,6 +48,8 @@ struct btrfs_ordered_sum;
48 48
49#define BTRFS_MAGIC "_BHRfS_M" 49#define BTRFS_MAGIC "_BHRfS_M"
50 50
51#define BTRFS_MAX_MIRRORS 2
52
51#define BTRFS_MAX_LEVEL 8 53#define BTRFS_MAX_LEVEL 8
52 54
53#define BTRFS_COMPAT_EXTENT_TREE_V0 55#define BTRFS_COMPAT_EXTENT_TREE_V0
@@ -138,6 +140,12 @@ struct btrfs_ordered_sum;
138#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 140#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
139 141
140/* 142/*
143 * the max metadata block size. This limit is somewhat artificial,
144 * but the memmove costs go through the roof for larger blocks.
145 */
146#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
147
148/*
141 * we can actually store much bigger names, but lets not confuse the rest 149 * we can actually store much bigger names, but lets not confuse the rest
142 * of linux 150 * of linux
143 */ 151 */
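BTRFS_MAX_METADATA_BLOCKSIZE caps the new big-metadata support at 64KB nodes; the comment in the hunk explains why (memmove costs inside a node grow quickly with block size). For illustration, a standalone sanity check in the spirit of the "enhance superblock sanity checks" patch in this pull, assuming a valid nodesize is a power of two, at least one sector, and at most the cap; check_nodesize() and its diagnostics are invented for the sketch, not the kernel's actual code:

#include <stdio.h>

#define BTRFS_MAX_METADATA_BLOCKSIZE 65536

static int check_nodesize(unsigned int nodesize, unsigned int sectorsize)
{
	/* power of two, at least one sector, no larger than the cap */
	if (nodesize < sectorsize || (nodesize & (nodesize - 1)) ||
	    nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
		fprintf(stderr, "invalid nodesize %u\n", nodesize);
		return -1;
	}
	return 0;
}

int main(void)
{
	return check_nodesize(16384, 4096);	/* 16K nodes: accepted */
}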
@@ -461,6 +469,19 @@ struct btrfs_super_block {
461#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) 469#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
462#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) 470#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
463#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) 471#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
472/*
473 * some patches floated around with a second compression method
 474 * let's save that incompat here for when they do get in
475 * Note we don't actually support it, we're just reserving the
476 * number
477 */
478#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4)
479
480/*
481 * older kernels tried to do bigger metadata blocks, but the
 482 * code was pretty buggy. Let's not let them try anymore.
483 */
484#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
464 485
465#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 486#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
466#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 487#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
@@ -468,6 +489,7 @@ struct btrfs_super_block {
468 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 489 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
469 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ 490 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
470 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ 491 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
492 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
471 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) 493 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
472 494
473/* 495/*
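Adding BIG_METADATA to BTRFS_FEATURE_INCOMPAT_SUPP is what lets new kernels mount such filesystems while old ones refuse. A minimal sketch of the supported-mask test, assuming MIXED_BACKREF is bit 0 (the other values appear in the hunk above) and using an invented function name; note the reserved LZOv2 bit is deliberately absent from the mask:

#include <stdint.h>

#define INCOMPAT_MIXED_BACKREF   (1ULL << 0)
#define INCOMPAT_DEFAULT_SUBVOL  (1ULL << 1)
#define INCOMPAT_MIXED_GROUPS    (1ULL << 2)
#define INCOMPAT_COMPRESS_LZO    (1ULL << 3)
#define INCOMPAT_BIG_METADATA    (1ULL << 5)

#define INCOMPAT_SUPP (INCOMPAT_MIXED_BACKREF | INCOMPAT_DEFAULT_SUBVOL | \
		       INCOMPAT_MIXED_GROUPS | INCOMPAT_BIG_METADATA | \
		       INCOMPAT_COMPRESS_LZO)

/* Return 0 iff every incompat bit in the superblock is one we implement. */
static int incompat_supported(uint64_t sb_flags)
{
	return (sb_flags & ~INCOMPAT_SUPP) ? -1 : 0;
}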
@@ -829,6 +851,21 @@ struct btrfs_csum_item {
829 */ 851 */
830#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) 852#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
831 853
854#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
855 BTRFS_AVAIL_ALLOC_BIT_SINGLE)
856
857static inline u64 chunk_to_extended(u64 flags)
858{
859 if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
860 flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;
861
862 return flags;
863}
864static inline u64 extended_to_chunk(u64 flags)
865{
866 return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
867}
868
832struct btrfs_block_group_item { 869struct btrfs_block_group_item {
833 __le64 used; 870 __le64 used;
834 __le64 chunk_objectid; 871 __le64 chunk_objectid;
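chunk_to_extended() and extended_to_chunk() translate between on-disk chunk flags, where "single" is simply the absence of any profile bit, and the in-memory extended form that represents it explicitly via BTRFS_AVAIL_ALLOC_BIT_SINGLE. A small round-trip sketch of the helpers quoted above; PROFILE_MASK is a stand-in for the real BTRFS_BLOCK_GROUP_PROFILE_MASK value:

#include <assert.h>
#include <stdint.h>

#define AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)	/* value from this diff */
#define PROFILE_MASK 0x78ULL			/* stand-in for the RAID/DUP bits */

static uint64_t chunk_to_extended(uint64_t flags)
{
	if ((flags & PROFILE_MASK) == 0)	/* no profile bit => "single" */
		flags |= AVAIL_ALLOC_BIT_SINGLE;
	return flags;
}

static uint64_t extended_to_chunk(uint64_t flags)
{
	return flags & ~AVAIL_ALLOC_BIT_SINGLE;
}

int main(void)
{
	uint64_t disk = 0;	/* an unreplicated ("single") chunk on disk */
	assert(extended_to_chunk(chunk_to_extended(disk)) == disk);
	return 0;
}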
@@ -1503,6 +1540,7 @@ struct btrfs_ioctl_defrag_range_args {
1503#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19) 1540#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
1504#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) 1541#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
1505#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) 1542#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
1543#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
1506 1544
1507#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1545#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1508#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1546#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1526,6 +1564,17 @@ struct btrfs_ioctl_defrag_range_args {
1526 1564
1527#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) 1565#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
1528 1566
1567struct btrfs_map_token {
1568 struct extent_buffer *eb;
1569 char *kaddr;
1570 unsigned long offset;
1571};
1572
 1573static inline void btrfs_init_map_token(struct btrfs_map_token *token)
1574{
1575 memset(token, 0, sizeof(*token));
1576}
1577
1529/* some macros to generate set/get funcs for the struct fields. This 1578/* some macros to generate set/get funcs for the struct fields. This
1530 * assumes there is a lefoo_to_cpu for every type, so lets make a simple 1579 * assumes there is a lefoo_to_cpu for every type, so lets make a simple
1531 * one for u8: 1580 * one for u8:
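struct btrfs_map_token is what the many btrfs_token_* conversions in the ctree.c hunks above thread through their loops: it caches one mapped page so consecutive accessors touching the same page skip the map/unmap round trip. A self-contained userspace analogue of the idea, every name invented for the illustration:

#include <stddef.h>
#include <stdio.h>

struct buffer { char pages[4][4096]; };

struct map_token {
	struct buffer *b;
	char *kaddr;		/* cached mapping of one page */
	unsigned long page;	/* which page kaddr covers */
};

static void init_map_token(struct map_token *t) { t->b = NULL; }

static char *token_map(struct buffer *b, unsigned long off,
		       struct map_token *t)
{
	unsigned long page = off / 4096;

	if (t->b != b || t->page != page) {	/* miss: (re)map the page */
		t->b = b;
		t->page = page;
		t->kaddr = b->pages[page];	/* stands in for kmap_atomic() */
	}
	return t->kaddr + off % 4096;
}

int main(void)
{
	struct buffer b = { { { 0 } } };
	struct map_token token;
	unsigned long off;

	init_map_token(&token);
	/* repeated accesses to nearby offsets reuse the cached mapping */
	for (off = 0; off < 128; off += 8)
		*token_map(&b, off, &token) = 1;
	printf("%d\n", b.pages[0][0]);
	return 0;
}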
@@ -1549,20 +1598,22 @@ struct btrfs_ioctl_defrag_range_args {
1549#ifndef BTRFS_SETGET_FUNCS 1598#ifndef BTRFS_SETGET_FUNCS
1550#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ 1599#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
1551u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ 1600u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
1601u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token); \
1602void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\
1552void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); 1603void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
1553#endif 1604#endif
1554 1605
1555#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ 1606#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
1556static inline u##bits btrfs_##name(struct extent_buffer *eb) \ 1607static inline u##bits btrfs_##name(struct extent_buffer *eb) \
1557{ \ 1608{ \
1558 type *p = page_address(eb->first_page); \ 1609 type *p = page_address(eb->pages[0]); \
1559 u##bits res = le##bits##_to_cpu(p->member); \ 1610 u##bits res = le##bits##_to_cpu(p->member); \
1560 return res; \ 1611 return res; \
1561} \ 1612} \
1562static inline void btrfs_set_##name(struct extent_buffer *eb, \ 1613static inline void btrfs_set_##name(struct extent_buffer *eb, \
1563 u##bits val) \ 1614 u##bits val) \
1564{ \ 1615{ \
1565 type *p = page_address(eb->first_page); \ 1616 type *p = page_address(eb->pages[0]); \
1566 p->member = cpu_to_le##bits(val); \ 1617 p->member = cpu_to_le##bits(val); \
1567} 1618}
1568 1619
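For concreteness, this is what the extended BTRFS_SETGET_FUNCS declares when instantiated for the leaf item offset field used throughout the hunks above (a sketch: the definitions themselves live elsewhere, and the 32-bit width is inferred from the u32 callers in this diff):

u32 btrfs_item_offset(struct extent_buffer *eb, struct btrfs_item *s);
u32 btrfs_token_item_offset(struct extent_buffer *eb, struct btrfs_item *s,
			    struct btrfs_map_token *token);
void btrfs_set_token_item_offset(struct extent_buffer *eb,
				 struct btrfs_item *s, u32 val,
				 struct btrfs_map_token *token);
void btrfs_set_item_offset(struct extent_buffer *eb, struct btrfs_item *s,
			   u32 val);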
@@ -2466,8 +2517,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2466 struct btrfs_root *root, 2517 struct btrfs_root *root,
2467 u64 num_bytes, u64 min_alloc_size, 2518 u64 num_bytes, u64 min_alloc_size,
2468 u64 empty_size, u64 hint_byte, 2519 u64 empty_size, u64 hint_byte,
2469 u64 search_end, struct btrfs_key *ins, 2520 struct btrfs_key *ins, u64 data);
2470 u64 data);
2471int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2521int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2472 struct extent_buffer *buf, int full_backref, int for_cow); 2522 struct extent_buffer *buf, int full_backref, int for_cow);
2473int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2523int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2484,8 +2534,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
2484int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 2534int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
2485int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 2535int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
2486 u64 start, u64 len); 2536 u64 start, u64 len);
2487int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 2537void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
2488 struct btrfs_root *root); 2538 struct btrfs_root *root);
2489int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 2539int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2490 struct btrfs_root *root); 2540 struct btrfs_root *root);
2491int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 2541int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2548,8 +2598,8 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
2548 u64 num_bytes); 2598 u64 num_bytes);
2549int btrfs_set_block_group_ro(struct btrfs_root *root, 2599int btrfs_set_block_group_ro(struct btrfs_root *root,
2550 struct btrfs_block_group_cache *cache); 2600 struct btrfs_block_group_cache *cache);
2551int btrfs_set_block_group_rw(struct btrfs_root *root, 2601void btrfs_set_block_group_rw(struct btrfs_root *root,
2552 struct btrfs_block_group_cache *cache); 2602 struct btrfs_block_group_cache *cache);
2553void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2603void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
2554u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); 2604u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
2555int btrfs_error_unpin_extent_range(struct btrfs_root *root, 2605int btrfs_error_unpin_extent_range(struct btrfs_root *root,
@@ -2568,9 +2618,9 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
2568int btrfs_previous_item(struct btrfs_root *root, 2618int btrfs_previous_item(struct btrfs_root *root,
2569 struct btrfs_path *path, u64 min_objectid, 2619 struct btrfs_path *path, u64 min_objectid,
2570 int type); 2620 int type);
2571int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 2621void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
2572 struct btrfs_root *root, struct btrfs_path *path, 2622 struct btrfs_root *root, struct btrfs_path *path,
2573 struct btrfs_key *new_key); 2623 struct btrfs_key *new_key);
2574struct extent_buffer *btrfs_root_node(struct btrfs_root *root); 2624struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
2575struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); 2625struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
2576int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 2626int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
@@ -2590,12 +2640,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
2590 struct extent_buffer **cow_ret, u64 new_root_objectid); 2640 struct extent_buffer **cow_ret, u64 new_root_objectid);
2591int btrfs_block_can_be_shared(struct btrfs_root *root, 2641int btrfs_block_can_be_shared(struct btrfs_root *root,
2592 struct extent_buffer *buf); 2642 struct extent_buffer *buf);
2593int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root 2643void btrfs_extend_item(struct btrfs_trans_handle *trans,
2594 *root, struct btrfs_path *path, u32 data_size); 2644 struct btrfs_root *root, struct btrfs_path *path,
2595int btrfs_truncate_item(struct btrfs_trans_handle *trans, 2645 u32 data_size);
2596 struct btrfs_root *root, 2646void btrfs_truncate_item(struct btrfs_trans_handle *trans,
2597 struct btrfs_path *path, 2647 struct btrfs_root *root,
2598 u32 new_size, int from_end); 2648 struct btrfs_path *path,
2649 u32 new_size, int from_end);
2599int btrfs_split_item(struct btrfs_trans_handle *trans, 2650int btrfs_split_item(struct btrfs_trans_handle *trans,
2600 struct btrfs_root *root, 2651 struct btrfs_root *root,
2601 struct btrfs_path *path, 2652 struct btrfs_path *path,
@@ -2629,10 +2680,10 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
2629 return btrfs_del_items(trans, root, path, path->slots[0], 1); 2680 return btrfs_del_items(trans, root, path, path->slots[0], 1);
2630} 2681}
2631 2682
2632int setup_items_for_insert(struct btrfs_trans_handle *trans, 2683void setup_items_for_insert(struct btrfs_trans_handle *trans,
2633 struct btrfs_root *root, struct btrfs_path *path, 2684 struct btrfs_root *root, struct btrfs_path *path,
2634 struct btrfs_key *cpu_key, u32 *data_size, 2685 struct btrfs_key *cpu_key, u32 *data_size,
2635 u32 total_data, u32 total_size, int nr); 2686 u32 total_data, u32 total_size, int nr);
2636int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 2687int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2637 *root, struct btrfs_key *key, void *data, u32 data_size); 2688 *root, struct btrfs_key *key, void *data, u32 data_size);
2638int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 2689int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
@@ -2659,9 +2710,9 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
2659} 2710}
2660int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); 2711int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
2661int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 2712int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
2662void btrfs_drop_snapshot(struct btrfs_root *root, 2713int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
2663 struct btrfs_block_rsv *block_rsv, int update_ref, 2714 struct btrfs_block_rsv *block_rsv,
2664 int for_reloc); 2715 int update_ref, int for_reloc);
2665int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 2716int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2666 struct btrfs_root *root, 2717 struct btrfs_root *root,
2667 struct extent_buffer *node, 2718 struct extent_buffer *node,
@@ -2687,24 +2738,6 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
2687 kfree(fs_info->super_for_commit); 2738 kfree(fs_info->super_for_commit);
2688 kfree(fs_info); 2739 kfree(fs_info);
2689} 2740}
2690/**
2691 * profile_is_valid - tests whether a given profile is valid and reduced
2692 * @flags: profile to validate
2693 * @extended: if true @flags is treated as an extended profile
2694 */
2695static inline int profile_is_valid(u64 flags, int extended)
2696{
2697 u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
2698
2699 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
2700 if (extended)
2701 mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2702
2703 if (flags & mask)
2704 return 0;
2705 /* true if zero or exactly one bit set */
2706 return (flags & (~flags + 1)) == flags;
2707}
2708 2741
2709/* root-item.c */ 2742/* root-item.c */
2710int btrfs_find_root_ref(struct btrfs_root *tree_root, 2743int btrfs_find_root_ref(struct btrfs_root *tree_root,
@@ -2723,9 +2756,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2723int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root 2756int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
2724 *root, struct btrfs_key *key, struct btrfs_root_item 2757 *root, struct btrfs_key *key, struct btrfs_root_item
2725 *item); 2758 *item);
2726int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root 2759int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
2727 *root, struct btrfs_key *key, struct btrfs_root_item 2760 struct btrfs_root *root,
2728 *item); 2761 struct btrfs_key *key,
2762 struct btrfs_root_item *item);
2729int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct 2763int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
2730 btrfs_root_item *item, struct btrfs_key *key); 2764 btrfs_root_item *item, struct btrfs_key *key);
2731int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); 2765int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
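__must_check turns a silently ignored return from btrfs_update_root() or btrfs_drop_snapshot() into a compile-time warning, which matters now that callers are expected to abort the transaction on error rather than BUG. A minimal standalone demonstration of the attribute (the function below is a stand-in, not btrfs code):

#include <stdio.h>

#define __must_check __attribute__((warn_unused_result))

static int __must_check update_root(int fail)
{
	return fail ? -5 /* -EIO */ : 0;
}

int main(void)
{
	int ret = update_root(0);

	/* a bare "update_root(0);" here would draw -Wunused-result */
	if (ret)
		fprintf(stderr, "update failed: %d\n", ret);
	return ret;
}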
@@ -2909,7 +2943,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root);
2909void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, 2943void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2910 struct btrfs_root *root); 2944 struct btrfs_root *root);
2911int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); 2945int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
2912int btrfs_invalidate_inodes(struct btrfs_root *root); 2946void btrfs_invalidate_inodes(struct btrfs_root *root);
2913void btrfs_add_delayed_iput(struct inode *inode); 2947void btrfs_add_delayed_iput(struct inode *inode);
2914void btrfs_run_delayed_iputs(struct btrfs_root *root); 2948void btrfs_run_delayed_iputs(struct btrfs_root *root);
2915int btrfs_prealloc_file_range(struct inode *inode, int mode, 2949int btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -2961,13 +2995,41 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
2961/* super.c */ 2995/* super.c */
2962int btrfs_parse_options(struct btrfs_root *root, char *options); 2996int btrfs_parse_options(struct btrfs_root *root, char *options);
2963int btrfs_sync_fs(struct super_block *sb, int wait); 2997int btrfs_sync_fs(struct super_block *sb, int wait);
2998void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...);
2964void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, 2999void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
2965 unsigned int line, int errno); 3000 unsigned int line, int errno, const char *fmt, ...);
3001
3002void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
3003 struct btrfs_root *root, const char *function,
3004 unsigned int line, int errno);
3005
3006#define btrfs_abort_transaction(trans, root, errno) \
3007do { \
3008 __btrfs_abort_transaction(trans, root, __func__, \
3009 __LINE__, errno); \
3010} while (0)
2966 3011
2967#define btrfs_std_error(fs_info, errno) \ 3012#define btrfs_std_error(fs_info, errno) \
2968do { \ 3013do { \
2969 if ((errno)) \ 3014 if ((errno)) \
2970 __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ 3015 __btrfs_std_error((fs_info), __func__, \
3016 __LINE__, (errno), NULL); \
3017} while (0)
3018
3019#define btrfs_error(fs_info, errno, fmt, args...) \
3020do { \
3021 __btrfs_std_error((fs_info), __func__, __LINE__, \
3022 (errno), fmt, ##args); \
3023} while (0)
3024
3025void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
3026 unsigned int line, int errno, const char *fmt, ...);
3027
3028#define btrfs_panic(fs_info, errno, fmt, args...) \
3029do { \
3030 struct btrfs_fs_info *_i = (fs_info); \
3031 __btrfs_panic(_i, __func__, __LINE__, errno, fmt, ##args); \
3032 BUG_ON(!(_i->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)); \
2971} while (0) 3033} while (0)
2972 3034
2973/* acl.c */ 3035/* acl.c */
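These macros are the core of the new error handling: instead of BUG_ON(ret), a caller reports the failure with function/line context, flags the transaction as aborted, and the filesystem goes read-only. A userspace sketch of the shape of that pattern, with every name invented:

#include <stdio.h>

struct trans { int aborted; };

#define abort_transaction(t, errno)				\
do {								\
	fprintf(stderr, "%s:%d: aborting transaction: %d\n",	\
		__func__, __LINE__, (errno));			\
	(t)->aborted = (errno);					\
} while (0)

static int do_update(struct trans *t, int simulated_err)
{
	int ret = simulated_err;	/* stands in for a failed tree update */

	if (ret) {
		abort_transaction(t, ret);	/* was: BUG_ON(ret) */
		return ret;
	}
	return 0;
}

int main(void)
{
	struct trans t = { 0 };

	do_update(&t, -5);
	printf("trans->aborted = %d\n", t.aborted);
	return 0;
}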
@@ -3003,16 +3065,17 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
3003void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, 3065void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
3004 struct btrfs_pending_snapshot *pending, 3066 struct btrfs_pending_snapshot *pending,
3005 u64 *bytes_to_reserve); 3067 u64 *bytes_to_reserve);
3006void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, 3068int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
3007 struct btrfs_pending_snapshot *pending); 3069 struct btrfs_pending_snapshot *pending);
3008 3070
3009/* scrub.c */ 3071/* scrub.c */
3010int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, 3072int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
3011 struct btrfs_scrub_progress *progress, int readonly); 3073 struct btrfs_scrub_progress *progress, int readonly);
3012int btrfs_scrub_pause(struct btrfs_root *root); 3074void btrfs_scrub_pause(struct btrfs_root *root);
3013int btrfs_scrub_pause_super(struct btrfs_root *root); 3075void btrfs_scrub_pause_super(struct btrfs_root *root);
3014int btrfs_scrub_continue(struct btrfs_root *root); 3076void btrfs_scrub_continue(struct btrfs_root *root);
3015int btrfs_scrub_continue_super(struct btrfs_root *root); 3077void btrfs_scrub_continue_super(struct btrfs_root *root);
3078int __btrfs_scrub_cancel(struct btrfs_fs_info *info);
3016int btrfs_scrub_cancel(struct btrfs_root *root); 3079int btrfs_scrub_cancel(struct btrfs_root *root);
3017int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev); 3080int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
3018int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); 3081int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fe4cd0f1cef1..03e3748d84d0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -115,6 +115,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
115 return NULL; 115 return NULL;
116} 116}
117 117
118/* Will return either the node or PTR_ERR(-ENOMEM) */
118static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( 119static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
119 struct inode *inode) 120 struct inode *inode)
120{ 121{
@@ -836,10 +837,8 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
836 btrfs_clear_path_blocking(path, NULL, 0); 837 btrfs_clear_path_blocking(path, NULL, 0);
837 838
838 /* insert the keys of the items */ 839 /* insert the keys of the items */
839 ret = setup_items_for_insert(trans, root, path, keys, data_size, 840 setup_items_for_insert(trans, root, path, keys, data_size,
840 total_data_size, total_size, nitems); 841 total_data_size, total_size, nitems);
841 if (ret)
842 goto error;
843 842
844 /* insert the dir index items */ 843 /* insert the dir index items */
845 slot = path->slots[0]; 844 slot = path->slots[0];
@@ -1108,16 +1107,25 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1108 return 0; 1107 return 0;
1109} 1108}
1110 1109
1111/* Called when committing the transaction. */ 1110/*
1111 * Called when committing the transaction.
1112 * Returns 0 on success.
 1113 * Returns < 0 on error; the transaction is aborted and any outstanding
 1114 * delayed items are cleaned up.
1115 */
1112int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, 1116int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1113 struct btrfs_root *root) 1117 struct btrfs_root *root)
1114{ 1118{
1119 struct btrfs_root *curr_root = root;
1115 struct btrfs_delayed_root *delayed_root; 1120 struct btrfs_delayed_root *delayed_root;
1116 struct btrfs_delayed_node *curr_node, *prev_node; 1121 struct btrfs_delayed_node *curr_node, *prev_node;
1117 struct btrfs_path *path; 1122 struct btrfs_path *path;
1118 struct btrfs_block_rsv *block_rsv; 1123 struct btrfs_block_rsv *block_rsv;
1119 int ret = 0; 1124 int ret = 0;
1120 1125
1126 if (trans->aborted)
1127 return -EIO;
1128
1121 path = btrfs_alloc_path(); 1129 path = btrfs_alloc_path();
1122 if (!path) 1130 if (!path)
1123 return -ENOMEM; 1131 return -ENOMEM;
@@ -1130,17 +1138,18 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1130 1138
1131 curr_node = btrfs_first_delayed_node(delayed_root); 1139 curr_node = btrfs_first_delayed_node(delayed_root);
1132 while (curr_node) { 1140 while (curr_node) {
1133 root = curr_node->root; 1141 curr_root = curr_node->root;
1134 ret = btrfs_insert_delayed_items(trans, path, root, 1142 ret = btrfs_insert_delayed_items(trans, path, curr_root,
1135 curr_node); 1143 curr_node);
1136 if (!ret) 1144 if (!ret)
1137 ret = btrfs_delete_delayed_items(trans, path, root, 1145 ret = btrfs_delete_delayed_items(trans, path,
1138 curr_node); 1146 curr_root, curr_node);
1139 if (!ret) 1147 if (!ret)
1140 ret = btrfs_update_delayed_inode(trans, root, path, 1148 ret = btrfs_update_delayed_inode(trans, curr_root,
1141 curr_node); 1149 path, curr_node);
1142 if (ret) { 1150 if (ret) {
1143 btrfs_release_delayed_node(curr_node); 1151 btrfs_release_delayed_node(curr_node);
1152 btrfs_abort_transaction(trans, root, ret);
1144 break; 1153 break;
1145 } 1154 }
1146 1155
@@ -1151,6 +1160,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1151 1160
1152 btrfs_free_path(path); 1161 btrfs_free_path(path);
1153 trans->block_rsv = block_rsv; 1162 trans->block_rsv = block_rsv;
1163
1154 return ret; 1164 return ret;
1155} 1165}
1156 1166
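Note the guard added at the top of btrfs_run_delayed_items(): once an earlier phase has aborted the transaction, later phases refuse to touch disk. The same check in isolation, as a userspace sketch with invented names:

#include <errno.h>

struct trans { int aborted; };

int run_delayed_items(struct trans *trans)
{
	if (trans->aborted)	/* a previous phase already failed */
		return -EIO;
	/* ... insert/delete delayed items, aborting on first error ... */
	return 0;
}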
@@ -1371,6 +1381,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
1371 btrfs_wq_run_delayed_node(delayed_root, root, 0); 1381 btrfs_wq_run_delayed_node(delayed_root, root, 0);
1372} 1382}
1373 1383
1384/* Will return 0 or -ENOMEM */
1374int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, 1385int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1375 struct btrfs_root *root, const char *name, 1386 struct btrfs_root *root, const char *name,
1376 int name_len, struct inode *dir, 1387 int name_len, struct inode *dir,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 66e4f29505a3..69f22e3ab3bc 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -420,7 +420,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
420 * this does all the dirty work in terms of maintaining the correct 420 * this does all the dirty work in terms of maintaining the correct
421 * overall modification count. 421 * overall modification count.
422 */ 422 */
423static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info, 423static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
424 struct btrfs_trans_handle *trans, 424 struct btrfs_trans_handle *trans,
425 struct btrfs_delayed_ref_node *ref, 425 struct btrfs_delayed_ref_node *ref,
426 u64 bytenr, u64 num_bytes, 426 u64 bytenr, u64 num_bytes,
@@ -487,20 +487,19 @@ static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info,
487 * we've updated the existing ref, free the newly 487 * we've updated the existing ref, free the newly
488 * allocated ref 488 * allocated ref
489 */ 489 */
490 kfree(ref); 490 kfree(head_ref);
491 } else { 491 } else {
492 delayed_refs->num_heads++; 492 delayed_refs->num_heads++;
493 delayed_refs->num_heads_ready++; 493 delayed_refs->num_heads_ready++;
494 delayed_refs->num_entries++; 494 delayed_refs->num_entries++;
495 trans->delayed_ref_updates++; 495 trans->delayed_ref_updates++;
496 } 496 }
497 return 0;
498} 497}
499 498
500/* 499/*
501 * helper to insert a delayed tree ref into the rbtree. 500 * helper to insert a delayed tree ref into the rbtree.
502 */ 501 */
503static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 502static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
504 struct btrfs_trans_handle *trans, 503 struct btrfs_trans_handle *trans,
505 struct btrfs_delayed_ref_node *ref, 504 struct btrfs_delayed_ref_node *ref,
506 u64 bytenr, u64 num_bytes, u64 parent, 505 u64 bytenr, u64 num_bytes, u64 parent,
@@ -549,18 +548,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
549 * we've updated the existing ref, free the newly 548 * we've updated the existing ref, free the newly
550 * allocated ref 549 * allocated ref
551 */ 550 */
552 kfree(ref); 551 kfree(full_ref);
553 } else { 552 } else {
554 delayed_refs->num_entries++; 553 delayed_refs->num_entries++;
555 trans->delayed_ref_updates++; 554 trans->delayed_ref_updates++;
556 } 555 }
557 return 0;
558} 556}
559 557
560/* 558/*
561 * helper to insert a delayed data ref into the rbtree. 559 * helper to insert a delayed data ref into the rbtree.
562 */ 560 */
563static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info, 561static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
564 struct btrfs_trans_handle *trans, 562 struct btrfs_trans_handle *trans,
565 struct btrfs_delayed_ref_node *ref, 563 struct btrfs_delayed_ref_node *ref,
566 u64 bytenr, u64 num_bytes, u64 parent, 564 u64 bytenr, u64 num_bytes, u64 parent,
@@ -611,12 +609,11 @@ static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info,
611 * we've updated the existing ref, free the newly 609 * we've updated the existing ref, free the newly
612 * allocated ref 610 * allocated ref
613 */ 611 */
614 kfree(ref); 612 kfree(full_ref);
615 } else { 613 } else {
616 delayed_refs->num_entries++; 614 delayed_refs->num_entries++;
617 trans->delayed_ref_updates++; 615 trans->delayed_ref_updates++;
618 } 616 }
619 return 0;
620} 617}
621 618
622/* 619/*
@@ -634,7 +631,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
634 struct btrfs_delayed_tree_ref *ref; 631 struct btrfs_delayed_tree_ref *ref;
635 struct btrfs_delayed_ref_head *head_ref; 632 struct btrfs_delayed_ref_head *head_ref;
636 struct btrfs_delayed_ref_root *delayed_refs; 633 struct btrfs_delayed_ref_root *delayed_refs;
637 int ret;
638 634
639 BUG_ON(extent_op && extent_op->is_data); 635 BUG_ON(extent_op && extent_op->is_data);
640 ref = kmalloc(sizeof(*ref), GFP_NOFS); 636 ref = kmalloc(sizeof(*ref), GFP_NOFS);
@@ -656,14 +652,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
656 * insert both the head node and the new ref without dropping 652 * insert both the head node and the new ref without dropping
657 * the spin lock 653 * the spin lock
658 */ 654 */
659 ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 655 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
660 num_bytes, action, 0); 656 num_bytes, action, 0);
661 BUG_ON(ret);
662 657
663 ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, 658 add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
664 num_bytes, parent, ref_root, level, action, 659 num_bytes, parent, ref_root, level, action,
665 for_cow); 660 for_cow);
666 BUG_ON(ret);
667 if (!need_ref_seq(for_cow, ref_root) && 661 if (!need_ref_seq(for_cow, ref_root) &&
668 waitqueue_active(&delayed_refs->seq_wait)) 662 waitqueue_active(&delayed_refs->seq_wait))
669 wake_up(&delayed_refs->seq_wait); 663 wake_up(&delayed_refs->seq_wait);
@@ -685,7 +679,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
685 struct btrfs_delayed_data_ref *ref; 679 struct btrfs_delayed_data_ref *ref;
686 struct btrfs_delayed_ref_head *head_ref; 680 struct btrfs_delayed_ref_head *head_ref;
687 struct btrfs_delayed_ref_root *delayed_refs; 681 struct btrfs_delayed_ref_root *delayed_refs;
688 int ret;
689 682
690 BUG_ON(extent_op && !extent_op->is_data); 683 BUG_ON(extent_op && !extent_op->is_data);
691 ref = kmalloc(sizeof(*ref), GFP_NOFS); 684 ref = kmalloc(sizeof(*ref), GFP_NOFS);
@@ -707,14 +700,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
707 * insert both the head node and the new ref without dropping 700 * insert both the head node and the new ref without dropping
708 * the spin lock 701 * the spin lock
709 */ 702 */
710 ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 703 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
711 num_bytes, action, 1); 704 num_bytes, action, 1);
712 BUG_ON(ret);
713 705
714 ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, 706 add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
715 num_bytes, parent, ref_root, owner, offset, 707 num_bytes, parent, ref_root, owner, offset,
716 action, for_cow); 708 action, for_cow);
717 BUG_ON(ret);
718 if (!need_ref_seq(for_cow, ref_root) && 709 if (!need_ref_seq(for_cow, ref_root) &&
719 waitqueue_active(&delayed_refs->seq_wait)) 710 waitqueue_active(&delayed_refs->seq_wait))
720 wake_up(&delayed_refs->seq_wait); 711 wake_up(&delayed_refs->seq_wait);
@@ -729,7 +720,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
729{ 720{
730 struct btrfs_delayed_ref_head *head_ref; 721 struct btrfs_delayed_ref_head *head_ref;
731 struct btrfs_delayed_ref_root *delayed_refs; 722 struct btrfs_delayed_ref_root *delayed_refs;
732 int ret;
733 723
734 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); 724 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
735 if (!head_ref) 725 if (!head_ref)
@@ -740,10 +730,9 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
740 delayed_refs = &trans->transaction->delayed_refs; 730 delayed_refs = &trans->transaction->delayed_refs;
741 spin_lock(&delayed_refs->lock); 731 spin_lock(&delayed_refs->lock);
742 732
743 ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 733 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
744 num_bytes, BTRFS_UPDATE_DELAYED_HEAD, 734 num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
745 extent_op->is_data); 735 extent_op->is_data);
746 BUG_ON(ret);
747 736
748 if (waitqueue_active(&delayed_refs->seq_wait)) 737 if (waitqueue_active(&delayed_refs->seq_wait))
749 wake_up(&delayed_refs->seq_wait); 738 wake_up(&delayed_refs->seq_wait);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 31d84e78129b..c1a074d0696f 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -49,9 +49,8 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
49 di = btrfs_match_dir_item_name(root, path, name, name_len); 49 di = btrfs_match_dir_item_name(root, path, name, name_len);
50 if (di) 50 if (di)
51 return ERR_PTR(-EEXIST); 51 return ERR_PTR(-EEXIST);
52 ret = btrfs_extend_item(trans, root, path, data_size); 52 btrfs_extend_item(trans, root, path, data_size);
53 } 53 } else if (ret < 0)
54 if (ret < 0)
55 return ERR_PTR(ret); 54 return ERR_PTR(ret);
56 WARN_ON(ret > 0); 55 WARN_ON(ret > 0);
57 leaf = path->nodes[0]; 56 leaf = path->nodes[0];
@@ -116,6 +115,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
116 * 'location' is the key to stuff into the directory item, 'type' is the 115 * 'location' is the key to stuff into the directory item, 'type' is the
117 * type of the inode we're pointing to, and 'index' is the sequence number 116 * type of the inode we're pointing to, and 'index' is the sequence number
118 * to use for the second index (if one is created). 117 * to use for the second index (if one is created).
118 * Will return 0 or -ENOMEM
119 */ 119 */
120int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root 120int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
121 *root, const char *name, int name_len, 121 *root, const char *name, int name_len,
@@ -383,8 +383,8 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
383 start = btrfs_item_ptr_offset(leaf, path->slots[0]); 383 start = btrfs_item_ptr_offset(leaf, path->slots[0]);
384 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 384 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
385 item_len - (ptr + sub_item_len - start)); 385 item_len - (ptr + sub_item_len - start));
386 ret = btrfs_truncate_item(trans, root, path, 386 btrfs_truncate_item(trans, root, path,
387 item_len - sub_item_len, 1); 387 item_len - sub_item_len, 1);
388 } 388 }
389 return ret; 389 return ret;
390} 390}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 534266fe505f..20196f411206 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -48,20 +48,19 @@
48static struct extent_io_ops btree_extent_io_ops; 48static struct extent_io_ops btree_extent_io_ops;
49static void end_workqueue_fn(struct btrfs_work *work); 49static void end_workqueue_fn(struct btrfs_work *work);
50static void free_fs_root(struct btrfs_root *root); 50static void free_fs_root(struct btrfs_root *root);
51static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 51static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
52 int read_only); 52 int read_only);
53static int btrfs_destroy_ordered_operations(struct btrfs_root *root); 53static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
54static int btrfs_destroy_ordered_extents(struct btrfs_root *root); 54static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
55static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 55static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
56 struct btrfs_root *root); 56 struct btrfs_root *root);
57static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); 57static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
58static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); 58static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
59static int btrfs_destroy_marked_extents(struct btrfs_root *root, 59static int btrfs_destroy_marked_extents(struct btrfs_root *root,
60 struct extent_io_tree *dirty_pages, 60 struct extent_io_tree *dirty_pages,
61 int mark); 61 int mark);
62static int btrfs_destroy_pinned_extent(struct btrfs_root *root, 62static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
63 struct extent_io_tree *pinned_extents); 63 struct extent_io_tree *pinned_extents);
64static int btrfs_cleanup_transaction(struct btrfs_root *root);
65 64
66/* 65/*
67 * end_io_wq structs are used to do processing in task context when an IO is 66 * end_io_wq structs are used to do processing in task context when an IO is
@@ -99,6 +98,7 @@ struct async_submit_bio {
99 */ 98 */
100 u64 bio_offset; 99 u64 bio_offset;
101 struct btrfs_work work; 100 struct btrfs_work work;
101 int error;
102}; 102};
103 103
104/* 104/*
@@ -332,8 +332,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
332 return 0; 332 return 0;
333 333
334 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, 334 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
335 0, &cached_state, GFP_NOFS); 335 0, &cached_state);
336 if (extent_buffer_uptodate(io_tree, eb, cached_state) && 336 if (extent_buffer_uptodate(eb) &&
337 btrfs_header_generation(eb) == parent_transid) { 337 btrfs_header_generation(eb) == parent_transid) {
338 ret = 0; 338 ret = 0;
339 goto out; 339 goto out;
@@ -344,7 +344,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
344 (unsigned long long)parent_transid, 344 (unsigned long long)parent_transid,
345 (unsigned long long)btrfs_header_generation(eb)); 345 (unsigned long long)btrfs_header_generation(eb));
346 ret = 1; 346 ret = 1;
347 clear_extent_buffer_uptodate(io_tree, eb, &cached_state); 347 clear_extent_buffer_uptodate(eb);
348out: 348out:
349 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, 349 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
350 &cached_state, GFP_NOFS); 350 &cached_state, GFP_NOFS);
@@ -360,9 +360,11 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
360 u64 start, u64 parent_transid) 360 u64 start, u64 parent_transid)
361{ 361{
362 struct extent_io_tree *io_tree; 362 struct extent_io_tree *io_tree;
363 int failed = 0;
363 int ret; 364 int ret;
364 int num_copies = 0; 365 int num_copies = 0;
365 int mirror_num = 0; 366 int mirror_num = 0;
367 int failed_mirror = 0;
366 368
367 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); 369 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
368 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 370 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
@@ -370,9 +372,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
370 ret = read_extent_buffer_pages(io_tree, eb, start, 372 ret = read_extent_buffer_pages(io_tree, eb, start,
371 WAIT_COMPLETE, 373 WAIT_COMPLETE,
372 btree_get_extent, mirror_num); 374 btree_get_extent, mirror_num);
373 if (!ret && 375 if (!ret && !verify_parent_transid(io_tree, eb, parent_transid))
374 !verify_parent_transid(io_tree, eb, parent_transid)) 376 break;
375 return ret;
376 377
377 /* 378 /*
378 * This buffer's crc is fine, but its contents are corrupted, so 379 * This buffer's crc is fine, but its contents are corrupted, so
@@ -380,18 +381,31 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
380 * any less wrong. 381 * any less wrong.
381 */ 382 */
382 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) 383 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
383 return ret; 384 break;
385
386 if (!failed_mirror) {
387 failed = 1;
388 printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror);
389 failed_mirror = eb->failed_mirror;
390 }
384 391
385 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, 392 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
386 eb->start, eb->len); 393 eb->start, eb->len);
387 if (num_copies == 1) 394 if (num_copies == 1)
388 return ret; 395 break;
389 396
390 mirror_num++; 397 mirror_num++;
398 if (mirror_num == failed_mirror)
399 mirror_num++;
400
391 if (mirror_num > num_copies) 401 if (mirror_num > num_copies)
392 return ret; 402 break;
393 } 403 }
394 return -EIO; 404
405 if (failed && !ret)
406 repair_eb_io_failure(root, eb, failed_mirror);
407
408 return ret;
395} 409}
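
The rewritten retry loop above keeps reading alternate mirrors until one passes verification, remembers the first mirror that failed so it is never re-read, and, once a good copy is found, rewrites the bad mirror via repair_eb_io_failure(). The control flow in isolation looks like this (a sketch only; read_copy(), verify_contents(), copy_count(), last_mirror_tried() and repair_copy() are hypothetical stand-ins for the btrfs helpers):

/*
 * Sketch only: all helpers here are hypothetical stand-ins.
 * Mirrors are numbered 1..copy_count(); 0 lets the lower layer pick.
 */
static int read_with_retry(struct block *b)
{
	int mirror_num = 0;
	int failed_mirror = 0;
	int failed = 0;
	int ret;

	while (1) {
		ret = read_copy(b, mirror_num);
		if (!ret && verify_contents(b) == 0)
			break;				/* good copy */

		if (!failed_mirror) {
			failed = 1;
			failed_mirror = last_mirror_tried(b);
		}
		if (copy_count(b) == 1)
			break;				/* no fallback */

		mirror_num++;
		if (mirror_num == failed_mirror)
			mirror_num++;		/* skip the known-bad copy */
		if (mirror_num > copy_count(b))
			break;
	}

	if (failed && !ret)
		repair_copy(b, failed_mirror);	/* rewrite the bad mirror */
	return ret;
}
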
396 410
397/* 411/*
@@ -404,50 +418,27 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
404 struct extent_io_tree *tree; 418 struct extent_io_tree *tree;
405 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 419 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
406 u64 found_start; 420 u64 found_start;
407 unsigned long len;
408 struct extent_buffer *eb; 421 struct extent_buffer *eb;
409 int ret;
410 422
411 tree = &BTRFS_I(page->mapping->host)->io_tree; 423 tree = &BTRFS_I(page->mapping->host)->io_tree;
412 424
413 if (page->private == EXTENT_PAGE_PRIVATE) { 425 eb = (struct extent_buffer *)page->private;
414 WARN_ON(1); 426 if (page != eb->pages[0])
415 goto out; 427 return 0;
416 }
417 if (!page->private) {
418 WARN_ON(1);
419 goto out;
420 }
421 len = page->private >> 2;
422 WARN_ON(len == 0);
423
424 eb = alloc_extent_buffer(tree, start, len, page);
425 if (eb == NULL) {
426 WARN_ON(1);
427 goto out;
428 }
429 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
430 btrfs_header_generation(eb));
431 BUG_ON(ret);
432 WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
433
434 found_start = btrfs_header_bytenr(eb); 428 found_start = btrfs_header_bytenr(eb);
435 if (found_start != start) { 429 if (found_start != start) {
436 WARN_ON(1); 430 WARN_ON(1);
437 goto err; 431 return 0;
438 } 432 }
439 if (eb->first_page != page) { 433 if (eb->pages[0] != page) {
440 WARN_ON(1); 434 WARN_ON(1);
441 goto err; 435 return 0;
442 } 436 }
443 if (!PageUptodate(page)) { 437 if (!PageUptodate(page)) {
444 WARN_ON(1); 438 WARN_ON(1);
445 goto err; 439 return 0;
446 } 440 }
447 csum_tree_block(root, eb, 0); 441 csum_tree_block(root, eb, 0);
448err:
449 free_extent_buffer(eb);
450out:
451 return 0; 442 return 0;
452} 443}
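
csum_dirty_buffer() no longer reconstructs an extent_buffer from an encoded length in page->private; the page now carries a direct pointer to its buffer, and only the buffer's first page triggers checksumming. Assuming that convention, the lookup reduces to a sketch like:

/* Sketch, assuming page->private holds the owning extent_buffer. */
static struct extent_buffer *eb_from_page(struct page *page)
{
	struct extent_buffer *eb;

	if (!PagePrivate(page))
		return NULL;
	eb = (struct extent_buffer *)page->private;
	if (page != eb->pages[0])
		return NULL;	/* only the lead page drives the buffer */
	return eb;
}
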
453 444
@@ -537,34 +528,74 @@ static noinline int check_leaf(struct btrfs_root *root,
537 return 0; 528 return 0;
538} 529}
539 530
531struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
532 struct page *page, int max_walk)
533{
534 struct extent_buffer *eb;
535 u64 start = page_offset(page);
536 u64 target = start;
537 u64 min_start;
538
539 if (start < max_walk)
540 min_start = 0;
541 else
542 min_start = start - max_walk;
543
544 while (start >= min_start) {
545 eb = find_extent_buffer(tree, start, 0);
546 if (eb) {
547 /*
548 * we found an extent buffer and it contains our page
549 * hooray!
550 */
551 if (eb->start <= target &&
552 eb->start + eb->len > target)
553 return eb;
554
555 /* we found an extent buffer that wasn't for us */
556 free_extent_buffer(eb);
557 return NULL;
558 }
559 if (start == 0)
560 break;
561 start -= PAGE_CACHE_SIZE;
562 }
563 return NULL;
564}
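
find_eb_for_page() walks backwards one page at a time because a multi-page metadata block is indexed only at its starting offset; a page in the middle of a 16K or 32K block has no radix-tree entry of its own. A hedged usage sketch (the 64KB walk limit is an assumption, matching the BTRFS_MAX_METADATA_BLOCKSIZE check added later in this patch):

/* Usage sketch; the 64KB walk limit is an assumed maximum block size. */
eb = find_eb_for_page(tree, page, 64 * 1024);
if (eb) {
	/* ... inspect the buffer ... */
	free_extent_buffer(eb);		/* drop the lookup's reference */
}
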
565
540static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 566static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
541 struct extent_state *state) 567 struct extent_state *state)
542{ 568{
543 struct extent_io_tree *tree; 569 struct extent_io_tree *tree;
544 u64 found_start; 570 u64 found_start;
545 int found_level; 571 int found_level;
546 unsigned long len;
547 struct extent_buffer *eb; 572 struct extent_buffer *eb;
548 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 573 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
549 int ret = 0; 574 int ret = 0;
575 int reads_done;
550 576
551 tree = &BTRFS_I(page->mapping->host)->io_tree;
552 if (page->private == EXTENT_PAGE_PRIVATE)
553 goto out;
554 if (!page->private) 577 if (!page->private)
555 goto out; 578 goto out;
556 579
557 len = page->private >> 2; 580 tree = &BTRFS_I(page->mapping->host)->io_tree;
558 WARN_ON(len == 0); 581 eb = (struct extent_buffer *)page->private;
559 582
560 eb = alloc_extent_buffer(tree, start, len, page); 583 /* the pending IO might have been the only thing that kept this buffer
561 if (eb == NULL) { 584 * in memory. Make sure we have a ref for all these other checks
585 */
586 extent_buffer_get(eb);
587
588 reads_done = atomic_dec_and_test(&eb->io_pages);
589 if (!reads_done)
590 goto err;
591
592 if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
562 ret = -EIO; 593 ret = -EIO;
563 goto out; 594 goto err;
564 } 595 }
565 596
566 found_start = btrfs_header_bytenr(eb); 597 found_start = btrfs_header_bytenr(eb);
567 if (found_start != start) { 598 if (found_start != eb->start) {
568 printk_ratelimited(KERN_INFO "btrfs bad tree block start " 599 printk_ratelimited(KERN_INFO "btrfs bad tree block start "
569 "%llu %llu\n", 600 "%llu %llu\n",
570 (unsigned long long)found_start, 601 (unsigned long long)found_start,
@@ -572,13 +603,6 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
572 ret = -EIO; 603 ret = -EIO;
573 goto err; 604 goto err;
574 } 605 }
575 if (eb->first_page != page) {
576 printk(KERN_INFO "btrfs bad first page %lu %lu\n",
577 eb->first_page->index, page->index);
578 WARN_ON(1);
579 ret = -EIO;
580 goto err;
581 }
582 if (check_tree_block_fsid(root, eb)) { 606 if (check_tree_block_fsid(root, eb)) {
583 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", 607 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
584 (unsigned long long)eb->start); 608 (unsigned long long)eb->start);
@@ -606,48 +630,31 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
606 ret = -EIO; 630 ret = -EIO;
607 } 631 }
608 632
609 end = min_t(u64, eb->len, PAGE_CACHE_SIZE); 633 if (!ret)
610 end = eb->start + end - 1; 634 set_extent_buffer_uptodate(eb);
611err: 635err:
612 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { 636 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
613 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); 637 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
614 btree_readahead_hook(root, eb, eb->start, ret); 638 btree_readahead_hook(root, eb, eb->start, ret);
615 } 639 }
616 640
641 if (ret)
642 clear_extent_buffer_uptodate(eb);
617 free_extent_buffer(eb); 643 free_extent_buffer(eb);
618out: 644out:
619 return ret; 645 return ret;
620} 646}
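
Two idioms in the rewritten hook are worth calling out: extent_buffer_get() pins the buffer because the completing IO may hold its last reference, and atomic_dec_and_test(&eb->io_pages) guarantees the header and checksum validation runs exactly once, on the last page to land. In miniature, with a hypothetical buffer type and helpers:

/* Sketch: per-page read completion for a multi-page buffer; the
 * buffer type, flags and helpers here are hypothetical. */
static void page_read_done(struct my_buffer *buf, int err)
{
	buffer_get(buf);		/* the IO may hold the last reference */
	if (err)
		set_bit(BUF_IOERR, &buf->flags);
	if (!atomic_dec_and_test(&buf->io_pages))
		goto out;		/* more pages still in flight */

	if (!test_bit(BUF_IOERR, &buf->flags) && validate(buf) == 0)
		set_uptodate(buf);
	else
		clear_uptodate(buf);
out:
	buffer_put(buf);
}
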
621 647
622static int btree_io_failed_hook(struct bio *failed_bio, 648static int btree_io_failed_hook(struct page *page, int failed_mirror)
623 struct page *page, u64 start, u64 end,
624 int mirror_num, struct extent_state *state)
625{ 649{
626 struct extent_io_tree *tree;
627 unsigned long len;
628 struct extent_buffer *eb; 650 struct extent_buffer *eb;
629 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 651 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
630 652
631 tree = &BTRFS_I(page->mapping->host)->io_tree; 653 eb = (struct extent_buffer *)page->private;
632 if (page->private == EXTENT_PAGE_PRIVATE) 654 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
633 goto out; 655 eb->failed_mirror = failed_mirror;
634 if (!page->private) 656 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
635 goto out;
636
637 len = page->private >> 2;
638 WARN_ON(len == 0);
639
640 eb = alloc_extent_buffer(tree, start, len, page);
641 if (eb == NULL)
642 goto out;
643
644 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
645 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
646 btree_readahead_hook(root, eb, eb->start, -EIO); 657 btree_readahead_hook(root, eb, eb->start, -EIO);
647 }
648 free_extent_buffer(eb);
649
650out:
651 return -EIO; /* we fixed nothing */ 658 return -EIO; /* we fixed nothing */
652} 659}
653 660
@@ -719,11 +726,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
719static void run_one_async_start(struct btrfs_work *work) 726static void run_one_async_start(struct btrfs_work *work)
720{ 727{
721 struct async_submit_bio *async; 728 struct async_submit_bio *async;
729 int ret;
722 730
723 async = container_of(work, struct async_submit_bio, work); 731 async = container_of(work, struct async_submit_bio, work);
724 async->submit_bio_start(async->inode, async->rw, async->bio, 732 ret = async->submit_bio_start(async->inode, async->rw, async->bio,
725 async->mirror_num, async->bio_flags, 733 async->mirror_num, async->bio_flags,
726 async->bio_offset); 734 async->bio_offset);
735 if (ret)
736 async->error = ret;
727} 737}
728 738
729static void run_one_async_done(struct btrfs_work *work) 739static void run_one_async_done(struct btrfs_work *work)
@@ -744,6 +754,12 @@ static void run_one_async_done(struct btrfs_work *work)
744 waitqueue_active(&fs_info->async_submit_wait)) 754 waitqueue_active(&fs_info->async_submit_wait))
745 wake_up(&fs_info->async_submit_wait); 755 wake_up(&fs_info->async_submit_wait);
746 756
757 /* If an error occurred we just want to clean up the bio and move on */
758 if (async->error) {
759 bio_endio(async->bio, async->error);
760 return;
761 }
762
747 async->submit_bio_done(async->inode, async->rw, async->bio, 763 async->submit_bio_done(async->inode, async->rw, async->bio,
748 async->mirror_num, async->bio_flags, 764 async->mirror_num, async->bio_flags,
749 async->bio_offset); 765 async->bio_offset);
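
Since the checksum step can now fail, run_one_async_start() records the error in the async struct and run_one_async_done() completes the bio with that error instead of submitting it. The handoff, sketched with hypothetical names:

/* Sketch with hypothetical names: an error in the start phase is
 * remembered and turned into bio completion in the done phase. */
static void work_start(struct async_work *w)
{
	int ret = w->prepare(w->bio);	/* e.g. checksum the pages */

	if (ret)
		w->error = ret;		/* record, don't BUG */
}

static void work_done(struct async_work *w)
{
	if (w->error) {
		bio_endio(w->bio, w->error);	/* fail the IO cleanly */
		return;
	}
	w->submit(w->bio);			/* normal submission */
}
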
@@ -785,6 +801,8 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
785 async->bio_flags = bio_flags; 801 async->bio_flags = bio_flags;
786 async->bio_offset = bio_offset; 802 async->bio_offset = bio_offset;
787 803
804 async->error = 0;
805
788 atomic_inc(&fs_info->nr_async_submits); 806 atomic_inc(&fs_info->nr_async_submits);
789 807
790 if (rw & REQ_SYNC) 808 if (rw & REQ_SYNC)
@@ -806,15 +824,18 @@ static int btree_csum_one_bio(struct bio *bio)
806 struct bio_vec *bvec = bio->bi_io_vec; 824 struct bio_vec *bvec = bio->bi_io_vec;
807 int bio_index = 0; 825 int bio_index = 0;
808 struct btrfs_root *root; 826 struct btrfs_root *root;
827 int ret = 0;
809 828
810 WARN_ON(bio->bi_vcnt <= 0); 829 WARN_ON(bio->bi_vcnt <= 0);
811 while (bio_index < bio->bi_vcnt) { 830 while (bio_index < bio->bi_vcnt) {
812 root = BTRFS_I(bvec->bv_page->mapping->host)->root; 831 root = BTRFS_I(bvec->bv_page->mapping->host)->root;
813 csum_dirty_buffer(root, bvec->bv_page); 832 ret = csum_dirty_buffer(root, bvec->bv_page);
833 if (ret)
834 break;
814 bio_index++; 835 bio_index++;
815 bvec++; 836 bvec++;
816 } 837 }
817 return 0; 838 return ret;
818} 839}
819 840
820static int __btree_submit_bio_start(struct inode *inode, int rw, 841static int __btree_submit_bio_start(struct inode *inode, int rw,
@@ -826,8 +847,7 @@ static int __btree_submit_bio_start(struct inode *inode, int rw,
826 * when we're called for a write, we're already in the async 847 * when we're called for a write, we're already in the async
827 * submission context. Just jump into btrfs_map_bio 848 * submission context. Just jump into btrfs_map_bio
828 */ 849 */
829 btree_csum_one_bio(bio); 850 return btree_csum_one_bio(bio);
830 return 0;
831} 851}
832 852
833static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, 853static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
@@ -847,15 +867,16 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
847{ 867{
848 int ret; 868 int ret;
849 869
850 ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
851 bio, 1);
852 BUG_ON(ret);
853
854 if (!(rw & REQ_WRITE)) { 870 if (!(rw & REQ_WRITE)) {
871
855 /* 872 /*
856 * called for a read, do the setup so that checksum validation 873 * called for a read, do the setup so that checksum validation
857 * can happen in the async kernel threads 874 * can happen in the async kernel threads
858 */ 875 */
876 ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
877 bio, 1);
878 if (ret)
879 return ret;
859 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 880 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
860 mirror_num, 0); 881 mirror_num, 0);
861 } 882 }
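
Moving btrfs_bio_wq_end_io() under the read branch means only reads punt their completion to a workqueue (checksum verification may sleep); writes are checksummed up front in the async submit path and need no end_io redirection. A sketch of the split, with hypothetical helpers:

/* Sketch, hypothetical helpers: reads defer completion to a
 * workqueue; writes are checksummed before they are mapped. */
static int submit_metadata_bio(struct inode *inode, int rw, struct bio *bio)
{
	int ret;

	if (!(rw & REQ_WRITE)) {
		ret = defer_endio_to_workqueue(inode, bio);
		if (ret)
			return ret;
		return map_and_submit(inode, rw, bio);
	}
	return async_csum_then_submit(inode, rw, bio);
}
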
@@ -893,34 +914,6 @@ static int btree_migratepage(struct address_space *mapping,
893} 914}
894#endif 915#endif
895 916
896static int btree_writepage(struct page *page, struct writeback_control *wbc)
897{
898 struct extent_io_tree *tree;
899 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
900 struct extent_buffer *eb;
901 int was_dirty;
902
903 tree = &BTRFS_I(page->mapping->host)->io_tree;
904 if (!(current->flags & PF_MEMALLOC)) {
905 return extent_write_full_page(tree, page,
906 btree_get_extent, wbc);
907 }
908
909 redirty_page_for_writepage(wbc, page);
910 eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
911 WARN_ON(!eb);
912
913 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
914 if (!was_dirty) {
915 spin_lock(&root->fs_info->delalloc_lock);
916 root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
917 spin_unlock(&root->fs_info->delalloc_lock);
918 }
919 free_extent_buffer(eb);
920
921 unlock_page(page);
922 return 0;
923}
924 917
925static int btree_writepages(struct address_space *mapping, 918static int btree_writepages(struct address_space *mapping,
926 struct writeback_control *wbc) 919 struct writeback_control *wbc)
@@ -940,7 +933,7 @@ static int btree_writepages(struct address_space *mapping,
940 if (num_dirty < thresh) 933 if (num_dirty < thresh)
941 return 0; 934 return 0;
942 } 935 }
943 return extent_writepages(tree, mapping, btree_get_extent, wbc); 936 return btree_write_cache_pages(mapping, wbc);
944} 937}
945 938
946static int btree_readpage(struct file *file, struct page *page) 939static int btree_readpage(struct file *file, struct page *page)
@@ -952,16 +945,8 @@ static int btree_readpage(struct file *file, struct page *page)
952 945
953static int btree_releasepage(struct page *page, gfp_t gfp_flags) 946static int btree_releasepage(struct page *page, gfp_t gfp_flags)
954{ 947{
955 struct extent_io_tree *tree;
956 struct extent_map_tree *map;
957 int ret;
958
959 if (PageWriteback(page) || PageDirty(page)) 948 if (PageWriteback(page) || PageDirty(page))
960 return 0; 949 return 0;
961
962 tree = &BTRFS_I(page->mapping->host)->io_tree;
963 map = &BTRFS_I(page->mapping->host)->extent_tree;
964
965 /* 950 /*
966 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing 951 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
967 * slab allocation from alloc_extent_state down the callchain where 952 * slab allocation from alloc_extent_state down the callchain where
@@ -969,18 +954,7 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
969 */ 954 */
970 gfp_flags &= ~GFP_SLAB_BUG_MASK; 955 gfp_flags &= ~GFP_SLAB_BUG_MASK;
971 956
972 ret = try_release_extent_state(map, tree, page, gfp_flags); 957 return try_release_extent_buffer(page, gfp_flags);
973 if (!ret)
974 return 0;
975
976 ret = try_release_extent_buffer(tree, page);
977 if (ret == 1) {
978 ClearPagePrivate(page);
979 set_page_private(page, 0);
980 page_cache_release(page);
981 }
982
983 return ret;
984} 958}
985 959
986static void btree_invalidatepage(struct page *page, unsigned long offset) 960static void btree_invalidatepage(struct page *page, unsigned long offset)
@@ -998,15 +972,28 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
998 } 972 }
999} 973}
1000 974
975static int btree_set_page_dirty(struct page *page)
976{
977 struct extent_buffer *eb;
978
979 BUG_ON(!PagePrivate(page));
980 eb = (struct extent_buffer *)page->private;
981 BUG_ON(!eb);
982 BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
983 BUG_ON(!atomic_read(&eb->refs));
984 btrfs_assert_tree_locked(eb);
985 return __set_page_dirty_nobuffers(page);
986}
987
1001static const struct address_space_operations btree_aops = { 988static const struct address_space_operations btree_aops = {
1002 .readpage = btree_readpage, 989 .readpage = btree_readpage,
1003 .writepage = btree_writepage,
1004 .writepages = btree_writepages, 990 .writepages = btree_writepages,
1005 .releasepage = btree_releasepage, 991 .releasepage = btree_releasepage,
1006 .invalidatepage = btree_invalidatepage, 992 .invalidatepage = btree_invalidatepage,
1007#ifdef CONFIG_MIGRATION 993#ifdef CONFIG_MIGRATION
1008 .migratepage = btree_migratepage, 994 .migratepage = btree_migratepage,
1009#endif 995#endif
996 .set_page_dirty = btree_set_page_dirty,
1010}; 997};
1011 998
1012int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, 999int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1049,7 +1036,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
1049 if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { 1036 if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
1050 free_extent_buffer(buf); 1037 free_extent_buffer(buf);
1051 return -EIO; 1038 return -EIO;
1052 } else if (extent_buffer_uptodate(io_tree, buf, NULL)) { 1039 } else if (extent_buffer_uptodate(buf)) {
1053 *eb = buf; 1040 *eb = buf;
1054 } else { 1041 } else {
1055 free_extent_buffer(buf); 1042 free_extent_buffer(buf);
@@ -1074,20 +1061,20 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
1074 struct extent_buffer *eb; 1061 struct extent_buffer *eb;
1075 1062
1076 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 1063 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
1077 bytenr, blocksize, NULL); 1064 bytenr, blocksize);
1078 return eb; 1065 return eb;
1079} 1066}
1080 1067
1081 1068
1082int btrfs_write_tree_block(struct extent_buffer *buf) 1069int btrfs_write_tree_block(struct extent_buffer *buf)
1083{ 1070{
1084 return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, 1071 return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
1085 buf->start + buf->len - 1); 1072 buf->start + buf->len - 1);
1086} 1073}
1087 1074
1088int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) 1075int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
1089{ 1076{
1090 return filemap_fdatawait_range(buf->first_page->mapping, 1077 return filemap_fdatawait_range(buf->pages[0]->mapping,
1091 buf->start, buf->start + buf->len - 1); 1078 buf->start, buf->start + buf->len - 1);
1092} 1079}
1093 1080
@@ -1102,17 +1089,13 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1102 return NULL; 1089 return NULL;
1103 1090
1104 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 1091 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
1105
1106 if (ret == 0)
1107 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
1108 return buf; 1092 return buf;
1109 1093
1110} 1094}
1111 1095
1112int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1096void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1113 struct extent_buffer *buf) 1097 struct extent_buffer *buf)
1114{ 1098{
1115 struct inode *btree_inode = root->fs_info->btree_inode;
1116 if (btrfs_header_generation(buf) == 1099 if (btrfs_header_generation(buf) ==
1117 root->fs_info->running_transaction->transid) { 1100 root->fs_info->running_transaction->transid) {
1118 btrfs_assert_tree_locked(buf); 1101 btrfs_assert_tree_locked(buf);
@@ -1121,23 +1104,27 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1121 spin_lock(&root->fs_info->delalloc_lock); 1104 spin_lock(&root->fs_info->delalloc_lock);
1122 if (root->fs_info->dirty_metadata_bytes >= buf->len) 1105 if (root->fs_info->dirty_metadata_bytes >= buf->len)
1123 root->fs_info->dirty_metadata_bytes -= buf->len; 1106 root->fs_info->dirty_metadata_bytes -= buf->len;
1124 else 1107 else {
1125 WARN_ON(1); 1108 spin_unlock(&root->fs_info->delalloc_lock);
1109 btrfs_panic(root->fs_info, -EOVERFLOW,
1110 "Can't clear %lu bytes from "
1111 "dirty_metadata_bytes (%lu)",
1112 buf->len,
1113 root->fs_info->dirty_metadata_bytes);
1114 }
1126 spin_unlock(&root->fs_info->delalloc_lock); 1115 spin_unlock(&root->fs_info->delalloc_lock);
1127 } 1116 }
1128 1117
1129 /* ugh, clear_extent_buffer_dirty needs to lock the page */ 1118 /* ugh, clear_extent_buffer_dirty needs to lock the page */
1130 btrfs_set_lock_blocking(buf); 1119 btrfs_set_lock_blocking(buf);
1131 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 1120 clear_extent_buffer_dirty(buf);
1132 buf);
1133 } 1121 }
1134 return 0;
1135} 1122}
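
Note the ordering in the error branch above: the delalloc spinlock is dropped before btrfs_panic(), which never returns, so the trailing unlock is reached only on the success path. The underflow-as-fatal idiom, sketched with a hypothetical counter and panic helper:

/* Sketch with a hypothetical counter; fs_panic() does not return. */
spin_lock(&fs->accounting_lock);
if (fs->dirty_metadata_bytes >= len) {
	fs->dirty_metadata_bytes -= len;
	spin_unlock(&fs->accounting_lock);
} else {
	/* unlock first: the panic path must not run under the spinlock */
	spin_unlock(&fs->accounting_lock);
	fs_panic(fs, -EOVERFLOW, "can't clear %lu bytes (%lu accounted)",
		 len, fs->dirty_metadata_bytes);
}
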
1136 1123
1137static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, 1124static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1138 u32 stripesize, struct btrfs_root *root, 1125 u32 stripesize, struct btrfs_root *root,
1139 struct btrfs_fs_info *fs_info, 1126 struct btrfs_fs_info *fs_info,
1140 u64 objectid) 1127 u64 objectid)
1141{ 1128{
1142 root->node = NULL; 1129 root->node = NULL;
1143 root->commit_root = NULL; 1130 root->commit_root = NULL;
@@ -1189,13 +1176,12 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1189 root->defrag_running = 0; 1176 root->defrag_running = 0;
1190 root->root_key.objectid = objectid; 1177 root->root_key.objectid = objectid;
1191 root->anon_dev = 0; 1178 root->anon_dev = 0;
1192 return 0;
1193} 1179}
1194 1180
1195static int find_and_setup_root(struct btrfs_root *tree_root, 1181static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
1196 struct btrfs_fs_info *fs_info, 1182 struct btrfs_fs_info *fs_info,
1197 u64 objectid, 1183 u64 objectid,
1198 struct btrfs_root *root) 1184 struct btrfs_root *root)
1199{ 1185{
1200 int ret; 1186 int ret;
1201 u32 blocksize; 1187 u32 blocksize;
@@ -1208,7 +1194,8 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
1208 &root->root_item, &root->root_key); 1194 &root->root_item, &root->root_key);
1209 if (ret > 0) 1195 if (ret > 0)
1210 return -ENOENT; 1196 return -ENOENT;
1211 BUG_ON(ret); 1197 else if (ret < 0)
1198 return ret;
1212 1199
1213 generation = btrfs_root_generation(&root->root_item); 1200 generation = btrfs_root_generation(&root->root_item);
1214 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1201 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
@@ -1377,7 +1364,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1377 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1364 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1378 blocksize, generation); 1365 blocksize, generation);
1379 root->commit_root = btrfs_root_node(root); 1366 root->commit_root = btrfs_root_node(root);
1380 BUG_ON(!root->node); 1367 BUG_ON(!root->node); /* -ENOMEM */
1381out: 1368out:
1382 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1369 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
1383 root->ref_cows = 1; 1370 root->ref_cows = 1;
@@ -1513,41 +1500,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1513 return 0; 1500 return 0;
1514} 1501}
1515 1502
1516static int bio_ready_for_csum(struct bio *bio)
1517{
1518 u64 length = 0;
1519 u64 buf_len = 0;
1520 u64 start = 0;
1521 struct page *page;
1522 struct extent_io_tree *io_tree = NULL;
1523 struct bio_vec *bvec;
1524 int i;
1525 int ret;
1526
1527 bio_for_each_segment(bvec, bio, i) {
1528 page = bvec->bv_page;
1529 if (page->private == EXTENT_PAGE_PRIVATE) {
1530 length += bvec->bv_len;
1531 continue;
1532 }
1533 if (!page->private) {
1534 length += bvec->bv_len;
1535 continue;
1536 }
1537 length = bvec->bv_len;
1538 buf_len = page->private >> 2;
1539 start = page_offset(page) + bvec->bv_offset;
1540 io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1541 }
1542 /* are we fully contained in this bio? */
1543 if (buf_len <= length)
1544 return 1;
1545
1546 ret = extent_range_uptodate(io_tree, start + length,
1547 start + buf_len - 1);
1548 return ret;
1549}
1550
1551/* 1503/*
1552 * called by the kthread helper functions to finally call the bio end_io 1504 * called by the kthread helper functions to finally call the bio end_io
1553 * functions. This is where read checksum verification actually happens 1505 * functions. This is where read checksum verification actually happens
@@ -1563,17 +1515,6 @@ static void end_workqueue_fn(struct btrfs_work *work)
1563 bio = end_io_wq->bio; 1515 bio = end_io_wq->bio;
1564 fs_info = end_io_wq->info; 1516 fs_info = end_io_wq->info;
1565 1517
1566 /* metadata bio reads are special because the whole tree block must
1567 * be checksummed at once. This makes sure the entire block is in
1568 * ram and up to date before trying to verify things. For
1569 * blocksize <= pagesize, it is basically a noop
1570 */
1571 if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
1572 !bio_ready_for_csum(bio)) {
1573 btrfs_queue_worker(&fs_info->endio_meta_workers,
1574 &end_io_wq->work);
1575 return;
1576 }
1577 error = end_io_wq->error; 1518 error = end_io_wq->error;
1578 bio->bi_private = end_io_wq->private; 1519 bio->bi_private = end_io_wq->private;
1579 bio->bi_end_io = end_io_wq->end_io; 1520 bio->bi_end_io = end_io_wq->end_io;
@@ -1614,9 +1555,10 @@ static int transaction_kthread(void *arg)
1614 u64 transid; 1555 u64 transid;
1615 unsigned long now; 1556 unsigned long now;
1616 unsigned long delay; 1557 unsigned long delay;
1617 int ret; 1558 bool cannot_commit;
1618 1559
1619 do { 1560 do {
1561 cannot_commit = false;
1620 delay = HZ * 30; 1562 delay = HZ * 30;
1621 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1563 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1622 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1564 mutex_lock(&root->fs_info->transaction_kthread_mutex);
@@ -1638,11 +1580,14 @@ static int transaction_kthread(void *arg)
1638 transid = cur->transid; 1580 transid = cur->transid;
1639 spin_unlock(&root->fs_info->trans_lock); 1581 spin_unlock(&root->fs_info->trans_lock);
1640 1582
1583 /* If the file system is aborted, this will always fail. */
1641 trans = btrfs_join_transaction(root); 1584 trans = btrfs_join_transaction(root);
1642 BUG_ON(IS_ERR(trans)); 1585 if (IS_ERR(trans)) {
1586 cannot_commit = true;
1587 goto sleep;
1588 }
1643 if (transid == trans->transid) { 1589 if (transid == trans->transid) {
1644 ret = btrfs_commit_transaction(trans, root); 1590 btrfs_commit_transaction(trans, root);
1645 BUG_ON(ret);
1646 } else { 1591 } else {
1647 btrfs_end_transaction(trans, root); 1592 btrfs_end_transaction(trans, root);
1648 } 1593 }
@@ -1653,7 +1598,8 @@ sleep:
1653 if (!try_to_freeze()) { 1598 if (!try_to_freeze()) {
1654 set_current_state(TASK_INTERRUPTIBLE); 1599 set_current_state(TASK_INTERRUPTIBLE);
1655 if (!kthread_should_stop() && 1600 if (!kthread_should_stop() &&
1656 !btrfs_transaction_blocked(root->fs_info)) 1601 (!btrfs_transaction_blocked(root->fs_info) ||
1602 cannot_commit))
1657 schedule_timeout(delay); 1603 schedule_timeout(delay);
1658 __set_current_state(TASK_RUNNING); 1604 __set_current_state(TASK_RUNNING);
1659 } 1605 }
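
The cannot_commit flag exists so that on an aborted filesystem, where joining a transaction always fails, the kthread still sleeps its full delay instead of spinning on the failure. The loop shape, condensed with hypothetical helpers:

/* Sketch of the loop shape; the helpers are hypothetical. */
do {
	bool cannot_commit = false;

	trans = join_transaction(root);
	if (IS_ERR(trans))
		cannot_commit = true;	/* aborted fs: join always fails */
	else
		commit_or_end(trans, root);

	if (!kthread_should_stop() &&
	    (!transaction_blocked(root) || cannot_commit))
		schedule_timeout(delay);	/* don't busy-loop on failure */
} while (!kthread_should_stop());
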
@@ -2042,6 +1988,7 @@ int open_ctree(struct super_block *sb,
2042 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); 1988 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
2043 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1989 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
2044 fs_info->btree_inode->i_mapping); 1990 fs_info->btree_inode->i_mapping);
1991 BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
2045 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); 1992 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
2046 1993
2047 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; 1994 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
@@ -2084,6 +2031,7 @@ int open_ctree(struct super_block *sb,
2084 __setup_root(4096, 4096, 4096, 4096, tree_root, 2031 __setup_root(4096, 4096, 4096, 4096, tree_root,
2085 fs_info, BTRFS_ROOT_TREE_OBJECTID); 2032 fs_info, BTRFS_ROOT_TREE_OBJECTID);
2086 2033
2034 invalidate_bdev(fs_devices->latest_bdev);
2087 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 2035 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
2088 if (!bh) { 2036 if (!bh) {
2089 err = -EINVAL; 2037 err = -EINVAL;
@@ -2104,7 +2052,12 @@ int open_ctree(struct super_block *sb,
2104 /* check FS state, whether FS is broken. */ 2052 /* check FS state, whether FS is broken. */
2105 fs_info->fs_state |= btrfs_super_flags(disk_super); 2053 fs_info->fs_state |= btrfs_super_flags(disk_super);
2106 2054
2107 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 2055 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
2056 if (ret) {
2057 printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
2058 err = ret;
2059 goto fail_alloc;
2060 }
2108 2061
2109 /* 2062 /*
2110 * run through our array of backup supers and setup 2063 * run through our array of backup supers and setup
@@ -2135,10 +2088,55 @@ int open_ctree(struct super_block *sb,
2135 goto fail_alloc; 2088 goto fail_alloc;
2136 } 2089 }
2137 2090
2091 if (btrfs_super_leafsize(disk_super) !=
2092 btrfs_super_nodesize(disk_super)) {
2093 printk(KERN_ERR "BTRFS: couldn't mount because metadata "
2094 "blocksizes don't match. node %d leaf %d\n",
2095 btrfs_super_nodesize(disk_super),
2096 btrfs_super_leafsize(disk_super));
2097 err = -EINVAL;
2098 goto fail_alloc;
2099 }
2100 if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
2101 printk(KERN_ERR "BTRFS: couldn't mount because metadata "
2102 "blocksize (%d) was too large\n",
2103 btrfs_super_leafsize(disk_super));
2104 err = -EINVAL;
2105 goto fail_alloc;
2106 }
2107
2138 features = btrfs_super_incompat_flags(disk_super); 2108 features = btrfs_super_incompat_flags(disk_super);
2139 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 2109 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
2140 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) 2110 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
2141 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2111 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2112
2113 /*
2114 * flag our filesystem as having big metadata blocks if
2115 * they are bigger than the page size
2116 */
2117 if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
2118 if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
2119 printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
2120 features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
2121 }
2122
2123 nodesize = btrfs_super_nodesize(disk_super);
2124 leafsize = btrfs_super_leafsize(disk_super);
2125 sectorsize = btrfs_super_sectorsize(disk_super);
2126 stripesize = btrfs_super_stripesize(disk_super);
2127
2128 /*
2129 * mixed block groups end up with duplicate but slightly offset
2130 * extent buffers for the same range, which leads to corruption
2131 */
2132 if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
2133 (sectorsize != leafsize)) {
2134 printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes "
2135 "are not allowed for mixed block groups on %s\n",
2136 sb->s_id);
2137 goto fail_alloc;
2138 }
2139
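
Taken together, the new mount-time checks pin down the metadata geometry: nodesize must equal leafsize, neither may exceed BTRFS_MAX_METADATA_BLOCKSIZE, and mixed block groups additionally require sectorsize == leafsize. Consolidated into one sketch, using the accessors shown above:

/* Sketch: the same three geometry checks, consolidated. */
static int check_metadata_geometry(struct btrfs_super_block *s, u64 features)
{
	u32 node = btrfs_super_nodesize(s);
	u32 leaf = btrfs_super_leafsize(s);
	u32 sector = btrfs_super_sectorsize(s);

	if (node != leaf)
		return -EINVAL;		/* node/leaf sizes must match */
	if (leaf > BTRFS_MAX_METADATA_BLOCKSIZE)
		return -EINVAL;		/* metadata block too large */
	if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && sector != leaf)
		return -EINVAL;		/* mixed groups need one block size */
	return 0;
}
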
2142 btrfs_set_super_incompat_flags(disk_super, features); 2140 btrfs_set_super_incompat_flags(disk_super, features);
2143 2141
2144 features = btrfs_super_compat_ro_flags(disk_super) & 2142 features = btrfs_super_compat_ro_flags(disk_super) &
@@ -2242,10 +2240,6 @@ int open_ctree(struct super_block *sb,
2242 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 2240 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
2243 4 * 1024 * 1024 / PAGE_CACHE_SIZE); 2241 4 * 1024 * 1024 / PAGE_CACHE_SIZE);
2244 2242
2245 nodesize = btrfs_super_nodesize(disk_super);
2246 leafsize = btrfs_super_leafsize(disk_super);
2247 sectorsize = btrfs_super_sectorsize(disk_super);
2248 stripesize = btrfs_super_stripesize(disk_super);
2249 tree_root->nodesize = nodesize; 2243 tree_root->nodesize = nodesize;
2250 tree_root->leafsize = leafsize; 2244 tree_root->leafsize = leafsize;
2251 tree_root->sectorsize = sectorsize; 2245 tree_root->sectorsize = sectorsize;
@@ -2285,7 +2279,7 @@ int open_ctree(struct super_block *sb,
2285 chunk_root->node = read_tree_block(chunk_root, 2279 chunk_root->node = read_tree_block(chunk_root,
2286 btrfs_super_chunk_root(disk_super), 2280 btrfs_super_chunk_root(disk_super),
2287 blocksize, generation); 2281 blocksize, generation);
2288 BUG_ON(!chunk_root->node); 2282 BUG_ON(!chunk_root->node); /* -ENOMEM */
2289 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 2283 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
2290 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", 2284 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
2291 sb->s_id); 2285 sb->s_id);
@@ -2425,21 +2419,31 @@ retry_root_backup:
2425 log_tree_root->node = read_tree_block(tree_root, bytenr, 2419 log_tree_root->node = read_tree_block(tree_root, bytenr,
2426 blocksize, 2420 blocksize,
2427 generation + 1); 2421 generation + 1);
2422 /* returns with log_tree_root freed on success */
2428 ret = btrfs_recover_log_trees(log_tree_root); 2423 ret = btrfs_recover_log_trees(log_tree_root);
2429 BUG_ON(ret); 2424 if (ret) {
2425 btrfs_error(tree_root->fs_info, ret,
2426 "Failed to recover log tree");
2427 free_extent_buffer(log_tree_root->node);
2428 kfree(log_tree_root);
2429 goto fail_trans_kthread;
2430 }
2430 2431
2431 if (sb->s_flags & MS_RDONLY) { 2432 if (sb->s_flags & MS_RDONLY) {
2432 ret = btrfs_commit_super(tree_root); 2433 ret = btrfs_commit_super(tree_root);
2433 BUG_ON(ret); 2434 if (ret)
2435 goto fail_trans_kthread;
2434 } 2436 }
2435 } 2437 }
2436 2438
2437 ret = btrfs_find_orphan_roots(tree_root); 2439 ret = btrfs_find_orphan_roots(tree_root);
2438 BUG_ON(ret); 2440 if (ret)
2441 goto fail_trans_kthread;
2439 2442
2440 if (!(sb->s_flags & MS_RDONLY)) { 2443 if (!(sb->s_flags & MS_RDONLY)) {
2441 ret = btrfs_cleanup_fs_roots(fs_info); 2444 ret = btrfs_cleanup_fs_roots(fs_info);
2442 BUG_ON(ret); 2445 if (ret) {
2446 }
2443 2447
2444 ret = btrfs_recover_relocation(tree_root); 2448 ret = btrfs_recover_relocation(tree_root);
2445 if (ret < 0) { 2449 if (ret < 0) {
@@ -2859,6 +2863,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2859 if (total_errors > max_errors) { 2863 if (total_errors > max_errors) {
2860 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 2864 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
2861 total_errors); 2865 total_errors);
2866
2867 /* This shouldn't happen. FUA is masked off if unsupported */
2862 BUG(); 2868 BUG();
2863 } 2869 }
2864 2870
@@ -2875,9 +2881,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2875 } 2881 }
2876 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 2882 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2877 if (total_errors > max_errors) { 2883 if (total_errors > max_errors) {
2878 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 2884 btrfs_error(root->fs_info, -EIO,
2879 total_errors); 2885 "%d errors while writing supers", total_errors);
2880 BUG(); 2886 return -EIO;
2881 } 2887 }
2882 return 0; 2888 return 0;
2883} 2889}
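
The second pass now degrades gracefully: btrfs_error() flips the filesystem read-only and -EIO propagates to the caller, while the first pass keeps its BUG() because FUA is masked off when unsupported, so it cannot legitimately overrun the error budget. The two-phase shape, sketched with a hypothetical device iterator:

/* Sketch; for_each_device() and the write/wait helpers are hypothetical. */
total_errors = 0;
for_each_device(dev)
	if (write_super_phase1(dev))	/* issue barriers + super writes */
		total_errors++;
if (total_errors > max_errors)
	BUG();	/* can't happen: FUA is masked off if unsupported */

total_errors = 0;
for_each_device(dev)
	if (wait_super_phase2(dev))	/* wait for the writes to land */
		total_errors++;
if (total_errors > max_errors)
	return fs_error(fs, -EIO, "%d errors while writing supers",
			total_errors);
return 0;
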
@@ -2891,7 +2897,20 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
2891 return ret; 2897 return ret;
2892} 2898}
2893 2899
2894int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2900/* Kill all outstanding I/O */
2901void btrfs_abort_devices(struct btrfs_root *root)
2902{
2903 struct list_head *head;
2904 struct btrfs_device *dev;
2905 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2906 head = &root->fs_info->fs_devices->devices;
2907 list_for_each_entry_rcu(dev, head, dev_list) {
2908 blk_abort_queue(dev->bdev->bd_disk->queue);
2909 }
2910 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2911}
2912
2913void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2895{ 2914{
2896 spin_lock(&fs_info->fs_roots_radix_lock); 2915 spin_lock(&fs_info->fs_roots_radix_lock);
2897 radix_tree_delete(&fs_info->fs_roots_radix, 2916 radix_tree_delete(&fs_info->fs_roots_radix,
@@ -2904,7 +2923,6 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2904 __btrfs_remove_free_space_cache(root->free_ino_pinned); 2923 __btrfs_remove_free_space_cache(root->free_ino_pinned);
2905 __btrfs_remove_free_space_cache(root->free_ino_ctl); 2924 __btrfs_remove_free_space_cache(root->free_ino_ctl);
2906 free_fs_root(root); 2925 free_fs_root(root);
2907 return 0;
2908} 2926}
2909 2927
2910static void free_fs_root(struct btrfs_root *root) 2928static void free_fs_root(struct btrfs_root *root)
@@ -2921,7 +2939,7 @@ static void free_fs_root(struct btrfs_root *root)
2921 kfree(root); 2939 kfree(root);
2922} 2940}
2923 2941
2924static int del_fs_roots(struct btrfs_fs_info *fs_info) 2942static void del_fs_roots(struct btrfs_fs_info *fs_info)
2925{ 2943{
2926 int ret; 2944 int ret;
2927 struct btrfs_root *gang[8]; 2945 struct btrfs_root *gang[8];
@@ -2950,7 +2968,6 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info)
2950 for (i = 0; i < ret; i++) 2968 for (i = 0; i < ret; i++)
2951 btrfs_free_fs_root(fs_info, gang[i]); 2969 btrfs_free_fs_root(fs_info, gang[i]);
2952 } 2970 }
2953 return 0;
2954} 2971}
2955 2972
2956int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) 2973int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2999,14 +3016,21 @@ int btrfs_commit_super(struct btrfs_root *root)
2999 if (IS_ERR(trans)) 3016 if (IS_ERR(trans))
3000 return PTR_ERR(trans); 3017 return PTR_ERR(trans);
3001 ret = btrfs_commit_transaction(trans, root); 3018 ret = btrfs_commit_transaction(trans, root);
3002 BUG_ON(ret); 3019 if (ret)
3020 return ret;
3003 /* run commit again to drop the original snapshot */ 3021 /* run commit again to drop the original snapshot */
3004 trans = btrfs_join_transaction(root); 3022 trans = btrfs_join_transaction(root);
3005 if (IS_ERR(trans)) 3023 if (IS_ERR(trans))
3006 return PTR_ERR(trans); 3024 return PTR_ERR(trans);
3007 btrfs_commit_transaction(trans, root); 3025 ret = btrfs_commit_transaction(trans, root);
3026 if (ret)
3027 return ret;
3008 ret = btrfs_write_and_wait_transaction(NULL, root); 3028 ret = btrfs_write_and_wait_transaction(NULL, root);
3009 BUG_ON(ret); 3029 if (ret) {
3030 btrfs_error(root->fs_info, ret,
3031 "Failed to sync btree inode to disk.");
3032 return ret;
3033 }
3010 3034
3011 ret = write_ctree_super(NULL, root, 0); 3035 ret = write_ctree_super(NULL, root, 0);
3012 return ret; 3036 return ret;
@@ -3122,10 +3146,9 @@ int close_ctree(struct btrfs_root *root)
3122int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) 3146int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
3123{ 3147{
3124 int ret; 3148 int ret;
3125 struct inode *btree_inode = buf->first_page->mapping->host; 3149 struct inode *btree_inode = buf->pages[0]->mapping->host;
3126 3150
3127 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf, 3151 ret = extent_buffer_uptodate(buf);
3128 NULL);
3129 if (!ret) 3152 if (!ret)
3130 return ret; 3153 return ret;
3131 3154
@@ -3136,16 +3159,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
3136 3159
3137int btrfs_set_buffer_uptodate(struct extent_buffer *buf) 3160int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
3138{ 3161{
3139 struct inode *btree_inode = buf->first_page->mapping->host; 3162 return set_extent_buffer_uptodate(buf);
3140 return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
3141 buf);
3142} 3163}
3143 3164
3144void btrfs_mark_buffer_dirty(struct extent_buffer *buf) 3165void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3145{ 3166{
3146 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 3167 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3147 u64 transid = btrfs_header_generation(buf); 3168 u64 transid = btrfs_header_generation(buf);
3148 struct inode *btree_inode = root->fs_info->btree_inode;
3149 int was_dirty; 3169 int was_dirty;
3150 3170
3151 btrfs_assert_tree_locked(buf); 3171 btrfs_assert_tree_locked(buf);
@@ -3157,8 +3177,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3157 (unsigned long long)root->fs_info->generation); 3177 (unsigned long long)root->fs_info->generation);
3158 WARN_ON(1); 3178 WARN_ON(1);
3159 } 3179 }
3160 was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 3180 was_dirty = set_extent_buffer_dirty(buf);
3161 buf);
3162 if (!was_dirty) { 3181 if (!was_dirty) {
3163 spin_lock(&root->fs_info->delalloc_lock); 3182 spin_lock(&root->fs_info->delalloc_lock);
3164 root->fs_info->dirty_metadata_bytes += buf->len; 3183 root->fs_info->dirty_metadata_bytes += buf->len;
@@ -3212,12 +3231,8 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
3212 3231
3213int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) 3232int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
3214{ 3233{
3215 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 3234 struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
3216 int ret; 3235 return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
3217 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
3218 if (ret == 0)
3219 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
3220 return ret;
3221} 3236}
3222 3237
3223static int btree_lock_page_hook(struct page *page, void *data, 3238static int btree_lock_page_hook(struct page *page, void *data,
@@ -3225,17 +3240,21 @@ static int btree_lock_page_hook(struct page *page, void *data,
3225{ 3240{
3226 struct inode *inode = page->mapping->host; 3241 struct inode *inode = page->mapping->host;
3227 struct btrfs_root *root = BTRFS_I(inode)->root; 3242 struct btrfs_root *root = BTRFS_I(inode)->root;
3228 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3229 struct extent_buffer *eb; 3243 struct extent_buffer *eb;
3230 unsigned long len;
3231 u64 bytenr = page_offset(page);
3232 3244
3233 if (page->private == EXTENT_PAGE_PRIVATE) 3245 /*
3246 * We culled this eb but the page is still hanging out on the mapping,
3247 * carry on.
3248 */
3249 if (!PagePrivate(page))
3234 goto out; 3250 goto out;
3235 3251
3236 len = page->private >> 2; 3252 eb = (struct extent_buffer *)page->private;
3237 eb = find_extent_buffer(io_tree, bytenr, len); 3253 if (!eb) {
3238 if (!eb) 3254 WARN_ON(1);
3255 goto out;
3256 }
3257 if (page != eb->pages[0])
3239 goto out; 3258 goto out;
3240 3259
3241 if (!btrfs_try_tree_write_lock(eb)) { 3260 if (!btrfs_try_tree_write_lock(eb)) {
@@ -3254,7 +3273,6 @@ static int btree_lock_page_hook(struct page *page, void *data,
3254 } 3273 }
3255 3274
3256 btrfs_tree_unlock(eb); 3275 btrfs_tree_unlock(eb);
3257 free_extent_buffer(eb);
3258out: 3276out:
3259 if (!trylock_page(page)) { 3277 if (!trylock_page(page)) {
3260 flush_fn(data); 3278 flush_fn(data);
@@ -3263,15 +3281,23 @@ out:
3263 return 0; 3281 return 0;
3264} 3282}
3265 3283
3266static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 3284static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3267 int read_only) 3285 int read_only)
3268{ 3286{
3287 if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) {
3288 printk(KERN_ERR "btrfs: unsupported checksum algorithm\n");
3289 return -EINVAL;
3290 }
3291
3269 if (read_only) 3292 if (read_only)
3270 return; 3293 return 0;
3271 3294
3272 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 3295 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
3273 printk(KERN_WARNING "warning: mount fs with errors, " 3296 printk(KERN_WARNING "warning: mount fs with errors, "
3274 "running btrfsck is recommended\n"); 3297 "running btrfsck is recommended\n");
3298 }
3299
3300 return 0;
3275} 3301}
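
The new csum check is a plain bounds test: the on-disk csum type indexes the btrfs_csum_sizes[] table, so any value at or past ARRAY_SIZE() names an algorithm this kernel does not know. The same idiom generically (the single-entry table is an assumption for the sketch):

/* Sketch: bounds-check an on-disk enum against its lookup table.
 * The single-entry table (crc32c) is an assumption for illustration. */
static const int csum_sizes[] = { 4 };

static int csum_size_for(u16 type)
{
	if (type >= ARRAY_SIZE(csum_sizes))
		return -EINVAL;		/* unknown checksum algorithm */
	return csum_sizes[type];
}
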
3276 3302
3277int btrfs_error_commit_super(struct btrfs_root *root) 3303int btrfs_error_commit_super(struct btrfs_root *root)
@@ -3293,7 +3319,7 @@ int btrfs_error_commit_super(struct btrfs_root *root)
3293 return ret; 3319 return ret;
3294} 3320}
3295 3321
3296static int btrfs_destroy_ordered_operations(struct btrfs_root *root) 3322static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
3297{ 3323{
3298 struct btrfs_inode *btrfs_inode; 3324 struct btrfs_inode *btrfs_inode;
3299 struct list_head splice; 3325 struct list_head splice;
@@ -3315,11 +3341,9 @@ static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
3315 3341
3316 spin_unlock(&root->fs_info->ordered_extent_lock); 3342 spin_unlock(&root->fs_info->ordered_extent_lock);
3317 mutex_unlock(&root->fs_info->ordered_operations_mutex); 3343 mutex_unlock(&root->fs_info->ordered_operations_mutex);
3318
3319 return 0;
3320} 3344}
3321 3345
3322static int btrfs_destroy_ordered_extents(struct btrfs_root *root) 3346static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
3323{ 3347{
3324 struct list_head splice; 3348 struct list_head splice;
3325 struct btrfs_ordered_extent *ordered; 3349 struct btrfs_ordered_extent *ordered;
@@ -3351,12 +3375,10 @@ static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
3351 } 3375 }
3352 3376
3353 spin_unlock(&root->fs_info->ordered_extent_lock); 3377 spin_unlock(&root->fs_info->ordered_extent_lock);
3354
3355 return 0;
3356} 3378}
3357 3379
3358static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 3380int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3359 struct btrfs_root *root) 3381 struct btrfs_root *root)
3360{ 3382{
3361 struct rb_node *node; 3383 struct rb_node *node;
3362 struct btrfs_delayed_ref_root *delayed_refs; 3384 struct btrfs_delayed_ref_root *delayed_refs;
@@ -3365,6 +3387,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3365 3387
3366 delayed_refs = &trans->delayed_refs; 3388 delayed_refs = &trans->delayed_refs;
3367 3389
3390again:
3368 spin_lock(&delayed_refs->lock); 3391 spin_lock(&delayed_refs->lock);
3369 if (delayed_refs->num_entries == 0) { 3392 if (delayed_refs->num_entries == 0) {
3370 spin_unlock(&delayed_refs->lock); 3393 spin_unlock(&delayed_refs->lock);
@@ -3386,6 +3409,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3386 struct btrfs_delayed_ref_head *head; 3409 struct btrfs_delayed_ref_head *head;
3387 3410
3388 head = btrfs_delayed_node_to_head(ref); 3411 head = btrfs_delayed_node_to_head(ref);
3412 spin_unlock(&delayed_refs->lock);
3389 mutex_lock(&head->mutex); 3413 mutex_lock(&head->mutex);
3390 kfree(head->extent_op); 3414 kfree(head->extent_op);
3391 delayed_refs->num_heads--; 3415 delayed_refs->num_heads--;
@@ -3393,8 +3417,9 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3393 delayed_refs->num_heads_ready--; 3417 delayed_refs->num_heads_ready--;
3394 list_del_init(&head->cluster); 3418 list_del_init(&head->cluster);
3395 mutex_unlock(&head->mutex); 3419 mutex_unlock(&head->mutex);
3420 btrfs_put_delayed_ref(ref);
3421 goto again;
3396 } 3422 }
3397
3398 spin_unlock(&delayed_refs->lock); 3423 spin_unlock(&delayed_refs->lock);
3399 btrfs_put_delayed_ref(ref); 3424 btrfs_put_delayed_ref(ref);
3400 3425
@@ -3407,7 +3432,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3407 return ret; 3432 return ret;
3408} 3433}
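
The restructured teardown drops the delayed-refs spinlock before taking head->mutex, a sleeping lock, then restarts the scan from the top since the tree may have changed in the window. That lock-ordering pattern, sketched with hypothetical locks and list helpers:

/* Sketch with hypothetical helpers: a mutex cannot be taken under
 * a spinlock, so drop the lock, do the sleeping work, and rescan. */
again:
	spin_lock(&refs->lock);
	ref = first_ref(refs);
	if (!ref) {
		spin_unlock(&refs->lock);
		return;
	}
	if (is_head(ref)) {
		spin_unlock(&refs->lock);	/* head->mutex may sleep */
		mutex_lock(&ref->mutex);
		detach_head(refs, ref);
		mutex_unlock(&ref->mutex);
		put_ref(ref);
		goto again;			/* the tree may have changed */
	}
	detach_ref(refs, ref);
	spin_unlock(&refs->lock);
	put_ref(ref);
	goto again;
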
3409 3434
3410static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) 3435static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
3411{ 3436{
3412 struct btrfs_pending_snapshot *snapshot; 3437 struct btrfs_pending_snapshot *snapshot;
3413 struct list_head splice; 3438 struct list_head splice;
@@ -3425,11 +3450,9 @@ static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
3425 3450
3426 kfree(snapshot); 3451 kfree(snapshot);
3427 } 3452 }
3428
3429 return 0;
3430} 3453}
3431 3454
3432static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) 3455static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3433{ 3456{
3434 struct btrfs_inode *btrfs_inode; 3457 struct btrfs_inode *btrfs_inode;
3435 struct list_head splice; 3458 struct list_head splice;
@@ -3449,8 +3472,6 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3449 } 3472 }
3450 3473
3451 spin_unlock(&root->fs_info->delalloc_lock); 3474 spin_unlock(&root->fs_info->delalloc_lock);
3452
3453 return 0;
3454} 3475}
3455 3476
3456static int btrfs_destroy_marked_extents(struct btrfs_root *root, 3477static int btrfs_destroy_marked_extents(struct btrfs_root *root,
@@ -3541,13 +3562,43 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
3541 return 0; 3562 return 0;
3542} 3563}
3543 3564
3544static int btrfs_cleanup_transaction(struct btrfs_root *root) 3565void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3566 struct btrfs_root *root)
3567{
3568 btrfs_destroy_delayed_refs(cur_trans, root);
3569 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
3570 cur_trans->dirty_pages.dirty_bytes);
3571
3572 /* FIXME: cleanup wait for commit */
3573 cur_trans->in_commit = 1;
3574 cur_trans->blocked = 1;
3575 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
3576 wake_up(&root->fs_info->transaction_blocked_wait);
3577
3578 cur_trans->blocked = 0;
3579 if (waitqueue_active(&root->fs_info->transaction_wait))
3580 wake_up(&root->fs_info->transaction_wait);
3581
3582 cur_trans->commit_done = 1;
3583 if (waitqueue_active(&cur_trans->commit_wait))
3584 wake_up(&cur_trans->commit_wait);
3585
3586 btrfs_destroy_pending_snapshots(cur_trans);
3587
3588 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
3589 EXTENT_DIRTY);
3590
3591 /*
3592 memset(cur_trans, 0, sizeof(*cur_trans));
3593 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
3594 */
3595}
3596
3597int btrfs_cleanup_transaction(struct btrfs_root *root)
3545{ 3598{
3546 struct btrfs_transaction *t; 3599 struct btrfs_transaction *t;
3547 LIST_HEAD(list); 3600 LIST_HEAD(list);
3548 3601
3549 WARN_ON(1);
3550
3551 mutex_lock(&root->fs_info->transaction_kthread_mutex); 3602 mutex_lock(&root->fs_info->transaction_kthread_mutex);
3552 3603
3553 spin_lock(&root->fs_info->trans_lock); 3604 spin_lock(&root->fs_info->trans_lock);
@@ -3612,6 +3663,17 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3612 return 0; 3663 return 0;
3613} 3664}
3614 3665
3666static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
3667 u64 start, u64 end,
3668 struct extent_state *state)
3669{
3670 struct super_block *sb = page->mapping->host->i_sb;
3671 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
3672 btrfs_error(fs_info, -EIO,
3673 "Error occurred while writing out btree at %llu", start);
3674 return -EIO;
3675}
3676
3615static struct extent_io_ops btree_extent_io_ops = { 3677static struct extent_io_ops btree_extent_io_ops = {
3616 .write_cache_pages_lock_hook = btree_lock_page_hook, 3678 .write_cache_pages_lock_hook = btree_lock_page_hook,
3617 .readpage_end_io_hook = btree_readpage_end_io_hook, 3679 .readpage_end_io_hook = btree_readpage_end_io_hook,
@@ -3619,4 +3681,5 @@ static struct extent_io_ops btree_extent_io_ops = {
3619 .submit_bio_hook = btree_submit_bio_hook, 3681 .submit_bio_hook = btree_submit_bio_hook,
3620 /* note we're sharing with inode.c for the merge bio hook */ 3682 /* note we're sharing with inode.c for the merge bio hook */
3621 .merge_bio_hook = btrfs_merge_bio_hook, 3683 .merge_bio_hook = btrfs_merge_bio_hook,
3684 .writepage_io_failed_hook = btree_writepage_io_failed_hook,
3622}; 3685};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index e4bc4741319b..a7ace1a2dd12 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -44,8 +44,8 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
44 int mirror_num, struct extent_buffer **eb); 44 int mirror_num, struct extent_buffer **eb);
45struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 45struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
46 u64 bytenr, u32 blocksize); 46 u64 bytenr, u32 blocksize);
47int clean_tree_block(struct btrfs_trans_handle *trans, 47void clean_tree_block(struct btrfs_trans_handle *trans,
48 struct btrfs_root *root, struct extent_buffer *buf); 48 struct btrfs_root *root, struct extent_buffer *buf);
49int open_ctree(struct super_block *sb, 49int open_ctree(struct super_block *sb,
50 struct btrfs_fs_devices *fs_devices, 50 struct btrfs_fs_devices *fs_devices,
51 char *options); 51 char *options);
@@ -64,7 +64,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
64int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); 64int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
65void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); 65void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
66void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); 66void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
67int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); 67void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
68void btrfs_mark_buffer_dirty(struct extent_buffer *buf); 68void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
69int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); 69int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
70int btrfs_set_buffer_uptodate(struct extent_buffer *buf); 70int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
@@ -85,6 +85,10 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
85 struct btrfs_fs_info *fs_info); 85 struct btrfs_fs_info *fs_info);
86int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 86int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
87 struct btrfs_root *root); 87 struct btrfs_root *root);
88int btrfs_cleanup_transaction(struct btrfs_root *root);
89void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
90 struct btrfs_root *root);
91void btrfs_abort_devices(struct btrfs_root *root);
88 92
89#ifdef CONFIG_DEBUG_LOCK_ALLOC 93#ifdef CONFIG_DEBUG_LOCK_ALLOC
90void btrfs_init_lockdep(void); 94void btrfs_init_lockdep(void);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 5f77166fd01c..e887ee62b6d4 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -193,7 +193,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
193 if (ret < 0) 193 if (ret < 0)
194 goto fail; 194 goto fail;
195 195
196 BUG_ON(ret == 0); 196 BUG_ON(ret == 0); /* Key with offset of -1 found */
197 if (path->slots[0] == 0) { 197 if (path->slots[0] == 0) {
198 ret = -ENOENT; 198 ret = -ENOENT;
199 goto fail; 199 goto fail;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 37e0a800d34e..a84420491c11 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -245,7 +245,7 @@ static int exclude_super_stripes(struct btrfs_root *root,
245 cache->bytes_super += stripe_len; 245 cache->bytes_super += stripe_len;
246 ret = add_excluded_extent(root, cache->key.objectid, 246 ret = add_excluded_extent(root, cache->key.objectid,
247 stripe_len); 247 stripe_len);
248 BUG_ON(ret); 248 BUG_ON(ret); /* -ENOMEM */
249 } 249 }
250 250
251 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 251 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -253,13 +253,13 @@ static int exclude_super_stripes(struct btrfs_root *root,
253 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 253 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
254 cache->key.objectid, bytenr, 254 cache->key.objectid, bytenr,
255 0, &logical, &nr, &stripe_len); 255 0, &logical, &nr, &stripe_len);
256 BUG_ON(ret); 256 BUG_ON(ret); /* -ENOMEM */
257 257
258 while (nr--) { 258 while (nr--) {
259 cache->bytes_super += stripe_len; 259 cache->bytes_super += stripe_len;
260 ret = add_excluded_extent(root, logical[nr], 260 ret = add_excluded_extent(root, logical[nr],
261 stripe_len); 261 stripe_len);
262 BUG_ON(ret); 262 BUG_ON(ret); /* -ENOMEM */
263 } 263 }
264 264
265 kfree(logical); 265 kfree(logical);
@@ -321,7 +321,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
321 total_added += size; 321 total_added += size;
322 ret = btrfs_add_free_space(block_group, start, 322 ret = btrfs_add_free_space(block_group, start,
323 size); 323 size);
324 BUG_ON(ret); 324 BUG_ON(ret); /* -ENOMEM or logic error */
325 start = extent_end + 1; 325 start = extent_end + 1;
326 } else { 326 } else {
327 break; 327 break;
@@ -332,7 +332,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
332 size = end - start; 332 size = end - start;
333 total_added += size; 333 total_added += size;
334 ret = btrfs_add_free_space(block_group, start, size); 334 ret = btrfs_add_free_space(block_group, start, size);
335 BUG_ON(ret); 335 BUG_ON(ret); /* -ENOMEM or logic error */
336 } 336 }
337 337
338 return total_added; 338 return total_added;
@@ -474,7 +474,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
474 int ret = 0; 474 int ret = 0;
475 475
476 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); 476 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
477 BUG_ON(!caching_ctl); 477 if (!caching_ctl)
478 return -ENOMEM;
478 479
479 INIT_LIST_HEAD(&caching_ctl->list); 480 INIT_LIST_HEAD(&caching_ctl->list);
480 mutex_init(&caching_ctl->mutex); 481 mutex_init(&caching_ctl->mutex);
@@ -982,7 +983,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
982 ret = btrfs_next_leaf(root, path); 983 ret = btrfs_next_leaf(root, path);
983 if (ret < 0) 984 if (ret < 0)
984 return ret; 985 return ret;
985 BUG_ON(ret > 0); 986 BUG_ON(ret > 0); /* Corruption */
986 leaf = path->nodes[0]; 987 leaf = path->nodes[0];
987 } 988 }
988 btrfs_item_key_to_cpu(leaf, &found_key, 989 btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1008,9 +1009,9 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
1008 new_size + extra_size, 1); 1009 new_size + extra_size, 1);
1009 if (ret < 0) 1010 if (ret < 0)
1010 return ret; 1011 return ret;
1011 BUG_ON(ret); 1012 BUG_ON(ret); /* Corruption */
1012 1013
1013 ret = btrfs_extend_item(trans, root, path, new_size); 1014 btrfs_extend_item(trans, root, path, new_size);
1014 1015
1015 leaf = path->nodes[0]; 1016 leaf = path->nodes[0];
1016 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1017 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1478,7 +1479,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1478 err = ret; 1479 err = ret;
1479 goto out; 1480 goto out;
1480 } 1481 }
1481 BUG_ON(ret); 1482 if (ret && !insert) {
1483 err = -ENOENT;
1484 goto out;
1485 }
1486 BUG_ON(ret); /* Corruption */
1482 1487
1483 leaf = path->nodes[0]; 1488 leaf = path->nodes[0];
1484 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1489 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -1592,13 +1597,13 @@ out:
1592 * helper to add new inline back ref 1597 * helper to add new inline back ref
1593 */ 1598 */
1594static noinline_for_stack 1599static noinline_for_stack
1595int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1600void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1596 struct btrfs_root *root, 1601 struct btrfs_root *root,
1597 struct btrfs_path *path, 1602 struct btrfs_path *path,
1598 struct btrfs_extent_inline_ref *iref, 1603 struct btrfs_extent_inline_ref *iref,
1599 u64 parent, u64 root_objectid, 1604 u64 parent, u64 root_objectid,
1600 u64 owner, u64 offset, int refs_to_add, 1605 u64 owner, u64 offset, int refs_to_add,
1601 struct btrfs_delayed_extent_op *extent_op) 1606 struct btrfs_delayed_extent_op *extent_op)
1602{ 1607{
1603 struct extent_buffer *leaf; 1608 struct extent_buffer *leaf;
1604 struct btrfs_extent_item *ei; 1609 struct btrfs_extent_item *ei;
@@ -1608,7 +1613,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1608 u64 refs; 1613 u64 refs;
1609 int size; 1614 int size;
1610 int type; 1615 int type;
1611 int ret;
1612 1616
1613 leaf = path->nodes[0]; 1617 leaf = path->nodes[0];
1614 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1618 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1617,7 +1621,7 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1617 type = extent_ref_type(parent, owner); 1621 type = extent_ref_type(parent, owner);
1618 size = btrfs_extent_inline_ref_size(type); 1622 size = btrfs_extent_inline_ref_size(type);
1619 1623
1620 ret = btrfs_extend_item(trans, root, path, size); 1624 btrfs_extend_item(trans, root, path, size);
1621 1625
1622 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1626 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1623 refs = btrfs_extent_refs(leaf, ei); 1627 refs = btrfs_extent_refs(leaf, ei);
@@ -1652,7 +1656,6 @@ int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1652 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1656 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1653 } 1657 }
1654 btrfs_mark_buffer_dirty(leaf); 1658 btrfs_mark_buffer_dirty(leaf);
1655 return 0;
1656} 1659}
1657 1660
1658static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1661static int lookup_extent_backref(struct btrfs_trans_handle *trans,
@@ -1687,12 +1690,12 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1687 * helper to update/remove inline back ref 1690 * helper to update/remove inline back ref
1688 */ 1691 */
1689static noinline_for_stack 1692static noinline_for_stack
1690int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1693void update_inline_extent_backref(struct btrfs_trans_handle *trans,
1691 struct btrfs_root *root, 1694 struct btrfs_root *root,
1692 struct btrfs_path *path, 1695 struct btrfs_path *path,
1693 struct btrfs_extent_inline_ref *iref, 1696 struct btrfs_extent_inline_ref *iref,
1694 int refs_to_mod, 1697 int refs_to_mod,
1695 struct btrfs_delayed_extent_op *extent_op) 1698 struct btrfs_delayed_extent_op *extent_op)
1696{ 1699{
1697 struct extent_buffer *leaf; 1700 struct extent_buffer *leaf;
1698 struct btrfs_extent_item *ei; 1701 struct btrfs_extent_item *ei;
@@ -1703,7 +1706,6 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1703 u32 item_size; 1706 u32 item_size;
1704 int size; 1707 int size;
1705 int type; 1708 int type;
1706 int ret;
1707 u64 refs; 1709 u64 refs;
1708 1710
1709 leaf = path->nodes[0]; 1711 leaf = path->nodes[0];
@@ -1745,10 +1747,9 @@ int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1745 memmove_extent_buffer(leaf, ptr, ptr + size, 1747 memmove_extent_buffer(leaf, ptr, ptr + size,
1746 end - ptr - size); 1748 end - ptr - size);
1747 item_size -= size; 1749 item_size -= size;
1748 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1750 btrfs_truncate_item(trans, root, path, item_size, 1);
1749 } 1751 }
1750 btrfs_mark_buffer_dirty(leaf); 1752 btrfs_mark_buffer_dirty(leaf);
1751 return 0;
1752} 1753}
1753 1754
1754static noinline_for_stack 1755static noinline_for_stack
@@ -1768,13 +1769,13 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1768 root_objectid, owner, offset, 1); 1769 root_objectid, owner, offset, 1);
1769 if (ret == 0) { 1770 if (ret == 0) {
1770 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1771 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1771 ret = update_inline_extent_backref(trans, root, path, iref, 1772 update_inline_extent_backref(trans, root, path, iref,
1772 refs_to_add, extent_op); 1773 refs_to_add, extent_op);
1773 } else if (ret == -ENOENT) { 1774 } else if (ret == -ENOENT) {
1774 ret = setup_inline_extent_backref(trans, root, path, iref, 1775 setup_inline_extent_backref(trans, root, path, iref, parent,
1775 parent, root_objectid, 1776 root_objectid, owner, offset,
1776 owner, offset, refs_to_add, 1777 refs_to_add, extent_op);
1777 extent_op); 1778 ret = 0;
1778 } 1779 }
1779 return ret; 1780 return ret;
1780} 1781}
@@ -1804,12 +1805,12 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1804 struct btrfs_extent_inline_ref *iref, 1805 struct btrfs_extent_inline_ref *iref,
1805 int refs_to_drop, int is_data) 1806 int refs_to_drop, int is_data)
1806{ 1807{
1807 int ret; 1808 int ret = 0;
1808 1809
1809 BUG_ON(!is_data && refs_to_drop != 1); 1810 BUG_ON(!is_data && refs_to_drop != 1);
1810 if (iref) { 1811 if (iref) {
1811 ret = update_inline_extent_backref(trans, root, path, iref, 1812 update_inline_extent_backref(trans, root, path, iref,
1812 -refs_to_drop, NULL); 1813 -refs_to_drop, NULL);
1813 } else if (is_data) { 1814 } else if (is_data) {
1814 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1815 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1815 } else { 1816 } else {
@@ -1835,6 +1836,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1835 /* Tell the block device(s) that the sectors can be discarded */ 1836 /* Tell the block device(s) that the sectors can be discarded */
1836 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, 1837 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
1837 bytenr, &num_bytes, &bbio, 0); 1838 bytenr, &num_bytes, &bbio, 0);
1839 /* Error condition is -ENOMEM */
1838 if (!ret) { 1840 if (!ret) {
1839 struct btrfs_bio_stripe *stripe = bbio->stripes; 1841 struct btrfs_bio_stripe *stripe = bbio->stripes;
1840 int i; 1842 int i;
@@ -1850,7 +1852,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1850 if (!ret) 1852 if (!ret)
1851 discarded_bytes += stripe->length; 1853 discarded_bytes += stripe->length;
1852 else if (ret != -EOPNOTSUPP) 1854 else if (ret != -EOPNOTSUPP)
1853 break; 1855 break; /* Logic errors or -ENOMEM; -EIO is not expected here (JDM) */
1854 1856
1855 /* 1857 /*
1856 * Just in case we get back EOPNOTSUPP for some reason, 1858 * Just in case we get back EOPNOTSUPP for some reason,
@@ -1869,6 +1871,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1869 return ret; 1871 return ret;
1870} 1872}
1871 1873
1874/* Can return -ENOMEM */
1872int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1875int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1873 struct btrfs_root *root, 1876 struct btrfs_root *root,
1874 u64 bytenr, u64 num_bytes, u64 parent, 1877 u64 bytenr, u64 num_bytes, u64 parent,
@@ -1944,7 +1947,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1944 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1947 ret = insert_extent_backref(trans, root->fs_info->extent_root,
1945 path, bytenr, parent, root_objectid, 1948 path, bytenr, parent, root_objectid,
1946 owner, offset, refs_to_add); 1949 owner, offset, refs_to_add);
1947 BUG_ON(ret); 1950 if (ret)
1951 btrfs_abort_transaction(trans, root, ret);
1948out: 1952out:
1949 btrfs_free_path(path); 1953 btrfs_free_path(path);
1950 return err; 1954 return err;
@@ -2031,6 +2035,9 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2031 int ret; 2035 int ret;
2032 int err = 0; 2036 int err = 0;
2033 2037
2038 if (trans->aborted)
2039 return 0;
2040
2034 path = btrfs_alloc_path(); 2041 path = btrfs_alloc_path();
2035 if (!path) 2042 if (!path)
2036 return -ENOMEM; 2043 return -ENOMEM;
@@ -2128,7 +2135,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2128 struct btrfs_delayed_extent_op *extent_op, 2135 struct btrfs_delayed_extent_op *extent_op,
2129 int insert_reserved) 2136 int insert_reserved)
2130{ 2137{
2131 int ret; 2138 int ret = 0;
2139
2140 if (trans->aborted)
2141 return 0;
2142
2132 if (btrfs_delayed_ref_is_head(node)) { 2143 if (btrfs_delayed_ref_is_head(node)) {
2133 struct btrfs_delayed_ref_head *head; 2144 struct btrfs_delayed_ref_head *head;
2134 /* 2145 /*
@@ -2146,11 +2157,10 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2146 ret = btrfs_del_csums(trans, root, 2157 ret = btrfs_del_csums(trans, root,
2147 node->bytenr, 2158 node->bytenr,
2148 node->num_bytes); 2159 node->num_bytes);
2149 BUG_ON(ret);
2150 } 2160 }
2151 } 2161 }
2152 mutex_unlock(&head->mutex); 2162 mutex_unlock(&head->mutex);
2153 return 0; 2163 return ret;
2154 } 2164 }
2155 2165
2156 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2166 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
@@ -2197,6 +2207,10 @@ again:
2197 return NULL; 2207 return NULL;
2198} 2208}
2199 2209
2210/*
2211 * Returns 0 on success or if called with an already aborted transaction.
2212 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
2213 */
2200static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2214static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2201 struct btrfs_root *root, 2215 struct btrfs_root *root,
2202 struct list_head *cluster) 2216 struct list_head *cluster)
@@ -2285,9 +2299,13 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2285 2299
2286 ret = run_delayed_extent_op(trans, root, 2300 ret = run_delayed_extent_op(trans, root,
2287 ref, extent_op); 2301 ref, extent_op);
2288 BUG_ON(ret);
2289 kfree(extent_op); 2302 kfree(extent_op);
2290 2303
2304 if (ret) {
2305 printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
2306 return ret;
2307 }
2308
2291 goto next; 2309 goto next;
2292 } 2310 }
2293 2311
@@ -2308,11 +2326,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2308 2326
2309 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2327 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2310 must_insert_reserved); 2328 must_insert_reserved);
2311 BUG_ON(ret);
2312 2329
2313 btrfs_put_delayed_ref(ref); 2330 btrfs_put_delayed_ref(ref);
2314 kfree(extent_op); 2331 kfree(extent_op);
2315 count++; 2332 count++;
2333
2334 if (ret) {
2335 printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
2336 return ret;
2337 }
2338
2316next: 2339next:
2317 do_chunk_alloc(trans, root->fs_info->extent_root, 2340 do_chunk_alloc(trans, root->fs_info->extent_root,
2318 2 * 1024 * 1024, 2341 2 * 1024 * 1024,
@@ -2347,6 +2370,9 @@ static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
2347 * 0, which means to process everything in the tree at the start 2370 * 0, which means to process everything in the tree at the start
2348 * of the run (but not newly added entries), or it can be some target 2371 * of the run (but not newly added entries), or it can be some target
2349 * number you'd like to process. 2372 * number you'd like to process.
2373 *
2374 * Returns 0 on success or if called with an aborted transaction
2375 * Returns <0 on error and aborts the transaction
2350 */ 2376 */
2351int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2377int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2352 struct btrfs_root *root, unsigned long count) 2378 struct btrfs_root *root, unsigned long count)
@@ -2362,6 +2388,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2362 unsigned long num_refs = 0; 2388 unsigned long num_refs = 0;
2363 int consider_waiting; 2389 int consider_waiting;
2364 2390
2391 /* We'll clean this up in btrfs_cleanup_transaction */
2392 if (trans->aborted)
2393 return 0;
2394
2365 if (root == root->fs_info->extent_root) 2395 if (root == root->fs_info->extent_root)
2366 root = root->fs_info->tree_root; 2396 root = root->fs_info->tree_root;
2367 2397
@@ -2419,7 +2449,11 @@ again:
2419 } 2449 }
2420 2450
2421 ret = run_clustered_refs(trans, root, &cluster); 2451 ret = run_clustered_refs(trans, root, &cluster);
2422 BUG_ON(ret < 0); 2452 if (ret < 0) {
2453 spin_unlock(&delayed_refs->lock);
2454 btrfs_abort_transaction(trans, root, ret);
2455 return ret;
2456 }
2423 2457
2424 count -= min_t(unsigned long, ret, count); 2458 count -= min_t(unsigned long, ret, count);
2425 2459
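
Two idioms recur through the extent-tree hunks above: an early `if (trans->aborted) return 0;` so an already-failed transaction falls through quietly to cleanup, and `btrfs_abort_transaction()` at the point where an error is first seen. A stand-alone model of the pattern (hypothetical names, not the kernel API):

#include <errno.h>
#include <stdio.h>

struct trans { int aborted; int errcode; };

static void abort_trans(struct trans *t, int err)
{
	if (!t->aborted) {          /* record only the first failure */
		t->aborted = 1;
		t->errcode = err;
	}
}

static int run_refs(struct trans *t, int fail)
{
	if (t->aborted)
		return 0;               /* already dead: succeed quietly, clean up later */
	if (fail) {
		abort_trans(t, -EIO);   /* mark the whole transaction, then propagate */
		return -EIO;
	}
	return 0;
}

int main(void)
{
	struct trans t = { 0, 0 };
	run_refs(&t, 1);                              /* first call fails and aborts */
	printf("second call: %d\n", run_refs(&t, 0)); /* returns 0, does no work */
	return 0;
}
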
@@ -2584,7 +2618,7 @@ static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2584 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2618 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2585 if (ret < 0) 2619 if (ret < 0)
2586 goto out; 2620 goto out;
2587 BUG_ON(ret == 0); 2621 BUG_ON(ret == 0); /* Corruption */
2588 2622
2589 ret = -ENOENT; 2623 ret = -ENOENT;
2590 if (path->slots[0] == 0) 2624 if (path->slots[0] == 0)
@@ -2738,7 +2772,6 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2738 } 2772 }
2739 return 0; 2773 return 0;
2740fail: 2774fail:
2741 BUG();
2742 return ret; 2775 return ret;
2743} 2776}
2744 2777
@@ -2767,7 +2800,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
2767 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2800 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
2768 if (ret < 0) 2801 if (ret < 0)
2769 goto fail; 2802 goto fail;
2770 BUG_ON(ret); 2803 BUG_ON(ret); /* Corruption */
2771 2804
2772 leaf = path->nodes[0]; 2805 leaf = path->nodes[0];
2773 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2806 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -2775,8 +2808,10 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
2775 btrfs_mark_buffer_dirty(leaf); 2808 btrfs_mark_buffer_dirty(leaf);
2776 btrfs_release_path(path); 2809 btrfs_release_path(path);
2777fail: 2810fail:
2778 if (ret) 2811 if (ret) {
2812 btrfs_abort_transaction(trans, root, ret);
2779 return ret; 2813 return ret;
2814 }
2780 return 0; 2815 return 0;
2781 2816
2782} 2817}
@@ -2949,7 +2984,8 @@ again:
2949 if (last == 0) { 2984 if (last == 0) {
2950 err = btrfs_run_delayed_refs(trans, root, 2985 err = btrfs_run_delayed_refs(trans, root,
2951 (unsigned long)-1); 2986 (unsigned long)-1);
2952 BUG_ON(err); 2987 if (err) /* File system offline */
2988 goto out;
2953 } 2989 }
2954 2990
2955 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2991 cache = btrfs_lookup_first_block_group(root->fs_info, last);
@@ -2976,7 +3012,9 @@ again:
2976 last = cache->key.objectid + cache->key.offset; 3012 last = cache->key.objectid + cache->key.offset;
2977 3013
2978 err = write_one_cache_group(trans, root, path, cache); 3014 err = write_one_cache_group(trans, root, path, cache);
2979 BUG_ON(err); 3015 if (err) /* File system offline */
3016 goto out;
3017
2980 btrfs_put_block_group(cache); 3018 btrfs_put_block_group(cache);
2981 } 3019 }
2982 3020
@@ -2989,7 +3027,8 @@ again:
2989 if (last == 0) { 3027 if (last == 0) {
2990 err = btrfs_run_delayed_refs(trans, root, 3028 err = btrfs_run_delayed_refs(trans, root,
2991 (unsigned long)-1); 3029 (unsigned long)-1);
2992 BUG_ON(err); 3030 if (err) /* File system offline */
3031 goto out;
2993 } 3032 }
2994 3033
2995 cache = btrfs_lookup_first_block_group(root->fs_info, last); 3034 cache = btrfs_lookup_first_block_group(root->fs_info, last);
@@ -3014,20 +3053,21 @@ again:
3014 continue; 3053 continue;
3015 } 3054 }
3016 3055
3017 btrfs_write_out_cache(root, trans, cache, path); 3056 err = btrfs_write_out_cache(root, trans, cache, path);
3018 3057
3019 /* 3058 /*
3020 * If we didn't have an error then the cache state is still 3059 * If we didn't have an error then the cache state is still
3021 * NEED_WRITE, so we can set it to WRITTEN. 3060 * NEED_WRITE, so we can set it to WRITTEN.
3022 */ 3061 */
3023 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 3062 if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3024 cache->disk_cache_state = BTRFS_DC_WRITTEN; 3063 cache->disk_cache_state = BTRFS_DC_WRITTEN;
3025 last = cache->key.objectid + cache->key.offset; 3064 last = cache->key.objectid + cache->key.offset;
3026 btrfs_put_block_group(cache); 3065 btrfs_put_block_group(cache);
3027 } 3066 }
3067out:
3028 3068
3029 btrfs_free_path(path); 3069 btrfs_free_path(path);
3030 return 0; 3070 return err;
3031} 3071}
3032 3072
3033int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 3073int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
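
btrfs_write_dirty_block_groups now routes every failure through a single `out:` label and returns `err` instead of unconditionally returning 0; note that btrfs_write_out_cache's result also gates the NEED_WRITE -> WRITTEN state change. The shape of that control flow, reduced to a sketch with invented helpers:

#include <errno.h>

static int step_a(void) { return 0; }
static int step_b(void) { return -EIO; }

static int write_groups(void)
{
	int err = 0;

	err = step_a();
	if (err)                /* "file system offline" class of failure */
		goto out;
	err = step_b();
	if (err)
		goto out;
out:
	/* shared teardown (the real code frees the path here) */
	return err;             /* 0 only if every step succeeded */
}

int main(void) { return write_groups() ? 1 : 0; }
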
@@ -3098,11 +3138,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3098 3138
3099static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 3139static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3100{ 3140{
3101 u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; 3141 u64 extra_flags = chunk_to_extended(flags) &
3102 3142 BTRFS_EXTENDED_PROFILE_MASK;
3103 /* chunk -> extended profile */
3104 if (extra_flags == 0)
3105 extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
3106 3143
3107 if (flags & BTRFS_BLOCK_GROUP_DATA) 3144 if (flags & BTRFS_BLOCK_GROUP_DATA)
3108 fs_info->avail_data_alloc_bits |= extra_flags; 3145 fs_info->avail_data_alloc_bits |= extra_flags;
@@ -3113,6 +3150,35 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3113} 3150}
3114 3151
3115/* 3152/*
3153 * returns target flags in extended format or 0 if restripe for this
3154 * chunk_type is not in progress
3155 */
3156static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3157{
3158 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3159 u64 target = 0;
3160
3161 BUG_ON(!mutex_is_locked(&fs_info->volume_mutex) &&
3162 !spin_is_locked(&fs_info->balance_lock));
3163
3164 if (!bctl)
3165 return 0;
3166
3167 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3168 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3169 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3170 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3171 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3172 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3173 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3174 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3175 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3176 }
3177
3178 return target;
3179}
3180
3181/*
3116 * @flags: available profiles in extended format (see ctree.h) 3182 * @flags: available profiles in extended format (see ctree.h)
3117 * 3183 *
3118 * Returns reduced profile in chunk format. If profile changing is in 3184 * Returns reduced profile in chunk format. If profile changing is in
@@ -3128,31 +3194,19 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3128 */ 3194 */
3129 u64 num_devices = root->fs_info->fs_devices->rw_devices + 3195 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3130 root->fs_info->fs_devices->missing_devices; 3196 root->fs_info->fs_devices->missing_devices;
3197 u64 target;
3131 3198
3132 /* pick restriper's target profile if it's available */ 3199 /*
3200 * see if restripe for this chunk_type is in progress; if so,
3201 * try to reduce to the target profile
3202 */
3133 spin_lock(&root->fs_info->balance_lock); 3203 spin_lock(&root->fs_info->balance_lock);
3134 if (root->fs_info->balance_ctl) { 3204 target = get_restripe_target(root->fs_info, flags);
3135 struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; 3205 if (target) {
3136 u64 tgt = 0; 3206 /* pick target profile only if it's already available */
3137 3207 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
3138 if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
3139 (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3140 (flags & bctl->data.target)) {
3141 tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3142 } else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
3143 (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3144 (flags & bctl->sys.target)) {
3145 tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3146 } else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
3147 (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3148 (flags & bctl->meta.target)) {
3149 tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3150 }
3151
3152 if (tgt) {
3153 spin_unlock(&root->fs_info->balance_lock); 3208 spin_unlock(&root->fs_info->balance_lock);
3154 flags = tgt; 3209 return extended_to_chunk(target);
3155 goto out;
3156 } 3210 }
3157 } 3211 }
3158 spin_unlock(&root->fs_info->balance_lock); 3212 spin_unlock(&root->fs_info->balance_lock);
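
get_restripe_target() centralizes what btrfs_reduce_alloc_profile used to open-code: if balance is converting this chunk type, report the target profile in extended form, and the caller adopts it only when the target bits are actually available. A user-space model of the selection (flag values are made up, not btrfs's):

#include <stdint.h>
#include <stdio.h>

#define GRP_DATA    (1ULL << 0)  /* hypothetical stand-ins for the      */
#define GRP_META    (1ULL << 1)  /* BTRFS_BLOCK_GROUP_* / balance flags */
#define ARG_CONVERT (1ULL << 8)

struct balance_args { uint64_t flags, target; };
struct balance_ctl  { struct balance_args data, meta; };

static uint64_t restripe_target(const struct balance_ctl *bctl, uint64_t flags)
{
	if (!bctl)
		return 0;                       /* no balance running */
	if ((flags & GRP_DATA) && (bctl->data.flags & ARG_CONVERT))
		return GRP_DATA | bctl->data.target;
	if ((flags & GRP_META) && (bctl->meta.flags & ARG_CONVERT))
		return GRP_META | bctl->meta.target;
	return 0;                           /* this chunk type is not being converted */
}

int main(void)
{
	struct balance_ctl ctl = { .data = { ARG_CONVERT, 1ULL << 16 } };
	printf("%llx\n", (unsigned long long)restripe_target(&ctl, GRP_DATA));
	return 0;
}
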
@@ -3180,10 +3234,7 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3180 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 3234 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
3181 } 3235 }
3182 3236
3183out: 3237 return extended_to_chunk(flags);
3184 /* extended -> chunk profile */
3185 flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
3186 return flags;
3187} 3238}
3188 3239
3189static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) 3240static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
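
The duplicated "/* chunk -> extended profile */" and "/* extended -> chunk profile */" fix-ups are gone; set_avail_alloc_bits and btrfs_reduce_alloc_profile now go through chunk_to_extended()/extended_to_chunk() helpers. The idea is a reversible encoding: the extended format makes "single" an explicit bit so a zero profile is unambiguous. A toy version, with invented bit positions:

#include <assert.h>
#include <stdint.h>

#define PROFILE_MASK 0x0fULL      /* hypothetical RAID/DUP profile bits */
#define BIT_SINGLE   (1ULL << 4)  /* stand-in for AVAIL_ALLOC_BIT_SINGLE */

static uint64_t chunk_to_extended(uint64_t flags)
{
	if ((flags & PROFILE_MASK) == 0)  /* "single" is implicit in chunk format... */
		flags |= BIT_SINGLE;          /* ...and explicit in extended format */
	return flags;
}

static uint64_t extended_to_chunk(uint64_t flags)
{
	return flags & ~BIT_SINGLE;       /* drop the synthetic bit again */
}

int main(void)
{
	uint64_t f = 0;                   /* single profile, chunk format */
	assert(extended_to_chunk(chunk_to_extended(f)) == f);
	return 0;
}
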
@@ -3312,8 +3363,7 @@ commit_trans:
3312 } 3363 }
3313 data_sinfo->bytes_may_use += bytes; 3364 data_sinfo->bytes_may_use += bytes;
3314 trace_btrfs_space_reservation(root->fs_info, "space_info", 3365 trace_btrfs_space_reservation(root->fs_info, "space_info",
3315 (u64)(unsigned long)data_sinfo, 3366 data_sinfo->flags, bytes, 1);
3316 bytes, 1);
3317 spin_unlock(&data_sinfo->lock); 3367 spin_unlock(&data_sinfo->lock);
3318 3368
3319 return 0; 3369 return 0;
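
Here and in the hunks that follow, every trace_btrfs_space_reservation() call site switches its tracing key from `(u64)(unsigned long)data_sinfo` — a kernel pointer, meaningless across runs and leaky in a trace log — to the space_info's flags, which are stable and human-readable. In miniature:

#include <stdint.h>
#include <stdio.h>

struct space_info { uint64_t flags; uint64_t bytes_may_use; };

/* hypothetical tracepoint: the key should identify *what* was reserved */
static void trace_reservation(uint64_t key, uint64_t bytes, int reserve)
{
	printf("space_info: %llx %s %llu\n", (unsigned long long)key,
	       reserve ? "reserve" : "release", (unsigned long long)bytes);
}

int main(void)
{
	struct space_info si = { .flags = 0x4 };
	/* before: trace_reservation((uint64_t)(uintptr_t)&si, 4096, 1); */
	trace_reservation(si.flags, 4096, 1);   /* after: stable, meaningful key */
	return 0;
}
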
@@ -3334,8 +3384,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3334 spin_lock(&data_sinfo->lock); 3384 spin_lock(&data_sinfo->lock);
3335 data_sinfo->bytes_may_use -= bytes; 3385 data_sinfo->bytes_may_use -= bytes;
3336 trace_btrfs_space_reservation(root->fs_info, "space_info", 3386 trace_btrfs_space_reservation(root->fs_info, "space_info",
3337 (u64)(unsigned long)data_sinfo, 3387 data_sinfo->flags, bytes, 0);
3338 bytes, 0);
3339 spin_unlock(&data_sinfo->lock); 3388 spin_unlock(&data_sinfo->lock);
3340} 3389}
3341 3390
@@ -3396,6 +3445,50 @@ static int should_alloc_chunk(struct btrfs_root *root,
3396 return 1; 3445 return 1;
3397} 3446}
3398 3447
3448static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
3449{
3450 u64 num_dev;
3451
3452 if (type & BTRFS_BLOCK_GROUP_RAID10 ||
3453 type & BTRFS_BLOCK_GROUP_RAID0)
3454 num_dev = root->fs_info->fs_devices->rw_devices;
3455 else if (type & BTRFS_BLOCK_GROUP_RAID1)
3456 num_dev = 2;
3457 else
3458 num_dev = 1; /* DUP or single */
3459
3460 /* metadata for updating devices and chunk tree */
3461 return btrfs_calc_trans_metadata_size(root, num_dev + 1);
3462}
3463
3464static void check_system_chunk(struct btrfs_trans_handle *trans,
3465 struct btrfs_root *root, u64 type)
3466{
3467 struct btrfs_space_info *info;
3468 u64 left;
3469 u64 thresh;
3470
3471 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3472 spin_lock(&info->lock);
3473 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
3474 info->bytes_reserved - info->bytes_readonly;
3475 spin_unlock(&info->lock);
3476
3477 thresh = get_system_chunk_thresh(root, type);
3478 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
3479 printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n",
3480 left, thresh, type);
3481 dump_space_info(info, 0, 0);
3482 }
3483
3484 if (left < thresh) {
3485 u64 flags;
3486
3487 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
3488 btrfs_alloc_chunk(trans, root, flags);
3489 }
3490}
3491
3399static int do_chunk_alloc(struct btrfs_trans_handle *trans, 3492static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3400 struct btrfs_root *extent_root, u64 alloc_bytes, 3493 struct btrfs_root *extent_root, u64 alloc_bytes,
3401 u64 flags, int force) 3494 u64 flags, int force)
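
check_system_chunk() guards chunk allocation: allocating a chunk must itself update the device and chunk trees, which live in SYSTEM block groups, so the free SYSTEM space is compared against a worst-case metadata cost for num_dev + 1 tree updates, and a new SYSTEM chunk is allocated first if it falls short. The arithmetic, modeled in user space (the per-update cost is a placeholder, not btrfs_calc_trans_metadata_size):

#include <stdint.h>
#include <stdio.h>

#define RAID10 0x1   /* hypothetical type bits */
#define RAID0  0x2
#define RAID1  0x4

static uint64_t system_chunk_thresh(uint64_t type, uint64_t rw_devices,
				    uint64_t cost_per_update)
{
	uint64_t num_dev;

	if (type & (RAID10 | RAID0))
		num_dev = rw_devices;  /* striped: every device gets an update */
	else if (type & RAID1)
		num_dev = 2;
	else
		num_dev = 1;           /* DUP or single */

	/* device updates plus one chunk-tree update */
	return (num_dev + 1) * cost_per_update;
}

int main(void)
{
	uint64_t left = 32 * 1024;
	uint64_t thresh = system_chunk_thresh(RAID1, 4, 16 * 1024);
	if (left < thresh)
		printf("need new SYSTEM chunk: left=%llu need=%llu\n",
		       (unsigned long long)left, (unsigned long long)thresh);
	return 0;
}
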
@@ -3405,15 +3498,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3405 int wait_for_alloc = 0; 3498 int wait_for_alloc = 0;
3406 int ret = 0; 3499 int ret = 0;
3407 3500
3408 BUG_ON(!profile_is_valid(flags, 0));
3409
3410 space_info = __find_space_info(extent_root->fs_info, flags); 3501 space_info = __find_space_info(extent_root->fs_info, flags);
3411 if (!space_info) { 3502 if (!space_info) {
3412 ret = update_space_info(extent_root->fs_info, flags, 3503 ret = update_space_info(extent_root->fs_info, flags,
3413 0, 0, &space_info); 3504 0, 0, &space_info);
3414 BUG_ON(ret); 3505 BUG_ON(ret); /* -ENOMEM */
3415 } 3506 }
3416 BUG_ON(!space_info); 3507 BUG_ON(!space_info); /* Logic error */
3417 3508
3418again: 3509again:
3419 spin_lock(&space_info->lock); 3510 spin_lock(&space_info->lock);
@@ -3468,6 +3559,12 @@ again:
3468 force_metadata_allocation(fs_info); 3559 force_metadata_allocation(fs_info);
3469 } 3560 }
3470 3561
3562 /*
3563 * Check if we have enough space in SYSTEM chunk because we may need
3564 * to update devices.
3565 */
3566 check_system_chunk(trans, extent_root, flags);
3567
3471 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3568 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3472 if (ret < 0 && ret != -ENOSPC) 3569 if (ret < 0 && ret != -ENOSPC)
3473 goto out; 3570 goto out;
@@ -3678,8 +3775,10 @@ again:
3678 ret = wait_event_interruptible(space_info->wait, 3775 ret = wait_event_interruptible(space_info->wait,
3679 !space_info->flush); 3776 !space_info->flush);
3680 /* Must have been interrupted, return */ 3777 /* Must have been interrupted, return */
3681 if (ret) 3778 if (ret) {
3779 printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__);
3682 return -EINTR; 3780 return -EINTR;
3781 }
3683 3782
3684 spin_lock(&space_info->lock); 3783 spin_lock(&space_info->lock);
3685 } 3784 }
@@ -3700,9 +3799,7 @@ again:
3700 if (used + orig_bytes <= space_info->total_bytes) { 3799 if (used + orig_bytes <= space_info->total_bytes) {
3701 space_info->bytes_may_use += orig_bytes; 3800 space_info->bytes_may_use += orig_bytes;
3702 trace_btrfs_space_reservation(root->fs_info, 3801 trace_btrfs_space_reservation(root->fs_info,
3703 "space_info", 3802 "space_info", space_info->flags, orig_bytes, 1);
3704 (u64)(unsigned long)space_info,
3705 orig_bytes, 1);
3706 ret = 0; 3803 ret = 0;
3707 } else { 3804 } else {
3708 /* 3805 /*
@@ -3771,9 +3868,7 @@ again:
3771 if (used + num_bytes < space_info->total_bytes + avail) { 3868 if (used + num_bytes < space_info->total_bytes + avail) {
3772 space_info->bytes_may_use += orig_bytes; 3869 space_info->bytes_may_use += orig_bytes;
3773 trace_btrfs_space_reservation(root->fs_info, 3870 trace_btrfs_space_reservation(root->fs_info,
3774 "space_info", 3871 "space_info", space_info->flags, orig_bytes, 1);
3775 (u64)(unsigned long)space_info,
3776 orig_bytes, 1);
3777 ret = 0; 3872 ret = 0;
3778 } else { 3873 } else {
3779 wait_ordered = true; 3874 wait_ordered = true;
@@ -3836,8 +3931,9 @@ out:
3836 return ret; 3931 return ret;
3837} 3932}
3838 3933
3839static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, 3934static struct btrfs_block_rsv *get_block_rsv(
3840 struct btrfs_root *root) 3935 const struct btrfs_trans_handle *trans,
3936 const struct btrfs_root *root)
3841{ 3937{
3842 struct btrfs_block_rsv *block_rsv = NULL; 3938 struct btrfs_block_rsv *block_rsv = NULL;
3843 3939
@@ -3918,8 +4014,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
3918 spin_lock(&space_info->lock); 4014 spin_lock(&space_info->lock);
3919 space_info->bytes_may_use -= num_bytes; 4015 space_info->bytes_may_use -= num_bytes;
3920 trace_btrfs_space_reservation(fs_info, "space_info", 4016 trace_btrfs_space_reservation(fs_info, "space_info",
3921 (u64)(unsigned long)space_info, 4017 space_info->flags, num_bytes, 0);
3922 num_bytes, 0);
3923 space_info->reservation_progress++; 4018 space_info->reservation_progress++;
3924 spin_unlock(&space_info->lock); 4019 spin_unlock(&space_info->lock);
3925 } 4020 }
@@ -4137,14 +4232,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4137 block_rsv->reserved += num_bytes; 4232 block_rsv->reserved += num_bytes;
4138 sinfo->bytes_may_use += num_bytes; 4233 sinfo->bytes_may_use += num_bytes;
4139 trace_btrfs_space_reservation(fs_info, "space_info", 4234 trace_btrfs_space_reservation(fs_info, "space_info",
4140 (u64)(unsigned long)sinfo, num_bytes, 1); 4235 sinfo->flags, num_bytes, 1);
4141 } 4236 }
4142 4237
4143 if (block_rsv->reserved >= block_rsv->size) { 4238 if (block_rsv->reserved >= block_rsv->size) {
4144 num_bytes = block_rsv->reserved - block_rsv->size; 4239 num_bytes = block_rsv->reserved - block_rsv->size;
4145 sinfo->bytes_may_use -= num_bytes; 4240 sinfo->bytes_may_use -= num_bytes;
4146 trace_btrfs_space_reservation(fs_info, "space_info", 4241 trace_btrfs_space_reservation(fs_info, "space_info",
4147 (u64)(unsigned long)sinfo, num_bytes, 0); 4242 sinfo->flags, num_bytes, 0);
4148 sinfo->reservation_progress++; 4243 sinfo->reservation_progress++;
4149 block_rsv->reserved = block_rsv->size; 4244 block_rsv->reserved = block_rsv->size;
4150 block_rsv->full = 1; 4245 block_rsv->full = 1;
@@ -4198,12 +4293,12 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
4198 return; 4293 return;
4199 4294
4200 trace_btrfs_space_reservation(root->fs_info, "transaction", 4295 trace_btrfs_space_reservation(root->fs_info, "transaction",
4201 (u64)(unsigned long)trans, 4296 trans->transid, trans->bytes_reserved, 0);
4202 trans->bytes_reserved, 0);
4203 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); 4297 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
4204 trans->bytes_reserved = 0; 4298 trans->bytes_reserved = 0;
4205} 4299}
4206 4300
4301/* Can only return 0 or -ENOSPC */
4207int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 4302int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
4208 struct inode *inode) 4303 struct inode *inode)
4209{ 4304{
@@ -4540,7 +4635,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4540 while (total) { 4635 while (total) {
4541 cache = btrfs_lookup_block_group(info, bytenr); 4636 cache = btrfs_lookup_block_group(info, bytenr);
4542 if (!cache) 4637 if (!cache)
4543 return -1; 4638 return -ENOENT;
4544 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | 4639 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
4545 BTRFS_BLOCK_GROUP_RAID1 | 4640 BTRFS_BLOCK_GROUP_RAID1 |
4546 BTRFS_BLOCK_GROUP_RAID10)) 4641 BTRFS_BLOCK_GROUP_RAID10))
@@ -4643,7 +4738,7 @@ int btrfs_pin_extent(struct btrfs_root *root,
4643 struct btrfs_block_group_cache *cache; 4738 struct btrfs_block_group_cache *cache;
4644 4739
4645 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 4740 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
4646 BUG_ON(!cache); 4741 BUG_ON(!cache); /* Logic error */
4647 4742
4648 pin_down_extent(root, cache, bytenr, num_bytes, reserved); 4743 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
4649 4744
@@ -4661,7 +4756,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
4661 struct btrfs_block_group_cache *cache; 4756 struct btrfs_block_group_cache *cache;
4662 4757
4663 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 4758 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
4664 BUG_ON(!cache); 4759 BUG_ON(!cache); /* Logic error */
4665 4760
4666 /* 4761 /*
4667 * pull in the free space cache (if any) so that our pin 4762 * pull in the free space cache (if any) so that our pin
@@ -4706,6 +4801,7 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4706{ 4801{
4707 struct btrfs_space_info *space_info = cache->space_info; 4802 struct btrfs_space_info *space_info = cache->space_info;
4708 int ret = 0; 4803 int ret = 0;
4804
4709 spin_lock(&space_info->lock); 4805 spin_lock(&space_info->lock);
4710 spin_lock(&cache->lock); 4806 spin_lock(&cache->lock);
4711 if (reserve != RESERVE_FREE) { 4807 if (reserve != RESERVE_FREE) {
@@ -4716,9 +4812,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4716 space_info->bytes_reserved += num_bytes; 4812 space_info->bytes_reserved += num_bytes;
4717 if (reserve == RESERVE_ALLOC) { 4813 if (reserve == RESERVE_ALLOC) {
4718 trace_btrfs_space_reservation(cache->fs_info, 4814 trace_btrfs_space_reservation(cache->fs_info,
4719 "space_info", 4815 "space_info", space_info->flags,
4720 (u64)(unsigned long)space_info, 4816 num_bytes, 0);
4721 num_bytes, 0);
4722 space_info->bytes_may_use -= num_bytes; 4817 space_info->bytes_may_use -= num_bytes;
4723 } 4818 }
4724 } 4819 }
@@ -4734,7 +4829,7 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4734 return ret; 4829 return ret;
4735} 4830}
4736 4831
4737int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 4832void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
4738 struct btrfs_root *root) 4833 struct btrfs_root *root)
4739{ 4834{
4740 struct btrfs_fs_info *fs_info = root->fs_info; 4835 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4764,7 +4859,6 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
4764 up_write(&fs_info->extent_commit_sem); 4859 up_write(&fs_info->extent_commit_sem);
4765 4860
4766 update_global_block_rsv(fs_info); 4861 update_global_block_rsv(fs_info);
4767 return 0;
4768} 4862}
4769 4863
4770static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 4864static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
@@ -4779,7 +4873,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
4779 if (cache) 4873 if (cache)
4780 btrfs_put_block_group(cache); 4874 btrfs_put_block_group(cache);
4781 cache = btrfs_lookup_block_group(fs_info, start); 4875 cache = btrfs_lookup_block_group(fs_info, start);
4782 BUG_ON(!cache); 4876 BUG_ON(!cache); /* Logic error */
4783 } 4877 }
4784 4878
4785 len = cache->key.objectid + cache->key.offset - start; 4879 len = cache->key.objectid + cache->key.offset - start;
@@ -4816,6 +4910,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4816 u64 end; 4910 u64 end;
4817 int ret; 4911 int ret;
4818 4912
4913 if (trans->aborted)
4914 return 0;
4915
4819 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4916 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
4820 unpin = &fs_info->freed_extents[1]; 4917 unpin = &fs_info->freed_extents[1];
4821 else 4918 else
@@ -4901,7 +4998,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4901 ret = remove_extent_backref(trans, extent_root, path, 4998 ret = remove_extent_backref(trans, extent_root, path,
4902 NULL, refs_to_drop, 4999 NULL, refs_to_drop,
4903 is_data); 5000 is_data);
4904 BUG_ON(ret); 5001 if (ret)
5002 goto abort;
4905 btrfs_release_path(path); 5003 btrfs_release_path(path);
4906 path->leave_spinning = 1; 5004 path->leave_spinning = 1;
4907 5005
@@ -4919,10 +5017,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4919 btrfs_print_leaf(extent_root, 5017 btrfs_print_leaf(extent_root,
4920 path->nodes[0]); 5018 path->nodes[0]);
4921 } 5019 }
4922 BUG_ON(ret); 5020 if (ret < 0)
5021 goto abort;
4923 extent_slot = path->slots[0]; 5022 extent_slot = path->slots[0];
4924 } 5023 }
4925 } else { 5024 } else if (ret == -ENOENT) {
4926 btrfs_print_leaf(extent_root, path->nodes[0]); 5025 btrfs_print_leaf(extent_root, path->nodes[0]);
4927 WARN_ON(1); 5026 WARN_ON(1);
4928 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 5027 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
@@ -4932,6 +5031,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4932 (unsigned long long)root_objectid, 5031 (unsigned long long)root_objectid,
4933 (unsigned long long)owner_objectid, 5032 (unsigned long long)owner_objectid,
4934 (unsigned long long)owner_offset); 5033 (unsigned long long)owner_offset);
5034 } else {
5035 goto abort;
4935 } 5036 }
4936 5037
4937 leaf = path->nodes[0]; 5038 leaf = path->nodes[0];
@@ -4941,7 +5042,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4941 BUG_ON(found_extent || extent_slot != path->slots[0]); 5042 BUG_ON(found_extent || extent_slot != path->slots[0]);
4942 ret = convert_extent_item_v0(trans, extent_root, path, 5043 ret = convert_extent_item_v0(trans, extent_root, path,
4943 owner_objectid, 0); 5044 owner_objectid, 0);
4944 BUG_ON(ret < 0); 5045 if (ret < 0)
5046 goto abort;
4945 5047
4946 btrfs_release_path(path); 5048 btrfs_release_path(path);
4947 path->leave_spinning = 1; 5049 path->leave_spinning = 1;
@@ -4958,7 +5060,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4958 (unsigned long long)bytenr); 5060 (unsigned long long)bytenr);
4959 btrfs_print_leaf(extent_root, path->nodes[0]); 5061 btrfs_print_leaf(extent_root, path->nodes[0]);
4960 } 5062 }
4961 BUG_ON(ret); 5063 if (ret < 0)
5064 goto abort;
4962 extent_slot = path->slots[0]; 5065 extent_slot = path->slots[0];
4963 leaf = path->nodes[0]; 5066 leaf = path->nodes[0];
4964 item_size = btrfs_item_size_nr(leaf, extent_slot); 5067 item_size = btrfs_item_size_nr(leaf, extent_slot);
@@ -4995,7 +5098,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4995 ret = remove_extent_backref(trans, extent_root, path, 5098 ret = remove_extent_backref(trans, extent_root, path,
4996 iref, refs_to_drop, 5099 iref, refs_to_drop,
4997 is_data); 5100 is_data);
4998 BUG_ON(ret); 5101 if (ret)
5102 goto abort;
4999 } 5103 }
5000 } else { 5104 } else {
5001 if (found_extent) { 5105 if (found_extent) {
@@ -5012,23 +5116,27 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5012 5116
5013 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 5117 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
5014 num_to_del); 5118 num_to_del);
5015 BUG_ON(ret); 5119 if (ret)
5120 goto abort;
5016 btrfs_release_path(path); 5121 btrfs_release_path(path);
5017 5122
5018 if (is_data) { 5123 if (is_data) {
5019 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 5124 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
5020 BUG_ON(ret); 5125 if (ret)
5021 } else { 5126 goto abort;
5022 invalidate_mapping_pages(info->btree_inode->i_mapping,
5023 bytenr >> PAGE_CACHE_SHIFT,
5024 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
5025 } 5127 }
5026 5128
5027 ret = update_block_group(trans, root, bytenr, num_bytes, 0); 5129 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
5028 BUG_ON(ret); 5130 if (ret)
5131 goto abort;
5029 } 5132 }
5133out:
5030 btrfs_free_path(path); 5134 btrfs_free_path(path);
5031 return ret; 5135 return ret;
5136
5137abort:
5138 btrfs_abort_transaction(trans, extent_root, ret);
5139 goto out;
5032} 5140}
5033 5141
5034/* 5142/*
@@ -5124,7 +5232,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
5124 parent, root->root_key.objectid, 5232 parent, root->root_key.objectid,
5125 btrfs_header_level(buf), 5233 btrfs_header_level(buf),
5126 BTRFS_DROP_DELAYED_REF, NULL, for_cow); 5234 BTRFS_DROP_DELAYED_REF, NULL, for_cow);
5127 BUG_ON(ret); 5235 BUG_ON(ret); /* -ENOMEM */
5128 } 5236 }
5129 5237
5130 if (!last_ref) 5238 if (!last_ref)
@@ -5158,6 +5266,7 @@ out:
5158 btrfs_put_block_group(cache); 5266 btrfs_put_block_group(cache);
5159} 5267}
5160 5268
5269/* Can return -ENOMEM */
5161int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, 5270int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5162 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 5271 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
5163 u64 owner, u64 offset, int for_cow) 5272 u64 owner, u64 offset, int for_cow)
@@ -5179,14 +5288,12 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5179 num_bytes, 5288 num_bytes,
5180 parent, root_objectid, (int)owner, 5289 parent, root_objectid, (int)owner,
5181 BTRFS_DROP_DELAYED_REF, NULL, for_cow); 5290 BTRFS_DROP_DELAYED_REF, NULL, for_cow);
5182 BUG_ON(ret);
5183 } else { 5291 } else {
5184 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 5292 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
5185 num_bytes, 5293 num_bytes,
5186 parent, root_objectid, owner, 5294 parent, root_objectid, owner,
5187 offset, BTRFS_DROP_DELAYED_REF, 5295 offset, BTRFS_DROP_DELAYED_REF,
5188 NULL, for_cow); 5296 NULL, for_cow);
5189 BUG_ON(ret);
5190 } 5297 }
5191 return ret; 5298 return ret;
5192} 5299}
@@ -5243,28 +5350,34 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
5243 return 0; 5350 return 0;
5244} 5351}
5245 5352
5246static int get_block_group_index(struct btrfs_block_group_cache *cache) 5353static int __get_block_group_index(u64 flags)
5247{ 5354{
5248 int index; 5355 int index;
5249 if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) 5356
5357 if (flags & BTRFS_BLOCK_GROUP_RAID10)
5250 index = 0; 5358 index = 0;
5251 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) 5359 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
5252 index = 1; 5360 index = 1;
5253 else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) 5361 else if (flags & BTRFS_BLOCK_GROUP_DUP)
5254 index = 2; 5362 index = 2;
5255 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) 5363 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
5256 index = 3; 5364 index = 3;
5257 else 5365 else
5258 index = 4; 5366 index = 4;
5367
5259 return index; 5368 return index;
5260} 5369}
5261 5370
5371static int get_block_group_index(struct btrfs_block_group_cache *cache)
5372{
5373 return __get_block_group_index(cache->flags);
5374}
5375
5262enum btrfs_loop_type { 5376enum btrfs_loop_type {
5263 LOOP_FIND_IDEAL = 0, 5377 LOOP_CACHING_NOWAIT = 0,
5264 LOOP_CACHING_NOWAIT = 1, 5378 LOOP_CACHING_WAIT = 1,
5265 LOOP_CACHING_WAIT = 2, 5379 LOOP_ALLOC_CHUNK = 2,
5266 LOOP_ALLOC_CHUNK = 3, 5380 LOOP_NO_EMPTY_SIZE = 3,
5267 LOOP_NO_EMPTY_SIZE = 4,
5268}; 5381};
5269 5382
5270/* 5383/*
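
The hunk above splits get_block_group_index() so the flags-to-index mapping can be reused without a block_group_cache in hand (the balance code needs exactly that). A sketch of the extraction, flag values invented:

#include <stdint.h>

#define RAID10 0x1   /* hypothetical profile bits */
#define RAID1  0x2
#define DUP    0x4
#define RAID0  0x8

struct block_group { uint64_t flags; };

static int index_of_flags(uint64_t flags)   /* works on bare flags... */
{
	if (flags & RAID10) return 0;
	if (flags & RAID1)  return 1;
	if (flags & DUP)    return 2;
	if (flags & RAID0)  return 3;
	return 4;                               /* single */
}

static int index_of_group(const struct block_group *bg)
{
	return index_of_flags(bg->flags);       /* ...old entry point delegates */
}

int main(void)
{
	struct block_group bg = { RAID1 };
	return index_of_group(&bg) == 1 ? 0 : 1;
}
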
@@ -5278,7 +5391,6 @@ enum btrfs_loop_type {
5278static noinline int find_free_extent(struct btrfs_trans_handle *trans, 5391static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5279 struct btrfs_root *orig_root, 5392 struct btrfs_root *orig_root,
5280 u64 num_bytes, u64 empty_size, 5393 u64 num_bytes, u64 empty_size,
5281 u64 search_start, u64 search_end,
5282 u64 hint_byte, struct btrfs_key *ins, 5394 u64 hint_byte, struct btrfs_key *ins,
5283 u64 data) 5395 u64 data)
5284{ 5396{
@@ -5287,6 +5399,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5287 struct btrfs_free_cluster *last_ptr = NULL; 5399 struct btrfs_free_cluster *last_ptr = NULL;
5288 struct btrfs_block_group_cache *block_group = NULL; 5400 struct btrfs_block_group_cache *block_group = NULL;
5289 struct btrfs_block_group_cache *used_block_group; 5401 struct btrfs_block_group_cache *used_block_group;
5402 u64 search_start = 0;
5290 int empty_cluster = 2 * 1024 * 1024; 5403 int empty_cluster = 2 * 1024 * 1024;
5291 int allowed_chunk_alloc = 0; 5404 int allowed_chunk_alloc = 0;
5292 int done_chunk_alloc = 0; 5405 int done_chunk_alloc = 0;
@@ -5300,8 +5413,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5300 bool failed_alloc = false; 5413 bool failed_alloc = false;
5301 bool use_cluster = true; 5414 bool use_cluster = true;
5302 bool have_caching_bg = false; 5415 bool have_caching_bg = false;
5303 u64 ideal_cache_percent = 0;
5304 u64 ideal_cache_offset = 0;
5305 5416
5306 WARN_ON(num_bytes < root->sectorsize); 5417 WARN_ON(num_bytes < root->sectorsize);
5307 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 5418 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -5351,7 +5462,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5351 empty_cluster = 0; 5462 empty_cluster = 0;
5352 5463
5353 if (search_start == hint_byte) { 5464 if (search_start == hint_byte) {
5354ideal_cache:
5355 block_group = btrfs_lookup_block_group(root->fs_info, 5465 block_group = btrfs_lookup_block_group(root->fs_info,
5356 search_start); 5466 search_start);
5357 used_block_group = block_group; 5467 used_block_group = block_group;
@@ -5363,8 +5473,7 @@ ideal_cache:
5363 * picked out then we don't care that the block group is cached. 5473 * picked out then we don't care that the block group is cached.
5364 */ 5474 */
5365 if (block_group && block_group_bits(block_group, data) && 5475 if (block_group && block_group_bits(block_group, data) &&
5366 (block_group->cached != BTRFS_CACHE_NO || 5476 block_group->cached != BTRFS_CACHE_NO) {
5367 search_start == ideal_cache_offset)) {
5368 down_read(&space_info->groups_sem); 5477 down_read(&space_info->groups_sem);
5369 if (list_empty(&block_group->list) || 5478 if (list_empty(&block_group->list) ||
5370 block_group->ro) { 5479 block_group->ro) {
@@ -5418,44 +5527,13 @@ search:
5418have_block_group: 5527have_block_group:
5419 cached = block_group_cache_done(block_group); 5528 cached = block_group_cache_done(block_group);
5420 if (unlikely(!cached)) { 5529 if (unlikely(!cached)) {
5421 u64 free_percent;
5422
5423 found_uncached_bg = true; 5530 found_uncached_bg = true;
5424 ret = cache_block_group(block_group, trans, 5531 ret = cache_block_group(block_group, trans,
5425 orig_root, 1); 5532 orig_root, 0);
5426 if (block_group->cached == BTRFS_CACHE_FINISHED) 5533 BUG_ON(ret < 0);
5427 goto alloc; 5534 ret = 0;
5428
5429 free_percent = btrfs_block_group_used(&block_group->item);
5430 free_percent *= 100;
5431 free_percent = div64_u64(free_percent,
5432 block_group->key.offset);
5433 free_percent = 100 - free_percent;
5434 if (free_percent > ideal_cache_percent &&
5435 likely(!block_group->ro)) {
5436 ideal_cache_offset = block_group->key.objectid;
5437 ideal_cache_percent = free_percent;
5438 }
5439
5440 /*
5441 * The caching workers are limited to 2 threads, so we
5442 * can queue as much work as we care to.
5443 */
5444 if (loop > LOOP_FIND_IDEAL) {
5445 ret = cache_block_group(block_group, trans,
5446 orig_root, 0);
5447 BUG_ON(ret);
5448 }
5449
5450 /*
5451 * If loop is set for cached only, try the next block
5452 * group.
5453 */
5454 if (loop == LOOP_FIND_IDEAL)
5455 goto loop;
5456 } 5535 }
5457 5536
5458alloc:
5459 if (unlikely(block_group->ro)) 5537 if (unlikely(block_group->ro))
5460 goto loop; 5538 goto loop;
5461 5539
@@ -5606,11 +5684,6 @@ unclustered_alloc:
5606 } 5684 }
5607checks: 5685checks:
5608 search_start = stripe_align(root, offset); 5686 search_start = stripe_align(root, offset);
5609 /* move on to the next group */
5610 if (search_start + num_bytes >= search_end) {
5611 btrfs_add_free_space(used_block_group, offset, num_bytes);
5612 goto loop;
5613 }
5614 5687
5615 /* move on to the next group */ 5688 /* move on to the next group */
5616 if (search_start + num_bytes > 5689 if (search_start + num_bytes >
@@ -5661,9 +5734,7 @@ loop:
5661 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) 5734 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
5662 goto search; 5735 goto search;
5663 5736
5664 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for 5737 /*
5665 * for them to make caching progress. Also
5666 * determine the best possible bg to cache
5667 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking 5738 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
5668 * caching kthreads as we move along 5739 * caching kthreads as we move along
5669 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 5740 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
@@ -5673,50 +5744,17 @@ loop:
5673 */ 5744 */
5674 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { 5745 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
5675 index = 0; 5746 index = 0;
5676 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
5677 found_uncached_bg = false;
5678 loop++;
5679 if (!ideal_cache_percent)
5680 goto search;
5681
5682 /*
5683 * 1 of the following 2 things have happened so far
5684 *
5685 * 1) We found an ideal block group for caching that
5686 * is mostly full and will cache quickly, so we might
5687 * as well wait for it.
5688 *
5689 * 2) We searched for cached only and we didn't find
5690 * anything, and we didn't start any caching kthreads
5691 * either, so chances are we will loop through and
5692 * start a couple caching kthreads, and then come back
5693 * around and just wait for them. This will be slower
5694 * because we will have 2 caching kthreads reading at
5695 * the same time when we could have just started one
5696 * and waited for it to get far enough to give us an
5697 * allocation, so go ahead and go to the wait caching
5698 * loop.
5699 */
5700 loop = LOOP_CACHING_WAIT;
5701 search_start = ideal_cache_offset;
5702 ideal_cache_percent = 0;
5703 goto ideal_cache;
5704 } else if (loop == LOOP_FIND_IDEAL) {
5705 /*
5706 * Didn't find a uncached bg, wait on anything we find
5707 * next.
5708 */
5709 loop = LOOP_CACHING_WAIT;
5710 goto search;
5711 }
5712
5713 loop++; 5747 loop++;
5714
5715 if (loop == LOOP_ALLOC_CHUNK) { 5748 if (loop == LOOP_ALLOC_CHUNK) {
5716 if (allowed_chunk_alloc) { 5749 if (allowed_chunk_alloc) {
5717 ret = do_chunk_alloc(trans, root, num_bytes + 5750 ret = do_chunk_alloc(trans, root, num_bytes +
5718 2 * 1024 * 1024, data, 5751 2 * 1024 * 1024, data,
5719 CHUNK_ALLOC_LIMITED); 5752 CHUNK_ALLOC_LIMITED);
5753 if (ret < 0) {
5754 btrfs_abort_transaction(trans,
5755 root, ret);
5756 goto out;
5757 }
5720 allowed_chunk_alloc = 0; 5758 allowed_chunk_alloc = 0;
5721 if (ret == 1) 5759 if (ret == 1)
5722 done_chunk_alloc = 1; 5760 done_chunk_alloc = 1;
@@ -5745,6 +5783,7 @@ loop:
5745 } else if (ins->objectid) { 5783 } else if (ins->objectid) {
5746 ret = 0; 5784 ret = 0;
5747 } 5785 }
5786out:
5748 5787
5749 return ret; 5788 return ret;
5750} 5789}
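
With LOOP_FIND_IDEAL and its ideal_cache bookkeeping gone, find_free_extent's retry ladder is a plain escalation: try cached groups without waiting, then wait on caching, then allocate a chunk, then give up empty_size/empty_cluster. A compressed model of that state machine (helper is fake, names hypothetical):

#include <stdio.h>

enum loop_type {           /* mirrors the renumbered ladder in the patch */
	LOOP_CACHING_NOWAIT = 0,
	LOOP_CACHING_WAIT   = 1,
	LOOP_ALLOC_CHUNK    = 2,
	LOOP_NO_EMPTY_SIZE  = 3,
};

static int try_alloc(int loop) { return loop >= LOOP_ALLOC_CHUNK; } /* fake */

int main(void)
{
	int loop;

	for (loop = LOOP_CACHING_NOWAIT; loop <= LOOP_NO_EMPTY_SIZE; loop++) {
		if (try_alloc(loop)) {       /* each pass relaxes one constraint */
			printf("allocated at loop=%d\n", loop);
			return 0;
		}
	}
	return 1;                            /* ENOSPC */
}
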
@@ -5798,12 +5837,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
5798 struct btrfs_root *root, 5837 struct btrfs_root *root,
5799 u64 num_bytes, u64 min_alloc_size, 5838 u64 num_bytes, u64 min_alloc_size,
5800 u64 empty_size, u64 hint_byte, 5839 u64 empty_size, u64 hint_byte,
5801 u64 search_end, struct btrfs_key *ins, 5840 struct btrfs_key *ins, u64 data)
5802 u64 data)
5803{ 5841{
5804 bool final_tried = false; 5842 bool final_tried = false;
5805 int ret; 5843 int ret;
5806 u64 search_start = 0;
5807 5844
5808 data = btrfs_get_alloc_profile(root, data); 5845 data = btrfs_get_alloc_profile(root, data);
5809again: 5846again:
@@ -5811,23 +5848,31 @@ again:
5811 * the only place that sets empty_size is btrfs_realloc_node, which 5848 * the only place that sets empty_size is btrfs_realloc_node, which
5812 * is not called recursively on allocations 5849 * is not called recursively on allocations
5813 */ 5850 */
5814 if (empty_size || root->ref_cows) 5851 if (empty_size || root->ref_cows) {
5815 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5852 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5816 num_bytes + 2 * 1024 * 1024, data, 5853 num_bytes + 2 * 1024 * 1024, data,
5817 CHUNK_ALLOC_NO_FORCE); 5854 CHUNK_ALLOC_NO_FORCE);
5855 if (ret < 0 && ret != -ENOSPC) {
5856 btrfs_abort_transaction(trans, root, ret);
5857 return ret;
5858 }
5859 }
5818 5860
5819 WARN_ON(num_bytes < root->sectorsize); 5861 WARN_ON(num_bytes < root->sectorsize);
5820 ret = find_free_extent(trans, root, num_bytes, empty_size, 5862 ret = find_free_extent(trans, root, num_bytes, empty_size,
5821 search_start, search_end, hint_byte, 5863 hint_byte, ins, data);
5822 ins, data);
5823 5864
5824 if (ret == -ENOSPC) { 5865 if (ret == -ENOSPC) {
5825 if (!final_tried) { 5866 if (!final_tried) {
5826 num_bytes = num_bytes >> 1; 5867 num_bytes = num_bytes >> 1;
5827 num_bytes = num_bytes & ~(root->sectorsize - 1); 5868 num_bytes = num_bytes & ~(root->sectorsize - 1);
5828 num_bytes = max(num_bytes, min_alloc_size); 5869 num_bytes = max(num_bytes, min_alloc_size);
5829 do_chunk_alloc(trans, root->fs_info->extent_root, 5870 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5830 num_bytes, data, CHUNK_ALLOC_FORCE); 5871 num_bytes, data, CHUNK_ALLOC_FORCE);
5872 if (ret < 0 && ret != -ENOSPC) {
5873 btrfs_abort_transaction(trans, root, ret);
5874 return ret;
5875 }
5831 if (num_bytes == min_alloc_size) 5876 if (num_bytes == min_alloc_size)
5832 final_tried = true; 5877 final_tried = true;
5833 goto again; 5878 goto again;
@@ -5838,7 +5883,8 @@ again:
5838 printk(KERN_ERR "btrfs allocation failed flags %llu, " 5883 printk(KERN_ERR "btrfs allocation failed flags %llu, "
5839 "wanted %llu\n", (unsigned long long)data, 5884 "wanted %llu\n", (unsigned long long)data,
5840 (unsigned long long)num_bytes); 5885 (unsigned long long)num_bytes);
5841 dump_space_info(sinfo, num_bytes, 1); 5886 if (sinfo)
5887 dump_space_info(sinfo, num_bytes, 1);
5842 } 5888 }
5843 } 5889 }
5844 5890
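
When find_free_extent() comes back with -ENOSPC, the reworked btrfs_reserve_extent() halves the request, rounds it down to a sector boundary and clamps it to min_alloc_size before retrying; once the request equals min_alloc_size the next failure is final. The back-off arithmetic in isolation, as a userspace sketch (the 1MiB starting size and 4096-byte sector are arbitrary example values):

#include <stdint.h>
#include <stdio.h>

/* model the -ENOSPC retry shrink: halve, align down, clamp */
static uint64_t shrink_request(uint64_t num_bytes, uint64_t sectorsize,
                               uint64_t min_alloc_size)
{
        num_bytes >>= 1;
        num_bytes &= ~(sectorsize - 1);     /* sectorsize is a power of two */
        if (num_bytes < min_alloc_size)     /* max(num_bytes, min_alloc_size) */
                num_bytes = min_alloc_size;
        return num_bytes;
}

int main(void)
{
        uint64_t n = 1024 * 1024;           /* start by asking for 1MiB */

        while (n > 4096) {
                n = shrink_request(n, 4096, 4096);
                printf("retrying with %llu bytes\n", (unsigned long long)n);
        }
        return 0;                           /* at min_alloc_size, failure is final */
}
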
@@ -5917,7 +5963,10 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5917 path->leave_spinning = 1; 5963 path->leave_spinning = 1;
5918 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5964 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
5919 ins, size); 5965 ins, size);
5920 BUG_ON(ret); 5966 if (ret) {
5967 btrfs_free_path(path);
5968 return ret;
5969 }
5921 5970
5922 leaf = path->nodes[0]; 5971 leaf = path->nodes[0];
5923 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5972 extent_item = btrfs_item_ptr(leaf, path->slots[0],
@@ -5947,7 +5996,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5947 btrfs_free_path(path); 5996 btrfs_free_path(path);
5948 5997
5949 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5998 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
5950 if (ret) { 5999 if (ret) { /* -ENOENT, logic error */
5951 printk(KERN_ERR "btrfs update block group failed for %llu " 6000 printk(KERN_ERR "btrfs update block group failed for %llu "
5952 "%llu\n", (unsigned long long)ins->objectid, 6001 "%llu\n", (unsigned long long)ins->objectid,
5953 (unsigned long long)ins->offset); 6002 (unsigned long long)ins->offset);
@@ -5978,7 +6027,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
5978 path->leave_spinning = 1; 6027 path->leave_spinning = 1;
5979 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 6028 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
5980 ins, size); 6029 ins, size);
5981 BUG_ON(ret); 6030 if (ret) {
6031 btrfs_free_path(path);
6032 return ret;
6033 }
5982 6034
5983 leaf = path->nodes[0]; 6035 leaf = path->nodes[0];
5984 extent_item = btrfs_item_ptr(leaf, path->slots[0], 6036 extent_item = btrfs_item_ptr(leaf, path->slots[0],
@@ -6008,7 +6060,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6008 btrfs_free_path(path); 6060 btrfs_free_path(path);
6009 6061
6010 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 6062 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
6011 if (ret) { 6063 if (ret) { /* -ENOENT, logic error */
6012 printk(KERN_ERR "btrfs update block group failed for %llu " 6064 printk(KERN_ERR "btrfs update block group failed for %llu "
6013 "%llu\n", (unsigned long long)ins->objectid, 6065 "%llu\n", (unsigned long long)ins->objectid,
6014 (unsigned long long)ins->offset); 6066 (unsigned long long)ins->offset);
@@ -6056,28 +6108,28 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
6056 if (!caching_ctl) { 6108 if (!caching_ctl) {
6057 BUG_ON(!block_group_cache_done(block_group)); 6109 BUG_ON(!block_group_cache_done(block_group));
6058 ret = btrfs_remove_free_space(block_group, start, num_bytes); 6110 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6059 BUG_ON(ret); 6111 BUG_ON(ret); /* -ENOMEM */
6060 } else { 6112 } else {
6061 mutex_lock(&caching_ctl->mutex); 6113 mutex_lock(&caching_ctl->mutex);
6062 6114
6063 if (start >= caching_ctl->progress) { 6115 if (start >= caching_ctl->progress) {
6064 ret = add_excluded_extent(root, start, num_bytes); 6116 ret = add_excluded_extent(root, start, num_bytes);
6065 BUG_ON(ret); 6117 BUG_ON(ret); /* -ENOMEM */
6066 } else if (start + num_bytes <= caching_ctl->progress) { 6118 } else if (start + num_bytes <= caching_ctl->progress) {
6067 ret = btrfs_remove_free_space(block_group, 6119 ret = btrfs_remove_free_space(block_group,
6068 start, num_bytes); 6120 start, num_bytes);
6069 BUG_ON(ret); 6121 BUG_ON(ret); /* -ENOMEM */
6070 } else { 6122 } else {
6071 num_bytes = caching_ctl->progress - start; 6123 num_bytes = caching_ctl->progress - start;
6072 ret = btrfs_remove_free_space(block_group, 6124 ret = btrfs_remove_free_space(block_group,
6073 start, num_bytes); 6125 start, num_bytes);
6074 BUG_ON(ret); 6126 BUG_ON(ret); /* -ENOMEM */
6075 6127
6076 start = caching_ctl->progress; 6128 start = caching_ctl->progress;
6077 num_bytes = ins->objectid + ins->offset - 6129 num_bytes = ins->objectid + ins->offset -
6078 caching_ctl->progress; 6130 caching_ctl->progress;
6079 ret = add_excluded_extent(root, start, num_bytes); 6131 ret = add_excluded_extent(root, start, num_bytes);
6080 BUG_ON(ret); 6132 BUG_ON(ret); /* -ENOMEM */
6081 } 6133 }
6082 6134
6083 mutex_unlock(&caching_ctl->mutex); 6135 mutex_unlock(&caching_ctl->mutex);
@@ -6086,7 +6138,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
6086 6138
6087 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 6139 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
6088 RESERVE_ALLOC_NO_ACCOUNT); 6140 RESERVE_ALLOC_NO_ACCOUNT);
6089 BUG_ON(ret); 6141 BUG_ON(ret); /* logic error */
6090 btrfs_put_block_group(block_group); 6142 btrfs_put_block_group(block_group);
6091 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 6143 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
6092 0, owner, offset, ins, 1); 6144 0, owner, offset, ins, 1);
@@ -6107,6 +6159,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
6107 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); 6159 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
6108 btrfs_tree_lock(buf); 6160 btrfs_tree_lock(buf);
6109 clean_tree_block(trans, root, buf); 6161 clean_tree_block(trans, root, buf);
6162 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
6110 6163
6111 btrfs_set_lock_blocking(buf); 6164 btrfs_set_lock_blocking(buf);
6112 btrfs_set_buffer_uptodate(buf); 6165 btrfs_set_buffer_uptodate(buf);
@@ -6214,7 +6267,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6214 return ERR_CAST(block_rsv); 6267 return ERR_CAST(block_rsv);
6215 6268
6216 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, 6269 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
6217 empty_size, hint, (u64)-1, &ins, 0); 6270 empty_size, hint, &ins, 0);
6218 if (ret) { 6271 if (ret) {
6219 unuse_block_rsv(root->fs_info, block_rsv, blocksize); 6272 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
6220 return ERR_PTR(ret); 6273 return ERR_PTR(ret);
@@ -6222,7 +6275,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6222 6275
6223 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 6276 buf = btrfs_init_new_buffer(trans, root, ins.objectid,
6224 blocksize, level); 6277 blocksize, level);
6225 BUG_ON(IS_ERR(buf)); 6278 BUG_ON(IS_ERR(buf)); /* -ENOMEM */
6226 6279
6227 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 6280 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
6228 if (parent == 0) 6281 if (parent == 0)
@@ -6234,7 +6287,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6234 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 6287 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
6235 struct btrfs_delayed_extent_op *extent_op; 6288 struct btrfs_delayed_extent_op *extent_op;
6236 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 6289 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
6237 BUG_ON(!extent_op); 6290 BUG_ON(!extent_op); /* -ENOMEM */
6238 if (key) 6291 if (key)
6239 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 6292 memcpy(&extent_op->key, key, sizeof(extent_op->key));
6240 else 6293 else
@@ -6249,7 +6302,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6249 ins.offset, parent, root_objectid, 6302 ins.offset, parent, root_objectid,
6250 level, BTRFS_ADD_DELAYED_EXTENT, 6303 level, BTRFS_ADD_DELAYED_EXTENT,
6251 extent_op, for_cow); 6304 extent_op, for_cow);
6252 BUG_ON(ret); 6305 BUG_ON(ret); /* -ENOMEM */
6253 } 6306 }
6254 return buf; 6307 return buf;
6255} 6308}
@@ -6319,7 +6372,9 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
6319 /* We don't lock the tree block, it's OK to be racy here */ 6372 /* We don't lock the tree block, it's OK to be racy here */
6320 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 6373 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
6321 &refs, &flags); 6374 &refs, &flags);
6322 BUG_ON(ret); 6375 /* We don't care about errors in readahead. */
6376 if (ret < 0)
6377 continue;
6323 BUG_ON(refs == 0); 6378 BUG_ON(refs == 0);
6324 6379
6325 if (wc->stage == DROP_REFERENCE) { 6380 if (wc->stage == DROP_REFERENCE) {
@@ -6386,7 +6441,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6386 eb->start, eb->len, 6441 eb->start, eb->len,
6387 &wc->refs[level], 6442 &wc->refs[level],
6388 &wc->flags[level]); 6443 &wc->flags[level]);
6389 BUG_ON(ret); 6444 BUG_ON(ret == -ENOMEM);
6445 if (ret)
6446 return ret;
6390 BUG_ON(wc->refs[level] == 0); 6447 BUG_ON(wc->refs[level] == 0);
6391 } 6448 }
6392 6449
@@ -6405,12 +6462,12 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6405 if (!(wc->flags[level] & flag)) { 6462 if (!(wc->flags[level] & flag)) {
6406 BUG_ON(!path->locks[level]); 6463 BUG_ON(!path->locks[level]);
6407 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); 6464 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
6408 BUG_ON(ret); 6465 BUG_ON(ret); /* -ENOMEM */
6409 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 6466 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
6410 BUG_ON(ret); 6467 BUG_ON(ret); /* -ENOMEM */
6411 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 6468 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
6412 eb->len, flag, 0); 6469 eb->len, flag, 0);
6413 BUG_ON(ret); 6470 BUG_ON(ret); /* -ENOMEM */
6414 wc->flags[level] |= flag; 6471 wc->flags[level] |= flag;
6415 } 6472 }
6416 6473
@@ -6482,7 +6539,11 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6482 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 6539 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
6483 &wc->refs[level - 1], 6540 &wc->refs[level - 1],
6484 &wc->flags[level - 1]); 6541 &wc->flags[level - 1]);
6485 BUG_ON(ret); 6542 if (ret < 0) {
6543 btrfs_tree_unlock(next);
6544 return ret;
6545 }
6546
6486 BUG_ON(wc->refs[level - 1] == 0); 6547 BUG_ON(wc->refs[level - 1] == 0);
6487 *lookup_info = 0; 6548 *lookup_info = 0;
6488 6549
@@ -6551,7 +6612,7 @@ skip:
6551 6612
6552 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 6613 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
6553 root->root_key.objectid, level - 1, 0, 0); 6614 root->root_key.objectid, level - 1, 0, 0);
6554 BUG_ON(ret); 6615 BUG_ON(ret); /* -ENOMEM */
6555 } 6616 }
6556 btrfs_tree_unlock(next); 6617 btrfs_tree_unlock(next);
6557 free_extent_buffer(next); 6618 free_extent_buffer(next);
@@ -6609,7 +6670,10 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6609 eb->start, eb->len, 6670 eb->start, eb->len,
6610 &wc->refs[level], 6671 &wc->refs[level],
6611 &wc->flags[level]); 6672 &wc->flags[level]);
6612 BUG_ON(ret); 6673 if (ret < 0) {
6674 btrfs_tree_unlock_rw(eb, path->locks[level]);
6675 return ret;
6676 }
6613 BUG_ON(wc->refs[level] == 0); 6677 BUG_ON(wc->refs[level] == 0);
6614 if (wc->refs[level] == 1) { 6678 if (wc->refs[level] == 1) {
6615 btrfs_tree_unlock_rw(eb, path->locks[level]); 6679 btrfs_tree_unlock_rw(eb, path->locks[level]);
@@ -6629,7 +6693,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6629 else 6693 else
6630 ret = btrfs_dec_ref(trans, root, eb, 0, 6694 ret = btrfs_dec_ref(trans, root, eb, 0,
6631 wc->for_reloc); 6695 wc->for_reloc);
6632 BUG_ON(ret); 6696 BUG_ON(ret); /* -ENOMEM */
6633 } 6697 }
6634 /* make block locked assertion in clean_tree_block happy */ 6698 /* make block locked assertion in clean_tree_block happy */
6635 if (!path->locks[level] && 6699 if (!path->locks[level] &&
@@ -6738,7 +6802,7 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
6738 * also make sure backrefs for the shared block and all lower level 6802 * also make sure backrefs for the shared block and all lower level
6739 * blocks are properly updated. 6803 * blocks are properly updated.
6740 */ 6804 */
6741void btrfs_drop_snapshot(struct btrfs_root *root, 6805int btrfs_drop_snapshot(struct btrfs_root *root,
6742 struct btrfs_block_rsv *block_rsv, int update_ref, 6806 struct btrfs_block_rsv *block_rsv, int update_ref,
6743 int for_reloc) 6807 int for_reloc)
6744{ 6808{
@@ -6766,7 +6830,10 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6766 } 6830 }
6767 6831
6768 trans = btrfs_start_transaction(tree_root, 0); 6832 trans = btrfs_start_transaction(tree_root, 0);
6769 BUG_ON(IS_ERR(trans)); 6833 if (IS_ERR(trans)) {
6834 err = PTR_ERR(trans);
6835 goto out_free;
6836 }
6770 6837
6771 if (block_rsv) 6838 if (block_rsv)
6772 trans->block_rsv = block_rsv; 6839 trans->block_rsv = block_rsv;
@@ -6791,7 +6858,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6791 path->lowest_level = 0; 6858 path->lowest_level = 0;
6792 if (ret < 0) { 6859 if (ret < 0) {
6793 err = ret; 6860 err = ret;
6794 goto out_free; 6861 goto out_end_trans;
6795 } 6862 }
6796 WARN_ON(ret > 0); 6863 WARN_ON(ret > 0);
6797 6864
@@ -6811,7 +6878,10 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6811 path->nodes[level]->len, 6878 path->nodes[level]->len,
6812 &wc->refs[level], 6879 &wc->refs[level],
6813 &wc->flags[level]); 6880 &wc->flags[level]);
6814 BUG_ON(ret); 6881 if (ret < 0) {
6882 err = ret;
6883 goto out_end_trans;
6884 }
6815 BUG_ON(wc->refs[level] == 0); 6885 BUG_ON(wc->refs[level] == 0);
6816 6886
6817 if (level == root_item->drop_level) 6887 if (level == root_item->drop_level)
@@ -6862,26 +6932,40 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6862 ret = btrfs_update_root(trans, tree_root, 6932 ret = btrfs_update_root(trans, tree_root,
6863 &root->root_key, 6933 &root->root_key,
6864 root_item); 6934 root_item);
6865 BUG_ON(ret); 6935 if (ret) {
6936 btrfs_abort_transaction(trans, tree_root, ret);
6937 err = ret;
6938 goto out_end_trans;
6939 }
6866 6940
6867 btrfs_end_transaction_throttle(trans, tree_root); 6941 btrfs_end_transaction_throttle(trans, tree_root);
6868 trans = btrfs_start_transaction(tree_root, 0); 6942 trans = btrfs_start_transaction(tree_root, 0);
6869 BUG_ON(IS_ERR(trans)); 6943 if (IS_ERR(trans)) {
6944 err = PTR_ERR(trans);
6945 goto out_free;
6946 }
6870 if (block_rsv) 6947 if (block_rsv)
6871 trans->block_rsv = block_rsv; 6948 trans->block_rsv = block_rsv;
6872 } 6949 }
6873 } 6950 }
6874 btrfs_release_path(path); 6951 btrfs_release_path(path);
6875 BUG_ON(err); 6952 if (err)
6953 goto out_end_trans;
6876 6954
6877 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6955 ret = btrfs_del_root(trans, tree_root, &root->root_key);
6878 BUG_ON(ret); 6956 if (ret) {
6957 btrfs_abort_transaction(trans, tree_root, ret);
6958 goto out_end_trans;
6959 }
6879 6960
6880 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 6961 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6881 ret = btrfs_find_last_root(tree_root, root->root_key.objectid, 6962 ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
6882 NULL, NULL); 6963 NULL, NULL);
6883 BUG_ON(ret < 0); 6964 if (ret < 0) {
6884 if (ret > 0) { 6965 btrfs_abort_transaction(trans, tree_root, ret);
6966 err = ret;
6967 goto out_end_trans;
6968 } else if (ret > 0) {
6885 /* if we fail to delete the orphan item this time 6969 /* if we fail to delete the orphan item this time
6886 * around, it'll get picked up the next time. 6970 * around, it'll get picked up the next time.
6887 * 6971 *
@@ -6899,14 +6983,15 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
6899 free_extent_buffer(root->commit_root); 6983 free_extent_buffer(root->commit_root);
6900 kfree(root); 6984 kfree(root);
6901 } 6985 }
6902out_free: 6986out_end_trans:
6903 btrfs_end_transaction_throttle(trans, tree_root); 6987 btrfs_end_transaction_throttle(trans, tree_root);
6988out_free:
6904 kfree(wc); 6989 kfree(wc);
6905 btrfs_free_path(path); 6990 btrfs_free_path(path);
6906out: 6991out:
6907 if (err) 6992 if (err)
6908 btrfs_std_error(root->fs_info, err); 6993 btrfs_std_error(root->fs_info, err);
6909 return; 6994 return err;
6910} 6995}
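
btrfs_drop_snapshot() now returns int, and its unwind path is split into ordered labels: out_end_trans closes the transaction, out_free releases the walk control and path, and out reports the error. A compact sketch of that layered-cleanup shape; start_trans(), walk_stub() and the error numbers are placeholders, not the kernel code.

#include <stdio.h>
#include <stdlib.h>

static int start_trans(void) { return 0; }          /* pretend success */
static int walk_stub(void)   { return 0; }          /* pretend the walk worked */
static void end_trans(void)  { puts("end transaction"); }

static int drop_snapshot_like(void)
{
        int err;
        char *wc = malloc(64);                      /* stands in for the walk ctl */

        if (!wc)
                return -12;                         /* -ENOMEM, nothing held yet */

        err = start_trans();
        if (err)
                goto out_free;                      /* no transaction to end */

        err = walk_stub();
        if (err)
                goto out_end_trans;                 /* must still end the trans */

out_end_trans:                                      /* success falls through too */
        end_trans();
out_free:
        free(wc);
        if (err)
                fprintf(stderr, "drop failed: %d\n", err);
        return err;                                 /* callers can now see it */
}

int main(void) { return drop_snapshot_like() ? 1 : 0; }
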
6911 6996
6912/* 6997/*
@@ -6983,31 +7068,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6983static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 7068static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
6984{ 7069{
6985 u64 num_devices; 7070 u64 num_devices;
6986 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 7071 u64 stripped;
6987 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
6988 7072
6989 if (root->fs_info->balance_ctl) { 7073 /*
6990 struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; 7074 * if restripe for this chunk_type is on, pick target profile and
6991 u64 tgt = 0; 7075 * return; otherwise do the usual balance
6992 7076 */
6993 /* pick restriper's target profile and return */ 7077 stripped = get_restripe_target(root->fs_info, flags);
6994 if (flags & BTRFS_BLOCK_GROUP_DATA && 7078 if (stripped)
6995 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { 7079 return extended_to_chunk(stripped);
6996 tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
6997 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
6998 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
6999 tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
7000 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
7001 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
7002 tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
7003 }
7004
7005 if (tgt) {
7006 /* extended -> chunk profile */
7007 tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
7008 return tgt;
7009 }
7010 }
7011 7080
7012 /* 7081 /*
7013 * we add in the count of missing devices because we want 7082 * we add in the count of missing devices because we want
@@ -7017,6 +7086,9 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7017 num_devices = root->fs_info->fs_devices->rw_devices + 7086 num_devices = root->fs_info->fs_devices->rw_devices +
7018 root->fs_info->fs_devices->missing_devices; 7087 root->fs_info->fs_devices->missing_devices;
7019 7088
7089 stripped = BTRFS_BLOCK_GROUP_RAID0 |
7090 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
7091
7020 if (num_devices == 1) { 7092 if (num_devices == 1) {
7021 stripped |= BTRFS_BLOCK_GROUP_DUP; 7093 stripped |= BTRFS_BLOCK_GROUP_DUP;
7022 stripped = flags & ~stripped; 7094 stripped = flags & ~stripped;
@@ -7029,7 +7101,6 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7029 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 7101 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
7030 BTRFS_BLOCK_GROUP_RAID10)) 7102 BTRFS_BLOCK_GROUP_RAID10))
7031 return stripped | BTRFS_BLOCK_GROUP_DUP; 7103 return stripped | BTRFS_BLOCK_GROUP_DUP;
7032 return flags;
7033 } else { 7104 } else {
7034 /* they already had raid on here, just return */ 7105 /* they already had raid on here, just return */
7035 if (flags & stripped) 7106 if (flags & stripped)
@@ -7042,9 +7113,9 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7042 if (flags & BTRFS_BLOCK_GROUP_DUP) 7113 if (flags & BTRFS_BLOCK_GROUP_DUP)
7043 return stripped | BTRFS_BLOCK_GROUP_RAID1; 7114 return stripped | BTRFS_BLOCK_GROUP_RAID1;
7044 7115
7045 /* turn single device chunks into raid0 */ 7116 /* this is drive concat, leave it alone */
7046 return stripped | BTRFS_BLOCK_GROUP_RAID0;
7047 } 7117 }
7118
7048 return flags; 7119 return flags;
7049} 7120}
7050 7121
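
update_block_group_flags() now asks get_restripe_target() for the balance target and converts it with extended_to_chunk(). Judging from the inline code this hunk removes, that conversion is simply clearing the "single" allocation marker bit; the sketch below models it under that assumption, with placeholder bit positions rather than the real on-disk constants.

#include <stdint.h>
#include <stdio.h>

/* placeholder bit layout -- illustrative, not the on-disk values */
#define AVAIL_ALLOC_BIT_SINGLE  (1ULL << 48)
#define BLOCK_GROUP_RAID1       (1ULL << 4)

/*
 * extended profile -> chunk profile: drop the SINGLE marker bit,
 * mirroring the removed "tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE"
 */
static uint64_t extended_to_chunk_like(uint64_t flags)
{
        return flags & ~AVAIL_ALLOC_BIT_SINGLE;
}

int main(void)
{
        uint64_t target = BLOCK_GROUP_RAID1 | AVAIL_ALLOC_BIT_SINGLE;

        printf("chunk profile: 0x%llx\n",
               (unsigned long long)extended_to_chunk_like(target));
        return 0;
}
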
@@ -7103,12 +7174,16 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
7103 BUG_ON(cache->ro); 7174 BUG_ON(cache->ro);
7104 7175
7105 trans = btrfs_join_transaction(root); 7176 trans = btrfs_join_transaction(root);
7106 BUG_ON(IS_ERR(trans)); 7177 if (IS_ERR(trans))
7178 return PTR_ERR(trans);
7107 7179
7108 alloc_flags = update_block_group_flags(root, cache->flags); 7180 alloc_flags = update_block_group_flags(root, cache->flags);
7109 if (alloc_flags != cache->flags) 7181 if (alloc_flags != cache->flags) {
7110 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 7182 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
7111 CHUNK_ALLOC_FORCE); 7183 CHUNK_ALLOC_FORCE);
7184 if (ret < 0)
7185 goto out;
7186 }
7112 7187
7113 ret = set_block_group_ro(cache, 0); 7188 ret = set_block_group_ro(cache, 0);
7114 if (!ret) 7189 if (!ret)
@@ -7188,7 +7263,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
7188 return free_bytes; 7263 return free_bytes;
7189} 7264}
7190 7265
7191int btrfs_set_block_group_rw(struct btrfs_root *root, 7266void btrfs_set_block_group_rw(struct btrfs_root *root,
7192 struct btrfs_block_group_cache *cache) 7267 struct btrfs_block_group_cache *cache)
7193{ 7268{
7194 struct btrfs_space_info *sinfo = cache->space_info; 7269 struct btrfs_space_info *sinfo = cache->space_info;
@@ -7204,7 +7279,6 @@ int btrfs_set_block_group_rw(struct btrfs_root *root,
7204 cache->ro = 0; 7279 cache->ro = 0;
7205 spin_unlock(&cache->lock); 7280 spin_unlock(&cache->lock);
7206 spin_unlock(&sinfo->lock); 7281 spin_unlock(&sinfo->lock);
7207 return 0;
7208} 7282}
7209 7283
7210/* 7284/*
@@ -7222,6 +7296,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7222 u64 min_free; 7296 u64 min_free;
7223 u64 dev_min = 1; 7297 u64 dev_min = 1;
7224 u64 dev_nr = 0; 7298 u64 dev_nr = 0;
7299 u64 target;
7225 int index; 7300 int index;
7226 int full = 0; 7301 int full = 0;
7227 int ret = 0; 7302 int ret = 0;
@@ -7262,13 +7337,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7262 /* 7337 /*
7263 * ok we don't have enough space, but maybe we have free space on our 7338 * ok we don't have enough space, but maybe we have free space on our
7264 * devices to allocate new chunks for relocation, so loop through our 7339 * devices to allocate new chunks for relocation, so loop through our
7265 * alloc devices and guess if we have enough space. However, if we 7340 * alloc devices and guess if we have enough space. if this block
7266 * were marked as full, then we know there aren't enough chunks, and we 7341 * group is going to be restriped, run checks against the target
7267 * can just return. 7342 * profile instead of the current one.
7268 */ 7343 */
7269 ret = -1; 7344 ret = -1;
7270 if (full)
7271 goto out;
7272 7345
7273 /* 7346 /*
7274 * index: 7347 * index:
@@ -7278,7 +7351,20 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7278 * 3: raid0 7351 * 3: raid0
7279 * 4: single 7352 * 4: single
7280 */ 7353 */
7281 index = get_block_group_index(block_group); 7354 target = get_restripe_target(root->fs_info, block_group->flags);
7355 if (target) {
7356 index = __get_block_group_index(extended_to_chunk(target));
7357 } else {
7358 /*
7359 * this is just a balance, so if we were marked as full
7360 * we know there is no space for a new chunk
7361 */
7362 if (full)
7363 goto out;
7364
7365 index = get_block_group_index(block_group);
7366 }
7367
7282 if (index == 0) { 7368 if (index == 0) {
7283 dev_min = 4; 7369 dev_min = 4;
7284 /* Divide by 2 */ 7370 /* Divide by 2 */
@@ -7572,7 +7658,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7572 ret = update_space_info(info, cache->flags, found_key.offset, 7658 ret = update_space_info(info, cache->flags, found_key.offset,
7573 btrfs_block_group_used(&cache->item), 7659 btrfs_block_group_used(&cache->item),
7574 &space_info); 7660 &space_info);
7575 BUG_ON(ret); 7661 BUG_ON(ret); /* -ENOMEM */
7576 cache->space_info = space_info; 7662 cache->space_info = space_info;
7577 spin_lock(&cache->space_info->lock); 7663 spin_lock(&cache->space_info->lock);
7578 cache->space_info->bytes_readonly += cache->bytes_super; 7664 cache->space_info->bytes_readonly += cache->bytes_super;
@@ -7581,7 +7667,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7581 __link_block_group(space_info, cache); 7667 __link_block_group(space_info, cache);
7582 7668
7583 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7669 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7584 BUG_ON(ret); 7670 BUG_ON(ret); /* Logic error */
7585 7671
7586 set_avail_alloc_bits(root->fs_info, cache->flags); 7672 set_avail_alloc_bits(root->fs_info, cache->flags);
7587 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7673 if (btrfs_chunk_readonly(root, cache->key.objectid))
@@ -7663,7 +7749,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7663 7749
7664 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7750 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7665 &cache->space_info); 7751 &cache->space_info);
7666 BUG_ON(ret); 7752 BUG_ON(ret); /* -ENOMEM */
7667 update_global_block_rsv(root->fs_info); 7753 update_global_block_rsv(root->fs_info);
7668 7754
7669 spin_lock(&cache->space_info->lock); 7755 spin_lock(&cache->space_info->lock);
@@ -7673,11 +7759,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7673 __link_block_group(cache->space_info, cache); 7759 __link_block_group(cache->space_info, cache);
7674 7760
7675 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7761 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7676 BUG_ON(ret); 7762 BUG_ON(ret); /* Logic error */
7677 7763
7678 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7764 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
7679 sizeof(cache->item)); 7765 sizeof(cache->item));
7680 BUG_ON(ret); 7766 if (ret) {
7767 btrfs_abort_transaction(trans, extent_root, ret);
7768 return ret;
7769 }
7681 7770
7682 set_avail_alloc_bits(extent_root->fs_info, type); 7771 set_avail_alloc_bits(extent_root->fs_info, type);
7683 7772
@@ -7686,11 +7775,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7686 7775
7687static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 7776static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
7688{ 7777{
7689 u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; 7778 u64 extra_flags = chunk_to_extended(flags) &
7690 7779 BTRFS_EXTENDED_PROFILE_MASK;
7691 /* chunk -> extended profile */
7692 if (extra_flags == 0)
7693 extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
7694 7780
7695 if (flags & BTRFS_BLOCK_GROUP_DATA) 7781 if (flags & BTRFS_BLOCK_GROUP_DATA)
7696 fs_info->avail_data_alloc_bits &= ~extra_flags; 7782 fs_info->avail_data_alloc_bits &= ~extra_flags;
@@ -7758,7 +7844,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7758 inode = lookup_free_space_inode(tree_root, block_group, path); 7844 inode = lookup_free_space_inode(tree_root, block_group, path);
7759 if (!IS_ERR(inode)) { 7845 if (!IS_ERR(inode)) {
7760 ret = btrfs_orphan_add(trans, inode); 7846 ret = btrfs_orphan_add(trans, inode);
7761 BUG_ON(ret); 7847 if (ret) {
7848 btrfs_add_delayed_iput(inode);
7849 goto out;
7850 }
7762 clear_nlink(inode); 7851 clear_nlink(inode);
7763 /* One for the block groups ref */ 7852 /* One for the block groups ref */
7764 spin_lock(&block_group->lock); 7853 spin_lock(&block_group->lock);
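
Rather than BUG_ON a failed btrfs_orphan_add(), the removal path now queues the inode reference for release with btrfs_add_delayed_iput() and propagates the error. The underlying rule, release what the lookup handed you before bailing out, sketched standalone with stand-in types and error values:

#include <stdio.h>
#include <stdlib.h>

struct inode_like { int refs; };

static struct inode_like *lookup_inode(void)       /* returns a held reference */
{
        struct inode_like *i = malloc(sizeof(*i));

        if (i)
                i->refs = 1;
        return i;
}

static void drop_ref(struct inode_like *i)
{
        if (--i->refs == 0)
                free(i);
}

static int orphan_add_stub(struct inode_like *i)
{
        (void)i;
        return -12;                                 /* force the failure path */
}

static int remove_group_like(void)
{
        struct inode_like *inode = lookup_inode();
        int ret;

        if (!inode)
                return -12;

        ret = orphan_add_stub(inode);
        if (ret) {
                drop_ref(inode);    /* release before propagating, don't BUG */
                return ret;
        }
        drop_ref(inode);            /* ... teardown would continue here ... */
        return 0;
}

int main(void) { return remove_group_like() ? 1 : 0; }
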
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2862454bcdb3..8d904dd7ea9f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -19,6 +19,7 @@
19#include "btrfs_inode.h" 19#include "btrfs_inode.h"
20#include "volumes.h" 20#include "volumes.h"
21#include "check-integrity.h" 21#include "check-integrity.h"
22#include "locking.h"
22 23
23static struct kmem_cache *extent_state_cache; 24static struct kmem_cache *extent_state_cache;
24static struct kmem_cache *extent_buffer_cache; 25static struct kmem_cache *extent_buffer_cache;
@@ -53,6 +54,13 @@ struct extent_page_data {
53 unsigned int sync_io:1; 54 unsigned int sync_io:1;
54}; 55};
55 56
57static noinline void flush_write_bio(void *data);
58static inline struct btrfs_fs_info *
59tree_fs_info(struct extent_io_tree *tree)
60{
61 return btrfs_sb(tree->mapping->host->i_sb);
62}
63
56int __init extent_io_init(void) 64int __init extent_io_init(void)
57{ 65{
58 extent_state_cache = kmem_cache_create("extent_state", 66 extent_state_cache = kmem_cache_create("extent_state",
@@ -136,6 +144,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
136#endif 144#endif
137 atomic_set(&state->refs, 1); 145 atomic_set(&state->refs, 1);
138 init_waitqueue_head(&state->wq); 146 init_waitqueue_head(&state->wq);
147 trace_alloc_extent_state(state, mask, _RET_IP_);
139 return state; 148 return state;
140} 149}
141 150
@@ -153,6 +162,7 @@ void free_extent_state(struct extent_state *state)
153 list_del(&state->leak_list); 162 list_del(&state->leak_list);
154 spin_unlock_irqrestore(&leak_lock, flags); 163 spin_unlock_irqrestore(&leak_lock, flags);
155#endif 164#endif
165 trace_free_extent_state(state, _RET_IP_);
156 kmem_cache_free(extent_state_cache, state); 166 kmem_cache_free(extent_state_cache, state);
157 } 167 }
158} 168}
@@ -439,6 +449,13 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
439 return prealloc; 449 return prealloc;
440} 450}
441 451
452void extent_io_tree_panic(struct extent_io_tree *tree, int err)
453{
454 btrfs_panic(tree_fs_info(tree), err, "Locking error: "
455 "Extent tree was modified by another "
456 "thread while locked.");
457}
458
442/* 459/*
443 * clear some bits on a range in the tree. This may require splitting 460 * clear some bits on a range in the tree. This may require splitting
444 * or inserting elements in the tree, so the gfp mask is used to 461 * or inserting elements in the tree, so the gfp mask is used to
@@ -449,8 +466,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
449 * 466 *
450 * the range [start, end] is inclusive. 467 * the range [start, end] is inclusive.
451 * 468 *
452 * This takes the tree lock, and returns < 0 on error, > 0 if any of the 469 * This takes the tree lock, and returns 0 on success and < 0 on error.
453 * bits were already set, or zero if none of the bits were already set.
454 */ 470 */
455int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 471int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
456 int bits, int wake, int delete, 472 int bits, int wake, int delete,
@@ -464,7 +480,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
464 struct rb_node *node; 480 struct rb_node *node;
465 u64 last_end; 481 u64 last_end;
466 int err; 482 int err;
467 int set = 0;
468 int clear = 0; 483 int clear = 0;
469 484
470 if (delete) 485 if (delete)
@@ -542,12 +557,14 @@ hit_next:
542 prealloc = alloc_extent_state_atomic(prealloc); 557 prealloc = alloc_extent_state_atomic(prealloc);
543 BUG_ON(!prealloc); 558 BUG_ON(!prealloc);
544 err = split_state(tree, state, prealloc, start); 559 err = split_state(tree, state, prealloc, start);
545 BUG_ON(err == -EEXIST); 560 if (err)
561 extent_io_tree_panic(tree, err);
562
546 prealloc = NULL; 563 prealloc = NULL;
547 if (err) 564 if (err)
548 goto out; 565 goto out;
549 if (state->end <= end) { 566 if (state->end <= end) {
550 set |= clear_state_bit(tree, state, &bits, wake); 567 clear_state_bit(tree, state, &bits, wake);
551 if (last_end == (u64)-1) 568 if (last_end == (u64)-1)
552 goto out; 569 goto out;
553 start = last_end + 1; 570 start = last_end + 1;
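
Here the old BUG_ON(err == -EEXIST) after split_state() becomes a call to the new extent_io_tree_panic() helper: a failure while the tree is locked can only mean the tree changed underneath us, which is unrecoverable by design. The shape of that policy as a userspace sketch, with abort() standing in for btrfs_panic() and all names invented:

#include <stdio.h>
#include <stdlib.h>

struct io_tree { const char *name; };

/* stand-in for extent_io_tree_panic(): report, then die loudly */
static void tree_panic(struct io_tree *tree, int err)
{
        fprintf(stderr, "%s: locking error %d: tree modified while locked\n",
                tree->name, err);
        abort();
}

static int split_state_stub(void) { return 0; }     /* pretend success */

int main(void)
{
        struct io_tree tree = { "extent_io" };
        int err = split_state_stub();

        if (err)            /* any error here is a logic bug, not an I/O error */
                tree_panic(&tree, err);
        return 0;
}
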
@@ -564,17 +581,19 @@ hit_next:
564 prealloc = alloc_extent_state_atomic(prealloc); 581 prealloc = alloc_extent_state_atomic(prealloc);
565 BUG_ON(!prealloc); 582 BUG_ON(!prealloc);
566 err = split_state(tree, state, prealloc, end + 1); 583 err = split_state(tree, state, prealloc, end + 1);
567 BUG_ON(err == -EEXIST); 584 if (err)
585 extent_io_tree_panic(tree, err);
586
568 if (wake) 587 if (wake)
569 wake_up(&state->wq); 588 wake_up(&state->wq);
570 589
571 set |= clear_state_bit(tree, prealloc, &bits, wake); 590 clear_state_bit(tree, prealloc, &bits, wake);
572 591
573 prealloc = NULL; 592 prealloc = NULL;
574 goto out; 593 goto out;
575 } 594 }
576 595
577 set |= clear_state_bit(tree, state, &bits, wake); 596 clear_state_bit(tree, state, &bits, wake);
578next: 597next:
579 if (last_end == (u64)-1) 598 if (last_end == (u64)-1)
580 goto out; 599 goto out;
@@ -591,7 +610,7 @@ out:
591 if (prealloc) 610 if (prealloc)
592 free_extent_state(prealloc); 611 free_extent_state(prealloc);
593 612
594 return set; 613 return 0;
595 614
596search_again: 615search_again:
597 if (start > end) 616 if (start > end)
@@ -602,8 +621,8 @@ search_again:
602 goto again; 621 goto again;
603} 622}
604 623
605static int wait_on_state(struct extent_io_tree *tree, 624static void wait_on_state(struct extent_io_tree *tree,
606 struct extent_state *state) 625 struct extent_state *state)
607 __releases(tree->lock) 626 __releases(tree->lock)
608 __acquires(tree->lock) 627 __acquires(tree->lock)
609{ 628{
@@ -613,7 +632,6 @@ static int wait_on_state(struct extent_io_tree *tree,
613 schedule(); 632 schedule();
614 spin_lock(&tree->lock); 633 spin_lock(&tree->lock);
615 finish_wait(&state->wq, &wait); 634 finish_wait(&state->wq, &wait);
616 return 0;
617} 635}
618 636
619/* 637/*
@@ -621,7 +639,7 @@ static int wait_on_state(struct extent_io_tree *tree,
621 * The range [start, end] is inclusive. 639 * The range [start, end] is inclusive.
622 * The tree lock is taken by this function 640 * The tree lock is taken by this function
623 */ 641 */
624int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) 642void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
625{ 643{
626 struct extent_state *state; 644 struct extent_state *state;
627 struct rb_node *node; 645 struct rb_node *node;
@@ -658,7 +676,6 @@ again:
658 } 676 }
659out: 677out:
660 spin_unlock(&tree->lock); 678 spin_unlock(&tree->lock);
661 return 0;
662} 679}
663 680
664static void set_state_bits(struct extent_io_tree *tree, 681static void set_state_bits(struct extent_io_tree *tree,
@@ -706,9 +723,10 @@ static void uncache_state(struct extent_state **cached_ptr)
706 * [start, end] is inclusive. This takes the tree lock. 723 * [start, end] is inclusive. This takes the tree lock.
707 */ 724 */
708 725
709int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 726static int __must_check
710 int bits, int exclusive_bits, u64 *failed_start, 727__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
711 struct extent_state **cached_state, gfp_t mask) 728 int bits, int exclusive_bits, u64 *failed_start,
729 struct extent_state **cached_state, gfp_t mask)
712{ 730{
713 struct extent_state *state; 731 struct extent_state *state;
714 struct extent_state *prealloc = NULL; 732 struct extent_state *prealloc = NULL;
@@ -742,8 +760,10 @@ again:
742 prealloc = alloc_extent_state_atomic(prealloc); 760 prealloc = alloc_extent_state_atomic(prealloc);
743 BUG_ON(!prealloc); 761 BUG_ON(!prealloc);
744 err = insert_state(tree, prealloc, start, end, &bits); 762 err = insert_state(tree, prealloc, start, end, &bits);
763 if (err)
764 extent_io_tree_panic(tree, err);
765
745 prealloc = NULL; 766 prealloc = NULL;
746 BUG_ON(err == -EEXIST);
747 goto out; 767 goto out;
748 } 768 }
749 state = rb_entry(node, struct extent_state, rb_node); 769 state = rb_entry(node, struct extent_state, rb_node);
@@ -809,7 +829,9 @@ hit_next:
809 prealloc = alloc_extent_state_atomic(prealloc); 829 prealloc = alloc_extent_state_atomic(prealloc);
810 BUG_ON(!prealloc); 830 BUG_ON(!prealloc);
811 err = split_state(tree, state, prealloc, start); 831 err = split_state(tree, state, prealloc, start);
812 BUG_ON(err == -EEXIST); 832 if (err)
833 extent_io_tree_panic(tree, err);
834
813 prealloc = NULL; 835 prealloc = NULL;
814 if (err) 836 if (err)
815 goto out; 837 goto out;
@@ -846,12 +868,9 @@ hit_next:
846 */ 868 */
847 err = insert_state(tree, prealloc, start, this_end, 869 err = insert_state(tree, prealloc, start, this_end,
848 &bits); 870 &bits);
849 BUG_ON(err == -EEXIST); 871 if (err)
850 if (err) { 872 extent_io_tree_panic(tree, err);
851 free_extent_state(prealloc); 873
852 prealloc = NULL;
853 goto out;
854 }
855 cache_state(prealloc, cached_state); 874 cache_state(prealloc, cached_state);
856 prealloc = NULL; 875 prealloc = NULL;
857 start = this_end + 1; 876 start = this_end + 1;
@@ -873,7 +892,8 @@ hit_next:
873 prealloc = alloc_extent_state_atomic(prealloc); 892 prealloc = alloc_extent_state_atomic(prealloc);
874 BUG_ON(!prealloc); 893 BUG_ON(!prealloc);
875 err = split_state(tree, state, prealloc, end + 1); 894 err = split_state(tree, state, prealloc, end + 1);
876 BUG_ON(err == -EEXIST); 895 if (err)
896 extent_io_tree_panic(tree, err);
877 897
878 set_state_bits(tree, prealloc, &bits); 898 set_state_bits(tree, prealloc, &bits);
879 cache_state(prealloc, cached_state); 899 cache_state(prealloc, cached_state);
@@ -900,6 +920,15 @@ search_again:
900 goto again; 920 goto again;
901} 921}
902 922
923int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
924 u64 *failed_start, struct extent_state **cached_state,
925 gfp_t mask)
926{
927 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
928 cached_state, mask);
929}
930
931
903/** 932/**
904 * convert_extent - convert all bits in a given range from one bit to another 933 * convert_extent - convert all bits in a given range from one bit to another
905 * @tree: the io tree to search 934 * @tree: the io tree to search
@@ -946,7 +975,8 @@ again:
946 } 975 }
947 err = insert_state(tree, prealloc, start, end, &bits); 976 err = insert_state(tree, prealloc, start, end, &bits);
948 prealloc = NULL; 977 prealloc = NULL;
949 BUG_ON(err == -EEXIST); 978 if (err)
979 extent_io_tree_panic(tree, err);
950 goto out; 980 goto out;
951 } 981 }
952 state = rb_entry(node, struct extent_state, rb_node); 982 state = rb_entry(node, struct extent_state, rb_node);
@@ -1002,7 +1032,8 @@ hit_next:
1002 goto out; 1032 goto out;
1003 } 1033 }
1004 err = split_state(tree, state, prealloc, start); 1034 err = split_state(tree, state, prealloc, start);
1005 BUG_ON(err == -EEXIST); 1035 if (err)
1036 extent_io_tree_panic(tree, err);
1006 prealloc = NULL; 1037 prealloc = NULL;
1007 if (err) 1038 if (err)
1008 goto out; 1039 goto out;
@@ -1041,12 +1072,8 @@ hit_next:
1041 */ 1072 */
1042 err = insert_state(tree, prealloc, start, this_end, 1073 err = insert_state(tree, prealloc, start, this_end,
1043 &bits); 1074 &bits);
1044 BUG_ON(err == -EEXIST); 1075 if (err)
1045 if (err) { 1076 extent_io_tree_panic(tree, err);
1046 free_extent_state(prealloc);
1047 prealloc = NULL;
1048 goto out;
1049 }
1050 prealloc = NULL; 1077 prealloc = NULL;
1051 start = this_end + 1; 1078 start = this_end + 1;
1052 goto search_again; 1079 goto search_again;
@@ -1065,7 +1092,8 @@ hit_next:
1065 } 1092 }
1066 1093
1067 err = split_state(tree, state, prealloc, end + 1); 1094 err = split_state(tree, state, prealloc, end + 1);
1068 BUG_ON(err == -EEXIST); 1095 if (err)
1096 extent_io_tree_panic(tree, err);
1069 1097
1070 set_state_bits(tree, prealloc, &bits); 1098 set_state_bits(tree, prealloc, &bits);
1071 clear_state_bit(tree, prealloc, &clear_bits, 0); 1099 clear_state_bit(tree, prealloc, &clear_bits, 0);
@@ -1095,14 +1123,14 @@ search_again:
1095int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 1123int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1096 gfp_t mask) 1124 gfp_t mask)
1097{ 1125{
1098 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, 1126 return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
1099 NULL, mask); 1127 NULL, mask);
1100} 1128}
1101 1129
1102int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1130int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1103 int bits, gfp_t mask) 1131 int bits, gfp_t mask)
1104{ 1132{
1105 return set_extent_bit(tree, start, end, bits, 0, NULL, 1133 return set_extent_bit(tree, start, end, bits, NULL,
1106 NULL, mask); 1134 NULL, mask);
1107} 1135}
1108 1136
@@ -1117,7 +1145,7 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
1117{ 1145{
1118 return set_extent_bit(tree, start, end, 1146 return set_extent_bit(tree, start, end,
1119 EXTENT_DELALLOC | EXTENT_UPTODATE, 1147 EXTENT_DELALLOC | EXTENT_UPTODATE,
1120 0, NULL, cached_state, mask); 1148 NULL, cached_state, mask);
1121} 1149}
1122 1150
1123int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 1151int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1131,7 +1159,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1131int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 1159int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
1132 gfp_t mask) 1160 gfp_t mask)
1133{ 1161{
1134 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, 1162 return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
1135 NULL, mask); 1163 NULL, mask);
1136} 1164}
1137 1165
@@ -1139,7 +1167,7 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1139 struct extent_state **cached_state, gfp_t mask) 1167 struct extent_state **cached_state, gfp_t mask)
1140{ 1168{
1141 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 1169 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
1142 NULL, cached_state, mask); 1170 cached_state, mask);
1143} 1171}
1144 1172
1145static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 1173static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1155,42 +1183,40 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
1155 * us if waiting is desired. 1183 * us if waiting is desired.
1156 */ 1184 */
1157int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1185int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1158 int bits, struct extent_state **cached_state, gfp_t mask) 1186 int bits, struct extent_state **cached_state)
1159{ 1187{
1160 int err; 1188 int err;
1161 u64 failed_start; 1189 u64 failed_start;
1162 while (1) { 1190 while (1) {
1163 err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, 1191 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
1164 EXTENT_LOCKED, &failed_start, 1192 EXTENT_LOCKED, &failed_start,
1165 cached_state, mask); 1193 cached_state, GFP_NOFS);
1166 if (err == -EEXIST && (mask & __GFP_WAIT)) { 1194 if (err == -EEXIST) {
1167 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); 1195 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1168 start = failed_start; 1196 start = failed_start;
1169 } else { 1197 } else
1170 break; 1198 break;
1171 }
1172 WARN_ON(start > end); 1199 WARN_ON(start > end);
1173 } 1200 }
1174 return err; 1201 return err;
1175} 1202}
1176 1203
1177int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 1204int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1178{ 1205{
1179 return lock_extent_bits(tree, start, end, 0, NULL, mask); 1206 return lock_extent_bits(tree, start, end, 0, NULL);
1180} 1207}
1181 1208
1182int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1209int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1183 gfp_t mask)
1184{ 1210{
1185 int err; 1211 int err;
1186 u64 failed_start; 1212 u64 failed_start;
1187 1213
1188 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, 1214 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1189 &failed_start, NULL, mask); 1215 &failed_start, NULL, GFP_NOFS);
1190 if (err == -EEXIST) { 1216 if (err == -EEXIST) {
1191 if (failed_start > start) 1217 if (failed_start > start)
1192 clear_extent_bit(tree, start, failed_start - 1, 1218 clear_extent_bit(tree, start, failed_start - 1,
1193 EXTENT_LOCKED, 1, 0, NULL, mask); 1219 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
1194 return 0; 1220 return 0;
1195 } 1221 }
1196 return 1; 1222 return 1;
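
lock_extent(), unlock_extent() and try_lock_extent() lose their gfp_t argument and hardcode GFP_NOFS, and lock_extent_bits() now always waits out an -EEXIST collision, resuming from failed_start. The retry loop in isolation, stubbed for userspace (the collision at offset 4096 is invented for the demo):

#include <stdint.h>
#include <stdio.h>

#define EEXIST 17

/* pretend the first attempt collides at offset 4096, then succeeds */
static int try_lock_range(uint64_t start, uint64_t end, uint64_t *failed_start)
{
        static int attempt;

        (void)end;
        if (attempt++ == 0) {
                *failed_start = 4096;
                return -EEXIST;
        }
        return 0;
}

static void wait_range_unlocked(uint64_t start, uint64_t end)
{
        printf("waiting on [%llu, %llu]\n",
               (unsigned long long)start, (unsigned long long)end);
}

static int lock_extent_like(uint64_t start, uint64_t end)
{
        uint64_t failed_start;
        int err;

        while (1) {
                err = try_lock_range(start, end, &failed_start);
                if (err == -EEXIST) {
                        wait_range_unlocked(failed_start, end);
                        start = failed_start;       /* resume where we collided */
                } else
                        break;
        }
        return err;
}

int main(void) { return lock_extent_like(0, 8191); }
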
@@ -1203,10 +1229,10 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
1203 mask); 1229 mask);
1204} 1230}
1205 1231
1206int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 1232int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1207{ 1233{
1208 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, 1234 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1209 mask); 1235 GFP_NOFS);
1210} 1236}
1211 1237
1212/* 1238/*
@@ -1220,7 +1246,7 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1220 1246
1221 while (index <= end_index) { 1247 while (index <= end_index) {
1222 page = find_get_page(tree->mapping, index); 1248 page = find_get_page(tree->mapping, index);
1223 BUG_ON(!page); 1249 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1224 set_page_writeback(page); 1250 set_page_writeback(page);
1225 page_cache_release(page); 1251 page_cache_release(page);
1226 index++; 1252 index++;
@@ -1343,9 +1369,9 @@ out:
1343 return found; 1369 return found;
1344} 1370}
1345 1371
1346static noinline int __unlock_for_delalloc(struct inode *inode, 1372static noinline void __unlock_for_delalloc(struct inode *inode,
1347 struct page *locked_page, 1373 struct page *locked_page,
1348 u64 start, u64 end) 1374 u64 start, u64 end)
1349{ 1375{
1350 int ret; 1376 int ret;
1351 struct page *pages[16]; 1377 struct page *pages[16];
@@ -1355,7 +1381,7 @@ static noinline int __unlock_for_delalloc(struct inode *inode,
1355 int i; 1381 int i;
1356 1382
1357 if (index == locked_page->index && end_index == index) 1383 if (index == locked_page->index && end_index == index)
1358 return 0; 1384 return;
1359 1385
1360 while (nr_pages > 0) { 1386 while (nr_pages > 0) {
1361 ret = find_get_pages_contig(inode->i_mapping, index, 1387 ret = find_get_pages_contig(inode->i_mapping, index,
@@ -1370,7 +1396,6 @@ static noinline int __unlock_for_delalloc(struct inode *inode,
1370 index += ret; 1396 index += ret;
1371 cond_resched(); 1397 cond_resched();
1372 } 1398 }
1373 return 0;
1374} 1399}
1375 1400
1376static noinline int lock_delalloc_pages(struct inode *inode, 1401static noinline int lock_delalloc_pages(struct inode *inode,
@@ -1500,11 +1525,10 @@ again:
1500 goto out_failed; 1525 goto out_failed;
1501 } 1526 }
1502 } 1527 }
1503 BUG_ON(ret); 1528 BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
1504 1529
1505 /* step three, lock the state bits for the whole range */ 1530 /* step three, lock the state bits for the whole range */
1506 lock_extent_bits(tree, delalloc_start, delalloc_end, 1531 lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
1507 0, &cached_state, GFP_NOFS);
1508 1532
1509 /* then test to make sure it is all still delalloc */ 1533 /* then test to make sure it is all still delalloc */
1510 ret = test_range_bit(tree, delalloc_start, delalloc_end, 1534 ret = test_range_bit(tree, delalloc_start, delalloc_end,
@@ -1761,39 +1785,34 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1761 * helper function to set a given page up to date if all the 1785 * helper function to set a given page up to date if all the
1762 * extents in the tree for that page are up to date 1786 * extents in the tree for that page are up to date
1763 */ 1787 */
1764static int check_page_uptodate(struct extent_io_tree *tree, 1788static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1765 struct page *page)
1766{ 1789{
1767 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1790 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1768 u64 end = start + PAGE_CACHE_SIZE - 1; 1791 u64 end = start + PAGE_CACHE_SIZE - 1;
1769 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) 1792 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1770 SetPageUptodate(page); 1793 SetPageUptodate(page);
1771 return 0;
1772} 1794}
1773 1795
1774/* 1796/*
1775 * helper function to unlock a page if all the extents in the tree 1797 * helper function to unlock a page if all the extents in the tree
1776 * for that page are unlocked 1798 * for that page are unlocked
1777 */ 1799 */
1778static int check_page_locked(struct extent_io_tree *tree, 1800static void check_page_locked(struct extent_io_tree *tree, struct page *page)
1779 struct page *page)
1780{ 1801{
1781 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1802 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1782 u64 end = start + PAGE_CACHE_SIZE - 1; 1803 u64 end = start + PAGE_CACHE_SIZE - 1;
1783 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) 1804 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
1784 unlock_page(page); 1805 unlock_page(page);
1785 return 0;
1786} 1806}
1787 1807
1788/* 1808/*
1789 * helper function to end page writeback if all the extents 1809 * helper function to end page writeback if all the extents
1790 * in the tree for that page are done with writeback 1810 * in the tree for that page are done with writeback
1791 */ 1811 */
1792static int check_page_writeback(struct extent_io_tree *tree, 1812static void check_page_writeback(struct extent_io_tree *tree,
1793 struct page *page) 1813 struct page *page)
1794{ 1814{
1795 end_page_writeback(page); 1815 end_page_writeback(page);
1796 return 0;
1797} 1816}
1798 1817
1799/* 1818/*
@@ -1912,6 +1931,26 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
1912 return 0; 1931 return 0;
1913} 1932}
1914 1933
1934int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
1935 int mirror_num)
1936{
1937 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
1938 u64 start = eb->start;
1939 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
1940 int ret;
1941
1942 for (i = 0; i < num_pages; i++) {
1943 struct page *p = extent_buffer_page(eb, i);
1944 ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
1945 start, p, mirror_num);
1946 if (ret)
1947 break;
1948 start += PAGE_CACHE_SIZE;
1949 }
1950
1951 return ret;
1952}
1953
1915/* 1954/*
1916 * each time an IO finishes, we do a fast check in the IO failure tree 1955 * each time an IO finishes, we do a fast check in the IO failure tree
1917 * to see if we need to process or clean up an io_failure_record 1956 * to see if we need to process or clean up an io_failure_record
@@ -2258,6 +2297,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2258 u64 start; 2297 u64 start;
2259 u64 end; 2298 u64 end;
2260 int whole_page; 2299 int whole_page;
2300 int failed_mirror;
2261 int ret; 2301 int ret;
2262 2302
2263 if (err) 2303 if (err)
@@ -2304,9 +2344,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2304 else 2344 else
2305 clean_io_failure(start, page); 2345 clean_io_failure(start, page);
2306 } 2346 }
2307 if (!uptodate) { 2347
2308 int failed_mirror; 2348 if (!uptodate)
2309 failed_mirror = (int)(unsigned long)bio->bi_bdev; 2349 failed_mirror = (int)(unsigned long)bio->bi_bdev;
2350
2351 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
2352 ret = tree->ops->readpage_io_failed_hook(page, failed_mirror);
2353 if (!ret && !err &&
2354 test_bit(BIO_UPTODATE, &bio->bi_flags))
2355 uptodate = 1;
2356 } else if (!uptodate) {
2310 /* 2357 /*
2311 * The generic bio_readpage_error handles errors the 2358 * The generic bio_readpage_error handles errors the
2312 * following way: If possible, new read requests are 2359 * following way: If possible, new read requests are
@@ -2320,7 +2367,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2320 ret = bio_readpage_error(bio, page, start, end, 2367 ret = bio_readpage_error(bio, page, start, end,
2321 failed_mirror, NULL); 2368 failed_mirror, NULL);
2322 if (ret == 0) { 2369 if (ret == 0) {
2323error_handled:
2324 uptodate = 2370 uptodate =
2325 test_bit(BIO_UPTODATE, &bio->bi_flags); 2371 test_bit(BIO_UPTODATE, &bio->bi_flags);
2326 if (err) 2372 if (err)
@@ -2328,16 +2374,9 @@ error_handled:
2328 uncache_state(&cached); 2374 uncache_state(&cached);
2329 continue; 2375 continue;
2330 } 2376 }
2331 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2332 ret = tree->ops->readpage_io_failed_hook(
2333 bio, page, start, end,
2334 failed_mirror, state);
2335 if (ret == 0)
2336 goto error_handled;
2337 }
2338 } 2377 }
2339 2378
2340 if (uptodate) { 2379 if (uptodate && tree->track_uptodate) {
2341 set_extent_uptodate(tree, start, end, &cached, 2380 set_extent_uptodate(tree, start, end, &cached,
2342 GFP_ATOMIC); 2381 GFP_ATOMIC);
2343 } 2382 }
@@ -2386,8 +2425,12 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2386 return bio; 2425 return bio;
2387} 2426}
2388 2427
2389static int submit_one_bio(int rw, struct bio *bio, int mirror_num, 2428/*
2390 unsigned long bio_flags) 2429 * Since writes are async, they will only return -ENOMEM.
2430 * Reads can return the full range of I/O error conditions.
2431 */
2432static int __must_check submit_one_bio(int rw, struct bio *bio,
2433 int mirror_num, unsigned long bio_flags)
2391{ 2434{
2392 int ret = 0; 2435 int ret = 0;
2393 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 2436 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -2413,6 +2456,19 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
2413 return ret; 2456 return ret;
2414} 2457}
2415 2458
2459static int merge_bio(struct extent_io_tree *tree, struct page *page,
2460 unsigned long offset, size_t size, struct bio *bio,
2461 unsigned long bio_flags)
2462{
2463 int ret = 0;
2464 if (tree->ops && tree->ops->merge_bio_hook)
2465 ret = tree->ops->merge_bio_hook(page, offset, size, bio,
2466 bio_flags);
2467 BUG_ON(ret < 0);
2468 return ret;
2469
2470}
2471
2416static int submit_extent_page(int rw, struct extent_io_tree *tree, 2472static int submit_extent_page(int rw, struct extent_io_tree *tree,
2417 struct page *page, sector_t sector, 2473 struct page *page, sector_t sector,
2418 size_t size, unsigned long offset, 2474 size_t size, unsigned long offset,
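
The new merge_bio() wrapper centralizes the optional merge_bio_hook dispatch that submit_extent_page() used to open-code, and BUGs on a negative return. The optional-hook pattern itself, sketched standalone with toy types and a toy merge policy (none of these names are the btrfs API):

#include <assert.h>
#include <stdio.h>

struct ops {
        /* optional: return nonzero to refuse merging into the current bio */
        int (*merge_hook)(unsigned long offset, unsigned long size);
};

static int dispatch_merge(const struct ops *ops,
                          unsigned long offset, unsigned long size)
{
        int ret = 0;

        if (ops && ops->merge_hook)
                ret = ops->merge_hook(offset, size);
        assert(ret >= 0);       /* a negative return is a logic error */
        return ret;
}

/* toy policy: refuse merges that would cross a 4KB page */
static int refuse_crossing(unsigned long offset, unsigned long size)
{
        return offset + size > 4096;
}

int main(void)
{
        struct ops o = { refuse_crossing };

        printf("merge refused: %d\n", dispatch_merge(&o, 0, 8192));
        return 0;
}
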
@@ -2441,12 +2497,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2441 sector; 2497 sector;
2442 2498
2443 if (prev_bio_flags != bio_flags || !contig || 2499 if (prev_bio_flags != bio_flags || !contig ||
2444 (tree->ops && tree->ops->merge_bio_hook && 2500 merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
2445 tree->ops->merge_bio_hook(page, offset, page_size, bio,
2446 bio_flags)) ||
2447 bio_add_page(bio, page, page_size, offset) < page_size) { 2501 bio_add_page(bio, page, page_size, offset) < page_size) {
2448 ret = submit_one_bio(rw, bio, mirror_num, 2502 ret = submit_one_bio(rw, bio, mirror_num,
2449 prev_bio_flags); 2503 prev_bio_flags);
2504 if (ret < 0)
2505 return ret;
2450 bio = NULL; 2506 bio = NULL;
2451 } else { 2507 } else {
2452 return 0; 2508 return 0;
@@ -2473,25 +2529,31 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2473 return ret; 2529 return ret;
2474} 2530}
2475 2531
2476void set_page_extent_mapped(struct page *page) 2532void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
2477{ 2533{
2478 if (!PagePrivate(page)) { 2534 if (!PagePrivate(page)) {
2479 SetPagePrivate(page); 2535 SetPagePrivate(page);
2480 page_cache_get(page); 2536 page_cache_get(page);
2481 set_page_private(page, EXTENT_PAGE_PRIVATE); 2537 set_page_private(page, (unsigned long)eb);
2538 } else {
2539 WARN_ON(page->private != (unsigned long)eb);
2482 } 2540 }
2483} 2541}
2484 2542
2485static void set_page_extent_head(struct page *page, unsigned long len) 2543void set_page_extent_mapped(struct page *page)
2486{ 2544{
2487 WARN_ON(!PagePrivate(page)); 2545 if (!PagePrivate(page)) {
2488 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 2546 SetPagePrivate(page);
2547 page_cache_get(page);
2548 set_page_private(page, EXTENT_PAGE_PRIVATE);
2549 }
2489} 2550}
2490 2551
2491/* 2552/*
2492 * basic readpage implementation. Locked extent state structs are inserted 2553 * basic readpage implementation. Locked extent state structs are inserted
2493 * into the tree that are removed when the IO is done (by the end_io 2554 * into the tree that are removed when the IO is done (by the end_io
2494 * handlers) 2555 * handlers)
2556 * XXX JDM: This needs looking at to ensure proper page locking
2495 */ 2557 */
2496static int __extent_read_full_page(struct extent_io_tree *tree, 2558static int __extent_read_full_page(struct extent_io_tree *tree,
2497 struct page *page, 2559 struct page *page,
@@ -2531,11 +2593,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2531 2593
2532 end = page_end; 2594 end = page_end;
2533 while (1) { 2595 while (1) {
2534 lock_extent(tree, start, end, GFP_NOFS); 2596 lock_extent(tree, start, end);
2535 ordered = btrfs_lookup_ordered_extent(inode, start); 2597 ordered = btrfs_lookup_ordered_extent(inode, start);
2536 if (!ordered) 2598 if (!ordered)
2537 break; 2599 break;
2538 unlock_extent(tree, start, end, GFP_NOFS); 2600 unlock_extent(tree, start, end);
2539 btrfs_start_ordered_extent(inode, ordered, 1); 2601 btrfs_start_ordered_extent(inode, ordered, 1);
2540 btrfs_put_ordered_extent(ordered); 2602 btrfs_put_ordered_extent(ordered);
2541 } 2603 }
@@ -2572,7 +2634,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2572 end - cur + 1, 0); 2634 end - cur + 1, 0);
2573 if (IS_ERR_OR_NULL(em)) { 2635 if (IS_ERR_OR_NULL(em)) {
2574 SetPageError(page); 2636 SetPageError(page);
2575 unlock_extent(tree, cur, end, GFP_NOFS); 2637 unlock_extent(tree, cur, end);
2576 break; 2638 break;
2577 } 2639 }
2578 extent_offset = cur - em->start; 2640 extent_offset = cur - em->start;
@@ -2624,7 +2686,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2624 if (test_range_bit(tree, cur, cur_end, 2686 if (test_range_bit(tree, cur, cur_end,
2625 EXTENT_UPTODATE, 1, NULL)) { 2687 EXTENT_UPTODATE, 1, NULL)) {
2626 check_page_uptodate(tree, page); 2688 check_page_uptodate(tree, page);
2627 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2689 unlock_extent(tree, cur, cur + iosize - 1);
2628 cur = cur + iosize; 2690 cur = cur + iosize;
2629 pg_offset += iosize; 2691 pg_offset += iosize;
2630 continue; 2692 continue;
@@ -2634,7 +2696,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2634 */ 2696 */
2635 if (block_start == EXTENT_MAP_INLINE) { 2697 if (block_start == EXTENT_MAP_INLINE) {
2636 SetPageError(page); 2698 SetPageError(page);
2637 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2699 unlock_extent(tree, cur, cur + iosize - 1);
2638 cur = cur + iosize; 2700 cur = cur + iosize;
2639 pg_offset += iosize; 2701 pg_offset += iosize;
2640 continue; 2702 continue;
@@ -2654,6 +2716,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2654 end_bio_extent_readpage, mirror_num, 2716 end_bio_extent_readpage, mirror_num,
2655 *bio_flags, 2717 *bio_flags,
2656 this_bio_flag); 2718 this_bio_flag);
2719 BUG_ON(ret == -ENOMEM);
2657 nr++; 2720 nr++;
2658 *bio_flags = this_bio_flag; 2721 *bio_flags = this_bio_flag;
2659 } 2722 }
@@ -2795,7 +2858,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2795 delalloc_end, 2858 delalloc_end,
2796 &page_started, 2859 &page_started,
2797 &nr_written); 2860 &nr_written);
2798 BUG_ON(ret); 2861 /* File system has been set read-only */
2862 if (ret) {
2863 SetPageError(page);
2864 goto done;
2865 }
2799 /* 2866 /*
2800 * delalloc_end is already one less than the total 2867 * delalloc_end is already one less than the total
2801 * length, so we don't subtract one from 2868 * length, so we don't subtract one from
@@ -2968,6 +3035,275 @@ done_unlocked:
2968 return 0; 3035 return 0;
2969} 3036}
2970 3037
3038static int eb_wait(void *word)
3039{
3040 io_schedule();
3041 return 0;
3042}
3043
3044static void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3045{
3046 wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
3047 TASK_UNINTERRUPTIBLE);
3048}
3049
3050static int lock_extent_buffer_for_io(struct extent_buffer *eb,
3051 struct btrfs_fs_info *fs_info,
3052 struct extent_page_data *epd)
3053{
3054 unsigned long i, num_pages;
3055 int flush = 0;
3056 int ret = 0;
3057
3058 if (!btrfs_try_tree_write_lock(eb)) {
3059 flush = 1;
3060 flush_write_bio(epd);
3061 btrfs_tree_lock(eb);
3062 }
3063
3064 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3065 btrfs_tree_unlock(eb);
3066 if (!epd->sync_io)
3067 return 0;
3068 if (!flush) {
3069 flush_write_bio(epd);
3070 flush = 1;
3071 }
3072 while (1) {
3073 wait_on_extent_buffer_writeback(eb);
3074 btrfs_tree_lock(eb);
3075 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3076 break;
3077 btrfs_tree_unlock(eb);
3078 }
3079 }
3080
3081 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3082 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3083 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3084 spin_lock(&fs_info->delalloc_lock);
3085 if (fs_info->dirty_metadata_bytes >= eb->len)
3086 fs_info->dirty_metadata_bytes -= eb->len;
3087 else
3088 WARN_ON(1);
3089 spin_unlock(&fs_info->delalloc_lock);
3090 ret = 1;
3091 }
3092
3093 btrfs_tree_unlock(eb);
3094
3095 if (!ret)
3096 return ret;
3097
3098 num_pages = num_extent_pages(eb->start, eb->len);
3099 for (i = 0; i < num_pages; i++) {
3100 struct page *p = extent_buffer_page(eb, i);
3101
3102 if (!trylock_page(p)) {
3103 if (!flush) {
3104 flush_write_bio(epd);
3105 flush = 1;
3106 }
3107 lock_page(p);
3108 }
3109 }
3110
3111 return ret;
3112}
3113
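lock_extent_buffer_for_io is built around a try-then-flush idiom: epd->bio may hold queued writes that the current lock holder is waiting on, so the function only blocks on the tree lock (or the page locks) after flushing that pending bio. A runnable userspace analogue of the idiom using pthreads, where flush_pending() is a stand-in for flush_write_bio():

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void flush_pending(void)
{
	puts("flushing queued IO before sleeping");	/* stand-in for flush_write_bio() */
}

static void lock_for_io(void)
{
	if (pthread_mutex_trylock(&lock) != 0) {
		flush_pending();	/* the lock holder may be waiting on this IO */
		pthread_mutex_lock(&lock);
	}
}

int main(void)
{
	lock_for_io();
	pthread_mutex_unlock(&lock);
	return 0;
}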
3114static void end_extent_buffer_writeback(struct extent_buffer *eb)
3115{
3116 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3117 smp_mb__after_clear_bit();
3118 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3119}
3120
3121static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
3122{
3123 int uptodate = err == 0;
3124 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
3125 struct extent_buffer *eb;
3126 int done;
3127
3128 do {
3129 struct page *page = bvec->bv_page;
3130
3131 bvec--;
3132 eb = (struct extent_buffer *)page->private;
3133 BUG_ON(!eb);
3134 done = atomic_dec_and_test(&eb->io_pages);
3135
3136 if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
3137 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
3138 ClearPageUptodate(page);
3139 SetPageError(page);
3140 }
3141
3142 end_page_writeback(page);
3143
3144 if (!done)
3145 continue;
3146
3147 end_extent_buffer_writeback(eb);
3148 } while (bvec >= bio->bi_io_vec);
3149
3150 bio_put(bio);
3151
3152}
3153
3154static int write_one_eb(struct extent_buffer *eb,
3155 struct btrfs_fs_info *fs_info,
3156 struct writeback_control *wbc,
3157 struct extent_page_data *epd)
3158{
3159 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3160 u64 offset = eb->start;
3161 unsigned long i, num_pages;
3162 int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
3163 int ret;
3164
3165 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
3166 num_pages = num_extent_pages(eb->start, eb->len);
3167 atomic_set(&eb->io_pages, num_pages);
3168 for (i = 0; i < num_pages; i++) {
3169 struct page *p = extent_buffer_page(eb, i);
3170
3171 clear_page_dirty_for_io(p);
3172 set_page_writeback(p);
3173 ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
3174 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
3175 -1, end_bio_extent_buffer_writepage,
3176 0, 0, 0);
3177 if (ret) {
3178 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
3179 SetPageError(p);
3180 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3181 end_extent_buffer_writeback(eb);
3182 ret = -EIO;
3183 break;
3184 }
3185 offset += PAGE_CACHE_SIZE;
3186 update_nr_written(p, wbc, 1);
3187 unlock_page(p);
3188 }
3189
3190 if (unlikely(ret)) {
3191 for (; i < num_pages; i++) {
3192 struct page *p = extent_buffer_page(eb, i);
3193 unlock_page(p);
3194 }
3195 }
3196
3197 return ret;
3198}
3199
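write_one_eb and the end_io handler above share the io_pages countdown: io_pages starts at the buffer's page count, every completed page decrements it, and only whoever drops it to zero finishes the buffer's writeback; on a submit failure the never-submitted pages are subtracted in one step (atomic_sub_and_test(num_pages - i, ...)). A runnable userspace sketch of the same accounting, using C11 atomics in place of the kernel's atomic_t:

#include <stdatomic.h>
#include <stdio.h>

struct eb { atomic_int io_pages; };

static void end_one_page(struct eb *e)
{
	/* kernel: atomic_dec_and_test() -- true only for the last page */
	if (atomic_fetch_sub(&e->io_pages, 1) == 1)
		puts("last page done: writeback complete");
}

static void abort_submit(struct eb *e, int submitted, int num_pages)
{
	/* kernel: atomic_sub_and_test(num_pages - i, ...) pulls the
	 * never-submitted pages out of the count in one step */
	int left = num_pages - submitted;

	if (atomic_fetch_sub(&e->io_pages, left) == left)
		puts("nothing in flight: writeback complete");
}

int main(void)
{
	struct eb e;

	atomic_init(&e.io_pages, 4);
	end_one_page(&e);	/* page 0 completed */
	abort_submit(&e, 1, 4);	/* submitting page 1 failed */
	return 0;
}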
3200int btree_write_cache_pages(struct address_space *mapping,
3201 struct writeback_control *wbc)
3202{
3203 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3204 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3205 struct extent_buffer *eb, *prev_eb = NULL;
3206 struct extent_page_data epd = {
3207 .bio = NULL,
3208 .tree = tree,
3209 .extent_locked = 0,
3210 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3211 };
3212 int ret = 0;
3213 int done = 0;
3214 int nr_to_write_done = 0;
3215 struct pagevec pvec;
3216 int nr_pages;
3217 pgoff_t index;
3218 pgoff_t end; /* Inclusive */
3219 int scanned = 0;
3220 int tag;
3221
3222 pagevec_init(&pvec, 0);
3223 if (wbc->range_cyclic) {
3224 index = mapping->writeback_index; /* Start from prev offset */
3225 end = -1;
3226 } else {
3227 index = wbc->range_start >> PAGE_CACHE_SHIFT;
3228 end = wbc->range_end >> PAGE_CACHE_SHIFT;
3229 scanned = 1;
3230 }
3231 if (wbc->sync_mode == WB_SYNC_ALL)
3232 tag = PAGECACHE_TAG_TOWRITE;
3233 else
3234 tag = PAGECACHE_TAG_DIRTY;
3235retry:
3236 if (wbc->sync_mode == WB_SYNC_ALL)
3237 tag_pages_for_writeback(mapping, index, end);
3238 while (!done && !nr_to_write_done && (index <= end) &&
3239 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3240 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3241 unsigned i;
3242
3243 scanned = 1;
3244 for (i = 0; i < nr_pages; i++) {
3245 struct page *page = pvec.pages[i];
3246
3247 if (!PagePrivate(page))
3248 continue;
3249
3250 if (!wbc->range_cyclic && page->index > end) {
3251 done = 1;
3252 break;
3253 }
3254
3255 eb = (struct extent_buffer *)page->private;
3256 if (!eb) {
3257 WARN_ON(1);
3258 continue;
3259 }
3260
3261 if (eb == prev_eb)
3262 continue;
3263
3264 if (!atomic_inc_not_zero(&eb->refs)) {
3265 WARN_ON(1);
3266 continue;
3267 }
3268
3269 prev_eb = eb;
3270 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3271 if (!ret) {
3272 free_extent_buffer(eb);
3273 continue;
3274 }
3275
3276 ret = write_one_eb(eb, fs_info, wbc, &epd);
3277 if (ret) {
3278 done = 1;
3279 free_extent_buffer(eb);
3280 break;
3281 }
3282 free_extent_buffer(eb);
3283
3284 /*
3285 * the filesystem may choose to bump up nr_to_write.
3286 * We have to make sure to honor the new nr_to_write
3287 * at any time
3288 */
3289 nr_to_write_done = wbc->nr_to_write <= 0;
3290 }
3291 pagevec_release(&pvec);
3292 cond_resched();
3293 }
3294 if (!scanned && !done) {
3295 /*
3296 * We hit the last page and there is more work to be done: wrap
3297 * back to the start of the file
3298 */
3299 scanned = 1;
3300 index = 0;
3301 goto retry;
3302 }
3303 flush_write_bio(&epd);
3304 return ret;
3305}
3306
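btree_write_cache_pages is a trimmed-down copy of the generic write_cache_pages loop: for WB_SYNC_ALL it first tags the currently dirty pages TOWRITE (so pages dirtied during the sweep cannot livelock it), walks that tag with pagevec_lookup_tag, and for a range_cyclic walk that started mid-file it wraps back to index 0 exactly once. A runnable sketch of just the wrap-around control flow, with an array index standing in for the page cache and write_one() for the per-page work:

#include <stdio.h>

static void write_one(int index)
{
	printf("writing page %d\n", index);
}

static void cyclic_sweep(int start, int n)
{
	int scanned = 0;
	int index = start;
	int end = n - 1;
retry:
	for (; index <= end; index++)
		write_one(index);
	if (!scanned) {
		scanned = 1;	/* wrap once back to the start of the file */
		index = 0;
		end = start - 1;
		goto retry;
	}
}

int main(void)
{
	cyclic_sweep(2, 5);	/* visits 2 3 4, then 0 1 */
	return 0;
}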
2971/** 3307/**
2972 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. 3308 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
2973 * @mapping: address space structure to write 3309 * @mapping: address space structure to write
@@ -3099,10 +3435,14 @@ retry:
3099static void flush_epd_write_bio(struct extent_page_data *epd) 3435static void flush_epd_write_bio(struct extent_page_data *epd)
3100{ 3436{
3101 if (epd->bio) { 3437 if (epd->bio) {
3438 int rw = WRITE;
3439 int ret;
3440
3102 if (epd->sync_io) 3441 if (epd->sync_io)
3103 submit_one_bio(WRITE_SYNC, epd->bio, 0, 0); 3442 rw = WRITE_SYNC;
3104 else 3443
3105 submit_one_bio(WRITE, epd->bio, 0, 0); 3444 ret = submit_one_bio(rw, epd->bio, 0, 0);
3445 BUG_ON(ret < 0); /* -ENOMEM */
3106 epd->bio = NULL; 3446 epd->bio = NULL;
3107 } 3447 }
3108} 3448}
@@ -3219,7 +3559,7 @@ int extent_readpages(struct extent_io_tree *tree,
3219 } 3559 }
3220 BUG_ON(!list_empty(pages)); 3560 BUG_ON(!list_empty(pages));
3221 if (bio) 3561 if (bio)
3222 submit_one_bio(READ, bio, 0, bio_flags); 3562 return submit_one_bio(READ, bio, 0, bio_flags);
3223 return 0; 3563 return 0;
3224} 3564}
3225 3565
@@ -3240,7 +3580,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
3240 if (start > end) 3580 if (start > end)
3241 return 0; 3581 return 0;
3242 3582
3243 lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS); 3583 lock_extent_bits(tree, start, end, 0, &cached_state);
3244 wait_on_page_writeback(page); 3584 wait_on_page_writeback(page);
3245 clear_extent_bit(tree, start, end, 3585 clear_extent_bit(tree, start, end,
3246 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 3586 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -3454,7 +3794,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3454 } 3794 }
3455 3795
3456 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3796 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
3457 &cached_state, GFP_NOFS); 3797 &cached_state);
3458 3798
3459 em = get_extent_skip_holes(inode, start, last_for_get_extent, 3799 em = get_extent_skip_holes(inode, start, last_for_get_extent,
3460 get_extent); 3800 get_extent);
@@ -3548,26 +3888,7 @@ out:
3548inline struct page *extent_buffer_page(struct extent_buffer *eb, 3888inline struct page *extent_buffer_page(struct extent_buffer *eb,
3549 unsigned long i) 3889 unsigned long i)
3550{ 3890{
3551 struct page *p; 3891 return eb->pages[i];
3552 struct address_space *mapping;
3553
3554 if (i == 0)
3555 return eb->first_page;
3556 i += eb->start >> PAGE_CACHE_SHIFT;
3557 mapping = eb->first_page->mapping;
3558 if (!mapping)
3559 return NULL;
3560
3561 /*
3562 * extent_buffer_page is only called after pinning the page
3563 * by increasing the reference count. So we know the page must
3564 * be in the radix tree.
3565 */
3566 rcu_read_lock();
3567 p = radix_tree_lookup(&mapping->page_tree, i);
3568 rcu_read_unlock();
3569
3570 return p;
3571} 3892}
3572 3893
3573inline unsigned long num_extent_pages(u64 start, u64 len) 3894inline unsigned long num_extent_pages(u64 start, u64 len)
@@ -3576,6 +3897,19 @@ inline unsigned long num_extent_pages(u64 start, u64 len)
3576 (start >> PAGE_CACHE_SHIFT); 3897 (start >> PAGE_CACHE_SHIFT);
3577} 3898}
3578 3899
3900static void __free_extent_buffer(struct extent_buffer *eb)
3901{
3902#if LEAK_DEBUG
3903 unsigned long flags;
3904 spin_lock_irqsave(&leak_lock, flags);
3905 list_del(&eb->leak_list);
3906 spin_unlock_irqrestore(&leak_lock, flags);
3907#endif
3908 if (eb->pages && eb->pages != eb->inline_pages)
3909 kfree(eb->pages);
3910 kmem_cache_free(extent_buffer_cache, eb);
3911}
3912
3579static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 3913static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3580 u64 start, 3914 u64 start,
3581 unsigned long len, 3915 unsigned long len,
@@ -3591,6 +3925,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3591 return NULL; 3925 return NULL;
3592 eb->start = start; 3926 eb->start = start;
3593 eb->len = len; 3927 eb->len = len;
3928 eb->tree = tree;
3594 rwlock_init(&eb->lock); 3929 rwlock_init(&eb->lock);
3595 atomic_set(&eb->write_locks, 0); 3930 atomic_set(&eb->write_locks, 0);
3596 atomic_set(&eb->read_locks, 0); 3931 atomic_set(&eb->read_locks, 0);
@@ -3607,20 +3942,32 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3607 list_add(&eb->leak_list, &buffers); 3942 list_add(&eb->leak_list, &buffers);
3608 spin_unlock_irqrestore(&leak_lock, flags); 3943 spin_unlock_irqrestore(&leak_lock, flags);
3609#endif 3944#endif
3945 spin_lock_init(&eb->refs_lock);
3610 atomic_set(&eb->refs, 1); 3946 atomic_set(&eb->refs, 1);
3947 atomic_set(&eb->io_pages, 0);
3948
3949 if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) {
3950 struct page **pages;
3951 int num_pages = (len + PAGE_CACHE_SIZE - 1) >>
3952 PAGE_CACHE_SHIFT;
3953 pages = kzalloc(num_pages * sizeof(struct page *), mask);
3954 if (!pages) {
3955 __free_extent_buffer(eb);
3956 return NULL;
3957 }
3958 eb->pages = pages;
3959 } else {
3960 eb->pages = eb->inline_pages;
3961 }
3611 3962
3612 return eb; 3963 return eb;
3613} 3964}
3614 3965
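The allocation and teardown above introduce the inline_pages arrangement: buffers of up to INLINE_EXTENT_BUFFER_PAGES (16) pages use a pointer array embedded in the extent_buffer itself, only bigger ones allocate a separate array, and only that out-of-line array is kfree()d. A sketch of the pattern with illustrative types:

#include <stdlib.h>

#define INLINE_PAGES 16

struct eb {
	int nr;
	void **pages;
	void *inline_pages[INLINE_PAGES];
};

static int eb_alloc_pages(struct eb *e, int nr)
{
	e->nr = nr;
	if (nr > INLINE_PAGES) {
		/* one pointer per page, zeroed */
		e->pages = calloc(nr, sizeof(*e->pages));
		if (!e->pages)
			return -1;
	} else {
		e->pages = e->inline_pages;	/* no allocation needed */
	}
	return 0;
}

static void eb_free_pages(struct eb *e)
{
	if (e->pages != e->inline_pages)
		free(e->pages);	/* never free the embedded array */
}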
3615static void __free_extent_buffer(struct extent_buffer *eb) 3966static int extent_buffer_under_io(struct extent_buffer *eb)
3616{ 3967{
3617#if LEAK_DEBUG 3968 return (atomic_read(&eb->io_pages) ||
3618 unsigned long flags; 3969 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
3619 spin_lock_irqsave(&leak_lock, flags); 3970 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
3620 list_del(&eb->leak_list);
3621 spin_unlock_irqrestore(&leak_lock, flags);
3622#endif
3623 kmem_cache_free(extent_buffer_cache, eb);
3624} 3971}
3625 3972
3626/* 3973/*
@@ -3632,8 +3979,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3632 unsigned long index; 3979 unsigned long index;
3633 struct page *page; 3980 struct page *page;
3634 3981
3635 if (!eb->first_page) 3982 BUG_ON(extent_buffer_under_io(eb));
3636 return;
3637 3983
3638 index = num_extent_pages(eb->start, eb->len); 3984 index = num_extent_pages(eb->start, eb->len);
3639 if (start_idx >= index) 3985 if (start_idx >= index)
@@ -3642,8 +3988,34 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3642 do { 3988 do {
3643 index--; 3989 index--;
3644 page = extent_buffer_page(eb, index); 3990 page = extent_buffer_page(eb, index);
3645 if (page) 3991 if (page) {
3992 spin_lock(&page->mapping->private_lock);
3993 /*
3994 * We do this since we'll remove the pages after we've
3995 * removed the eb from the radix tree, so we could race
3996 * and have this page now attached to the new eb. So
3997 * only clear page_private if it's still connected to
3998 * this eb.
3999 */
4000 if (PagePrivate(page) &&
4001 page->private == (unsigned long)eb) {
4002 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4003 BUG_ON(PageDirty(page));
4004 BUG_ON(PageWriteback(page));
4005 /*
4006 * We need to make sure we haven't been attached
4007 * to a new eb.
4008 */
4009 ClearPagePrivate(page);
4010 set_page_private(page, 0);
4011 /* One for the page private */
4012 page_cache_release(page);
4013 }
4014 spin_unlock(&page->mapping->private_lock);
4015
4016 /* One for when we allocated the page */
3646 page_cache_release(page); 4017 page_cache_release(page);
4018 }
3647 } while (index != start_idx); 4019 } while (index != start_idx);
3648} 4020}
3649 4021
@@ -3656,9 +4028,50 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3656 __free_extent_buffer(eb); 4028 __free_extent_buffer(eb);
3657} 4029}
3658 4030
4031static void check_buffer_tree_ref(struct extent_buffer *eb)
4032{
4033 /* the ref bit is tricky. We have to make sure it is set
4034 * if we have the buffer dirty. Otherwise the
4035 * code to free a buffer can end up dropping a dirty
4036 * page
4037 *
4038 * Once the ref bit is set, it won't go away while the
4039 * buffer is dirty or in writeback, and it also won't
4040 * go away while we have the reference count on the
4041 * eb bumped.
4042 *
4043 * We can't just set the ref bit without bumping the
4044 * ref on the eb because free_extent_buffer might
4045 * see the ref bit and try to clear it. If this happens
4046 * free_extent_buffer might end up dropping our original
4047 * ref by mistake and freeing the page before we are able
4048 * to add one more ref.
4049 *
4050 * So bump the ref count first, then set the bit. If someone
4051 * beat us to it, drop the ref we added.
4052 */
4053 if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
4054 atomic_inc(&eb->refs);
4055 if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4056 atomic_dec(&eb->refs);
4057 }
4058}
4059
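The ordering rule the comment above spells out is: take the speculative reference first, then try to claim the TREE_REF flag, and give the reference back if another thread won, so the flag is never observed set without a reference backing it. A runnable C11 sketch of check_buffer_tree_ref's protocol:

#include <stdatomic.h>
#include <stdbool.h>

struct eb { atomic_int refs; atomic_bool tree_ref; };

static void buffer_tree_ref(struct eb *e)
{
	if (!atomic_load(&e->tree_ref)) {
		atomic_fetch_add(&e->refs, 1);	/* bump first... */
		if (atomic_exchange(&e->tree_ref, true))
			atomic_fetch_sub(&e->refs, 1);	/* ...drop if we lost the race */
	}
}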
4060static void mark_extent_buffer_accessed(struct extent_buffer *eb)
4061{
4062 unsigned long num_pages, i;
4063
4064 check_buffer_tree_ref(eb);
4065
4066 num_pages = num_extent_pages(eb->start, eb->len);
4067 for (i = 0; i < num_pages; i++) {
4068 struct page *p = extent_buffer_page(eb, i);
4069 mark_page_accessed(p);
4070 }
4071}
4072
3659struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 4073struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3660 u64 start, unsigned long len, 4074 u64 start, unsigned long len)
3661 struct page *page0)
3662{ 4075{
3663 unsigned long num_pages = num_extent_pages(start, len); 4076 unsigned long num_pages = num_extent_pages(start, len);
3664 unsigned long i; 4077 unsigned long i;
@@ -3674,7 +4087,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3674 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4087 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3675 if (eb && atomic_inc_not_zero(&eb->refs)) { 4088 if (eb && atomic_inc_not_zero(&eb->refs)) {
3676 rcu_read_unlock(); 4089 rcu_read_unlock();
3677 mark_page_accessed(eb->first_page); 4090 mark_extent_buffer_accessed(eb);
3678 return eb; 4091 return eb;
3679 } 4092 }
3680 rcu_read_unlock(); 4093 rcu_read_unlock();
@@ -3683,32 +4096,43 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3683 if (!eb) 4096 if (!eb)
3684 return NULL; 4097 return NULL;
3685 4098
3686 if (page0) { 4099 for (i = 0; i < num_pages; i++, index++) {
3687 eb->first_page = page0;
3688 i = 1;
3689 index++;
3690 page_cache_get(page0);
3691 mark_page_accessed(page0);
3692 set_page_extent_mapped(page0);
3693 set_page_extent_head(page0, len);
3694 uptodate = PageUptodate(page0);
3695 } else {
3696 i = 0;
3697 }
3698 for (; i < num_pages; i++, index++) {
3699 p = find_or_create_page(mapping, index, GFP_NOFS); 4100 p = find_or_create_page(mapping, index, GFP_NOFS);
3700 if (!p) { 4101 if (!p) {
3701 WARN_ON(1); 4102 WARN_ON(1);
3702 goto free_eb; 4103 goto free_eb;
3703 } 4104 }
3704 set_page_extent_mapped(p); 4105
3705 mark_page_accessed(p); 4106 spin_lock(&mapping->private_lock);
3706 if (i == 0) { 4107 if (PagePrivate(p)) {
3707 eb->first_page = p; 4108 /*
3708 set_page_extent_head(p, len); 4109 * We could have already allocated an eb for this page
3709 } else { 4110 * and attached one so lets see if we can get a ref on
3710 set_page_private(p, EXTENT_PAGE_PRIVATE); 4111 * the existing eb, and if we can we know it's good and
4112 * we can just return that one, else we know we can just
4113 * overwrite page->private.
4114 */
4115 exists = (struct extent_buffer *)p->private;
4116 if (atomic_inc_not_zero(&exists->refs)) {
4117 spin_unlock(&mapping->private_lock);
4118 unlock_page(p);
4119 mark_extent_buffer_accessed(exists);
4120 goto free_eb;
4121 }
4122
4123 /*
4124 * Do this so attach doesn't complain and we need to
4125 * drop the ref the old guy had.
4126 */
4127 ClearPagePrivate(p);
4128 WARN_ON(PageDirty(p));
4129 page_cache_release(p);
3711 } 4130 }
4131 attach_extent_buffer_page(eb, p);
4132 spin_unlock(&mapping->private_lock);
4133 WARN_ON(PageDirty(p));
4134 mark_page_accessed(p);
4135 eb->pages[i] = p;
3712 if (!PageUptodate(p)) 4136 if (!PageUptodate(p))
3713 uptodate = 0; 4137 uptodate = 0;
3714 4138
@@ -3716,12 +4140,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3716 * see below about how we avoid a nasty race with release page 4140 * see below about how we avoid a nasty race with release page
3717 * and why we unlock later 4141 * and why we unlock later
3718 */ 4142 */
3719 if (i != 0)
3720 unlock_page(p);
3721 } 4143 }
3722 if (uptodate) 4144 if (uptodate)
3723 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 4145 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3724 4146again:
3725 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); 4147 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
3726 if (ret) 4148 if (ret)
3727 goto free_eb; 4149 goto free_eb;
@@ -3731,14 +4153,21 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3731 if (ret == -EEXIST) { 4153 if (ret == -EEXIST) {
3732 exists = radix_tree_lookup(&tree->buffer, 4154 exists = radix_tree_lookup(&tree->buffer,
3733 start >> PAGE_CACHE_SHIFT); 4155 start >> PAGE_CACHE_SHIFT);
3734 /* add one reference for the caller */ 4156 if (!atomic_inc_not_zero(&exists->refs)) {
3735 atomic_inc(&exists->refs); 4157 spin_unlock(&tree->buffer_lock);
4158 radix_tree_preload_end();
4159 exists = NULL;
4160 goto again;
4161 }
3736 spin_unlock(&tree->buffer_lock); 4162 spin_unlock(&tree->buffer_lock);
3737 radix_tree_preload_end(); 4163 radix_tree_preload_end();
4164 mark_extent_buffer_accessed(exists);
3738 goto free_eb; 4165 goto free_eb;
3739 } 4166 }
3740 /* add one reference for the tree */ 4167 /* add one reference for the tree */
3741 atomic_inc(&eb->refs); 4168 spin_lock(&eb->refs_lock);
4169 check_buffer_tree_ref(eb);
4170 spin_unlock(&eb->refs_lock);
3742 spin_unlock(&tree->buffer_lock); 4171 spin_unlock(&tree->buffer_lock);
3743 radix_tree_preload_end(); 4172 radix_tree_preload_end();
3744 4173
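The -EEXIST branch now has to cope with an eb that is still in the radix tree but already dying: atomic_inc_not_zero() refuses to resurrect a zero refcount, so the insert simply retries until the dying buffer is really gone. atomic_inc_not_zero is the usual compare-and-swap loop; a runnable C11 rendering of it:

#include <stdatomic.h>
#include <stdbool.h>

/* Take a reference only if the object still has one, i.e. never
 * resurrect a refcount that already hit zero. */
static bool inc_not_zero(atomic_int *refs)
{
	int v = atomic_load(refs);

	while (v != 0) {
		if (atomic_compare_exchange_weak(refs, &v, v + 1))
			return true;	/* got our reference */
		/* v was reloaded by the failed CAS; retry */
	}
	return false;	/* refcount is 0: object is being freed */
}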
@@ -3751,15 +4180,20 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3751 * after the extent buffer is in the radix tree so 4180 * after the extent buffer is in the radix tree so
3752 * it doesn't get lost 4181 * it doesn't get lost
3753 */ 4182 */
3754 set_page_extent_mapped(eb->first_page); 4183 SetPageChecked(eb->pages[0]);
3755 set_page_extent_head(eb->first_page, eb->len); 4184 for (i = 1; i < num_pages; i++) {
3756 if (!page0) 4185 p = extent_buffer_page(eb, i);
3757 unlock_page(eb->first_page); 4186 ClearPageChecked(p);
4187 unlock_page(p);
4188 }
4189 unlock_page(eb->pages[0]);
3758 return eb; 4190 return eb;
3759 4191
3760free_eb: 4192free_eb:
3761 if (eb->first_page && !page0) 4193 for (i = 0; i < num_pages; i++) {
3762 unlock_page(eb->first_page); 4194 if (eb->pages[i])
4195 unlock_page(eb->pages[i]);
4196 }
3763 4197
3764 if (!atomic_dec_and_test(&eb->refs)) 4198 if (!atomic_dec_and_test(&eb->refs))
3765 return exists; 4199 return exists;
@@ -3776,7 +4210,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3776 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4210 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3777 if (eb && atomic_inc_not_zero(&eb->refs)) { 4211 if (eb && atomic_inc_not_zero(&eb->refs)) {
3778 rcu_read_unlock(); 4212 rcu_read_unlock();
3779 mark_page_accessed(eb->first_page); 4213 mark_extent_buffer_accessed(eb);
3780 return eb; 4214 return eb;
3781 } 4215 }
3782 rcu_read_unlock(); 4216 rcu_read_unlock();
@@ -3784,19 +4218,71 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3784 return NULL; 4218 return NULL;
3785} 4219}
3786 4220
4221static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
4222{
4223 struct extent_buffer *eb =
4224 container_of(head, struct extent_buffer, rcu_head);
4225
4226 __free_extent_buffer(eb);
4227}
4228
4229/* Expects to have eb->refs_lock already held */
4230static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
4231{
4232 WARN_ON(atomic_read(&eb->refs) == 0);
4233 if (atomic_dec_and_test(&eb->refs)) {
4234 struct extent_io_tree *tree = eb->tree;
4235
4236 spin_unlock(&eb->refs_lock);
4237
4238 spin_lock(&tree->buffer_lock);
4239 radix_tree_delete(&tree->buffer,
4240 eb->start >> PAGE_CACHE_SHIFT);
4241 spin_unlock(&tree->buffer_lock);
4242
4243 /* Should be safe to release our pages at this point */
4244 btrfs_release_extent_buffer_page(eb, 0);
4245
4246 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4247 return;
4248 }
4249 spin_unlock(&eb->refs_lock);
4250}
4251
3787void free_extent_buffer(struct extent_buffer *eb) 4252void free_extent_buffer(struct extent_buffer *eb)
3788{ 4253{
3789 if (!eb) 4254 if (!eb)
3790 return; 4255 return;
3791 4256
3792 if (!atomic_dec_and_test(&eb->refs)) 4257 spin_lock(&eb->refs_lock);
4258 if (atomic_read(&eb->refs) == 2 &&
4259 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
4260 !extent_buffer_under_io(eb) &&
4261 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4262 atomic_dec(&eb->refs);
4263
4264 /*
4265 * I know this is terrible, but it's temporary until we stop tracking
4266 * the uptodate bits and such for the extent buffers.
4267 */
4268 release_extent_buffer(eb, GFP_ATOMIC);
4269}
4270
4271void free_extent_buffer_stale(struct extent_buffer *eb)
4272{
4273 if (!eb)
3793 return; 4274 return;
3794 4275
3795 WARN_ON(1); 4276 spin_lock(&eb->refs_lock);
4277 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
4278
4279 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
4280 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4281 atomic_dec(&eb->refs);
4282 release_extent_buffer(eb, GFP_NOFS);
3796} 4283}
3797 4284
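free_extent_buffer and free_extent_buffer_stale encode a two-reference protocol: the radix tree owns one reference (tracked by EXTENT_BUFFER_TREE_REF) and the caller one more, so refs == 2 on a stale, idle buffer means the caller is the last real user and the tree's reference can be handed back before the final put. A runnable C11 sketch of that rule:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct eb { atomic_int refs; atomic_bool tree_ref; bool stale; };

static void put_stale(struct eb *e)
{
	e->stale = true;
	/* refs == 2: one for the tree, one for us; drop the tree's */
	if (atomic_load(&e->refs) == 2 &&
	    atomic_exchange(&e->tree_ref, false))	/* test_and_clear */
		atomic_fetch_sub(&e->refs, 1);
	if (atomic_fetch_sub(&e->refs, 1) == 1)	/* our final put */
		puts("freed");
}

int main(void)
{
	struct eb e = { 0 };

	atomic_init(&e.refs, 2);
	atomic_init(&e.tree_ref, true);
	put_stale(&e);	/* prints "freed" */
	return 0;
}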
3798int clear_extent_buffer_dirty(struct extent_io_tree *tree, 4285void clear_extent_buffer_dirty(struct extent_buffer *eb)
3799 struct extent_buffer *eb)
3800{ 4286{
3801 unsigned long i; 4287 unsigned long i;
3802 unsigned long num_pages; 4288 unsigned long num_pages;
@@ -3812,10 +4298,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3812 lock_page(page); 4298 lock_page(page);
3813 WARN_ON(!PagePrivate(page)); 4299 WARN_ON(!PagePrivate(page));
3814 4300
3815 set_page_extent_mapped(page);
3816 if (i == 0)
3817 set_page_extent_head(page, eb->len);
3818
3819 clear_page_dirty_for_io(page); 4301 clear_page_dirty_for_io(page);
3820 spin_lock_irq(&page->mapping->tree_lock); 4302 spin_lock_irq(&page->mapping->tree_lock);
3821 if (!PageDirty(page)) { 4303 if (!PageDirty(page)) {
@@ -3827,24 +4309,29 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3827 ClearPageError(page); 4309 ClearPageError(page);
3828 unlock_page(page); 4310 unlock_page(page);
3829 } 4311 }
3830 return 0; 4312 WARN_ON(atomic_read(&eb->refs) == 0);
3831} 4313}
3832 4314
3833int set_extent_buffer_dirty(struct extent_io_tree *tree, 4315int set_extent_buffer_dirty(struct extent_buffer *eb)
3834 struct extent_buffer *eb)
3835{ 4316{
3836 unsigned long i; 4317 unsigned long i;
3837 unsigned long num_pages; 4318 unsigned long num_pages;
3838 int was_dirty = 0; 4319 int was_dirty = 0;
3839 4320
4321 check_buffer_tree_ref(eb);
4322
3840 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 4323 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
4324
3841 num_pages = num_extent_pages(eb->start, eb->len); 4325 num_pages = num_extent_pages(eb->start, eb->len);
4326 WARN_ON(atomic_read(&eb->refs) == 0);
4327 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
4328
3842 for (i = 0; i < num_pages; i++) 4329 for (i = 0; i < num_pages; i++)
3843 __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); 4330 set_page_dirty(extent_buffer_page(eb, i));
3844 return was_dirty; 4331 return was_dirty;
3845} 4332}
3846 4333
3847static int __eb_straddles_pages(u64 start, u64 len) 4334static int range_straddles_pages(u64 start, u64 len)
3848{ 4335{
3849 if (len < PAGE_CACHE_SIZE) 4336 if (len < PAGE_CACHE_SIZE)
3850 return 1; 4337 return 1;
@@ -3855,25 +4342,14 @@ static int __eb_straddles_pages(u64 start, u64 len)
3855 return 0; 4342 return 0;
3856} 4343}
3857 4344
3858static int eb_straddles_pages(struct extent_buffer *eb) 4345int clear_extent_buffer_uptodate(struct extent_buffer *eb)
3859{
3860 return __eb_straddles_pages(eb->start, eb->len);
3861}
3862
3863int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3864 struct extent_buffer *eb,
3865 struct extent_state **cached_state)
3866{ 4346{
3867 unsigned long i; 4347 unsigned long i;
3868 struct page *page; 4348 struct page *page;
3869 unsigned long num_pages; 4349 unsigned long num_pages;
3870 4350
3871 num_pages = num_extent_pages(eb->start, eb->len);
3872 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 4351 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3873 4352 num_pages = num_extent_pages(eb->start, eb->len);
3874 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3875 cached_state, GFP_NOFS);
3876
3877 for (i = 0; i < num_pages; i++) { 4353 for (i = 0; i < num_pages; i++) {
3878 page = extent_buffer_page(eb, i); 4354 page = extent_buffer_page(eb, i);
3879 if (page) 4355 if (page)
@@ -3882,27 +4358,16 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3882 return 0; 4358 return 0;
3883} 4359}
3884 4360
3885int set_extent_buffer_uptodate(struct extent_io_tree *tree, 4361int set_extent_buffer_uptodate(struct extent_buffer *eb)
3886 struct extent_buffer *eb)
3887{ 4362{
3888 unsigned long i; 4363 unsigned long i;
3889 struct page *page; 4364 struct page *page;
3890 unsigned long num_pages; 4365 unsigned long num_pages;
3891 4366
4367 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3892 num_pages = num_extent_pages(eb->start, eb->len); 4368 num_pages = num_extent_pages(eb->start, eb->len);
3893
3894 if (eb_straddles_pages(eb)) {
3895 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3896 NULL, GFP_NOFS);
3897 }
3898 for (i = 0; i < num_pages; i++) { 4369 for (i = 0; i < num_pages; i++) {
3899 page = extent_buffer_page(eb, i); 4370 page = extent_buffer_page(eb, i);
3900 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3901 ((i == num_pages - 1) &&
3902 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3903 check_page_uptodate(tree, page);
3904 continue;
3905 }
3906 SetPageUptodate(page); 4371 SetPageUptodate(page);
3907 } 4372 }
3908 return 0; 4373 return 0;
@@ -3917,7 +4382,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3917 int uptodate; 4382 int uptodate;
3918 unsigned long index; 4383 unsigned long index;
3919 4384
3920 if (__eb_straddles_pages(start, end - start + 1)) { 4385 if (range_straddles_pages(start, end - start + 1)) {
3921 ret = test_range_bit(tree, start, end, 4386 ret = test_range_bit(tree, start, end,
3922 EXTENT_UPTODATE, 1, NULL); 4387 EXTENT_UPTODATE, 1, NULL);
3923 if (ret) 4388 if (ret)
@@ -3939,35 +4404,9 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3939 return pg_uptodate; 4404 return pg_uptodate;
3940} 4405}
3941 4406
3942int extent_buffer_uptodate(struct extent_io_tree *tree, 4407int extent_buffer_uptodate(struct extent_buffer *eb)
3943 struct extent_buffer *eb,
3944 struct extent_state *cached_state)
3945{ 4408{
3946 int ret = 0; 4409 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3947 unsigned long num_pages;
3948 unsigned long i;
3949 struct page *page;
3950 int pg_uptodate = 1;
3951
3952 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
3953 return 1;
3954
3955 if (eb_straddles_pages(eb)) {
3956 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3957 EXTENT_UPTODATE, 1, cached_state);
3958 if (ret)
3959 return ret;
3960 }
3961
3962 num_pages = num_extent_pages(eb->start, eb->len);
3963 for (i = 0; i < num_pages; i++) {
3964 page = extent_buffer_page(eb, i);
3965 if (!PageUptodate(page)) {
3966 pg_uptodate = 0;
3967 break;
3968 }
3969 }
3970 return pg_uptodate;
3971} 4410}
3972 4411
3973int read_extent_buffer_pages(struct extent_io_tree *tree, 4412int read_extent_buffer_pages(struct extent_io_tree *tree,
@@ -3981,21 +4420,14 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3981 int ret = 0; 4420 int ret = 0;
3982 int locked_pages = 0; 4421 int locked_pages = 0;
3983 int all_uptodate = 1; 4422 int all_uptodate = 1;
3984 int inc_all_pages = 0;
3985 unsigned long num_pages; 4423 unsigned long num_pages;
4424 unsigned long num_reads = 0;
3986 struct bio *bio = NULL; 4425 struct bio *bio = NULL;
3987 unsigned long bio_flags = 0; 4426 unsigned long bio_flags = 0;
3988 4427
3989 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 4428 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
3990 return 0; 4429 return 0;
3991 4430
3992 if (eb_straddles_pages(eb)) {
3993 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3994 EXTENT_UPTODATE, 1, NULL)) {
3995 return 0;
3996 }
3997 }
3998
3999 if (start) { 4431 if (start) {
4000 WARN_ON(start < eb->start); 4432 WARN_ON(start < eb->start);
4001 start_i = (start >> PAGE_CACHE_SHIFT) - 4433 start_i = (start >> PAGE_CACHE_SHIFT) -
@@ -4014,8 +4446,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4014 lock_page(page); 4446 lock_page(page);
4015 } 4447 }
4016 locked_pages++; 4448 locked_pages++;
4017 if (!PageUptodate(page)) 4449 if (!PageUptodate(page)) {
4450 num_reads++;
4018 all_uptodate = 0; 4451 all_uptodate = 0;
4452 }
4019 } 4453 }
4020 if (all_uptodate) { 4454 if (all_uptodate) {
4021 if (start_i == 0) 4455 if (start_i == 0)
@@ -4023,20 +4457,12 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4023 goto unlock_exit; 4457 goto unlock_exit;
4024 } 4458 }
4025 4459
4460 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
4461 eb->failed_mirror = 0;
4462 atomic_set(&eb->io_pages, num_reads);
4026 for (i = start_i; i < num_pages; i++) { 4463 for (i = start_i; i < num_pages; i++) {
4027 page = extent_buffer_page(eb, i); 4464 page = extent_buffer_page(eb, i);
4028
4029 WARN_ON(!PagePrivate(page));
4030
4031 set_page_extent_mapped(page);
4032 if (i == 0)
4033 set_page_extent_head(page, eb->len);
4034
4035 if (inc_all_pages)
4036 page_cache_get(page);
4037 if (!PageUptodate(page)) { 4465 if (!PageUptodate(page)) {
4038 if (start_i == 0)
4039 inc_all_pages = 1;
4040 ClearPageError(page); 4466 ClearPageError(page);
4041 err = __extent_read_full_page(tree, page, 4467 err = __extent_read_full_page(tree, page,
4042 get_extent, &bio, 4468 get_extent, &bio,
@@ -4048,8 +4474,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4048 } 4474 }
4049 } 4475 }
4050 4476
4051 if (bio) 4477 if (bio) {
4052 submit_one_bio(READ, bio, mirror_num, bio_flags); 4478 err = submit_one_bio(READ, bio, mirror_num, bio_flags);
4479 if (err)
4480 return err;
4481 }
4053 4482
4054 if (ret || wait != WAIT_COMPLETE) 4483 if (ret || wait != WAIT_COMPLETE)
4055 return ret; 4484 return ret;
@@ -4061,8 +4490,6 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4061 ret = -EIO; 4490 ret = -EIO;
4062 } 4491 }
4063 4492
4064 if (!ret)
4065 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
4066 return ret; 4493 return ret;
4067 4494
4068unlock_exit: 4495unlock_exit:
@@ -4304,15 +4731,20 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
4304{ 4731{
4305 char *dst_kaddr = page_address(dst_page); 4732 char *dst_kaddr = page_address(dst_page);
4306 char *src_kaddr; 4733 char *src_kaddr;
4734 int must_memmove = 0;
4307 4735
4308 if (dst_page != src_page) { 4736 if (dst_page != src_page) {
4309 src_kaddr = page_address(src_page); 4737 src_kaddr = page_address(src_page);
4310 } else { 4738 } else {
4311 src_kaddr = dst_kaddr; 4739 src_kaddr = dst_kaddr;
4312 BUG_ON(areas_overlap(src_off, dst_off, len)); 4740 if (areas_overlap(src_off, dst_off, len))
4741 must_memmove = 1;
4313 } 4742 }
4314 4743
4315 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 4744 if (must_memmove)
4745 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
4746 else
4747 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
4316} 4748}
4317 4749
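copy_pages previously BUG()ed on overlapping ranges within one page; with variable node sizes such shifts are now legitimate, and they must use memmove, because memcpy on overlapping ranges is undefined behavior (the related memmove_extent_buffer change likewise takes the plain-copy path whenever the destination sits below the source, where a forward copy is safe even with overlap). A runnable demo of the distinction:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[16] = "abcdefgh";

	/* shift the 8 bytes right by two inside the same buffer:
	 * source and destination overlap, so memcpy would be UB */
	memmove(buf + 2, buf, 8);
	printf("%s\n", buf);	/* prints "ababcdefgh" */
	return 0;
}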
4318void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 4750void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
@@ -4382,7 +4814,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4382 "len %lu len %lu\n", dst_offset, len, dst->len); 4814 "len %lu len %lu\n", dst_offset, len, dst->len);
4383 BUG_ON(1); 4815 BUG_ON(1);
4384 } 4816 }
4385 if (!areas_overlap(src_offset, dst_offset, len)) { 4817 if (dst_offset < src_offset) {
4386 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 4818 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
4387 return; 4819 return;
4388 } 4820 }
@@ -4408,47 +4840,48 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4408 } 4840 }
4409} 4841}
4410 4842
4411static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) 4843int try_release_extent_buffer(struct page *page, gfp_t mask)
4412{ 4844{
4413 struct extent_buffer *eb =
4414 container_of(head, struct extent_buffer, rcu_head);
4415
4416 btrfs_release_extent_buffer(eb);
4417}
4418
4419int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
4420{
4421 u64 start = page_offset(page);
4422 struct extent_buffer *eb; 4845 struct extent_buffer *eb;
4423 int ret = 1;
4424 4846
4425 spin_lock(&tree->buffer_lock); 4847 /*
4426 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4848 * We need to make sure nobody is attaching this page to an eb right
4427 if (!eb) { 4849 * now.
4428 spin_unlock(&tree->buffer_lock); 4850 */
4429 return ret; 4851 spin_lock(&page->mapping->private_lock);
4852 if (!PagePrivate(page)) {
4853 spin_unlock(&page->mapping->private_lock);
4854 return 1;
4430 } 4855 }
4431 4856
4432 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 4857 eb = (struct extent_buffer *)page->private;
4433 ret = 0; 4858 BUG_ON(!eb);
4434 goto out;
4435 }
4436 4859
4437 /* 4860 /*
4438 * set @eb->refs to 0 if it is already 1, and then release the @eb. 4861 * This is a little awful but should be ok, we need to make sure that
4439 * Or go back. 4862 * the eb doesn't disappear out from under us while we're looking at
4863 * this page.
4440 */ 4864 */
4441 if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) { 4865 spin_lock(&eb->refs_lock);
4442 ret = 0; 4866 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
4443 goto out; 4867 spin_unlock(&eb->refs_lock);
4868 spin_unlock(&page->mapping->private_lock);
4869 return 0;
4444 } 4870 }
4871 spin_unlock(&page->mapping->private_lock);
4445 4872
4446 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4873 if ((mask & GFP_NOFS) == GFP_NOFS)
4447out: 4874 mask = GFP_NOFS;
4448 spin_unlock(&tree->buffer_lock);
4449 4875
4450 /* at this point we can safely release the extent buffer */ 4876 /*
4451 if (atomic_read(&eb->refs) == 0) 4877 * If tree ref isn't set then we know the ref on this eb is a real ref,
4452 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); 4878 * so just return, this page will likely be freed soon anyway.
4453 return ret; 4879 */
4880 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
4881 spin_unlock(&eb->refs_lock);
4882 return 0;
4883 }
4884 release_extent_buffer(eb, mask);
4885
4886 return 1;
4454} 4887}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index cecc3518c121..faf10eb57f75 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -35,6 +35,10 @@
35#define EXTENT_BUFFER_DIRTY 2 35#define EXTENT_BUFFER_DIRTY 2
36#define EXTENT_BUFFER_CORRUPT 3 36#define EXTENT_BUFFER_CORRUPT 3
37#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ 37#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */
38#define EXTENT_BUFFER_TREE_REF 5
39#define EXTENT_BUFFER_STALE 6
40#define EXTENT_BUFFER_WRITEBACK 7
41#define EXTENT_BUFFER_IOERR 8
38 42
39/* these are flags for extent_clear_unlock_delalloc */ 43/* these are flags for extent_clear_unlock_delalloc */
40#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 44#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
@@ -54,6 +58,7 @@
54#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 58#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
55 59
56struct extent_state; 60struct extent_state;
61struct btrfs_root;
57 62
58typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, 63typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
59 struct bio *bio, int mirror_num, 64 struct bio *bio, int mirror_num,
@@ -69,9 +74,7 @@ struct extent_io_ops {
69 size_t size, struct bio *bio, 74 size_t size, struct bio *bio,
70 unsigned long bio_flags); 75 unsigned long bio_flags);
71 int (*readpage_io_hook)(struct page *page, u64 start, u64 end); 76 int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
72 int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, 77 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
73 u64 start, u64 end, int failed_mirror,
74 struct extent_state *state);
75 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, 78 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
76 u64 start, u64 end, 79 u64 start, u64 end,
77 struct extent_state *state); 80 struct extent_state *state);
@@ -97,6 +100,7 @@ struct extent_io_tree {
97 struct radix_tree_root buffer; 100 struct radix_tree_root buffer;
98 struct address_space *mapping; 101 struct address_space *mapping;
99 u64 dirty_bytes; 102 u64 dirty_bytes;
103 int track_uptodate;
100 spinlock_t lock; 104 spinlock_t lock;
101 spinlock_t buffer_lock; 105 spinlock_t buffer_lock;
102 struct extent_io_ops *ops; 106 struct extent_io_ops *ops;
@@ -119,16 +123,21 @@ struct extent_state {
119 struct list_head leak_list; 123 struct list_head leak_list;
120}; 124};
121 125
126#define INLINE_EXTENT_BUFFER_PAGES 16
127#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)
122struct extent_buffer { 128struct extent_buffer {
123 u64 start; 129 u64 start;
124 unsigned long len; 130 unsigned long len;
125 unsigned long map_start; 131 unsigned long map_start;
126 unsigned long map_len; 132 unsigned long map_len;
127 struct page *first_page;
128 unsigned long bflags; 133 unsigned long bflags;
134 struct extent_io_tree *tree;
135 spinlock_t refs_lock;
136 atomic_t refs;
137 atomic_t io_pages;
138 int failed_mirror;
129 struct list_head leak_list; 139 struct list_head leak_list;
130 struct rcu_head rcu_head; 140 struct rcu_head rcu_head;
131 atomic_t refs;
132 pid_t lock_owner; 141 pid_t lock_owner;
133 142
134 /* count of read lock holders on the extent buffer */ 143 /* count of read lock holders on the extent buffer */
@@ -152,6 +161,9 @@ struct extent_buffer {
152 * to unlock 161 * to unlock
153 */ 162 */
154 wait_queue_head_t read_lock_wq; 163 wait_queue_head_t read_lock_wq;
164 wait_queue_head_t lock_wq;
165 struct page *inline_pages[INLINE_EXTENT_BUFFER_PAGES];
166 struct page **pages;
155}; 167};
156 168
157static inline void extent_set_compress_type(unsigned long *bio_flags, 169static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -178,18 +190,17 @@ void extent_io_tree_init(struct extent_io_tree *tree,
178int try_release_extent_mapping(struct extent_map_tree *map, 190int try_release_extent_mapping(struct extent_map_tree *map,
179 struct extent_io_tree *tree, struct page *page, 191 struct extent_io_tree *tree, struct page *page,
180 gfp_t mask); 192 gfp_t mask);
181int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page); 193int try_release_extent_buffer(struct page *page, gfp_t mask);
182int try_release_extent_state(struct extent_map_tree *map, 194int try_release_extent_state(struct extent_map_tree *map,
183 struct extent_io_tree *tree, struct page *page, 195 struct extent_io_tree *tree, struct page *page,
184 gfp_t mask); 196 gfp_t mask);
185int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 197int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
186int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 198int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
187 int bits, struct extent_state **cached, gfp_t mask); 199 int bits, struct extent_state **cached);
188int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 200int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
189int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, 201int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
190 struct extent_state **cached, gfp_t mask); 202 struct extent_state **cached, gfp_t mask);
191int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 203int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
192 gfp_t mask);
193int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 204int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
194 get_extent_t *get_extent, int mirror_num); 205 get_extent_t *get_extent, int mirror_num);
195int __init extent_io_init(void); 206int __init extent_io_init(void);
@@ -210,7 +221,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
210int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 221int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
211 int bits, gfp_t mask); 222 int bits, gfp_t mask);
212int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 223int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
213 int bits, int exclusive_bits, u64 *failed_start, 224 int bits, u64 *failed_start,
214 struct extent_state **cached_state, gfp_t mask); 225 struct extent_state **cached_state, gfp_t mask);
215int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 226int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
216 struct extent_state **cached_state, gfp_t mask); 227 struct extent_state **cached_state, gfp_t mask);
@@ -240,6 +251,8 @@ int extent_writepages(struct extent_io_tree *tree,
240 struct address_space *mapping, 251 struct address_space *mapping,
241 get_extent_t *get_extent, 252 get_extent_t *get_extent,
242 struct writeback_control *wbc); 253 struct writeback_control *wbc);
254int btree_write_cache_pages(struct address_space *mapping,
255 struct writeback_control *wbc);
243int extent_readpages(struct extent_io_tree *tree, 256int extent_readpages(struct extent_io_tree *tree,
244 struct address_space *mapping, 257 struct address_space *mapping,
245 struct list_head *pages, unsigned nr_pages, 258 struct list_head *pages, unsigned nr_pages,
@@ -251,11 +264,11 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
251void set_page_extent_mapped(struct page *page); 264void set_page_extent_mapped(struct page *page);
252 265
253struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 266struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
254 u64 start, unsigned long len, 267 u64 start, unsigned long len);
255 struct page *page0);
256struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 268struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
257 u64 start, unsigned long len); 269 u64 start, unsigned long len);
258void free_extent_buffer(struct extent_buffer *eb); 270void free_extent_buffer(struct extent_buffer *eb);
271void free_extent_buffer_stale(struct extent_buffer *eb);
259#define WAIT_NONE 0 272#define WAIT_NONE 0
260#define WAIT_COMPLETE 1 273#define WAIT_COMPLETE 1
261#define WAIT_PAGE_LOCK 2 274#define WAIT_PAGE_LOCK 2
@@ -287,19 +300,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
287 unsigned long src_offset, unsigned long len); 300 unsigned long src_offset, unsigned long len);
288void memset_extent_buffer(struct extent_buffer *eb, char c, 301void memset_extent_buffer(struct extent_buffer *eb, char c,
289 unsigned long start, unsigned long len); 302 unsigned long start, unsigned long len);
290int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits); 303void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
291int clear_extent_buffer_dirty(struct extent_io_tree *tree, 304void clear_extent_buffer_dirty(struct extent_buffer *eb);
292 struct extent_buffer *eb); 305int set_extent_buffer_dirty(struct extent_buffer *eb);
293int set_extent_buffer_dirty(struct extent_io_tree *tree, 306int set_extent_buffer_uptodate(struct extent_buffer *eb);
294 struct extent_buffer *eb); 307int clear_extent_buffer_uptodate(struct extent_buffer *eb);
295int set_extent_buffer_uptodate(struct extent_io_tree *tree, 308int extent_buffer_uptodate(struct extent_buffer *eb);
296 struct extent_buffer *eb);
297int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
298 struct extent_buffer *eb,
299 struct extent_state **cached_state);
300int extent_buffer_uptodate(struct extent_io_tree *tree,
301 struct extent_buffer *eb,
302 struct extent_state *cached_state);
303int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, 309int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
304 unsigned long min_len, char **map, 310 unsigned long min_len, char **map,
305 unsigned long *map_start, 311 unsigned long *map_start,
@@ -320,4 +326,6 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
320 u64 length, u64 logical, struct page *page, 326 u64 length, u64 logical, struct page *page,
321 int mirror_num); 327 int mirror_num);
322int end_extent_writepage(struct page *page, int err, u64 start, u64 end); 328int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
329int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
330 int mirror_num);
323#endif 331#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 078b4fd54500..5d158d320233 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -25,10 +25,12 @@
25#include "transaction.h" 25#include "transaction.h"
26#include "print-tree.h" 26#include "print-tree.h"
27 27
28#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ 28#define __MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
29 sizeof(struct btrfs_item) * 2) / \ 29 sizeof(struct btrfs_item) * 2) / \
30 size) - 1)) 30 size) - 1))
31 31
32#define MAX_CSUM_ITEMS(r, size) (min(__MAX_CSUM_ITEMS(r, size), PAGE_CACHE_SIZE))
33
32#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ 34#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
33 sizeof(struct btrfs_ordered_sum)) / \ 35 sizeof(struct btrfs_ordered_sum)) / \
34 sizeof(struct btrfs_sector_sum) * \ 36 sizeof(struct btrfs_sector_sum) * \
@@ -59,7 +61,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
59 sizeof(*item)); 61 sizeof(*item));
60 if (ret < 0) 62 if (ret < 0)
61 goto out; 63 goto out;
62 BUG_ON(ret); 64 BUG_ON(ret); /* Can't happen */
63 leaf = path->nodes[0]; 65 leaf = path->nodes[0];
64 item = btrfs_item_ptr(leaf, path->slots[0], 66 item = btrfs_item_ptr(leaf, path->slots[0],
65 struct btrfs_file_extent_item); 67 struct btrfs_file_extent_item);
@@ -284,6 +286,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
284 struct btrfs_ordered_sum *sums; 286 struct btrfs_ordered_sum *sums;
285 struct btrfs_sector_sum *sector_sum; 287 struct btrfs_sector_sum *sector_sum;
286 struct btrfs_csum_item *item; 288 struct btrfs_csum_item *item;
289 LIST_HEAD(tmplist);
287 unsigned long offset; 290 unsigned long offset;
288 int ret; 291 int ret;
289 size_t size; 292 size_t size;
@@ -358,7 +361,10 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
358 MAX_ORDERED_SUM_BYTES(root)); 361 MAX_ORDERED_SUM_BYTES(root));
359 sums = kzalloc(btrfs_ordered_sum_size(root, size), 362 sums = kzalloc(btrfs_ordered_sum_size(root, size),
360 GFP_NOFS); 363 GFP_NOFS);
361 BUG_ON(!sums); 364 if (!sums) {
365 ret = -ENOMEM;
366 goto fail;
367 }
362 368
363 sector_sum = sums->sums; 369 sector_sum = sums->sums;
364 sums->bytenr = start; 370 sums->bytenr = start;
@@ -380,12 +386,19 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
380 offset += csum_size; 386 offset += csum_size;
381 sector_sum++; 387 sector_sum++;
382 } 388 }
383 list_add_tail(&sums->list, list); 389 list_add_tail(&sums->list, &tmplist);
384 } 390 }
385 path->slots[0]++; 391 path->slots[0]++;
386 } 392 }
387 ret = 0; 393 ret = 0;
388fail: 394fail:
395 while (ret < 0 && !list_empty(&tmplist)) {
396 sums = list_first_entry(&tmplist, struct btrfs_ordered_sum, list);
397 list_del(&sums->list);
398 kfree(sums);
399 }
400 list_splice_tail(&tmplist, list);
401
389 btrfs_free_path(path); 402 btrfs_free_path(path);
390 return ret; 403 return ret;
391} 404}
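The btrfs_lookup_csums_range fix above is the classic tmplist pattern: accumulate results on a function-local list, free everything on failure, and splice onto the caller's list only at the end, so the caller never inherits a half-built list it would leak or double-free. A runnable miniature with plain pointers standing in for list_head:

#include <stdio.h>
#include <stdlib.h>

struct sum { struct sum *next; int val; };

static int lookup(struct sum **out, int n, int fail_at)
{
	struct sum *tmp = NULL;

	for (int i = 0; i < n; i++) {
		struct sum *s;

		if (i == fail_at)
			goto fail;	/* e.g. an -ENOMEM mid-walk */
		s = malloc(sizeof(*s));
		if (!s)
			goto fail;
		s->val = i;
		s->next = tmp;
		tmp = s;
	}
	*out = tmp;	/* splice: the caller sees all or nothing */
	return 0;
fail:
	while (tmp) {	/* unwind the private list */
		struct sum *s = tmp;

		tmp = tmp->next;
		free(s);
	}
	return -1;
}

int main(void)
{
	struct sum *list = NULL;

	printf("%d\n", lookup(&list, 4, 2));	/* -1, list stays empty */
	printf("%d\n", lookup(&list, 4, -1));	/* 0, list complete */
	return 0;
}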
@@ -420,7 +433,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
420 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 433 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
421 434
422 ordered = btrfs_lookup_ordered_extent(inode, offset); 435 ordered = btrfs_lookup_ordered_extent(inode, offset);
423 BUG_ON(!ordered); 436 BUG_ON(!ordered); /* Logic error */
424 sums->bytenr = ordered->start; 437 sums->bytenr = ordered->start;
425 438
426 while (bio_index < bio->bi_vcnt) { 439 while (bio_index < bio->bi_vcnt) {
@@ -439,11 +452,11 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
439 452
440 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), 453 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
441 GFP_NOFS); 454 GFP_NOFS);
442 BUG_ON(!sums); 455 BUG_ON(!sums); /* -ENOMEM */
443 sector_sum = sums->sums; 456 sector_sum = sums->sums;
444 sums->len = bytes_left; 457 sums->len = bytes_left;
445 ordered = btrfs_lookup_ordered_extent(inode, offset); 458 ordered = btrfs_lookup_ordered_extent(inode, offset);
446 BUG_ON(!ordered); 459 BUG_ON(!ordered); /* Logic error */
447 sums->bytenr = ordered->start; 460 sums->bytenr = ordered->start;
448 } 461 }
449 462
@@ -483,18 +496,17 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
483 * This calls btrfs_truncate_item with the correct args based on the 496 * This calls btrfs_truncate_item with the correct args based on the
484 * overlap, and fixes up the key as required. 497 * overlap, and fixes up the key as required.
485 */ 498 */
486static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, 499static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
487 struct btrfs_root *root, 500 struct btrfs_root *root,
488 struct btrfs_path *path, 501 struct btrfs_path *path,
489 struct btrfs_key *key, 502 struct btrfs_key *key,
490 u64 bytenr, u64 len) 503 u64 bytenr, u64 len)
491{ 504{
492 struct extent_buffer *leaf; 505 struct extent_buffer *leaf;
493 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 506 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
494 u64 csum_end; 507 u64 csum_end;
495 u64 end_byte = bytenr + len; 508 u64 end_byte = bytenr + len;
496 u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; 509 u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits;
497 int ret;
498 510
499 leaf = path->nodes[0]; 511 leaf = path->nodes[0];
500 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; 512 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
@@ -510,7 +522,7 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
510 */ 522 */
511 u32 new_size = (bytenr - key->offset) >> blocksize_bits; 523 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
512 new_size *= csum_size; 524 new_size *= csum_size;
513 ret = btrfs_truncate_item(trans, root, path, new_size, 1); 525 btrfs_truncate_item(trans, root, path, new_size, 1);
514 } else if (key->offset >= bytenr && csum_end > end_byte && 526 } else if (key->offset >= bytenr && csum_end > end_byte &&
515 end_byte > key->offset) { 527 end_byte > key->offset) {
516 /* 528 /*
@@ -522,15 +534,13 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
522 u32 new_size = (csum_end - end_byte) >> blocksize_bits; 534 u32 new_size = (csum_end - end_byte) >> blocksize_bits;
523 new_size *= csum_size; 535 new_size *= csum_size;
524 536
525 ret = btrfs_truncate_item(trans, root, path, new_size, 0); 537 btrfs_truncate_item(trans, root, path, new_size, 0);
526 538
527 key->offset = end_byte; 539 key->offset = end_byte;
528 ret = btrfs_set_item_key_safe(trans, root, path, key); 540 btrfs_set_item_key_safe(trans, root, path, key);
529 BUG_ON(ret);
530 } else { 541 } else {
531 BUG(); 542 BUG();
532 } 543 }
533 return 0;
534} 544}
535 545
536/* 546/*
@@ -635,13 +645,14 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
635 * item changed size or key 645 * item changed size or key
636 */ 646 */
637 ret = btrfs_split_item(trans, root, path, &key, offset); 647 ret = btrfs_split_item(trans, root, path, &key, offset);
638 BUG_ON(ret && ret != -EAGAIN); 648 if (ret && ret != -EAGAIN) {
649 btrfs_abort_transaction(trans, root, ret);
650 goto out;
651 }
639 652
640 key.offset = end_byte - 1; 653 key.offset = end_byte - 1;
641 } else { 654 } else {
642 ret = truncate_one_csum(trans, root, path, 655 truncate_one_csum(trans, root, path, &key, bytenr, len);
643 &key, bytenr, len);
644 BUG_ON(ret);
645 if (key.offset < bytenr) 656 if (key.offset < bytenr)
646 break; 657 break;
647 } 658 }
@@ -772,7 +783,7 @@ again:
772 if (diff != csum_size) 783 if (diff != csum_size)
773 goto insert; 784 goto insert;
774 785
775 ret = btrfs_extend_item(trans, root, path, diff); 786 btrfs_extend_item(trans, root, path, diff);
776 goto csum; 787 goto csum;
777 } 788 }
778 789
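
The btrfs_lookup_csums_range() hunks above show the cleanup idiom this series leans on: new entries are collected on a local tmplist and only spliced onto the caller's list once the whole range has been walked, so a mid-loop allocation failure can free exactly what this call allocated without publishing a half-built batch. Below is a minimal sketch of the same build-then-splice pattern; "struct item" and build_batch() are placeholders standing in for btrfs_ordered_sum and the csum walk, not btrfs code.

	#include <linux/list.h>
	#include <linux/slab.h>

	struct item {
		struct list_head list;
	};

	static int build_batch(struct list_head *out, int n)
	{
		LIST_HEAD(tmplist);
		struct item *it;
		int ret = 0, i;

		for (i = 0; i < n; i++) {
			it = kzalloc(sizeof(*it), GFP_NOFS);
			if (!it) {
				ret = -ENOMEM;
				goto fail;
			}
			list_add_tail(&it->list, &tmplist);
		}
	fail:
		/* On error, unwind only what this call allocated. */
		while (ret < 0 && !list_empty(&tmplist)) {
			it = list_first_entry(&tmplist, struct item, list);
			list_del(&it->list);
			kfree(it);
		}
		/* tmplist is empty after an error, so this is then a no-op. */
		list_splice_tail(&tmplist, out);
		return ret;
	}
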
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e8d06b6b9194..d83260d7498f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -452,7 +452,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
452 split = alloc_extent_map(); 452 split = alloc_extent_map();
453 if (!split2) 453 if (!split2)
454 split2 = alloc_extent_map(); 454 split2 = alloc_extent_map();
455 BUG_ON(!split || !split2); 455 BUG_ON(!split || !split2); /* -ENOMEM */
456 456
457 write_lock(&em_tree->lock); 457 write_lock(&em_tree->lock);
458 em = lookup_extent_mapping(em_tree, start, len); 458 em = lookup_extent_mapping(em_tree, start, len);
@@ -494,7 +494,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
494 split->flags = flags; 494 split->flags = flags;
495 split->compress_type = em->compress_type; 495 split->compress_type = em->compress_type;
496 ret = add_extent_mapping(em_tree, split); 496 ret = add_extent_mapping(em_tree, split);
497 BUG_ON(ret); 497 BUG_ON(ret); /* Logic error */
498 free_extent_map(split); 498 free_extent_map(split);
499 split = split2; 499 split = split2;
500 split2 = NULL; 500 split2 = NULL;
@@ -520,7 +520,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
520 } 520 }
521 521
522 ret = add_extent_mapping(em_tree, split); 522 ret = add_extent_mapping(em_tree, split);
523 BUG_ON(ret); 523 BUG_ON(ret); /* Logic error */
524 free_extent_map(split); 524 free_extent_map(split);
525 split = NULL; 525 split = NULL;
526 } 526 }
@@ -679,7 +679,7 @@ next_slot:
679 root->root_key.objectid, 679 root->root_key.objectid,
680 new_key.objectid, 680 new_key.objectid,
681 start - extent_offset, 0); 681 start - extent_offset, 0);
682 BUG_ON(ret); 682 BUG_ON(ret); /* -ENOMEM */
683 *hint_byte = disk_bytenr; 683 *hint_byte = disk_bytenr;
684 } 684 }
685 key.offset = start; 685 key.offset = start;
@@ -754,7 +754,7 @@ next_slot:
754 root->root_key.objectid, 754 root->root_key.objectid,
755 key.objectid, key.offset - 755 key.objectid, key.offset -
756 extent_offset, 0); 756 extent_offset, 0);
757 BUG_ON(ret); 757 BUG_ON(ret); /* -ENOMEM */
758 inode_sub_bytes(inode, 758 inode_sub_bytes(inode,
759 extent_end - key.offset); 759 extent_end - key.offset);
760 *hint_byte = disk_bytenr; 760 *hint_byte = disk_bytenr;
@@ -770,7 +770,10 @@ next_slot:
770 770
771 ret = btrfs_del_items(trans, root, path, del_slot, 771 ret = btrfs_del_items(trans, root, path, del_slot,
772 del_nr); 772 del_nr);
773 BUG_ON(ret); 773 if (ret) {
774 btrfs_abort_transaction(trans, root, ret);
775 goto out;
776 }
774 777
775 del_nr = 0; 778 del_nr = 0;
776 del_slot = 0; 779 del_slot = 0;
@@ -782,11 +785,13 @@ next_slot:
782 BUG_ON(1); 785 BUG_ON(1);
783 } 786 }
784 787
785 if (del_nr > 0) { 788 if (!ret && del_nr > 0) {
786 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 789 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
787 BUG_ON(ret); 790 if (ret)
791 btrfs_abort_transaction(trans, root, ret);
788 } 792 }
789 793
794out:
790 btrfs_free_path(path); 795 btrfs_free_path(path);
791 return ret; 796 return ret;
792} 797}
@@ -944,7 +949,10 @@ again:
944 btrfs_release_path(path); 949 btrfs_release_path(path);
945 goto again; 950 goto again;
946 } 951 }
947 BUG_ON(ret < 0); 952 if (ret < 0) {
953 btrfs_abort_transaction(trans, root, ret);
954 goto out;
955 }
948 956
949 leaf = path->nodes[0]; 957 leaf = path->nodes[0];
950 fi = btrfs_item_ptr(leaf, path->slots[0] - 1, 958 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
@@ -963,7 +971,7 @@ again:
963 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 971 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
964 root->root_key.objectid, 972 root->root_key.objectid,
965 ino, orig_offset, 0); 973 ino, orig_offset, 0);
966 BUG_ON(ret); 974 BUG_ON(ret); /* -ENOMEM */
967 975
968 if (split == start) { 976 if (split == start) {
969 key.offset = start; 977 key.offset = start;
@@ -990,7 +998,7 @@ again:
990 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 998 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
991 0, root->root_key.objectid, 999 0, root->root_key.objectid,
992 ino, orig_offset, 0); 1000 ino, orig_offset, 0);
993 BUG_ON(ret); 1001 BUG_ON(ret); /* -ENOMEM */
994 } 1002 }
995 other_start = 0; 1003 other_start = 0;
996 other_end = start; 1004 other_end = start;
@@ -1007,7 +1015,7 @@ again:
1007 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1015 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1008 0, root->root_key.objectid, 1016 0, root->root_key.objectid,
1009 ino, orig_offset, 0); 1017 ino, orig_offset, 0);
1010 BUG_ON(ret); 1018 BUG_ON(ret); /* -ENOMEM */
1011 } 1019 }
1012 if (del_nr == 0) { 1020 if (del_nr == 0) {
1013 fi = btrfs_item_ptr(leaf, path->slots[0], 1021 fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -1025,7 +1033,10 @@ again:
1025 btrfs_mark_buffer_dirty(leaf); 1033 btrfs_mark_buffer_dirty(leaf);
1026 1034
1027 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 1035 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
1028 BUG_ON(ret); 1036 if (ret < 0) {
1037 btrfs_abort_transaction(trans, root, ret);
1038 goto out;
1039 }
1029 } 1040 }
1030out: 1041out:
1031 btrfs_free_path(path); 1042 btrfs_free_path(path);
@@ -1105,8 +1116,7 @@ again:
1105 if (start_pos < inode->i_size) { 1116 if (start_pos < inode->i_size) {
1106 struct btrfs_ordered_extent *ordered; 1117 struct btrfs_ordered_extent *ordered;
1107 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1118 lock_extent_bits(&BTRFS_I(inode)->io_tree,
1108 start_pos, last_pos - 1, 0, &cached_state, 1119 start_pos, last_pos - 1, 0, &cached_state);
1109 GFP_NOFS);
1110 ordered = btrfs_lookup_first_ordered_extent(inode, 1120 ordered = btrfs_lookup_first_ordered_extent(inode,
1111 last_pos - 1); 1121 last_pos - 1);
1112 if (ordered && 1122 if (ordered &&
@@ -1638,7 +1648,7 @@ static long btrfs_fallocate(struct file *file, int mode,
1638 * transaction 1648 * transaction
1639 */ 1649 */
1640 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, 1650 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
1641 locked_end, 0, &cached_state, GFP_NOFS); 1651 locked_end, 0, &cached_state);
1642 ordered = btrfs_lookup_first_ordered_extent(inode, 1652 ordered = btrfs_lookup_first_ordered_extent(inode,
1643 alloc_end - 1); 1653 alloc_end - 1);
1644 if (ordered && 1654 if (ordered &&
@@ -1667,7 +1677,13 @@ static long btrfs_fallocate(struct file *file, int mode,
1667 1677
1668 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 1678 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
1669 alloc_end - cur_offset, 0); 1679 alloc_end - cur_offset, 0);
1670 BUG_ON(IS_ERR_OR_NULL(em)); 1680 if (IS_ERR_OR_NULL(em)) {
1681 if (!em)
1682 ret = -ENOMEM;
1683 else
1684 ret = PTR_ERR(em);
1685 break;
1686 }
1671 last_byte = min(extent_map_end(em), alloc_end); 1687 last_byte = min(extent_map_end(em), alloc_end);
1672 actual_end = min_t(u64, extent_map_end(em), offset + len); 1688 actual_end = min_t(u64, extent_map_end(em), offset + len);
1673 last_byte = (last_byte + mask) & ~mask; 1689 last_byte = (last_byte + mask) & ~mask;
@@ -1737,7 +1753,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
1737 return -ENXIO; 1753 return -ENXIO;
1738 1754
1739 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, 1755 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
1740 &cached_state, GFP_NOFS); 1756 &cached_state);
1741 1757
1742 /* 1758 /*
1743 * Delalloc is such a pain. If we have a hole and we have pending 1759 * Delalloc is such a pain. If we have a hole and we have pending
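
The file.c changes above all follow the convention the series establishes: an error that used to stop the kernel via BUG_ON() now aborts the running transaction, which flips the filesystem read-only, and then unwinds through a label so paths and locks are still released. Roughly, a converted call site takes the shape sketched here; do_step() is a hypothetical stand-in for whatever btrfs helper is being checked, and the btrfs types come from the tree's own headers.

	static int converted_site(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path)
	{
		int ret;

		ret = do_step(trans, root, path);	/* hypothetical helper */
		if (ret) {
			/* was: BUG_ON(ret) */
			btrfs_abort_transaction(trans, root, ret);
			goto out;
		}
		ret = 0;
	out:
		btrfs_free_path(path);
		return ret;
	}
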
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index b02e379b14c7..e88330d3df52 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -230,11 +230,13 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
230 230
231 if (ret) { 231 if (ret) {
232 trans->block_rsv = rsv; 232 trans->block_rsv = rsv;
233 WARN_ON(1); 233 btrfs_abort_transaction(trans, root, ret);
234 return ret; 234 return ret;
235 } 235 }
236 236
237 ret = btrfs_update_inode(trans, root, inode); 237 ret = btrfs_update_inode(trans, root, inode);
238 if (ret)
239 btrfs_abort_transaction(trans, root, ret);
238 trans->block_rsv = rsv; 240 trans->block_rsv = rsv;
239 241
240 return ret; 242 return ret;
@@ -869,7 +871,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
869 io_ctl_prepare_pages(&io_ctl, inode, 0); 871 io_ctl_prepare_pages(&io_ctl, inode, 0);
870 872
871 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 873 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
872 0, &cached_state, GFP_NOFS); 874 0, &cached_state);
873 875
874 node = rb_first(&ctl->free_space_offset); 876 node = rb_first(&ctl->free_space_offset);
875 if (!node && cluster) { 877 if (!node && cluster) {
@@ -1948,14 +1950,14 @@ again:
1948 */ 1950 */
1949 ret = btrfs_add_free_space(block_group, old_start, 1951 ret = btrfs_add_free_space(block_group, old_start,
1950 offset - old_start); 1952 offset - old_start);
1951 WARN_ON(ret); 1953 WARN_ON(ret); /* -ENOMEM */
1952 goto out; 1954 goto out;
1953 } 1955 }
1954 1956
1955 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1957 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1956 if (ret == -EAGAIN) 1958 if (ret == -EAGAIN)
1957 goto again; 1959 goto again;
 1958 BUG_ON(ret); 1960 BUG_ON(ret); /* Logic error */
1959out_lock: 1961out_lock:
1960 spin_unlock(&ctl->tree_lock); 1962 spin_unlock(&ctl->tree_lock);
1961out: 1963out:
@@ -2346,7 +2348,7 @@ again:
2346 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2348 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2347 ret = tree_insert_offset(&cluster->root, entry->offset, 2349 ret = tree_insert_offset(&cluster->root, entry->offset,
2348 &entry->offset_index, 1); 2350 &entry->offset_index, 1);
2349 BUG_ON(ret); 2351 BUG_ON(ret); /* -EEXIST; Logic error */
2350 2352
2351 trace_btrfs_setup_cluster(block_group, cluster, 2353 trace_btrfs_setup_cluster(block_group, cluster,
2352 total_found * block_group->sectorsize, 1); 2354 total_found * block_group->sectorsize, 1);
@@ -2439,7 +2441,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2439 ret = tree_insert_offset(&cluster->root, entry->offset, 2441 ret = tree_insert_offset(&cluster->root, entry->offset,
2440 &entry->offset_index, 0); 2442 &entry->offset_index, 0);
2441 total_size += entry->bytes; 2443 total_size += entry->bytes;
2442 BUG_ON(ret); 2444 BUG_ON(ret); /* -EEXIST; Logic error */
2443 } while (node && entry != last); 2445 } while (node && entry != last);
2444 2446
2445 cluster->max_size = max_extent; 2447 cluster->max_size = max_extent;
@@ -2830,6 +2832,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2830 int ret; 2832 int ret;
2831 2833
2832 ret = search_bitmap(ctl, entry, &offset, &count); 2834 ret = search_bitmap(ctl, entry, &offset, &count);
 2835 /* Logic error; should be empty if it can't find anything */
2833 BUG_ON(ret); 2836 BUG_ON(ret);
2834 2837
2835 ino = offset; 2838 ino = offset;
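
Independent of the error handling, several hunks in this file (and in file.c above and inode.c below) shrink by one argument: lock_extent() and lock_extent_bits() no longer take a gfp_t, since in every caller touched here the mask passed was GFP_NOFS anyway. Assuming an inode whose io_tree is already initialized, the new call shape is sketched below; note that unlock_extent_cached() still takes GFP_NOFS at this point in the series, as the later hunks show.

	static void lock_range_sketch(struct inode *inode, u64 start, u64 end)
	{
		struct extent_state *cached_state = NULL;

		/* gfp_t argument is gone from the lock side. */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, start, end, 0,
				 &cached_state);
		/* ... operate on [start, end] ... */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, end,
				     &cached_state, GFP_NOFS);
	}
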
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index baa74f3db691..a13cf1a96c73 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -19,6 +19,7 @@
19#include "ctree.h" 19#include "ctree.h"
20#include "disk-io.h" 20#include "disk-io.h"
21#include "transaction.h" 21#include "transaction.h"
22#include "print-tree.h"
22 23
23static int find_name_in_backref(struct btrfs_path *path, const char *name, 24static int find_name_in_backref(struct btrfs_path *path, const char *name,
24 int name_len, struct btrfs_inode_ref **ref_ret) 25 int name_len, struct btrfs_inode_ref **ref_ret)
@@ -128,13 +129,14 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
128 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); 129 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
129 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 130 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
130 item_size - (ptr + sub_item_len - item_start)); 131 item_size - (ptr + sub_item_len - item_start));
131 ret = btrfs_truncate_item(trans, root, path, 132 btrfs_truncate_item(trans, root, path,
132 item_size - sub_item_len, 1); 133 item_size - sub_item_len, 1);
133out: 134out:
134 btrfs_free_path(path); 135 btrfs_free_path(path);
135 return ret; 136 return ret;
136} 137}
137 138
 139/* Will return 0, -ENOMEM, -EMLINK, -EEXIST, or anything from the CoW path */
138int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, 140int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
139 struct btrfs_root *root, 141 struct btrfs_root *root,
140 const char *name, int name_len, 142 const char *name, int name_len,
@@ -165,7 +167,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
165 goto out; 167 goto out;
166 168
167 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 169 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
168 ret = btrfs_extend_item(trans, root, path, ins_len); 170 btrfs_extend_item(trans, root, path, ins_len);
169 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 171 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
170 struct btrfs_inode_ref); 172 struct btrfs_inode_ref);
171 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); 173 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
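
The inode-item.c hunks above drop their "ret =" assignments because btrfs_truncate_item() and btrfs_extend_item() now return void; as the truncate_one_csum() conversion in file-item.c suggests, failures inside these helpers are treated as fatal logic errors, so there is nothing meaningful for callers to check. An updated call site simply becomes a statement, as in this sketch:

	/* Sketch of a caller after the void conversion. */
	static void resize_sketch(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path, u32 new_size)
	{
		/* Shrink the item in place; inconsistencies BUG() internally. */
		btrfs_truncate_item(trans, root, path, new_size, 1);
	}
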
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index ee15d88b33d2..b1a1c929ba80 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -178,7 +178,7 @@ static void start_caching(struct btrfs_root *root)
178 178
179 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", 179 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
180 root->root_key.objectid); 180 root->root_key.objectid);
181 BUG_ON(IS_ERR(tsk)); 181 BUG_ON(IS_ERR(tsk)); /* -ENOMEM */
182} 182}
183 183
184int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) 184int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
@@ -271,7 +271,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
271 break; 271 break;
272 272
273 info = rb_entry(n, struct btrfs_free_space, offset_index); 273 info = rb_entry(n, struct btrfs_free_space, offset_index);
274 BUG_ON(info->bitmap); 274 BUG_ON(info->bitmap); /* Logic error */
275 275
276 if (info->offset > root->cache_progress) 276 if (info->offset > root->cache_progress)
277 goto free; 277 goto free;
@@ -439,17 +439,16 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
439 if (ret) 439 if (ret)
440 goto out; 440 goto out;
441 trace_btrfs_space_reservation(root->fs_info, "ino_cache", 441 trace_btrfs_space_reservation(root->fs_info, "ino_cache",
442 (u64)(unsigned long)trans, 442 trans->transid, trans->bytes_reserved, 1);
443 trans->bytes_reserved, 1);
444again: 443again:
445 inode = lookup_free_ino_inode(root, path); 444 inode = lookup_free_ino_inode(root, path);
446 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 445 if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
447 ret = PTR_ERR(inode); 446 ret = PTR_ERR(inode);
448 goto out_release; 447 goto out_release;
449 } 448 }
450 449
451 if (IS_ERR(inode)) { 450 if (IS_ERR(inode)) {
452 BUG_ON(retry); 451 BUG_ON(retry); /* Logic error */
453 retry = true; 452 retry = true;
454 453
455 ret = create_free_ino_inode(root, trans, path); 454 ret = create_free_ino_inode(root, trans, path);
@@ -460,12 +459,17 @@ again:
460 459
461 BTRFS_I(inode)->generation = 0; 460 BTRFS_I(inode)->generation = 0;
462 ret = btrfs_update_inode(trans, root, inode); 461 ret = btrfs_update_inode(trans, root, inode);
463 WARN_ON(ret); 462 if (ret) {
463 btrfs_abort_transaction(trans, root, ret);
464 goto out_put;
465 }
464 466
465 if (i_size_read(inode) > 0) { 467 if (i_size_read(inode) > 0) {
466 ret = btrfs_truncate_free_space_cache(root, trans, path, inode); 468 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
467 if (ret) 469 if (ret) {
470 btrfs_abort_transaction(trans, root, ret);
468 goto out_put; 471 goto out_put;
472 }
469 } 473 }
470 474
471 spin_lock(&root->cache_lock); 475 spin_lock(&root->cache_lock);
@@ -502,8 +506,7 @@ out_put:
502 iput(inode); 506 iput(inode);
503out_release: 507out_release:
504 trace_btrfs_space_reservation(root->fs_info, "ino_cache", 508 trace_btrfs_space_reservation(root->fs_info, "ino_cache",
505 (u64)(unsigned long)trans, 509 trans->transid, trans->bytes_reserved, 0);
506 trans->bytes_reserved, 0);
507 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); 510 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
508out: 511out:
509 trans->block_rsv = rsv; 512 trans->block_rsv = rsv;
@@ -532,7 +535,7 @@ static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
532 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); 535 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
533 if (ret < 0) 536 if (ret < 0)
534 goto error; 537 goto error;
535 BUG_ON(ret == 0); 538 BUG_ON(ret == 0); /* Corruption */
536 if (path->slots[0] > 0) { 539 if (path->slots[0] > 0) {
537 slot = path->slots[0] - 1; 540 slot = path->slots[0] - 1;
538 l = path->nodes[0]; 541 l = path->nodes[0];
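
In inode.c below, the most repeated conversion is around btrfs_join_transaction(): instead of BUG_ON(IS_ERR(trans)), the ERR_PTR value is decoded and propagated, after any per-call cleanup such as clearing delalloc state or unlocking an extent range. The minimal shape, sketched against the btrfs headers:

	static int join_sketch(struct btrfs_root *root)
	{
		struct btrfs_trans_handle *trans;

		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans))
			return PTR_ERR(trans);	/* was: BUG_ON(IS_ERR(trans)) */

		trans->block_rsv = &root->fs_info->delalloc_block_rsv;
		/* ... transactional work ... */
		return btrfs_end_transaction(trans, root);
	}
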
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3a0b5c1f9d31..115bc05e42b0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -150,7 +150,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
150 inode_add_bytes(inode, size); 150 inode_add_bytes(inode, size);
151 ret = btrfs_insert_empty_item(trans, root, path, &key, 151 ret = btrfs_insert_empty_item(trans, root, path, &key,
152 datasize); 152 datasize);
153 BUG_ON(ret);
154 if (ret) { 153 if (ret) {
155 err = ret; 154 err = ret;
156 goto fail; 155 goto fail;
@@ -206,9 +205,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
206 * could end up racing with unlink. 205 * could end up racing with unlink.
207 */ 206 */
208 BTRFS_I(inode)->disk_i_size = inode->i_size; 207 BTRFS_I(inode)->disk_i_size = inode->i_size;
209 btrfs_update_inode(trans, root, inode); 208 ret = btrfs_update_inode(trans, root, inode);
210 209
211 return 0; 210 return ret;
212fail: 211fail:
213 btrfs_free_path(path); 212 btrfs_free_path(path);
214 return err; 213 return err;
@@ -250,14 +249,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
250 249
251 ret = btrfs_drop_extents(trans, inode, start, aligned_end, 250 ret = btrfs_drop_extents(trans, inode, start, aligned_end,
252 &hint_byte, 1); 251 &hint_byte, 1);
253 BUG_ON(ret); 252 if (ret)
253 return ret;
254 254
255 if (isize > actual_end) 255 if (isize > actual_end)
256 inline_len = min_t(u64, isize, actual_end); 256 inline_len = min_t(u64, isize, actual_end);
257 ret = insert_inline_extent(trans, root, inode, start, 257 ret = insert_inline_extent(trans, root, inode, start,
258 inline_len, compressed_size, 258 inline_len, compressed_size,
259 compress_type, compressed_pages); 259 compress_type, compressed_pages);
260 BUG_ON(ret); 260 if (ret) {
261 btrfs_abort_transaction(trans, root, ret);
262 return ret;
263 }
261 btrfs_delalloc_release_metadata(inode, end + 1 - start); 264 btrfs_delalloc_release_metadata(inode, end + 1 - start);
262 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 265 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
263 return 0; 266 return 0;
@@ -293,7 +296,7 @@ static noinline int add_async_extent(struct async_cow *cow,
293 struct async_extent *async_extent; 296 struct async_extent *async_extent;
294 297
295 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); 298 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
296 BUG_ON(!async_extent); 299 BUG_ON(!async_extent); /* -ENOMEM */
297 async_extent->start = start; 300 async_extent->start = start;
298 async_extent->ram_size = ram_size; 301 async_extent->ram_size = ram_size;
299 async_extent->compressed_size = compressed_size; 302 async_extent->compressed_size = compressed_size;
@@ -344,8 +347,9 @@ static noinline int compress_file_range(struct inode *inode,
344 int will_compress; 347 int will_compress;
345 int compress_type = root->fs_info->compress_type; 348 int compress_type = root->fs_info->compress_type;
346 349
347 /* if this is a small write inside eof, kick off a defragbot */ 350 /* if this is a small write inside eof, kick off a defrag */
348 if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024) 351 if ((end - start + 1) < 16 * 1024 &&
352 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
349 btrfs_add_inode_defrag(NULL, inode); 353 btrfs_add_inode_defrag(NULL, inode);
350 354
351 actual_end = min_t(u64, isize, end + 1); 355 actual_end = min_t(u64, isize, end + 1);
@@ -433,7 +437,11 @@ again:
433cont: 437cont:
434 if (start == 0) { 438 if (start == 0) {
435 trans = btrfs_join_transaction(root); 439 trans = btrfs_join_transaction(root);
436 BUG_ON(IS_ERR(trans)); 440 if (IS_ERR(trans)) {
441 ret = PTR_ERR(trans);
442 trans = NULL;
443 goto cleanup_and_out;
444 }
437 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 445 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
438 446
439 /* lets try to make an inline extent */ 447 /* lets try to make an inline extent */
@@ -450,11 +458,11 @@ cont:
450 total_compressed, 458 total_compressed,
451 compress_type, pages); 459 compress_type, pages);
452 } 460 }
453 if (ret == 0) { 461 if (ret <= 0) {
454 /* 462 /*
 455 * inline extent creation worked, we don't need 463 * inline extent creation worked or returned an error;
 456 * to create any more async work items. Unlock 464 * either way, we don't need to create more async work
 457 * and free up our temp pages. 465 * items. Unlock and free up our temp pages.
458 */ 466 */
459 extent_clear_unlock_delalloc(inode, 467 extent_clear_unlock_delalloc(inode,
460 &BTRFS_I(inode)->io_tree, 468 &BTRFS_I(inode)->io_tree,
@@ -547,7 +555,7 @@ cleanup_and_bail_uncompressed:
547 } 555 }
548 556
549out: 557out:
550 return 0; 558 return ret;
551 559
552free_pages_out: 560free_pages_out:
553 for (i = 0; i < nr_pages_ret; i++) { 561 for (i = 0; i < nr_pages_ret; i++) {
@@ -557,6 +565,20 @@ free_pages_out:
557 kfree(pages); 565 kfree(pages);
558 566
559 goto out; 567 goto out;
568
569cleanup_and_out:
570 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
571 start, end, NULL,
572 EXTENT_CLEAR_UNLOCK_PAGE |
573 EXTENT_CLEAR_DIRTY |
574 EXTENT_CLEAR_DELALLOC |
575 EXTENT_SET_WRITEBACK |
576 EXTENT_END_WRITEBACK);
577 if (!trans || IS_ERR(trans))
578 btrfs_error(root->fs_info, ret, "Failed to join transaction");
579 else
580 btrfs_abort_transaction(trans, root, ret);
581 goto free_pages_out;
560} 582}
561 583
562/* 584/*
@@ -597,7 +619,7 @@ retry:
597 619
598 lock_extent(io_tree, async_extent->start, 620 lock_extent(io_tree, async_extent->start,
599 async_extent->start + 621 async_extent->start +
600 async_extent->ram_size - 1, GFP_NOFS); 622 async_extent->ram_size - 1);
601 623
602 /* allocate blocks */ 624 /* allocate blocks */
603 ret = cow_file_range(inode, async_cow->locked_page, 625 ret = cow_file_range(inode, async_cow->locked_page,
@@ -606,6 +628,8 @@ retry:
606 async_extent->ram_size - 1, 628 async_extent->ram_size - 1,
607 &page_started, &nr_written, 0); 629 &page_started, &nr_written, 0);
608 630
631 /* JDM XXX */
632
609 /* 633 /*
610 * if page_started, cow_file_range inserted an 634 * if page_started, cow_file_range inserted an
611 * inline extent and took care of all the unlocking 635 * inline extent and took care of all the unlocking
@@ -625,18 +649,21 @@ retry:
625 } 649 }
626 650
627 lock_extent(io_tree, async_extent->start, 651 lock_extent(io_tree, async_extent->start,
628 async_extent->start + async_extent->ram_size - 1, 652 async_extent->start + async_extent->ram_size - 1);
629 GFP_NOFS);
630 653
631 trans = btrfs_join_transaction(root); 654 trans = btrfs_join_transaction(root);
632 BUG_ON(IS_ERR(trans)); 655 if (IS_ERR(trans)) {
633 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 656 ret = PTR_ERR(trans);
634 ret = btrfs_reserve_extent(trans, root, 657 } else {
658 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
659 ret = btrfs_reserve_extent(trans, root,
635 async_extent->compressed_size, 660 async_extent->compressed_size,
636 async_extent->compressed_size, 661 async_extent->compressed_size,
637 0, alloc_hint, 662 0, alloc_hint, &ins, 1);
638 (u64)-1, &ins, 1); 663 if (ret)
639 btrfs_end_transaction(trans, root); 664 btrfs_abort_transaction(trans, root, ret);
665 btrfs_end_transaction(trans, root);
666 }
640 667
641 if (ret) { 668 if (ret) {
642 int i; 669 int i;
@@ -649,8 +676,10 @@ retry:
649 async_extent->pages = NULL; 676 async_extent->pages = NULL;
650 unlock_extent(io_tree, async_extent->start, 677 unlock_extent(io_tree, async_extent->start,
651 async_extent->start + 678 async_extent->start +
652 async_extent->ram_size - 1, GFP_NOFS); 679 async_extent->ram_size - 1);
653 goto retry; 680 if (ret == -ENOSPC)
681 goto retry;
682 goto out_free; /* JDM: Requeue? */
654 } 683 }
655 684
656 /* 685 /*
@@ -662,7 +691,7 @@ retry:
662 async_extent->ram_size - 1, 0); 691 async_extent->ram_size - 1, 0);
663 692
664 em = alloc_extent_map(); 693 em = alloc_extent_map();
665 BUG_ON(!em); 694 BUG_ON(!em); /* -ENOMEM */
666 em->start = async_extent->start; 695 em->start = async_extent->start;
667 em->len = async_extent->ram_size; 696 em->len = async_extent->ram_size;
668 em->orig_start = em->start; 697 em->orig_start = em->start;
@@ -694,7 +723,7 @@ retry:
694 ins.offset, 723 ins.offset,
695 BTRFS_ORDERED_COMPRESSED, 724 BTRFS_ORDERED_COMPRESSED,
696 async_extent->compress_type); 725 async_extent->compress_type);
697 BUG_ON(ret); 726 BUG_ON(ret); /* -ENOMEM */
698 727
699 /* 728 /*
700 * clear dirty, set writeback and unlock the pages. 729 * clear dirty, set writeback and unlock the pages.
@@ -716,13 +745,17 @@ retry:
716 ins.offset, async_extent->pages, 745 ins.offset, async_extent->pages,
717 async_extent->nr_pages); 746 async_extent->nr_pages);
718 747
719 BUG_ON(ret); 748 BUG_ON(ret); /* -ENOMEM */
720 alloc_hint = ins.objectid + ins.offset; 749 alloc_hint = ins.objectid + ins.offset;
721 kfree(async_extent); 750 kfree(async_extent);
722 cond_resched(); 751 cond_resched();
723 } 752 }
724 753 ret = 0;
725 return 0; 754out:
755 return ret;
756out_free:
757 kfree(async_extent);
758 goto out;
726} 759}
727 760
728static u64 get_extent_allocation_hint(struct inode *inode, u64 start, 761static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@@ -791,7 +824,18 @@ static noinline int cow_file_range(struct inode *inode,
791 824
792 BUG_ON(btrfs_is_free_space_inode(root, inode)); 825 BUG_ON(btrfs_is_free_space_inode(root, inode));
793 trans = btrfs_join_transaction(root); 826 trans = btrfs_join_transaction(root);
794 BUG_ON(IS_ERR(trans)); 827 if (IS_ERR(trans)) {
828 extent_clear_unlock_delalloc(inode,
829 &BTRFS_I(inode)->io_tree,
830 start, end, NULL,
831 EXTENT_CLEAR_UNLOCK_PAGE |
832 EXTENT_CLEAR_UNLOCK |
833 EXTENT_CLEAR_DELALLOC |
834 EXTENT_CLEAR_DIRTY |
835 EXTENT_SET_WRITEBACK |
836 EXTENT_END_WRITEBACK);
837 return PTR_ERR(trans);
838 }
795 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 839 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
796 840
797 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 841 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@ -800,7 +844,8 @@ static noinline int cow_file_range(struct inode *inode,
800 ret = 0; 844 ret = 0;
801 845
802 /* if this is a small write inside eof, kick off defrag */ 846 /* if this is a small write inside eof, kick off defrag */
803 if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024) 847 if (num_bytes < 64 * 1024 &&
848 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
804 btrfs_add_inode_defrag(trans, inode); 849 btrfs_add_inode_defrag(trans, inode);
805 850
806 if (start == 0) { 851 if (start == 0) {
@@ -821,8 +866,10 @@ static noinline int cow_file_range(struct inode *inode,
821 *nr_written = *nr_written + 866 *nr_written = *nr_written +
822 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 867 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
823 *page_started = 1; 868 *page_started = 1;
824 ret = 0;
825 goto out; 869 goto out;
870 } else if (ret < 0) {
871 btrfs_abort_transaction(trans, root, ret);
872 goto out_unlock;
826 } 873 }
827 } 874 }
828 875
@@ -838,11 +885,14 @@ static noinline int cow_file_range(struct inode *inode,
838 cur_alloc_size = disk_num_bytes; 885 cur_alloc_size = disk_num_bytes;
839 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 886 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
840 root->sectorsize, 0, alloc_hint, 887 root->sectorsize, 0, alloc_hint,
841 (u64)-1, &ins, 1); 888 &ins, 1);
842 BUG_ON(ret); 889 if (ret < 0) {
890 btrfs_abort_transaction(trans, root, ret);
891 goto out_unlock;
892 }
843 893
844 em = alloc_extent_map(); 894 em = alloc_extent_map();
845 BUG_ON(!em); 895 BUG_ON(!em); /* -ENOMEM */
846 em->start = start; 896 em->start = start;
847 em->orig_start = em->start; 897 em->orig_start = em->start;
848 ram_size = ins.offset; 898 ram_size = ins.offset;
@@ -868,13 +918,16 @@ static noinline int cow_file_range(struct inode *inode,
868 cur_alloc_size = ins.offset; 918 cur_alloc_size = ins.offset;
869 ret = btrfs_add_ordered_extent(inode, start, ins.objectid, 919 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
870 ram_size, cur_alloc_size, 0); 920 ram_size, cur_alloc_size, 0);
871 BUG_ON(ret); 921 BUG_ON(ret); /* -ENOMEM */
872 922
873 if (root->root_key.objectid == 923 if (root->root_key.objectid ==
874 BTRFS_DATA_RELOC_TREE_OBJECTID) { 924 BTRFS_DATA_RELOC_TREE_OBJECTID) {
875 ret = btrfs_reloc_clone_csums(inode, start, 925 ret = btrfs_reloc_clone_csums(inode, start,
876 cur_alloc_size); 926 cur_alloc_size);
877 BUG_ON(ret); 927 if (ret) {
928 btrfs_abort_transaction(trans, root, ret);
929 goto out_unlock;
930 }
878 } 931 }
879 932
880 if (disk_num_bytes < cur_alloc_size) 933 if (disk_num_bytes < cur_alloc_size)
@@ -899,11 +952,23 @@ static noinline int cow_file_range(struct inode *inode,
899 alloc_hint = ins.objectid + ins.offset; 952 alloc_hint = ins.objectid + ins.offset;
900 start += cur_alloc_size; 953 start += cur_alloc_size;
901 } 954 }
902out:
903 ret = 0; 955 ret = 0;
956out:
904 btrfs_end_transaction(trans, root); 957 btrfs_end_transaction(trans, root);
905 958
906 return ret; 959 return ret;
960out_unlock:
961 extent_clear_unlock_delalloc(inode,
962 &BTRFS_I(inode)->io_tree,
963 start, end, NULL,
964 EXTENT_CLEAR_UNLOCK_PAGE |
965 EXTENT_CLEAR_UNLOCK |
966 EXTENT_CLEAR_DELALLOC |
967 EXTENT_CLEAR_DIRTY |
968 EXTENT_SET_WRITEBACK |
969 EXTENT_END_WRITEBACK);
970
971 goto out;
907} 972}
908 973
909/* 974/*
@@ -969,7 +1034,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
969 1, 0, NULL, GFP_NOFS); 1034 1, 0, NULL, GFP_NOFS);
970 while (start < end) { 1035 while (start < end) {
971 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); 1036 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
972 BUG_ON(!async_cow); 1037 BUG_ON(!async_cow); /* -ENOMEM */
973 async_cow->inode = inode; 1038 async_cow->inode = inode;
974 async_cow->root = root; 1039 async_cow->root = root;
975 async_cow->locked_page = locked_page; 1040 async_cow->locked_page = locked_page;
@@ -1060,7 +1125,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1060 u64 disk_bytenr; 1125 u64 disk_bytenr;
1061 u64 num_bytes; 1126 u64 num_bytes;
1062 int extent_type; 1127 int extent_type;
1063 int ret; 1128 int ret, err;
1064 int type; 1129 int type;
1065 int nocow; 1130 int nocow;
1066 int check_prev = 1; 1131 int check_prev = 1;
@@ -1078,7 +1143,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1078 else 1143 else
1079 trans = btrfs_join_transaction(root); 1144 trans = btrfs_join_transaction(root);
1080 1145
1081 BUG_ON(IS_ERR(trans)); 1146 if (IS_ERR(trans)) {
1147 btrfs_free_path(path);
1148 return PTR_ERR(trans);
1149 }
1150
1082 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1151 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1083 1152
1084 cow_start = (u64)-1; 1153 cow_start = (u64)-1;
@@ -1086,7 +1155,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1086 while (1) { 1155 while (1) {
1087 ret = btrfs_lookup_file_extent(trans, root, path, ino, 1156 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1088 cur_offset, 0); 1157 cur_offset, 0);
1089 BUG_ON(ret < 0); 1158 if (ret < 0) {
1159 btrfs_abort_transaction(trans, root, ret);
1160 goto error;
1161 }
1090 if (ret > 0 && path->slots[0] > 0 && check_prev) { 1162 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1091 leaf = path->nodes[0]; 1163 leaf = path->nodes[0];
1092 btrfs_item_key_to_cpu(leaf, &found_key, 1164 btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1100,8 +1172,10 @@ next_slot:
1100 leaf = path->nodes[0]; 1172 leaf = path->nodes[0];
1101 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 1173 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1102 ret = btrfs_next_leaf(root, path); 1174 ret = btrfs_next_leaf(root, path);
1103 if (ret < 0) 1175 if (ret < 0) {
1104 BUG_ON(1); 1176 btrfs_abort_transaction(trans, root, ret);
1177 goto error;
1178 }
1105 if (ret > 0) 1179 if (ret > 0)
1106 break; 1180 break;
1107 leaf = path->nodes[0]; 1181 leaf = path->nodes[0];
@@ -1189,7 +1263,10 @@ out_check:
1189 ret = cow_file_range(inode, locked_page, cow_start, 1263 ret = cow_file_range(inode, locked_page, cow_start,
1190 found_key.offset - 1, page_started, 1264 found_key.offset - 1, page_started,
1191 nr_written, 1); 1265 nr_written, 1);
1192 BUG_ON(ret); 1266 if (ret) {
1267 btrfs_abort_transaction(trans, root, ret);
1268 goto error;
1269 }
1193 cow_start = (u64)-1; 1270 cow_start = (u64)-1;
1194 } 1271 }
1195 1272
@@ -1198,7 +1275,7 @@ out_check:
1198 struct extent_map_tree *em_tree; 1275 struct extent_map_tree *em_tree;
1199 em_tree = &BTRFS_I(inode)->extent_tree; 1276 em_tree = &BTRFS_I(inode)->extent_tree;
1200 em = alloc_extent_map(); 1277 em = alloc_extent_map();
1201 BUG_ON(!em); 1278 BUG_ON(!em); /* -ENOMEM */
1202 em->start = cur_offset; 1279 em->start = cur_offset;
1203 em->orig_start = em->start; 1280 em->orig_start = em->start;
1204 em->len = num_bytes; 1281 em->len = num_bytes;
@@ -1224,13 +1301,16 @@ out_check:
1224 1301
1225 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, 1302 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
1226 num_bytes, num_bytes, type); 1303 num_bytes, num_bytes, type);
1227 BUG_ON(ret); 1304 BUG_ON(ret); /* -ENOMEM */
1228 1305
1229 if (root->root_key.objectid == 1306 if (root->root_key.objectid ==
1230 BTRFS_DATA_RELOC_TREE_OBJECTID) { 1307 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1231 ret = btrfs_reloc_clone_csums(inode, cur_offset, 1308 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1232 num_bytes); 1309 num_bytes);
1233 BUG_ON(ret); 1310 if (ret) {
1311 btrfs_abort_transaction(trans, root, ret);
1312 goto error;
1313 }
1234 } 1314 }
1235 1315
1236 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1316 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
@@ -1249,18 +1329,23 @@ out_check:
1249 if (cow_start != (u64)-1) { 1329 if (cow_start != (u64)-1) {
1250 ret = cow_file_range(inode, locked_page, cow_start, end, 1330 ret = cow_file_range(inode, locked_page, cow_start, end,
1251 page_started, nr_written, 1); 1331 page_started, nr_written, 1);
1252 BUG_ON(ret); 1332 if (ret) {
1333 btrfs_abort_transaction(trans, root, ret);
1334 goto error;
1335 }
1253 } 1336 }
1254 1337
1338error:
1255 if (nolock) { 1339 if (nolock) {
1256 ret = btrfs_end_transaction_nolock(trans, root); 1340 err = btrfs_end_transaction_nolock(trans, root);
1257 BUG_ON(ret);
1258 } else { 1341 } else {
1259 ret = btrfs_end_transaction(trans, root); 1342 err = btrfs_end_transaction(trans, root);
1260 BUG_ON(ret);
1261 } 1343 }
1344 if (!ret)
1345 ret = err;
1346
1262 btrfs_free_path(path); 1347 btrfs_free_path(path);
1263 return 0; 1348 return ret;
1264} 1349}
1265 1350
1266/* 1351/*
@@ -1425,10 +1510,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1425 map_length = length; 1510 map_length = length;
1426 ret = btrfs_map_block(map_tree, READ, logical, 1511 ret = btrfs_map_block(map_tree, READ, logical,
1427 &map_length, NULL, 0); 1512 &map_length, NULL, 0);
1428 1513 /* Will always return 0 or 1 with map_multi == NULL */
1514 BUG_ON(ret < 0);
1429 if (map_length < length + size) 1515 if (map_length < length + size)
1430 return 1; 1516 return 1;
1431 return ret; 1517 return 0;
1432} 1518}
1433 1519
1434/* 1520/*
@@ -1448,7 +1534,7 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1448 int ret = 0; 1534 int ret = 0;
1449 1535
1450 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0); 1536 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1451 BUG_ON(ret); 1537 BUG_ON(ret); /* -ENOMEM */
1452 return 0; 1538 return 0;
1453} 1539}
1454 1540
@@ -1479,14 +1565,16 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1479 struct btrfs_root *root = BTRFS_I(inode)->root; 1565 struct btrfs_root *root = BTRFS_I(inode)->root;
1480 int ret = 0; 1566 int ret = 0;
1481 int skip_sum; 1567 int skip_sum;
1568 int metadata = 0;
1482 1569
1483 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1570 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1484 1571
1485 if (btrfs_is_free_space_inode(root, inode)) 1572 if (btrfs_is_free_space_inode(root, inode))
1486 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); 1573 metadata = 2;
1487 else 1574
1488 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1575 ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
1489 BUG_ON(ret); 1576 if (ret)
1577 return ret;
1490 1578
1491 if (!(rw & REQ_WRITE)) { 1579 if (!(rw & REQ_WRITE)) {
1492 if (bio_flags & EXTENT_BIO_COMPRESSED) { 1580 if (bio_flags & EXTENT_BIO_COMPRESSED) {
@@ -1571,7 +1659,7 @@ again:
1571 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; 1659 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
1572 1660
1573 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0, 1661 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
1574 &cached_state, GFP_NOFS); 1662 &cached_state);
1575 1663
1576 /* already ordered? We're done */ 1664 /* already ordered? We're done */
1577 if (PagePrivate2(page)) 1665 if (PagePrivate2(page))
@@ -1675,13 +1763,15 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1675 */ 1763 */
1676 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, 1764 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
1677 &hint, 0); 1765 &hint, 0);
1678 BUG_ON(ret); 1766 if (ret)
1767 goto out;
1679 1768
1680 ins.objectid = btrfs_ino(inode); 1769 ins.objectid = btrfs_ino(inode);
1681 ins.offset = file_pos; 1770 ins.offset = file_pos;
1682 ins.type = BTRFS_EXTENT_DATA_KEY; 1771 ins.type = BTRFS_EXTENT_DATA_KEY;
1683 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); 1772 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
1684 BUG_ON(ret); 1773 if (ret)
1774 goto out;
1685 leaf = path->nodes[0]; 1775 leaf = path->nodes[0];
1686 fi = btrfs_item_ptr(leaf, path->slots[0], 1776 fi = btrfs_item_ptr(leaf, path->slots[0],
1687 struct btrfs_file_extent_item); 1777 struct btrfs_file_extent_item);
@@ -1709,10 +1799,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1709 ret = btrfs_alloc_reserved_file_extent(trans, root, 1799 ret = btrfs_alloc_reserved_file_extent(trans, root,
1710 root->root_key.objectid, 1800 root->root_key.objectid,
1711 btrfs_ino(inode), file_pos, &ins); 1801 btrfs_ino(inode), file_pos, &ins);
1712 BUG_ON(ret); 1802out:
1713 btrfs_free_path(path); 1803 btrfs_free_path(path);
1714 1804
1715 return 0; 1805 return ret;
1716} 1806}
1717 1807
1718/* 1808/*
@@ -1740,35 +1830,41 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1740 end - start + 1); 1830 end - start + 1);
1741 if (!ret) 1831 if (!ret)
1742 return 0; 1832 return 0;
1743 BUG_ON(!ordered_extent); 1833 BUG_ON(!ordered_extent); /* Logic error */
1744 1834
1745 nolock = btrfs_is_free_space_inode(root, inode); 1835 nolock = btrfs_is_free_space_inode(root, inode);
1746 1836
1747 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1837 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1748 BUG_ON(!list_empty(&ordered_extent->list)); 1838 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
1749 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1839 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1750 if (!ret) { 1840 if (!ret) {
1751 if (nolock) 1841 if (nolock)
1752 trans = btrfs_join_transaction_nolock(root); 1842 trans = btrfs_join_transaction_nolock(root);
1753 else 1843 else
1754 trans = btrfs_join_transaction(root); 1844 trans = btrfs_join_transaction(root);
1755 BUG_ON(IS_ERR(trans)); 1845 if (IS_ERR(trans))
1846 return PTR_ERR(trans);
1756 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1847 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1757 ret = btrfs_update_inode_fallback(trans, root, inode); 1848 ret = btrfs_update_inode_fallback(trans, root, inode);
1758 BUG_ON(ret); 1849 if (ret) /* -ENOMEM or corruption */
1850 btrfs_abort_transaction(trans, root, ret);
1759 } 1851 }
1760 goto out; 1852 goto out;
1761 } 1853 }
1762 1854
1763 lock_extent_bits(io_tree, ordered_extent->file_offset, 1855 lock_extent_bits(io_tree, ordered_extent->file_offset,
1764 ordered_extent->file_offset + ordered_extent->len - 1, 1856 ordered_extent->file_offset + ordered_extent->len - 1,
1765 0, &cached_state, GFP_NOFS); 1857 0, &cached_state);
1766 1858
1767 if (nolock) 1859 if (nolock)
1768 trans = btrfs_join_transaction_nolock(root); 1860 trans = btrfs_join_transaction_nolock(root);
1769 else 1861 else
1770 trans = btrfs_join_transaction(root); 1862 trans = btrfs_join_transaction(root);
1771 BUG_ON(IS_ERR(trans)); 1863 if (IS_ERR(trans)) {
1864 ret = PTR_ERR(trans);
1865 trans = NULL;
1866 goto out_unlock;
1867 }
1772 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1868 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1773 1869
1774 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1870 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -1779,7 +1875,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1779 ordered_extent->file_offset, 1875 ordered_extent->file_offset,
1780 ordered_extent->file_offset + 1876 ordered_extent->file_offset +
1781 ordered_extent->len); 1877 ordered_extent->len);
1782 BUG_ON(ret);
1783 } else { 1878 } else {
1784 BUG_ON(root == root->fs_info->tree_root); 1879 BUG_ON(root == root->fs_info->tree_root);
1785 ret = insert_reserved_file_extent(trans, inode, 1880 ret = insert_reserved_file_extent(trans, inode,
@@ -1793,11 +1888,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1793 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1888 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1794 ordered_extent->file_offset, 1889 ordered_extent->file_offset,
1795 ordered_extent->len); 1890 ordered_extent->len);
1796 BUG_ON(ret);
1797 } 1891 }
1798 unlock_extent_cached(io_tree, ordered_extent->file_offset, 1892 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1799 ordered_extent->file_offset + 1893 ordered_extent->file_offset +
1800 ordered_extent->len - 1, &cached_state, GFP_NOFS); 1894 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1895 if (ret < 0) {
1896 btrfs_abort_transaction(trans, root, ret);
1897 goto out;
1898 }
1801 1899
1802 add_pending_csums(trans, inode, ordered_extent->file_offset, 1900 add_pending_csums(trans, inode, ordered_extent->file_offset,
1803 &ordered_extent->list); 1901 &ordered_extent->list);
@@ -1805,7 +1903,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1805 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1903 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1806 if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1904 if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1807 ret = btrfs_update_inode_fallback(trans, root, inode); 1905 ret = btrfs_update_inode_fallback(trans, root, inode);
1808 BUG_ON(ret); 1906 if (ret) { /* -ENOMEM or corruption */
1907 btrfs_abort_transaction(trans, root, ret);
1908 goto out;
1909 }
1809 } 1910 }
1810 ret = 0; 1911 ret = 0;
1811out: 1912out:
@@ -1824,6 +1925,11 @@ out:
1824 btrfs_put_ordered_extent(ordered_extent); 1925 btrfs_put_ordered_extent(ordered_extent);
1825 1926
1826 return 0; 1927 return 0;
1928out_unlock:
1929 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1930 ordered_extent->file_offset +
1931 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1932 goto out;
1827} 1933}
1828 1934
1829static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, 1935static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
@@ -1905,6 +2011,8 @@ struct delayed_iput {
1905 struct inode *inode; 2011 struct inode *inode;
1906}; 2012};
1907 2013
2014/* JDM: If this is fs-wide, why can't we add a pointer to
2015 * btrfs_inode instead and avoid the allocation? */
1908void btrfs_add_delayed_iput(struct inode *inode) 2016void btrfs_add_delayed_iput(struct inode *inode)
1909{ 2017{
1910 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; 2018 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
@@ -2051,20 +2159,27 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2051 /* grab metadata reservation from transaction handle */ 2159 /* grab metadata reservation from transaction handle */
2052 if (reserve) { 2160 if (reserve) {
2053 ret = btrfs_orphan_reserve_metadata(trans, inode); 2161 ret = btrfs_orphan_reserve_metadata(trans, inode);
2054 BUG_ON(ret); 2162 BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */
2055 } 2163 }
2056 2164
2057 /* insert an orphan item to track this unlinked/truncated file */ 2165 /* insert an orphan item to track this unlinked/truncated file */
2058 if (insert >= 1) { 2166 if (insert >= 1) {
2059 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2167 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2060 BUG_ON(ret && ret != -EEXIST); 2168 if (ret && ret != -EEXIST) {
2169 btrfs_abort_transaction(trans, root, ret);
2170 return ret;
2171 }
2172 ret = 0;
2061 } 2173 }
2062 2174
2063 /* insert an orphan item to track subvolume contains orphan files */ 2175 /* insert an orphan item to track subvolume contains orphan files */
2064 if (insert >= 2) { 2176 if (insert >= 2) {
2065 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, 2177 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
2066 root->root_key.objectid); 2178 root->root_key.objectid);
2067 BUG_ON(ret); 2179 if (ret && ret != -EEXIST) {
2180 btrfs_abort_transaction(trans, root, ret);
2181 return ret;
2182 }
2068 } 2183 }
2069 return 0; 2184 return 0;
2070} 2185}
@@ -2094,7 +2209,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2094 2209
2095 if (trans && delete_item) { 2210 if (trans && delete_item) {
2096 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); 2211 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
2097 BUG_ON(ret); 2212 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
2098 } 2213 }
2099 2214
2100 if (release_rsv) 2215 if (release_rsv)
@@ -2228,7 +2343,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2228 } 2343 }
2229 ret = btrfs_del_orphan_item(trans, root, 2344 ret = btrfs_del_orphan_item(trans, root,
2230 found_key.objectid); 2345 found_key.objectid);
2231 BUG_ON(ret); 2346 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
2232 btrfs_end_transaction(trans, root); 2347 btrfs_end_transaction(trans, root);
2233 continue; 2348 continue;
2234 } 2349 }
@@ -2610,16 +2725,22 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2610 printk(KERN_INFO "btrfs failed to delete reference to %.*s, " 2725 printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
2611 "inode %llu parent %llu\n", name_len, name, 2726 "inode %llu parent %llu\n", name_len, name,
2612 (unsigned long long)ino, (unsigned long long)dir_ino); 2727 (unsigned long long)ino, (unsigned long long)dir_ino);
2728 btrfs_abort_transaction(trans, root, ret);
2613 goto err; 2729 goto err;
2614 } 2730 }
2615 2731
2616 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); 2732 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
2617 if (ret) 2733 if (ret) {
2734 btrfs_abort_transaction(trans, root, ret);
2618 goto err; 2735 goto err;
2736 }
2619 2737
2620 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, 2738 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
2621 inode, dir_ino); 2739 inode, dir_ino);
2622 BUG_ON(ret != 0 && ret != -ENOENT); 2740 if (ret != 0 && ret != -ENOENT) {
2741 btrfs_abort_transaction(trans, root, ret);
2742 goto err;
2743 }
2623 2744
2624 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2745 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
2625 dir, index); 2746 dir, index);
@@ -2777,7 +2898,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2777 err = ret; 2898 err = ret;
2778 goto out; 2899 goto out;
2779 } 2900 }
2780 BUG_ON(ret == 0); 2901 BUG_ON(ret == 0); /* Corruption */
2781 if (check_path_shared(root, path)) 2902 if (check_path_shared(root, path))
2782 goto out; 2903 goto out;
2783 btrfs_release_path(path); 2904 btrfs_release_path(path);
@@ -2810,7 +2931,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2810 err = PTR_ERR(ref); 2931 err = PTR_ERR(ref);
2811 goto out; 2932 goto out;
2812 } 2933 }
2813 BUG_ON(!ref); 2934 BUG_ON(!ref); /* Logic error */
2814 if (check_path_shared(root, path)) 2935 if (check_path_shared(root, path))
2815 goto out; 2936 goto out;
2816 index = btrfs_inode_ref_index(path->nodes[0], ref); 2937 index = btrfs_inode_ref_index(path->nodes[0], ref);
@@ -2917,23 +3038,42 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2917 3038
2918 di = btrfs_lookup_dir_item(trans, root, path, dir_ino, 3039 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
2919 name, name_len, -1); 3040 name, name_len, -1);
2920 BUG_ON(IS_ERR_OR_NULL(di)); 3041 if (IS_ERR_OR_NULL(di)) {
3042 if (!di)
3043 ret = -ENOENT;
3044 else
3045 ret = PTR_ERR(di);
3046 goto out;
3047 }
2921 3048
2922 leaf = path->nodes[0]; 3049 leaf = path->nodes[0];
2923 btrfs_dir_item_key_to_cpu(leaf, di, &key); 3050 btrfs_dir_item_key_to_cpu(leaf, di, &key);
2924 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); 3051 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
2925 ret = btrfs_delete_one_dir_name(trans, root, path, di); 3052 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2926 BUG_ON(ret); 3053 if (ret) {
3054 btrfs_abort_transaction(trans, root, ret);
3055 goto out;
3056 }
2927 btrfs_release_path(path); 3057 btrfs_release_path(path);
2928 3058
2929 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, 3059 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
2930 objectid, root->root_key.objectid, 3060 objectid, root->root_key.objectid,
2931 dir_ino, &index, name, name_len); 3061 dir_ino, &index, name, name_len);
2932 if (ret < 0) { 3062 if (ret < 0) {
2933 BUG_ON(ret != -ENOENT); 3063 if (ret != -ENOENT) {
3064 btrfs_abort_transaction(trans, root, ret);
3065 goto out;
3066 }
2934 di = btrfs_search_dir_index_item(root, path, dir_ino, 3067 di = btrfs_search_dir_index_item(root, path, dir_ino,
2935 name, name_len); 3068 name, name_len);
2936 BUG_ON(IS_ERR_OR_NULL(di)); 3069 if (IS_ERR_OR_NULL(di)) {
3070 if (!di)
3071 ret = -ENOENT;
3072 else
3073 ret = PTR_ERR(di);
3074 btrfs_abort_transaction(trans, root, ret);
3075 goto out;
3076 }
2937 3077
2938 leaf = path->nodes[0]; 3078 leaf = path->nodes[0];
2939 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3079 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
@@ -2943,15 +3083,19 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2943 btrfs_release_path(path); 3083 btrfs_release_path(path);
2944 3084
2945 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); 3085 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
2946 BUG_ON(ret); 3086 if (ret) {
3087 btrfs_abort_transaction(trans, root, ret);
3088 goto out;
3089 }
2947 3090
2948 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3091 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
2949 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3092 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2950 ret = btrfs_update_inode(trans, root, dir); 3093 ret = btrfs_update_inode(trans, root, dir);
2951 BUG_ON(ret); 3094 if (ret)
2952 3095 btrfs_abort_transaction(trans, root, ret);
3096out:
2953 btrfs_free_path(path); 3097 btrfs_free_path(path);
2954 return 0; 3098 return ret;
2955} 3099}
2956 3100
2957static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) 3101static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -3161,8 +3305,8 @@ search_again:
3161 } 3305 }
3162 size = 3306 size =
3163 btrfs_file_extent_calc_inline_size(size); 3307 btrfs_file_extent_calc_inline_size(size);
3164 ret = btrfs_truncate_item(trans, root, path, 3308 btrfs_truncate_item(trans, root, path,
3165 size, 1); 3309 size, 1);
3166 } else if (root->ref_cows) { 3310 } else if (root->ref_cows) {
3167 inode_sub_bytes(inode, item_end + 1 - 3311 inode_sub_bytes(inode, item_end + 1 -
3168 found_key.offset); 3312 found_key.offset);
@@ -3210,7 +3354,11 @@ delete:
3210 ret = btrfs_del_items(trans, root, path, 3354 ret = btrfs_del_items(trans, root, path,
3211 pending_del_slot, 3355 pending_del_slot,
3212 pending_del_nr); 3356 pending_del_nr);
3213 BUG_ON(ret); 3357 if (ret) {
3358 btrfs_abort_transaction(trans,
3359 root, ret);
3360 goto error;
3361 }
3214 pending_del_nr = 0; 3362 pending_del_nr = 0;
3215 } 3363 }
3216 btrfs_release_path(path); 3364 btrfs_release_path(path);
@@ -3223,8 +3371,10 @@ out:
3223 if (pending_del_nr) { 3371 if (pending_del_nr) {
3224 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3372 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3225 pending_del_nr); 3373 pending_del_nr);
3226 BUG_ON(ret); 3374 if (ret)
3375 btrfs_abort_transaction(trans, root, ret);
3227 } 3376 }
3377error:
3228 btrfs_free_path(path); 3378 btrfs_free_path(path);
3229 return err; 3379 return err;
3230} 3380}
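
The truncate hunks above keep the batching of pending deletions but route a failed flush to an error: label that still frees the path. A minimal sketch of the batch-flush-with-error-label shape, assuming toy sizes; del_items() is a placeholder, not the btrfs helper:

#include <errno.h>
#include <stdio.h>

/* Pretend batched delete; fails for oversized batches. */
static int del_items(int nr) { return nr > 4 ? -EIO : 0; }

static int truncate_sketch(int nr_items)
{
	int pending = 0, err = 0, ret;

	for (int i = 0; i < nr_items; i++) {
		if (++pending < 8)
			continue;
		ret = del_items(pending);	/* flush a full batch */
		if (ret) {			/* was: BUG_ON(ret) */
			err = ret;
			goto error;
		}
		pending = 0;
	}
	if (pending) {
		ret = del_items(pending);	/* flush the tail batch */
		if (ret)
			err = ret;
	}
error:
	/* the real code frees its path here on every exit path */
	return err;
}

int main(void)
{
	printf("truncate_sketch(20) = %d\n", truncate_sketch(20));
	return 0;
}
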
@@ -3282,8 +3432,7 @@ again:
3282 } 3432 }
3283 wait_on_page_writeback(page); 3433 wait_on_page_writeback(page);
3284 3434
3285 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, 3435 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
3286 GFP_NOFS);
3287 set_page_extent_mapped(page); 3436 set_page_extent_mapped(page);
3288 3437
3289 ordered = btrfs_lookup_ordered_extent(inode, page_start); 3438 ordered = btrfs_lookup_ordered_extent(inode, page_start);
@@ -3359,7 +3508,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3359 btrfs_wait_ordered_range(inode, hole_start, 3508 btrfs_wait_ordered_range(inode, hole_start,
3360 block_end - hole_start); 3509 block_end - hole_start);
3361 lock_extent_bits(io_tree, hole_start, block_end - 1, 0, 3510 lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
3362 &cached_state, GFP_NOFS); 3511 &cached_state);
3363 ordered = btrfs_lookup_ordered_extent(inode, hole_start); 3512 ordered = btrfs_lookup_ordered_extent(inode, hole_start);
3364 if (!ordered) 3513 if (!ordered)
3365 break; 3514 break;
@@ -3372,7 +3521,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3372 while (1) { 3521 while (1) {
3373 em = btrfs_get_extent(inode, NULL, 0, cur_offset, 3522 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
3374 block_end - cur_offset, 0); 3523 block_end - cur_offset, 0);
3375 BUG_ON(IS_ERR_OR_NULL(em)); 3524 if (IS_ERR(em)) {
3525 err = PTR_ERR(em);
3526 break;
3527 }
3376 last_byte = min(extent_map_end(em), block_end); 3528 last_byte = min(extent_map_end(em), block_end);
3377 last_byte = (last_byte + mask) & ~mask; 3529 last_byte = (last_byte + mask) & ~mask;
3378 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3530 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3389,7 +3541,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3389 cur_offset + hole_size, 3541 cur_offset + hole_size,
3390 &hint_byte, 1); 3542 &hint_byte, 1);
3391 if (err) { 3543 if (err) {
3392 btrfs_update_inode(trans, root, inode); 3544 btrfs_abort_transaction(trans, root, err);
3393 btrfs_end_transaction(trans, root); 3545 btrfs_end_transaction(trans, root);
3394 break; 3546 break;
3395 } 3547 }
@@ -3399,7 +3551,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3399 0, hole_size, 0, hole_size, 3551 0, hole_size, 0, hole_size,
3400 0, 0, 0); 3552 0, 0, 0);
3401 if (err) { 3553 if (err) {
3402 btrfs_update_inode(trans, root, inode); 3554 btrfs_abort_transaction(trans, root, err);
3403 btrfs_end_transaction(trans, root); 3555 btrfs_end_transaction(trans, root);
3404 break; 3556 break;
3405 } 3557 }
@@ -3779,7 +3931,7 @@ static void inode_tree_del(struct inode *inode)
3779 } 3931 }
3780} 3932}
3781 3933
3782int btrfs_invalidate_inodes(struct btrfs_root *root) 3934void btrfs_invalidate_inodes(struct btrfs_root *root)
3783{ 3935{
3784 struct rb_node *node; 3936 struct rb_node *node;
3785 struct rb_node *prev; 3937 struct rb_node *prev;
@@ -3839,7 +3991,6 @@ again:
3839 node = rb_next(node); 3991 node = rb_next(node);
3840 } 3992 }
3841 spin_unlock(&root->inode_lock); 3993 spin_unlock(&root->inode_lock);
3842 return 0;
3843} 3994}
3844 3995
3845static int btrfs_init_locked_inode(struct inode *inode, void *p) 3996static int btrfs_init_locked_inode(struct inode *inode, void *p)
@@ -4581,18 +4732,26 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4581 parent_ino, index); 4732 parent_ino, index);
4582 } 4733 }
4583 4734
4584 if (ret == 0) { 4735 /* Nothing to clean up yet */
4585 ret = btrfs_insert_dir_item(trans, root, name, name_len, 4736 if (ret)
4586 parent_inode, &key, 4737 return ret;
4587 btrfs_inode_type(inode), index);
4588 if (ret)
4589 goto fail_dir_item;
4590 4738
4591 btrfs_i_size_write(parent_inode, parent_inode->i_size + 4739 ret = btrfs_insert_dir_item(trans, root, name, name_len,
4592 name_len * 2); 4740 parent_inode, &key,
4593 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; 4741 btrfs_inode_type(inode), index);
4594 ret = btrfs_update_inode(trans, root, parent_inode); 4742 if (ret == -EEXIST)
4743 goto fail_dir_item;
4744 else if (ret) {
4745 btrfs_abort_transaction(trans, root, ret);
4746 return ret;
4595 } 4747 }
4748
4749 btrfs_i_size_write(parent_inode, parent_inode->i_size +
4750 name_len * 2);
4751 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
4752 ret = btrfs_update_inode(trans, root, parent_inode);
4753 if (ret)
4754 btrfs_abort_transaction(trans, root, ret);
4596 return ret; 4755 return ret;
4597 4756
4598fail_dir_item: 4757fail_dir_item:
@@ -4806,7 +4965,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4806 } else { 4965 } else {
4807 struct dentry *parent = dentry->d_parent; 4966 struct dentry *parent = dentry->d_parent;
4808 err = btrfs_update_inode(trans, root, inode); 4967 err = btrfs_update_inode(trans, root, inode);
4809 BUG_ON(err); 4968 if (err)
4969 goto fail;
4810 d_instantiate(dentry, inode); 4970 d_instantiate(dentry, inode);
4811 btrfs_log_new_name(trans, inode, NULL, parent); 4971 btrfs_log_new_name(trans, inode, NULL, parent);
4812 } 4972 }
@@ -5137,7 +5297,7 @@ again:
5137 ret = uncompress_inline(path, inode, page, 5297 ret = uncompress_inline(path, inode, page,
5138 pg_offset, 5298 pg_offset,
5139 extent_offset, item); 5299 extent_offset, item);
5140 BUG_ON(ret); 5300 BUG_ON(ret); /* -ENOMEM */
5141 } else { 5301 } else {
5142 map = kmap(page); 5302 map = kmap(page);
5143 read_extent_buffer(leaf, map + pg_offset, ptr, 5303 read_extent_buffer(leaf, map + pg_offset, ptr,
@@ -5252,6 +5412,7 @@ out:
5252 free_extent_map(em); 5412 free_extent_map(em);
5253 return ERR_PTR(err); 5413 return ERR_PTR(err);
5254 } 5414 }
5415 BUG_ON(!em); /* Error is always set */
5255 return em; 5416 return em;
5256} 5417}
5257 5418
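
The new BUG_ON(!em) documents the invariant these hunks establish: btrfs_get_extent() hands back either a valid extent map or an ERR_PTR-encoded errno, never NULL, so callers only need IS_ERR(). A user-space sketch of the kernel's pointer-error convention; the helpers below are simplified copies and get_item() is a placeholder:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified user-space copies of the kernel's pointer-error helpers. */
#define MAX_ERRNO 4095
static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

static int the_answer = 42;

/* Placeholder lookup: returns a valid pointer or an ERR_PTR, never NULL. */
static int *get_item(int fail)
{
	return fail ? ERR_PTR(-ENOENT) : &the_answer;
}

int main(void)
{
	int *p = get_item(1);

	if (IS_ERR(p))			/* no separate NULL check needed */
		printf("error %ld\n", PTR_ERR(p));
	else
		printf("value %d\n", *p);
	return 0;
}
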
@@ -5414,7 +5575,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5414 5575
5415 alloc_hint = get_extent_allocation_hint(inode, start, len); 5576 alloc_hint = get_extent_allocation_hint(inode, start, len);
5416 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, 5577 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
5417 alloc_hint, (u64)-1, &ins, 1); 5578 alloc_hint, &ins, 1);
5418 if (ret) { 5579 if (ret) {
5419 em = ERR_PTR(ret); 5580 em = ERR_PTR(ret);
5420 goto out; 5581 goto out;
@@ -5602,7 +5763,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5602 free_extent_map(em); 5763 free_extent_map(em);
5603 /* DIO will do one hole at a time, so just unlock a sector */ 5764 /* DIO will do one hole at a time, so just unlock a sector */
5604 unlock_extent(&BTRFS_I(inode)->io_tree, start, 5765 unlock_extent(&BTRFS_I(inode)->io_tree, start,
5605 start + root->sectorsize - 1, GFP_NOFS); 5766 start + root->sectorsize - 1);
5606 return 0; 5767 return 0;
5607 } 5768 }
5608 5769
@@ -5743,7 +5904,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5743 } while (bvec <= bvec_end); 5904 } while (bvec <= bvec_end);
5744 5905
5745 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 5906 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
5746 dip->logical_offset + dip->bytes - 1, GFP_NOFS); 5907 dip->logical_offset + dip->bytes - 1);
5747 bio->bi_private = dip->private; 5908 bio->bi_private = dip->private;
5748 5909
5749 kfree(dip->csums); 5910 kfree(dip->csums);
@@ -5794,7 +5955,7 @@ again:
5794 5955
5795 lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5956 lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5796 ordered->file_offset + ordered->len - 1, 0, 5957 ordered->file_offset + ordered->len - 1, 0,
5797 &cached_state, GFP_NOFS); 5958 &cached_state);
5798 5959
5799 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { 5960 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
5800 ret = btrfs_mark_extent_written(trans, inode, 5961 ret = btrfs_mark_extent_written(trans, inode,
@@ -5868,7 +6029,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
5868 int ret; 6029 int ret;
5869 struct btrfs_root *root = BTRFS_I(inode)->root; 6030 struct btrfs_root *root = BTRFS_I(inode)->root;
5870 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1); 6031 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
5871 BUG_ON(ret); 6032 BUG_ON(ret); /* -ENOMEM */
5872 return 0; 6033 return 0;
5873} 6034}
5874 6035
@@ -6209,7 +6370,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6209 6370
6210 while (1) { 6371 while (1) {
6211 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6372 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6212 0, &cached_state, GFP_NOFS); 6373 0, &cached_state);
6213 /* 6374 /*
6214 * We're concerned with the entire range that we're going to be 6375 * We're concerned with the entire range that we're going to be
6215 * doing DIO to, so we need to make sure there's no ordered 6376 * doing DIO to, so we need to make sure there's no ordered
@@ -6233,7 +6394,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6233 if (writing) { 6394 if (writing) {
6234 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; 6395 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
6235 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6396 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6236 EXTENT_DELALLOC, 0, NULL, &cached_state, 6397 EXTENT_DELALLOC, NULL, &cached_state,
6237 GFP_NOFS); 6398 GFP_NOFS);
6238 if (ret) { 6399 if (ret) {
6239 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 6400 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
@@ -6363,8 +6524,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6363 btrfs_releasepage(page, GFP_NOFS); 6524 btrfs_releasepage(page, GFP_NOFS);
6364 return; 6525 return;
6365 } 6526 }
6366 lock_extent_bits(tree, page_start, page_end, 0, &cached_state, 6527 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
6367 GFP_NOFS);
6368 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 6528 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
6369 page_offset(page)); 6529 page_offset(page));
6370 if (ordered) { 6530 if (ordered) {
@@ -6386,8 +6546,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6386 } 6546 }
6387 btrfs_put_ordered_extent(ordered); 6547 btrfs_put_ordered_extent(ordered);
6388 cached_state = NULL; 6548 cached_state = NULL;
6389 lock_extent_bits(tree, page_start, page_end, 0, &cached_state, 6549 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
6390 GFP_NOFS);
6391 } 6550 }
6392 clear_extent_bit(tree, page_start, page_end, 6551 clear_extent_bit(tree, page_start, page_end,
6393 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 6552 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -6462,8 +6621,7 @@ again:
6462 } 6621 }
6463 wait_on_page_writeback(page); 6622 wait_on_page_writeback(page);
6464 6623
6465 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, 6624 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
6466 GFP_NOFS);
6467 set_page_extent_mapped(page); 6625 set_page_extent_mapped(page);
6468 6626
6469 /* 6627 /*
@@ -6737,10 +6895,9 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6737 btrfs_i_size_write(inode, 0); 6895 btrfs_i_size_write(inode, 0);
6738 6896
6739 err = btrfs_update_inode(trans, new_root, inode); 6897 err = btrfs_update_inode(trans, new_root, inode);
6740 BUG_ON(err);
6741 6898
6742 iput(inode); 6899 iput(inode);
6743 return 0; 6900 return err;
6744} 6901}
6745 6902
6746struct inode *btrfs_alloc_inode(struct super_block *sb) 6903struct inode *btrfs_alloc_inode(struct super_block *sb)
@@ -6783,6 +6940,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6783 extent_map_tree_init(&ei->extent_tree); 6940 extent_map_tree_init(&ei->extent_tree);
6784 extent_io_tree_init(&ei->io_tree, &inode->i_data); 6941 extent_io_tree_init(&ei->io_tree, &inode->i_data);
6785 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); 6942 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
6943 ei->io_tree.track_uptodate = 1;
6944 ei->io_failure_tree.track_uptodate = 1;
6786 mutex_init(&ei->log_mutex); 6945 mutex_init(&ei->log_mutex);
6787 mutex_init(&ei->delalloc_mutex); 6946 mutex_init(&ei->delalloc_mutex);
6788 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6947 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -7072,7 +7231,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7072 if (!ret) 7231 if (!ret)
7073 ret = btrfs_update_inode(trans, root, old_inode); 7232 ret = btrfs_update_inode(trans, root, old_inode);
7074 } 7233 }
7075 BUG_ON(ret); 7234 if (ret) {
7235 btrfs_abort_transaction(trans, root, ret);
7236 goto out_fail;
7237 }
7076 7238
7077 if (new_inode) { 7239 if (new_inode) {
7078 new_inode->i_ctime = CURRENT_TIME; 7240 new_inode->i_ctime = CURRENT_TIME;
@@ -7090,11 +7252,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7090 new_dentry->d_name.name, 7252 new_dentry->d_name.name,
7091 new_dentry->d_name.len); 7253 new_dentry->d_name.len);
7092 } 7254 }
7093 BUG_ON(ret); 7255 if (!ret && new_inode->i_nlink == 0) {
7094 if (new_inode->i_nlink == 0) {
7095 ret = btrfs_orphan_add(trans, new_dentry->d_inode); 7256 ret = btrfs_orphan_add(trans, new_dentry->d_inode);
7096 BUG_ON(ret); 7257 BUG_ON(ret);
7097 } 7258 }
7259 if (ret) {
7260 btrfs_abort_transaction(trans, root, ret);
7261 goto out_fail;
7262 }
7098 } 7263 }
7099 7264
7100 fixup_inode_flags(new_dir, old_inode); 7265 fixup_inode_flags(new_dir, old_inode);
@@ -7102,7 +7267,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7102 ret = btrfs_add_link(trans, new_dir, old_inode, 7267 ret = btrfs_add_link(trans, new_dir, old_inode,
7103 new_dentry->d_name.name, 7268 new_dentry->d_name.name,
7104 new_dentry->d_name.len, 0, index); 7269 new_dentry->d_name.len, 0, index);
7105 BUG_ON(ret); 7270 if (ret) {
7271 btrfs_abort_transaction(trans, root, ret);
7272 goto out_fail;
7273 }
7106 7274
7107 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { 7275 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
7108 struct dentry *parent = new_dentry->d_parent; 7276 struct dentry *parent = new_dentry->d_parent;
@@ -7315,7 +7483,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7315 } 7483 }
7316 7484
7317 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 7485 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
7318 0, *alloc_hint, (u64)-1, &ins, 1); 7486 0, *alloc_hint, &ins, 1);
7319 if (ret) { 7487 if (ret) {
7320 if (own_trans) 7488 if (own_trans)
7321 btrfs_end_transaction(trans, root); 7489 btrfs_end_transaction(trans, root);
@@ -7327,7 +7495,12 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7327 ins.offset, ins.offset, 7495 ins.offset, ins.offset,
7328 ins.offset, 0, 0, 0, 7496 ins.offset, 0, 0, 0,
7329 BTRFS_FILE_EXTENT_PREALLOC); 7497 BTRFS_FILE_EXTENT_PREALLOC);
7330 BUG_ON(ret); 7498 if (ret) {
7499 btrfs_abort_transaction(trans, root, ret);
7500 if (own_trans)
7501 btrfs_end_transaction(trans, root);
7502 break;
7503 }
7331 btrfs_drop_extent_cache(inode, cur_offset, 7504 btrfs_drop_extent_cache(inode, cur_offset,
7332 cur_offset + ins.offset -1, 0); 7505 cur_offset + ins.offset -1, 0);
7333 7506
@@ -7349,7 +7522,13 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7349 } 7522 }
7350 7523
7351 ret = btrfs_update_inode(trans, root, inode); 7524 ret = btrfs_update_inode(trans, root, inode);
7352 BUG_ON(ret); 7525
7526 if (ret) {
7527 btrfs_abort_transaction(trans, root, ret);
7528 if (own_trans)
7529 btrfs_end_transaction(trans, root);
7530 break;
7531 }
7353 7532
7354 if (own_trans) 7533 if (own_trans)
7355 btrfs_end_transaction(trans, root); 7534 btrfs_end_transaction(trans, root);
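
In the __btrfs_prealloc_file_range() hunks above, every error branch now pairs the abort with btrfs_end_transaction() before breaking, but only when own_trans says this function opened the transaction itself. A minimal sketch of that ownership rule, with struct trans, reserve() and the loop bounds as stand-ins rather than btrfs types:

#include <errno.h>
#include <stdio.h>

struct trans { int open; };

/* Toy stand-ins; reserve() fails on the third pass. */
static void end_trans(struct trans *t) { t->open = 0; }
static int reserve(int i) { return i == 2 ? -ENOSPC : 0; }

static int prealloc_sketch(struct trans *t, int own_trans)
{
	int ret = 0;

	for (int i = 0; i < 4; i++) {
		if (own_trans)
			t->open = 1;	/* start a fresh transaction */
		ret = reserve(i);
		if (ret) {
			/* the abort is logged here in btrfs; end the
			 * transaction only if we started it ourselves */
			if (own_trans)
				end_trans(t);
			break;
		}
		if (own_trans)
			end_trans(t);	/* normal end of this pass */
	}
	return ret;
}

int main(void)
{
	struct trans t = { 0 };

	printf("prealloc_sketch() = %d\n", prealloc_sketch(&t, 1));
	return 0;
}
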
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d8b54715c2de..18cc23d164a8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -425,22 +425,37 @@ static noinline int create_subvol(struct btrfs_root *root,
425 425
426 key.offset = (u64)-1; 426 key.offset = (u64)-1;
427 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); 427 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
428 BUG_ON(IS_ERR(new_root)); 428 if (IS_ERR(new_root)) {
429 btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
430 ret = PTR_ERR(new_root);
431 goto fail;
432 }
429 433
430 btrfs_record_root_in_trans(trans, new_root); 434 btrfs_record_root_in_trans(trans, new_root);
431 435
432 ret = btrfs_create_subvol_root(trans, new_root, new_dirid); 436 ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
437 if (ret) {
438 /* We potentially lose an unused inode item here */
439 btrfs_abort_transaction(trans, root, ret);
440 goto fail;
441 }
442
433 /* 443 /*
434 * insert the directory item 444 * insert the directory item
435 */ 445 */
436 ret = btrfs_set_inode_index(dir, &index); 446 ret = btrfs_set_inode_index(dir, &index);
437 BUG_ON(ret); 447 if (ret) {
448 btrfs_abort_transaction(trans, root, ret);
449 goto fail;
450 }
438 451
439 ret = btrfs_insert_dir_item(trans, root, 452 ret = btrfs_insert_dir_item(trans, root,
440 name, namelen, dir, &key, 453 name, namelen, dir, &key,
441 BTRFS_FT_DIR, index); 454 BTRFS_FT_DIR, index);
442 if (ret) 455 if (ret) {
456 btrfs_abort_transaction(trans, root, ret);
443 goto fail; 457 goto fail;
458 }
444 459
445 btrfs_i_size_write(dir, dir->i_size + namelen * 2); 460 btrfs_i_size_write(dir, dir->i_size + namelen * 2);
446 ret = btrfs_update_inode(trans, root, dir); 461 ret = btrfs_update_inode(trans, root, dir);
@@ -769,6 +784,31 @@ none:
769 return -ENOENT; 784 return -ENOENT;
770} 785}
771 786
787/*
 788 * Validity check of prev em and next em:
 789 * 1) no prev/next em
 790 * 2) prev/next em is a hole/inline extent
791 */
792static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
793{
794 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
795 struct extent_map *prev = NULL, *next = NULL;
796 int ret = 0;
797
798 read_lock(&em_tree->lock);
799 prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1);
800 next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
801 read_unlock(&em_tree->lock);
802
803 if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) &&
804 (!next || next->block_start >= EXTENT_MAP_LAST_BYTE))
805 ret = 1;
806 free_extent_map(prev);
807 free_extent_map(next);
808
809 return ret;
810}
811
772static int should_defrag_range(struct inode *inode, u64 start, u64 len, 812static int should_defrag_range(struct inode *inode, u64 start, u64 len,
773 int thresh, u64 *last_len, u64 *skip, 813 int thresh, u64 *last_len, u64 *skip,
774 u64 *defrag_end) 814 u64 *defrag_end)
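
check_adjacent_extents(), added above, asks whether the extents on either side of em are worth merging with: if each neighbour is missing or is a hole/inline extent (block_start at or beyond EXTENT_MAP_LAST_BYTE), defragmenting this extent cannot produce a larger contiguous run, so should_defrag_range() skips it. A small model of the test; LAST_BYTE and struct em are simplified stand-ins:

#include <stdio.h>

/* Sentinel modeled on EXTENT_MAP_LAST_BYTE: block_start values at or
 * beyond it mean "hole or inline extent". The value is illustrative. */
#define LAST_BYTE 1000u

struct em { unsigned int block_start; };

/* 1 when neither neighbour is a real extent; prev/next may be NULL. */
static int nothing_to_merge(const struct em *prev, const struct em *next)
{
	return (!prev || prev->block_start >= LAST_BYTE) &&
	       (!next || next->block_start >= LAST_BYTE);
}

int main(void)
{
	struct em hole = { LAST_BYTE }, real = { 123 };

	printf("%d\n", nothing_to_merge(&hole, NULL));  /* 1: skip it */
	printf("%d\n", nothing_to_merge(&hole, &real)); /* 0: defrag it */
	return 0;
}
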
@@ -797,17 +837,25 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
797 837
798 if (!em) { 838 if (!em) {
799 /* get the big lock and read metadata off disk */ 839 /* get the big lock and read metadata off disk */
800 lock_extent(io_tree, start, start + len - 1, GFP_NOFS); 840 lock_extent(io_tree, start, start + len - 1);
801 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 841 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
802 unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); 842 unlock_extent(io_tree, start, start + len - 1);
803 843
804 if (IS_ERR(em)) 844 if (IS_ERR(em))
805 return 0; 845 return 0;
806 } 846 }
807 847
808 /* this will cover holes, and inline extents */ 848 /* this will cover holes, and inline extents */
809 if (em->block_start >= EXTENT_MAP_LAST_BYTE) 849 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
810 ret = 0; 850 ret = 0;
851 goto out;
852 }
853
854 /* If we have nothing to merge with us, just skip. */
855 if (check_adjacent_extents(inode, em)) {
856 ret = 0;
857 goto out;
858 }
811 859
812 /* 860 /*
813 * we hit a real extent, if it is big don't bother defragging it again 861 * we hit a real extent, if it is big don't bother defragging it again
@@ -815,6 +863,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
815 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) 863 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
816 ret = 0; 864 ret = 0;
817 865
866out:
818 /* 867 /*
819 * last_len ends up being a counter of how many bytes we've defragged. 868 * last_len ends up being a counter of how many bytes we've defragged.
820 * every time we choose not to defrag an extent, we reset *last_len 869 * every time we choose not to defrag an extent, we reset *last_len
@@ -856,6 +905,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
856 u64 isize = i_size_read(inode); 905 u64 isize = i_size_read(inode);
857 u64 page_start; 906 u64 page_start;
858 u64 page_end; 907 u64 page_end;
908 u64 page_cnt;
859 int ret; 909 int ret;
860 int i; 910 int i;
861 int i_done; 911 int i_done;
@@ -864,19 +914,21 @@ static int cluster_pages_for_defrag(struct inode *inode,
864 struct extent_io_tree *tree; 914 struct extent_io_tree *tree;
865 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 915 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
866 916
867 if (isize == 0)
868 return 0;
869 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 917 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
918 if (!isize || start_index > file_end)
919 return 0;
920
921 page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
870 922
871 ret = btrfs_delalloc_reserve_space(inode, 923 ret = btrfs_delalloc_reserve_space(inode,
872 num_pages << PAGE_CACHE_SHIFT); 924 page_cnt << PAGE_CACHE_SHIFT);
873 if (ret) 925 if (ret)
874 return ret; 926 return ret;
875 i_done = 0; 927 i_done = 0;
876 tree = &BTRFS_I(inode)->io_tree; 928 tree = &BTRFS_I(inode)->io_tree;
877 929
878 /* step one, lock all the pages */ 930 /* step one, lock all the pages */
879 for (i = 0; i < num_pages; i++) { 931 for (i = 0; i < page_cnt; i++) {
880 struct page *page; 932 struct page *page;
881again: 933again:
882 page = find_or_create_page(inode->i_mapping, 934 page = find_or_create_page(inode->i_mapping,
@@ -887,10 +939,10 @@ again:
887 page_start = page_offset(page); 939 page_start = page_offset(page);
888 page_end = page_start + PAGE_CACHE_SIZE - 1; 940 page_end = page_start + PAGE_CACHE_SIZE - 1;
889 while (1) { 941 while (1) {
890 lock_extent(tree, page_start, page_end, GFP_NOFS); 942 lock_extent(tree, page_start, page_end);
891 ordered = btrfs_lookup_ordered_extent(inode, 943 ordered = btrfs_lookup_ordered_extent(inode,
892 page_start); 944 page_start);
893 unlock_extent(tree, page_start, page_end, GFP_NOFS); 945 unlock_extent(tree, page_start, page_end);
894 if (!ordered) 946 if (!ordered)
895 break; 947 break;
896 948
@@ -898,6 +950,15 @@ again:
898 btrfs_start_ordered_extent(inode, ordered, 1); 950 btrfs_start_ordered_extent(inode, ordered, 1);
899 btrfs_put_ordered_extent(ordered); 951 btrfs_put_ordered_extent(ordered);
900 lock_page(page); 952 lock_page(page);
953 /*
 954 * we unlocked the page above, so we need to check if
955 * it was released or not.
956 */
957 if (page->mapping != inode->i_mapping) {
958 unlock_page(page);
959 page_cache_release(page);
960 goto again;
961 }
901 } 962 }
902 963
903 if (!PageUptodate(page)) { 964 if (!PageUptodate(page)) {
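
The recheck added above closes a race: btrfs_start_ordered_extent() runs with the page unlocked, so by the time lock_page() returns the page may have been truncated out of the mapping, and the cluster must not treat a stale page as its own. A stripped-down sketch of the revalidate-after-relock pattern; struct page here is a stub, not the kernel's:

#include <stdio.h>

struct page { void *mapping; };

static void lock_page(struct page *p)   { (void)p; }
static void unlock_page(struct page *p) { (void)p; }

/*
 * While the page was unlocked it may have been detached from the
 * mapping we expect, so compare page->mapping after relocking before
 * trusting the page; on mismatch the caller retries from scratch.
 */
static int page_still_ours(struct page *p, void *our_mapping)
{
	lock_page(p);
	if (p->mapping != our_mapping) {
		unlock_page(p);
		return 0;	/* caller drops its reference and retries */
	}
	return 1;		/* returned with the page locked */
}

int main(void)
{
	int ours, theirs;
	struct page p = { &theirs };

	printf("still ours? %d\n", page_still_ours(&p, &ours));
	return 0;
}
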
@@ -911,15 +972,6 @@ again:
911 } 972 }
912 } 973 }
913 974
914 isize = i_size_read(inode);
915 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
916 if (!isize || page->index > file_end) {
917 /* whoops, we blew past eof, skip this page */
918 unlock_page(page);
919 page_cache_release(page);
920 break;
921 }
922
923 if (page->mapping != inode->i_mapping) { 975 if (page->mapping != inode->i_mapping) {
924 unlock_page(page); 976 unlock_page(page);
925 page_cache_release(page); 977 page_cache_release(page);
@@ -946,19 +998,18 @@ again:
946 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; 998 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
947 999
948 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1000 lock_extent_bits(&BTRFS_I(inode)->io_tree,
949 page_start, page_end - 1, 0, &cached_state, 1001 page_start, page_end - 1, 0, &cached_state);
950 GFP_NOFS);
951 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, 1002 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
952 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 1003 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
953 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, 1004 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
954 GFP_NOFS); 1005 GFP_NOFS);
955 1006
956 if (i_done != num_pages) { 1007 if (i_done != page_cnt) {
957 spin_lock(&BTRFS_I(inode)->lock); 1008 spin_lock(&BTRFS_I(inode)->lock);
958 BTRFS_I(inode)->outstanding_extents++; 1009 BTRFS_I(inode)->outstanding_extents++;
959 spin_unlock(&BTRFS_I(inode)->lock); 1010 spin_unlock(&BTRFS_I(inode)->lock);
960 btrfs_delalloc_release_space(inode, 1011 btrfs_delalloc_release_space(inode,
961 (num_pages - i_done) << PAGE_CACHE_SHIFT); 1012 (page_cnt - i_done) << PAGE_CACHE_SHIFT);
962 } 1013 }
963 1014
964 1015
@@ -983,7 +1034,7 @@ out:
983 unlock_page(pages[i]); 1034 unlock_page(pages[i]);
984 page_cache_release(pages[i]); 1035 page_cache_release(pages[i]);
985 } 1036 }
986 btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); 1037 btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT);
987 return ret; 1038 return ret;
988 1039
989} 1040}
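
cluster_pages_for_defrag() now bounds its work to pages before EOF up front: page_cnt is clamped once, and that same count drives the delalloc reservation, the release of the unused tail, and the error-path release, where the old per-page EOF check inside the loop could let reservation and release drift apart. A sketch of the clamp, with PAGE_SHIFT and the sizes as illustrative values:

#include <stdio.h>

#define PAGE_SHIFT 12

/* Clamp the page count to what exists before EOF, so reserve and
 * release always cover the same number of pages. */
static unsigned long clamp_pages(unsigned long long isize,
				 unsigned long start_index,
				 unsigned long num_pages)
{
	unsigned long long file_end;

	if (!isize)
		return 0;
	file_end = (isize - 1) >> PAGE_SHIFT;
	if (start_index > file_end)
		return 0;
	if (num_pages > file_end - start_index + 1)
		num_pages = file_end - start_index + 1;
	return num_pages;
}

int main(void)
{
	/* 5-page file, cluster of 16 starting at page 3: only 2 pages */
	printf("%lu\n", clamp_pages(5ULL << PAGE_SHIFT, 3, 16));
	return 0;
}
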
@@ -1089,12 +1140,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1089 if (!(inode->i_sb->s_flags & MS_ACTIVE)) 1140 if (!(inode->i_sb->s_flags & MS_ACTIVE))
1090 break; 1141 break;
1091 1142
1092 if (!newer_than && 1143 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1093 !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1144 PAGE_CACHE_SIZE, extent_thresh,
1094 PAGE_CACHE_SIZE, 1145 &last_len, &skip, &defrag_end)) {
1095 extent_thresh,
1096 &last_len, &skip,
1097 &defrag_end)) {
1098 unsigned long next; 1146 unsigned long next;
1099 /* 1147 /*
1100 * the should_defrag function tells us how much to skip 1148 * the should_defrag function tells us how much to skip
@@ -1123,17 +1171,24 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1123 ra_index += max_cluster; 1171 ra_index += max_cluster;
1124 } 1172 }
1125 1173
1174 mutex_lock(&inode->i_mutex);
1126 ret = cluster_pages_for_defrag(inode, pages, i, cluster); 1175 ret = cluster_pages_for_defrag(inode, pages, i, cluster);
1127 if (ret < 0) 1176 if (ret < 0) {
1177 mutex_unlock(&inode->i_mutex);
1128 goto out_ra; 1178 goto out_ra;
1179 }
1129 1180
1130 defrag_count += ret; 1181 defrag_count += ret;
1131 balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); 1182 balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
1183 mutex_unlock(&inode->i_mutex);
1132 1184
1133 if (newer_than) { 1185 if (newer_than) {
1134 if (newer_off == (u64)-1) 1186 if (newer_off == (u64)-1)
1135 break; 1187 break;
1136 1188
1189 if (ret > 0)
1190 i += ret;
1191
1137 newer_off = max(newer_off + 1, 1192 newer_off = max(newer_off + 1,
1138 (u64)i << PAGE_CACHE_SHIFT); 1193 (u64)i << PAGE_CACHE_SHIFT);
1139 1194
@@ -1966,7 +2021,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1966 dest->root_key.objectid, 2021 dest->root_key.objectid,
1967 dentry->d_name.name, 2022 dentry->d_name.name,
1968 dentry->d_name.len); 2023 dentry->d_name.len);
1969 BUG_ON(ret); 2024 if (ret) {
2025 err = ret;
2026 btrfs_abort_transaction(trans, root, ret);
2027 goto out_end_trans;
2028 }
1970 2029
1971 btrfs_record_root_in_trans(trans, dest); 2030 btrfs_record_root_in_trans(trans, dest);
1972 2031
@@ -1979,11 +2038,16 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1979 ret = btrfs_insert_orphan_item(trans, 2038 ret = btrfs_insert_orphan_item(trans,
1980 root->fs_info->tree_root, 2039 root->fs_info->tree_root,
1981 dest->root_key.objectid); 2040 dest->root_key.objectid);
1982 BUG_ON(ret); 2041 if (ret) {
2042 btrfs_abort_transaction(trans, root, ret);
2043 err = ret;
2044 goto out_end_trans;
2045 }
1983 } 2046 }
1984 2047out_end_trans:
1985 ret = btrfs_end_transaction(trans, root); 2048 ret = btrfs_end_transaction(trans, root);
1986 BUG_ON(ret); 2049 if (ret && !err)
2050 err = ret;
1987 inode->i_flags |= S_DEAD; 2051 inode->i_flags |= S_DEAD;
1988out_up_write: 2052out_up_write:
1989 up_write(&root->fs_info->subvol_sem); 2053 up_write(&root->fs_info->subvol_sem);
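
btrfs_ioctl_snap_destroy() above now records the first failure in err and only lets the btrfs_end_transaction() result through when nothing failed earlier, so the root cause is what reaches user space. The first-error-wins pattern in isolation; step() is a placeholder:

#include <errno.h>
#include <stdio.h>

static int step(int rc) { return rc; }	/* placeholder operation */

int main(void)
{
	int err = 0, ret;

	ret = step(-ENOENT);		/* the real failure */
	if (ret)
		err = ret;
	ret = step(-EIO);		/* cleanup failed afterwards */
	if (ret && !err)
		err = ret;		/* keep -ENOENT, not -EIO */
	printf("err = %d\n", err);
	return 0;
}
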
@@ -2326,13 +2390,13 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2326 another, and lock file content */ 2390 another, and lock file content */
2327 while (1) { 2391 while (1) {
2328 struct btrfs_ordered_extent *ordered; 2392 struct btrfs_ordered_extent *ordered;
2329 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2393 lock_extent(&BTRFS_I(src)->io_tree, off, off+len);
2330 ordered = btrfs_lookup_first_ordered_extent(src, off+len); 2394 ordered = btrfs_lookup_first_ordered_extent(src, off+len);
2331 if (!ordered && 2395 if (!ordered &&
2332 !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, 2396 !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
2333 EXTENT_DELALLOC, 0, NULL)) 2397 EXTENT_DELALLOC, 0, NULL))
2334 break; 2398 break;
2335 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2399 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
2336 if (ordered) 2400 if (ordered)
2337 btrfs_put_ordered_extent(ordered); 2401 btrfs_put_ordered_extent(ordered);
2338 btrfs_wait_ordered_range(src, off, len); 2402 btrfs_wait_ordered_range(src, off, len);
@@ -2447,11 +2511,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2447 new_key.offset, 2511 new_key.offset,
2448 new_key.offset + datal, 2512 new_key.offset + datal,
2449 &hint_byte, 1); 2513 &hint_byte, 1);
2450 BUG_ON(ret); 2514 if (ret) {
2515 btrfs_abort_transaction(trans, root,
2516 ret);
2517 btrfs_end_transaction(trans, root);
2518 goto out;
2519 }
2451 2520
2452 ret = btrfs_insert_empty_item(trans, root, path, 2521 ret = btrfs_insert_empty_item(trans, root, path,
2453 &new_key, size); 2522 &new_key, size);
2454 BUG_ON(ret); 2523 if (ret) {
2524 btrfs_abort_transaction(trans, root,
2525 ret);
2526 btrfs_end_transaction(trans, root);
2527 goto out;
2528 }
2455 2529
2456 leaf = path->nodes[0]; 2530 leaf = path->nodes[0];
2457 slot = path->slots[0]; 2531 slot = path->slots[0];
@@ -2478,7 +2552,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2478 btrfs_ino(inode), 2552 btrfs_ino(inode),
2479 new_key.offset - datao, 2553 new_key.offset - datao,
2480 0); 2554 0);
2481 BUG_ON(ret); 2555 if (ret) {
2556 btrfs_abort_transaction(trans,
2557 root,
2558 ret);
2559 btrfs_end_transaction(trans,
2560 root);
2561 goto out;
2562
2563 }
2482 } 2564 }
2483 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 2565 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
2484 u64 skip = 0; 2566 u64 skip = 0;
@@ -2503,11 +2585,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2503 new_key.offset, 2585 new_key.offset,
2504 new_key.offset + datal, 2586 new_key.offset + datal,
2505 &hint_byte, 1); 2587 &hint_byte, 1);
2506 BUG_ON(ret); 2588 if (ret) {
2589 btrfs_abort_transaction(trans, root,
2590 ret);
2591 btrfs_end_transaction(trans, root);
2592 goto out;
2593 }
2507 2594
2508 ret = btrfs_insert_empty_item(trans, root, path, 2595 ret = btrfs_insert_empty_item(trans, root, path,
2509 &new_key, size); 2596 &new_key, size);
2510 BUG_ON(ret); 2597 if (ret) {
2598 btrfs_abort_transaction(trans, root,
2599 ret);
2600 btrfs_end_transaction(trans, root);
2601 goto out;
2602 }
2511 2603
2512 if (skip) { 2604 if (skip) {
2513 u32 start = 2605 u32 start =
@@ -2541,8 +2633,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2541 btrfs_i_size_write(inode, endoff); 2633 btrfs_i_size_write(inode, endoff);
2542 2634
2543 ret = btrfs_update_inode(trans, root, inode); 2635 ret = btrfs_update_inode(trans, root, inode);
2544 BUG_ON(ret); 2636 if (ret) {
2545 btrfs_end_transaction(trans, root); 2637 btrfs_abort_transaction(trans, root, ret);
2638 btrfs_end_transaction(trans, root);
2639 goto out;
2640 }
2641 ret = btrfs_end_transaction(trans, root);
2546 } 2642 }
2547next: 2643next:
2548 btrfs_release_path(path); 2644 btrfs_release_path(path);
@@ -2551,7 +2647,7 @@ next:
2551 ret = 0; 2647 ret = 0;
2552out: 2648out:
2553 btrfs_release_path(path); 2649 btrfs_release_path(path);
2554 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2650 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
2555out_unlock: 2651out_unlock:
2556 mutex_unlock(&src->i_mutex); 2652 mutex_unlock(&src->i_mutex);
2557 mutex_unlock(&inode->i_mutex); 2653 mutex_unlock(&inode->i_mutex);
@@ -3066,8 +3162,8 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
3066 goto out; 3162 goto out;
3067 3163
3068 extent_item_pos = loi->logical - key.objectid; 3164 extent_item_pos = loi->logical - key.objectid;
3069 ret = iterate_extent_inodes(root->fs_info, path, key.objectid, 3165 ret = iterate_extent_inodes(root->fs_info, key.objectid,
3070 extent_item_pos, build_ino_list, 3166 extent_item_pos, 0, build_ino_list,
3071 inodes); 3167 inodes);
3072 3168
3073 if (ret < 0) 3169 if (ret < 0)
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5e178d8f7167..272f911203ff 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -208,7 +208,7 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
208 * take a spinning write lock. This will wait for both 208 * take a spinning write lock. This will wait for both
209 * blocking readers or writers 209 * blocking readers or writers
210 */ 210 */
211int btrfs_tree_lock(struct extent_buffer *eb) 211void btrfs_tree_lock(struct extent_buffer *eb)
212{ 212{
213again: 213again:
214 wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); 214 wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
@@ -230,13 +230,12 @@ again:
230 atomic_inc(&eb->spinning_writers); 230 atomic_inc(&eb->spinning_writers);
231 atomic_inc(&eb->write_locks); 231 atomic_inc(&eb->write_locks);
232 eb->lock_owner = current->pid; 232 eb->lock_owner = current->pid;
233 return 0;
234} 233}
235 234
236/* 235/*
237 * drop a spinning or a blocking write lock. 236 * drop a spinning or a blocking write lock.
238 */ 237 */
239int btrfs_tree_unlock(struct extent_buffer *eb) 238void btrfs_tree_unlock(struct extent_buffer *eb)
240{ 239{
241 int blockers = atomic_read(&eb->blocking_writers); 240 int blockers = atomic_read(&eb->blocking_writers);
242 241
@@ -255,7 +254,6 @@ int btrfs_tree_unlock(struct extent_buffer *eb)
255 atomic_dec(&eb->spinning_writers); 254 atomic_dec(&eb->spinning_writers);
256 write_unlock(&eb->lock); 255 write_unlock(&eb->lock);
257 } 256 }
258 return 0;
259} 257}
260 258
261void btrfs_assert_tree_locked(struct extent_buffer *eb) 259void btrfs_assert_tree_locked(struct extent_buffer *eb)
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 17247ddb81a0..ca52681e5f40 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -24,8 +24,8 @@
24#define BTRFS_WRITE_LOCK_BLOCKING 3 24#define BTRFS_WRITE_LOCK_BLOCKING 3
25#define BTRFS_READ_LOCK_BLOCKING 4 25#define BTRFS_READ_LOCK_BLOCKING 4
26 26
27int btrfs_tree_lock(struct extent_buffer *eb); 27void btrfs_tree_lock(struct extent_buffer *eb);
28int btrfs_tree_unlock(struct extent_buffer *eb); 28void btrfs_tree_unlock(struct extent_buffer *eb);
29int btrfs_try_spin_lock(struct extent_buffer *eb); 29int btrfs_try_spin_lock(struct extent_buffer *eb);
30 30
31void btrfs_tree_read_lock(struct extent_buffer *eb); 31void btrfs_tree_read_lock(struct extent_buffer *eb);
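
The locking.c changes above, with the header prototypes updated in lockstep, remove return values that were always 0: btrfs_tree_lock() and btrfs_tree_unlock() cannot fail, and a void prototype makes any caller-side error handling for them visibly dead code. Sketched in miniature; tree_lock()/tree_unlock() below are toy stand-ins, not the real spinning/blocking lock:

#include <stdio.h>

static int locked;

/* A lock that cannot fail: void return states that in the type,
 * where "int ... return 0;" only invited dead error handling. */
static void tree_lock(void)   { locked = 1; }
static void tree_unlock(void) { locked = 0; }

int main(void)
{
	tree_lock();
	printf("locked = %d\n", locked);
	tree_unlock();
	printf("locked = %d\n", locked);
	return 0;
}
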
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a1c940425307..bbf6d0d9aebe 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -59,6 +59,14 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
59 return NULL; 59 return NULL;
60} 60}
61 61
62static void ordered_data_tree_panic(struct inode *inode, int errno,
63 u64 offset)
64{
65 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
66 btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
67 "%llu\n", (unsigned long long)offset);
68}
69
62/* 70/*
63 * look for a given offset in the tree, and if it can't be found return the 71 * look for a given offset in the tree, and if it can't be found return the
64 * first lesser offset 72 * first lesser offset
@@ -207,7 +215,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
207 spin_lock(&tree->lock); 215 spin_lock(&tree->lock);
208 node = tree_insert(&tree->tree, file_offset, 216 node = tree_insert(&tree->tree, file_offset,
209 &entry->rb_node); 217 &entry->rb_node);
210 BUG_ON(node); 218 if (node)
219 ordered_data_tree_panic(inode, -EEXIST, file_offset);
211 spin_unlock(&tree->lock); 220 spin_unlock(&tree->lock);
212 221
213 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 222 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
@@ -215,7 +224,6 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
215 &BTRFS_I(inode)->root->fs_info->ordered_extents); 224 &BTRFS_I(inode)->root->fs_info->ordered_extents);
216 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 225 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
217 226
218 BUG_ON(node);
219 return 0; 227 return 0;
220} 228}
221 229
@@ -249,9 +257,9 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
249 * when an ordered extent is finished. If the list covers more than one 257 * when an ordered extent is finished. If the list covers more than one
250 * ordered extent, it is split across multiples. 258 * ordered extent, it is split across multiples.
251 */ 259 */
252int btrfs_add_ordered_sum(struct inode *inode, 260void btrfs_add_ordered_sum(struct inode *inode,
253 struct btrfs_ordered_extent *entry, 261 struct btrfs_ordered_extent *entry,
254 struct btrfs_ordered_sum *sum) 262 struct btrfs_ordered_sum *sum)
255{ 263{
256 struct btrfs_ordered_inode_tree *tree; 264 struct btrfs_ordered_inode_tree *tree;
257 265
@@ -259,7 +267,6 @@ int btrfs_add_ordered_sum(struct inode *inode,
259 spin_lock(&tree->lock); 267 spin_lock(&tree->lock);
260 list_add_tail(&sum->list, &entry->list); 268 list_add_tail(&sum->list, &entry->list);
261 spin_unlock(&tree->lock); 269 spin_unlock(&tree->lock);
262 return 0;
263} 270}
264 271
265/* 272/*
@@ -384,7 +391,7 @@ out:
384 * used to drop a reference on an ordered extent. This will free 391 * used to drop a reference on an ordered extent. This will free
385 * the extent if the last reference is dropped 392 * the extent if the last reference is dropped
386 */ 393 */
387int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) 394void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
388{ 395{
389 struct list_head *cur; 396 struct list_head *cur;
390 struct btrfs_ordered_sum *sum; 397 struct btrfs_ordered_sum *sum;
@@ -400,7 +407,6 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
400 } 407 }
401 kfree(entry); 408 kfree(entry);
402 } 409 }
403 return 0;
404} 410}
405 411
406/* 412/*
@@ -408,8 +414,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
408 * and you must wake_up entry->wait. You must hold the tree lock 414 * and you must wake_up entry->wait. You must hold the tree lock
409 * while you call this function. 415 * while you call this function.
410 */ 416 */
411static int __btrfs_remove_ordered_extent(struct inode *inode, 417static void __btrfs_remove_ordered_extent(struct inode *inode,
412 struct btrfs_ordered_extent *entry) 418 struct btrfs_ordered_extent *entry)
413{ 419{
414 struct btrfs_ordered_inode_tree *tree; 420 struct btrfs_ordered_inode_tree *tree;
415 struct btrfs_root *root = BTRFS_I(inode)->root; 421 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -436,35 +442,30 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
436 list_del_init(&BTRFS_I(inode)->ordered_operations); 442 list_del_init(&BTRFS_I(inode)->ordered_operations);
437 } 443 }
438 spin_unlock(&root->fs_info->ordered_extent_lock); 444 spin_unlock(&root->fs_info->ordered_extent_lock);
439
440 return 0;
441} 445}
442 446
443/* 447/*
444 * remove an ordered extent from the tree. No references are dropped 448 * remove an ordered extent from the tree. No references are dropped
445 * but any waiters are woken. 449 * but any waiters are woken.
446 */ 450 */
447int btrfs_remove_ordered_extent(struct inode *inode, 451void btrfs_remove_ordered_extent(struct inode *inode,
448 struct btrfs_ordered_extent *entry) 452 struct btrfs_ordered_extent *entry)
449{ 453{
450 struct btrfs_ordered_inode_tree *tree; 454 struct btrfs_ordered_inode_tree *tree;
451 int ret;
452 455
453 tree = &BTRFS_I(inode)->ordered_tree; 456 tree = &BTRFS_I(inode)->ordered_tree;
454 spin_lock(&tree->lock); 457 spin_lock(&tree->lock);
455 ret = __btrfs_remove_ordered_extent(inode, entry); 458 __btrfs_remove_ordered_extent(inode, entry);
456 spin_unlock(&tree->lock); 459 spin_unlock(&tree->lock);
457 wake_up(&entry->wait); 460 wake_up(&entry->wait);
458
459 return ret;
460} 461}
461 462
462/* 463/*
463 * wait for all the ordered extents in a root. This is done when balancing 464 * wait for all the ordered extents in a root. This is done when balancing
464 * space between drives. 465 * space between drives.
465 */ 466 */
466int btrfs_wait_ordered_extents(struct btrfs_root *root, 467void btrfs_wait_ordered_extents(struct btrfs_root *root,
467 int nocow_only, int delay_iput) 468 int nocow_only, int delay_iput)
468{ 469{
469 struct list_head splice; 470 struct list_head splice;
470 struct list_head *cur; 471 struct list_head *cur;
@@ -512,7 +513,6 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root,
512 spin_lock(&root->fs_info->ordered_extent_lock); 513 spin_lock(&root->fs_info->ordered_extent_lock);
513 } 514 }
514 spin_unlock(&root->fs_info->ordered_extent_lock); 515 spin_unlock(&root->fs_info->ordered_extent_lock);
515 return 0;
516} 516}
517 517
518/* 518/*
@@ -525,7 +525,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root,
525 * extra check to make sure the ordered operation list really is empty 525 * extra check to make sure the ordered operation list really is empty
526 * before we return 526 * before we return
527 */ 527 */
528int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) 528void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
529{ 529{
530 struct btrfs_inode *btrfs_inode; 530 struct btrfs_inode *btrfs_inode;
531 struct inode *inode; 531 struct inode *inode;
@@ -573,8 +573,6 @@ again:
573 573
574 spin_unlock(&root->fs_info->ordered_extent_lock); 574 spin_unlock(&root->fs_info->ordered_extent_lock);
575 mutex_unlock(&root->fs_info->ordered_operations_mutex); 575 mutex_unlock(&root->fs_info->ordered_operations_mutex);
576
577 return 0;
578} 576}
579 577
580/* 578/*
@@ -609,7 +607,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
609/* 607/*
610 * Used to wait on ordered extents across a large range of bytes. 608 * Used to wait on ordered extents across a large range of bytes.
611 */ 609 */
612int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 610void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
613{ 611{
614 u64 end; 612 u64 end;
615 u64 orig_end; 613 u64 orig_end;
@@ -664,7 +662,6 @@ again:
664 schedule_timeout(1); 662 schedule_timeout(1);
665 goto again; 663 goto again;
666 } 664 }
667 return 0;
668} 665}
669 666
670/* 667/*
@@ -948,9 +945,8 @@ out:
948 * If trans is not null, we'll do a friendly check for a transaction that 945 * If trans is not null, we'll do a friendly check for a transaction that
949 * is already flushing things and force the IO down ourselves. 946 * is already flushing things and force the IO down ourselves.
950 */ 947 */
951int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 948void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
952 struct btrfs_root *root, 949 struct btrfs_root *root, struct inode *inode)
953 struct inode *inode)
954{ 950{
955 u64 last_mod; 951 u64 last_mod;
956 952
@@ -961,7 +957,7 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
961 * commit, we can safely return without doing anything 957 * commit, we can safely return without doing anything
962 */ 958 */
963 if (last_mod < root->fs_info->last_trans_committed) 959 if (last_mod < root->fs_info->last_trans_committed)
964 return 0; 960 return;
965 961
966 /* 962 /*
967 * the transaction is already committing. Just start the IO and 963 * the transaction is already committing. Just start the IO and
@@ -969,7 +965,7 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
969 */ 965 */
970 if (trans && root->fs_info->running_transaction->blocked) { 966 if (trans && root->fs_info->running_transaction->blocked) {
971 btrfs_wait_ordered_range(inode, 0, (u64)-1); 967 btrfs_wait_ordered_range(inode, 0, (u64)-1);
972 return 0; 968 return;
973 } 969 }
974 970
975 spin_lock(&root->fs_info->ordered_extent_lock); 971 spin_lock(&root->fs_info->ordered_extent_lock);
@@ -978,6 +974,4 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
978 &root->fs_info->ordered_operations); 974 &root->fs_info->ordered_operations);
979 } 975 }
980 spin_unlock(&root->fs_info->ordered_extent_lock); 976 spin_unlock(&root->fs_info->ordered_extent_lock);
981
982 return 0;
983} 977}
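
The ordered-data.c hunks above give tree_insert() collisions teeth: the helper returns the node already occupying a slot, and __btrfs_add_ordered_extent() now panics on a duplicate instead of BUG_ON() (a second, dead BUG_ON(node) later in the function is dropped outright). A toy model of the insert-or-report-duplicate contract, with an array standing in for the rbtree:

#include <stdio.h>

/* The insert helper hands back the entry already occupying the slot,
 * so the caller can treat a duplicate as corruption rather than
 * silently ignoring it. */
#define SLOTS 8
static const void *tree[SLOTS];

static const void *tree_insert_sketch(unsigned int slot, const void *entry)
{
	if (tree[slot])
		return tree[slot];	/* duplicate: caller panics */
	tree[slot] = entry;
	return NULL;
}

int main(void)
{
	int a, b;

	if (tree_insert_sketch(3, &a))
		fprintf(stderr, "unexpected duplicate\n");
	if (tree_insert_sketch(3, &b))	/* second insert collides */
		fprintf(stderr, "inconsistency in ordered tree\n");
	return 0;
}
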
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index ff1f69aa1883..c355ad4dc1a6 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -138,8 +138,8 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
138 t->last = NULL; 138 t->last = NULL;
139} 139}
140 140
141int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); 141void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
142int btrfs_remove_ordered_extent(struct inode *inode, 142void btrfs_remove_ordered_extent(struct inode *inode,
143 struct btrfs_ordered_extent *entry); 143 struct btrfs_ordered_extent *entry);
144int btrfs_dec_test_ordered_pending(struct inode *inode, 144int btrfs_dec_test_ordered_pending(struct inode *inode,
145 struct btrfs_ordered_extent **cached, 145 struct btrfs_ordered_extent **cached,
@@ -154,14 +154,14 @@ int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
154int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, 154int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
155 u64 start, u64 len, u64 disk_len, 155 u64 start, u64 len, u64 disk_len,
156 int type, int compress_type); 156 int type, int compress_type);
157int btrfs_add_ordered_sum(struct inode *inode, 157void btrfs_add_ordered_sum(struct inode *inode,
158 struct btrfs_ordered_extent *entry, 158 struct btrfs_ordered_extent *entry,
159 struct btrfs_ordered_sum *sum); 159 struct btrfs_ordered_sum *sum);
160struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, 160struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
161 u64 file_offset); 161 u64 file_offset);
162void btrfs_start_ordered_extent(struct inode *inode, 162void btrfs_start_ordered_extent(struct inode *inode,
163 struct btrfs_ordered_extent *entry, int wait); 163 struct btrfs_ordered_extent *entry, int wait);
164int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); 164void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
165struct btrfs_ordered_extent * 165struct btrfs_ordered_extent *
166btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 166btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
167struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, 167struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
@@ -170,10 +170,10 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
170int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 170int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
171 struct btrfs_ordered_extent *ordered); 171 struct btrfs_ordered_extent *ordered);
172int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 172int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
173int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 173void btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
174int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 174void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
175 struct btrfs_root *root, 175 struct btrfs_root *root,
176 struct inode *inode); 176 struct inode *inode);
177int btrfs_wait_ordered_extents(struct btrfs_root *root, 177void btrfs_wait_ordered_extents(struct btrfs_root *root,
178 int nocow_only, int delay_iput); 178 int nocow_only, int delay_iput);
179#endif 179#endif
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index f8be250963a0..24cad1695af7 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -58,7 +58,7 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
59 if (ret < 0) 59 if (ret < 0)
60 goto out; 60 goto out;
61 if (ret) { 61 if (ret) { /* JDM: Really? */
62 ret = -ENOENT; 62 ret = -ENOENT;
63 goto out; 63 goto out;
64 } 64 }
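
The /* JDM: Really? */ annotation above questions the mapping of a positive btrfs_search_slot() return to -ENOENT. The convention being relied on: negative means a hard failure, 0 means the key was found, positive means the search stopped at the slot where the key would be inserted. A user-space model of that tri-state; search_slot() below is a stand-in, not the btrfs function:

#include <errno.h>
#include <stdio.h>

/* Tri-state search: <0 hard failure, 0 found, >0 "not found, but
 * here is where it would go". */
static int search_slot(int key, const int *items, int n)
{
	if (!items)
		return -EINVAL;
	for (int i = 0; i < n; i++)
		if (items[i] == key)
			return 0;
	return 1;
}

int main(void)
{
	int items[] = { 2, 4, 6 };
	int ret = search_slot(5, items, 3);

	if (ret > 0)		/* as in btrfs_del_orphan_item() */
		ret = -ENOENT;
	printf("ret = %d\n", ret);
	return 0;
}
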
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 22db04550f6a..dc5d33146fdb 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -54,7 +54,6 @@
54 * than the 2 started one after another. 54 * than the 2 started one after another.
55 */ 55 */
56 56
57#define MAX_MIRRORS 2
58#define MAX_IN_FLIGHT 6 57#define MAX_IN_FLIGHT 6
59 58
60struct reada_extctl { 59struct reada_extctl {
@@ -71,7 +70,7 @@ struct reada_extent {
71 struct list_head extctl; 70 struct list_head extctl;
72 struct kref refcnt; 71 struct kref refcnt;
73 spinlock_t lock; 72 spinlock_t lock;
74 struct reada_zone *zones[MAX_MIRRORS]; 73 struct reada_zone *zones[BTRFS_MAX_MIRRORS];
75 int nzones; 74 int nzones;
76 struct btrfs_device *scheduled_for; 75 struct btrfs_device *scheduled_for;
77}; 76};
@@ -84,7 +83,8 @@ struct reada_zone {
84 spinlock_t lock; 83 spinlock_t lock;
85 int locked; 84 int locked;
86 struct btrfs_device *device; 85 struct btrfs_device *device;
87 struct btrfs_device *devs[MAX_MIRRORS]; /* full list, incl self */ 86 struct btrfs_device *devs[BTRFS_MAX_MIRRORS]; /* full list, incl
87 * self */
88 int ndevs; 88 int ndevs;
89 struct kref refcnt; 89 struct kref refcnt;
90}; 90};
@@ -365,9 +365,9 @@ again:
365 if (ret || !bbio || length < blocksize) 365 if (ret || !bbio || length < blocksize)
366 goto error; 366 goto error;
367 367
368 if (bbio->num_stripes > MAX_MIRRORS) { 368 if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
369 printk(KERN_ERR "btrfs readahead: more than %d copies not " 369 printk(KERN_ERR "btrfs readahead: more than %d copies not "
370 "supported", MAX_MIRRORS); 370 "supported", BTRFS_MAX_MIRRORS);
371 goto error; 371 goto error;
372 } 372 }
373 373
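
reada.c above drops its file-local MAX_MIRRORS in favour of a BTRFS_MAX_MIRRORS shared with the rest of the filesystem, so the zone/device arrays and the num_stripes bounds check are sized from a single definition. In miniature; the value 3 is illustrative, not necessarily the kernel's:

#include <stdio.h>

/* One shared maximum sizes the arrays and gates the runtime check,
 * so the two can never drift apart. */
#define BTRFS_MAX_MIRRORS 3

static const void *zones[BTRFS_MAX_MIRRORS];

static int add_mirrors(int num_stripes)
{
	if (num_stripes > BTRFS_MAX_MIRRORS) {
		fprintf(stderr, "more than %d copies not supported\n",
			BTRFS_MAX_MIRRORS);
		return -1;
	}
	return 0;
}

int main(void)
{
	(void)zones;
	printf("%d\n", add_mirrors(4));	/* rejected: exceeds the limit */
	return 0;
}
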
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8c1aae2c845d..017281dbb2a7 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -326,6 +326,19 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
326 return NULL; 326 return NULL;
327} 327}
328 328
329void backref_tree_panic(struct rb_node *rb_node, int errno,
330 u64 bytenr)
331{
332
333 struct btrfs_fs_info *fs_info = NULL;
334 struct backref_node *bnode = rb_entry(rb_node, struct backref_node,
335 rb_node);
336 if (bnode->root)
337 fs_info = bnode->root->fs_info;
338 btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
339 "found at offset %llu\n", (unsigned long long)bytenr);
340}
341
329/* 342/*
330 * walk up backref nodes until reach node presents tree root 343 * walk up backref nodes until reach node presents tree root
331 */ 344 */
@@ -452,7 +465,8 @@ static void update_backref_node(struct backref_cache *cache,
452 rb_erase(&node->rb_node, &cache->rb_root); 465 rb_erase(&node->rb_node, &cache->rb_root);
453 node->bytenr = bytenr; 466 node->bytenr = bytenr;
454 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); 467 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
455 BUG_ON(rb_node); 468 if (rb_node)
469 backref_tree_panic(rb_node, -EEXIST, bytenr);
456} 470}
457 471
458/* 472/*
@@ -999,7 +1013,8 @@ next:
999 if (!cowonly) { 1013 if (!cowonly) {
1000 rb_node = tree_insert(&cache->rb_root, node->bytenr, 1014 rb_node = tree_insert(&cache->rb_root, node->bytenr,
1001 &node->rb_node); 1015 &node->rb_node);
1002 BUG_ON(rb_node); 1016 if (rb_node)
1017 backref_tree_panic(rb_node, -EEXIST, node->bytenr);
1003 list_add_tail(&node->lower, &cache->leaves); 1018 list_add_tail(&node->lower, &cache->leaves);
1004 } 1019 }
1005 1020
@@ -1034,7 +1049,9 @@ next:
1034 if (!cowonly) { 1049 if (!cowonly) {
1035 rb_node = tree_insert(&cache->rb_root, upper->bytenr, 1050 rb_node = tree_insert(&cache->rb_root, upper->bytenr,
1036 &upper->rb_node); 1051 &upper->rb_node);
1037 BUG_ON(rb_node); 1052 if (rb_node)
1053 backref_tree_panic(rb_node, -EEXIST,
1054 upper->bytenr);
1038 } 1055 }
1039 1056
1040 list_add_tail(&edge->list[UPPER], &upper->lower); 1057 list_add_tail(&edge->list[UPPER], &upper->lower);
@@ -1180,7 +1197,8 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
1180 1197
1181 rb_node = tree_insert(&cache->rb_root, new_node->bytenr, 1198 rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
1182 &new_node->rb_node); 1199 &new_node->rb_node);
1183 BUG_ON(rb_node); 1200 if (rb_node)
1201 backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);
1184 1202
1185 if (!new_node->lowest) { 1203 if (!new_node->lowest) {
1186 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { 1204 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
@@ -1203,14 +1221,15 @@ fail:
1203/* 1221/*
1204 * helper to add 'address of tree root -> reloc tree' mapping 1222 * helper to add 'address of tree root -> reloc tree' mapping
1205 */ 1223 */
1206static int __add_reloc_root(struct btrfs_root *root) 1224static int __must_check __add_reloc_root(struct btrfs_root *root)
1207{ 1225{
1208 struct rb_node *rb_node; 1226 struct rb_node *rb_node;
1209 struct mapping_node *node; 1227 struct mapping_node *node;
1210 struct reloc_control *rc = root->fs_info->reloc_ctl; 1228 struct reloc_control *rc = root->fs_info->reloc_ctl;
1211 1229
1212 node = kmalloc(sizeof(*node), GFP_NOFS); 1230 node = kmalloc(sizeof(*node), GFP_NOFS);
1213 BUG_ON(!node); 1231 if (!node)
1232 return -ENOMEM;
1214 1233
1215 node->bytenr = root->node->start; 1234 node->bytenr = root->node->start;
1216 node->data = root; 1235 node->data = root;
@@ -1219,7 +1238,12 @@ static int __add_reloc_root(struct btrfs_root *root)
1219 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1238 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1220 node->bytenr, &node->rb_node); 1239 node->bytenr, &node->rb_node);
1221 spin_unlock(&rc->reloc_root_tree.lock); 1240 spin_unlock(&rc->reloc_root_tree.lock);
1222 BUG_ON(rb_node); 1241 if (rb_node) {
1242 kfree(node);
1243 btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
1244 "for start=%llu while inserting into relocation "
1245 "tree\n");
1246 }
1223 1247
1224 list_add_tail(&root->root_list, &rc->reloc_roots); 1248 list_add_tail(&root->root_list, &rc->reloc_roots);
1225 return 0; 1249 return 0;
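
__add_reloc_root() becomes __must_check: the kmalloc() failure that used to be BUG_ON(!node) is returned as -ENOMEM, and on a duplicate insertion the node is freed before panicking so that path no longer leaks. (As merged, the panic format string mentions start=%llu without passing a value for it; the sketch below supplies one, so read it as illustrative rather than a verbatim copy.)

	static int __must_check __add_reloc_root(struct btrfs_root *root)
	{
		struct reloc_control *rc = root->fs_info->reloc_ctl;
		struct mapping_node *node;
		struct rb_node *rb_node;

		node = kmalloc(sizeof(*node), GFP_NOFS);
		if (!node)
			return -ENOMEM;		/* was: BUG_ON(!node) */

		node->bytenr = root->node->start;
		node->data = root;

		spin_lock(&rc->reloc_root_tree.lock);
		rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
				      node->bytenr, &node->rb_node);
		spin_unlock(&rc->reloc_root_tree.lock);
		if (rb_node) {
			u64 bytenr = node->bytenr;

			kfree(node);	/* free before panicking, no leak */
			btrfs_panic(root->fs_info, -EEXIST,
				    "Duplicate root found for start=%llu "
				    "while inserting into relocation tree\n",
				    bytenr);
		}

		list_add_tail(&root->root_list, &rc->reloc_roots);
		return 0;
	}
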
@@ -1252,7 +1276,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
1252 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1276 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1253 node->bytenr, &node->rb_node); 1277 node->bytenr, &node->rb_node);
1254 spin_unlock(&rc->reloc_root_tree.lock); 1278 spin_unlock(&rc->reloc_root_tree.lock);
1255 BUG_ON(rb_node); 1279 if (rb_node)
1280 backref_tree_panic(rb_node, -EEXIST, node->bytenr);
1256 } else { 1281 } else {
1257 list_del_init(&root->root_list); 1282 list_del_init(&root->root_list);
1258 kfree(node); 1283 kfree(node);
@@ -1334,6 +1359,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1334 struct btrfs_root *reloc_root; 1359 struct btrfs_root *reloc_root;
1335 struct reloc_control *rc = root->fs_info->reloc_ctl; 1360 struct reloc_control *rc = root->fs_info->reloc_ctl;
1336 int clear_rsv = 0; 1361 int clear_rsv = 0;
1362 int ret;
1337 1363
1338 if (root->reloc_root) { 1364 if (root->reloc_root) {
1339 reloc_root = root->reloc_root; 1365 reloc_root = root->reloc_root;
@@ -1353,7 +1379,8 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1353 if (clear_rsv) 1379 if (clear_rsv)
1354 trans->block_rsv = NULL; 1380 trans->block_rsv = NULL;
1355 1381
1356 __add_reloc_root(reloc_root); 1382 ret = __add_reloc_root(reloc_root);
1383 BUG_ON(ret < 0);
1357 root->reloc_root = reloc_root; 1384 root->reloc_root = reloc_root;
1358 return 0; 1385 return 0;
1359} 1386}
@@ -1577,15 +1604,14 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1577 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1604 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1578 end--; 1605 end--;
1579 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 1606 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1580 key.offset, end, 1607 key.offset, end);
1581 GFP_NOFS);
1582 if (!ret) 1608 if (!ret)
1583 continue; 1609 continue;
1584 1610
1585 btrfs_drop_extent_cache(inode, key.offset, end, 1611 btrfs_drop_extent_cache(inode, key.offset, end,
1586 1); 1612 1);
1587 unlock_extent(&BTRFS_I(inode)->io_tree, 1613 unlock_extent(&BTRFS_I(inode)->io_tree,
1588 key.offset, end, GFP_NOFS); 1614 key.offset, end);
1589 } 1615 }
1590 } 1616 }
1591 1617
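
These hunks track an API change made elsewhere in the series: the extent-lock helpers drop their gfp_t parameter (every call site in this file passed GFP_NOFS anyway). The before/after shape:

	/* old */
	lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);

	/* new: no allocation mask in the extent-lock API */
	lock_extent(&BTRFS_I(inode)->io_tree, start, end);
	unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
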
@@ -1956,9 +1982,9 @@ static int invalidate_extent_cache(struct btrfs_root *root,
1956 } 1982 }
1957 1983
1958 /* the lock_extent waits for readpage to complete */ 1984 /* the lock_extent waits for readpage to complete */
1959 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1985 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
1960 btrfs_drop_extent_cache(inode, start, end, 1); 1986 btrfs_drop_extent_cache(inode, start, end, 1);
1961 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1987 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
1962 } 1988 }
1963 return 0; 1989 return 0;
1964} 1990}
@@ -2246,7 +2272,8 @@ again:
2246 } else { 2272 } else {
2247 list_del_init(&reloc_root->root_list); 2273 list_del_init(&reloc_root->root_list);
2248 } 2274 }
2249 btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); 2275 ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
2276 BUG_ON(ret < 0);
2250 } 2277 }
2251 2278
2252 if (found) { 2279 if (found) {
@@ -2862,12 +2889,12 @@ int prealloc_file_extent_cluster(struct inode *inode,
2862 else 2889 else
2863 end = cluster->end - offset; 2890 end = cluster->end - offset;
2864 2891
2865 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2892 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2866 num_bytes = end + 1 - start; 2893 num_bytes = end + 1 - start;
2867 ret = btrfs_prealloc_file_range(inode, 0, start, 2894 ret = btrfs_prealloc_file_range(inode, 0, start,
2868 num_bytes, num_bytes, 2895 num_bytes, num_bytes,
2869 end + 1, &alloc_hint); 2896 end + 1, &alloc_hint);
2870 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2897 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2871 if (ret) 2898 if (ret)
2872 break; 2899 break;
2873 nr++; 2900 nr++;
@@ -2899,7 +2926,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2899 em->bdev = root->fs_info->fs_devices->latest_bdev; 2926 em->bdev = root->fs_info->fs_devices->latest_bdev;
2900 set_bit(EXTENT_FLAG_PINNED, &em->flags); 2927 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2901 2928
2902 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2929 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2903 while (1) { 2930 while (1) {
2904 write_lock(&em_tree->lock); 2931 write_lock(&em_tree->lock);
2905 ret = add_extent_mapping(em_tree, em); 2932 ret = add_extent_mapping(em_tree, em);
@@ -2910,7 +2937,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2910 } 2937 }
2911 btrfs_drop_extent_cache(inode, start, end, 0); 2938 btrfs_drop_extent_cache(inode, start, end, 0);
2912 } 2939 }
2913 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2940 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2914 return ret; 2941 return ret;
2915} 2942}
2916 2943
@@ -2990,8 +3017,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
2990 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 3017 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2991 page_end = page_start + PAGE_CACHE_SIZE - 1; 3018 page_end = page_start + PAGE_CACHE_SIZE - 1;
2992 3019
2993 lock_extent(&BTRFS_I(inode)->io_tree, 3020 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
2994 page_start, page_end, GFP_NOFS);
2995 3021
2996 set_page_extent_mapped(page); 3022 set_page_extent_mapped(page);
2997 3023
@@ -3007,7 +3033,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
3007 set_page_dirty(page); 3033 set_page_dirty(page);
3008 3034
3009 unlock_extent(&BTRFS_I(inode)->io_tree, 3035 unlock_extent(&BTRFS_I(inode)->io_tree,
3010 page_start, page_end, GFP_NOFS); 3036 page_start, page_end);
3011 unlock_page(page); 3037 unlock_page(page);
3012 page_cache_release(page); 3038 page_cache_release(page);
3013 3039
@@ -3154,7 +3180,8 @@ static int add_tree_block(struct reloc_control *rc,
3154 block->key_ready = 0; 3180 block->key_ready = 0;
3155 3181
3156 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); 3182 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
3157 BUG_ON(rb_node); 3183 if (rb_node)
3184 backref_tree_panic(rb_node, -EEXIST, block->bytenr);
3158 3185
3159 return 0; 3186 return 0;
3160} 3187}
@@ -3426,7 +3453,9 @@ static int find_data_references(struct reloc_control *rc,
3426 block->key_ready = 1; 3453 block->key_ready = 1;
3427 rb_node = tree_insert(blocks, block->bytenr, 3454 rb_node = tree_insert(blocks, block->bytenr,
3428 &block->rb_node); 3455 &block->rb_node);
3429 BUG_ON(rb_node); 3456 if (rb_node)
3457 backref_tree_panic(rb_node, -EEXIST,
3458 block->bytenr);
3430 } 3459 }
3431 if (counted) 3460 if (counted)
3432 added = 1; 3461 added = 1;
@@ -4073,10 +4102,11 @@ out:
4073static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) 4102static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4074{ 4103{
4075 struct btrfs_trans_handle *trans; 4104 struct btrfs_trans_handle *trans;
4076 int ret; 4105 int ret, err;
4077 4106
4078 trans = btrfs_start_transaction(root->fs_info->tree_root, 0); 4107 trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
4079 BUG_ON(IS_ERR(trans)); 4108 if (IS_ERR(trans))
4109 return PTR_ERR(trans);
4080 4110
4081 memset(&root->root_item.drop_progress, 0, 4111 memset(&root->root_item.drop_progress, 0,
4082 sizeof(root->root_item.drop_progress)); 4112 sizeof(root->root_item.drop_progress));
@@ -4084,11 +4114,11 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4084 btrfs_set_root_refs(&root->root_item, 0); 4114 btrfs_set_root_refs(&root->root_item, 0);
4085 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4115 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4086 &root->root_key, &root->root_item); 4116 &root->root_key, &root->root_item);
4087 BUG_ON(ret);
4088 4117
4089 ret = btrfs_end_transaction(trans, root->fs_info->tree_root); 4118 err = btrfs_end_transaction(trans, root->fs_info->tree_root);
4090 BUG_ON(ret); 4119 if (err)
4091 return 0; 4120 return err;
4121 return ret;
4092} 4122}
4093 4123
4094/* 4124/*
@@ -4156,7 +4186,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4156 err = ret; 4186 err = ret;
4157 goto out; 4187 goto out;
4158 } 4188 }
4159 mark_garbage_root(reloc_root); 4189 ret = mark_garbage_root(reloc_root);
4190 if (ret < 0) {
4191 err = ret;
4192 goto out;
4193 }
4160 } 4194 }
4161 } 4195 }
4162 4196
@@ -4202,13 +4236,19 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4202 4236
4203 fs_root = read_fs_root(root->fs_info, 4237 fs_root = read_fs_root(root->fs_info,
4204 reloc_root->root_key.offset); 4238 reloc_root->root_key.offset);
4205 BUG_ON(IS_ERR(fs_root)); 4239 if (IS_ERR(fs_root)) {
4240 err = PTR_ERR(fs_root);
4241 goto out_free;
4242 }
4206 4243
4207 __add_reloc_root(reloc_root); 4244 err = __add_reloc_root(reloc_root);
4245 BUG_ON(err < 0); /* -ENOMEM or logic error */
4208 fs_root->reloc_root = reloc_root; 4246 fs_root->reloc_root = reloc_root;
4209 } 4247 }
4210 4248
4211 btrfs_commit_transaction(trans, rc->extent_root); 4249 err = btrfs_commit_transaction(trans, rc->extent_root);
4250 if (err)
4251 goto out_free;
4212 4252
4213 merge_reloc_roots(rc); 4253 merge_reloc_roots(rc);
4214 4254
@@ -4218,7 +4258,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4218 if (IS_ERR(trans)) 4258 if (IS_ERR(trans))
4219 err = PTR_ERR(trans); 4259 err = PTR_ERR(trans);
4220 else 4260 else
4221 btrfs_commit_transaction(trans, rc->extent_root); 4261 err = btrfs_commit_transaction(trans, rc->extent_root);
4222out_free: 4262out_free:
4223 kfree(rc); 4263 kfree(rc);
4224out: 4264out:
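
btrfs_recover_relocation() stops ignoring return values: mark_garbage_root() and read_fs_root() errors now unwind through err, and even btrfs_commit_transaction() is checked. The error paths, condensed from the hunks above:

	ret = mark_garbage_root(reloc_root);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	/* ... */
	fs_root = read_fs_root(root->fs_info, reloc_root->root_key.offset);
	if (IS_ERR(fs_root)) {
		err = PTR_ERR(fs_root);
		goto out_free;		/* was: BUG_ON(IS_ERR(fs_root)) */
	}
	/* ... */
	err = btrfs_commit_transaction(trans, rc->extent_root);
	if (err)
		goto out_free;
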
@@ -4267,6 +4307,8 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4267 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 4307 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
4268 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 4308 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
4269 disk_bytenr + len - 1, &list, 0); 4309 disk_bytenr + len - 1, &list, 0);
4310 if (ret)
4311 goto out;
4270 4312
4271 while (!list_empty(&list)) { 4313 while (!list_empty(&list)) {
4272 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4314 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
@@ -4284,6 +4326,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4284 4326
4285 btrfs_add_ordered_sum(inode, ordered, sums); 4327 btrfs_add_ordered_sum(inode, ordered, sums);
4286 } 4328 }
4329out:
4287 btrfs_put_ordered_extent(ordered); 4330 btrfs_put_ordered_extent(ordered);
4288 return ret; 4331 return ret;
4289} 4332}
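
btrfs_reloc_clone_csums() previously fell straight into the list walk even when btrfs_lookup_csums_range() failed; the new out: label makes the failure take the standard single-exit cleanup route so the ordered-extent reference is still dropped:

	ret = btrfs_lookup_csums_range(root->fs_info->csum_root,
				       disk_bytenr, disk_bytenr + len - 1,
				       &list, 0);
	if (ret)
		goto out;	/* skip the csum list walk... */
	/* ... move each btrfs_ordered_sum onto the ordered extent ... */
out:
	btrfs_put_ordered_extent(ordered);	/* ...but always drop the ref */
	return ret;
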
@@ -4380,7 +4423,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
4380 * called after snapshot is created. migrate block reservation 4423 * called after snapshot is created. migrate block reservation
4381 * and create reloc root for the newly created snapshot 4424 * and create reloc root for the newly created snapshot
4382 */ 4425 */
4383void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, 4426int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4384 struct btrfs_pending_snapshot *pending) 4427 struct btrfs_pending_snapshot *pending)
4385{ 4428{
4386 struct btrfs_root *root = pending->root; 4429 struct btrfs_root *root = pending->root;
@@ -4390,7 +4433,7 @@ void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4390 int ret; 4433 int ret;
4391 4434
4392 if (!root->reloc_root) 4435 if (!root->reloc_root)
4393 return; 4436 return 0;
4394 4437
4395 rc = root->fs_info->reloc_ctl; 4438 rc = root->fs_info->reloc_ctl;
4396 rc->merging_rsv_size += rc->nodes_relocated; 4439 rc->merging_rsv_size += rc->nodes_relocated;
@@ -4399,18 +4442,21 @@ void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4399 ret = btrfs_block_rsv_migrate(&pending->block_rsv, 4442 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4400 rc->block_rsv, 4443 rc->block_rsv,
4401 rc->nodes_relocated); 4444 rc->nodes_relocated);
4402 BUG_ON(ret); 4445 if (ret)
4446 return ret;
4403 } 4447 }
4404 4448
4405 new_root = pending->snap; 4449 new_root = pending->snap;
4406 reloc_root = create_reloc_root(trans, root->reloc_root, 4450 reloc_root = create_reloc_root(trans, root->reloc_root,
4407 new_root->root_key.objectid); 4451 new_root->root_key.objectid);
4452 if (IS_ERR(reloc_root))
4453 return PTR_ERR(reloc_root);
4408 4454
4409 __add_reloc_root(reloc_root); 4455 ret = __add_reloc_root(reloc_root);
4456 BUG_ON(ret < 0);
4410 new_root->reloc_root = reloc_root; 4457 new_root->reloc_root = reloc_root;
4411 4458
4412 if (rc->create_reloc_tree) { 4459 if (rc->create_reloc_tree)
4413 ret = clone_backref_node(trans, rc, root, reloc_root); 4460 ret = clone_backref_node(trans, rc, root, reloc_root);
4414 BUG_ON(ret); 4461 return ret;
4415 }
4416} 4462}
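
btrfs_reloc_post_snapshot() turns from void into int so that failures of btrfs_block_rsv_migrate() and create_reloc_root(), and the result of clone_backref_node(), reach the snapshot-creation path; the prototype and the caller are presumably updated in hunks outside this excerpt. A hypothetical caller-side check, in the spirit of the rest of this series:

	ret = btrfs_reloc_post_snapshot(trans, pending);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);	/* assumption */
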
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index f4099904565a..24fb8ce4e071 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -93,10 +93,14 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
93 unsigned long ptr; 93 unsigned long ptr;
94 94
95 path = btrfs_alloc_path(); 95 path = btrfs_alloc_path();
96 BUG_ON(!path); 96 if (!path)
97 return -ENOMEM;
98
97 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 99 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
98 if (ret < 0) 100 if (ret < 0) {
101 btrfs_abort_transaction(trans, root, ret);
99 goto out; 102 goto out;
103 }
100 104
101 if (ret != 0) { 105 if (ret != 0) {
102 btrfs_print_leaf(root, path->nodes[0]); 106 btrfs_print_leaf(root, path->nodes[0]);
@@ -116,13 +120,10 @@ out:
116 return ret; 120 return ret;
117} 121}
118 122
119int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root 123int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
120 *root, struct btrfs_key *key, struct btrfs_root_item 124 struct btrfs_key *key, struct btrfs_root_item *item)
121 *item)
122{ 125{
123 int ret; 126 return btrfs_insert_item(trans, root, key, item, sizeof(*item));
124 ret = btrfs_insert_item(trans, root, key, item, sizeof(*item));
125 return ret;
126} 127}
127 128
128/* 129/*
@@ -384,6 +385,8 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
384 * 385 *
385 * For a back ref the root_id is the id of the subvol or snapshot and 386 * For a back ref the root_id is the id of the subvol or snapshot and
386 * ref_id is the id of the tree referencing it. 387 * ref_id is the id of the tree referencing it.
388 *
389 * Will return 0, -ENOMEM, or anything from the CoW path
387 */ 390 */
388int btrfs_add_root_ref(struct btrfs_trans_handle *trans, 391int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
389 struct btrfs_root *tree_root, 392 struct btrfs_root *tree_root,
@@ -407,7 +410,11 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
407again: 410again:
408 ret = btrfs_insert_empty_item(trans, tree_root, path, &key, 411 ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
409 sizeof(*ref) + name_len); 412 sizeof(*ref) + name_len);
410 BUG_ON(ret); 413 if (ret) {
414 btrfs_abort_transaction(trans, tree_root, ret);
415 btrfs_free_path(path);
416 return ret;
417 }
411 418
412 leaf = path->nodes[0]; 419 leaf = path->nodes[0];
413 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 420 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
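
Both root-tree.c hunks swap a BUG_ON for btrfs_abort_transaction(), the central primitive of the error-handling work in this merge: it marks the running transaction as aborted and forces the filesystem read-only instead of crashing the box. The resulting shape, as visible above:

	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
	if (ret < 0) {
		btrfs_abort_transaction(trans, root, ret);
		goto out;	/* fs is read-only from here on; unwind normally */
	}
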
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 390e7102b0ff..90acc82046c3 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -36,37 +36,30 @@
36 * Future enhancements: 36 * Future enhancements:
37 * - In case an unrepairable extent is encountered, track which files are 37 * - In case an unrepairable extent is encountered, track which files are
38 * affected and report them 38 * affected and report them
39 * - In case of a read error on files with nodatasum, map the file and read
40 * the extent to trigger a writeback of the good copy
41 * - track and record media errors, throw out bad devices 39 * - track and record media errors, throw out bad devices
42 * - add a mode to also read unallocated space 40 * - add a mode to also read unallocated space
43 */ 41 */
44 42
45struct scrub_bio; 43struct scrub_block;
46struct scrub_page;
47struct scrub_dev; 44struct scrub_dev;
48static void scrub_bio_end_io(struct bio *bio, int err);
49static void scrub_checksum(struct btrfs_work *work);
50static int scrub_checksum_data(struct scrub_dev *sdev,
51 struct scrub_page *spag, void *buffer);
52static int scrub_checksum_tree_block(struct scrub_dev *sdev,
53 struct scrub_page *spag, u64 logical,
54 void *buffer);
55static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
56static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
57static void scrub_fixup_end_io(struct bio *bio, int err);
58static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
59 struct page *page);
60static void scrub_fixup(struct scrub_bio *sbio, int ix);
61 45
62#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ 46#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */
63#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ 47#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */
48#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */
64 49
65struct scrub_page { 50struct scrub_page {
51 struct scrub_block *sblock;
52 struct page *page;
53 struct block_device *bdev;
66 u64 flags; /* extent flags */ 54 u64 flags; /* extent flags */
67 u64 generation; 55 u64 generation;
68 int mirror_num; 56 u64 logical;
69 int have_csum; 57 u64 physical;
58 struct {
59 unsigned int mirror_num:8;
60 unsigned int have_csum:1;
61 unsigned int io_error:1;
62 };
70 u8 csum[BTRFS_CSUM_SIZE]; 63 u8 csum[BTRFS_CSUM_SIZE];
71}; 64};
72 65
@@ -77,12 +70,25 @@ struct scrub_bio {
77 int err; 70 int err;
78 u64 logical; 71 u64 logical;
79 u64 physical; 72 u64 physical;
80 struct scrub_page spag[SCRUB_PAGES_PER_BIO]; 73 struct scrub_page *pagev[SCRUB_PAGES_PER_BIO];
81 u64 count; 74 int page_count;
82 int next_free; 75 int next_free;
83 struct btrfs_work work; 76 struct btrfs_work work;
84}; 77};
85 78
79struct scrub_block {
80 struct scrub_page pagev[SCRUB_MAX_PAGES_PER_BLOCK];
81 int page_count;
82 atomic_t outstanding_pages;
83 atomic_t ref_count; /* free mem on transition to zero */
84 struct scrub_dev *sdev;
85 struct {
86 unsigned int header_error:1;
87 unsigned int checksum_error:1;
88 unsigned int no_io_error_seen:1;
89 };
90};
91
86struct scrub_dev { 92struct scrub_dev {
87 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV]; 93 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
88 struct btrfs_device *dev; 94 struct btrfs_device *dev;
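
The scrub rework replaces the flat per-bio page array with an explicit block abstraction: a scrub_block groups the pages that form one verification unit (a sector, node or leaf, up to SCRUB_MAX_PAGES_PER_BLOCK pages), while a scrub_bio now only carries scrub_page pointers for submission. A rough map of the ownership, restated from the structures above:

	/*
	 * scrub_block: unit of verification (one csum or one header)
	 *   .pagev[]             pages backing the block
	 *   .outstanding_pages   reads not yet completed
	 *   .ref_count           free mem on transition to zero
	 *
	 * scrub_bio: unit of submission
	 *   .pagev[]             up to SCRUB_PAGES_PER_BIO scrub_page
	 *                        pointers, possibly from different blocks
	 */
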
@@ -96,6 +102,10 @@ struct scrub_dev {
96 struct list_head csum_list; 102 struct list_head csum_list;
97 atomic_t cancel_req; 103 atomic_t cancel_req;
98 int readonly; 104 int readonly;
105 int pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */
106 u32 sectorsize;
107 u32 nodesize;
108 u32 leafsize;
99 /* 109 /*
100 * statistics 110 * statistics
101 */ 111 */
@@ -124,6 +134,43 @@ struct scrub_warning {
124 int scratch_bufsize; 134 int scratch_bufsize;
125}; 135};
126 136
137
138static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
139static int scrub_setup_recheck_block(struct scrub_dev *sdev,
140 struct btrfs_mapping_tree *map_tree,
141 u64 length, u64 logical,
142 struct scrub_block *sblock);
143static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
144 struct scrub_block *sblock, int is_metadata,
145 int have_csum, u8 *csum, u64 generation,
146 u16 csum_size);
147static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
148 struct scrub_block *sblock,
149 int is_metadata, int have_csum,
150 const u8 *csum, u64 generation,
151 u16 csum_size);
152static void scrub_complete_bio_end_io(struct bio *bio, int err);
153static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
154 struct scrub_block *sblock_good,
155 int force_write);
156static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
157 struct scrub_block *sblock_good,
158 int page_num, int force_write);
159static int scrub_checksum_data(struct scrub_block *sblock);
160static int scrub_checksum_tree_block(struct scrub_block *sblock);
161static int scrub_checksum_super(struct scrub_block *sblock);
162static void scrub_block_get(struct scrub_block *sblock);
163static void scrub_block_put(struct scrub_block *sblock);
164static int scrub_add_page_to_bio(struct scrub_dev *sdev,
165 struct scrub_page *spage);
166static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
167 u64 physical, u64 flags, u64 gen, int mirror_num,
168 u8 *csum, int force);
169static void scrub_bio_end_io(struct bio *bio, int err);
170static void scrub_bio_end_io_worker(struct btrfs_work *work);
171static void scrub_block_complete(struct scrub_block *sblock);
172
173
127static void scrub_free_csums(struct scrub_dev *sdev) 174static void scrub_free_csums(struct scrub_dev *sdev)
128{ 175{
129 while (!list_empty(&sdev->csum_list)) { 176 while (!list_empty(&sdev->csum_list)) {
@@ -135,23 +182,6 @@ static void scrub_free_csums(struct scrub_dev *sdev)
135 } 182 }
136} 183}
137 184
138static void scrub_free_bio(struct bio *bio)
139{
140 int i;
141 struct page *last_page = NULL;
142
143 if (!bio)
144 return;
145
146 for (i = 0; i < bio->bi_vcnt; ++i) {
147 if (bio->bi_io_vec[i].bv_page == last_page)
148 continue;
149 last_page = bio->bi_io_vec[i].bv_page;
150 __free_page(last_page);
151 }
152 bio_put(bio);
153}
154
155static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) 185static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
156{ 186{
157 int i; 187 int i;
@@ -159,13 +189,23 @@ static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
159 if (!sdev) 189 if (!sdev)
160 return; 190 return;
161 191
192 /* this can happen when scrub is cancelled */
193 if (sdev->curr != -1) {
194 struct scrub_bio *sbio = sdev->bios[sdev->curr];
195
196 for (i = 0; i < sbio->page_count; i++) {
197 BUG_ON(!sbio->pagev[i]);
198 BUG_ON(!sbio->pagev[i]->page);
199 scrub_block_put(sbio->pagev[i]->sblock);
200 }
201 bio_put(sbio->bio);
202 }
203
162 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 204 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
163 struct scrub_bio *sbio = sdev->bios[i]; 205 struct scrub_bio *sbio = sdev->bios[i];
164 206
165 if (!sbio) 207 if (!sbio)
166 break; 208 break;
167
168 scrub_free_bio(sbio->bio);
169 kfree(sbio); 209 kfree(sbio);
170 } 210 }
171 211
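
scrub_block is reference counted (scrub_block_get/scrub_block_put in the declarations above) because its pages may still sit in a queued bio after the creating context is gone; the cancel path above drops one block reference per page left in the current bio. The put side presumably looks like this (its body is outside this excerpt, so this is an inferred sketch):

	static void scrub_block_put(struct scrub_block *sblock)
	{
		if (atomic_dec_and_test(&sblock->ref_count)) {
			int i;

			/* "free mem on transition to zero" */
			for (i = 0; i < sblock->page_count; i++)
				if (sblock->pagev[i].page)
					__free_page(sblock->pagev[i].page);
			kfree(sblock);
		}
	}
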
@@ -179,11 +219,16 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
179 struct scrub_dev *sdev; 219 struct scrub_dev *sdev;
180 int i; 220 int i;
181 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 221 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
222 int pages_per_bio;
182 223
224 pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO,
225 bio_get_nr_vecs(dev->bdev));
183 sdev = kzalloc(sizeof(*sdev), GFP_NOFS); 226 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
184 if (!sdev) 227 if (!sdev)
185 goto nomem; 228 goto nomem;
186 sdev->dev = dev; 229 sdev->dev = dev;
230 sdev->pages_per_bio = pages_per_bio;
231 sdev->curr = -1;
187 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 232 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
188 struct scrub_bio *sbio; 233 struct scrub_bio *sbio;
189 234
@@ -194,8 +239,8 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
194 239
195 sbio->index = i; 240 sbio->index = i;
196 sbio->sdev = sdev; 241 sbio->sdev = sdev;
197 sbio->count = 0; 242 sbio->page_count = 0;
198 sbio->work.func = scrub_checksum; 243 sbio->work.func = scrub_bio_end_io_worker;
199 244
200 if (i != SCRUB_BIOS_PER_DEV-1) 245 if (i != SCRUB_BIOS_PER_DEV-1)
201 sdev->bios[i]->next_free = i + 1; 246 sdev->bios[i]->next_free = i + 1;
@@ -203,7 +248,9 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
203 sdev->bios[i]->next_free = -1; 248 sdev->bios[i]->next_free = -1;
204 } 249 }
205 sdev->first_free = 0; 250 sdev->first_free = 0;
206 sdev->curr = -1; 251 sdev->nodesize = dev->dev_root->nodesize;
252 sdev->leafsize = dev->dev_root->leafsize;
253 sdev->sectorsize = dev->dev_root->sectorsize;
207 atomic_set(&sdev->in_flight, 0); 254 atomic_set(&sdev->in_flight, 0);
208 atomic_set(&sdev->fixup_cnt, 0); 255 atomic_set(&sdev->fixup_cnt, 0);
209 atomic_set(&sdev->cancel_req, 0); 256 atomic_set(&sdev->cancel_req, 0);
@@ -294,10 +341,9 @@ err:
294 return 0; 341 return 0;
295} 342}
296 343
297static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, 344static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
298 int ix)
299{ 345{
300 struct btrfs_device *dev = sbio->sdev->dev; 346 struct btrfs_device *dev = sblock->sdev->dev;
301 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 347 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
302 struct btrfs_path *path; 348 struct btrfs_path *path;
303 struct btrfs_key found_key; 349 struct btrfs_key found_key;
@@ -316,8 +362,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
316 362
317 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); 363 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
318 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); 364 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
319 swarn.sector = (sbio->physical + ix * PAGE_SIZE) >> 9; 365 BUG_ON(sblock->page_count < 1);
320 swarn.logical = sbio->logical + ix * PAGE_SIZE; 366 swarn.sector = (sblock->pagev[0].physical) >> 9;
367 swarn.logical = sblock->pagev[0].logical;
321 swarn.errstr = errstr; 368 swarn.errstr = errstr;
322 swarn.dev = dev; 369 swarn.dev = dev;
323 swarn.msg_bufsize = bufsize; 370 swarn.msg_bufsize = bufsize;
@@ -342,7 +389,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
342 do { 389 do {
343 ret = tree_backref_for_extent(&ptr, eb, ei, item_size, 390 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
344 &ref_root, &ref_level); 391 &ref_root, &ref_level);
345 printk(KERN_WARNING "%s at logical %llu on dev %s, " 392 printk(KERN_WARNING
393 "btrfs: %s at logical %llu on dev %s, "
346 "sector %llu: metadata %s (level %d) in tree " 394 "sector %llu: metadata %s (level %d) in tree "
347 "%llu\n", errstr, swarn.logical, dev->name, 395 "%llu\n", errstr, swarn.logical, dev->name,
348 (unsigned long long)swarn.sector, 396 (unsigned long long)swarn.sector,
@@ -352,8 +400,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
352 } while (ret != 1); 400 } while (ret != 1);
353 } else { 401 } else {
354 swarn.path = path; 402 swarn.path = path;
355 iterate_extent_inodes(fs_info, path, found_key.objectid, 403 iterate_extent_inodes(fs_info, found_key.objectid,
356 extent_item_pos, 404 extent_item_pos, 1,
357 scrub_print_warning_inode, &swarn); 405 scrub_print_warning_inode, &swarn);
358 } 406 }
359 407
@@ -531,9 +579,9 @@ out:
531 spin_lock(&sdev->stat_lock); 579 spin_lock(&sdev->stat_lock);
532 ++sdev->stat.uncorrectable_errors; 580 ++sdev->stat.uncorrectable_errors;
533 spin_unlock(&sdev->stat_lock); 581 spin_unlock(&sdev->stat_lock);
534 printk_ratelimited(KERN_ERR "btrfs: unable to fixup " 582 printk_ratelimited(KERN_ERR
535 "(nodatasum) error at logical %llu\n", 583 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
536 fixup->logical); 584 (unsigned long long)fixup->logical, sdev->dev->name);
537 } 585 }
538 586
539 btrfs_free_path(path); 587 btrfs_free_path(path);
@@ -550,91 +598,168 @@ out:
550} 598}
551 599
552/* 600/*
553 * scrub_recheck_error gets called when either verification of the page 601 * scrub_handle_errored_block gets called when either verification of the
554 * failed or the bio failed to read, e.g. with EIO. In the latter case, 602 * pages failed or the bio failed to read, e.g. with EIO. In the latter
555 * recheck_error gets called for every page in the bio, even though only 603 * case, this function handles all pages in the bio, even though only one
556 * one may be bad 604 * may be bad.
605 * The goal of this function is to repair the errored block by using the
606 * contents of one of the mirrors.
557 */ 607 */
558static int scrub_recheck_error(struct scrub_bio *sbio, int ix) 608static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
559{ 609{
560 struct scrub_dev *sdev = sbio->sdev; 610 struct scrub_dev *sdev = sblock_to_check->sdev;
561 u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9; 611 struct btrfs_fs_info *fs_info;
612 u64 length;
613 u64 logical;
614 u64 generation;
615 unsigned int failed_mirror_index;
616 unsigned int is_metadata;
617 unsigned int have_csum;
618 u8 *csum;
619 struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
620 struct scrub_block *sblock_bad;
621 int ret;
622 int mirror_index;
623 int page_num;
624 int success;
562 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, 625 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
563 DEFAULT_RATELIMIT_BURST); 626 DEFAULT_RATELIMIT_BURST);
627
628 BUG_ON(sblock_to_check->page_count < 1);
629 fs_info = sdev->dev->dev_root->fs_info;
630 length = sblock_to_check->page_count * PAGE_SIZE;
631 logical = sblock_to_check->pagev[0].logical;
632 generation = sblock_to_check->pagev[0].generation;
633 BUG_ON(sblock_to_check->pagev[0].mirror_num < 1);
634 failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1;
635 is_metadata = !(sblock_to_check->pagev[0].flags &
636 BTRFS_EXTENT_FLAG_DATA);
637 have_csum = sblock_to_check->pagev[0].have_csum;
638 csum = sblock_to_check->pagev[0].csum;
564 639
565 if (sbio->err) { 640 /*
 566 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector, 641 * read all mirrors one after the other. This includes
 567 sbio->bio->bi_io_vec[ix].bv_page) == 0) { 642 * re-reading the extent or metadata block that failed (that was
 568 if (scrub_fixup_check(sbio, ix) == 0) 643 * the reason this fixup code was called) another time,
569 return 0; 644 * page by page this time in order to know which pages
570 } 645 * caused I/O errors and which ones are good (for all mirrors).
 571 if (__ratelimit(&_rs)) 646 * The goal is to handle the situation when more than one
572 scrub_print_warning("i/o error", sbio, ix); 647 * mirror contains I/O errors, but the errors do not
573 } else { 648 * overlap, i.e. the data can be repaired by selecting the
574 if (__ratelimit(&_rs)) 649 * pages from those mirrors without I/O error on the
575 scrub_print_warning("checksum error", sbio, ix); 650 * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
651 * would be that mirror #1 has an I/O error on the first page,
652 * the second page is good, and mirror #2 has an I/O error on
653 * the second page, but the first page is good.
654 * Then the first page of the first mirror can be repaired by
655 * taking the first page of the second mirror, and the
656 * second page of the second mirror can be repaired by
657 * copying the contents of the 2nd page of the 1st mirror.
658 * One more note: if the pages of one mirror contain I/O
659 * errors, the checksum cannot be verified. In order to get
660 * the best data for repairing, the first attempt is to find
661 * a mirror without I/O errors and with a validated checksum.
 662 * Only if this is not possible are the pages picked from
663 * mirrors with I/O errors without considering the checksum.
664 * If the latter is the case, at the end, the checksum of the
665 * repaired area is verified in order to correctly maintain
666 * the statistics.
667 */
668
669 sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
670 sizeof(*sblocks_for_recheck),
671 GFP_NOFS);
672 if (!sblocks_for_recheck) {
673 spin_lock(&sdev->stat_lock);
674 sdev->stat.malloc_errors++;
675 sdev->stat.read_errors++;
676 sdev->stat.uncorrectable_errors++;
677 spin_unlock(&sdev->stat_lock);
678 goto out;
576 } 679 }
577 680
578 spin_lock(&sdev->stat_lock); 681 /* setup the context, map the logical blocks and alloc the pages */
579 ++sdev->stat.read_errors; 682 ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length,
580 spin_unlock(&sdev->stat_lock); 683 logical, sblocks_for_recheck);
684 if (ret) {
685 spin_lock(&sdev->stat_lock);
686 sdev->stat.read_errors++;
687 sdev->stat.uncorrectable_errors++;
688 spin_unlock(&sdev->stat_lock);
689 goto out;
690 }
691 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
692 sblock_bad = sblocks_for_recheck + failed_mirror_index;
581 693
582 scrub_fixup(sbio, ix); 694 /* build and submit the bios for the failed mirror, check checksums */
583 return 1; 695 ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
584} 696 csum, generation, sdev->csum_size);
697 if (ret) {
698 spin_lock(&sdev->stat_lock);
699 sdev->stat.read_errors++;
700 sdev->stat.uncorrectable_errors++;
701 spin_unlock(&sdev->stat_lock);
702 goto out;
703 }
585 704
586static int scrub_fixup_check(struct scrub_bio *sbio, int ix) 705 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
587{ 706 sblock_bad->no_io_error_seen) {
588 int ret = 1; 707 /*
589 struct page *page; 708 * the error disappeared after reading page by page, or
590 void *buffer; 709 * the area was part of a huge bio and other parts of the
591 u64 flags = sbio->spag[ix].flags; 710 * bio caused I/O errors, or the block layer merged several
711 * read requests into one and the error is caused by a
712 * different bio (usually one of the two latter cases is
713 * the cause)
714 */
715 spin_lock(&sdev->stat_lock);
716 sdev->stat.unverified_errors++;
717 spin_unlock(&sdev->stat_lock);
592 718
593 page = sbio->bio->bi_io_vec[ix].bv_page; 719 goto out;
594 buffer = kmap_atomic(page);
595 if (flags & BTRFS_EXTENT_FLAG_DATA) {
596 ret = scrub_checksum_data(sbio->sdev,
597 sbio->spag + ix, buffer);
598 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
599 ret = scrub_checksum_tree_block(sbio->sdev,
600 sbio->spag + ix,
601 sbio->logical + ix * PAGE_SIZE,
602 buffer);
603 } else {
604 WARN_ON(1);
605 } 720 }
606 kunmap_atomic(buffer);
607 721
608 return ret; 722 if (!sblock_bad->no_io_error_seen) {
609} 723 spin_lock(&sdev->stat_lock);
724 sdev->stat.read_errors++;
725 spin_unlock(&sdev->stat_lock);
726 if (__ratelimit(&_rs))
727 scrub_print_warning("i/o error", sblock_to_check);
728 } else if (sblock_bad->checksum_error) {
729 spin_lock(&sdev->stat_lock);
730 sdev->stat.csum_errors++;
731 spin_unlock(&sdev->stat_lock);
732 if (__ratelimit(&_rs))
733 scrub_print_warning("checksum error", sblock_to_check);
734 } else if (sblock_bad->header_error) {
735 spin_lock(&sdev->stat_lock);
736 sdev->stat.verify_errors++;
737 spin_unlock(&sdev->stat_lock);
738 if (__ratelimit(&_rs))
739 scrub_print_warning("checksum/header error",
740 sblock_to_check);
741 }
610 742
611static void scrub_fixup_end_io(struct bio *bio, int err) 743 if (sdev->readonly)
612{ 744 goto did_not_correct_error;
613 complete((struct completion *)bio->bi_private);
614}
615 745
616static void scrub_fixup(struct scrub_bio *sbio, int ix) 746 if (!is_metadata && !have_csum) {
617{ 747 struct scrub_fixup_nodatasum *fixup_nodatasum;
618 struct scrub_dev *sdev = sbio->sdev; 748
619 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 749 /*
620 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 750 * !is_metadata and !have_csum, this means that the data
621 struct btrfs_bio *bbio = NULL; 751 * might not be COW'ed, that it might be modified
622 struct scrub_fixup_nodatasum *fixup; 752 * concurrently. The general strategy to work on the
623 u64 logical = sbio->logical + ix * PAGE_SIZE; 753 * commit root does not help in the case when COW is not
624 u64 length; 754 * used.
625 int i; 755 */
626 int ret; 756 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
627 DECLARE_COMPLETION_ONSTACK(complete); 757 if (!fixup_nodatasum)
628 758 goto did_not_correct_error;
629 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && 759 fixup_nodatasum->sdev = sdev;
630 (sbio->spag[ix].have_csum == 0)) { 760 fixup_nodatasum->logical = logical;
631 fixup = kzalloc(sizeof(*fixup), GFP_NOFS); 761 fixup_nodatasum->root = fs_info->extent_root;
632 if (!fixup) 762 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
633 goto uncorrectable;
634 fixup->sdev = sdev;
635 fixup->logical = logical;
636 fixup->root = fs_info->extent_root;
637 fixup->mirror_num = sbio->spag[ix].mirror_num;
638 /* 763 /*
639 * increment scrubs_running to prevent cancel requests from 764 * increment scrubs_running to prevent cancel requests from
640 * completing as long as a fixup worker is running. we must also 765 * completing as long as a fixup worker is running. we must also
@@ -649,235 +774,528 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
649 atomic_inc(&fs_info->scrubs_paused); 774 atomic_inc(&fs_info->scrubs_paused);
650 mutex_unlock(&fs_info->scrub_lock); 775 mutex_unlock(&fs_info->scrub_lock);
651 atomic_inc(&sdev->fixup_cnt); 776 atomic_inc(&sdev->fixup_cnt);
652 fixup->work.func = scrub_fixup_nodatasum; 777 fixup_nodatasum->work.func = scrub_fixup_nodatasum;
653 btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work); 778 btrfs_queue_worker(&fs_info->scrub_workers,
654 return; 779 &fixup_nodatasum->work);
780 goto out;
655 } 781 }
656 782
657 length = PAGE_SIZE; 783 /*
658 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, 784 * now build and submit the bios for the other mirrors, check
659 &bbio, 0); 785 * checksums
660 if (ret || !bbio || length < PAGE_SIZE) { 786 */
661 printk(KERN_ERR 787 for (mirror_index = 0;
662 "scrub_fixup: btrfs_map_block failed us for %llu\n", 788 mirror_index < BTRFS_MAX_MIRRORS &&
663 (unsigned long long)logical); 789 sblocks_for_recheck[mirror_index].page_count > 0;
664 WARN_ON(1); 790 mirror_index++) {
665 kfree(bbio); 791 if (mirror_index == failed_mirror_index)
666 return; 792 continue;
793
794 /* build and submit the bios, check checksums */
795 ret = scrub_recheck_block(fs_info,
796 sblocks_for_recheck + mirror_index,
797 is_metadata, have_csum, csum,
798 generation, sdev->csum_size);
799 if (ret)
800 goto did_not_correct_error;
667 } 801 }
668 802
669 if (bbio->num_stripes == 1) 803 /*
670 /* there aren't any replicas */ 804 * first try to pick the mirror which is completely without I/O
671 goto uncorrectable; 805 * errors and also does not have a checksum error.
806 * If one is found, and if a checksum is present, the full block
807 * that is known to contain an error is rewritten. Afterwards
808 * the block is known to be corrected.
809 * If a mirror is found which is completely correct, and no
810 * checksum is present, only those pages are rewritten that had
811 * an I/O error in the block to be repaired, since it cannot be
812 * determined, which copy of the other pages is better (and it
813 * could happen otherwise that a correct page would be
814 * overwritten by a bad one).
815 */
816 for (mirror_index = 0;
817 mirror_index < BTRFS_MAX_MIRRORS &&
818 sblocks_for_recheck[mirror_index].page_count > 0;
819 mirror_index++) {
820 struct scrub_block *sblock_other = sblocks_for_recheck +
821 mirror_index;
822
823 if (!sblock_other->header_error &&
824 !sblock_other->checksum_error &&
825 sblock_other->no_io_error_seen) {
826 int force_write = is_metadata || have_csum;
827
828 ret = scrub_repair_block_from_good_copy(sblock_bad,
829 sblock_other,
830 force_write);
831 if (0 == ret)
832 goto corrected_error;
833 }
834 }
672 835
673 /* 836 /*
674 * first find a good copy 837 * in case of I/O errors in the area that is supposed to be
838 * repaired, continue by picking good copies of those pages.
839 * Select the good pages from mirrors to rewrite bad pages from
840 * the area to fix. Afterwards verify the checksum of the block
841 * that is supposed to be repaired. This verification step is
842 * only done for the purpose of statistic counting and for the
843 * final scrub report, whether errors remain.
844 * A perfect algorithm could make use of the checksum and try
845 * all possible combinations of pages from the different mirrors
846 * until the checksum verification succeeds. For example, when
847 * the 2nd page of mirror #1 faces I/O errors, and the 2nd page
848 * of mirror #2 is readable but the final checksum test fails,
 849 * then the 2nd page of mirror #3 could be tried, to check
 850 * whether the final checksum now succeeds. But this would be a
 851 * rare exception and is therefore not implemented. At least
 852 * overwriting the good copy is avoided.
853 * A more useful improvement would be to pick the sectors
854 * without I/O error based on sector sizes (512 bytes on legacy
 855 * disks) instead of on PAGE_SIZE. Then maybe 512 bytes of one
 856 * mirror could be repaired by taking 512 bytes of a different
 857 * mirror, even if other 512-byte sectors in the same PAGE_SIZE
858 * area are unreadable.
675 */ 859 */
676 for (i = 0; i < bbio->num_stripes; ++i) {
677 if (i + 1 == sbio->spag[ix].mirror_num)
678 continue;
679 860
680 if (scrub_fixup_io(READ, bbio->stripes[i].dev->bdev, 861 /* can only fix I/O errors from here on */
681 bbio->stripes[i].physical >> 9, 862 if (sblock_bad->no_io_error_seen)
682 sbio->bio->bi_io_vec[ix].bv_page)) { 863 goto did_not_correct_error;
683 /* I/O-error, this is not a good copy */ 864
865 success = 1;
866 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
867 struct scrub_page *page_bad = sblock_bad->pagev + page_num;
868
869 if (!page_bad->io_error)
684 continue; 870 continue;
871
872 for (mirror_index = 0;
873 mirror_index < BTRFS_MAX_MIRRORS &&
874 sblocks_for_recheck[mirror_index].page_count > 0;
875 mirror_index++) {
876 struct scrub_block *sblock_other = sblocks_for_recheck +
877 mirror_index;
878 struct scrub_page *page_other = sblock_other->pagev +
879 page_num;
880
881 if (!page_other->io_error) {
882 ret = scrub_repair_page_from_good_copy(
883 sblock_bad, sblock_other, page_num, 0);
884 if (0 == ret) {
885 page_bad->io_error = 0;
886 break; /* succeeded for this page */
887 }
888 }
685 } 889 }
686 890
687 if (scrub_fixup_check(sbio, ix) == 0) 891 if (page_bad->io_error) {
688 break; 892 /* did not find a mirror to copy the page from */
893 success = 0;
894 }
689 } 895 }
690 if (i == bbio->num_stripes)
691 goto uncorrectable;
692 896
693 if (!sdev->readonly) { 897 if (success) {
694 /* 898 if (is_metadata || have_csum) {
695 * bi_io_vec[ix].bv_page now contains good data, write it back 899 /*
696 */ 900 * need to verify the checksum now that all
697 if (scrub_fixup_io(WRITE, sdev->dev->bdev, 901 * sectors on disk are repaired (the write
698 (sbio->physical + ix * PAGE_SIZE) >> 9, 902 * request for data to be repaired is on its way).
699 sbio->bio->bi_io_vec[ix].bv_page)) { 903 * Just be lazy and use scrub_recheck_block()
700 /* I/O-error, writeback failed, give up */ 904 * which re-reads the data before the checksum
701 goto uncorrectable; 905 * is verified, but most likely the data comes out
906 * of the page cache.
907 */
908 ret = scrub_recheck_block(fs_info, sblock_bad,
909 is_metadata, have_csum, csum,
910 generation, sdev->csum_size);
911 if (!ret && !sblock_bad->header_error &&
912 !sblock_bad->checksum_error &&
913 sblock_bad->no_io_error_seen)
914 goto corrected_error;
915 else
916 goto did_not_correct_error;
917 } else {
918corrected_error:
919 spin_lock(&sdev->stat_lock);
920 sdev->stat.corrected_errors++;
921 spin_unlock(&sdev->stat_lock);
922 printk_ratelimited(KERN_ERR
923 "btrfs: fixed up error at logical %llu on dev %s\n",
924 (unsigned long long)logical, sdev->dev->name);
702 } 925 }
926 } else {
927did_not_correct_error:
928 spin_lock(&sdev->stat_lock);
929 sdev->stat.uncorrectable_errors++;
930 spin_unlock(&sdev->stat_lock);
931 printk_ratelimited(KERN_ERR
932 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
933 (unsigned long long)logical, sdev->dev->name);
703 } 934 }
704 935
705 kfree(bbio); 936out:
706 spin_lock(&sdev->stat_lock); 937 if (sblocks_for_recheck) {
707 ++sdev->stat.corrected_errors; 938 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
708 spin_unlock(&sdev->stat_lock); 939 mirror_index++) {
940 struct scrub_block *sblock = sblocks_for_recheck +
941 mirror_index;
942 int page_index;
943
944 for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO;
945 page_index++)
946 if (sblock->pagev[page_index].page)
947 __free_page(
948 sblock->pagev[page_index].page);
949 }
950 kfree(sblocks_for_recheck);
951 }
709 952
710 printk_ratelimited(KERN_ERR "btrfs: fixed up error at logical %llu\n", 953 return 0;
711 (unsigned long long)logical); 954}
712 return;
713 955
714uncorrectable: 956static int scrub_setup_recheck_block(struct scrub_dev *sdev,
715 kfree(bbio); 957 struct btrfs_mapping_tree *map_tree,
716 spin_lock(&sdev->stat_lock); 958 u64 length, u64 logical,
717 ++sdev->stat.uncorrectable_errors; 959 struct scrub_block *sblocks_for_recheck)
718 spin_unlock(&sdev->stat_lock); 960{
961 int page_index;
962 int mirror_index;
963 int ret;
964
965 /*
966 * note: the three members sdev, ref_count and outstanding_pages
967 * are not used (and not set) in the blocks that are used for
968 * the recheck procedure
969 */
970
971 page_index = 0;
972 while (length > 0) {
973 u64 sublen = min_t(u64, length, PAGE_SIZE);
974 u64 mapped_length = sublen;
975 struct btrfs_bio *bbio = NULL;
976
977 /*
978 * with a length of PAGE_SIZE, each returned stripe
979 * represents one mirror
980 */
981 ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length,
982 &bbio, 0);
983 if (ret || !bbio || mapped_length < sublen) {
984 kfree(bbio);
985 return -EIO;
986 }
719 987
720 printk_ratelimited(KERN_ERR "btrfs: unable to fixup (regular) error at " 988 BUG_ON(page_index >= SCRUB_PAGES_PER_BIO);
721 "logical %llu\n", (unsigned long long)logical); 989 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
990 mirror_index++) {
991 struct scrub_block *sblock;
992 struct scrub_page *page;
993
994 if (mirror_index >= BTRFS_MAX_MIRRORS)
995 continue;
996
997 sblock = sblocks_for_recheck + mirror_index;
998 page = sblock->pagev + page_index;
999 page->logical = logical;
1000 page->physical = bbio->stripes[mirror_index].physical;
1001 page->bdev = bbio->stripes[mirror_index].dev->bdev;
1002 page->mirror_num = mirror_index + 1;
1003 page->page = alloc_page(GFP_NOFS);
1004 if (!page->page) {
1005 spin_lock(&sdev->stat_lock);
1006 sdev->stat.malloc_errors++;
1007 spin_unlock(&sdev->stat_lock);
1008 return -ENOMEM;
1009 }
1010 sblock->page_count++;
1011 }
1012 kfree(bbio);
1013 length -= sublen;
1014 logical += sublen;
1015 page_index++;
1016 }
1017
1018 return 0;
722} 1019}
723 1020
724static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, 1021/*
725 struct page *page) 1022 * this function will check the on disk data for checksum errors, header
1023 * errors and read I/O errors. If any I/O errors happen, the exact pages
1024 * which are errored are marked as being bad. The goal is to enable scrub
1025 * to take those pages that are not errored from all the mirrors so that
1026 * the pages that are errored in the just handled mirror can be repaired.
1027 */
1028static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
1029 struct scrub_block *sblock, int is_metadata,
1030 int have_csum, u8 *csum, u64 generation,
1031 u16 csum_size)
726{ 1032{
727 struct bio *bio = NULL; 1033 int page_num;
728 int ret; 1034
729 DECLARE_COMPLETION_ONSTACK(complete); 1035 sblock->no_io_error_seen = 1;
1036 sblock->header_error = 0;
1037 sblock->checksum_error = 0;
1038
1039 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1040 struct bio *bio;
1041 int ret;
1042 struct scrub_page *page = sblock->pagev + page_num;
1043 DECLARE_COMPLETION_ONSTACK(complete);
1044
1045 BUG_ON(!page->page);
1046 bio = bio_alloc(GFP_NOFS, 1);
1047 bio->bi_bdev = page->bdev;
1048 bio->bi_sector = page->physical >> 9;
1049 bio->bi_end_io = scrub_complete_bio_end_io;
1050 bio->bi_private = &complete;
1051
1052 ret = bio_add_page(bio, page->page, PAGE_SIZE, 0);
1053 if (PAGE_SIZE != ret) {
1054 bio_put(bio);
1055 return -EIO;
1056 }
1057 btrfsic_submit_bio(READ, bio);
730 1058
731 bio = bio_alloc(GFP_NOFS, 1); 1059 /* this will also unplug the queue */
732 bio->bi_bdev = bdev; 1060 wait_for_completion(&complete);
733 bio->bi_sector = sector;
734 bio_add_page(bio, page, PAGE_SIZE, 0);
735 bio->bi_end_io = scrub_fixup_end_io;
736 bio->bi_private = &complete;
737 btrfsic_submit_bio(rw, bio);
738 1061
739 /* this will also unplug the queue */ 1062 page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
740 wait_for_completion(&complete); 1063 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1064 sblock->no_io_error_seen = 0;
1065 bio_put(bio);
1066 }
741 1067
742 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); 1068 if (sblock->no_io_error_seen)
743 bio_put(bio); 1069 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
744 return ret; 1070 have_csum, csum, generation,
1071 csum_size);
1072
1073 return 0;
745} 1074}
746 1075
747static void scrub_bio_end_io(struct bio *bio, int err) 1076static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1077 struct scrub_block *sblock,
1078 int is_metadata, int have_csum,
1079 const u8 *csum, u64 generation,
1080 u16 csum_size)
748{ 1081{
749 struct scrub_bio *sbio = bio->bi_private; 1082 int page_num;
750 struct scrub_dev *sdev = sbio->sdev; 1083 u8 calculated_csum[BTRFS_CSUM_SIZE];
751 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 1084 u32 crc = ~(u32)0;
1085 struct btrfs_root *root = fs_info->extent_root;
1086 void *mapped_buffer;
1087
1088 BUG_ON(!sblock->pagev[0].page);
1089 if (is_metadata) {
1090 struct btrfs_header *h;
1091
1092 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
1093 h = (struct btrfs_header *)mapped_buffer;
1094
1095 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
1096 generation != le64_to_cpu(h->generation) ||
1097 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1098 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1099 BTRFS_UUID_SIZE))
1100 sblock->header_error = 1;
1101 csum = h->csum;
1102 } else {
1103 if (!have_csum)
1104 return;
752 1105
753 sbio->err = err; 1106 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
754 sbio->bio = bio; 1107 }
755 1108
756 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); 1109 for (page_num = 0;;) {
1110 if (page_num == 0 && is_metadata)
1111 crc = btrfs_csum_data(root,
1112 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1113 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1114 else
1115 crc = btrfs_csum_data(root, mapped_buffer, crc,
1116 PAGE_SIZE);
1117
1118 kunmap_atomic(mapped_buffer);
1119 page_num++;
1120 if (page_num >= sblock->page_count)
1121 break;
1122 BUG_ON(!sblock->pagev[page_num].page);
1123
1124 mapped_buffer = kmap_atomic(sblock->pagev[page_num].page);
1125 }
1126
1127 btrfs_csum_final(crc, calculated_csum);
1128 if (memcmp(calculated_csum, csum, csum_size))
1129 sblock->checksum_error = 1;
757} 1130}
758 1131
759static void scrub_checksum(struct btrfs_work *work) 1132static void scrub_complete_bio_end_io(struct bio *bio, int err)
760{ 1133{
761 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); 1134 complete((struct completion *)bio->bi_private);
762 struct scrub_dev *sdev = sbio->sdev; 1135}
763 struct page *page;
764 void *buffer;
765 int i;
766 u64 flags;
767 u64 logical;
768 int ret;
769 1136
770 if (sbio->err) { 1137static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
771 ret = 0; 1138 struct scrub_block *sblock_good,
772 for (i = 0; i < sbio->count; ++i) 1139 int force_write)
773 ret |= scrub_recheck_error(sbio, i); 1140{
774 if (!ret) { 1141 int page_num;
775 spin_lock(&sdev->stat_lock); 1142 int ret = 0;
776 ++sdev->stat.unverified_errors;
777 spin_unlock(&sdev->stat_lock);
778 }
779 1143
780 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); 1144 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
781 sbio->bio->bi_flags |= 1 << BIO_UPTODATE; 1145 int ret_sub;
782 sbio->bio->bi_phys_segments = 0;
783 sbio->bio->bi_idx = 0;
784 1146
785 for (i = 0; i < sbio->count; i++) { 1147 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
786 struct bio_vec *bi; 1148 sblock_good,
787 bi = &sbio->bio->bi_io_vec[i]; 1149 page_num,
788 bi->bv_offset = 0; 1150 force_write);
789 bi->bv_len = PAGE_SIZE; 1151 if (ret_sub)
790 } 1152 ret = ret_sub;
791 goto out;
792 } 1153 }
793 for (i = 0; i < sbio->count; ++i) { 1154
794 page = sbio->bio->bi_io_vec[i].bv_page; 1155 return ret;
795 buffer = kmap_atomic(page); 1156}
796 flags = sbio->spag[i].flags; 1157
797 logical = sbio->logical + i * PAGE_SIZE; 1158static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
798 ret = 0; 1159 struct scrub_block *sblock_good,
799 if (flags & BTRFS_EXTENT_FLAG_DATA) { 1160 int page_num, int force_write)
800 ret = scrub_checksum_data(sdev, sbio->spag + i, buffer); 1161{
801 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1162 struct scrub_page *page_bad = sblock_bad->pagev + page_num;
802 ret = scrub_checksum_tree_block(sdev, sbio->spag + i, 1163 struct scrub_page *page_good = sblock_good->pagev + page_num;
803 logical, buffer); 1164
804 } else if (flags & BTRFS_EXTENT_FLAG_SUPER) { 1165 BUG_ON(sblock_bad->pagev[page_num].page == NULL);
805 BUG_ON(i); 1166 BUG_ON(sblock_good->pagev[page_num].page == NULL);
806 (void)scrub_checksum_super(sbio, buffer); 1167 if (force_write || sblock_bad->header_error ||
807 } else { 1168 sblock_bad->checksum_error || page_bad->io_error) {
808 WARN_ON(1); 1169 struct bio *bio;
809 } 1170 int ret;
810 kunmap_atomic(buffer); 1171 DECLARE_COMPLETION_ONSTACK(complete);
811 if (ret) { 1172
812 ret = scrub_recheck_error(sbio, i); 1173 bio = bio_alloc(GFP_NOFS, 1);
813 if (!ret) { 1174 bio->bi_bdev = page_bad->bdev;
814 spin_lock(&sdev->stat_lock); 1175 bio->bi_sector = page_bad->physical >> 9;
815 ++sdev->stat.unverified_errors; 1176 bio->bi_end_io = scrub_complete_bio_end_io;
816 spin_unlock(&sdev->stat_lock); 1177 bio->bi_private = &complete;
817 } 1178
1179 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1180 if (PAGE_SIZE != ret) {
1181 bio_put(bio);
1182 return -EIO;
818 } 1183 }
1184 btrfsic_submit_bio(WRITE, bio);
1185
1186 /* this will also unplug the queue */
1187 wait_for_completion(&complete);
1188 bio_put(bio);
819 } 1189 }
820 1190
821out: 1191 return 0;
822 scrub_free_bio(sbio->bio); 1192}
823 sbio->bio = NULL; 1193
824 spin_lock(&sdev->list_lock); 1194static void scrub_checksum(struct scrub_block *sblock)
825 sbio->next_free = sdev->first_free; 1195{
826 sdev->first_free = sbio->index; 1196 u64 flags;
827 spin_unlock(&sdev->list_lock); 1197 int ret;
828 atomic_dec(&sdev->in_flight); 1198
829 wake_up(&sdev->list_wait); 1199 BUG_ON(sblock->page_count < 1);
1200 flags = sblock->pagev[0].flags;
1201 ret = 0;
1202 if (flags & BTRFS_EXTENT_FLAG_DATA)
1203 ret = scrub_checksum_data(sblock);
1204 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1205 ret = scrub_checksum_tree_block(sblock);
1206 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1207 (void)scrub_checksum_super(sblock);
1208 else
1209 WARN_ON(1);
1210 if (ret)
1211 scrub_handle_errored_block(sblock);
830} 1212}
831 1213
832static int scrub_checksum_data(struct scrub_dev *sdev, 1214static int scrub_checksum_data(struct scrub_block *sblock)
833 struct scrub_page *spag, void *buffer)
834{ 1215{
1216 struct scrub_dev *sdev = sblock->sdev;
835 u8 csum[BTRFS_CSUM_SIZE]; 1217 u8 csum[BTRFS_CSUM_SIZE];
1218 u8 *on_disk_csum;
1219 struct page *page;
1220 void *buffer;
836 u32 crc = ~(u32)0; 1221 u32 crc = ~(u32)0;
837 int fail = 0; 1222 int fail = 0;
838 struct btrfs_root *root = sdev->dev->dev_root; 1223 struct btrfs_root *root = sdev->dev->dev_root;
1224 u64 len;
1225 int index;
839 1226
840 if (!spag->have_csum) 1227 BUG_ON(sblock->page_count < 1);
1228 if (!sblock->pagev[0].have_csum)
841 return 0; 1229 return 0;
842 1230
843 crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE); 1231 on_disk_csum = sblock->pagev[0].csum;
1232 page = sblock->pagev[0].page;
1233 buffer = kmap_atomic(page);
1234
1235 len = sdev->sectorsize;
1236 index = 0;
1237 for (;;) {
1238 u64 l = min_t(u64, len, PAGE_SIZE);
1239
1240 crc = btrfs_csum_data(root, buffer, crc, l);
1241 kunmap_atomic(buffer);
1242 len -= l;
1243 if (len == 0)
1244 break;
1245 index++;
1246 BUG_ON(index >= sblock->page_count);
1247 BUG_ON(!sblock->pagev[index].page);
1248 page = sblock->pagev[index].page;
1249 buffer = kmap_atomic(page);
1250 }
1251
844 btrfs_csum_final(crc, csum); 1252 btrfs_csum_final(crc, csum);
845 if (memcmp(csum, spag->csum, sdev->csum_size)) 1253 if (memcmp(csum, on_disk_csum, sdev->csum_size))
846 fail = 1; 1254 fail = 1;
847 1255
848 spin_lock(&sdev->stat_lock); 1256 if (fail) {
849 ++sdev->stat.data_extents_scrubbed; 1257 spin_lock(&sdev->stat_lock);
850 sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
851 if (fail)
852 ++sdev->stat.csum_errors; 1258 ++sdev->stat.csum_errors;
853 spin_unlock(&sdev->stat_lock); 1259 spin_unlock(&sdev->stat_lock);
1260 }
854 1261
855 return fail; 1262 return fail;
856} 1263}
857 1264
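
Because a metadata block can now be bigger than PAGE_SIZE, scrub_checksum_data() above can no longer hash a block with a single btrfs_csum_data() call: it maps one page at a time with kmap_atomic(), feeds min(len, PAGE_SIZE) bytes into the rolling checksum, unmaps, and advances to the next page of the block. The same accumulation pattern in plain userspace C, with zlib's crc32() standing in for btrfs_csum_data() (btrfs really uses crc32c, so this models the loop, not the exact checksum):

#include <stddef.h>
#include <stdint.h>
#include <zlib.h>			/* crc32(); link with -lz */

#define PAGE_SZ 4096

/* one rolling checksum over a block that spans several pages */
static uint32_t crc_block(unsigned char *pagev[], size_t block_len)
{
	uLong crc = crc32(0L, Z_NULL, 0);	/* initial CRC state */
	size_t index = 0;

	while (block_len > 0) {
		size_t l = block_len < PAGE_SZ ? block_len : PAGE_SZ;

		/* one page's worth per iteration, like the kmap loop */
		crc = crc32(crc, pagev[index], (uInt)l);
		block_len -= l;
		index++;
	}
	return (uint32_t)crc;
}

The tree-block and superblock verifiers below use the same loop, with the extra twist that the first BTRFS_CSUM_SIZE bytes (the stored checksum itself) are skipped.
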
858static int scrub_checksum_tree_block(struct scrub_dev *sdev, 1265static int scrub_checksum_tree_block(struct scrub_block *sblock)
859 struct scrub_page *spag, u64 logical,
860 void *buffer)
861{ 1266{
1267 struct scrub_dev *sdev = sblock->sdev;
862 struct btrfs_header *h; 1268 struct btrfs_header *h;
863 struct btrfs_root *root = sdev->dev->dev_root; 1269 struct btrfs_root *root = sdev->dev->dev_root;
864 struct btrfs_fs_info *fs_info = root->fs_info; 1270 struct btrfs_fs_info *fs_info = root->fs_info;
865 u8 csum[BTRFS_CSUM_SIZE]; 1271 u8 calculated_csum[BTRFS_CSUM_SIZE];
1272 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1273 struct page *page;
1274 void *mapped_buffer;
1275 u64 mapped_size;
1276 void *p;
866 u32 crc = ~(u32)0; 1277 u32 crc = ~(u32)0;
867 int fail = 0; 1278 int fail = 0;
868 int crc_fail = 0; 1279 int crc_fail = 0;
1280 u64 len;
1281 int index;
1282
1283 BUG_ON(sblock->page_count < 1);
1284 page = sblock->pagev[0].page;
1285 mapped_buffer = kmap_atomic(page);
1286 h = (struct btrfs_header *)mapped_buffer;
1287 memcpy(on_disk_csum, h->csum, sdev->csum_size);
869 1288
870 /* 1289 /*
871 * we don't use the getter functions here, as we 1290 * we don't use the getter functions here, as we
872 * a) don't have an extent buffer and 1291 * a) don't have an extent buffer and
873 * b) the page is already kmapped 1292 * b) the page is already kmapped
874 */ 1293 */
875 h = (struct btrfs_header *)buffer;
876 1294
877 if (logical != le64_to_cpu(h->bytenr)) 1295 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr))
878 ++fail; 1296 ++fail;
879 1297
880 if (spag->generation != le64_to_cpu(h->generation)) 1298 if (sblock->pagev[0].generation != le64_to_cpu(h->generation))
881 ++fail; 1299 ++fail;
882 1300
883 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1301 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -887,51 +1305,99 @@ static int scrub_checksum_tree_block(struct scrub_dev *sdev,
887 BTRFS_UUID_SIZE)) 1305 BTRFS_UUID_SIZE))
888 ++fail; 1306 ++fail;
889 1307
890 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, 1308 BUG_ON(sdev->nodesize != sdev->leafsize);
891 PAGE_SIZE - BTRFS_CSUM_SIZE); 1309 len = sdev->nodesize - BTRFS_CSUM_SIZE;
892 btrfs_csum_final(crc, csum); 1310 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
893 if (memcmp(csum, h->csum, sdev->csum_size)) 1311 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1312 index = 0;
1313 for (;;) {
1314 u64 l = min_t(u64, len, mapped_size);
1315
1316 crc = btrfs_csum_data(root, p, crc, l);
1317 kunmap_atomic(mapped_buffer);
1318 len -= l;
1319 if (len == 0)
1320 break;
1321 index++;
1322 BUG_ON(index >= sblock->page_count);
1323 BUG_ON(!sblock->pagev[index].page);
1324 page = sblock->pagev[index].page;
1325 mapped_buffer = kmap_atomic(page);
1326 mapped_size = PAGE_SIZE;
1327 p = mapped_buffer;
1328 }
1329
1330 btrfs_csum_final(crc, calculated_csum);
1331 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
894 ++crc_fail; 1332 ++crc_fail;
895 1333
896 spin_lock(&sdev->stat_lock); 1334 if (crc_fail || fail) {
897 ++sdev->stat.tree_extents_scrubbed; 1335 spin_lock(&sdev->stat_lock);
898 sdev->stat.tree_bytes_scrubbed += PAGE_SIZE; 1336 if (crc_fail)
899 if (crc_fail) 1337 ++sdev->stat.csum_errors;
900 ++sdev->stat.csum_errors; 1338 if (fail)
901 if (fail) 1339 ++sdev->stat.verify_errors;
902 ++sdev->stat.verify_errors; 1340 spin_unlock(&sdev->stat_lock);
903 spin_unlock(&sdev->stat_lock); 1341 }
904 1342
905 return fail || crc_fail; 1343 return fail || crc_fail;
906} 1344}
907 1345
908static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) 1346static int scrub_checksum_super(struct scrub_block *sblock)
909{ 1347{
910 struct btrfs_super_block *s; 1348 struct btrfs_super_block *s;
911 u64 logical; 1349 struct scrub_dev *sdev = sblock->sdev;
912 struct scrub_dev *sdev = sbio->sdev;
913 struct btrfs_root *root = sdev->dev->dev_root; 1350 struct btrfs_root *root = sdev->dev->dev_root;
914 struct btrfs_fs_info *fs_info = root->fs_info; 1351 struct btrfs_fs_info *fs_info = root->fs_info;
915 u8 csum[BTRFS_CSUM_SIZE]; 1352 u8 calculated_csum[BTRFS_CSUM_SIZE];
1353 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1354 struct page *page;
1355 void *mapped_buffer;
1356 u64 mapped_size;
1357 void *p;
916 u32 crc = ~(u32)0; 1358 u32 crc = ~(u32)0;
917 int fail = 0; 1359 int fail = 0;
1360 u64 len;
1361 int index;
918 1362
919 s = (struct btrfs_super_block *)buffer; 1363 BUG_ON(sblock->page_count < 1);
920 logical = sbio->logical; 1364 page = sblock->pagev[0].page;
1365 mapped_buffer = kmap_atomic(page);
1366 s = (struct btrfs_super_block *)mapped_buffer;
1367 memcpy(on_disk_csum, s->csum, sdev->csum_size);
921 1368
922 if (logical != le64_to_cpu(s->bytenr)) 1369 if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
923 ++fail; 1370 ++fail;
924 1371
925 if (sbio->spag[0].generation != le64_to_cpu(s->generation)) 1372 if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
926 ++fail; 1373 ++fail;
927 1374
928 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1375 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
929 ++fail; 1376 ++fail;
930 1377
931 crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, 1378 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
932 PAGE_SIZE - BTRFS_CSUM_SIZE); 1379 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
933 btrfs_csum_final(crc, csum); 1380 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
934 if (memcmp(csum, s->csum, sbio->sdev->csum_size)) 1381 index = 0;
1382 for (;;) {
1383 u64 l = min_t(u64, len, mapped_size);
1384
1385 crc = btrfs_csum_data(root, p, crc, l);
1386 kunmap_atomic(mapped_buffer);
1387 len -= l;
1388 if (len == 0)
1389 break;
1390 index++;
1391 BUG_ON(index >= sblock->page_count);
1392 BUG_ON(!sblock->pagev[index].page);
1393 page = sblock->pagev[index].page;
1394 mapped_buffer = kmap_atomic(page);
1395 mapped_size = PAGE_SIZE;
1396 p = mapped_buffer;
1397 }
1398
1399 btrfs_csum_final(crc, calculated_csum);
1400 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
935 ++fail; 1401 ++fail;
936 1402
937 if (fail) { 1403 if (fail) {
@@ -948,29 +1414,42 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
948 return fail; 1414 return fail;
949} 1415}
950 1416
951static int scrub_submit(struct scrub_dev *sdev) 1417static void scrub_block_get(struct scrub_block *sblock)
1418{
1419 atomic_inc(&sblock->ref_count);
1420}
1421
1422static void scrub_block_put(struct scrub_block *sblock)
1423{
1424 if (atomic_dec_and_test(&sblock->ref_count)) {
1425 int i;
1426
1427 for (i = 0; i < sblock->page_count; i++)
1428 if (sblock->pagev[i].page)
1429 __free_page(sblock->pagev[i].page);
1430 kfree(sblock);
1431 }
1432}
1433
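
scrub_block_get()/scrub_block_put() introduce reference counting for blocks: scrub_pages() creates a block with one reference for itself, every page queued into a bio takes another, and whoever drops the count to zero (the submitting path or the last bio completion) frees the pages and the block. The ownership rule as a self-contained userspace model with C11 atomics (the struct layout is illustrative):

#include <stdatomic.h>
#include <stdlib.h>

struct blk {
	atomic_int ref_count;
	int page_count;
	void *pagev[16];
};

static struct blk *blk_alloc(void)
{
	struct blk *b = calloc(1, sizeof(*b));

	if (b)
		atomic_init(&b->ref_count, 1);	/* creator's reference */
	return b;
}

static void blk_get(struct blk *b)
{
	atomic_fetch_add(&b->ref_count, 1);
}

static void blk_put(struct blk *b)
{
	/* fetch_sub returns the old value: 1 means we were the last ref */
	if (atomic_fetch_sub(&b->ref_count, 1) == 1) {
		for (int i = 0; i < b->page_count; i++)
			free(b->pagev[i]);
		free(b);
	}
}
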
1434static void scrub_submit(struct scrub_dev *sdev)
952{ 1435{
953 struct scrub_bio *sbio; 1436 struct scrub_bio *sbio;
954 1437
955 if (sdev->curr == -1) 1438 if (sdev->curr == -1)
956 return 0; 1439 return;
957 1440
958 sbio = sdev->bios[sdev->curr]; 1441 sbio = sdev->bios[sdev->curr];
959 sbio->err = 0;
960 sdev->curr = -1; 1442 sdev->curr = -1;
961 atomic_inc(&sdev->in_flight); 1443 atomic_inc(&sdev->in_flight);
962 1444
963 btrfsic_submit_bio(READ, sbio->bio); 1445 btrfsic_submit_bio(READ, sbio->bio);
964
965 return 0;
966} 1446}
967 1447
968static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, 1448static int scrub_add_page_to_bio(struct scrub_dev *sdev,
969 u64 physical, u64 flags, u64 gen, int mirror_num, 1449 struct scrub_page *spage)
970 u8 *csum, int force)
971{ 1450{
1451 struct scrub_block *sblock = spage->sblock;
972 struct scrub_bio *sbio; 1452 struct scrub_bio *sbio;
973 struct page *page;
974 int ret; 1453 int ret;
975 1454
976again: 1455again:
@@ -983,7 +1462,7 @@ again:
983 if (sdev->curr != -1) { 1462 if (sdev->curr != -1) {
984 sdev->first_free = sdev->bios[sdev->curr]->next_free; 1463 sdev->first_free = sdev->bios[sdev->curr]->next_free;
985 sdev->bios[sdev->curr]->next_free = -1; 1464 sdev->bios[sdev->curr]->next_free = -1;
986 sdev->bios[sdev->curr]->count = 0; 1465 sdev->bios[sdev->curr]->page_count = 0;
987 spin_unlock(&sdev->list_lock); 1466 spin_unlock(&sdev->list_lock);
988 } else { 1467 } else {
989 spin_unlock(&sdev->list_lock); 1468 spin_unlock(&sdev->list_lock);
@@ -991,62 +1470,200 @@ again:
991 } 1470 }
992 } 1471 }
993 sbio = sdev->bios[sdev->curr]; 1472 sbio = sdev->bios[sdev->curr];
994 if (sbio->count == 0) { 1473 if (sbio->page_count == 0) {
995 struct bio *bio; 1474 struct bio *bio;
996 1475
997 sbio->physical = physical; 1476 sbio->physical = spage->physical;
998 sbio->logical = logical; 1477 sbio->logical = spage->logical;
999 bio = bio_alloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); 1478 bio = sbio->bio;
1000 if (!bio) 1479 if (!bio) {
1001 return -ENOMEM; 1480 bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio);
1481 if (!bio)
1482 return -ENOMEM;
1483 sbio->bio = bio;
1484 }
1002 1485
1003 bio->bi_private = sbio; 1486 bio->bi_private = sbio;
1004 bio->bi_end_io = scrub_bio_end_io; 1487 bio->bi_end_io = scrub_bio_end_io;
1005 bio->bi_bdev = sdev->dev->bdev; 1488 bio->bi_bdev = sdev->dev->bdev;
1006 bio->bi_sector = sbio->physical >> 9; 1489 bio->bi_sector = spage->physical >> 9;
1007 sbio->err = 0; 1490 sbio->err = 0;
1008 sbio->bio = bio; 1491 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1009 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || 1492 spage->physical ||
1010 sbio->logical + sbio->count * PAGE_SIZE != logical) { 1493 sbio->logical + sbio->page_count * PAGE_SIZE !=
1011 ret = scrub_submit(sdev); 1494 spage->logical) {
1012 if (ret) 1495 scrub_submit(sdev);
1013 return ret;
1014 goto again; 1496 goto again;
1015 } 1497 }
1016 sbio->spag[sbio->count].flags = flags;
1017 sbio->spag[sbio->count].generation = gen;
1018 sbio->spag[sbio->count].have_csum = 0;
1019 sbio->spag[sbio->count].mirror_num = mirror_num;
1020
1021 page = alloc_page(GFP_NOFS);
1022 if (!page)
1023 return -ENOMEM;
1024 1498
1025 ret = bio_add_page(sbio->bio, page, PAGE_SIZE, 0); 1499 sbio->pagev[sbio->page_count] = spage;
1026 if (!ret) { 1500 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1027 __free_page(page); 1501 if (ret != PAGE_SIZE) {
1028 ret = scrub_submit(sdev); 1502 if (sbio->page_count < 1) {
1029 if (ret) 1503 bio_put(sbio->bio);
1030 return ret; 1504 sbio->bio = NULL;
1505 return -EIO;
1506 }
1507 scrub_submit(sdev);
1031 goto again; 1508 goto again;
1032 } 1509 }
1033 1510
1034 if (csum) { 1511 scrub_block_get(sblock); /* one for the added page */
1035 sbio->spag[sbio->count].have_csum = 1; 1512 atomic_inc(&sblock->outstanding_pages);
1036 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); 1513 sbio->page_count++;
1514 if (sbio->page_count == sdev->pages_per_bio)
1515 scrub_submit(sdev);
1516
1517 return 0;
1518}
1519
1520static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
1521 u64 physical, u64 flags, u64 gen, int mirror_num,
1522 u8 *csum, int force)
1523{
1524 struct scrub_block *sblock;
1525 int index;
1526
1527 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
1528 if (!sblock) {
1529 spin_lock(&sdev->stat_lock);
1530 sdev->stat.malloc_errors++;
1531 spin_unlock(&sdev->stat_lock);
1532 return -ENOMEM;
1037 } 1533 }
1038 ++sbio->count; 1534
1039 if (sbio->count == SCRUB_PAGES_PER_BIO || force) { 1535 /* one ref inside this function, plus one for each page later on */
1536 atomic_set(&sblock->ref_count, 1);
1537 sblock->sdev = sdev;
1538 sblock->no_io_error_seen = 1;
1539
1540 for (index = 0; len > 0; index++) {
1541 struct scrub_page *spage = sblock->pagev + index;
1542 u64 l = min_t(u64, len, PAGE_SIZE);
1543
1544 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
1545 spage->page = alloc_page(GFP_NOFS);
1546 if (!spage->page) {
1547 spin_lock(&sdev->stat_lock);
1548 sdev->stat.malloc_errors++;
1549 spin_unlock(&sdev->stat_lock);
1550 while (index > 0) {
1551 index--;
1552 __free_page(sblock->pagev[index].page);
1553 }
1554 kfree(sblock);
1555 return -ENOMEM;
1556 }
1557 spage->sblock = sblock;
1558 spage->bdev = sdev->dev->bdev;
1559 spage->flags = flags;
1560 spage->generation = gen;
1561 spage->logical = logical;
1562 spage->physical = physical;
1563 spage->mirror_num = mirror_num;
1564 if (csum) {
1565 spage->have_csum = 1;
1566 memcpy(spage->csum, csum, sdev->csum_size);
1567 } else {
1568 spage->have_csum = 0;
1569 }
1570 sblock->page_count++;
1571 len -= l;
1572 logical += l;
1573 physical += l;
1574 }
1575
1576 BUG_ON(sblock->page_count == 0);
1577 for (index = 0; index < sblock->page_count; index++) {
1578 struct scrub_page *spage = sblock->pagev + index;
1040 int ret; 1579 int ret;
1041 1580
1042 ret = scrub_submit(sdev); 1581 ret = scrub_add_page_to_bio(sdev, spage);
1043 if (ret) 1582 if (ret) {
1583 scrub_block_put(sblock);
1044 return ret; 1584 return ret;
1585 }
1045 } 1586 }
1046 1587
1588 if (force)
1589 scrub_submit(sdev);
1590
1591 /* last one frees, either here or in bio completion for last page */
1592 scrub_block_put(sblock);
1047 return 0; 1593 return 0;
1048} 1594}
1049 1595
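
scrub_pages() above splits a logical extent into PAGE_SIZE pieces, allocating a backing page for each; if any allocation fails partway, it walks the index back down and frees everything it already got before returning -ENOMEM, so the caller never sees a half-built block. That unwind idiom, reduced to userspace C (sizes and the container struct are illustrative; the struct is assumed zero-initialized):

#include <errno.h>
#include <stdlib.h>

#define PAGE_SZ 4096
#define MAX_PAGES 32

struct blk {
	int page_count;
	void *pagev[MAX_PAGES];
};

/* allocate enough pages to cover len bytes: all or nothing */
static int blk_alloc_pages(struct blk *b, size_t len)
{
	int index;

	for (index = 0; len > 0 && index < MAX_PAGES; index++) {
		size_t l = len < PAGE_SZ ? len : PAGE_SZ;

		b->pagev[index] = malloc(PAGE_SZ);
		if (!b->pagev[index]) {
			while (index > 0)	/* unwind the partial work */
				free(b->pagev[--index]);
			b->page_count = 0;
			return -ENOMEM;
		}
		b->page_count++;
		len -= l;
	}
	return 0;
}
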
1596static void scrub_bio_end_io(struct bio *bio, int err)
1597{
1598 struct scrub_bio *sbio = bio->bi_private;
1599 struct scrub_dev *sdev = sbio->sdev;
1600 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
1601
1602 sbio->err = err;
1603 sbio->bio = bio;
1604
1605 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
1606}
1607
1608static void scrub_bio_end_io_worker(struct btrfs_work *work)
1609{
1610 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1611 struct scrub_dev *sdev = sbio->sdev;
1612 int i;
1613
1614 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO);
1615 if (sbio->err) {
1616 for (i = 0; i < sbio->page_count; i++) {
1617 struct scrub_page *spage = sbio->pagev[i];
1618
1619 spage->io_error = 1;
1620 spage->sblock->no_io_error_seen = 0;
1621 }
1622 }
1623
1624 /* now complete the scrub_block items that have all pages completed */
1625 for (i = 0; i < sbio->page_count; i++) {
1626 struct scrub_page *spage = sbio->pagev[i];
1627 struct scrub_block *sblock = spage->sblock;
1628
1629 if (atomic_dec_and_test(&sblock->outstanding_pages))
1630 scrub_block_complete(sblock);
1631 scrub_block_put(sblock);
1632 }
1633
1634 if (sbio->err) {
1635 /* what is this good for??? */
1636 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
1637 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
1638 sbio->bio->bi_phys_segments = 0;
1639 sbio->bio->bi_idx = 0;
1640
1641 for (i = 0; i < sbio->page_count; i++) {
1642 struct bio_vec *bi;
1643 bi = &sbio->bio->bi_io_vec[i];
1644 bi->bv_offset = 0;
1645 bi->bv_len = PAGE_SIZE;
1646 }
1647 }
1648
1649 bio_put(sbio->bio);
1650 sbio->bio = NULL;
1651 spin_lock(&sdev->list_lock);
1652 sbio->next_free = sdev->first_free;
1653 sdev->first_free = sbio->index;
1654 spin_unlock(&sdev->list_lock);
1655 atomic_dec(&sdev->in_flight);
1656 wake_up(&sdev->list_wait);
1657}
1658
1659static void scrub_block_complete(struct scrub_block *sblock)
1660{
1661 if (!sblock->no_io_error_seen)
1662 scrub_handle_errored_block(sblock);
1663 else
1664 scrub_checksum(sblock);
1665}
1666
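
The worker above only judges a block once all of its pages are back from disk: scrub_add_page_to_bio() bumps outstanding_pages for every page it queues, each completed page decrements it, and the completion that brings the count to zero calls scrub_block_complete(), which either verifies the checksums or, if any page reported an I/O error, hands the block to the repair code. The counting scheme as a userspace model (illustrative names; the kernel tracks the inverse flag, no_io_error_seen):

#include <stdatomic.h>

struct blk {
	atomic_int outstanding_pages;
	int io_error_seen;
};

static void verify(struct blk *b) { (void)b; /* checksum all pages */ }
static void repair(struct blk *b) { (void)b; /* re-read or rewrite */ }

/* called once per completed page, possibly from several threads */
static void page_completed(struct blk *b, int io_error)
{
	if (io_error)
		b->io_error_seen = 1;

	/* the completion that drops the count to zero owns the block */
	if (atomic_fetch_sub(&b->outstanding_pages, 1) == 1) {
		if (b->io_error_seen)
			repair(b);
		else
			verify(b);
	}
}
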
1050static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, 1667static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
1051 u8 *csum) 1668 u8 *csum)
1052{ 1669{
@@ -1054,7 +1671,6 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
1054 int ret = 0; 1671 int ret = 0;
1055 unsigned long i; 1672 unsigned long i;
1056 unsigned long num_sectors; 1673 unsigned long num_sectors;
1057 u32 sectorsize = sdev->dev->dev_root->sectorsize;
1058 1674
1059 while (!list_empty(&sdev->csum_list)) { 1675 while (!list_empty(&sdev->csum_list)) {
1060 sum = list_first_entry(&sdev->csum_list, 1676 sum = list_first_entry(&sdev->csum_list,
@@ -1072,7 +1688,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
1072 if (!sum) 1688 if (!sum)
1073 return 0; 1689 return 0;
1074 1690
1075 num_sectors = sum->len / sectorsize; 1691 num_sectors = sum->len / sdev->sectorsize;
1076 for (i = 0; i < num_sectors; ++i) { 1692 for (i = 0; i < num_sectors; ++i) {
1077 if (sum->sums[i].bytenr == logical) { 1693 if (sum->sums[i].bytenr == logical) {
1078 memcpy(csum, &sum->sums[i].sum, sdev->csum_size); 1694 memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
@@ -1093,9 +1709,28 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
1093{ 1709{
1094 int ret; 1710 int ret;
1095 u8 csum[BTRFS_CSUM_SIZE]; 1711 u8 csum[BTRFS_CSUM_SIZE];
1712 u32 blocksize;
1713
1714 if (flags & BTRFS_EXTENT_FLAG_DATA) {
1715 blocksize = sdev->sectorsize;
1716 spin_lock(&sdev->stat_lock);
1717 sdev->stat.data_extents_scrubbed++;
1718 sdev->stat.data_bytes_scrubbed += len;
1719 spin_unlock(&sdev->stat_lock);
1720 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1721 BUG_ON(sdev->nodesize != sdev->leafsize);
1722 blocksize = sdev->nodesize;
1723 spin_lock(&sdev->stat_lock);
1724 sdev->stat.tree_extents_scrubbed++;
1725 sdev->stat.tree_bytes_scrubbed += len;
1726 spin_unlock(&sdev->stat_lock);
1727 } else {
1728 blocksize = sdev->sectorsize;
1729 BUG_ON(1);
1730 }
1096 1731
1097 while (len) { 1732 while (len) {
1098 u64 l = min_t(u64, len, PAGE_SIZE); 1733 u64 l = min_t(u64, len, blocksize);
1099 int have_csum = 0; 1734 int have_csum = 0;
1100 1735
1101 if (flags & BTRFS_EXTENT_FLAG_DATA) { 1736 if (flags & BTRFS_EXTENT_FLAG_DATA) {
@@ -1104,8 +1739,8 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
1104 if (have_csum == 0) 1739 if (have_csum == 0)
1105 ++sdev->stat.no_csum; 1740 ++sdev->stat.no_csum;
1106 } 1741 }
1107 ret = scrub_page(sdev, logical, l, physical, flags, gen, 1742 ret = scrub_pages(sdev, logical, l, physical, flags, gen,
1108 mirror_num, have_csum ? csum : NULL, 0); 1743 mirror_num, have_csum ? csum : NULL, 0);
1109 if (ret) 1744 if (ret)
1110 return ret; 1745 return ret;
1111 len -= l; 1746 len -= l;
@@ -1170,6 +1805,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
1170 if (!path) 1805 if (!path)
1171 return -ENOMEM; 1806 return -ENOMEM;
1172 1807
1808 /*
 1809	 * work on the commit root. The related disk blocks are static as
 1810	 * long as COW is applied. This means it is safe to rewrite
1811 * them to repair disk errors without any race conditions
1812 */
1173 path->search_commit_root = 1; 1813 path->search_commit_root = 1;
1174 path->skip_locking = 1; 1814 path->skip_locking = 1;
1175 1815
@@ -1516,15 +2156,18 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1516 struct btrfs_device *device = sdev->dev; 2156 struct btrfs_device *device = sdev->dev;
1517 struct btrfs_root *root = device->dev_root; 2157 struct btrfs_root *root = device->dev_root;
1518 2158
2159 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
2160 return -EIO;
2161
1519 gen = root->fs_info->last_trans_committed; 2162 gen = root->fs_info->last_trans_committed;
1520 2163
1521 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 2164 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1522 bytenr = btrfs_sb_offset(i); 2165 bytenr = btrfs_sb_offset(i);
1523 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) 2166 if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
1524 break; 2167 break;
1525 2168
1526 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr, 2169 ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
1527 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1); 2170 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1528 if (ret) 2171 if (ret)
1529 return ret; 2172 return ret;
1530 } 2173 }
@@ -1583,10 +2226,30 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1583 /* 2226 /*
1584 * check some assumptions 2227 * check some assumptions
1585 */ 2228 */
1586 if (root->sectorsize != PAGE_SIZE || 2229 if (root->nodesize != root->leafsize) {
1587 root->sectorsize != root->leafsize || 2230 printk(KERN_ERR
1588 root->sectorsize != root->nodesize) { 2231 "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
1589 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n"); 2232 root->nodesize, root->leafsize);
2233 return -EINVAL;
2234 }
2235
2236 if (root->nodesize > BTRFS_STRIPE_LEN) {
2237 /*
 2238		 * in this case scrub is unable to calculate the checksum
 2239		 * the way it is implemented. Do not handle this
2240 * situation at all because it won't ever happen.
2241 */
2242 printk(KERN_ERR
2243 "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
2244 root->nodesize, BTRFS_STRIPE_LEN);
2245 return -EINVAL;
2246 }
2247
2248 if (root->sectorsize != PAGE_SIZE) {
2249 /* not supported for data w/o checksums */
2250 printk(KERN_ERR
2251 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
2252 root->sectorsize, (unsigned long long)PAGE_SIZE);
1590 return -EINVAL; 2253 return -EINVAL;
1591 } 2254 }
1592 2255
@@ -1656,7 +2319,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1656 return ret; 2319 return ret;
1657} 2320}
1658 2321
1659int btrfs_scrub_pause(struct btrfs_root *root) 2322void btrfs_scrub_pause(struct btrfs_root *root)
1660{ 2323{
1661 struct btrfs_fs_info *fs_info = root->fs_info; 2324 struct btrfs_fs_info *fs_info = root->fs_info;
1662 2325
@@ -1671,34 +2334,28 @@ int btrfs_scrub_pause(struct btrfs_root *root)
1671 mutex_lock(&fs_info->scrub_lock); 2334 mutex_lock(&fs_info->scrub_lock);
1672 } 2335 }
1673 mutex_unlock(&fs_info->scrub_lock); 2336 mutex_unlock(&fs_info->scrub_lock);
1674
1675 return 0;
1676} 2337}
1677 2338
1678int btrfs_scrub_continue(struct btrfs_root *root) 2339void btrfs_scrub_continue(struct btrfs_root *root)
1679{ 2340{
1680 struct btrfs_fs_info *fs_info = root->fs_info; 2341 struct btrfs_fs_info *fs_info = root->fs_info;
1681 2342
1682 atomic_dec(&fs_info->scrub_pause_req); 2343 atomic_dec(&fs_info->scrub_pause_req);
1683 wake_up(&fs_info->scrub_pause_wait); 2344 wake_up(&fs_info->scrub_pause_wait);
1684 return 0;
1685} 2345}
1686 2346
1687int btrfs_scrub_pause_super(struct btrfs_root *root) 2347void btrfs_scrub_pause_super(struct btrfs_root *root)
1688{ 2348{
1689 down_write(&root->fs_info->scrub_super_lock); 2349 down_write(&root->fs_info->scrub_super_lock);
1690 return 0;
1691} 2350}
1692 2351
1693int btrfs_scrub_continue_super(struct btrfs_root *root) 2352void btrfs_scrub_continue_super(struct btrfs_root *root)
1694{ 2353{
1695 up_write(&root->fs_info->scrub_super_lock); 2354 up_write(&root->fs_info->scrub_super_lock);
1696 return 0;
1697} 2355}
1698 2356
1699int btrfs_scrub_cancel(struct btrfs_root *root) 2357int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
1700{ 2358{
1701 struct btrfs_fs_info *fs_info = root->fs_info;
1702 2359
1703 mutex_lock(&fs_info->scrub_lock); 2360 mutex_lock(&fs_info->scrub_lock);
1704 if (!atomic_read(&fs_info->scrubs_running)) { 2361 if (!atomic_read(&fs_info->scrubs_running)) {
@@ -1719,6 +2376,11 @@ int btrfs_scrub_cancel(struct btrfs_root *root)
1719 return 0; 2376 return 0;
1720} 2377}
1721 2378
2379int btrfs_scrub_cancel(struct btrfs_root *root)
2380{
2381 return __btrfs_scrub_cancel(root->fs_info);
2382}
2383
1722int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) 2384int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1723{ 2385{
1724 struct btrfs_fs_info *fs_info = root->fs_info; 2386 struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1741,6 +2403,7 @@ int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1741 2403
1742 return 0; 2404 return 0;
1743} 2405}
2406
1744int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) 2407int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1745{ 2408{
1746 struct btrfs_fs_info *fs_info = root->fs_info; 2409 struct btrfs_fs_info *fs_info = root->fs_info;
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index bc1f6ad18442..c6ffa5812419 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -44,8 +44,9 @@
44#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ 44#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
45u##bits btrfs_##name(struct extent_buffer *eb, type *s); \ 45u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
46void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \ 46void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \
47u##bits btrfs_##name(struct extent_buffer *eb, \ 47void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \
48 type *s) \ 48u##bits btrfs_token_##name(struct extent_buffer *eb, \
49 type *s, struct btrfs_map_token *token) \
49{ \ 50{ \
50 unsigned long part_offset = (unsigned long)s; \ 51 unsigned long part_offset = (unsigned long)s; \
51 unsigned long offset = part_offset + offsetof(type, member); \ 52 unsigned long offset = part_offset + offsetof(type, member); \
@@ -54,9 +55,18 @@ u##bits btrfs_##name(struct extent_buffer *eb, \
54 char *kaddr; \ 55 char *kaddr; \
55 unsigned long map_start; \ 56 unsigned long map_start; \
56 unsigned long map_len; \ 57 unsigned long map_len; \
58 unsigned long mem_len = sizeof(((type *)0)->member); \
57 u##bits res; \ 59 u##bits res; \
60 if (token && token->kaddr && token->offset <= offset && \
61 token->eb == eb && \
62 (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
63 kaddr = token->kaddr; \
64 p = (type *)(kaddr + part_offset - token->offset); \
65 res = le##bits##_to_cpu(p->member); \
66 return res; \
67 } \
58 err = map_private_extent_buffer(eb, offset, \ 68 err = map_private_extent_buffer(eb, offset, \
59 sizeof(((type *)0)->member), \ 69 mem_len, \
60 &kaddr, &map_start, &map_len); \ 70 &kaddr, &map_start, &map_len); \
61 if (err) { \ 71 if (err) { \
62 __le##bits leres; \ 72 __le##bits leres; \
@@ -65,10 +75,15 @@ u##bits btrfs_##name(struct extent_buffer *eb, \
65 } \ 75 } \
66 p = (type *)(kaddr + part_offset - map_start); \ 76 p = (type *)(kaddr + part_offset - map_start); \
67 res = le##bits##_to_cpu(p->member); \ 77 res = le##bits##_to_cpu(p->member); \
78 if (token) { \
79 token->kaddr = kaddr; \
80 token->offset = map_start; \
81 token->eb = eb; \
82 } \
68 return res; \ 83 return res; \
69} \ 84} \
70void btrfs_set_##name(struct extent_buffer *eb, \ 85void btrfs_set_token_##name(struct extent_buffer *eb, \
71 type *s, u##bits val) \ 86 type *s, u##bits val, struct btrfs_map_token *token) \
72{ \ 87{ \
73 unsigned long part_offset = (unsigned long)s; \ 88 unsigned long part_offset = (unsigned long)s; \
74 unsigned long offset = part_offset + offsetof(type, member); \ 89 unsigned long offset = part_offset + offsetof(type, member); \
@@ -77,8 +92,17 @@ void btrfs_set_##name(struct extent_buffer *eb, \
77 char *kaddr; \ 92 char *kaddr; \
78 unsigned long map_start; \ 93 unsigned long map_start; \
79 unsigned long map_len; \ 94 unsigned long map_len; \
95 unsigned long mem_len = sizeof(((type *)0)->member); \
96 if (token && token->kaddr && token->offset <= offset && \
97 token->eb == eb && \
98 (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
99 kaddr = token->kaddr; \
100 p = (type *)(kaddr + part_offset - token->offset); \
101 p->member = cpu_to_le##bits(val); \
102 return; \
103 } \
80 err = map_private_extent_buffer(eb, offset, \ 104 err = map_private_extent_buffer(eb, offset, \
81 sizeof(((type *)0)->member), \ 105 mem_len, \
82 &kaddr, &map_start, &map_len); \ 106 &kaddr, &map_start, &map_len); \
83 if (err) { \ 107 if (err) { \
84 __le##bits val2; \ 108 __le##bits val2; \
@@ -88,7 +112,22 @@ void btrfs_set_##name(struct extent_buffer *eb, \
88 } \ 112 } \
89 p = (type *)(kaddr + part_offset - map_start); \ 113 p = (type *)(kaddr + part_offset - map_start); \
90 p->member = cpu_to_le##bits(val); \ 114 p->member = cpu_to_le##bits(val); \
91} 115 if (token) { \
116 token->kaddr = kaddr; \
117 token->offset = map_start; \
118 token->eb = eb; \
119 } \
120} \
121void btrfs_set_##name(struct extent_buffer *eb, \
122 type *s, u##bits val) \
123{ \
124 btrfs_set_token_##name(eb, s, val, NULL); \
125} \
126u##bits btrfs_##name(struct extent_buffer *eb, \
127 type *s) \
128{ \
129 return btrfs_token_##name(eb, s, NULL); \
130} \
92 131
93#include "ctree.h" 132#include "ctree.h"
94 133
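
The new *_token getter and setter variants cache the result of map_private_extent_buffer(): a btrfs_map_token remembers which extent buffer was mapped, the kernel address of the mapping, and the buffer offset it starts at, so a burst of field accesses that all land inside the same mapped page pays the mapping cost only once. The caching test, modeled in userspace over a flat buffer (the types and helpers are illustrative stand-ins):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define PAGE_SZ 4096

struct map_token {		/* plays the role of btrfs_map_token */
	const void *buf;	/* which buffer the cache belongs to */
	const char *kaddr;	/* start of the cached "mapping" */
	size_t offset;		/* buffer offset the mapping starts at */
};

/* slow path: pretend to map the page containing offset */
static const char *map_page(const char *buf, size_t offset, size_t *start)
{
	*start = offset & ~((size_t)PAGE_SZ - 1);
	return buf + *start;
}

static uint32_t get_u32(const char *buf, size_t offset, struct map_token *tok)
{
	const char *kaddr;
	size_t start;
	uint32_t v;

	/* fast path: the cached mapping already covers this field */
	if (tok && tok->kaddr && tok->buf == buf &&
	    tok->offset <= offset &&
	    tok->offset + PAGE_SZ >= offset + sizeof(v)) {
		memcpy(&v, tok->kaddr + (offset - tok->offset), sizeof(v));
		return v;
	}

	kaddr = map_page(buf, offset, &start);
	memcpy(&v, kaddr + (offset - start), sizeof(v));
	if (tok) {		/* remember the mapping for the next access */
		tok->buf = buf;
		tok->kaddr = kaddr;
		tok->offset = start;
	}
	return v;
}

Callers that walk many fields of one item pass the same token to every accessor; passing NULL falls back to the unconditional slow path, which is exactly what the plain btrfs_##name wrappers at the end of the macro do.
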
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 81df3fec6a6d..8d5d380f7bdb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -76,6 +76,9 @@ static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
76 case -EROFS: 76 case -EROFS:
77 errstr = "Readonly filesystem"; 77 errstr = "Readonly filesystem";
78 break; 78 break;
79 case -EEXIST:
80 errstr = "Object already exists";
81 break;
79 default: 82 default:
80 if (nbuf) { 83 if (nbuf) {
81 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 84 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
@@ -116,6 +119,8 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
116 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 119 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
117 sb->s_flags |= MS_RDONLY; 120 sb->s_flags |= MS_RDONLY;
118 printk(KERN_INFO "btrfs is forced readonly\n"); 121 printk(KERN_INFO "btrfs is forced readonly\n");
122 __btrfs_scrub_cancel(fs_info);
123// WARN_ON(1);
119 } 124 }
120} 125}
121 126
@@ -124,25 +129,132 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
124 * invokes the appropriate error response. 129 * invokes the appropriate error response.
125 */ 130 */
126void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, 131void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
127 unsigned int line, int errno) 132 unsigned int line, int errno, const char *fmt, ...)
128{ 133{
129 struct super_block *sb = fs_info->sb; 134 struct super_block *sb = fs_info->sb;
130 char nbuf[16]; 135 char nbuf[16];
131 const char *errstr; 136 const char *errstr;
137 va_list args;
138 va_start(args, fmt);
132 139
133 /* 140 /*
134 * Special case: if the error is EROFS, and we're already 141 * Special case: if the error is EROFS, and we're already
135 * under MS_RDONLY, then it is safe here. 142 * under MS_RDONLY, then it is safe here.
136 */ 143 */
137 if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) 144 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
145 return;
146
147 errstr = btrfs_decode_error(fs_info, errno, nbuf);
148 if (fmt) {
149 struct va_format vaf = {
150 .fmt = fmt,
151 .va = &args,
152 };
153
154 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n",
155 sb->s_id, function, line, errstr, &vaf);
156 } else {
157 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
158 sb->s_id, function, line, errstr);
159 }
160
161 /* Don't go through full error handling during mount */
162 if (sb->s_flags & MS_BORN) {
163 save_error_info(fs_info);
164 btrfs_handle_error(fs_info);
165 }
166 va_end(args);
167}
168
169const char *logtypes[] = {
170 "emergency",
171 "alert",
172 "critical",
173 "error",
174 "warning",
175 "notice",
176 "info",
177 "debug",
178};
179
180void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
181{
182 struct super_block *sb = fs_info->sb;
183 char lvl[4];
184 struct va_format vaf;
185 va_list args;
186 const char *type = logtypes[4];
187
188 va_start(args, fmt);
189
190 if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') {
 191		strncpy(lvl, fmt, 3); lvl[3] = '\0';
 192		type = logtypes[fmt[1] - '0'];
 193		fmt += 3;
194 } else
195 *lvl = '\0';
196
197 vaf.fmt = fmt;
198 vaf.va = &args;
199 printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf);
200}
201
202/*
203 * We only mark the transaction aborted and then set the file system read-only.
204 * This will prevent new transactions from starting or trying to join this
205 * one.
206 *
207 * This means that error recovery at the call site is limited to freeing
208 * any local memory allocations and passing the error code up without
209 * further cleanup. The transaction should complete as it normally would
210 * in the call path but will return -EIO.
211 *
212 * We'll complete the cleanup in btrfs_end_transaction and
213 * btrfs_commit_transaction.
214 */
215void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
216 struct btrfs_root *root, const char *function,
217 unsigned int line, int errno)
218{
219 WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted");
220 trans->aborted = errno;
221 /* Nothing used. The other threads that have joined this
222 * transaction may be able to continue. */
223 if (!trans->blocks_used) {
224 btrfs_printk(root->fs_info, "Aborting unused transaction.\n");
138 return; 225 return;
226 }
227 trans->transaction->aborted = errno;
228 __btrfs_std_error(root->fs_info, function, line, errno, NULL);
229}
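
Once one step of a transaction fails, the error is recorded in both the handle and the running transaction, and every later participant gives up and returns -EIO rather than trying to recover; call sites reach this function through the btrfs_abort_transaction() wrapper, which supplies __func__ and __LINE__. A small userspace model of how a sticky aborted flag short-circuits the remaining steps (the structs and helpers are illustrative):

#include <errno.h>

struct txn {
	int aborted;		/* 0, or the first errno that killed us */
};

static void abort_txn(struct txn *t, int error)
{
	if (!t->aborted)	/* keep the first error, the root cause */
		t->aborted = error;
}

static int do_step(struct txn *t, int (*step)(void))
{
	int ret;

	if (t->aborted)		/* transaction already dead: just say -EIO */
		return -EIO;

	ret = step();
	if (ret)
		abort_txn(t, ret);
	return ret;
}
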
230/*
231 * __btrfs_panic decodes unexpected, fatal errors from the caller,
232 * issues an alert, and either panics or BUGs, depending on mount options.
233 */
234void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
235 unsigned int line, int errno, const char *fmt, ...)
236{
237 char nbuf[16];
238 char *s_id = "<unknown>";
239 const char *errstr;
240 struct va_format vaf = { .fmt = fmt };
241 va_list args;
139 242
140 errstr = btrfs_decode_error(fs_info, errno, nbuf); 243 if (fs_info)
141 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", 244 s_id = fs_info->sb->s_id;
142 sb->s_id, function, line, errstr);
143 save_error_info(fs_info);
144 245
145 btrfs_handle_error(fs_info); 246 va_start(args, fmt);
247 vaf.va = &args;
248
249 errstr = btrfs_decode_error(fs_info, errno, nbuf);
250 if (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)
251 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
252 s_id, function, line, &vaf, errstr);
253
254 printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
255 s_id, function, line, &vaf, errstr);
256 va_end(args);
257 /* Caller calls BUG() */
146} 258}
147 259
148static void btrfs_put_super(struct super_block *sb) 260static void btrfs_put_super(struct super_block *sb)
@@ -166,7 +278,7 @@ enum {
166 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, 278 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
167 Opt_no_space_cache, Opt_recovery, Opt_skip_balance, 279 Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
168 Opt_check_integrity, Opt_check_integrity_including_extent_data, 280 Opt_check_integrity, Opt_check_integrity_including_extent_data,
169 Opt_check_integrity_print_mask, 281 Opt_check_integrity_print_mask, Opt_fatal_errors,
170 Opt_err, 282 Opt_err,
171}; 283};
172 284
@@ -206,12 +318,14 @@ static match_table_t tokens = {
206 {Opt_check_integrity, "check_int"}, 318 {Opt_check_integrity, "check_int"},
207 {Opt_check_integrity_including_extent_data, "check_int_data"}, 319 {Opt_check_integrity_including_extent_data, "check_int_data"},
208 {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, 320 {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
321 {Opt_fatal_errors, "fatal_errors=%s"},
209 {Opt_err, NULL}, 322 {Opt_err, NULL},
210}; 323};
211 324
212/* 325/*
213 * Regular mount options parser. Everything that is needed only when 326 * Regular mount options parser. Everything that is needed only when
214 * reading in a new superblock is parsed here. 327 * reading in a new superblock is parsed here.
328 * XXX JDM: This needs to be cleaned up for remount.
215 */ 329 */
216int btrfs_parse_options(struct btrfs_root *root, char *options) 330int btrfs_parse_options(struct btrfs_root *root, char *options)
217{ 331{
@@ -438,6 +552,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
438 ret = -EINVAL; 552 ret = -EINVAL;
439 goto out; 553 goto out;
440#endif 554#endif
555 case Opt_fatal_errors:
556 if (strcmp(args[0].from, "panic") == 0)
557 btrfs_set_opt(info->mount_opt,
558 PANIC_ON_FATAL_ERROR);
559 else if (strcmp(args[0].from, "bug") == 0)
560 btrfs_clear_opt(info->mount_opt,
561 PANIC_ON_FATAL_ERROR);
562 else {
563 ret = -EINVAL;
564 goto out;
565 }
566 break;
441 case Opt_err: 567 case Opt_err:
442 printk(KERN_INFO "btrfs: unrecognized mount option " 568 printk(KERN_INFO "btrfs: unrecognized mount option "
443 "'%s'\n", p); 569 "'%s'\n", p);
@@ -762,6 +888,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
762 seq_puts(seq, ",inode_cache"); 888 seq_puts(seq, ",inode_cache");
763 if (btrfs_test_opt(root, SKIP_BALANCE)) 889 if (btrfs_test_opt(root, SKIP_BALANCE))
764 seq_puts(seq, ",skip_balance"); 890 seq_puts(seq, ",skip_balance");
891 if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
892 seq_puts(seq, ",fatal_errors=panic");
765 return 0; 893 return 0;
766} 894}
767 895
@@ -995,11 +1123,20 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
995{ 1123{
996 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1124 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
997 struct btrfs_root *root = fs_info->tree_root; 1125 struct btrfs_root *root = fs_info->tree_root;
1126 unsigned old_flags = sb->s_flags;
1127 unsigned long old_opts = fs_info->mount_opt;
1128 unsigned long old_compress_type = fs_info->compress_type;
1129 u64 old_max_inline = fs_info->max_inline;
1130 u64 old_alloc_start = fs_info->alloc_start;
1131 int old_thread_pool_size = fs_info->thread_pool_size;
1132 unsigned int old_metadata_ratio = fs_info->metadata_ratio;
998 int ret; 1133 int ret;
999 1134
1000 ret = btrfs_parse_options(root, data); 1135 ret = btrfs_parse_options(root, data);
1001 if (ret) 1136 if (ret) {
1002 return -EINVAL; 1137 ret = -EINVAL;
1138 goto restore;
1139 }
1003 1140
1004 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1141 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1005 return 0; 1142 return 0;
@@ -1007,26 +1144,44 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1007 if (*flags & MS_RDONLY) { 1144 if (*flags & MS_RDONLY) {
1008 sb->s_flags |= MS_RDONLY; 1145 sb->s_flags |= MS_RDONLY;
1009 1146
1010 ret = btrfs_commit_super(root); 1147 ret = btrfs_commit_super(root);
1011 WARN_ON(ret); 1148 if (ret)
1149 goto restore;
1012 } else { 1150 } else {
1013 if (fs_info->fs_devices->rw_devices == 0) 1151 if (fs_info->fs_devices->rw_devices == 0) {
1014 return -EACCES; 1152 ret = -EACCES; goto restore;
1153 }
1015 1154
1016 if (btrfs_super_log_root(fs_info->super_copy) != 0) 1155 if (btrfs_super_log_root(fs_info->super_copy) != 0) {
1017 return -EINVAL; 1156 ret = -EINVAL; goto restore;
1157 }
1018 1158
1019 ret = btrfs_cleanup_fs_roots(fs_info); 1159 ret = btrfs_cleanup_fs_roots(fs_info);
1020 WARN_ON(ret); 1160 if (ret)
1161 goto restore;
1021 1162
1022 /* recover relocation */ 1163 /* recover relocation */
1023 ret = btrfs_recover_relocation(root); 1164 ret = btrfs_recover_relocation(root);
1024 WARN_ON(ret); 1165 if (ret)
1166 goto restore;
1025 1167
1026 sb->s_flags &= ~MS_RDONLY; 1168 sb->s_flags &= ~MS_RDONLY;
1027 } 1169 }
1028 1170
1029 return 0; 1171 return 0;
1172
1173restore:
1174 /* We've hit an error - don't reset MS_RDONLY */
1175 if (sb->s_flags & MS_RDONLY)
1176 old_flags |= MS_RDONLY;
1177 sb->s_flags = old_flags;
1178 fs_info->mount_opt = old_opts;
1179 fs_info->compress_type = old_compress_type;
1180 fs_info->max_inline = old_max_inline;
1181 fs_info->alloc_start = old_alloc_start;
1182 fs_info->thread_pool_size = old_thread_pool_size;
1183 fs_info->metadata_ratio = old_metadata_ratio;
1184 return ret;
1030} 1185}
1031 1186
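
btrfs_remount() now snapshots every tunable it may modify (mount options, compress type, max_inline, alloc_start, thread pool size, metadata ratio) before parsing the new options, and the restore: label rolls all of them back when any later step fails, taking care not to clear MS_RDONLY if the filesystem has meanwhile been forced read-only. The save/attempt/restore shape in miniature (the settings struct is an illustrative stand-in):

#include <errno.h>

struct settings {
	unsigned long flags;
	unsigned long opts;
	int thread_pool_size;
};

static int apply_new_options(struct settings *s)
{
	(void)s;
	return 0;		/* or -EINVAL on a bad option string */
}

static int remount(struct settings *live)
{
	struct settings saved = *live;	/* snapshot before touching anything */
	int ret;

	ret = apply_new_options(live);
	if (ret)
		goto restore;
	/* ... commit super, clean up roots, recover relocation ... */
	return 0;

restore:
	*live = saved;		/* put every field back the way it was */
	return ret;
}
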
1032/* Used to sort the devices by max_avail(descending sort) */ 1187/* Used to sort the devices by max_avail(descending sort) */
@@ -1356,9 +1511,7 @@ static int __init init_btrfs_fs(void)
1356 if (err) 1511 if (err)
1357 return err; 1512 return err;
1358 1513
1359 err = btrfs_init_compress(); 1514 btrfs_init_compress();
1360 if (err)
1361 goto free_sysfs;
1362 1515
1363 err = btrfs_init_cachep(); 1516 err = btrfs_init_cachep();
1364 if (err) 1517 if (err)
@@ -1384,6 +1537,8 @@ static int __init init_btrfs_fs(void)
1384 if (err) 1537 if (err)
1385 goto unregister_ioctl; 1538 goto unregister_ioctl;
1386 1539
1540 btrfs_init_lockdep();
1541
1387 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); 1542 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
1388 return 0; 1543 return 0;
1389 1544
@@ -1399,7 +1554,6 @@ free_cachep:
1399 btrfs_destroy_cachep(); 1554 btrfs_destroy_cachep();
1400free_compress: 1555free_compress:
1401 btrfs_exit_compress(); 1556 btrfs_exit_compress();
1402free_sysfs:
1403 btrfs_exit_sysfs(); 1557 btrfs_exit_sysfs();
1404 return err; 1558 return err;
1405} 1559}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 04b77e3ceb7a..8da29e8e4de1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -31,7 +31,7 @@
31 31
32#define BTRFS_ROOT_TRANS_TAG 0 32#define BTRFS_ROOT_TRANS_TAG 0
33 33
34static noinline void put_transaction(struct btrfs_transaction *transaction) 34void put_transaction(struct btrfs_transaction *transaction)
35{ 35{
36 WARN_ON(atomic_read(&transaction->use_count) == 0); 36 WARN_ON(atomic_read(&transaction->use_count) == 0);
37 if (atomic_dec_and_test(&transaction->use_count)) { 37 if (atomic_dec_and_test(&transaction->use_count)) {
@@ -58,6 +58,12 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
58 58
59 spin_lock(&root->fs_info->trans_lock); 59 spin_lock(&root->fs_info->trans_lock);
60loop: 60loop:
61 /* The file system has been taken offline. No new transactions. */
62 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
63 spin_unlock(&root->fs_info->trans_lock);
64 return -EROFS;
65 }
66
61 if (root->fs_info->trans_no_join) { 67 if (root->fs_info->trans_no_join) {
62 if (!nofail) { 68 if (!nofail) {
63 spin_unlock(&root->fs_info->trans_lock); 69 spin_unlock(&root->fs_info->trans_lock);
@@ -67,6 +73,8 @@ loop:
67 73
68 cur_trans = root->fs_info->running_transaction; 74 cur_trans = root->fs_info->running_transaction;
69 if (cur_trans) { 75 if (cur_trans) {
76 if (cur_trans->aborted)
77 return cur_trans->aborted;
70 atomic_inc(&cur_trans->use_count); 78 atomic_inc(&cur_trans->use_count);
71 atomic_inc(&cur_trans->num_writers); 79 atomic_inc(&cur_trans->num_writers);
72 cur_trans->num_joined++; 80 cur_trans->num_joined++;
@@ -123,6 +131,7 @@ loop:
123 root->fs_info->generation++; 131 root->fs_info->generation++;
124 cur_trans->transid = root->fs_info->generation; 132 cur_trans->transid = root->fs_info->generation;
125 root->fs_info->running_transaction = cur_trans; 133 root->fs_info->running_transaction = cur_trans;
134 cur_trans->aborted = 0;
126 spin_unlock(&root->fs_info->trans_lock); 135 spin_unlock(&root->fs_info->trans_lock);
127 136
128 return 0; 137 return 0;
@@ -318,6 +327,7 @@ again:
318 h->use_count = 1; 327 h->use_count = 1;
319 h->block_rsv = NULL; 328 h->block_rsv = NULL;
320 h->orig_rsv = NULL; 329 h->orig_rsv = NULL;
330 h->aborted = 0;
321 331
322 smp_mb(); 332 smp_mb();
323 if (cur_trans->blocked && may_wait_transaction(root, type)) { 333 if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -327,8 +337,7 @@ again:
327 337
328 if (num_bytes) { 338 if (num_bytes) {
329 trace_btrfs_space_reservation(root->fs_info, "transaction", 339 trace_btrfs_space_reservation(root->fs_info, "transaction",
330 (u64)(unsigned long)h, 340 h->transid, num_bytes, 1);
331 num_bytes, 1);
332 h->block_rsv = &root->fs_info->trans_block_rsv; 341 h->block_rsv = &root->fs_info->trans_block_rsv;
333 h->bytes_reserved = num_bytes; 342 h->bytes_reserved = num_bytes;
334 } 343 }
@@ -440,6 +449,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
440 struct btrfs_transaction *cur_trans = trans->transaction; 449 struct btrfs_transaction *cur_trans = trans->transaction;
441 struct btrfs_block_rsv *rsv = trans->block_rsv; 450 struct btrfs_block_rsv *rsv = trans->block_rsv;
442 int updates; 451 int updates;
452 int err;
443 453
444 smp_mb(); 454 smp_mb();
445 if (cur_trans->blocked || cur_trans->delayed_refs.flushing) 455 if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
@@ -453,8 +463,11 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
453 463
454 updates = trans->delayed_ref_updates; 464 updates = trans->delayed_ref_updates;
455 trans->delayed_ref_updates = 0; 465 trans->delayed_ref_updates = 0;
456 if (updates) 466 if (updates) {
457 btrfs_run_delayed_refs(trans, root, updates); 467 err = btrfs_run_delayed_refs(trans, root, updates);
468 if (err) /* Error code will also eval true */
469 return err;
470 }
458 471
459 trans->block_rsv = rsv; 472 trans->block_rsv = rsv;
460 473
@@ -525,6 +538,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
525 if (throttle) 538 if (throttle)
526 btrfs_run_delayed_iputs(root); 539 btrfs_run_delayed_iputs(root);
527 540
541 if (trans->aborted ||
542 root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
543 return -EIO;
544 }
545
528 return 0; 546 return 0;
529} 547}
530 548
@@ -690,11 +708,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
690 ret = btrfs_update_root(trans, tree_root, 708 ret = btrfs_update_root(trans, tree_root,
691 &root->root_key, 709 &root->root_key,
692 &root->root_item); 710 &root->root_item);
693 BUG_ON(ret); 711 if (ret)
712 return ret;
694 713
695 old_root_used = btrfs_root_used(&root->root_item); 714 old_root_used = btrfs_root_used(&root->root_item);
696 ret = btrfs_write_dirty_block_groups(trans, root); 715 ret = btrfs_write_dirty_block_groups(trans, root);
697 BUG_ON(ret); 716 if (ret)
717 return ret;
698 } 718 }
699 719
700 if (root != root->fs_info->extent_root) 720 if (root != root->fs_info->extent_root)
@@ -705,6 +725,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
705 725
706/* 726/*
707 * update all the cowonly tree roots on disk 727 * update all the cowonly tree roots on disk
728 *
729 * The error handling in this function may not be obvious. Any of the
730 * failures will cause the file system to go offline. We still need
731 * to clean up the delayed refs.
708 */ 732 */
709static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, 733static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
710 struct btrfs_root *root) 734 struct btrfs_root *root)
@@ -715,22 +739,30 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
715 int ret; 739 int ret;
716 740
717 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 741 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
718 BUG_ON(ret); 742 if (ret)
743 return ret;
719 744
720 eb = btrfs_lock_root_node(fs_info->tree_root); 745 eb = btrfs_lock_root_node(fs_info->tree_root);
721 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); 746 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
747 0, &eb);
722 btrfs_tree_unlock(eb); 748 btrfs_tree_unlock(eb);
723 free_extent_buffer(eb); 749 free_extent_buffer(eb);
724 750
751 if (ret)
752 return ret;
753
725 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 754 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
726 BUG_ON(ret); 755 if (ret)
756 return ret;
727 757
728 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 758 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
729 next = fs_info->dirty_cowonly_roots.next; 759 next = fs_info->dirty_cowonly_roots.next;
730 list_del_init(next); 760 list_del_init(next);
731 root = list_entry(next, struct btrfs_root, dirty_list); 761 root = list_entry(next, struct btrfs_root, dirty_list);
732 762
733 update_cowonly_root(trans, root); 763 ret = update_cowonly_root(trans, root);
764 if (ret)
765 return ret;
734 } 766 }
735 767
736 down_write(&fs_info->extent_commit_sem); 768 down_write(&fs_info->extent_commit_sem);
@@ -874,7 +906,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
874 906
875 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 907 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
876 if (!new_root_item) { 908 if (!new_root_item) {
877 pending->error = -ENOMEM; 909 ret = pending->error = -ENOMEM;
878 goto fail; 910 goto fail;
879 } 911 }
880 912
@@ -911,21 +943,24 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
911 * insert the directory item 943 * insert the directory item
912 */ 944 */
913 ret = btrfs_set_inode_index(parent_inode, &index); 945 ret = btrfs_set_inode_index(parent_inode, &index);
914 BUG_ON(ret); 946 BUG_ON(ret); /* -ENOMEM */
915 ret = btrfs_insert_dir_item(trans, parent_root, 947 ret = btrfs_insert_dir_item(trans, parent_root,
916 dentry->d_name.name, dentry->d_name.len, 948 dentry->d_name.name, dentry->d_name.len,
917 parent_inode, &key, 949 parent_inode, &key,
918 BTRFS_FT_DIR, index); 950 BTRFS_FT_DIR, index);
919 if (ret) { 951 if (ret == -EEXIST) {
920 pending->error = -EEXIST; 952 pending->error = -EEXIST;
921 dput(parent); 953 dput(parent);
922 goto fail; 954 goto fail;
955 } else if (ret) {
956 goto abort_trans_dput;
923 } 957 }
924 958
925 btrfs_i_size_write(parent_inode, parent_inode->i_size + 959 btrfs_i_size_write(parent_inode, parent_inode->i_size +
926 dentry->d_name.len * 2); 960 dentry->d_name.len * 2);
927 ret = btrfs_update_inode(trans, parent_root, parent_inode); 961 ret = btrfs_update_inode(trans, parent_root, parent_inode);
928 BUG_ON(ret); 962 if (ret)
963 goto abort_trans_dput;
929 964
930 /* 965 /*
931 * pull in the delayed directory update 966 * pull in the delayed directory update
@@ -934,7 +969,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
934 * snapshot 969 * snapshot
935 */ 970 */
936 ret = btrfs_run_delayed_items(trans, root); 971 ret = btrfs_run_delayed_items(trans, root);
937 BUG_ON(ret); 972 if (ret) { /* Transaction aborted */
973 dput(parent);
974 goto fail;
975 }
938 976
939 record_root_in_trans(trans, root); 977 record_root_in_trans(trans, root);
940 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 978 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
@@ -949,12 +987,21 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
949 btrfs_set_root_flags(new_root_item, root_flags); 987 btrfs_set_root_flags(new_root_item, root_flags);
950 988
951 old = btrfs_lock_root_node(root); 989 old = btrfs_lock_root_node(root);
952 btrfs_cow_block(trans, root, old, NULL, 0, &old); 990 ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
991 if (ret) {
992 btrfs_tree_unlock(old);
993 free_extent_buffer(old);
994 goto abort_trans_dput;
995 }
996
953 btrfs_set_lock_blocking(old); 997 btrfs_set_lock_blocking(old);
954 998
955 btrfs_copy_root(trans, root, old, &tmp, objectid); 999 ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
1000 /* clean up in any case */
956 btrfs_tree_unlock(old); 1001 btrfs_tree_unlock(old);
957 free_extent_buffer(old); 1002 free_extent_buffer(old);
1003 if (ret)
1004 goto abort_trans_dput;
958 1005
959 /* see comments in should_cow_block() */ 1006 /* see comments in should_cow_block() */
960 root->force_cow = 1; 1007 root->force_cow = 1;
@@ -966,7 +1013,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
966 ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); 1013 ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
967 btrfs_tree_unlock(tmp); 1014 btrfs_tree_unlock(tmp);
968 free_extent_buffer(tmp); 1015 free_extent_buffer(tmp);
969 BUG_ON(ret); 1016 if (ret)
1017 goto abort_trans_dput;
970 1018
971 /* 1019 /*
972 * insert root back/forward references 1020 * insert root back/forward references
@@ -975,19 +1023,32 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
975 parent_root->root_key.objectid, 1023 parent_root->root_key.objectid,
976 btrfs_ino(parent_inode), index, 1024 btrfs_ino(parent_inode), index,
977 dentry->d_name.name, dentry->d_name.len); 1025 dentry->d_name.name, dentry->d_name.len);
978 BUG_ON(ret);
979 dput(parent); 1026 dput(parent);
1027 if (ret)
1028 goto fail;
980 1029
981 key.offset = (u64)-1; 1030 key.offset = (u64)-1;
982 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); 1031 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
983 BUG_ON(IS_ERR(pending->snap)); 1032 if (IS_ERR(pending->snap)) {
1033 ret = PTR_ERR(pending->snap);
1034 goto abort_trans;
1035 }
984 1036
985 btrfs_reloc_post_snapshot(trans, pending); 1037 ret = btrfs_reloc_post_snapshot(trans, pending);
1038 if (ret)
1039 goto abort_trans;
1040 ret = 0;
986fail: 1041fail:
987 kfree(new_root_item); 1042 kfree(new_root_item);
988 trans->block_rsv = rsv; 1043 trans->block_rsv = rsv;
989 btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); 1044 btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
990 return 0; 1045 return ret;
1046
1047abort_trans_dput:
1048 dput(parent);
1049abort_trans:
1050 btrfs_abort_transaction(trans, root, ret);
1051 goto fail;
991} 1052}
992 1053
993/* 1054/*
@@ -1124,6 +1185,33 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1124 return 0; 1185 return 0;
1125} 1186}
1126 1187
1188
1189static void cleanup_transaction(struct btrfs_trans_handle *trans,
1190 struct btrfs_root *root)
1191{
1192 struct btrfs_transaction *cur_trans = trans->transaction;
1193
1194 WARN_ON(trans->use_count > 1);
1195
1196 spin_lock(&root->fs_info->trans_lock);
1197 list_del_init(&cur_trans->list);
1198 spin_unlock(&root->fs_info->trans_lock);
1199
1200 btrfs_cleanup_one_transaction(trans->transaction, root);
1201
 1202	put_transaction(cur_trans);	/* once for the list's reference */
 1203	put_transaction(cur_trans);	/* once for this handle's reference */
1204
1205 trace_btrfs_transaction_commit(root);
1206
1207 btrfs_scrub_continue(root);
1208
1209 if (current->journal_info == trans)
1210 current->journal_info = NULL;
1211
1212 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1213}
1214
1127/* 1215/*
1128 * btrfs_transaction state sequence: 1216 * btrfs_transaction state sequence:
1129 * in_commit = 0, blocked = 0 (initial) 1217 * in_commit = 0, blocked = 0 (initial)
@@ -1135,10 +1223,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1135 struct btrfs_root *root) 1223 struct btrfs_root *root)
1136{ 1224{
1137 unsigned long joined = 0; 1225 unsigned long joined = 0;
1138 struct btrfs_transaction *cur_trans; 1226 struct btrfs_transaction *cur_trans = trans->transaction;
1139 struct btrfs_transaction *prev_trans = NULL; 1227 struct btrfs_transaction *prev_trans = NULL;
1140 DEFINE_WAIT(wait); 1228 DEFINE_WAIT(wait);
1141 int ret; 1229 int ret = -EIO;
1142 int should_grow = 0; 1230 int should_grow = 0;
1143 unsigned long now = get_seconds(); 1231 unsigned long now = get_seconds();
1144 int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); 1232 int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
@@ -1148,13 +1236,18 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1148 btrfs_trans_release_metadata(trans, root); 1236 btrfs_trans_release_metadata(trans, root);
1149 trans->block_rsv = NULL; 1237 trans->block_rsv = NULL;
1150 1238
1239 if (cur_trans->aborted)
1240 goto cleanup_transaction;
1241
1151 /* make a pass through all the delayed refs we have so far 1242 /* make a pass through all the delayed refs we have so far
1152 * any running procs may add more while we are here 1243 * any running procs may add more while we are here
1153 */ 1244 */
1154 ret = btrfs_run_delayed_refs(trans, root, 0); 1245 ret = btrfs_run_delayed_refs(trans, root, 0);
1155 BUG_ON(ret); 1246 if (ret)
1247 goto cleanup_transaction;
1156 1248
1157 cur_trans = trans->transaction; 1249 cur_trans = trans->transaction;
1250
1158 /* 1251 /*
1159 * set the flushing flag so procs in this transaction have to 1252 * set the flushing flag so procs in this transaction have to
1160 * start sending their work down. 1253 * start sending their work down.
@@ -1162,19 +1255,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1162 cur_trans->delayed_refs.flushing = 1; 1255 cur_trans->delayed_refs.flushing = 1;
1163 1256
1164 ret = btrfs_run_delayed_refs(trans, root, 0); 1257 ret = btrfs_run_delayed_refs(trans, root, 0);
1165 BUG_ON(ret); 1258 if (ret)
1259 goto cleanup_transaction;
1166 1260
1167 spin_lock(&cur_trans->commit_lock); 1261 spin_lock(&cur_trans->commit_lock);
1168 if (cur_trans->in_commit) { 1262 if (cur_trans->in_commit) {
1169 spin_unlock(&cur_trans->commit_lock); 1263 spin_unlock(&cur_trans->commit_lock);
1170 atomic_inc(&cur_trans->use_count); 1264 atomic_inc(&cur_trans->use_count);
1171 btrfs_end_transaction(trans, root); 1265 ret = btrfs_end_transaction(trans, root);
1172 1266
1173 wait_for_commit(root, cur_trans); 1267 wait_for_commit(root, cur_trans);
1174 1268
1175 put_transaction(cur_trans); 1269 put_transaction(cur_trans);
1176 1270
1177 return 0; 1271 return ret;
1178 } 1272 }
1179 1273
1180 trans->transaction->in_commit = 1; 1274 trans->transaction->in_commit = 1;
@@ -1214,12 +1308,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1214 1308
1215 if (flush_on_commit || snap_pending) { 1309 if (flush_on_commit || snap_pending) {
1216 btrfs_start_delalloc_inodes(root, 1); 1310 btrfs_start_delalloc_inodes(root, 1);
1217 ret = btrfs_wait_ordered_extents(root, 0, 1); 1311 btrfs_wait_ordered_extents(root, 0, 1);
1218 BUG_ON(ret);
1219 } 1312 }
1220 1313
1221 ret = btrfs_run_delayed_items(trans, root); 1314 ret = btrfs_run_delayed_items(trans, root);
1222 BUG_ON(ret); 1315 if (ret)
1316 goto cleanup_transaction;
1223 1317
1224 /* 1318 /*
1225 * rename don't use btrfs_join_transaction, so, once we 1319 * rename don't use btrfs_join_transaction, so, once we
@@ -1261,13 +1355,22 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1261 mutex_lock(&root->fs_info->reloc_mutex); 1355 mutex_lock(&root->fs_info->reloc_mutex);
1262 1356
1263 ret = btrfs_run_delayed_items(trans, root); 1357 ret = btrfs_run_delayed_items(trans, root);
1264 BUG_ON(ret); 1358 if (ret) {
1359 mutex_unlock(&root->fs_info->reloc_mutex);
1360 goto cleanup_transaction;
1361 }
1265 1362
1266 ret = create_pending_snapshots(trans, root->fs_info); 1363 ret = create_pending_snapshots(trans, root->fs_info);
1267 BUG_ON(ret); 1364 if (ret) {
1365 mutex_unlock(&root->fs_info->reloc_mutex);
1366 goto cleanup_transaction;
1367 }
1268 1368
1269 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1369 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1270 BUG_ON(ret); 1370 if (ret) {
1371 mutex_unlock(&root->fs_info->reloc_mutex);
1372 goto cleanup_transaction;
1373 }
1271 1374
1272 /* 1375 /*
1273 * make sure none of the code above managed to slip in a 1376 * make sure none of the code above managed to slip in a
@@ -1294,7 +1397,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1294 mutex_lock(&root->fs_info->tree_log_mutex); 1397 mutex_lock(&root->fs_info->tree_log_mutex);
1295 1398
1296 ret = commit_fs_roots(trans, root); 1399 ret = commit_fs_roots(trans, root);
1297 BUG_ON(ret); 1400 if (ret) {
1401 mutex_unlock(&root->fs_info->tree_log_mutex);
1402 goto cleanup_transaction;
1403 }
1298 1404
1299 /* commit_fs_roots gets rid of all the tree log roots, it is now 1405 /* commit_fs_roots gets rid of all the tree log roots, it is now
1300 * safe to free the root of tree log roots 1406 * safe to free the root of tree log roots
@@ -1302,7 +1408,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1302 btrfs_free_log_root_tree(trans, root->fs_info); 1408 btrfs_free_log_root_tree(trans, root->fs_info);
1303 1409
1304 ret = commit_cowonly_roots(trans, root); 1410 ret = commit_cowonly_roots(trans, root);
1305 BUG_ON(ret); 1411 if (ret) {
1412 mutex_unlock(&root->fs_info->tree_log_mutex);
1413 goto cleanup_transaction;
1414 }
1306 1415
1307 btrfs_prepare_extent_commit(trans, root); 1416 btrfs_prepare_extent_commit(trans, root);
1308 1417
@@ -1336,8 +1445,18 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1336 wake_up(&root->fs_info->transaction_wait); 1445 wake_up(&root->fs_info->transaction_wait);
1337 1446
1338 ret = btrfs_write_and_wait_transaction(trans, root); 1447 ret = btrfs_write_and_wait_transaction(trans, root);
1339 BUG_ON(ret); 1448 if (ret) {
1340 write_ctree_super(trans, root, 0); 1449 btrfs_error(root->fs_info, ret,
1450 "Error while writing out transaction.");
1451 mutex_unlock(&root->fs_info->tree_log_mutex);
1452 goto cleanup_transaction;
1453 }
1454
1455 ret = write_ctree_super(trans, root, 0);
1456 if (ret) {
1457 mutex_unlock(&root->fs_info->tree_log_mutex);
1458 goto cleanup_transaction;
1459 }
1341 1460
1342 /* 1461 /*
1343 * the super is written, we can safely allow the tree-loggers 1462 * the super is written, we can safely allow the tree-loggers
@@ -1373,6 +1492,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1373 btrfs_run_delayed_iputs(root); 1492 btrfs_run_delayed_iputs(root);
1374 1493
1375 return ret; 1494 return ret;
1495
1496cleanup_transaction:
1497 btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
1498// WARN_ON(1);
1499 if (current->journal_info == trans)
1500 current->journal_info = NULL;
1501 cleanup_transaction(trans, root);
1502
1503 return ret;
1376} 1504}
1377 1505
1378/* 1506/*
@@ -1388,6 +1516,8 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1388 spin_unlock(&fs_info->trans_lock); 1516 spin_unlock(&fs_info->trans_lock);
1389 1517
1390 while (!list_empty(&list)) { 1518 while (!list_empty(&list)) {
1519 int ret;
1520
1391 root = list_entry(list.next, struct btrfs_root, root_list); 1521 root = list_entry(list.next, struct btrfs_root, root_list);
1392 list_del(&root->root_list); 1522 list_del(&root->root_list);
1393 1523
@@ -1395,9 +1525,10 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1395 1525
1396 if (btrfs_header_backref_rev(root->node) < 1526 if (btrfs_header_backref_rev(root->node) <
1397 BTRFS_MIXED_BACKREF_REV) 1527 BTRFS_MIXED_BACKREF_REV)
1398 btrfs_drop_snapshot(root, NULL, 0, 0); 1528 ret = btrfs_drop_snapshot(root, NULL, 0, 0);
1399 else 1529 else
1400 btrfs_drop_snapshot(root, NULL, 1, 0); 1530 ret = btrfs_drop_snapshot(root, NULL, 1, 0);
1531 BUG_ON(ret < 0);
1401 } 1532 }
1402 return 0; 1533 return 0;
1403} 1534}
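
The transaction.c hunks above all follow one shape: a BUG_ON(ret) becomes an early exit to the shared cleanup_transaction label, and any mutex held at the failure site is dropped before the jump. A minimal standalone C sketch of that unwind pattern (the names here are illustrative, not btrfs APIs):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t reloc_mutex = PTHREAD_MUTEX_INITIALIZER;

static int run_step(int should_fail)
{
	return should_fail ? -EIO : 0;
}

static int commit(int fail_at)
{
	int ret;

	ret = run_step(fail_at == 1);
	if (ret)
		goto cleanup;		/* nothing held yet, jump straight out */

	pthread_mutex_lock(&reloc_mutex);
	ret = run_step(fail_at == 2);
	if (ret) {
		pthread_mutex_unlock(&reloc_mutex);	/* drop the lock where it failed */
		goto cleanup;
	}
	pthread_mutex_unlock(&reloc_mutex);
	return 0;

cleanup:
	fprintf(stderr, "skipping commit of aborted transaction: %d\n", ret);
	return ret;		/* the caller sees the error; no BUG_ON */
}

int main(void)
{
	return commit(2) ? 1 : 0;
}
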
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 02564e6230ac..fe27379e368b 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -43,6 +43,7 @@ struct btrfs_transaction {
43 wait_queue_head_t commit_wait; 43 wait_queue_head_t commit_wait;
44 struct list_head pending_snapshots; 44 struct list_head pending_snapshots;
45 struct btrfs_delayed_ref_root delayed_refs; 45 struct btrfs_delayed_ref_root delayed_refs;
46 int aborted;
46}; 47};
47 48
48struct btrfs_trans_handle { 49struct btrfs_trans_handle {
@@ -55,6 +56,7 @@ struct btrfs_trans_handle {
55 struct btrfs_transaction *transaction; 56 struct btrfs_transaction *transaction;
56 struct btrfs_block_rsv *block_rsv; 57 struct btrfs_block_rsv *block_rsv;
57 struct btrfs_block_rsv *orig_rsv; 58 struct btrfs_block_rsv *orig_rsv;
59 int aborted;
58}; 60};
59 61
60struct btrfs_pending_snapshot { 62struct btrfs_pending_snapshot {
@@ -114,4 +116,5 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
114 struct extent_io_tree *dirty_pages, int mark); 116 struct extent_io_tree *dirty_pages, int mark);
115int btrfs_transaction_blocked(struct btrfs_fs_info *info); 117int btrfs_transaction_blocked(struct btrfs_fs_info *info);
116int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 118int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
119void put_transaction(struct btrfs_transaction *transaction);
117#endif 120#endif
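
The two aborted fields added above form a handshake with the commit path: btrfs_abort_transaction() (introduced elsewhere in this series) records the errno on both the handle and the running transaction, and btrfs_commit_transaction() checks cur_trans->aborted before doing any commit work. A simplified, self-contained sketch of that handshake, not the kernel definitions:

#include <errno.h>
#include <stdio.h>

struct transaction {
	int aborted;			/* errno recorded by the first failure */
};

struct trans_handle {
	int aborted;
	struct transaction *transaction;
};

static void abort_transaction(struct trans_handle *trans, int err)
{
	trans->aborted = err;			/* remember why on this handle */
	trans->transaction->aborted = err;	/* and on the shared transaction */
}

static int commit_transaction(struct trans_handle *trans)
{
	if (trans->transaction->aborted)	/* checked before any commit work */
		return trans->transaction->aborted;
	/* ... normal commit path would run here ... */
	return 0;
}

int main(void)
{
	struct transaction cur = { 0 };
	struct trans_handle trans = { 0, &cur };

	abort_transaction(&trans, -EIO);
	printf("commit: %d\n", commit_transaction(&trans));	/* prints -5 */
	return 0;
}
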
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 966cc74f5d6c..d017283ae6f5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -212,14 +212,13 @@ int btrfs_pin_log_trans(struct btrfs_root *root)
212 * indicate we're done making changes to the log tree 212 * indicate we're done making changes to the log tree
213 * and wake up anyone waiting to do a sync 213 * and wake up anyone waiting to do a sync
214 */ 214 */
215int btrfs_end_log_trans(struct btrfs_root *root) 215void btrfs_end_log_trans(struct btrfs_root *root)
216{ 216{
217 if (atomic_dec_and_test(&root->log_writers)) { 217 if (atomic_dec_and_test(&root->log_writers)) {
218 smp_mb(); 218 smp_mb();
219 if (waitqueue_active(&root->log_writer_wait)) 219 if (waitqueue_active(&root->log_writer_wait))
220 wake_up(&root->log_writer_wait); 220 wake_up(&root->log_writer_wait);
221 } 221 }
222 return 0;
223} 222}
224 223
225 224
@@ -378,12 +377,11 @@ insert:
378 u32 found_size; 377 u32 found_size;
379 found_size = btrfs_item_size_nr(path->nodes[0], 378 found_size = btrfs_item_size_nr(path->nodes[0],
380 path->slots[0]); 379 path->slots[0]);
381 if (found_size > item_size) { 380 if (found_size > item_size)
382 btrfs_truncate_item(trans, root, path, item_size, 1); 381 btrfs_truncate_item(trans, root, path, item_size, 1);
383 } else if (found_size < item_size) { 382 else if (found_size < item_size)
384 ret = btrfs_extend_item(trans, root, path, 383 btrfs_extend_item(trans, root, path,
385 item_size - found_size); 384 item_size - found_size);
386 }
387 } else if (ret) { 385 } else if (ret) {
388 return ret; 386 return ret;
389 } 387 }
@@ -1763,7 +1761,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1763 BTRFS_TREE_LOG_OBJECTID); 1761 BTRFS_TREE_LOG_OBJECTID);
1764 ret = btrfs_free_and_pin_reserved_extent(root, 1762 ret = btrfs_free_and_pin_reserved_extent(root,
1765 bytenr, blocksize); 1763 bytenr, blocksize);
1766 BUG_ON(ret); 1764 BUG_ON(ret); /* -ENOMEM or logic errors */
1767 } 1765 }
1768 free_extent_buffer(next); 1766 free_extent_buffer(next);
1769 continue; 1767 continue;
@@ -1871,20 +1869,26 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1871 wret = walk_down_log_tree(trans, log, path, &level, wc); 1869 wret = walk_down_log_tree(trans, log, path, &level, wc);
1872 if (wret > 0) 1870 if (wret > 0)
1873 break; 1871 break;
1874 if (wret < 0) 1872 if (wret < 0) {
1875 ret = wret; 1873 ret = wret;
1874 goto out;
1875 }
1876 1876
1877 wret = walk_up_log_tree(trans, log, path, &level, wc); 1877 wret = walk_up_log_tree(trans, log, path, &level, wc);
1878 if (wret > 0) 1878 if (wret > 0)
1879 break; 1879 break;
1880 if (wret < 0) 1880 if (wret < 0) {
1881 ret = wret; 1881 ret = wret;
1882 goto out;
1883 }
1882 } 1884 }
1883 1885
1884 /* was the root node processed? if not, catch it here */ 1886 /* was the root node processed? if not, catch it here */
1885 if (path->nodes[orig_level]) { 1887 if (path->nodes[orig_level]) {
1886 wc->process_func(log, path->nodes[orig_level], wc, 1888 ret = wc->process_func(log, path->nodes[orig_level], wc,
1887 btrfs_header_generation(path->nodes[orig_level])); 1889 btrfs_header_generation(path->nodes[orig_level]));
1890 if (ret)
1891 goto out;
1888 if (wc->free) { 1892 if (wc->free) {
1889 struct extent_buffer *next; 1893 struct extent_buffer *next;
1890 1894
@@ -1900,10 +1904,11 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1900 BTRFS_TREE_LOG_OBJECTID); 1904 BTRFS_TREE_LOG_OBJECTID);
1901 ret = btrfs_free_and_pin_reserved_extent(log, next->start, 1905 ret = btrfs_free_and_pin_reserved_extent(log, next->start,
1902 next->len); 1906 next->len);
1903 BUG_ON(ret); 1907 BUG_ON(ret); /* -ENOMEM or logic errors */
1904 } 1908 }
1905 } 1909 }
1906 1910
1911out:
1907 for (i = 0; i <= orig_level; i++) { 1912 for (i = 0; i <= orig_level; i++) {
1908 if (path->nodes[i]) { 1913 if (path->nodes[i]) {
1909 free_extent_buffer(path->nodes[i]); 1914 free_extent_buffer(path->nodes[i]);
@@ -1963,8 +1968,8 @@ static int wait_log_commit(struct btrfs_trans_handle *trans,
1963 return 0; 1968 return 0;
1964} 1969}
1965 1970
1966static int wait_for_writer(struct btrfs_trans_handle *trans, 1971static void wait_for_writer(struct btrfs_trans_handle *trans,
1967 struct btrfs_root *root) 1972 struct btrfs_root *root)
1968{ 1973{
1969 DEFINE_WAIT(wait); 1974 DEFINE_WAIT(wait);
1970 while (root->fs_info->last_trans_log_full_commit != 1975 while (root->fs_info->last_trans_log_full_commit !=
@@ -1978,7 +1983,6 @@ static int wait_for_writer(struct btrfs_trans_handle *trans,
1978 mutex_lock(&root->log_mutex); 1983 mutex_lock(&root->log_mutex);
1979 finish_wait(&root->log_writer_wait, &wait); 1984 finish_wait(&root->log_writer_wait, &wait);
1980 } 1985 }
1981 return 0;
1982} 1986}
1983 1987
1984/* 1988/*
@@ -2046,7 +2050,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2046 * wait for them until later. 2050 * wait for them until later.
2047 */ 2051 */
2048 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); 2052 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2049 BUG_ON(ret); 2053 if (ret) {
2054 btrfs_abort_transaction(trans, root, ret);
2055 mutex_unlock(&root->log_mutex);
2056 goto out;
2057 }
2050 2058
2051 btrfs_set_root_node(&log->root_item, log->node); 2059 btrfs_set_root_node(&log->root_item, log->node);
2052 2060
@@ -2077,7 +2085,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2077 } 2085 }
2078 2086
2079 if (ret) { 2087 if (ret) {
2080 BUG_ON(ret != -ENOSPC); 2088 if (ret != -ENOSPC) {
2089 btrfs_abort_transaction(trans, root, ret);
2090 mutex_unlock(&log_root_tree->log_mutex);
2091 goto out;
2092 }
2081 root->fs_info->last_trans_log_full_commit = trans->transid; 2093 root->fs_info->last_trans_log_full_commit = trans->transid;
2082 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2094 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2083 mutex_unlock(&log_root_tree->log_mutex); 2095 mutex_unlock(&log_root_tree->log_mutex);
@@ -2117,7 +2129,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2117 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2129 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
2118 &log_root_tree->dirty_log_pages, 2130 &log_root_tree->dirty_log_pages,
2119 EXTENT_DIRTY | EXTENT_NEW); 2131 EXTENT_DIRTY | EXTENT_NEW);
2120 BUG_ON(ret); 2132 if (ret) {
2133 btrfs_abort_transaction(trans, root, ret);
2134 mutex_unlock(&log_root_tree->log_mutex);
2135 goto out_wake_log_root;
2136 }
2121 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2137 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2122 2138
2123 btrfs_set_super_log_root(root->fs_info->super_for_commit, 2139 btrfs_set_super_log_root(root->fs_info->super_for_commit,
@@ -2326,7 +2342,9 @@ out_unlock:
2326 if (ret == -ENOSPC) { 2342 if (ret == -ENOSPC) {
2327 root->fs_info->last_trans_log_full_commit = trans->transid; 2343 root->fs_info->last_trans_log_full_commit = trans->transid;
2328 ret = 0; 2344 ret = 0;
2329 } 2345 } else if (ret < 0)
2346 btrfs_abort_transaction(trans, root, ret);
2347
2330 btrfs_end_log_trans(root); 2348 btrfs_end_log_trans(root);
2331 2349
2332 return err; 2350 return err;
@@ -2357,7 +2375,8 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2357 if (ret == -ENOSPC) { 2375 if (ret == -ENOSPC) {
2358 root->fs_info->last_trans_log_full_commit = trans->transid; 2376 root->fs_info->last_trans_log_full_commit = trans->transid;
2359 ret = 0; 2377 ret = 0;
2360 } 2378 } else if (ret < 0 && ret != -ENOENT)
2379 btrfs_abort_transaction(trans, root, ret);
2361 btrfs_end_log_trans(root); 2380 btrfs_end_log_trans(root);
2362 2381
2363 return ret; 2382 return ret;
@@ -3169,13 +3188,20 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3169 fs_info->log_root_recovering = 1; 3188 fs_info->log_root_recovering = 1;
3170 3189
3171 trans = btrfs_start_transaction(fs_info->tree_root, 0); 3190 trans = btrfs_start_transaction(fs_info->tree_root, 0);
3172 BUG_ON(IS_ERR(trans)); 3191 if (IS_ERR(trans)) {
3192 ret = PTR_ERR(trans);
3193 goto error;
3194 }
3173 3195
3174 wc.trans = trans; 3196 wc.trans = trans;
3175 wc.pin = 1; 3197 wc.pin = 1;
3176 3198
3177 ret = walk_log_tree(trans, log_root_tree, &wc); 3199 ret = walk_log_tree(trans, log_root_tree, &wc);
3178 BUG_ON(ret); 3200 if (ret) {
3201 btrfs_error(fs_info, ret, "Failed to pin buffers while "
3202 "recovering log root tree.");
3203 goto error;
3204 }
3179 3205
3180again: 3206again:
3181 key.objectid = BTRFS_TREE_LOG_OBJECTID; 3207 key.objectid = BTRFS_TREE_LOG_OBJECTID;
@@ -3184,8 +3210,12 @@ again:
3184 3210
3185 while (1) { 3211 while (1) {
3186 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); 3212 ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
3187 if (ret < 0) 3213
3188 break; 3214 if (ret < 0) {
3215 btrfs_error(fs_info, ret,
3216 "Couldn't find tree log root.");
3217 goto error;
3218 }
3189 if (ret > 0) { 3219 if (ret > 0) {
3190 if (path->slots[0] == 0) 3220 if (path->slots[0] == 0)
3191 break; 3221 break;
@@ -3199,14 +3229,24 @@ again:
3199 3229
3200 log = btrfs_read_fs_root_no_radix(log_root_tree, 3230 log = btrfs_read_fs_root_no_radix(log_root_tree,
3201 &found_key); 3231 &found_key);
3202 BUG_ON(IS_ERR(log)); 3232 if (IS_ERR(log)) {
3233 ret = PTR_ERR(log);
3234 btrfs_error(fs_info, ret,
3235 "Couldn't read tree log root.");
3236 goto error;
3237 }
3203 3238
3204 tmp_key.objectid = found_key.offset; 3239 tmp_key.objectid = found_key.offset;
3205 tmp_key.type = BTRFS_ROOT_ITEM_KEY; 3240 tmp_key.type = BTRFS_ROOT_ITEM_KEY;
3206 tmp_key.offset = (u64)-1; 3241 tmp_key.offset = (u64)-1;
3207 3242
3208 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 3243 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
3209 BUG_ON(IS_ERR_OR_NULL(wc.replay_dest)); 3244 if (IS_ERR(wc.replay_dest)) {
3245 ret = PTR_ERR(wc.replay_dest);
3246 btrfs_error(fs_info, ret, "Couldn't read target root "
3247 "for tree log recovery.");
3248 goto error;
3249 }
3210 3250
3211 wc.replay_dest->log_root = log; 3251 wc.replay_dest->log_root = log;
3212 btrfs_record_root_in_trans(trans, wc.replay_dest); 3252 btrfs_record_root_in_trans(trans, wc.replay_dest);
@@ -3254,6 +3294,10 @@ again:
3254 3294
3255 kfree(log_root_tree); 3295 kfree(log_root_tree);
3256 return 0; 3296 return 0;
3297
3298error:
3299 btrfs_free_path(path);
3300 return ret;
3257} 3301}
3258 3302
3259/* 3303/*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 2270ac58d746..862ac813f6b8 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -38,7 +38,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 38 struct btrfs_root *root,
39 const char *name, int name_len, 39 const char *name, int name_len,
40 struct inode *inode, u64 dirid); 40 struct inode *inode, u64 dirid);
41int btrfs_end_log_trans(struct btrfs_root *root); 41void btrfs_end_log_trans(struct btrfs_root *root);
42int btrfs_pin_log_trans(struct btrfs_root *root); 42int btrfs_pin_log_trans(struct btrfs_root *root);
43int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 43int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
44 struct btrfs_root *root, struct inode *inode, 44 struct btrfs_root *root, struct inode *inode,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ef41f285a475..a872b48be0ae 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -67,7 +67,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
67 kfree(fs_devices); 67 kfree(fs_devices);
68} 68}
69 69
70int btrfs_cleanup_fs_uuids(void) 70void btrfs_cleanup_fs_uuids(void)
71{ 71{
72 struct btrfs_fs_devices *fs_devices; 72 struct btrfs_fs_devices *fs_devices;
73 73
@@ -77,7 +77,6 @@ int btrfs_cleanup_fs_uuids(void)
77 list_del(&fs_devices->list); 77 list_del(&fs_devices->list);
78 free_fs_devices(fs_devices); 78 free_fs_devices(fs_devices);
79 } 79 }
80 return 0;
81} 80}
82 81
83static noinline struct btrfs_device *__find_device(struct list_head *head, 82static noinline struct btrfs_device *__find_device(struct list_head *head,
@@ -130,7 +129,7 @@ static void requeue_list(struct btrfs_pending_bios *pending_bios,
130 * the list if the block device is congested. This way, multiple devices 129 * the list if the block device is congested. This way, multiple devices
131 * can make progress from a single worker thread. 130 * can make progress from a single worker thread.
132 */ 131 */
133static noinline int run_scheduled_bios(struct btrfs_device *device) 132static noinline void run_scheduled_bios(struct btrfs_device *device)
134{ 133{
135 struct bio *pending; 134 struct bio *pending;
136 struct backing_dev_info *bdi; 135 struct backing_dev_info *bdi;
@@ -316,7 +315,6 @@ loop_lock:
316 315
317done: 316done:
318 blk_finish_plug(&plug); 317 blk_finish_plug(&plug);
319 return 0;
320} 318}
321 319
322static void pending_bios_fn(struct btrfs_work *work) 320static void pending_bios_fn(struct btrfs_work *work)
@@ -455,7 +453,7 @@ error:
455 return ERR_PTR(-ENOMEM); 453 return ERR_PTR(-ENOMEM);
456} 454}
457 455
458int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) 456void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
459{ 457{
460 struct btrfs_device *device, *next; 458 struct btrfs_device *device, *next;
461 459
@@ -503,7 +501,6 @@ again:
503 fs_devices->latest_trans = latest_transid; 501 fs_devices->latest_trans = latest_transid;
504 502
505 mutex_unlock(&uuid_mutex); 503 mutex_unlock(&uuid_mutex);
506 return 0;
507} 504}
508 505
509static void __free_device(struct work_struct *work) 506static void __free_device(struct work_struct *work)
@@ -552,10 +549,10 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
552 fs_devices->num_can_discard--; 549 fs_devices->num_can_discard--;
553 550
554 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 551 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
555 BUG_ON(!new_device); 552 BUG_ON(!new_device); /* -ENOMEM */
556 memcpy(new_device, device, sizeof(*new_device)); 553 memcpy(new_device, device, sizeof(*new_device));
557 new_device->name = kstrdup(device->name, GFP_NOFS); 554 new_device->name = kstrdup(device->name, GFP_NOFS);
558 BUG_ON(device->name && !new_device->name); 555 BUG_ON(device->name && !new_device->name); /* -ENOMEM */
559 new_device->bdev = NULL; 556 new_device->bdev = NULL;
560 new_device->writeable = 0; 557 new_device->writeable = 0;
561 new_device->in_fs_metadata = 0; 558 new_device->in_fs_metadata = 0;
@@ -625,6 +622,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
625 printk(KERN_INFO "open %s failed\n", device->name); 622 printk(KERN_INFO "open %s failed\n", device->name);
626 goto error; 623 goto error;
627 } 624 }
625 filemap_write_and_wait(bdev->bd_inode->i_mapping);
626 invalidate_bdev(bdev);
628 set_blocksize(bdev, 4096); 627 set_blocksize(bdev, 4096);
629 628
630 bh = btrfs_read_dev_super(bdev); 629 bh = btrfs_read_dev_super(bdev);
@@ -1039,8 +1038,10 @@ again:
1039 leaf = path->nodes[0]; 1038 leaf = path->nodes[0];
1040 extent = btrfs_item_ptr(leaf, path->slots[0], 1039 extent = btrfs_item_ptr(leaf, path->slots[0],
1041 struct btrfs_dev_extent); 1040 struct btrfs_dev_extent);
1041 } else {
1042 btrfs_error(root->fs_info, ret, "Slot search failed");
1043 goto out;
1042 } 1044 }
1043 BUG_ON(ret);
1044 1045
1045 if (device->bytes_used > 0) { 1046 if (device->bytes_used > 0) {
1046 u64 len = btrfs_dev_extent_length(leaf, extent); 1047 u64 len = btrfs_dev_extent_length(leaf, extent);
@@ -1050,7 +1051,10 @@ again:
1050 spin_unlock(&root->fs_info->free_chunk_lock); 1051 spin_unlock(&root->fs_info->free_chunk_lock);
1051 } 1052 }
1052 ret = btrfs_del_item(trans, root, path); 1053 ret = btrfs_del_item(trans, root, path);
1053 1054 if (ret) {
1055 btrfs_error(root->fs_info, ret,
1056 "Failed to remove dev extent item");
1057 }
1054out: 1058out:
1055 btrfs_free_path(path); 1059 btrfs_free_path(path);
1056 return ret; 1060 return ret;
@@ -1078,7 +1082,8 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1078 key.type = BTRFS_DEV_EXTENT_KEY; 1082 key.type = BTRFS_DEV_EXTENT_KEY;
1079 ret = btrfs_insert_empty_item(trans, root, path, &key, 1083 ret = btrfs_insert_empty_item(trans, root, path, &key,
1080 sizeof(*extent)); 1084 sizeof(*extent));
1081 BUG_ON(ret); 1085 if (ret)
1086 goto out;
1082 1087
1083 leaf = path->nodes[0]; 1088 leaf = path->nodes[0];
1084 extent = btrfs_item_ptr(leaf, path->slots[0], 1089 extent = btrfs_item_ptr(leaf, path->slots[0],
@@ -1093,6 +1098,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1093 1098
1094 btrfs_set_dev_extent_length(leaf, extent, num_bytes); 1099 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1095 btrfs_mark_buffer_dirty(leaf); 1100 btrfs_mark_buffer_dirty(leaf);
1101out:
1096 btrfs_free_path(path); 1102 btrfs_free_path(path);
1097 return ret; 1103 return ret;
1098} 1104}
@@ -1118,7 +1124,7 @@ static noinline int find_next_chunk(struct btrfs_root *root,
1118 if (ret < 0) 1124 if (ret < 0)
1119 goto error; 1125 goto error;
1120 1126
1121 BUG_ON(ret == 0); 1127 BUG_ON(ret == 0); /* Corruption */
1122 1128
1123 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); 1129 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
1124 if (ret) { 1130 if (ret) {
@@ -1162,7 +1168,7 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
1162 if (ret < 0) 1168 if (ret < 0)
1163 goto error; 1169 goto error;
1164 1170
1165 BUG_ON(ret == 0); 1171 BUG_ON(ret == 0); /* Corruption */
1166 1172
1167 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, 1173 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
1168 BTRFS_DEV_ITEM_KEY); 1174 BTRFS_DEV_ITEM_KEY);
@@ -1350,6 +1356,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1350 } 1356 }
1351 1357
1352 set_blocksize(bdev, 4096); 1358 set_blocksize(bdev, 4096);
1359 invalidate_bdev(bdev);
1353 bh = btrfs_read_dev_super(bdev); 1360 bh = btrfs_read_dev_super(bdev);
1354 if (!bh) { 1361 if (!bh) {
1355 ret = -EINVAL; 1362 ret = -EINVAL;
@@ -1596,7 +1603,7 @@ next_slot:
1596 (unsigned long)btrfs_device_fsid(dev_item), 1603 (unsigned long)btrfs_device_fsid(dev_item),
1597 BTRFS_UUID_SIZE); 1604 BTRFS_UUID_SIZE);
1598 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); 1605 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
1599 BUG_ON(!device); 1606 BUG_ON(!device); /* Logic error */
1600 1607
1601 if (device->fs_devices->seeding) { 1608 if (device->fs_devices->seeding) {
1602 btrfs_set_device_generation(leaf, dev_item, 1609 btrfs_set_device_generation(leaf, dev_item,
@@ -1706,7 +1713,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1706 if (seeding_dev) { 1713 if (seeding_dev) {
1707 sb->s_flags &= ~MS_RDONLY; 1714 sb->s_flags &= ~MS_RDONLY;
1708 ret = btrfs_prepare_sprout(root); 1715 ret = btrfs_prepare_sprout(root);
1709 BUG_ON(ret); 1716 BUG_ON(ret); /* -ENOMEM */
1710 } 1717 }
1711 1718
1712 device->fs_devices = root->fs_info->fs_devices; 1719 device->fs_devices = root->fs_info->fs_devices;
@@ -1744,11 +1751,15 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1744 1751
1745 if (seeding_dev) { 1752 if (seeding_dev) {
1746 ret = init_first_rw_device(trans, root, device); 1753 ret = init_first_rw_device(trans, root, device);
1747 BUG_ON(ret); 1754 if (ret)
1755 goto error_trans;
1748 ret = btrfs_finish_sprout(trans, root); 1756 ret = btrfs_finish_sprout(trans, root);
1749 BUG_ON(ret); 1757 if (ret)
1758 goto error_trans;
1750 } else { 1759 } else {
1751 ret = btrfs_add_device(trans, root, device); 1760 ret = btrfs_add_device(trans, root, device);
1761 if (ret)
1762 goto error_trans;
1752 } 1763 }
1753 1764
1754 /* 1765 /*
@@ -1758,17 +1769,31 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1758 btrfs_clear_space_info_full(root->fs_info); 1769 btrfs_clear_space_info_full(root->fs_info);
1759 1770
1760 unlock_chunks(root); 1771 unlock_chunks(root);
1761 btrfs_commit_transaction(trans, root); 1772 ret = btrfs_commit_transaction(trans, root);
1762 1773
1763 if (seeding_dev) { 1774 if (seeding_dev) {
1764 mutex_unlock(&uuid_mutex); 1775 mutex_unlock(&uuid_mutex);
1765 up_write(&sb->s_umount); 1776 up_write(&sb->s_umount);
1766 1777
1778 if (ret) /* transaction commit */
1779 return ret;
1780
1767 ret = btrfs_relocate_sys_chunks(root); 1781 ret = btrfs_relocate_sys_chunks(root);
1768 BUG_ON(ret); 1782 if (ret < 0)
1783 btrfs_error(root->fs_info, ret,
1784 "Failed to relocate sys chunks after "
1785 "device initialization. This can be fixed "
1786 "using the \"btrfs balance\" command.");
1769 } 1787 }
1770 1788
1771 return ret; 1789 return ret;
1790
1791error_trans:
1792 unlock_chunks(root);
1793 btrfs_abort_transaction(trans, root, ret);
1794 btrfs_end_transaction(trans, root);
1795 kfree(device->name);
1796 kfree(device);
1772error: 1797error:
1773 blkdev_put(bdev, FMODE_EXCL); 1798 blkdev_put(bdev, FMODE_EXCL);
1774 if (seeding_dev) { 1799 if (seeding_dev) {
@@ -1876,10 +1901,20 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1876 key.type = BTRFS_CHUNK_ITEM_KEY; 1901 key.type = BTRFS_CHUNK_ITEM_KEY;
1877 1902
1878 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1903 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1879 BUG_ON(ret); 1904 if (ret < 0)
1905 goto out;
1906 else if (ret > 0) { /* Logic error or corruption */
1907 btrfs_error(root->fs_info, -ENOENT,
1908 "Failed lookup while freeing chunk.");
1909 ret = -ENOENT;
1910 goto out;
1911 }
1880 1912
1881 ret = btrfs_del_item(trans, root, path); 1913 ret = btrfs_del_item(trans, root, path);
1882 1914 if (ret < 0)
1915 btrfs_error(root->fs_info, ret,
1916 "Failed to delete chunk item.");
1917out:
1883 btrfs_free_path(path); 1918 btrfs_free_path(path);
1884 return ret; 1919 return ret;
1885} 1920}
@@ -2041,7 +2076,7 @@ again:
2041 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); 2076 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
2042 if (ret < 0) 2077 if (ret < 0)
2043 goto error; 2078 goto error;
2044 BUG_ON(ret == 0); 2079 BUG_ON(ret == 0); /* Corruption */
2045 2080
2046 ret = btrfs_previous_item(chunk_root, path, key.objectid, 2081 ret = btrfs_previous_item(chunk_root, path, key.objectid,
2047 key.type); 2082 key.type);
@@ -2250,15 +2285,13 @@ static void unset_balance_control(struct btrfs_fs_info *fs_info)
2250 * Balance filters. Return 1 if chunk should be filtered out 2285 * Balance filters. Return 1 if chunk should be filtered out
2251 * (should not be balanced). 2286 * (should not be balanced).
2252 */ 2287 */
2253static int chunk_profiles_filter(u64 chunk_profile, 2288static int chunk_profiles_filter(u64 chunk_type,
2254 struct btrfs_balance_args *bargs) 2289 struct btrfs_balance_args *bargs)
2255{ 2290{
2256 chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; 2291 chunk_type = chunk_to_extended(chunk_type) &
2257 2292 BTRFS_EXTENDED_PROFILE_MASK;
2258 if (chunk_profile == 0)
2259 chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2260 2293
2261 if (bargs->profiles & chunk_profile) 2294 if (bargs->profiles & chunk_type)
2262 return 0; 2295 return 0;
2263 2296
2264 return 1; 2297 return 1;
@@ -2365,18 +2398,16 @@ static int chunk_vrange_filter(struct extent_buffer *leaf,
2365 return 1; 2398 return 1;
2366} 2399}
2367 2400
2368static int chunk_soft_convert_filter(u64 chunk_profile, 2401static int chunk_soft_convert_filter(u64 chunk_type,
2369 struct btrfs_balance_args *bargs) 2402 struct btrfs_balance_args *bargs)
2370{ 2403{
2371 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT)) 2404 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
2372 return 0; 2405 return 0;
2373 2406
2374 chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; 2407 chunk_type = chunk_to_extended(chunk_type) &
2408 BTRFS_EXTENDED_PROFILE_MASK;
2375 2409
2376 if (chunk_profile == 0) 2410 if (bargs->target == chunk_type)
2377 chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2378
2379 if (bargs->target & chunk_profile)
2380 return 1; 2411 return 1;
2381 2412
2382 return 0; 2413 return 0;
@@ -2602,6 +2633,30 @@ error:
2602 return ret; 2633 return ret;
2603} 2634}
2604 2635
2636/**
2637 * alloc_profile_is_valid - see if a given profile is valid and reduced
2638 * @flags: profile to validate
2639 * @extended: if true @flags is treated as an extended profile
2640 */
2641static int alloc_profile_is_valid(u64 flags, int extended)
2642{
2643 u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
2644 BTRFS_BLOCK_GROUP_PROFILE_MASK);
2645
2646 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
2647
2648 /* 1) check that all other bits are zeroed */
2649 if (flags & ~mask)
2650 return 0;
2651
2652 /* 2) see if profile is reduced */
2653 if (flags == 0)
2654 return !extended; /* "0" is valid for usual profiles */
2655
2656 /* true if exactly one bit set */
2657 return (flags & (flags - 1)) == 0;
2658}
2659
2605static inline int balance_need_close(struct btrfs_fs_info *fs_info) 2660static inline int balance_need_close(struct btrfs_fs_info *fs_info)
2606{ 2661{
2607 /* cancel requested || normal exit path */ 2662 /* cancel requested || normal exit path */
@@ -2630,6 +2685,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2630{ 2685{
2631 struct btrfs_fs_info *fs_info = bctl->fs_info; 2686 struct btrfs_fs_info *fs_info = bctl->fs_info;
2632 u64 allowed; 2687 u64 allowed;
2688 int mixed = 0;
2633 int ret; 2689 int ret;
2634 2690
2635 if (btrfs_fs_closing(fs_info) || 2691 if (btrfs_fs_closing(fs_info) ||
@@ -2639,13 +2695,16 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2639 goto out; 2695 goto out;
2640 } 2696 }
2641 2697
2698 allowed = btrfs_super_incompat_flags(fs_info->super_copy);
2699 if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
2700 mixed = 1;
2701
2642 /* 2702 /*
2643 * In case of mixed groups both data and meta should be picked, 2703 * In case of mixed groups both data and meta should be picked,
2644 * and identical options should be given for both of them. 2704 * and identical options should be given for both of them.
2645 */ 2705 */
2646 allowed = btrfs_super_incompat_flags(fs_info->super_copy); 2706 allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
2647 if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && 2707 if (mixed && (bctl->flags & allowed)) {
2648 (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) {
2649 if (!(bctl->flags & BTRFS_BALANCE_DATA) || 2708 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
2650 !(bctl->flags & BTRFS_BALANCE_METADATA) || 2709 !(bctl->flags & BTRFS_BALANCE_METADATA) ||
2651 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { 2710 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
@@ -2656,14 +2715,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2656 } 2715 }
2657 } 2716 }
2658 2717
2659 /*
2660 * Profile changing sanity checks. Skip them if a simple
2661 * balance is requested.
2662 */
2663 if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) &
2664 BTRFS_BALANCE_ARGS_CONVERT))
2665 goto do_balance;
2666
2667 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; 2718 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2668 if (fs_info->fs_devices->num_devices == 1) 2719 if (fs_info->fs_devices->num_devices == 1)
2669 allowed |= BTRFS_BLOCK_GROUP_DUP; 2720 allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -2673,24 +2724,27 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2673 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | 2724 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
2674 BTRFS_BLOCK_GROUP_RAID10); 2725 BTRFS_BLOCK_GROUP_RAID10);
2675 2726
2676 if (!profile_is_valid(bctl->data.target, 1) || 2727 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2677 bctl->data.target & ~allowed) { 2728 (!alloc_profile_is_valid(bctl->data.target, 1) ||
2729 (bctl->data.target & ~allowed))) {
2678 printk(KERN_ERR "btrfs: unable to start balance with target " 2730 printk(KERN_ERR "btrfs: unable to start balance with target "
2679 "data profile %llu\n", 2731 "data profile %llu\n",
2680 (unsigned long long)bctl->data.target); 2732 (unsigned long long)bctl->data.target);
2681 ret = -EINVAL; 2733 ret = -EINVAL;
2682 goto out; 2734 goto out;
2683 } 2735 }
2684 if (!profile_is_valid(bctl->meta.target, 1) || 2736 if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2685 bctl->meta.target & ~allowed) { 2737 (!alloc_profile_is_valid(bctl->meta.target, 1) ||
2738 (bctl->meta.target & ~allowed))) {
2686 printk(KERN_ERR "btrfs: unable to start balance with target " 2739 printk(KERN_ERR "btrfs: unable to start balance with target "
2687 "metadata profile %llu\n", 2740 "metadata profile %llu\n",
2688 (unsigned long long)bctl->meta.target); 2741 (unsigned long long)bctl->meta.target);
2689 ret = -EINVAL; 2742 ret = -EINVAL;
2690 goto out; 2743 goto out;
2691 } 2744 }
2692 if (!profile_is_valid(bctl->sys.target, 1) || 2745 if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2693 bctl->sys.target & ~allowed) { 2746 (!alloc_profile_is_valid(bctl->sys.target, 1) ||
2747 (bctl->sys.target & ~allowed))) {
2694 printk(KERN_ERR "btrfs: unable to start balance with target " 2748 printk(KERN_ERR "btrfs: unable to start balance with target "
2695 "system profile %llu\n", 2749 "system profile %llu\n",
2696 (unsigned long long)bctl->sys.target); 2750 (unsigned long long)bctl->sys.target);
@@ -2698,7 +2752,9 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2698 goto out; 2752 goto out;
2699 } 2753 }
2700 2754
2701 if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) { 2755 /* allow dup'ed data chunks only in mixed mode */
2756 if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2757 (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
2702 printk(KERN_ERR "btrfs: dup for data is not allowed\n"); 2758 printk(KERN_ERR "btrfs: dup for data is not allowed\n");
2703 ret = -EINVAL; 2759 ret = -EINVAL;
2704 goto out; 2760 goto out;
@@ -2724,7 +2780,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2724 } 2780 }
2725 } 2781 }
2726 2782
2727do_balance:
2728 ret = insert_balance_item(fs_info->tree_root, bctl); 2783 ret = insert_balance_item(fs_info->tree_root, bctl);
2729 if (ret && ret != -EEXIST) 2784 if (ret && ret != -EEXIST)
2730 goto out; 2785 goto out;
@@ -2967,7 +3022,7 @@ again:
2967 key.offset = (u64)-1; 3022 key.offset = (u64)-1;
2968 key.type = BTRFS_DEV_EXTENT_KEY; 3023 key.type = BTRFS_DEV_EXTENT_KEY;
2969 3024
2970 while (1) { 3025 do {
2971 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3026 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2972 if (ret < 0) 3027 if (ret < 0)
2973 goto done; 3028 goto done;
@@ -3009,8 +3064,7 @@ again:
3009 goto done; 3064 goto done;
3010 if (ret == -ENOSPC) 3065 if (ret == -ENOSPC)
3011 failed++; 3066 failed++;
3012 key.offset -= 1; 3067 } while (key.offset-- > 0);
3013 }
3014 3068
3015 if (failed && !retried) { 3069 if (failed && !retried) {
3016 failed = 0; 3070 failed = 0;
@@ -3128,11 +3182,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3128 int i; 3182 int i;
3129 int j; 3183 int j;
3130 3184
3131 if ((type & BTRFS_BLOCK_GROUP_RAID1) && 3185 BUG_ON(!alloc_profile_is_valid(type, 0));
3132 (type & BTRFS_BLOCK_GROUP_DUP)) {
3133 WARN_ON(1);
3134 type &= ~BTRFS_BLOCK_GROUP_DUP;
3135 }
3136 3186
3137 if (list_empty(&fs_devices->alloc_list)) 3187 if (list_empty(&fs_devices->alloc_list))
3138 return -ENOSPC; 3188 return -ENOSPC;
@@ -3328,13 +3378,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3328 write_lock(&em_tree->lock); 3378 write_lock(&em_tree->lock);
3329 ret = add_extent_mapping(em_tree, em); 3379 ret = add_extent_mapping(em_tree, em);
3330 write_unlock(&em_tree->lock); 3380 write_unlock(&em_tree->lock);
3331 BUG_ON(ret);
3332 free_extent_map(em); 3381 free_extent_map(em);
3382 if (ret)
3383 goto error;
3333 3384
3334 ret = btrfs_make_block_group(trans, extent_root, 0, type, 3385 ret = btrfs_make_block_group(trans, extent_root, 0, type,
3335 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 3386 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
3336 start, num_bytes); 3387 start, num_bytes);
3337 BUG_ON(ret); 3388 if (ret)
3389 goto error;
3338 3390
3339 for (i = 0; i < map->num_stripes; ++i) { 3391 for (i = 0; i < map->num_stripes; ++i) {
3340 struct btrfs_device *device; 3392 struct btrfs_device *device;
@@ -3347,7 +3399,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3347 info->chunk_root->root_key.objectid, 3399 info->chunk_root->root_key.objectid,
3348 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 3400 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
3349 start, dev_offset, stripe_size); 3401 start, dev_offset, stripe_size);
3350 BUG_ON(ret); 3402 if (ret) {
3403 btrfs_abort_transaction(trans, extent_root, ret);
3404 goto error;
3405 }
3351 } 3406 }
3352 3407
3353 kfree(devices_info); 3408 kfree(devices_info);
@@ -3383,7 +3438,8 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
3383 device = map->stripes[index].dev; 3438 device = map->stripes[index].dev;
3384 device->bytes_used += stripe_size; 3439 device->bytes_used += stripe_size;
3385 ret = btrfs_update_device(trans, device); 3440 ret = btrfs_update_device(trans, device);
3386 BUG_ON(ret); 3441 if (ret)
3442 goto out_free;
3387 index++; 3443 index++;
3388 } 3444 }
3389 3445
@@ -3420,16 +3476,19 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
3420 key.offset = chunk_offset; 3476 key.offset = chunk_offset;
3421 3477
3422 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); 3478 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
3423 BUG_ON(ret);
3424 3479
3425 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { 3480 if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
3481 /*
3482 * TODO: Cleanup of inserted chunk root in case of
3483 * failure.
3484 */
3426 ret = btrfs_add_system_chunk(chunk_root, &key, chunk, 3485 ret = btrfs_add_system_chunk(chunk_root, &key, chunk,
3427 item_size); 3486 item_size);
3428 BUG_ON(ret);
3429 } 3487 }
3430 3488
3489out_free:
3431 kfree(chunk); 3490 kfree(chunk);
3432 return 0; 3491 return ret;
3433} 3492}
3434 3493
3435/* 3494/*
@@ -3461,7 +3520,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3461 3520
3462 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, 3521 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
3463 chunk_size, stripe_size); 3522 chunk_size, stripe_size);
3464 BUG_ON(ret); 3523 if (ret)
3524 return ret;
3465 return 0; 3525 return 0;
3466} 3526}
3467 3527
@@ -3493,7 +3553,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
3493 3553
3494 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, 3554 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
3495 &stripe_size, chunk_offset, alloc_profile); 3555 &stripe_size, chunk_offset, alloc_profile);
3496 BUG_ON(ret); 3556 if (ret)
3557 return ret;
3497 3558
3498 sys_chunk_offset = chunk_offset + chunk_size; 3559 sys_chunk_offset = chunk_offset + chunk_size;
3499 3560
@@ -3504,10 +3565,12 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
3504 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, 3565 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
3505 &sys_chunk_size, &sys_stripe_size, 3566 &sys_chunk_size, &sys_stripe_size,
3506 sys_chunk_offset, alloc_profile); 3567 sys_chunk_offset, alloc_profile);
3507 BUG_ON(ret); 3568 if (ret)
3569 goto abort;
3508 3570
3509 ret = btrfs_add_device(trans, fs_info->chunk_root, device); 3571 ret = btrfs_add_device(trans, fs_info->chunk_root, device);
3510 BUG_ON(ret); 3572 if (ret)
3573 goto abort;
3511 3574
3512 /* 3575 /*
3513 * Modifying chunk tree needs allocating new blocks from both 3576 * Modifying chunk tree needs allocating new blocks from both
@@ -3517,13 +3580,20 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
3517 */ 3580 */
3518 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, 3581 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
3519 chunk_size, stripe_size); 3582 chunk_size, stripe_size);
3520 BUG_ON(ret); 3583 if (ret)
3584 goto abort;
3521 3585
3522 ret = __finish_chunk_alloc(trans, extent_root, sys_map, 3586 ret = __finish_chunk_alloc(trans, extent_root, sys_map,
3523 sys_chunk_offset, sys_chunk_size, 3587 sys_chunk_offset, sys_chunk_size,
3524 sys_stripe_size); 3588 sys_stripe_size);
3525 BUG_ON(ret); 3589 if (ret)
3590 goto abort;
3591
3526 return 0; 3592 return 0;
3593
3594abort:
3595 btrfs_abort_transaction(trans, root, ret);
3596 return ret;
3527} 3597}
3528 3598
3529int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) 3599int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
@@ -3874,7 +3944,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
3874 do_div(length, map->num_stripes); 3944 do_div(length, map->num_stripes);
3875 3945
3876 buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); 3946 buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
3877 BUG_ON(!buf); 3947 BUG_ON(!buf); /* -ENOMEM */
3878 3948
3879 for (i = 0; i < map->num_stripes; i++) { 3949 for (i = 0; i < map->num_stripes; i++) {
3880 if (devid && map->stripes[i].dev->devid != devid) 3950 if (devid && map->stripes[i].dev->devid != devid)
@@ -3967,7 +4037,7 @@ struct async_sched {
3967 * This will add one bio to the pending list for a device and make sure 4037 * This will add one bio to the pending list for a device and make sure
3968 * the work struct is scheduled. 4038 * the work struct is scheduled.
3969 */ 4039 */
3970static noinline int schedule_bio(struct btrfs_root *root, 4040static noinline void schedule_bio(struct btrfs_root *root,
3971 struct btrfs_device *device, 4041 struct btrfs_device *device,
3972 int rw, struct bio *bio) 4042 int rw, struct bio *bio)
3973{ 4043{
@@ -3979,7 +4049,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
3979 bio_get(bio); 4049 bio_get(bio);
3980 btrfsic_submit_bio(rw, bio); 4050 btrfsic_submit_bio(rw, bio);
3981 bio_put(bio); 4051 bio_put(bio);
3982 return 0; 4052 return;
3983 } 4053 }
3984 4054
3985 /* 4055 /*
@@ -4013,7 +4083,6 @@ static noinline int schedule_bio(struct btrfs_root *root,
4013 if (should_queue) 4083 if (should_queue)
4014 btrfs_queue_worker(&root->fs_info->submit_workers, 4084 btrfs_queue_worker(&root->fs_info->submit_workers,
4015 &device->work); 4085 &device->work);
4016 return 0;
4017} 4086}
4018 4087
4019int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, 4088int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
@@ -4036,7 +4105,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4036 4105
4037 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio, 4106 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio,
4038 mirror_num); 4107 mirror_num);
4039 BUG_ON(ret); 4108 if (ret) /* -ENOMEM */
4109 return ret;
4040 4110
4041 total_devs = bbio->num_stripes; 4111 total_devs = bbio->num_stripes;
4042 if (map_length < length) { 4112 if (map_length < length) {
@@ -4055,7 +4125,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4055 while (dev_nr < total_devs) { 4125 while (dev_nr < total_devs) {
4056 if (dev_nr < total_devs - 1) { 4126 if (dev_nr < total_devs - 1) {
4057 bio = bio_clone(first_bio, GFP_NOFS); 4127 bio = bio_clone(first_bio, GFP_NOFS);
4058 BUG_ON(!bio); 4128 BUG_ON(!bio); /* -ENOMEM */
4059 } else { 4129 } else {
4060 bio = first_bio; 4130 bio = first_bio;
4061 } 4131 }
@@ -4209,13 +4279,13 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
4209 write_lock(&map_tree->map_tree.lock); 4279 write_lock(&map_tree->map_tree.lock);
4210 ret = add_extent_mapping(&map_tree->map_tree, em); 4280 ret = add_extent_mapping(&map_tree->map_tree, em);
4211 write_unlock(&map_tree->map_tree.lock); 4281 write_unlock(&map_tree->map_tree.lock);
4212 BUG_ON(ret); 4282 BUG_ON(ret); /* Tree corruption */
4213 free_extent_map(em); 4283 free_extent_map(em);
4214 4284
4215 return 0; 4285 return 0;
4216} 4286}
4217 4287
4218static int fill_device_from_item(struct extent_buffer *leaf, 4288static void fill_device_from_item(struct extent_buffer *leaf,
4219 struct btrfs_dev_item *dev_item, 4289 struct btrfs_dev_item *dev_item,
4220 struct btrfs_device *device) 4290 struct btrfs_device *device)
4221{ 4291{
@@ -4232,8 +4302,6 @@ static int fill_device_from_item(struct extent_buffer *leaf,
4232 4302
4233 ptr = (unsigned long)btrfs_device_uuid(dev_item); 4303 ptr = (unsigned long)btrfs_device_uuid(dev_item);
4234 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 4304 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
4235
4236 return 0;
4237} 4305}
4238 4306
4239static int open_seed_devices(struct btrfs_root *root, u8 *fsid) 4307static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
@@ -4384,7 +4452,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
4384 * to silence the warning eg. on PowerPC 64. 4452 * to silence the warning eg. on PowerPC 64.
4385 */ 4453 */
4386 if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE) 4454 if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
4387 SetPageUptodate(sb->first_page); 4455 SetPageUptodate(sb->pages[0]);
4388 4456
4389 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); 4457 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
4390 array_size = btrfs_super_sys_array_size(super_copy); 4458 array_size = btrfs_super_sys_array_size(super_copy);
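
The new alloc_profile_is_valid() above boils down to two checks: no bits outside the profile mask, and at most one profile bit set (a "reduced" profile); (flags & (flags - 1)) == 0 is the usual exactly-one-bit test once zero has been handled. A standalone illustration with made-up flag values — the real BTRFS_BLOCK_GROUP_* constants differ, and the kernel version additionally treats 0 as invalid for extended profiles:

#include <stdint.h>
#include <stdio.h>

#define DEMO_RAID0	(1ULL << 0)	/* made-up values for the demo */
#define DEMO_RAID1	(1ULL << 1)
#define DEMO_DUP	(1ULL << 2)
#define DEMO_PROFILE_MASK	(DEMO_RAID0 | DEMO_RAID1 | DEMO_DUP)

static int profile_is_valid(uint64_t flags)
{
	if (flags & ~DEMO_PROFILE_MASK)		/* stray bits: never valid */
		return 0;
	if (flags == 0)				/* "single" is encoded as no bits */
		return 1;
	return (flags & (flags - 1)) == 0;	/* true iff exactly one bit set */
}

int main(void)
{
	printf("raid1        -> %d\n", profile_is_valid(DEMO_RAID1));		/* 1 */
	printf("raid0|raid1  -> %d\n", profile_is_valid(DEMO_RAID0 | DEMO_RAID1)); /* 0 */
	printf("single (0)   -> %d\n", profile_is_valid(0));			/* 1 */
	return 0;
}
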
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 19ac95048b88..bb6b03f97aaa 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -260,12 +260,12 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
260int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 260int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
261 struct btrfs_fs_devices **fs_devices_ret); 261 struct btrfs_fs_devices **fs_devices_ret);
262int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); 262int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
263int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices); 263void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
264int btrfs_add_device(struct btrfs_trans_handle *trans, 264int btrfs_add_device(struct btrfs_trans_handle *trans,
265 struct btrfs_root *root, 265 struct btrfs_root *root,
266 struct btrfs_device *device); 266 struct btrfs_device *device);
267int btrfs_rm_device(struct btrfs_root *root, char *device_path); 267int btrfs_rm_device(struct btrfs_root *root, char *device_path);
268int btrfs_cleanup_fs_uuids(void); 268void btrfs_cleanup_fs_uuids(void);
269int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); 269int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
270int btrfs_grow_device(struct btrfs_trans_handle *trans, 270int btrfs_grow_device(struct btrfs_trans_handle *trans,
271 struct btrfs_device *device, u64 new_size); 271 struct btrfs_device *device, u64 new_size);
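
The prototype changes above, like the matching ones in tree-log.h, narrow functions that could only ever return 0 from int to void, so a non-void return type in these headers once again means "can fail". The shape of the conversion, sketched with a hypothetical helper:

#include <stdio.h>

/* Before: a hardwired 'return 0' invites callers to check a value
 * that carries no information.
 *
 *	int cleanup_uuids(void) { ... ; return 0; }
 */

/* After: the signature itself documents that this step cannot fail. */
static void cleanup_uuids(void)
{
	/* walk the cached list and free each entry; no error path exists */
}

int main(void)
{
	cleanup_uuids();	/* nothing left to (mis)check */
	printf("done\n");
	return 0;
}
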
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 84f3001a568d..91b91e805673 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -6,6 +6,7 @@
6 6
7#include <linux/writeback.h> 7#include <linux/writeback.h>
8#include <linux/tracepoint.h> 8#include <linux/tracepoint.h>
9#include <trace/events/gfpflags.h>
9 10
10struct btrfs_root; 11struct btrfs_root;
11struct btrfs_fs_info; 12struct btrfs_fs_info;
@@ -862,6 +863,49 @@ TRACE_EVENT(btrfs_setup_cluster,
862 __entry->size, __entry->max_size, __entry->bitmap) 863 __entry->size, __entry->max_size, __entry->bitmap)
863); 864);
864 865
866struct extent_state;
867TRACE_EVENT(alloc_extent_state,
868
869 TP_PROTO(struct extent_state *state, gfp_t mask, unsigned long IP),
870
871 TP_ARGS(state, mask, IP),
872
873 TP_STRUCT__entry(
874 __field(struct extent_state *, state)
875 __field(gfp_t, mask)
876 __field(unsigned long, ip)
877 ),
878
879 TP_fast_assign(
880 __entry->state = state,
881 __entry->mask = mask,
882 __entry->ip = IP
883 ),
884
885 TP_printk("state=%p; mask = %s; caller = %pF", __entry->state,
886 show_gfp_flags(__entry->mask), (void *)__entry->ip)
887);
888
889TRACE_EVENT(free_extent_state,
890
891 TP_PROTO(struct extent_state *state, unsigned long IP),
892
893 TP_ARGS(state, IP),
894
895 TP_STRUCT__entry(
896 __field(struct extent_state *, state)
897 __field(unsigned long, ip)
898 ),
899
900 TP_fast_assign(
901 __entry->state = state,
902 __entry->ip = IP
903 ),
904
905 TP_printk(" state=%p; caller = %pF", __entry->state,
906 (void *)__entry->ip)
907);
908
865#endif /* _TRACE_BTRFS_H */ 909#endif /* _TRACE_BTRFS_H */
866 910
867/* This part must be outside protection */ 911/* This part must be outside protection */
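
Each TRACE_EVENT(name, ...) above expands into a trace_name() helper; the call sites added elsewhere in this series live in the extent-state allocator and pass _RET_IP_ (the caller's return address) so the %pF in TP_printk resolves to a function name. Roughly, as a sketch of the call shape rather than the exact extent_io.c code:

/* kernel-flavored fragment, not a standalone program */
static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	/* _RET_IP_ supplies the caller's address for the tracepoint */
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

static void free_extent_state(struct extent_state *state)
{
	/* ... refcount drop elided ... */
	trace_free_extent_state(state, _RET_IP_);
	kmem_cache_free(extent_state_cache, state);
}
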