Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/backref.c             118
-rw-r--r--  fs/btrfs/btrfs_inode.h           1
-rw-r--r--  fs/btrfs/check-integrity.c      16
-rw-r--r--  fs/btrfs/ctree.c               146
-rw-r--r--  fs/btrfs/ctree.h                11
-rw-r--r--  fs/btrfs/delayed-inode.c        18
-rw-r--r--  fs/btrfs/delayed-inode.h         3
-rw-r--r--  fs/btrfs/disk-io.c             111
-rw-r--r--  fs/btrfs/extent-tree.c          11
-rw-r--r--  fs/btrfs/extent_io.c            21
-rw-r--r--  fs/btrfs/file.c                 13
-rw-r--r--  fs/btrfs/free-space-cache.c    145
-rw-r--r--  fs/btrfs/inode.c               130
-rw-r--r--  fs/btrfs/ioctl.c               118
-rw-r--r--  fs/btrfs/ioctl.h                 2
-rw-r--r--  fs/btrfs/ordered-data.c         22
-rw-r--r--  fs/btrfs/rcu-string.h           56
-rw-r--r--  fs/btrfs/scrub.c                30
-rw-r--r--  fs/btrfs/super.c                37
-rw-r--r--  fs/btrfs/transaction.c          14
-rw-r--r--  fs/btrfs/tree-log.c              6
-rw-r--r--  fs/btrfs/volumes.c             187
-rw-r--r--  fs/btrfs/volumes.h               5
23 files changed, 791 insertions, 430 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 3f75895c919b..a383c18e74e8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -179,60 +179,74 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 				struct ulist *parents, int level,
-				struct btrfs_key *key, u64 wanted_disk_byte,
+				struct btrfs_key *key_for_search, u64 time_seq,
+				u64 wanted_disk_byte,
 				const u64 *extent_item_pos)
 {
-	int ret;
-	int slot = path->slots[level];
-	struct extent_buffer *eb = path->nodes[level];
+	int ret = 0;
+	int slot;
+	struct extent_buffer *eb;
+	struct btrfs_key key;
 	struct btrfs_file_extent_item *fi;
 	struct extent_inode_elem *eie = NULL;
 	u64 disk_byte;
-	u64 wanted_objectid = key->objectid;
 
-add_parent:
-	if (level == 0 && extent_item_pos) {
-		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
-		ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie);
+	if (level != 0) {
+		eb = path->nodes[level];
+		ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
 		if (ret < 0)
 			return ret;
-	}
-	ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS);
-	if (ret < 0)
-		return ret;
-
-	if (level != 0)
 		return 0;
+	}
 
 	/*
-	 * if the current leaf is full with EXTENT_DATA items, we must
-	 * check the next one if that holds a reference as well.
-	 * ref->count cannot be used to skip this check.
-	 * repeat this until we don't find any additional EXTENT_DATA items.
+	 * We normally enter this function with the path already pointing to
+	 * the first item to check. But sometimes, we may enter it with
+	 * slot==nritems. In that case, go to the next leaf before we continue.
 	 */
-	while (1) {
-		eie = NULL;
-		ret = btrfs_next_leaf(root, path);
-		if (ret < 0)
-			return ret;
-		if (ret)
-			return 0;
+	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
+		ret = btrfs_next_old_leaf(root, path, time_seq);
 
+	while (!ret) {
 		eb = path->nodes[0];
-		for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) {
-			btrfs_item_key_to_cpu(eb, key, slot);
-			if (key->objectid != wanted_objectid ||
-			    key->type != BTRFS_EXTENT_DATA_KEY)
-				return 0;
-			fi = btrfs_item_ptr(eb, slot,
-					    struct btrfs_file_extent_item);
-			disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
-			if (disk_byte == wanted_disk_byte)
-				goto add_parent;
+		slot = path->slots[0];
+
+		btrfs_item_key_to_cpu(eb, &key, slot);
+
+		if (key.objectid != key_for_search->objectid ||
+		    key.type != BTRFS_EXTENT_DATA_KEY)
+			break;
+
+		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+
+		if (disk_byte == wanted_disk_byte) {
+			eie = NULL;
+			if (extent_item_pos) {
+				ret = check_extent_in_eb(&key, eb, fi,
+						*extent_item_pos,
+						&eie);
+				if (ret < 0)
+					break;
+			}
+			if (!ret) {
+				ret = ulist_add(parents, eb->start,
+						(unsigned long)eie, GFP_NOFS);
+				if (ret < 0)
+					break;
+				if (!extent_item_pos) {
+					ret = btrfs_next_old_leaf(root, path,
+							time_seq);
+					continue;
+				}
+			}
 		}
+		ret = btrfs_next_old_item(root, path, time_seq);
 	}
 
-	return 0;
+	if (ret > 0)
+		ret = 0;
+	return ret;
 }
 
 /*
@@ -249,7 +263,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	struct btrfs_path *path;
 	struct btrfs_root *root;
 	struct btrfs_key root_key;
-	struct btrfs_key key = {0};
 	struct extent_buffer *eb;
 	int ret = 0;
 	int root_level;
@@ -288,25 +301,19 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		goto out;
 
 	eb = path->nodes[level];
-	if (!eb) {
-		WARN_ON(1);
-		ret = 1;
-		goto out;
-	}
-
-	if (level == 0) {
-		if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) {
-			ret = btrfs_next_leaf(root, path);
-			if (ret)
-				goto out;
-			eb = path->nodes[0];
+	while (!eb) {
+		if (!level) {
+			WARN_ON(1);
+			ret = 1;
+			goto out;
 		}
-
-		btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
+		level--;
+		eb = path->nodes[level];
 	}
 
-	ret = add_all_parents(root, path, parents, level, &key,
-				ref->wanted_disk_byte, extent_item_pos);
+	ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
+				time_seq, ref->wanted_disk_byte,
+				extent_item_pos);
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -832,6 +839,7 @@ again:
 		}
 		ret = __add_delayed_refs(head, delayed_ref_seq,
 					 &prefs_delayed);
+		mutex_unlock(&head->mutex);
 		if (ret) {
 			spin_unlock(&delayed_refs->lock);
 			goto out;
@@ -925,8 +933,6 @@ again:
 	}
 
 out:
-	if (head)
-		mutex_unlock(&head->mutex);
 	btrfs_free_path(path);
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
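
Taken together, the backref.c changes make the EXTENT_DATA walk in add_all_parents() operate on the rewound view of the tree: positioning comes from btrfs_search_old_slot() and every advance goes through the new time_seq-aware iterators. A minimal sketch of that iteration contract, using only helpers introduced in this series (the positioning code around it is assumed):

	/*
	 * Sketch: visit EXTENT_DATA items for one objectid as they existed
	 * at @time_seq. Assumes @path was positioned by an earlier
	 * btrfs_search_old_slot() call; error handling trimmed.
	 */
	while (!ret) {
		struct extent_buffer *eb = path->nodes[0];
		int slot = path->slots[0];
		struct btrfs_key key;

		btrfs_item_key_to_cpu(eb, &key, slot);
		if (key.objectid != wanted_objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			break;

		/* ... inspect the btrfs_file_extent_item in this slot ... */

		/* step to the next item, crossing leaves via
		 * btrfs_next_old_leaf() when the current one is exhausted */
		ret = btrfs_next_old_item(root, path, time_seq);
	}
	if (ret > 0)	/* ran past the last item; not an error */
		ret = 0;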
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index e616f8872e69..12394a90d60f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -37,6 +37,7 @@
 #define BTRFS_INODE_IN_DEFRAG			3
 #define BTRFS_INODE_DELALLOC_META_RESERVED	4
 #define BTRFS_INODE_HAS_ORPHAN_ITEM		5
+#define BTRFS_INODE_HAS_ASYNC_EXTENT		6
 
 /* in memory btrfs inode */
 struct btrfs_inode {
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 9cebb1fd6a3c..da6e9364a5e3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -93,6 +93,7 @@
 #include "print-tree.h"
 #include "locking.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 
 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
@@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror(
 	superblock_tmp->never_written = 0;
 	superblock_tmp->mirror_num = 1 + superblock_mirror_num;
 	if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
-		printk(KERN_INFO "New initial S-block (bdev %p, %s)"
-		       " @%llu (%s/%llu/%d)\n",
-		       superblock_bdev, device->name,
-		       (unsigned long long)dev_bytenr,
-		       dev_state->name,
-		       (unsigned long long)dev_bytenr,
-		       superblock_mirror_num);
+		printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
+			      " @%llu (%s/%llu/%d)\n",
+			      superblock_bdev,
+			      rcu_str_deref(device->name),
+			      (unsigned long long)dev_bytenr,
+			      dev_state->name,
+			      (unsigned long long)dev_bytenr,
+			      superblock_mirror_num);
 	list_add(&superblock_tmp->all_blocks_node,
 		 &state->all_blocks_list);
 	btrfsic_block_hashtable_add(superblock_tmp,
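
The new fs/btrfs/rcu-string.h (56 lines in the diffstat above) does not appear in this range, but the call sites make its shape clear: device->name becomes an RCU-protected string, and readers take the RCU read lock around the printk while fetching the string with rcu_str_deref(). A plausible minimal sketch of such a helper, inferred from the usage in this diff rather than copied from the file itself:

	struct rcu_string {
		struct rcu_head rcu;
		char str[0];	/* string data lives right after the head */
	};

	/* dereference under rcu_read_lock() and yield the C string */
	#define rcu_str_deref(rcu_str) ({				\
		struct rcu_string *__str = rcu_dereference(rcu_str);	\
		__str->str;						\
	})

	/* print while holding the RCU read lock so the name cannot be freed */
	#define printk_in_rcu(fmt, ...) do {	\
		rcu_read_lock();		\
		printk(fmt, __VA_ARGS__);	\
		rcu_read_unlock();		\
	} while (0)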
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d7a96cfdc50a..8206b3900587 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+/*
+ * This allocates memory and gets a tree modification sequence number when
+ * needed.
+ *
+ * Returns 0 when no sequence number is needed, < 0 on error.
+ * Returns 1 when a sequence number was added. In this case,
+ * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
+ * after inserting into the rb tree.
+ */
 static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
 				 struct tree_mod_elem **tm_ret)
 {
@@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
 		 */
 		kfree(tm);
 		seq = 0;
+		spin_unlock(&fs_info->tree_mod_seq_lock);
 	} else {
 		__get_tree_mod_seq(fs_info, &tm->elem);
 		seq = tm->elem.seq;
 	}
-	spin_unlock(&fs_info->tree_mod_seq_lock);
 
 	return seq;
 }
@@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
 	tm->slot = slot;
 	tm->generation = btrfs_node_ptr_generation(eb, slot);
 
-	return __tree_mod_log_insert(fs_info, tm);
+	ret = __tree_mod_log_insert(fs_info, tm);
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+	return ret;
 }
 
 static noinline int
@@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 	tm->move.nr_items = nr_items;
 	tm->op = MOD_LOG_MOVE_KEYS;
 
-	return __tree_mod_log_insert(fs_info, tm);
+	ret = __tree_mod_log_insert(fs_info, tm);
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+	return ret;
 }
 
 static noinline int
@@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	tm->generation = btrfs_header_generation(old_root);
 	tm->op = MOD_LOG_ROOT_REPLACE;
 
-	return __tree_mod_log_insert(fs_info, tm);
+	ret = __tree_mod_log_insert(fs_info, tm);
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+	return ret;
 }
 
 static struct tree_mod_elem *
@@ -1009,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
 		if (!looped && !tm)
 			return 0;
 		/*
-		 * we must have key remove operations in the log before the
-		 * replace operation.
+		 * if there are no tree operation for the oldest root, we simply
+		 * return it. this should only happen if that (old) root is at
+		 * level 0.
 		 */
-		BUG_ON(!tm);
+		if (!tm)
+			break;
 
+		/*
+		 * if there's an operation that's not a root replacement, we
+		 * found the oldest version of our root. normally, we'll find a
+		 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+		 */
 		if (tm->op != MOD_LOG_ROOT_REPLACE)
 			break;
 
@@ -1023,6 +1045,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
 		looped = 1;
 	}
 
+	/* if there's no old root to return, return what we found instead */
+	if (!found)
+		found = tm;
+
 	return found;
 }
 
@@ -1068,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 					      tm->generation);
 			break;
 		case MOD_LOG_KEY_ADD:
-			if (tm->slot != n - 1) {
-				o_dst = btrfs_node_key_ptr_offset(tm->slot);
-				o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
-				memmove_extent_buffer(eb, o_dst, o_src, p_size);
-			}
+			/* if a move operation is needed it's in the log */
 			n--;
 			break;
 		case MOD_LOG_MOVE_KEYS:
@@ -1143,45 +1165,57 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 	return eb_rewin;
 }
 
+/*
+ * get_old_root() rewinds the state of @root's root node to the given @time_seq
+ * value. If there are no changes, the current root->root_node is returned. If
+ * anything changed in between, there's a fresh buffer allocated on which the
+ * rewind operations are done. In any case, the returned buffer is read locked.
+ * Returns NULL on error (with no locks held).
+ */
 static inline struct extent_buffer *
 get_old_root(struct btrfs_root *root, u64 time_seq)
 {
 	struct tree_mod_elem *tm;
 	struct extent_buffer *eb;
-	struct tree_mod_root *old_root;
-	u64 old_generation;
+	struct tree_mod_root *old_root = NULL;
+	u64 old_generation = 0;
+	u64 logical;
 
+	eb = btrfs_read_lock_root_node(root);
 	tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
 	if (!tm)
 		return root->node;
 
-	old_root = &tm->old_root;
-	old_generation = tm->generation;
-
-	tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq);
-	/*
-	 * there was an item in the log when __tree_mod_log_oldest_root
-	 * returned. this one must not go away, because the time_seq passed to
-	 * us must be blocking its removal.
-	 */
-	BUG_ON(!tm);
-
-	if (old_root->logical == root->node->start) {
-		/* there are logged operations for the current root */
-		eb = btrfs_clone_extent_buffer(root->node);
-	} else {
-		/* there's a root replace operation for the current root */
-		eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
-					       root->nodesize);
+	if (tm->op == MOD_LOG_ROOT_REPLACE) {
+		old_root = &tm->old_root;
+		old_generation = tm->generation;
+		logical = old_root->logical;
+	} else {
+		logical = root->node->start;
+	}
+
+	tm = tree_mod_log_search(root->fs_info, logical, time_seq);
+	if (old_root)
+		eb = alloc_dummy_extent_buffer(logical, root->nodesize);
+	else
+		eb = btrfs_clone_extent_buffer(root->node);
+	btrfs_tree_read_unlock(root->node);
+	free_extent_buffer(root->node);
+	if (!eb)
+		return NULL;
+	btrfs_tree_read_lock(eb);
+	if (old_root) {
 		btrfs_set_header_bytenr(eb, eb->start);
 		btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
 		btrfs_set_header_owner(eb, root->root_key.objectid);
+		btrfs_set_header_level(eb, old_root->level);
+		btrfs_set_header_generation(eb, old_generation);
 	}
-	if (!eb)
-		return NULL;
-	btrfs_set_header_level(eb, old_root->level);
-	btrfs_set_header_generation(eb, old_generation);
-	__tree_mod_log_rewind(eb, time_seq, tm);
+	if (tm)
+		__tree_mod_log_rewind(eb, time_seq, tm);
+	else
+		WARN_ON(btrfs_header_level(eb) != 0);
+	extent_buffer_get(eb);
 
 	return eb;
 }
@@ -1650,8 +1684,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 
-	btrfs_header_nritems(mid);
-
 	left = read_node_slot(root, parent, pslot - 1);
 	if (left) {
 		btrfs_tree_lock(left);
@@ -1681,7 +1713,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		wret = push_node_left(trans, root, left, mid, 1);
 		if (wret < 0)
 			ret = wret;
-		btrfs_header_nritems(mid);
 	}
 
 	/*
@@ -2615,9 +2646,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
 
 again:
 	b = get_old_root(root, time_seq);
-	extent_buffer_get(b);
 	level = btrfs_header_level(b);
-	btrfs_tree_read_lock(b);
 	p->locks[level] = BTRFS_READ_LOCK;
 
 	while (b) {
@@ -2964,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 static void insert_ptr(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct btrfs_path *path,
 		       struct btrfs_disk_key *key, u64 bytenr,
-		       int slot, int level, int tree_mod_log)
+		       int slot, int level)
 {
 	struct extent_buffer *lower;
 	int nritems;
@@ -2977,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
 	BUG_ON(slot > nritems);
 	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
 	if (slot != nritems) {
-		if (tree_mod_log && level)
+		if (level)
 			tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
 					     slot, nritems - slot);
 		memmove_extent_buffer(lower,
@@ -2985,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
 			      btrfs_node_key_ptr_offset(slot),
 			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
 	}
-	if (tree_mod_log && level) {
+	if (level) {
 		ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
 					      MOD_LOG_KEY_ADD);
 		BUG_ON(ret < 0);
@@ -3073,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(split);
 
 	insert_ptr(trans, root, path, &disk_key, split->start,
-		   path->slots[level + 1] + 1, level + 1, 1);
+		   path->slots[level + 1] + 1, level + 1);
 
 	if (path->slots[level] >= mid) {
 		path->slots[level] -= mid;
@@ -3610,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
 	btrfs_set_header_nritems(l, mid);
 	btrfs_item_key(right, &disk_key, 0);
 	insert_ptr(trans, root, path, &disk_key, right->start,
-		   path->slots[1] + 1, 1, 0);
+		   path->slots[1] + 1, 1);
 
 	btrfs_mark_buffer_dirty(right);
 	btrfs_mark_buffer_dirty(l);
@@ -3817,7 +3846,7 @@ again:
 	if (mid <= slot) {
 		btrfs_set_header_nritems(right, 0);
 		insert_ptr(trans, root, path, &disk_key, right->start,
-			   path->slots[1] + 1, 1, 0);
+			   path->slots[1] + 1, 1);
 		btrfs_tree_unlock(path->nodes[0]);
 		free_extent_buffer(path->nodes[0]);
 		path->nodes[0] = right;
@@ -3826,7 +3855,7 @@ again:
 	} else {
 		btrfs_set_header_nritems(right, 0);
 		insert_ptr(trans, root, path, &disk_key, right->start,
-			   path->slots[1], 1, 0);
+			   path->slots[1], 1);
 		btrfs_tree_unlock(path->nodes[0]);
 		free_extent_buffer(path->nodes[0]);
 		path->nodes[0] = right;
@@ -5001,6 +5030,12 @@ next:
  */
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 {
+	return btrfs_next_old_leaf(root, path, 0);
+}
+
+int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
+			u64 time_seq)
+{
 	int slot;
 	int level;
 	struct extent_buffer *c;
@@ -5025,7 +5060,10 @@ again:
 	path->keep_locks = 1;
 	path->leave_spinning = 1;
 
-	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (time_seq)
+		ret = btrfs_search_old_slot(root, &key, path, time_seq);
+	else
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	path->keep_locks = 0;
 
 	if (ret < 0)
@@ -5081,6 +5119,18 @@ again:
 
 		if (!path->skip_locking) {
 			ret = btrfs_try_tree_read_lock(next);
+			if (!ret && time_seq) {
+				/*
+				 * If we don't get the lock, we may be racing
+				 * with push_leaf_left, holding that lock while
+				 * itself waiting for the leaf we've currently
+				 * locked. To solve this situation, we give up
+				 * on our lock and cycle.
+				 */
+				btrfs_release_path(path);
+				cond_resched();
+				goto again;
+			}
 			if (!ret) {
 				btrfs_set_path_blocking(path);
 				btrfs_tree_read_lock(next);
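
A consequence of the tree_mod_seq_lock changes above is a split locking contract: tree_mod_alloc() now returns with fs_info->tree_mod_seq_lock still held whenever it handed out a sequence number, and each insert helper releases it only after __tree_mod_log_insert() has linked the element into the rb tree, keeping the sequence number and the insertion atomic with respect to each other. Condensed into one hypothetical helper for illustration (not a function in the patch):

	/*
	 * Sketch of the locking pattern the three tree_mod_log_insert_*
	 * helpers now share; tree_mod_alloc() returns > 0 with
	 * tree_mod_seq_lock held.
	 */
	static int tree_mod_log_insert(struct btrfs_fs_info *fs_info, gfp_t flags)
	{
		struct tree_mod_elem *tm;
		int ret;

		ret = tree_mod_alloc(fs_info, flags, &tm);
		if (ret <= 0)
			return ret;	/* no sequence number needed, or error */

		/* ... fill in tm->op and its operands ... */

		ret = __tree_mod_log_insert(fs_info, tm);	/* rb tree insert */
		spin_unlock(&fs_info->tree_mod_seq_lock);	/* taken by alloc */
		return ret;
	}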
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0236d03c6732..fa5c45b39075 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2753,13 +2753,20 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
-static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
+			u64 time_seq);
+static inline int btrfs_next_old_item(struct btrfs_root *root,
+				      struct btrfs_path *p, u64 time_seq)
 {
 	++p->slots[0];
 	if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
-		return btrfs_next_leaf(root, p);
+		return btrfs_next_old_leaf(root, p, time_seq);
 	return 0;
 }
+static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+{
+	return btrfs_next_old_item(root, p, 0);
+}
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
 int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index c18d0442ae6d..2399f4086915 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1879,3 +1879,21 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
 		}
 	}
 }
+
+void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
+{
+	struct btrfs_delayed_root *delayed_root;
+	struct btrfs_delayed_node *curr_node, *prev_node;
+
+	delayed_root = btrfs_get_delayed_root(root);
+
+	curr_node = btrfs_first_delayed_node(delayed_root);
+	while (curr_node) {
+		__btrfs_kill_delayed_node(curr_node);
+
+		prev_node = curr_node;
+		curr_node = btrfs_next_delayed_node(curr_node);
+		btrfs_release_delayed_node(prev_node);
+	}
+}
+
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 7083d08b2a21..f5aa4023d3e1 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -124,6 +124,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev);
 /* Used for drop dead root */
 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
 
+/* Used for clean the transaction */
+void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
+
 /* Used for readdir() */
 void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
 			     struct list_head *del_list);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7ae51decf6d3..2936ca49b3b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,7 @@
 #include "free-space-cache.h"
 #include "inode-map.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
@@ -2118,7 +2119,7 @@ int open_ctree(struct super_block *sb,
 
 	features = btrfs_super_incompat_flags(disk_super);
 	features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
-	if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
+	if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
 		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
 
 	/*
@@ -2353,12 +2354,17 @@ retry_root_backup:
 					  BTRFS_CSUM_TREE_OBJECTID, csum_root);
 	if (ret)
 		goto recovery_tree_root;
-
 	csum_root->track_dirty = 1;
 
 	fs_info->generation = generation;
 	fs_info->last_trans_committed = generation;
 
+	ret = btrfs_recover_balance(fs_info);
+	if (ret) {
+		printk(KERN_WARNING "btrfs: failed to recover balance\n");
+		goto fail_block_groups;
+	}
+
 	ret = btrfs_init_dev_stats(fs_info);
 	if (ret) {
 		printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2484,20 +2490,23 @@ retry_root_backup:
 		goto fail_trans_kthread;
 	}
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		down_read(&fs_info->cleanup_work_sem);
-		err = btrfs_orphan_cleanup(fs_info->fs_root);
-		if (!err)
-			err = btrfs_orphan_cleanup(fs_info->tree_root);
-		up_read(&fs_info->cleanup_work_sem);
-
-		if (!err)
-			err = btrfs_recover_balance(fs_info->tree_root);
-
-		if (err) {
-			close_ctree(tree_root);
-			return err;
-		}
-	}
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
+
+	down_read(&fs_info->cleanup_work_sem);
+	if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
+	    (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
+		up_read(&fs_info->cleanup_work_sem);
+		close_ctree(tree_root);
+		return ret;
+	}
+	up_read(&fs_info->cleanup_work_sem);
+
+	ret = btrfs_resume_balance_async(fs_info);
+	if (ret) {
+		printk(KERN_WARNING "btrfs: failed to resume balance\n");
+		close_ctree(tree_root);
+		return ret;
+	}
 
 	return 0;
@@ -2575,8 +2584,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 		struct btrfs_device *device = (struct btrfs_device *)
 			bh->b_private;
 
-		printk_ratelimited(KERN_WARNING "lost page write due to "
-				   "I/O error on %s\n", device->name);
+		printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
+					  "I/O error on %s\n",
+					  rcu_str_deref(device->name));
 		/* note, we dont' set_buffer_write_io_error because we have
 		 * our own ways of dealing with the IO errors
 		 */
@@ -2749,8 +2759,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 	wait_for_completion(&device->flush_wait);
 
 	if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-		printk("btrfs: disabling barriers on dev %s\n",
-		       device->name);
+		printk_in_rcu("btrfs: disabling barriers on dev %s\n",
+			      rcu_str_deref(device->name));
 		device->nobarriers = 1;
 	}
 	if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3400,7 +3410,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
 	delayed_refs = &trans->delayed_refs;
 
-again:
 	spin_lock(&delayed_refs->lock);
 	if (delayed_refs->num_entries == 0) {
 		spin_unlock(&delayed_refs->lock);
@@ -3408,31 +3417,37 @@ again:
 		return ret;
 	}
 
-	node = rb_first(&delayed_refs->root);
-	while (node) {
+	while ((node = rb_first(&delayed_refs->root)) != NULL) {
 		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-		node = rb_next(node);
-
-		ref->in_tree = 0;
-		rb_erase(&ref->rb_node, &delayed_refs->root);
-		delayed_refs->num_entries--;
 
 		atomic_set(&ref->refs, 1);
 		if (btrfs_delayed_ref_is_head(ref)) {
 			struct btrfs_delayed_ref_head *head;
 
 			head = btrfs_delayed_node_to_head(ref);
-			spin_unlock(&delayed_refs->lock);
-			mutex_lock(&head->mutex);
+			if (!mutex_trylock(&head->mutex)) {
+				atomic_inc(&ref->refs);
+				spin_unlock(&delayed_refs->lock);
+
+				/* Need to wait for the delayed ref to run */
+				mutex_lock(&head->mutex);
+				mutex_unlock(&head->mutex);
+				btrfs_put_delayed_ref(ref);
+
+				spin_lock(&delayed_refs->lock);
+				continue;
+			}
+
 			kfree(head->extent_op);
 			delayed_refs->num_heads--;
 			if (list_empty(&head->cluster))
 				delayed_refs->num_heads_ready--;
 			list_del_init(&head->cluster);
-			mutex_unlock(&head->mutex);
-			btrfs_put_delayed_ref(ref);
-			goto again;
 		}
+		ref->in_tree = 0;
+		rb_erase(&ref->rb_node, &delayed_refs->root);
+		delayed_refs->num_entries--;
+
 		spin_unlock(&delayed_refs->lock);
 		btrfs_put_delayed_ref(ref);
 
@@ -3520,11 +3535,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 				&(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
 					   offset >> PAGE_CACHE_SHIFT);
 			spin_unlock(&dirty_pages->buffer_lock);
-			if (eb) {
+			if (eb)
 				ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
 							 &eb->bflags);
-				atomic_set(&eb->refs, 1);
-			}
 			if (PageWriteback(page))
 				end_page_writeback(page);
 
@@ -3538,8 +3551,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 				spin_unlock_irq(&page->mapping->tree_lock);
 			}
 
-			page->mapping->a_ops->invalidatepage(page, 0);
 			unlock_page(page);
+			page_cache_release(page);
 		}
 	}
 
@@ -3553,8 +3566,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
 	u64 start;
 	u64 end;
 	int ret;
+	bool loop = true;
 
 	unpin = pinned_extents;
+again:
 	while (1) {
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
 					    EXTENT_DIRTY);
@@ -3572,6 +3587,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
 		cond_resched();
 	}
 
+	if (loop) {
+		if (unpin == &root->fs_info->freed_extents[0])
+			unpin = &root->fs_info->freed_extents[1];
+		else
+			unpin = &root->fs_info->freed_extents[0];
+		loop = false;
+		goto again;
+	}
+
 	return 0;
 }
 
3577 3601
@@ -3585,21 +3609,23 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3585 /* FIXME: cleanup wait for commit */ 3609 /* FIXME: cleanup wait for commit */
3586 cur_trans->in_commit = 1; 3610 cur_trans->in_commit = 1;
3587 cur_trans->blocked = 1; 3611 cur_trans->blocked = 1;
3588 if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 3612 wake_up(&root->fs_info->transaction_blocked_wait);
3589 wake_up(&root->fs_info->transaction_blocked_wait);
3590 3613
3591 cur_trans->blocked = 0; 3614 cur_trans->blocked = 0;
3592 if (waitqueue_active(&root->fs_info->transaction_wait)) 3615 wake_up(&root->fs_info->transaction_wait);
3593 wake_up(&root->fs_info->transaction_wait);
3594 3616
3595 cur_trans->commit_done = 1; 3617 cur_trans->commit_done = 1;
3596 if (waitqueue_active(&cur_trans->commit_wait)) 3618 wake_up(&cur_trans->commit_wait);
3597 wake_up(&cur_trans->commit_wait); 3619
3620 btrfs_destroy_delayed_inodes(root);
3621 btrfs_assert_delayed_root_empty(root);
3598 3622
3599 btrfs_destroy_pending_snapshots(cur_trans); 3623 btrfs_destroy_pending_snapshots(cur_trans);
3600 3624
3601 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, 3625 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
3602 EXTENT_DIRTY); 3626 EXTENT_DIRTY);
3627 btrfs_destroy_pinned_extent(root,
3628 root->fs_info->pinned_extents);
3603 3629
3604 /* 3630 /*
3605 memset(cur_trans, 0, sizeof(*cur_trans)); 3631 memset(cur_trans, 0, sizeof(*cur_trans));
@@ -3648,6 +3674,9 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
 		if (waitqueue_active(&t->commit_wait))
 			wake_up(&t->commit_wait);
 
+		btrfs_destroy_delayed_inodes(root);
+		btrfs_assert_delayed_root_empty(root);
+
 		btrfs_destroy_pending_snapshots(t);
 
 		btrfs_destroy_delalloc_inodes(root);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b5a1e1bdefb..6e1d36702ff7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ next:
 	return count;
 }
 
-
 static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
-			       unsigned long num_refs)
+			       unsigned long num_refs,
+			       struct list_head *first_seq)
 {
-	struct list_head *first_seq = delayed_refs->seq_head.next;
-
 	spin_unlock(&delayed_refs->lock);
 	pr_debug("waiting for more refs (num %ld, first %p)\n",
 		 num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct list_head cluster;
+	struct list_head *first_seq = NULL;
 	int ret;
 	u64 delayed_start;
 	int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
 			 */
 			consider_waiting = 1;
 			num_refs = delayed_refs->num_entries;
+			first_seq = root->fs_info->tree_mod_seq_list.next;
 		} else {
-			wait_for_more_refs(delayed_refs, num_refs);
+			wait_for_more_refs(delayed_refs,
+					   num_refs, first_seq);
 			/*
 			 * after waiting, things have changed. we
 			 * dropped the lock and someone else might have
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2c8f7b204617..01c21b6c6d43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -20,6 +20,7 @@
 #include "volumes.h"
 #include "check-integrity.h"
 #include "locking.h"
+#include "rcu-string.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
 		return -EIO;
 	}
 
-	printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s "
-	       "sector %llu)\n", page->mapping->host->i_ino, start,
-	       dev->name, sector);
+	printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
+		      "(dev %s sector %llu)\n", page->mapping->host->i_ino,
+		      start, rcu_str_deref(dev->name), sector);
 
 	bio_put(bio);
 	return 0;
@@ -3323,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 			     writepage_t writepage, void *data,
 			     void (*flush_fn)(void *))
 {
+	struct inode *inode = mapping->host;
 	int ret = 0;
 	int done = 0;
 	int nr_to_write_done = 0;
@@ -3333,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 	int scanned = 0;
 	int tag;
 
+	/*
+	 * We have to hold onto the inode so that ordered extents can do their
+	 * work when the IO finishes. The alternative to this is failing to add
+	 * an ordered extent if the igrab() fails there and that is a huge pain
+	 * to deal with, so instead just hold onto the inode throughout the
+	 * writepages operation. If it fails here we are freeing up the inode
+	 * anyway and we'd rather not waste our time writing out stuff that is
+	 * going to be truncated anyway.
+	 */
+	if (!igrab(inode))
+		return 0;
+
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* Start from prev offset */
@@ -3427,6 +3441,7 @@ retry:
 		index = 0;
 		goto retry;
 	}
+	btrfs_add_delayed_iput(inode);
 	return ret;
 }
 
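
The extent_write_cache_pages() change is an instance of a pattern this series leans on repeatedly (see also the async cow hunks in inode.c below): take a reference on the inode before kicking off work whose completion handlers need it, then drop it with btrfs_add_delayed_iput() rather than a plain iput(), presumably because the final iput may need a transaction and must not run from this context. Stripped to its skeleton:

	/* sketch: pin the inode across an operation with async completion */
	if (!igrab(inode))
		return 0;	/* inode already being freed; nothing to do */

	/* ... submit IO; end-IO handlers may run after we return ... */

	btrfs_add_delayed_iput(inode);	/* deferred release of our reference */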
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 70dc8ca73e25..9aa01ec2138d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 			       loff_t *ppos, size_t count, size_t ocount)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = fdentry(file)->d_inode;
 	struct iov_iter i;
 	ssize_t written;
 	ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
 					    count, ocount);
 
-	/*
-	 * the generic O_DIRECT will update in-memory i_size after the
-	 * DIOs are done. But our endio handlers that update the on
-	 * disk i_size never update past the in memory i_size. So we
-	 * need one more update here to catch any additions to the
-	 * file
-	 */
-	if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
-		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
-		mark_inode_dirty(inode);
-	}
-
 	if (written < 0 || written == count)
 		return written;
 
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81296c57405a..6c4e2baa9290 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ again:
 	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
 
 	/*
-	 * XXX - this can go away after a few releases.
-	 *
-	 * since the only user of btrfs_remove_free_space is the tree logging
-	 * stuff, and the only way to test that is under crash conditions, we
-	 * want to have this debug stuff here just in case somethings not
-	 * working. Search the bitmap for the space we are trying to use to
-	 * make sure its actually there. If its not there then we need to stop
-	 * because something has gone wrong.
+	 * We need to search for bits in this bitmap. We could only cover some
+	 * of the extent in this bitmap thanks to how we add space, so we need
+	 * to search for as much as it as we can and clear that amount, and then
+	 * go searching for the next bit.
 	 */
 	search_start = *offset;
-	search_bytes = *bytes;
+	search_bytes = ctl->unit;
 	search_bytes = min(search_bytes, end - search_start + 1);
 	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
 	BUG_ON(ret < 0 || search_start != *offset);
 
-	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
-		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
-		*bytes -= end - *offset + 1;
-		*offset = end + 1;
-	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
-		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
-		*bytes = 0;
-	}
+	/* We may have found more bits than what we need */
+	search_bytes = min(search_bytes, *bytes);
+
+	/* Cannot clear past the end of the bitmap */
+	search_bytes = min(search_bytes, end - search_start + 1);
+
+	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
+	*offset += search_bytes;
+	*bytes -= search_bytes;
 
 	if (*bytes) {
 		struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ again:
 		 * everything over again.
 		 */
 		search_start = *offset;
-		search_bytes = *bytes;
+		search_bytes = ctl->unit;
 		ret = search_bitmap(ctl, bitmap_info, &search_start,
 				    &search_bytes);
 		if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *info;
-	struct btrfs_free_space *next_info = NULL;
 	int ret = 0;
 
 	spin_lock(&ctl->tree_lock);
 
 again:
+	if (!bytes)
+		goto out_lock;
+
 	info = tree_search_offset(ctl, offset, 0, 0);
 	if (!info) {
 		/*
@@ -1905,88 +1904,48 @@ again:
 		}
 	}
 
-	if (info->bytes < bytes && rb_next(&info->offset_index)) {
-		u64 end;
-		next_info = rb_entry(rb_next(&info->offset_index),
-				     struct btrfs_free_space,
-				     offset_index);
-
-		if (next_info->bitmap)
-			end = next_info->offset +
-			      BITS_PER_BITMAP * ctl->unit - 1;
-		else
-			end = next_info->offset + next_info->bytes;
-
-		if (next_info->bytes < bytes ||
-		    next_info->offset > offset || offset > end) {
-			printk(KERN_CRIT "Found free space at %llu, size %llu,"
-			       " trying to use %llu\n",
-			       (unsigned long long)info->offset,
-			       (unsigned long long)info->bytes,
-			       (unsigned long long)bytes);
-			WARN_ON(1);
-			ret = -EINVAL;
-			goto out_lock;
-		}
-
-		info = next_info;
-	}
-
-	if (info->bytes == bytes) {
-		unlink_free_space(ctl, info);
-		if (info->bitmap) {
-			kfree(info->bitmap);
-			ctl->total_bitmaps--;
-		}
-		kmem_cache_free(btrfs_free_space_cachep, info);
-		ret = 0;
-		goto out_lock;
-	}
-
-	if (!info->bitmap && info->offset == offset) {
-		unlink_free_space(ctl, info);
-		info->offset += bytes;
-		info->bytes -= bytes;
-		ret = link_free_space(ctl, info);
-		WARN_ON(ret);
-		goto out_lock;
-	}
-
-	if (!info->bitmap && info->offset <= offset &&
-	    info->offset + info->bytes >= offset + bytes) {
-		u64 old_start = info->offset;
-		/*
-		 * we're freeing space in the middle of the info,
-		 * this can happen during tree log replay
-		 *
-		 * first unlink the old info and then
-		 * insert it again after the hole we're creating
-		 */
-		unlink_free_space(ctl, info);
-		if (offset + bytes < info->offset + info->bytes) {
-			u64 old_end = info->offset + info->bytes;
-
-			info->offset = offset + bytes;
-			info->bytes = old_end - info->offset;
-			ret = link_free_space(ctl, info);
-			WARN_ON(ret);
-			if (ret)
-				goto out_lock;
-		} else {
-			/* the hole we're creating ends at the end
-			 * of the info struct, just free the info
-			 */
-			kmem_cache_free(btrfs_free_space_cachep, info);
-		}
-		spin_unlock(&ctl->tree_lock);
-
-		/* step two, insert a new info struct to cover
-		 * anything before the hole
-		 */
-		ret = btrfs_add_free_space(block_group, old_start,
-					   offset - old_start);
-		WARN_ON(ret); /* -ENOMEM */
-		goto out;
-	}
+	if (!info->bitmap) {
+		unlink_free_space(ctl, info);
+		if (offset == info->offset) {
+			u64 to_free = min(bytes, info->bytes);
+
+			info->bytes -= to_free;
+			info->offset += to_free;
+			if (info->bytes) {
+				ret = link_free_space(ctl, info);
+				WARN_ON(ret);
+			} else {
+				kmem_cache_free(btrfs_free_space_cachep, info);
+			}
+
+			offset += to_free;
+			bytes -= to_free;
+			goto again;
+		} else {
+			u64 old_end = info->bytes + info->offset;
+
+			info->bytes = offset - info->offset;
+			ret = link_free_space(ctl, info);
+			WARN_ON(ret);
+			if (ret)
+				goto out_lock;
+
+			/* Not enough bytes in this entry to satisfy us */
+			if (old_end < offset + bytes) {
+				bytes -= old_end - offset;
+				offset = old_end;
+				goto again;
+			} else if (old_end == offset + bytes) {
+				/* all done */
+				goto out_lock;
+			}
+			spin_unlock(&ctl->tree_lock);
+
+			ret = btrfs_add_free_space(block_group, offset + bytes,
+						   old_end - (offset + bytes));
+			WARN_ON(ret);
+			goto out;
+		}
+	}
 
 	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
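
The rewritten btrfs_remove_free_space() drops the old requirement that the removed range fit inside one entry; it now trims whatever the entry under `offset` covers and loops back to `again` with the remainder until `bytes` reaches zero. A worked sketch of the non-bitmap branch with made-up numbers (variable names mirror the diff, the values are invented):

	/*
	 * Assume an extent entry covering [0, 8192) and a request to remove
	 * [4096, 12288). Since offset (4096) != info->offset (0), we keep
	 * the head of the entry and carry the rest of the range forward.
	 */
	u64 info_offset = 0, info_bytes = 8192;		/* the entry */
	u64 offset = 4096, bytes = 8192;		/* range to remove */
	u64 old_end = info_offset + info_bytes;		/* 8192 */

	info_bytes = offset - info_offset;		/* entry keeps [0, 4096) */

	if (old_end < offset + bytes) {			/* entry ends short */
		bytes -= old_end - offset;		/* 4096 bytes left */
		offset = old_end;			/* resume at 8192 */
		/* goto again: the next entry (or a bitmap) takes over */
	}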
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f6ab6f5e635a..a7d1921ac76b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode,
 	if (IS_ERR(trans)) {
 		extent_clear_unlock_delalloc(inode,
 			     &BTRFS_I(inode)->io_tree,
-			     start, end, NULL,
+			     start, end, locked_page,
 			     EXTENT_CLEAR_UNLOCK_PAGE |
 			     EXTENT_CLEAR_UNLOCK |
 			     EXTENT_CLEAR_DELALLOC |
@@ -963,7 +963,7 @@ out:
 out_unlock:
 	extent_clear_unlock_delalloc(inode,
 		     &BTRFS_I(inode)->io_tree,
-		     start, end, NULL,
+		     start, end, locked_page,
 		     EXTENT_CLEAR_UNLOCK_PAGE |
 		     EXTENT_CLEAR_UNLOCK |
 		     EXTENT_CLEAR_DELALLOC |
@@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work)
 	compress_file_range(async_cow->inode, async_cow->locked_page,
 			    async_cow->start, async_cow->end, async_cow,
 			    &num_added);
-	if (num_added == 0)
+	if (num_added == 0) {
+		btrfs_add_delayed_iput(async_cow->inode);
 		async_cow->inode = NULL;
+	}
 }
 
 /*
@@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
 {
 	struct async_cow *async_cow;
 	async_cow = container_of(work, struct async_cow, work);
+	if (async_cow->inode)
+		btrfs_add_delayed_iput(async_cow->inode);
 	kfree(async_cow);
 }
 
@@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 	while (start < end) {
 		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
 		BUG_ON(!async_cow); /* -ENOMEM */
-		async_cow->inode = inode;
+		async_cow->inode = igrab(inode);
 		async_cow->root = root;
 		async_cow->locked_page = locked_page;
 		async_cow->start = start;
@@ -1136,8 +1140,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 	u64 ino = btrfs_ino(inode);
 
 	path = btrfs_alloc_path();
-	if (!path)
+	if (!path) {
+		extent_clear_unlock_delalloc(inode,
+					     &BTRFS_I(inode)->io_tree,
+					     start, end, locked_page,
+					     EXTENT_CLEAR_UNLOCK_PAGE |
+					     EXTENT_CLEAR_UNLOCK |
+					     EXTENT_CLEAR_DELALLOC |
+					     EXTENT_CLEAR_DIRTY |
+					     EXTENT_SET_WRITEBACK |
+					     EXTENT_END_WRITEBACK);
 		return -ENOMEM;
+	}
 
 	nolock = btrfs_is_free_space_inode(root, inode);
 
@@ -1147,6 +1161,15 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 		trans = btrfs_join_transaction(root);
 
 	if (IS_ERR(trans)) {
+		extent_clear_unlock_delalloc(inode,
+					     &BTRFS_I(inode)->io_tree,
+					     start, end, locked_page,
+					     EXTENT_CLEAR_UNLOCK_PAGE |
+					     EXTENT_CLEAR_UNLOCK |
+					     EXTENT_CLEAR_DELALLOC |
+					     EXTENT_CLEAR_DIRTY |
+					     EXTENT_SET_WRITEBACK |
+					     EXTENT_END_WRITEBACK);
 		btrfs_free_path(path);
 		return PTR_ERR(trans);
 	}
@@ -1327,8 +1350,11 @@ out_check:
 	}
 	btrfs_release_path(path);
 
-	if (cur_offset <= end && cow_start == (u64)-1)
+	if (cur_offset <= end && cow_start == (u64)-1) {
 		cow_start = cur_offset;
+		cur_offset = end;
+	}
+
 	if (cow_start != (u64)-1) {
 		ret = cow_file_range(inode, locked_page, cow_start, end,
 				     page_started, nr_written, 1);
@@ -1347,6 +1373,17 @@ error:
 	if (!ret)
 		ret = err;
 
+	if (ret && cur_offset < end)
+		extent_clear_unlock_delalloc(inode,
+					     &BTRFS_I(inode)->io_tree,
+					     cur_offset, end, locked_page,
+					     EXTENT_CLEAR_UNLOCK_PAGE |
+					     EXTENT_CLEAR_UNLOCK |
+					     EXTENT_CLEAR_DELALLOC |
+					     EXTENT_CLEAR_DIRTY |
+					     EXTENT_SET_WRITEBACK |
+					     EXTENT_END_WRITEBACK);
+
 	btrfs_free_path(path);
 	return ret;
 }
@@ -1361,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 	int ret;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 
-	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 1, nr_written);
-	else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
+	} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 0, nr_written);
-	else if (!btrfs_test_opt(root, COMPRESS) &&
-		 !(BTRFS_I(inode)->force_compress) &&
-		 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
+	} else if (!btrfs_test_opt(root, COMPRESS) &&
+		   !(BTRFS_I(inode)->force_compress) &&
+		   !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {
 		ret = cow_file_range(inode, locked_page, start, end,
 				     page_started, nr_written, 1);
-	else
+	} else {
+		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+			&BTRFS_I(inode)->runtime_flags);
 		ret = cow_file_range_async(inode, locked_page, start, end,
 					   page_started, nr_written);
+	}
 	return ret;
 }
 
@@ -3714,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
3714 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3754 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3715 3755
3716 if (root->fs_info->log_root_recovering) { 3756 if (root->fs_info->log_root_recovering) {
3717 BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3757 BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3718 &BTRFS_I(inode)->runtime_flags)); 3758 &BTRFS_I(inode)->runtime_flags));
3719 goto no_delete; 3759 goto no_delete;
3720 } 3760 }
@@ -5836,8 +5876,17 @@ map:
5836 bh_result->b_size = len; 5876 bh_result->b_size = len;
5837 bh_result->b_bdev = em->bdev; 5877 bh_result->b_bdev = em->bdev;
5838 set_buffer_mapped(bh_result); 5878 set_buffer_mapped(bh_result);
5839 if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 5879 if (create) {
5840 set_buffer_new(bh_result); 5880 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5881 set_buffer_new(bh_result);
5882
5883 /*
5884 * Need to update the i_size under the extent lock so buffered
5885 * readers will get the updated i_size when we unlock.
5886 */
5887 if (start + len > i_size_read(inode))
5888 i_size_write(inode, start + len);
5889 }
5841 5890
5842 free_extent_map(em); 5891 free_extent_map(em);
5843 5892
@@ -6320,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6320 */ 6369 */
6321 ordered = btrfs_lookup_ordered_range(inode, lockstart, 6370 ordered = btrfs_lookup_ordered_range(inode, lockstart,
6322 lockend - lockstart + 1); 6371 lockend - lockstart + 1);
6323 if (!ordered) 6372
6373 /*
6374 * We need to make sure there are no buffered pages in this
6375 * range either, we could have raced between the invalidate in
6376 * generic_file_direct_write and locking the extent. The
6377 * invalidate needs to happen so that reads after a write do not
6378 * get stale data.
6379 */
6380 if (!ordered && (!writing ||
6381 !test_range_bit(&BTRFS_I(inode)->io_tree,
6382 lockstart, lockend, EXTENT_UPTODATE, 0,
6383 cached_state)))
6324 break; 6384 break;
6385
6325 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6386 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6326 &cached_state, GFP_NOFS); 6387 &cached_state, GFP_NOFS);
6327 btrfs_start_ordered_extent(inode, ordered, 1); 6388
6328 btrfs_put_ordered_extent(ordered); 6389 if (ordered) {
6390 btrfs_start_ordered_extent(inode, ordered, 1);
6391 btrfs_put_ordered_extent(ordered);
6392 } else {
6393 /* Screw you mmap */
6394 ret = filemap_write_and_wait_range(file->f_mapping,
6395 lockstart,
6396 lockend);
6397 if (ret)
6398 goto out;
6399
6400 /*
6401 * If we found a page that couldn't be invalidated just
6402 * fall back to buffered.
6403 */
6404 ret = invalidate_inode_pages2_range(file->f_mapping,
6405 lockstart >> PAGE_CACHE_SHIFT,
6406 lockend >> PAGE_CACHE_SHIFT);
6407 if (ret) {
6408 if (ret == -EBUSY)
6409 ret = 0;
6410 goto out;
6411 }
6412 }
6413
6329 cond_resched(); 6414 cond_resched();
6330 } 6415 }
6331 6416
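Stripped of detail, the revised direct IO setup loop works like this (a simplified sketch; range_has_uptodate_pages() and flush_and_invalidate_range() are placeholders for the test_range_bit() and filemap/invalidate calls shown above, not real functions):

	for (;;) {
		/* with the extent range locked, look for in-flight ordered IO */
		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);
		if (!ordered && (!writing || !range_has_uptodate_pages()))
			break;		/* range is quiet: safe to do direct IO */

		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, &cached_state, GFP_NOFS);
		if (ordered) {
			/* ordered extent in flight: wait for it, then retry */
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
		} else {
			/* raced with a buffered write: flush and invalidate the
			 * page cache; -EBUSY means fall back to buffered IO */
			flush_and_invalidate_range();
		}
		cond_resched();
	}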
@@ -7054,10 +7139,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode)
7054 else 7139 else
7055 b_inode->flags &= ~BTRFS_INODE_NODATACOW; 7140 b_inode->flags &= ~BTRFS_INODE_NODATACOW;
7056 7141
7057 if (b_dir->flags & BTRFS_INODE_COMPRESS) 7142 if (b_dir->flags & BTRFS_INODE_COMPRESS) {
7058 b_inode->flags |= BTRFS_INODE_COMPRESS; 7143 b_inode->flags |= BTRFS_INODE_COMPRESS;
7059 else 7144 b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS;
7060 b_inode->flags &= ~BTRFS_INODE_COMPRESS; 7145 } else {
7146 b_inode->flags &= ~(BTRFS_INODE_COMPRESS |
7147 BTRFS_INODE_NOCOMPRESS);
7148 }
7061} 7149}
7062 7150
7063static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, 7151static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 24b776c08d99..0e92e5763005 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -52,6 +52,7 @@
52#include "locking.h" 52#include "locking.h"
53#include "inode-map.h" 53#include "inode-map.h"
54#include "backref.h" 54#include "backref.h"
55#include "rcu-string.h"
55 56
56/* Mask out flags that are inappropriate for the given type of inode. */ 57/* Mask out flags that are inappropriate for the given type of inode. */
57static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 58static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -785,39 +786,57 @@ none:
785 return -ENOENT; 786 return -ENOENT;
786} 787}
787 788
788/* 789static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
789 * Validity check of prev em and next em:
790 * 1) no prev/next em
791 * 2) prev/next em is a hole/inline extent
792 */
793static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
794{ 790{
795 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 791 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
796 struct extent_map *prev = NULL, *next = NULL; 792 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
797 int ret = 0; 793 struct extent_map *em;
794 u64 len = PAGE_CACHE_SIZE;
798 795
796 /*
797 * hopefully we have this extent in the tree already, try without
798 * the full extent lock
799 */
799 read_lock(&em_tree->lock); 800 read_lock(&em_tree->lock);
800 prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1); 801 em = lookup_extent_mapping(em_tree, start, len);
801 next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
802 read_unlock(&em_tree->lock); 802 read_unlock(&em_tree->lock);
803 803
804 if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) && 804 if (!em) {
805 (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)) 805 /* get the big lock and read metadata off disk */
806 ret = 1; 806 lock_extent(io_tree, start, start + len - 1);
807 free_extent_map(prev); 807 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
808 free_extent_map(next); 808 unlock_extent(io_tree, start, start + len - 1);
809
810 if (IS_ERR(em))
811 return NULL;
812 }
813
814 return em;
815}
816
817static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
818{
819 struct extent_map *next;
820 bool ret = true;
809 821
822 /* this is the last extent */
823 if (em->start + em->len >= i_size_read(inode))
824 return false;
825
826 next = defrag_lookup_extent(inode, em->start + em->len);
827 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
828 ret = false;
829
830 free_extent_map(next);
810 return ret; 831 return ret;
811} 832}
812 833
813static int should_defrag_range(struct inode *inode, u64 start, u64 len, 834static int should_defrag_range(struct inode *inode, u64 start, int thresh,
814 int thresh, u64 *last_len, u64 *skip, 835 u64 *last_len, u64 *skip, u64 *defrag_end)
815 u64 *defrag_end)
816{ 836{
817 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 837 struct extent_map *em;
818 struct extent_map *em = NULL;
819 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
820 int ret = 1; 838 int ret = 1;
839 bool next_mergeable = true;
821 840
822 /* 841 /*
823 * make sure that once we start defragging an extent, we keep on 842 * make sure that once we start defragging an extent, we keep on
@@ -828,23 +847,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
828 847
829 *skip = 0; 848 *skip = 0;
830 849
831 /* 850 em = defrag_lookup_extent(inode, start);
832 * hopefully we have this extent in the tree already, try without 851 if (!em)
833 * the full extent lock 852 return 0;
834 */
835 read_lock(&em_tree->lock);
836 em = lookup_extent_mapping(em_tree, start, len);
837 read_unlock(&em_tree->lock);
838
839 if (!em) {
840 /* get the big lock and read metadata off disk */
841 lock_extent(io_tree, start, start + len - 1);
842 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
843 unlock_extent(io_tree, start, start + len - 1);
844
845 if (IS_ERR(em))
846 return 0;
847 }
848 853
849 /* this will cover holes, and inline extents */ 854 /* this will cover holes, and inline extents */
850 if (em->block_start >= EXTENT_MAP_LAST_BYTE) { 855 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
@@ -852,18 +857,15 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
852 goto out; 857 goto out;
853 } 858 }
854 859
855 /* If we have nothing to merge with us, just skip. */ 860 next_mergeable = defrag_check_next_extent(inode, em);
856 if (check_adjacent_extents(inode, em)) {
857 ret = 0;
858 goto out;
859 }
860 861
861 /* 862 /*
862 * we hit a real extent, if it is big don't bother defragging it again 863 * we hit a real extent, if it is big or the next extent is not a
864 * real extent, don't bother defragging it
863 */ 865 */
864 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) 866 if ((*last_len == 0 || *last_len >= thresh) &&
867 (em->len >= thresh || !next_mergeable))
865 ret = 0; 868 ret = 0;
866
867out: 869out:
868 /* 870 /*
869 * last_len ends up being a counter of how many bytes we've defragged. 871 * last_len ends up being a counter of how many bytes we've defragged.
@@ -1142,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1142 break; 1144 break;
1143 1145
1144 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1146 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1145 PAGE_CACHE_SIZE, extent_thresh, 1147 extent_thresh, &last_len, &skip,
1146 &last_len, &skip, &defrag_end)) { 1148 &defrag_end)) {
1147 unsigned long next; 1149 unsigned long next;
1148 /* 1150 /*
1149 * the should_defrag function tells us how much to skip 1151 * the should_defrag function tells us how much to skip
@@ -1304,6 +1306,14 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1304 ret = -EINVAL; 1306 ret = -EINVAL;
1305 goto out_free; 1307 goto out_free;
1306 } 1308 }
1309 if (device->fs_devices && device->fs_devices->seeding) {
1310 printk(KERN_INFO "btrfs: resizer unable to apply on "
1311 "seeding device %llu\n",
1312 (unsigned long long)devid);
1313 ret = -EINVAL;
1314 goto out_free;
1315 }
1316
1307 if (!strcmp(sizestr, "max")) 1317 if (!strcmp(sizestr, "max"))
1308 new_size = device->bdev->bd_inode->i_size; 1318 new_size = device->bdev->bd_inode->i_size;
1309 else { 1319 else {
@@ -1345,8 +1355,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1345 do_div(new_size, root->sectorsize); 1355 do_div(new_size, root->sectorsize);
1346 new_size *= root->sectorsize; 1356 new_size *= root->sectorsize;
1347 1357
1348 printk(KERN_INFO "btrfs: new size for %s is %llu\n", 1358 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
1349 device->name, (unsigned long long)new_size); 1359 rcu_str_deref(device->name),
1360 (unsigned long long)new_size);
1350 1361
1351 if (new_size > old_size) { 1362 if (new_size > old_size) {
1352 trans = btrfs_start_transaction(root, 0); 1363 trans = btrfs_start_transaction(root, 0);
@@ -2264,7 +2275,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2264 di_args->total_bytes = dev->total_bytes; 2275 di_args->total_bytes = dev->total_bytes;
2265 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2276 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2266 if (dev->name) { 2277 if (dev->name) {
2267 strncpy(di_args->path, dev->name, sizeof(di_args->path)); 2278 struct rcu_string *name;
2279
2280 rcu_read_lock();
2281 name = rcu_dereference(dev->name);
2282 strncpy(di_args->path, name->str, sizeof(di_args->path));
2283 rcu_read_unlock();
2268 di_args->path[sizeof(di_args->path) - 1] = 0; 2284 di_args->path[sizeof(di_args->path) - 1] = 0;
2269 } else { 2285 } else {
2270 di_args->path[0] = '\0'; 2286 di_args->path[0] = '\0';
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 497c530724cf..e440aa653c30 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
339#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 339#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
340#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ 340#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
341 struct btrfs_ioctl_vol_args_v2) 341 struct btrfs_ioctl_vol_args_v2)
342#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) 342#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
343#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 343#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
344#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ 344#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
345 struct btrfs_ioctl_scrub_args) 345 struct btrfs_ioctl_scrub_args)
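The one-character change above fixes the declared data direction: _IOW encodes user-to-kernel transfer, but SUBVOL_GETFLAGS copies the flags out to userspace, which is what _IOR declares. A hedged userspace sketch of the corrected call (assuming the BTRFS_IOC_* definitions are visible to userspace, e.g. through the copy of this header that btrfs-progs carries):

	#include <sys/ioctl.h>
	#include <linux/types.h>

	static int get_subvol_flags(int fd, __u64 *flags)
	{
		/* _IOR direction: the kernel writes into *flags */
		return ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, flags);
	}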
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9e138cdc36c5..643335a4fe3c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
627 /* start IO across the range first to instantiate any delalloc 627 /* start IO across the range first to instantiate any delalloc
628 * extents 628 * extents
629 */ 629 */
630 filemap_write_and_wait_range(inode->i_mapping, start, orig_end); 630 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
631
632 /*
633 * So with compression we will find and lock a dirty page and clear the
634 * first one as dirty, setup an async extent, and immediately return
635 * with the entire range locked but with nobody actually marked with
636 * writeback. So we can't just filemap_write_and_wait_range() and
637 * expect it to work since it will just kick off a thread to do the
638 * actual work. So we need to call filemap_fdatawrite_range _again_
639 * since it will wait on the page lock, which won't be unlocked until
640 * after the pages have been marked as writeback and so we're good to go
641 * from there. We have to do this otherwise we'll miss the ordered
642 * extents and that results in badness. Please Josef, do not think you
643 * know better and pull this out at some point in the future, it is
644 * right and you are wrong.
645 */
646 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
647 &BTRFS_I(inode)->runtime_flags))
648 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
649
650 filemap_fdatawait_range(inode->i_mapping, start, orig_end);
631 651
632 end = orig_end; 652 end = orig_end;
633 found = 0; 653 found = 0;
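Condensed, the sequence the comment above defends is: start writeback, kick it a second time if async compression may still be holding page locks, and only then wait. The BTRFS_INODE_HAS_ASYNC_EXTENT bit it tests is the one run_delalloc_range now sets before taking the async path (see the inode.c hunk earlier):

	filemap_fdatawrite_range(inode->i_mapping, start, orig_end);

	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags))
		/* async compression returned with pages locked but not yet
		 * marked writeback; this second call blocks on those locks */
		filemap_fdatawrite_range(inode->i_mapping, start, orig_end);

	filemap_fdatawait_range(inode->i_mapping, start, orig_end);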
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
new file mode 100644
index 000000000000..9e111e4576d4
--- /dev/null
+++ b/fs/btrfs/rcu-string.h
@@ -0,0 +1,56 @@
1/*
2 * Copyright (C) 2012 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19struct rcu_string {
20 struct rcu_head rcu;
21 char str[0];
22};
23
24static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
25{
26 size_t len = strlen(src) + 1;
27 struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) +
28 (len * sizeof(char)), mask);
29 if (!ret)
30 return ret;
31 strncpy(ret->str, src, len);
32 return ret;
33}
34
35static inline void rcu_string_free(struct rcu_string *str)
36{
37 if (str)
38 kfree_rcu(str, rcu);
39}
40
41#define printk_in_rcu(fmt, ...) do { \
42 rcu_read_lock(); \
43 printk(fmt, __VA_ARGS__); \
44 rcu_read_unlock(); \
45} while (0)
46
47#define printk_ratelimited_in_rcu(fmt, ...) do { \
48 rcu_read_lock(); \
49 printk_ratelimited(fmt, __VA_ARGS__); \
50 rcu_read_unlock(); \
51} while (0)
52
53#define rcu_str_deref(rcu_str) ({ \
54 struct rcu_string *__str = rcu_dereference(rcu_str); \
55 __str->str; \
56})
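The intended usage pattern for this header, which the rest of the series applies to device->name, is: copy and publish under a writer-side lock, dereference only inside an RCU read-side section, and free through kfree_rcu() so readers never see the string disappear under them. A minimal sketch, assuming a hypothetical dev with a struct rcu_string *name field (volumes.h makes exactly that change to btrfs_device below):

	struct rcu_string *name, *old;

	name = rcu_string_strdup("/dev/sdb", GFP_NOFS);
	if (!name)
		return -ENOMEM;
	old = rcu_dereference_protected(dev->name, 1);	/* writer holds a lock */
	rcu_assign_pointer(dev->name, name);		/* publish the new string */
	rcu_string_free(old);				/* freed after a grace period */

	/* read side: the macro brackets the printk with rcu_read_lock() */
	printk_in_rcu(KERN_INFO "btrfs: device %s\n", rcu_str_deref(dev->name));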
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a38cfa4f251e..b223620cd5a6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -26,6 +26,7 @@
26#include "backref.h" 26#include "backref.h"
27#include "extent_io.h" 27#include "extent_io.h"
28#include "check-integrity.h" 28#include "check-integrity.h"
29#include "rcu-string.h"
29 30
30/* 31/*
31 * This is only the first step towards a full-features scrub. It reads all 32 * This is only the first step towards a full-features scrub. It reads all
@@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
320 * hold all of the paths here 321 * hold all of the paths here
321 */ 322 */
322 for (i = 0; i < ipath->fspath->elem_cnt; ++i) 323 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
323 printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 324 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
324 "%s, sector %llu, root %llu, inode %llu, offset %llu, " 325 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
325 "length %llu, links %u (path: %s)\n", swarn->errstr, 326 "length %llu, links %u (path: %s)\n", swarn->errstr,
326 swarn->logical, swarn->dev->name, 327 swarn->logical, rcu_str_deref(swarn->dev->name),
327 (unsigned long long)swarn->sector, root, inum, offset, 328 (unsigned long long)swarn->sector, root, inum, offset,
328 min(isize - offset, (u64)PAGE_SIZE), nlink, 329 min(isize - offset, (u64)PAGE_SIZE), nlink,
329 (char *)(unsigned long)ipath->fspath->val[i]); 330 (char *)(unsigned long)ipath->fspath->val[i]);
@@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
332 return 0; 333 return 0;
333 334
334err: 335err:
335 printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 336 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
336 "%s, sector %llu, root %llu, inode %llu, offset %llu: path " 337 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
337 "resolving failed with ret=%d\n", swarn->errstr, 338 "resolving failed with ret=%d\n", swarn->errstr,
338 swarn->logical, swarn->dev->name, 339 swarn->logical, rcu_str_deref(swarn->dev->name),
339 (unsigned long long)swarn->sector, root, inum, offset, ret); 340 (unsigned long long)swarn->sector, root, inum, offset, ret);
340 341
341 free_ipath(ipath); 342 free_ipath(ipath);
@@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
390 do { 391 do {
391 ret = tree_backref_for_extent(&ptr, eb, ei, item_size, 392 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
392 &ref_root, &ref_level); 393 &ref_root, &ref_level);
393 printk(KERN_WARNING 394 printk_in_rcu(KERN_WARNING
394 "btrfs: %s at logical %llu on dev %s, " 395 "btrfs: %s at logical %llu on dev %s, "
395 "sector %llu: metadata %s (level %d) in tree " 396 "sector %llu: metadata %s (level %d) in tree "
396 "%llu\n", errstr, swarn.logical, dev->name, 397 "%llu\n", errstr, swarn.logical,
398 rcu_str_deref(dev->name),
397 (unsigned long long)swarn.sector, 399 (unsigned long long)swarn.sector,
398 ref_level ? "node" : "leaf", 400 ref_level ? "node" : "leaf",
399 ret < 0 ? -1 : ref_level, 401 ret < 0 ? -1 : ref_level,
@@ -580,9 +582,11 @@ out:
580 spin_lock(&sdev->stat_lock); 582 spin_lock(&sdev->stat_lock);
581 ++sdev->stat.uncorrectable_errors; 583 ++sdev->stat.uncorrectable_errors;
582 spin_unlock(&sdev->stat_lock); 584 spin_unlock(&sdev->stat_lock);
583 printk_ratelimited(KERN_ERR 585
586 printk_ratelimited_in_rcu(KERN_ERR
584 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", 587 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
585 (unsigned long long)fixup->logical, sdev->dev->name); 588 (unsigned long long)fixup->logical,
589 rcu_str_deref(sdev->dev->name));
586 } 590 }
587 591
588 btrfs_free_path(path); 592 btrfs_free_path(path);
@@ -936,18 +940,20 @@ corrected_error:
936 spin_lock(&sdev->stat_lock); 940 spin_lock(&sdev->stat_lock);
937 sdev->stat.corrected_errors++; 941 sdev->stat.corrected_errors++;
938 spin_unlock(&sdev->stat_lock); 942 spin_unlock(&sdev->stat_lock);
939 printk_ratelimited(KERN_ERR 943 printk_ratelimited_in_rcu(KERN_ERR
940 "btrfs: fixed up error at logical %llu on dev %s\n", 944 "btrfs: fixed up error at logical %llu on dev %s\n",
941 (unsigned long long)logical, sdev->dev->name); 945 (unsigned long long)logical,
946 rcu_str_deref(sdev->dev->name));
942 } 947 }
943 } else { 948 } else {
944did_not_correct_error: 949did_not_correct_error:
945 spin_lock(&sdev->stat_lock); 950 spin_lock(&sdev->stat_lock);
946 sdev->stat.uncorrectable_errors++; 951 sdev->stat.uncorrectable_errors++;
947 spin_unlock(&sdev->stat_lock); 952 spin_unlock(&sdev->stat_lock);
948 printk_ratelimited(KERN_ERR 953 printk_ratelimited_in_rcu(KERN_ERR
949 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", 954 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
950 (unsigned long long)logical, sdev->dev->name); 955 (unsigned long long)logical,
956 rcu_str_deref(sdev->dev->name));
951 } 957 }
952 958
953out: 959out:
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 96eb9fef7bd2..e23991574fdf 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -54,6 +54,7 @@
54#include "version.h" 54#include "version.h"
55#include "export.h" 55#include "export.h"
56#include "compression.h" 56#include "compression.h"
57#include "rcu-string.h"
57 58
58#define CREATE_TRACE_POINTS 59#define CREATE_TRACE_POINTS
59#include <trace/events/btrfs.h> 60#include <trace/events/btrfs.h>
@@ -1186,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1186 if (ret) 1187 if (ret)
1187 goto restore; 1188 goto restore;
1188 1189
1190 ret = btrfs_resume_balance_async(fs_info);
1191 if (ret)
1192 goto restore;
1193
1189 sb->s_flags &= ~MS_RDONLY; 1194 sb->s_flags &= ~MS_RDONLY;
1190 } 1195 }
1191 1196
@@ -1482,12 +1487,44 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
1482 "error %d\n", btrfs_ino(inode), ret); 1487 "error %d\n", btrfs_ino(inode), ret);
1483} 1488}
1484 1489
1490static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1491{
1492 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
1493 struct btrfs_fs_devices *cur_devices;
1494 struct btrfs_device *dev, *first_dev = NULL;
1495 struct list_head *head;
1496 struct rcu_string *name;
1497
1498 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1499 cur_devices = fs_info->fs_devices;
1500 while (cur_devices) {
1501 head = &cur_devices->devices;
1502 list_for_each_entry(dev, head, dev_list) {
1503 if (!first_dev || dev->devid < first_dev->devid)
1504 first_dev = dev;
1505 }
1506 cur_devices = cur_devices->seed;
1507 }
1508
1509 if (first_dev) {
1510 rcu_read_lock();
1511 name = rcu_dereference(first_dev->name);
1512 seq_escape(m, name->str, " \t\n\\");
1513 rcu_read_unlock();
1514 } else {
1515 WARN_ON(1);
1516 }
1517 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1518 return 0;
1519}
1520
1485static const struct super_operations btrfs_super_ops = { 1521static const struct super_operations btrfs_super_ops = {
1486 .drop_inode = btrfs_drop_inode, 1522 .drop_inode = btrfs_drop_inode,
1487 .evict_inode = btrfs_evict_inode, 1523 .evict_inode = btrfs_evict_inode,
1488 .put_super = btrfs_put_super, 1524 .put_super = btrfs_put_super,
1489 .sync_fs = btrfs_sync_fs, 1525 .sync_fs = btrfs_sync_fs,
1490 .show_options = btrfs_show_options, 1526 .show_options = btrfs_show_options,
1527 .show_devname = btrfs_show_devname,
1491 .write_inode = btrfs_write_inode, 1528 .write_inode = btrfs_write_inode,
1492 .dirty_inode = btrfs_fs_dirty_inode, 1529 .dirty_inode = btrfs_fs_dirty_inode,
1493 .alloc_inode = btrfs_alloc_inode, 1530 .alloc_inode = btrfs_alloc_inode,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 1791c6e3d834..b72b068183ec 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -100,6 +100,10 @@ loop:
100 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 100 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
101 cur_trans = fs_info->running_transaction; 101 cur_trans = fs_info->running_transaction;
102 goto loop; 102 goto loop;
103 } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
104 spin_unlock(&root->fs_info->trans_lock);
105 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
106 return -EROFS;
103 } 107 }
104 108
105 atomic_set(&cur_trans->num_writers, 1); 109 atomic_set(&cur_trans->num_writers, 1);
@@ -1213,14 +1217,20 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1213 1217
1214 1218
1215static void cleanup_transaction(struct btrfs_trans_handle *trans, 1219static void cleanup_transaction(struct btrfs_trans_handle *trans,
1216 struct btrfs_root *root) 1220 struct btrfs_root *root, int err)
1217{ 1221{
1218 struct btrfs_transaction *cur_trans = trans->transaction; 1222 struct btrfs_transaction *cur_trans = trans->transaction;
1219 1223
1220 WARN_ON(trans->use_count > 1); 1224 WARN_ON(trans->use_count > 1);
1221 1225
1226 btrfs_abort_transaction(trans, root, err);
1227
1222 spin_lock(&root->fs_info->trans_lock); 1228 spin_lock(&root->fs_info->trans_lock);
1223 list_del_init(&cur_trans->list); 1229 list_del_init(&cur_trans->list);
1230 if (cur_trans == root->fs_info->running_transaction) {
1231 root->fs_info->running_transaction = NULL;
1232 root->fs_info->trans_no_join = 0;
1233 }
1224 spin_unlock(&root->fs_info->trans_lock); 1234 spin_unlock(&root->fs_info->trans_lock);
1225 1235
1226 btrfs_cleanup_one_transaction(trans->transaction, root); 1236 btrfs_cleanup_one_transaction(trans->transaction, root);
@@ -1526,7 +1536,7 @@ cleanup_transaction:
1526// WARN_ON(1); 1536// WARN_ON(1);
1527 if (current->journal_info == trans) 1537 if (current->journal_info == trans)
1528 current->journal_info = NULL; 1538 current->journal_info = NULL;
1529 cleanup_transaction(trans, root); 1539 cleanup_transaction(trans, root, ret);
1530 1540
1531 return ret; 1541 return ret;
1532} 1542}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2017d0ff511c..8abeae4224f9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
690 kfree(name); 690 kfree(name);
691 691
692 iput(inode); 692 iput(inode);
693
694 btrfs_run_delayed_items(trans, root);
693 return ret; 695 return ret;
694} 696}
695 697
@@ -895,6 +897,7 @@ again:
895 ret = btrfs_unlink_inode(trans, root, dir, 897 ret = btrfs_unlink_inode(trans, root, dir,
896 inode, victim_name, 898 inode, victim_name,
897 victim_name_len); 899 victim_name_len);
900 btrfs_run_delayed_items(trans, root);
898 } 901 }
899 kfree(victim_name); 902 kfree(victim_name);
900 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 903 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ again:
1475 ret = btrfs_unlink_inode(trans, root, dir, inode, 1478 ret = btrfs_unlink_inode(trans, root, dir, inode,
1476 name, name_len); 1479 name, name_len);
1477 BUG_ON(ret); 1480 BUG_ON(ret);
1481
1482 btrfs_run_delayed_items(trans, root);
1483
1478 kfree(name); 1484 kfree(name);
1479 iput(inode); 1485 iput(inode);
1480 1486
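Each btrfs_run_delayed_items() call added above serves the same purpose: btrfs_unlink_inode() queues its directory-index and inode updates as delayed items, and during log replay those must be flushed to the btree before the next search, or replay will operate on stale state. The pattern, in sketch form:

	ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
	/* flush the delayed dir-index/inode updates the unlink queued so
	 * that subsequent replay searches see the modification in the tree */
	btrfs_run_delayed_items(trans, root);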
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7782020996fe..ecaad40e7ef4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -35,6 +35,7 @@
35#include "volumes.h" 35#include "volumes.h"
36#include "async-thread.h" 36#include "async-thread.h"
37#include "check-integrity.h" 37#include "check-integrity.h"
38#include "rcu-string.h"
38 39
39static int init_first_rw_device(struct btrfs_trans_handle *trans, 40static int init_first_rw_device(struct btrfs_trans_handle *trans,
40 struct btrfs_root *root, 41 struct btrfs_root *root,
@@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
64 device = list_entry(fs_devices->devices.next, 65 device = list_entry(fs_devices->devices.next,
65 struct btrfs_device, dev_list); 66 struct btrfs_device, dev_list);
66 list_del(&device->dev_list); 67 list_del(&device->dev_list);
67 kfree(device->name); 68 rcu_string_free(device->name);
68 kfree(device); 69 kfree(device);
69 } 70 }
70 kfree(fs_devices); 71 kfree(fs_devices);
@@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path,
334{ 335{
335 struct btrfs_device *device; 336 struct btrfs_device *device;
336 struct btrfs_fs_devices *fs_devices; 337 struct btrfs_fs_devices *fs_devices;
338 struct rcu_string *name;
337 u64 found_transid = btrfs_super_generation(disk_super); 339 u64 found_transid = btrfs_super_generation(disk_super);
338 char *name;
339 340
340 fs_devices = find_fsid(disk_super->fsid); 341 fs_devices = find_fsid(disk_super->fsid);
341 if (!fs_devices) { 342 if (!fs_devices) {
@@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path,
369 memcpy(device->uuid, disk_super->dev_item.uuid, 370 memcpy(device->uuid, disk_super->dev_item.uuid,
370 BTRFS_UUID_SIZE); 371 BTRFS_UUID_SIZE);
371 spin_lock_init(&device->io_lock); 372 spin_lock_init(&device->io_lock);
372 device->name = kstrdup(path, GFP_NOFS); 373
373 if (!device->name) { 374 name = rcu_string_strdup(path, GFP_NOFS);
375 if (!name) {
374 kfree(device); 376 kfree(device);
375 return -ENOMEM; 377 return -ENOMEM;
376 } 378 }
379 rcu_assign_pointer(device->name, name);
377 INIT_LIST_HEAD(&device->dev_alloc_list); 380 INIT_LIST_HEAD(&device->dev_alloc_list);
378 381
379 /* init readahead state */ 382 /* init readahead state */
@@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path,
390 393
391 device->fs_devices = fs_devices; 394 device->fs_devices = fs_devices;
392 fs_devices->num_devices++; 395 fs_devices->num_devices++;
393 } else if (!device->name || strcmp(device->name, path)) { 396 } else if (!device->name || strcmp(device->name->str, path)) {
394 name = kstrdup(path, GFP_NOFS); 397 name = rcu_string_strdup(path, GFP_NOFS);
395 if (!name) 398 if (!name)
396 return -ENOMEM; 399 return -ENOMEM;
397 kfree(device->name); 400 rcu_string_free(device->name);
398 device->name = name; 401 rcu_assign_pointer(device->name, name);
399 if (device->missing) { 402 if (device->missing) {
400 fs_devices->missing_devices--; 403 fs_devices->missing_devices--;
401 device->missing = 0; 404 device->missing = 0;
@@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
430 433
431 /* We have held the volume lock, it is safe to get the devices. */ 434 /* We have held the volume lock, it is safe to get the devices. */
432 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 435 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
436 struct rcu_string *name;
437
433 device = kzalloc(sizeof(*device), GFP_NOFS); 438 device = kzalloc(sizeof(*device), GFP_NOFS);
434 if (!device) 439 if (!device)
435 goto error; 440 goto error;
436 441
437 device->name = kstrdup(orig_dev->name, GFP_NOFS); 442 /*
438 if (!device->name) { 443 * This is ok to do without rcu read locked because we hold the
444 * uuid mutex so nothing we touch in here is going to disappear.
445 */
446 name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
447 if (!name) {
439 kfree(device); 448 kfree(device);
440 goto error; 449 goto error;
441 } 450 }
451 rcu_assign_pointer(device->name, name);
442 452
443 device->devid = orig_dev->devid; 453 device->devid = orig_dev->devid;
444 device->work.func = pending_bios_fn; 454 device->work.func = pending_bios_fn;
@@ -491,7 +501,7 @@ again:
491 } 501 }
492 list_del_init(&device->dev_list); 502 list_del_init(&device->dev_list);
493 fs_devices->num_devices--; 503 fs_devices->num_devices--;
494 kfree(device->name); 504 rcu_string_free(device->name);
495 kfree(device); 505 kfree(device);
496 } 506 }
497 507
@@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work)
516 if (device->bdev) 526 if (device->bdev)
517 blkdev_put(device->bdev, device->mode); 527 blkdev_put(device->bdev, device->mode);
518 528
519 kfree(device->name); 529 rcu_string_free(device->name);
520 kfree(device); 530 kfree(device);
521} 531}
522 532
@@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
540 mutex_lock(&fs_devices->device_list_mutex); 550 mutex_lock(&fs_devices->device_list_mutex);
541 list_for_each_entry(device, &fs_devices->devices, dev_list) { 551 list_for_each_entry(device, &fs_devices->devices, dev_list) {
542 struct btrfs_device *new_device; 552 struct btrfs_device *new_device;
553 struct rcu_string *name;
543 554
544 if (device->bdev) 555 if (device->bdev)
545 fs_devices->open_devices--; 556 fs_devices->open_devices--;
@@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
555 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 566 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
556 BUG_ON(!new_device); /* -ENOMEM */ 567 BUG_ON(!new_device); /* -ENOMEM */
557 memcpy(new_device, device, sizeof(*new_device)); 568 memcpy(new_device, device, sizeof(*new_device));
558 new_device->name = kstrdup(device->name, GFP_NOFS); 569
559 BUG_ON(device->name && !new_device->name); /* -ENOMEM */ 570 /* Safe because we are under uuid_mutex */
571 name = rcu_string_strdup(device->name->str, GFP_NOFS);
572 BUG_ON(device->name && !name); /* -ENOMEM */
573 rcu_assign_pointer(new_device->name, name);
560 new_device->bdev = NULL; 574 new_device->bdev = NULL;
561 new_device->writeable = 0; 575 new_device->writeable = 0;
562 new_device->in_fs_metadata = 0; 576 new_device->in_fs_metadata = 0;
@@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
621 if (!device->name) 635 if (!device->name)
622 continue; 636 continue;
623 637
624 bdev = blkdev_get_by_path(device->name, flags, holder); 638 bdev = blkdev_get_by_path(device->name->str, flags, holder);
625 if (IS_ERR(bdev)) { 639 if (IS_ERR(bdev)) {
626 printk(KERN_INFO "open %s failed\n", device->name); 640 printk(KERN_INFO "open %s failed\n", device->name->str);
627 goto error; 641 goto error;
628 } 642 }
629 filemap_write_and_wait(bdev->bd_inode->i_mapping); 643 filemap_write_and_wait(bdev->bd_inode->i_mapping);
@@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1632 struct block_device *bdev; 1646 struct block_device *bdev;
1633 struct list_head *devices; 1647 struct list_head *devices;
1634 struct super_block *sb = root->fs_info->sb; 1648 struct super_block *sb = root->fs_info->sb;
1649 struct rcu_string *name;
1635 u64 total_bytes; 1650 u64 total_bytes;
1636 int seeding_dev = 0; 1651 int seeding_dev = 0;
1637 int ret = 0; 1652 int ret = 0;
@@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1671 goto error; 1686 goto error;
1672 } 1687 }
1673 1688
1674 device->name = kstrdup(device_path, GFP_NOFS); 1689 name = rcu_string_strdup(device_path, GFP_NOFS);
1675 if (!device->name) { 1690 if (!name) {
1676 kfree(device); 1691 kfree(device);
1677 ret = -ENOMEM; 1692 ret = -ENOMEM;
1678 goto error; 1693 goto error;
1679 } 1694 }
1695 rcu_assign_pointer(device->name, name);
1680 1696
1681 ret = find_next_devid(root, &device->devid); 1697 ret = find_next_devid(root, &device->devid);
1682 if (ret) { 1698 if (ret) {
1683 kfree(device->name); 1699 rcu_string_free(device->name);
1684 kfree(device); 1700 kfree(device);
1685 goto error; 1701 goto error;
1686 } 1702 }
1687 1703
1688 trans = btrfs_start_transaction(root, 0); 1704 trans = btrfs_start_transaction(root, 0);
1689 if (IS_ERR(trans)) { 1705 if (IS_ERR(trans)) {
1690 kfree(device->name); 1706 rcu_string_free(device->name);
1691 kfree(device); 1707 kfree(device);
1692 ret = PTR_ERR(trans); 1708 ret = PTR_ERR(trans);
1693 goto error; 1709 goto error;
@@ -1796,7 +1812,7 @@ error_trans:
1796 unlock_chunks(root); 1812 unlock_chunks(root);
1797 btrfs_abort_transaction(trans, root, ret); 1813 btrfs_abort_transaction(trans, root, ret);
1798 btrfs_end_transaction(trans, root); 1814 btrfs_end_transaction(trans, root);
1799 kfree(device->name); 1815 rcu_string_free(device->name);
1800 kfree(device); 1816 kfree(device);
1801error: 1817error:
1802 blkdev_put(bdev, FMODE_EXCL); 1818 blkdev_put(bdev, FMODE_EXCL);
@@ -2829,31 +2845,48 @@ out:
2829 2845
2830static int balance_kthread(void *data) 2846static int balance_kthread(void *data)
2831{ 2847{
2832 struct btrfs_balance_control *bctl = 2848 struct btrfs_fs_info *fs_info = data;
2833 (struct btrfs_balance_control *)data;
2834 struct btrfs_fs_info *fs_info = bctl->fs_info;
2835 int ret = 0; 2849 int ret = 0;
2836 2850
2837 mutex_lock(&fs_info->volume_mutex); 2851 mutex_lock(&fs_info->volume_mutex);
2838 mutex_lock(&fs_info->balance_mutex); 2852 mutex_lock(&fs_info->balance_mutex);
2839 2853
2840 set_balance_control(bctl); 2854 if (fs_info->balance_ctl) {
2841
2842 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2843 printk(KERN_INFO "btrfs: force skipping balance\n");
2844 } else {
2845 printk(KERN_INFO "btrfs: continuing balance\n"); 2855 printk(KERN_INFO "btrfs: continuing balance\n");
2846 ret = btrfs_balance(bctl, NULL); 2856 ret = btrfs_balance(fs_info->balance_ctl, NULL);
2847 } 2857 }
2848 2858
2849 mutex_unlock(&fs_info->balance_mutex); 2859 mutex_unlock(&fs_info->balance_mutex);
2850 mutex_unlock(&fs_info->volume_mutex); 2860 mutex_unlock(&fs_info->volume_mutex);
2861
2851 return ret; 2862 return ret;
2852} 2863}
2853 2864
2854int btrfs_recover_balance(struct btrfs_root *tree_root) 2865int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
2855{ 2866{
2856 struct task_struct *tsk; 2867 struct task_struct *tsk;
2868
2869 spin_lock(&fs_info->balance_lock);
2870 if (!fs_info->balance_ctl) {
2871 spin_unlock(&fs_info->balance_lock);
2872 return 0;
2873 }
2874 spin_unlock(&fs_info->balance_lock);
2875
2876 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2877 printk(KERN_INFO "btrfs: force skipping balance\n");
2878 return 0;
2879 }
2880
2881 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
2882 if (IS_ERR(tsk))
2883 return PTR_ERR(tsk);
2884
2885 return 0;
2886}
2887
2888int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
2889{
2857 struct btrfs_balance_control *bctl; 2890 struct btrfs_balance_control *bctl;
2858 struct btrfs_balance_item *item; 2891 struct btrfs_balance_item *item;
2859 struct btrfs_disk_balance_args disk_bargs; 2892 struct btrfs_disk_balance_args disk_bargs;
@@ -2866,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
2866 if (!path) 2899 if (!path)
2867 return -ENOMEM; 2900 return -ENOMEM;
2868 2901
2869 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
2870 if (!bctl) {
2871 ret = -ENOMEM;
2872 goto out;
2873 }
2874
2875 key.objectid = BTRFS_BALANCE_OBJECTID; 2902 key.objectid = BTRFS_BALANCE_OBJECTID;
2876 key.type = BTRFS_BALANCE_ITEM_KEY; 2903 key.type = BTRFS_BALANCE_ITEM_KEY;
2877 key.offset = 0; 2904 key.offset = 0;
2878 2905
2879 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); 2906 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
2880 if (ret < 0) 2907 if (ret < 0)
2881 goto out_bctl; 2908 goto out;
2882 if (ret > 0) { /* ret = -ENOENT; */ 2909 if (ret > 0) { /* ret = -ENOENT; */
2883 ret = 0; 2910 ret = 0;
2884 goto out_bctl; 2911 goto out;
2912 }
2913
2914 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
2915 if (!bctl) {
2916 ret = -ENOMEM;
2917 goto out;
2885 } 2918 }
2886 2919
2887 leaf = path->nodes[0]; 2920 leaf = path->nodes[0];
2888 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); 2921 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
2889 2922
2890 bctl->fs_info = tree_root->fs_info; 2923 bctl->fs_info = fs_info;
2891 bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; 2924 bctl->flags = btrfs_balance_flags(leaf, item);
2925 bctl->flags |= BTRFS_BALANCE_RESUME;
2892 2926
2893 btrfs_balance_data(leaf, item, &disk_bargs); 2927 btrfs_balance_data(leaf, item, &disk_bargs);
2894 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); 2928 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2897,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
2897 btrfs_balance_sys(leaf, item, &disk_bargs); 2931 btrfs_balance_sys(leaf, item, &disk_bargs);
2898 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 2932 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
2899 2933
2900 tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); 2934 mutex_lock(&fs_info->volume_mutex);
2901 if (IS_ERR(tsk)) 2935 mutex_lock(&fs_info->balance_mutex);
2902 ret = PTR_ERR(tsk);
2903 else
2904 goto out;
2905 2936
2906out_bctl: 2937 set_balance_control(bctl);
2907 kfree(bctl); 2938
2939 mutex_unlock(&fs_info->balance_mutex);
2940 mutex_unlock(&fs_info->volume_mutex);
2908out: 2941out:
2909 btrfs_free_path(path); 2942 btrfs_free_path(path);
2910 return ret; 2943 return ret;
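The refactor splits balance recovery in two: btrfs_recover_balance() now runs synchronously, only reading the balance item and installing balance_ctl, while btrfs_resume_balance_async() (also wired into the remount path in super.c above) decides whether to spawn the btrfs-balance kthread. A sketch of the resulting caller sequence, assuming a mount-path context:

	ret = btrfs_recover_balance(fs_info);	/* read item, set balance_ctl */
	if (ret)
		goto fail;

	/* later, once the filesystem is mounted read-write, restart it */
	ret = btrfs_resume_balance_async(fs_info);
	if (ret)
		goto fail;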
@@ -4045,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
4045 4078
4046 BUG_ON(stripe_index >= bbio->num_stripes); 4079 BUG_ON(stripe_index >= bbio->num_stripes);
4047 dev = bbio->stripes[stripe_index].dev; 4080 dev = bbio->stripes[stripe_index].dev;
4048 if (bio->bi_rw & WRITE) 4081 if (dev->bdev) {
4049 btrfs_dev_stat_inc(dev, 4082 if (bio->bi_rw & WRITE)
4050 BTRFS_DEV_STAT_WRITE_ERRS); 4083 btrfs_dev_stat_inc(dev,
4051 else 4084 BTRFS_DEV_STAT_WRITE_ERRS);
4052 btrfs_dev_stat_inc(dev, 4085 else
4053 BTRFS_DEV_STAT_READ_ERRS); 4086 btrfs_dev_stat_inc(dev,
4054 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) 4087 BTRFS_DEV_STAT_READ_ERRS);
4055 btrfs_dev_stat_inc(dev, 4088 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
4056 BTRFS_DEV_STAT_FLUSH_ERRS); 4089 btrfs_dev_stat_inc(dev,
4057 btrfs_dev_stat_print_on_error(dev); 4090 BTRFS_DEV_STAT_FLUSH_ERRS);
4091 btrfs_dev_stat_print_on_error(dev);
4092 }
4058 } 4093 }
4059 } 4094 }
4060 4095
@@ -4204,10 +4239,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4204 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; 4239 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
4205 dev = bbio->stripes[dev_nr].dev; 4240 dev = bbio->stripes[dev_nr].dev;
4206 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { 4241 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
4242#ifdef DEBUG
4243 struct rcu_string *name;
4244
4245 rcu_read_lock();
4246 name = rcu_dereference(dev->name);
4207 pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " 4247
4208 "(%s id %llu), size=%u\n", rw, 4248 "(%s id %llu), size=%u\n", rw,
4209 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, 4249 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
4210 dev->name, dev->devid, bio->bi_size); 4250 name->str, dev->devid, bio->bi_size);
4251 rcu_read_unlock();
4252#endif
4211 bio->bi_bdev = dev->bdev; 4253 bio->bi_bdev = dev->bdev;
4212 if (async_submit) 4254 if (async_submit)
4213 schedule_bio(root, dev, rw, bio); 4255 schedule_bio(root, dev, rw, bio);
@@ -4694,8 +4736,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
4694 key.offset = device->devid; 4736 key.offset = device->devid;
4695 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); 4737 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
4696 if (ret) { 4738 if (ret) {
4697 printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", 4739 printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
4698 device->name, (unsigned long long)device->devid); 4740 rcu_str_deref(device->name),
4741 (unsigned long long)device->devid);
4699 __btrfs_reset_dev_stats(device); 4742 __btrfs_reset_dev_stats(device);
4700 device->dev_stats_valid = 1; 4743 device->dev_stats_valid = 1;
4701 btrfs_release_path(path); 4744 btrfs_release_path(path);
@@ -4747,8 +4790,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4747 BUG_ON(!path); 4790 BUG_ON(!path);
4748 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); 4791 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
4749 if (ret < 0) { 4792 if (ret < 0) {
4750 printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", 4793 printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
4751 ret, device->name); 4794 ret, rcu_str_deref(device->name));
4752 goto out; 4795 goto out;
4753 } 4796 }
4754 4797
@@ -4757,8 +4800,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4757 /* need to delete old one and insert a new one */ 4800 /* need to delete old one and insert a new one */
4758 ret = btrfs_del_item(trans, dev_root, path); 4801 ret = btrfs_del_item(trans, dev_root, path);
4759 if (ret != 0) { 4802 if (ret != 0) {
4760 printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", 4803 printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
4761 device->name, ret); 4804 rcu_str_deref(device->name), ret);
4762 goto out; 4805 goto out;
4763 } 4806 }
4764 ret = 1; 4807 ret = 1;
@@ -4770,8 +4813,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4770 ret = btrfs_insert_empty_item(trans, dev_root, path, 4813 ret = btrfs_insert_empty_item(trans, dev_root, path,
4771 &key, sizeof(*ptr)); 4814 &key, sizeof(*ptr));
4772 if (ret < 0) { 4815 if (ret < 0) {
4773 printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", 4816 printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
4774 device->name, ret); 4817 rcu_str_deref(device->name), ret);
4775 goto out; 4818 goto out;
4776 } 4819 }
4777 } 4820 }
@@ -4823,9 +4866,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4823{ 4866{
4824 if (!dev->dev_stats_valid) 4867 if (!dev->dev_stats_valid)
4825 return; 4868 return;
4826 printk_ratelimited(KERN_ERR 4869 printk_ratelimited_in_rcu(KERN_ERR
4827 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4870 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4828 dev->name, 4871 rcu_str_deref(dev->name),
4829 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4872 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4830 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4873 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4831 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), 4874 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
@@ -4837,8 +4880,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4837 4880
4838static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) 4881static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
4839{ 4882{
4840 printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4883 printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4841 dev->name, 4884 rcu_str_deref(dev->name),
4842 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4885 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4843 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4886 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4844 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), 4887 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3406a88ca83e..95f6637614db 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -58,7 +58,7 @@ struct btrfs_device {
58 /* the mode sent to blkdev_get */ 58 /* the mode sent to blkdev_get */
59 fmode_t mode; 59 fmode_t mode;
60 60
61 char *name; 61 struct rcu_string *name;
62 62
63 /* the internal btrfs device id */ 63 /* the internal btrfs device id */
64 u64 devid; 64 u64 devid;
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
281int btrfs_init_new_device(struct btrfs_root *root, char *path); 281int btrfs_init_new_device(struct btrfs_root *root, char *path);
282int btrfs_balance(struct btrfs_balance_control *bctl, 282int btrfs_balance(struct btrfs_balance_control *bctl,
283 struct btrfs_ioctl_balance_args *bargs); 283 struct btrfs_ioctl_balance_args *bargs);
284int btrfs_recover_balance(struct btrfs_root *tree_root); 284int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
285int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
285int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 286int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
286int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 287int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
287int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 288int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);