aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS4
-rw-r--r--fs/btrfs/backref.c17
-rw-r--r--fs/btrfs/btrfs_inode.h1
-rw-r--r--fs/btrfs/check-integrity.c16
-rw-r--r--fs/btrfs/ctree.c86
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/delayed-inode.c18
-rw-r--r--fs/btrfs/delayed-inode.h3
-rw-r--r--fs/btrfs/disk-io.c76
-rw-r--r--fs/btrfs/extent_io.c7
-rw-r--r--fs/btrfs/inode.c73
-rw-r--r--fs/btrfs/ioctl.c117
-rw-r--r--fs/btrfs/ordered-data.c22
-rw-r--r--fs/btrfs/rcu-string.h56
-rw-r--r--fs/btrfs/scrub.c30
-rw-r--r--fs/btrfs/super.c33
-rw-r--r--fs/btrfs/transaction.c14
-rw-r--r--fs/btrfs/volumes.c92
-rw-r--r--fs/btrfs/volumes.h2
19 files changed, 478 insertions, 191 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f6e62def61cd..3e30a3afe2a4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1646,11 +1646,11 @@ S: Maintained
1646F: drivers/gpio/gpio-bt8xx.c 1646F: drivers/gpio/gpio-bt8xx.c
1647 1647
1648BTRFS FILE SYSTEM 1648BTRFS FILE SYSTEM
1649M: Chris Mason <chris.mason@oracle.com> 1649M: Chris Mason <chris.mason@fusionio.com>
1650L: linux-btrfs@vger.kernel.org 1650L: linux-btrfs@vger.kernel.org
1651W: http://btrfs.wiki.kernel.org/ 1651W: http://btrfs.wiki.kernel.org/
1652Q: http://patchwork.kernel.org/project/linux-btrfs/list/ 1652Q: http://patchwork.kernel.org/project/linux-btrfs/list/
1653T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable.git 1653T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git
1654S: Maintained 1654S: Maintained
1655F: Documentation/filesystems/btrfs.txt 1655F: Documentation/filesystems/btrfs.txt
1656F: fs/btrfs/ 1656F: fs/btrfs/
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 3f75895c919b..8f7d1237b7a0 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -179,7 +179,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
179 179
180static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, 180static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
181 struct ulist *parents, int level, 181 struct ulist *parents, int level,
182 struct btrfs_key *key, u64 wanted_disk_byte, 182 struct btrfs_key *key, u64 time_seq,
183 u64 wanted_disk_byte,
183 const u64 *extent_item_pos) 184 const u64 *extent_item_pos)
184{ 185{
185 int ret; 186 int ret;
@@ -212,7 +213,7 @@ add_parent:
212 */ 213 */
213 while (1) { 214 while (1) {
214 eie = NULL; 215 eie = NULL;
215 ret = btrfs_next_leaf(root, path); 216 ret = btrfs_next_old_leaf(root, path, time_seq);
216 if (ret < 0) 217 if (ret < 0)
217 return ret; 218 return ret;
218 if (ret) 219 if (ret)
@@ -294,18 +295,10 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
294 goto out; 295 goto out;
295 } 296 }
296 297
297 if (level == 0) { 298 if (level == 0)
298 if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) {
299 ret = btrfs_next_leaf(root, path);
300 if (ret)
301 goto out;
302 eb = path->nodes[0];
303 }
304
305 btrfs_item_key_to_cpu(eb, &key, path->slots[0]); 299 btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
306 }
307 300
308 ret = add_all_parents(root, path, parents, level, &key, 301 ret = add_all_parents(root, path, parents, level, &key, time_seq,
309 ref->wanted_disk_byte, extent_item_pos); 302 ref->wanted_disk_byte, extent_item_pos);
310out: 303out:
311 btrfs_free_path(path); 304 btrfs_free_path(path);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index e616f8872e69..12394a90d60f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -37,6 +37,7 @@
37#define BTRFS_INODE_IN_DEFRAG 3 37#define BTRFS_INODE_IN_DEFRAG 3
38#define BTRFS_INODE_DELALLOC_META_RESERVED 4 38#define BTRFS_INODE_DELALLOC_META_RESERVED 4
39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5 39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
40#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
40 41
41/* in memory btrfs inode */ 42/* in memory btrfs inode */
42struct btrfs_inode { 43struct btrfs_inode {
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 9cebb1fd6a3c..da6e9364a5e3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -93,6 +93,7 @@
93#include "print-tree.h" 93#include "print-tree.h"
94#include "locking.h" 94#include "locking.h"
95#include "check-integrity.h" 95#include "check-integrity.h"
96#include "rcu-string.h"
96 97
97#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 98#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
98#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 99#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
@@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror(
843 superblock_tmp->never_written = 0; 844 superblock_tmp->never_written = 0;
844 superblock_tmp->mirror_num = 1 + superblock_mirror_num; 845 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
845 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 846 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
846 printk(KERN_INFO "New initial S-block (bdev %p, %s)" 847 printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
847 " @%llu (%s/%llu/%d)\n", 848 " @%llu (%s/%llu/%d)\n",
848 superblock_bdev, device->name, 849 superblock_bdev,
849 (unsigned long long)dev_bytenr, 850 rcu_str_deref(device->name),
850 dev_state->name, 851 (unsigned long long)dev_bytenr,
851 (unsigned long long)dev_bytenr, 852 dev_state->name,
852 superblock_mirror_num); 853 (unsigned long long)dev_bytenr,
854 superblock_mirror_num);
853 list_add(&superblock_tmp->all_blocks_node, 855 list_add(&superblock_tmp->all_blocks_node,
854 &state->all_blocks_list); 856 &state->all_blocks_list);
855 btrfsic_block_hashtable_add(superblock_tmp, 857 btrfsic_block_hashtable_add(superblock_tmp,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d7a96cfdc50a..04b06bcf2ca9 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
467 return 0; 467 return 0;
468} 468}
469 469
470/*
471 * This allocates memory and gets a tree modification sequence number when
472 * needed.
473 *
474 * Returns 0 when no sequence number is needed, < 0 on error.
475 * Returns 1 when a sequence number was added. In this case,
476 * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
477 * after inserting into the rb tree.
478 */
470static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, 479static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
471 struct tree_mod_elem **tm_ret) 480 struct tree_mod_elem **tm_ret)
472{ 481{
@@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
491 */ 500 */
492 kfree(tm); 501 kfree(tm);
493 seq = 0; 502 seq = 0;
503 spin_unlock(&fs_info->tree_mod_seq_lock);
494 } else { 504 } else {
495 __get_tree_mod_seq(fs_info, &tm->elem); 505 __get_tree_mod_seq(fs_info, &tm->elem);
496 seq = tm->elem.seq; 506 seq = tm->elem.seq;
497 } 507 }
498 spin_unlock(&fs_info->tree_mod_seq_lock);
499 508
500 return seq; 509 return seq;
501} 510}
@@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
521 tm->slot = slot; 530 tm->slot = slot;
522 tm->generation = btrfs_node_ptr_generation(eb, slot); 531 tm->generation = btrfs_node_ptr_generation(eb, slot);
523 532
524 return __tree_mod_log_insert(fs_info, tm); 533 ret = __tree_mod_log_insert(fs_info, tm);
534 spin_unlock(&fs_info->tree_mod_seq_lock);
535 return ret;
525} 536}
526 537
527static noinline int 538static noinline int
@@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
559 tm->move.nr_items = nr_items; 570 tm->move.nr_items = nr_items;
560 tm->op = MOD_LOG_MOVE_KEYS; 571 tm->op = MOD_LOG_MOVE_KEYS;
561 572
562 return __tree_mod_log_insert(fs_info, tm); 573 ret = __tree_mod_log_insert(fs_info, tm);
574 spin_unlock(&fs_info->tree_mod_seq_lock);
575 return ret;
563} 576}
564 577
565static noinline int 578static noinline int
@@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
580 tm->generation = btrfs_header_generation(old_root); 593 tm->generation = btrfs_header_generation(old_root);
581 tm->op = MOD_LOG_ROOT_REPLACE; 594 tm->op = MOD_LOG_ROOT_REPLACE;
582 595
583 return __tree_mod_log_insert(fs_info, tm); 596 ret = __tree_mod_log_insert(fs_info, tm);
597 spin_unlock(&fs_info->tree_mod_seq_lock);
598 return ret;
584} 599}
585 600
586static struct tree_mod_elem * 601static struct tree_mod_elem *
@@ -1023,6 +1038,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1023 looped = 1; 1038 looped = 1;
1024 } 1039 }
1025 1040
1041 /* if there's no old root to return, return what we found instead */
1042 if (!found)
1043 found = tm;
1044
1026 return found; 1045 return found;
1027} 1046}
1028 1047
@@ -1143,22 +1162,36 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1143 return eb_rewin; 1162 return eb_rewin;
1144} 1163}
1145 1164
1165/*
1166 * get_old_root() rewinds the state of @root's root node to the given @time_seq
1167 * value. If there are no changes, the current root->root_node is returned. If
1168 * anything changed in between, there's a fresh buffer allocated on which the
1169 * rewind operations are done. In any case, the returned buffer is read locked.
1170 * Returns NULL on error (with no locks held).
1171 */
1146static inline struct extent_buffer * 1172static inline struct extent_buffer *
1147get_old_root(struct btrfs_root *root, u64 time_seq) 1173get_old_root(struct btrfs_root *root, u64 time_seq)
1148{ 1174{
1149 struct tree_mod_elem *tm; 1175 struct tree_mod_elem *tm;
1150 struct extent_buffer *eb; 1176 struct extent_buffer *eb;
1151 struct tree_mod_root *old_root; 1177 struct tree_mod_root *old_root = NULL;
1152 u64 old_generation; 1178 u64 old_generation;
1179 u64 logical;
1153 1180
1181 eb = btrfs_read_lock_root_node(root);
1154 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); 1182 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
1155 if (!tm) 1183 if (!tm)
1156 return root->node; 1184 return root->node;
1157 1185
1158 old_root = &tm->old_root; 1186 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1159 old_generation = tm->generation; 1187 old_root = &tm->old_root;
1188 old_generation = tm->generation;
1189 logical = old_root->logical;
1190 } else {
1191 logical = root->node->start;
1192 }
1160 1193
1161 tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); 1194 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
1162 /* 1195 /*
1163 * there was an item in the log when __tree_mod_log_oldest_root 1196 * there was an item in the log when __tree_mod_log_oldest_root
1164 * returned. this one must not go away, because the time_seq passed to 1197 * returned. this one must not go away, because the time_seq passed to
@@ -1166,22 +1199,25 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1166 */ 1199 */
1167 BUG_ON(!tm); 1200 BUG_ON(!tm);
1168 1201
1169 if (old_root->logical == root->node->start) { 1202 if (old_root)
1170 /* there are logged operations for the current root */
1171 eb = btrfs_clone_extent_buffer(root->node);
1172 } else {
1173 /* there's a root replace operation for the current root */
1174 eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, 1203 eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
1175 root->nodesize); 1204 root->nodesize);
1205 else
1206 eb = btrfs_clone_extent_buffer(root->node);
1207 btrfs_tree_read_unlock(root->node);
1208 free_extent_buffer(root->node);
1209 if (!eb)
1210 return NULL;
1211 btrfs_tree_read_lock(eb);
1212 if (old_root) {
1176 btrfs_set_header_bytenr(eb, eb->start); 1213 btrfs_set_header_bytenr(eb, eb->start);
1177 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); 1214 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
1178 btrfs_set_header_owner(eb, root->root_key.objectid); 1215 btrfs_set_header_owner(eb, root->root_key.objectid);
1216 btrfs_set_header_level(eb, old_root->level);
1217 btrfs_set_header_generation(eb, old_generation);
1179 } 1218 }
1180 if (!eb)
1181 return NULL;
1182 btrfs_set_header_level(eb, old_root->level);
1183 btrfs_set_header_generation(eb, old_generation);
1184 __tree_mod_log_rewind(eb, time_seq, tm); 1219 __tree_mod_log_rewind(eb, time_seq, tm);
1220 extent_buffer_get(eb);
1185 1221
1186 return eb; 1222 return eb;
1187} 1223}
@@ -1650,8 +1686,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1650 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1686 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
1651 return 0; 1687 return 0;
1652 1688
1653 btrfs_header_nritems(mid);
1654
1655 left = read_node_slot(root, parent, pslot - 1); 1689 left = read_node_slot(root, parent, pslot - 1);
1656 if (left) { 1690 if (left) {
1657 btrfs_tree_lock(left); 1691 btrfs_tree_lock(left);
@@ -1681,7 +1715,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1681 wret = push_node_left(trans, root, left, mid, 1); 1715 wret = push_node_left(trans, root, left, mid, 1);
1682 if (wret < 0) 1716 if (wret < 0)
1683 ret = wret; 1717 ret = wret;
1684 btrfs_header_nritems(mid);
1685 } 1718 }
1686 1719
1687 /* 1720 /*
@@ -2615,9 +2648,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2615 2648
2616again: 2649again:
2617 b = get_old_root(root, time_seq); 2650 b = get_old_root(root, time_seq);
2618 extent_buffer_get(b);
2619 level = btrfs_header_level(b); 2651 level = btrfs_header_level(b);
2620 btrfs_tree_read_lock(b);
2621 p->locks[level] = BTRFS_READ_LOCK; 2652 p->locks[level] = BTRFS_READ_LOCK;
2622 2653
2623 while (b) { 2654 while (b) {
@@ -5001,6 +5032,12 @@ next:
5001 */ 5032 */
5002int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 5033int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
5003{ 5034{
5035 return btrfs_next_old_leaf(root, path, 0);
5036}
5037
5038int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
5039 u64 time_seq)
5040{
5004 int slot; 5041 int slot;
5005 int level; 5042 int level;
5006 struct extent_buffer *c; 5043 struct extent_buffer *c;
@@ -5025,7 +5062,10 @@ again:
5025 path->keep_locks = 1; 5062 path->keep_locks = 1;
5026 path->leave_spinning = 1; 5063 path->leave_spinning = 1;
5027 5064
5028 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5065 if (time_seq)
5066 ret = btrfs_search_old_slot(root, &key, path, time_seq);
5067 else
5068 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5029 path->keep_locks = 0; 5069 path->keep_locks = 0;
5030 5070
5031 if (ret < 0) 5071 if (ret < 0)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0236d03c6732..8b73b2d4deb7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2753,6 +2753,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2753} 2753}
2754 2754
2755int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 2755int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
2756int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
2757 u64 time_seq);
2756static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) 2758static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
2757{ 2759{
2758 ++p->slots[0]; 2760 ++p->slots[0];
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index c18d0442ae6d..2399f4086915 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1879,3 +1879,21 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1879 } 1879 }
1880 } 1880 }
1881} 1881}
1882
1883void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
1884{
1885 struct btrfs_delayed_root *delayed_root;
1886 struct btrfs_delayed_node *curr_node, *prev_node;
1887
1888 delayed_root = btrfs_get_delayed_root(root);
1889
1890 curr_node = btrfs_first_delayed_node(delayed_root);
1891 while (curr_node) {
1892 __btrfs_kill_delayed_node(curr_node);
1893
1894 prev_node = curr_node;
1895 curr_node = btrfs_next_delayed_node(curr_node);
1896 btrfs_release_delayed_node(prev_node);
1897 }
1898}
1899
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 7083d08b2a21..f5aa4023d3e1 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -124,6 +124,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev);
124/* Used for drop dead root */ 124/* Used for drop dead root */
125void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); 125void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
126 126
127/* Used for clean the transaction */
128void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
129
127/* Used for readdir() */ 130/* Used for readdir() */
128void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, 131void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
129 struct list_head *del_list); 132 struct list_head *del_list);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7ae51decf6d3..e1890b1d3075 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,7 @@
44#include "free-space-cache.h" 44#include "free-space-cache.h"
45#include "inode-map.h" 45#include "inode-map.h"
46#include "check-integrity.h" 46#include "check-integrity.h"
47#include "rcu-string.h"
47 48
48static struct extent_io_ops btree_extent_io_ops; 49static struct extent_io_ops btree_extent_io_ops;
49static void end_workqueue_fn(struct btrfs_work *work); 50static void end_workqueue_fn(struct btrfs_work *work);
@@ -2118,7 +2119,7 @@ int open_ctree(struct super_block *sb,
2118 2119
2119 features = btrfs_super_incompat_flags(disk_super); 2120 features = btrfs_super_incompat_flags(disk_super);
2120 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 2121 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
2121 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) 2122 if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
2122 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2123 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2123 2124
2124 /* 2125 /*
@@ -2575,8 +2576,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
2575 struct btrfs_device *device = (struct btrfs_device *) 2576 struct btrfs_device *device = (struct btrfs_device *)
2576 bh->b_private; 2577 bh->b_private;
2577 2578
2578 printk_ratelimited(KERN_WARNING "lost page write due to " 2579 printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
2579 "I/O error on %s\n", device->name); 2580 "I/O error on %s\n",
2581 rcu_str_deref(device->name));
2580 /* note, we dont' set_buffer_write_io_error because we have 2582 /* note, we dont' set_buffer_write_io_error because we have
2581 * our own ways of dealing with the IO errors 2583 * our own ways of dealing with the IO errors
2582 */ 2584 */
@@ -2749,8 +2751,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
2749 wait_for_completion(&device->flush_wait); 2751 wait_for_completion(&device->flush_wait);
2750 2752
2751 if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 2753 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
2752 printk("btrfs: disabling barriers on dev %s\n", 2754 printk_in_rcu("btrfs: disabling barriers on dev %s\n",
2753 device->name); 2755 rcu_str_deref(device->name));
2754 device->nobarriers = 1; 2756 device->nobarriers = 1;
2755 } 2757 }
2756 if (!bio_flagged(bio, BIO_UPTODATE)) { 2758 if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3400,7 +3402,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3400 3402
3401 delayed_refs = &trans->delayed_refs; 3403 delayed_refs = &trans->delayed_refs;
3402 3404
3403again:
3404 spin_lock(&delayed_refs->lock); 3405 spin_lock(&delayed_refs->lock);
3405 if (delayed_refs->num_entries == 0) { 3406 if (delayed_refs->num_entries == 0) {
3406 spin_unlock(&delayed_refs->lock); 3407 spin_unlock(&delayed_refs->lock);
@@ -3408,31 +3409,36 @@ again:
3408 return ret; 3409 return ret;
3409 } 3410 }
3410 3411
3411 node = rb_first(&delayed_refs->root); 3412 while ((node = rb_first(&delayed_refs->root)) != NULL) {
3412 while (node) {
3413 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3413 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
3414 node = rb_next(node);
3415
3416 ref->in_tree = 0;
3417 rb_erase(&ref->rb_node, &delayed_refs->root);
3418 delayed_refs->num_entries--;
3419 3414
3420 atomic_set(&ref->refs, 1); 3415 atomic_set(&ref->refs, 1);
3421 if (btrfs_delayed_ref_is_head(ref)) { 3416 if (btrfs_delayed_ref_is_head(ref)) {
3422 struct btrfs_delayed_ref_head *head; 3417 struct btrfs_delayed_ref_head *head;
3423 3418
3424 head = btrfs_delayed_node_to_head(ref); 3419 head = btrfs_delayed_node_to_head(ref);
3425 spin_unlock(&delayed_refs->lock); 3420 if (!mutex_trylock(&head->mutex)) {
3426 mutex_lock(&head->mutex); 3421 atomic_inc(&ref->refs);
3422 spin_unlock(&delayed_refs->lock);
3423
3424 /* Need to wait for the delayed ref to run */
3425 mutex_lock(&head->mutex);
3426 mutex_unlock(&head->mutex);
3427 btrfs_put_delayed_ref(ref);
3428
3429 continue;
3430 }
3431
3427 kfree(head->extent_op); 3432 kfree(head->extent_op);
3428 delayed_refs->num_heads--; 3433 delayed_refs->num_heads--;
3429 if (list_empty(&head->cluster)) 3434 if (list_empty(&head->cluster))
3430 delayed_refs->num_heads_ready--; 3435 delayed_refs->num_heads_ready--;
3431 list_del_init(&head->cluster); 3436 list_del_init(&head->cluster);
3432 mutex_unlock(&head->mutex);
3433 btrfs_put_delayed_ref(ref);
3434 goto again;
3435 } 3437 }
3438 ref->in_tree = 0;
3439 rb_erase(&ref->rb_node, &delayed_refs->root);
3440 delayed_refs->num_entries--;
3441
3436 spin_unlock(&delayed_refs->lock); 3442 spin_unlock(&delayed_refs->lock);
3437 btrfs_put_delayed_ref(ref); 3443 btrfs_put_delayed_ref(ref);
3438 3444
@@ -3520,11 +3526,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3520 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, 3526 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
3521 offset >> PAGE_CACHE_SHIFT); 3527 offset >> PAGE_CACHE_SHIFT);
3522 spin_unlock(&dirty_pages->buffer_lock); 3528 spin_unlock(&dirty_pages->buffer_lock);
3523 if (eb) { 3529 if (eb)
3524 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, 3530 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
3525 &eb->bflags); 3531 &eb->bflags);
3526 atomic_set(&eb->refs, 1);
3527 }
3528 if (PageWriteback(page)) 3532 if (PageWriteback(page))
3529 end_page_writeback(page); 3533 end_page_writeback(page);
3530 3534
@@ -3538,8 +3542,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3538 spin_unlock_irq(&page->mapping->tree_lock); 3542 spin_unlock_irq(&page->mapping->tree_lock);
3539 } 3543 }
3540 3544
3541 page->mapping->a_ops->invalidatepage(page, 0);
3542 unlock_page(page); 3545 unlock_page(page);
3546 page_cache_release(page);
3543 } 3547 }
3544 } 3548 }
3545 3549
@@ -3553,8 +3557,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
3553 u64 start; 3557 u64 start;
3554 u64 end; 3558 u64 end;
3555 int ret; 3559 int ret;
3560 bool loop = true;
3556 3561
3557 unpin = pinned_extents; 3562 unpin = pinned_extents;
3563again:
3558 while (1) { 3564 while (1) {
3559 ret = find_first_extent_bit(unpin, 0, &start, &end, 3565 ret = find_first_extent_bit(unpin, 0, &start, &end,
3560 EXTENT_DIRTY); 3566 EXTENT_DIRTY);
@@ -3572,6 +3578,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
3572 cond_resched(); 3578 cond_resched();
3573 } 3579 }
3574 3580
3581 if (loop) {
3582 if (unpin == &root->fs_info->freed_extents[0])
3583 unpin = &root->fs_info->freed_extents[1];
3584 else
3585 unpin = &root->fs_info->freed_extents[0];
3586 loop = false;
3587 goto again;
3588 }
3589
3575 return 0; 3590 return 0;
3576} 3591}
3577 3592
@@ -3585,21 +3600,23 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3585 /* FIXME: cleanup wait for commit */ 3600 /* FIXME: cleanup wait for commit */
3586 cur_trans->in_commit = 1; 3601 cur_trans->in_commit = 1;
3587 cur_trans->blocked = 1; 3602 cur_trans->blocked = 1;
3588 if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 3603 wake_up(&root->fs_info->transaction_blocked_wait);
3589 wake_up(&root->fs_info->transaction_blocked_wait);
3590 3604
3591 cur_trans->blocked = 0; 3605 cur_trans->blocked = 0;
3592 if (waitqueue_active(&root->fs_info->transaction_wait)) 3606 wake_up(&root->fs_info->transaction_wait);
3593 wake_up(&root->fs_info->transaction_wait);
3594 3607
3595 cur_trans->commit_done = 1; 3608 cur_trans->commit_done = 1;
3596 if (waitqueue_active(&cur_trans->commit_wait)) 3609 wake_up(&cur_trans->commit_wait);
3597 wake_up(&cur_trans->commit_wait); 3610
3611 btrfs_destroy_delayed_inodes(root);
3612 btrfs_assert_delayed_root_empty(root);
3598 3613
3599 btrfs_destroy_pending_snapshots(cur_trans); 3614 btrfs_destroy_pending_snapshots(cur_trans);
3600 3615
3601 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, 3616 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
3602 EXTENT_DIRTY); 3617 EXTENT_DIRTY);
3618 btrfs_destroy_pinned_extent(root,
3619 root->fs_info->pinned_extents);
3603 3620
3604 /* 3621 /*
3605 memset(cur_trans, 0, sizeof(*cur_trans)); 3622 memset(cur_trans, 0, sizeof(*cur_trans));
@@ -3648,6 +3665,9 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3648 if (waitqueue_active(&t->commit_wait)) 3665 if (waitqueue_active(&t->commit_wait))
3649 wake_up(&t->commit_wait); 3666 wake_up(&t->commit_wait);
3650 3667
3668 btrfs_destroy_delayed_inodes(root);
3669 btrfs_assert_delayed_root_empty(root);
3670
3651 btrfs_destroy_pending_snapshots(t); 3671 btrfs_destroy_pending_snapshots(t);
3652 3672
3653 btrfs_destroy_delalloc_inodes(root); 3673 btrfs_destroy_delalloc_inodes(root);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2c8f7b204617..aaa12c1eb348 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -20,6 +20,7 @@
20#include "volumes.h" 20#include "volumes.h"
21#include "check-integrity.h" 21#include "check-integrity.h"
22#include "locking.h" 22#include "locking.h"
23#include "rcu-string.h"
23 24
24static struct kmem_cache *extent_state_cache; 25static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache; 26static struct kmem_cache *extent_buffer_cache;
@@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
1917 return -EIO; 1918 return -EIO;
1918 } 1919 }
1919 1920
1920 printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " 1921 printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
1921 "sector %llu)\n", page->mapping->host->i_ino, start, 1922 "(dev %s sector %llu)\n", page->mapping->host->i_ino,
1922 dev->name, sector); 1923 start, rcu_str_deref(dev->name), sector);
1923 1924
1924 bio_put(bio); 1925 bio_put(bio);
1925 return 0; 1926 return 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f6ab6f5e635a..a4f02501da40 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode,
830 if (IS_ERR(trans)) { 830 if (IS_ERR(trans)) {
831 extent_clear_unlock_delalloc(inode, 831 extent_clear_unlock_delalloc(inode,
832 &BTRFS_I(inode)->io_tree, 832 &BTRFS_I(inode)->io_tree,
833 start, end, NULL, 833 start, end, locked_page,
834 EXTENT_CLEAR_UNLOCK_PAGE | 834 EXTENT_CLEAR_UNLOCK_PAGE |
835 EXTENT_CLEAR_UNLOCK | 835 EXTENT_CLEAR_UNLOCK |
836 EXTENT_CLEAR_DELALLOC | 836 EXTENT_CLEAR_DELALLOC |
@@ -963,7 +963,7 @@ out:
963out_unlock: 963out_unlock:
964 extent_clear_unlock_delalloc(inode, 964 extent_clear_unlock_delalloc(inode,
965 &BTRFS_I(inode)->io_tree, 965 &BTRFS_I(inode)->io_tree,
966 start, end, NULL, 966 start, end, locked_page,
967 EXTENT_CLEAR_UNLOCK_PAGE | 967 EXTENT_CLEAR_UNLOCK_PAGE |
968 EXTENT_CLEAR_UNLOCK | 968 EXTENT_CLEAR_UNLOCK |
969 EXTENT_CLEAR_DELALLOC | 969 EXTENT_CLEAR_DELALLOC |
@@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work)
986 compress_file_range(async_cow->inode, async_cow->locked_page, 986 compress_file_range(async_cow->inode, async_cow->locked_page,
987 async_cow->start, async_cow->end, async_cow, 987 async_cow->start, async_cow->end, async_cow,
988 &num_added); 988 &num_added);
989 if (num_added == 0) 989 if (num_added == 0) {
990 iput(async_cow->inode);
990 async_cow->inode = NULL; 991 async_cow->inode = NULL;
992 }
991} 993}
992 994
993/* 995/*
@@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
1020{ 1022{
1021 struct async_cow *async_cow; 1023 struct async_cow *async_cow;
1022 async_cow = container_of(work, struct async_cow, work); 1024 async_cow = container_of(work, struct async_cow, work);
1025 if (async_cow->inode)
1026 iput(async_cow->inode);
1023 kfree(async_cow); 1027 kfree(async_cow);
1024} 1028}
1025 1029
@@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1038 while (start < end) { 1042 while (start < end) {
1039 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); 1043 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
1040 BUG_ON(!async_cow); /* -ENOMEM */ 1044 BUG_ON(!async_cow); /* -ENOMEM */
1041 async_cow->inode = inode; 1045 async_cow->inode = igrab(inode);
1042 async_cow->root = root; 1046 async_cow->root = root;
1043 async_cow->locked_page = locked_page; 1047 async_cow->locked_page = locked_page;
1044 async_cow->start = start; 1048 async_cow->start = start;
@@ -1136,8 +1140,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1136 u64 ino = btrfs_ino(inode); 1140 u64 ino = btrfs_ino(inode);
1137 1141
1138 path = btrfs_alloc_path(); 1142 path = btrfs_alloc_path();
1139 if (!path) 1143 if (!path) {
1144 extent_clear_unlock_delalloc(inode,
1145 &BTRFS_I(inode)->io_tree,
1146 start, end, locked_page,
1147 EXTENT_CLEAR_UNLOCK_PAGE |
1148 EXTENT_CLEAR_UNLOCK |
1149 EXTENT_CLEAR_DELALLOC |
1150 EXTENT_CLEAR_DIRTY |
1151 EXTENT_SET_WRITEBACK |
1152 EXTENT_END_WRITEBACK);
1140 return -ENOMEM; 1153 return -ENOMEM;
1154 }
1141 1155
1142 nolock = btrfs_is_free_space_inode(root, inode); 1156 nolock = btrfs_is_free_space_inode(root, inode);
1143 1157
@@ -1147,6 +1161,15 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1147 trans = btrfs_join_transaction(root); 1161 trans = btrfs_join_transaction(root);
1148 1162
1149 if (IS_ERR(trans)) { 1163 if (IS_ERR(trans)) {
1164 extent_clear_unlock_delalloc(inode,
1165 &BTRFS_I(inode)->io_tree,
1166 start, end, locked_page,
1167 EXTENT_CLEAR_UNLOCK_PAGE |
1168 EXTENT_CLEAR_UNLOCK |
1169 EXTENT_CLEAR_DELALLOC |
1170 EXTENT_CLEAR_DIRTY |
1171 EXTENT_SET_WRITEBACK |
1172 EXTENT_END_WRITEBACK);
1150 btrfs_free_path(path); 1173 btrfs_free_path(path);
1151 return PTR_ERR(trans); 1174 return PTR_ERR(trans);
1152 } 1175 }
@@ -1327,8 +1350,11 @@ out_check:
1327 } 1350 }
1328 btrfs_release_path(path); 1351 btrfs_release_path(path);
1329 1352
1330 if (cur_offset <= end && cow_start == (u64)-1) 1353 if (cur_offset <= end && cow_start == (u64)-1) {
1331 cow_start = cur_offset; 1354 cow_start = cur_offset;
1355 cur_offset = end;
1356 }
1357
1332 if (cow_start != (u64)-1) { 1358 if (cow_start != (u64)-1) {
1333 ret = cow_file_range(inode, locked_page, cow_start, end, 1359 ret = cow_file_range(inode, locked_page, cow_start, end,
1334 page_started, nr_written, 1); 1360 page_started, nr_written, 1);
@@ -1347,6 +1373,17 @@ error:
1347 if (!ret) 1373 if (!ret)
1348 ret = err; 1374 ret = err;
1349 1375
1376 if (ret && cur_offset < end)
1377 extent_clear_unlock_delalloc(inode,
1378 &BTRFS_I(inode)->io_tree,
1379 cur_offset, end, locked_page,
1380 EXTENT_CLEAR_UNLOCK_PAGE |
1381 EXTENT_CLEAR_UNLOCK |
1382 EXTENT_CLEAR_DELALLOC |
1383 EXTENT_CLEAR_DIRTY |
1384 EXTENT_SET_WRITEBACK |
1385 EXTENT_END_WRITEBACK);
1386
1350 btrfs_free_path(path); 1387 btrfs_free_path(path);
1351 return ret; 1388 return ret;
1352} 1389}
@@ -1361,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1361 int ret; 1398 int ret;
1362 struct btrfs_root *root = BTRFS_I(inode)->root; 1399 struct btrfs_root *root = BTRFS_I(inode)->root;
1363 1400
1364 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) 1401 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {
1365 ret = run_delalloc_nocow(inode, locked_page, start, end, 1402 ret = run_delalloc_nocow(inode, locked_page, start, end,
1366 page_started, 1, nr_written); 1403 page_started, 1, nr_written);
1367 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) 1404 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {
1368 ret = run_delalloc_nocow(inode, locked_page, start, end, 1405 ret = run_delalloc_nocow(inode, locked_page, start, end,
1369 page_started, 0, nr_written); 1406 page_started, 0, nr_written);
1370 else if (!btrfs_test_opt(root, COMPRESS) && 1407 } else if (!btrfs_test_opt(root, COMPRESS) &&
1371 !(BTRFS_I(inode)->force_compress) && 1408 !(BTRFS_I(inode)->force_compress) &&
1372 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) 1409 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {
1373 ret = cow_file_range(inode, locked_page, start, end, 1410 ret = cow_file_range(inode, locked_page, start, end,
1374 page_started, nr_written, 1); 1411 page_started, nr_written, 1);
1375 else 1412 } else {
1413 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1414 &BTRFS_I(inode)->runtime_flags);
1376 ret = cow_file_range_async(inode, locked_page, start, end, 1415 ret = cow_file_range_async(inode, locked_page, start, end,
1377 page_started, nr_written); 1416 page_started, nr_written);
1417 }
1378 return ret; 1418 return ret;
1379} 1419}
1380 1420
@@ -7054,10 +7094,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode)
7054 else 7094 else
7055 b_inode->flags &= ~BTRFS_INODE_NODATACOW; 7095 b_inode->flags &= ~BTRFS_INODE_NODATACOW;
7056 7096
7057 if (b_dir->flags & BTRFS_INODE_COMPRESS) 7097 if (b_dir->flags & BTRFS_INODE_COMPRESS) {
7058 b_inode->flags |= BTRFS_INODE_COMPRESS; 7098 b_inode->flags |= BTRFS_INODE_COMPRESS;
7059 else 7099 b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS;
7060 b_inode->flags &= ~BTRFS_INODE_COMPRESS; 7100 } else {
7101 b_inode->flags &= ~(BTRFS_INODE_COMPRESS |
7102 BTRFS_INODE_NOCOMPRESS);
7103 }
7061} 7104}
7062 7105
7063static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, 7106static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 24b776c08d99..58adbd0356d6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -52,6 +52,7 @@
52#include "locking.h" 52#include "locking.h"
53#include "inode-map.h" 53#include "inode-map.h"
54#include "backref.h" 54#include "backref.h"
55#include "rcu-string.h"
55 56
56/* Mask out flags that are inappropriate for the given type of inode. */ 57/* Mask out flags that are inappropriate for the given type of inode. */
57static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 58static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -785,39 +786,57 @@ none:
785 return -ENOENT; 786 return -ENOENT;
786} 787}
787 788
788/* 789static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
789 * Validaty check of prev em and next em:
790 * 1) no prev/next em
791 * 2) prev/next em is an hole/inline extent
792 */
793static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
794{ 790{
795 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 791 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
796 struct extent_map *prev = NULL, *next = NULL; 792 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
797 int ret = 0; 793 struct extent_map *em;
794 u64 len = PAGE_CACHE_SIZE;
798 795
796 /*
797 * hopefully we have this extent in the tree already, try without
798 * the full extent lock
799 */
799 read_lock(&em_tree->lock); 800 read_lock(&em_tree->lock);
800 prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1); 801 em = lookup_extent_mapping(em_tree, start, len);
801 next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
802 read_unlock(&em_tree->lock); 802 read_unlock(&em_tree->lock);
803 803
804 if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) && 804 if (!em) {
805 (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)) 805 /* get the big lock and read metadata off disk */
806 ret = 1; 806 lock_extent(io_tree, start, start + len - 1);
807 free_extent_map(prev); 807 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
808 free_extent_map(next); 808 unlock_extent(io_tree, start, start + len - 1);
809
810 if (IS_ERR(em))
811 return NULL;
812 }
813
814 return em;
815}
816
817static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
818{
819 struct extent_map *next;
820 bool ret = true;
821
822 /* this is the last extent */
823 if (em->start + em->len >= i_size_read(inode))
824 return false;
809 825
826 next = defrag_lookup_extent(inode, em->start + em->len);
827 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
828 ret = false;
829
830 free_extent_map(next);
810 return ret; 831 return ret;
811} 832}
812 833
813static int should_defrag_range(struct inode *inode, u64 start, u64 len, 834static int should_defrag_range(struct inode *inode, u64 start, int thresh,
814 int thresh, u64 *last_len, u64 *skip, 835 u64 *last_len, u64 *skip, u64 *defrag_end)
815 u64 *defrag_end)
816{ 836{
817 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 837 struct extent_map *em;
818 struct extent_map *em = NULL;
819 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
820 int ret = 1; 838 int ret = 1;
839 bool next_mergeable = true;
821 840
822 /* 841 /*
823 * make sure that once we start defragging an extent, we keep on 842 * make sure that once we start defragging an extent, we keep on
@@ -828,23 +847,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
828 847
829 *skip = 0; 848 *skip = 0;
830 849
831 /* 850 em = defrag_lookup_extent(inode, start);
832 * hopefully we have this extent in the tree already, try without 851 if (!em)
833 * the full extent lock 852 return 0;
834 */
835 read_lock(&em_tree->lock);
836 em = lookup_extent_mapping(em_tree, start, len);
837 read_unlock(&em_tree->lock);
838
839 if (!em) {
840 /* get the big lock and read metadata off disk */
841 lock_extent(io_tree, start, start + len - 1);
842 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
843 unlock_extent(io_tree, start, start + len - 1);
844
845 if (IS_ERR(em))
846 return 0;
847 }
848 853
849 /* this will cover holes, and inline extents */ 854 /* this will cover holes, and inline extents */
850 if (em->block_start >= EXTENT_MAP_LAST_BYTE) { 855 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
@@ -852,18 +857,15 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
852 goto out; 857 goto out;
853 } 858 }
854 859
855 /* If we have nothing to merge with us, just skip. */ 860 next_mergeable = defrag_check_next_extent(inode, em);
856 if (check_adjacent_extents(inode, em)) {
857 ret = 0;
858 goto out;
859 }
860 861
861 /* 862 /*
862 * we hit a real extent, if it is big don't bother defragging it again 863 * we hit a real extent, if it is big or the next extent is not a
864 * real extent, don't bother defragging it
863 */ 865 */
864 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) 866 if ((*last_len == 0 || *last_len >= thresh) &&
867 (em->len >= thresh || !next_mergeable))
865 ret = 0; 868 ret = 0;
866
867out: 869out:
868 /* 870 /*
869 * last_len ends up being a counter of how many bytes we've defragged. 871 * last_len ends up being a counter of how many bytes we've defragged.
@@ -1142,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1142 break; 1144 break;
1143 1145
1144 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1146 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1145 PAGE_CACHE_SIZE, extent_thresh, 1147 extent_thresh, &last_len, &skip,
1146 &last_len, &skip, &defrag_end)) { 1148 &defrag_end)) {
1147 unsigned long next; 1149 unsigned long next;
1148 /* 1150 /*
1149 * the should_defrag function tells us how much to skip 1151 * the should_defrag function tells us how much to skip
@@ -1304,6 +1306,13 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1304 ret = -EINVAL; 1306 ret = -EINVAL;
1305 goto out_free; 1307 goto out_free;
1306 } 1308 }
1309 if (device->fs_devices && device->fs_devices->seeding) {
1310 printk(KERN_INFO "btrfs: resizer unable to apply on "
1311 "seeding device %llu\n", devid);
1312 ret = -EINVAL;
1313 goto out_free;
1314 }
1315
1307 if (!strcmp(sizestr, "max")) 1316 if (!strcmp(sizestr, "max"))
1308 new_size = device->bdev->bd_inode->i_size; 1317 new_size = device->bdev->bd_inode->i_size;
1309 else { 1318 else {
@@ -1345,8 +1354,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1345 do_div(new_size, root->sectorsize); 1354 do_div(new_size, root->sectorsize);
1346 new_size *= root->sectorsize; 1355 new_size *= root->sectorsize;
1347 1356
1348 printk(KERN_INFO "btrfs: new size for %s is %llu\n", 1357 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
1349 device->name, (unsigned long long)new_size); 1358 rcu_str_deref(device->name),
1359 (unsigned long long)new_size);
1350 1360
1351 if (new_size > old_size) { 1361 if (new_size > old_size) {
1352 trans = btrfs_start_transaction(root, 0); 1362 trans = btrfs_start_transaction(root, 0);
@@ -2264,7 +2274,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2264 di_args->total_bytes = dev->total_bytes; 2274 di_args->total_bytes = dev->total_bytes;
2265 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2275 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2266 if (dev->name) { 2276 if (dev->name) {
2267 strncpy(di_args->path, dev->name, sizeof(di_args->path)); 2277 struct rcu_string *name;
2278
2279 rcu_read_lock();
2280 name = rcu_dereference(dev->name);
2281 strncpy(di_args->path, name->str, sizeof(di_args->path));
2282 rcu_read_unlock();
2268 di_args->path[sizeof(di_args->path) - 1] = 0; 2283 di_args->path[sizeof(di_args->path) - 1] = 0;
2269 } else { 2284 } else {
2270 di_args->path[0] = '\0'; 2285 di_args->path[0] = '\0';
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9e138cdc36c5..643335a4fe3c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
627 /* start IO across the range first to instantiate any delalloc 627 /* start IO across the range first to instantiate any delalloc
628 * extents 628 * extents
629 */ 629 */
630 filemap_write_and_wait_range(inode->i_mapping, start, orig_end); 630 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
631
632 /*
633 * So with compression we will find and lock a dirty page and clear the
634 * first one as dirty, setup an async extent, and immediately return
635 * with the entire range locked but with nobody actually marked with
636 * writeback. So we can't just filemap_write_and_wait_range() and
637 * expect it to work since it will just kick off a thread to do the
638 * actual work. So we need to call filemap_fdatawrite_range _again_
639 * since it will wait on the page lock, which won't be unlocked until
640 * after the pages have been marked as writeback and so we're good to go
641 * from there. We have to do this otherwise we'll miss the ordered
642 * extents and that results in badness. Please Josef, do not think you
643 * know better and pull this out at some point in the future, it is
644 * right and you are wrong.
645 */
646 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
647 &BTRFS_I(inode)->runtime_flags))
648 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
649
650 filemap_fdatawait_range(inode->i_mapping, start, orig_end);
631 651
632 end = orig_end; 652 end = orig_end;
633 found = 0; 653 found = 0;
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
new file mode 100644
index 000000000000..9e111e4576d4
--- /dev/null
+++ b/fs/btrfs/rcu-string.h
@@ -0,0 +1,56 @@
1/*
2 * Copyright (C) 2012 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19struct rcu_string {
20 struct rcu_head rcu;
21 char str[0];
22};
23
24static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
25{
26 size_t len = strlen(src) + 1;
27 struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) +
28 (len * sizeof(char)), mask);
29 if (!ret)
30 return ret;
31 strncpy(ret->str, src, len);
32 return ret;
33}
34
35static inline void rcu_string_free(struct rcu_string *str)
36{
37 if (str)
38 kfree_rcu(str, rcu);
39}
40
41#define printk_in_rcu(fmt, ...) do { \
42 rcu_read_lock(); \
43 printk(fmt, __VA_ARGS__); \
44 rcu_read_unlock(); \
45} while (0)
46
47#define printk_ratelimited_in_rcu(fmt, ...) do { \
48 rcu_read_lock(); \
49 printk_ratelimited(fmt, __VA_ARGS__); \
50 rcu_read_unlock(); \
51} while (0)
52
53#define rcu_str_deref(rcu_str) ({ \
54 struct rcu_string *__str = rcu_dereference(rcu_str); \
55 __str->str; \
56})
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a38cfa4f251e..b223620cd5a6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -26,6 +26,7 @@
26#include "backref.h" 26#include "backref.h"
27#include "extent_io.h" 27#include "extent_io.h"
28#include "check-integrity.h" 28#include "check-integrity.h"
29#include "rcu-string.h"
29 30
30/* 31/*
31 * This is only the first step towards a full-features scrub. It reads all 32 * This is only the first step towards a full-features scrub. It reads all
@@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
320 * hold all of the paths here 321 * hold all of the paths here
321 */ 322 */
322 for (i = 0; i < ipath->fspath->elem_cnt; ++i) 323 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
323 printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 324 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
324 "%s, sector %llu, root %llu, inode %llu, offset %llu, " 325 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
325 "length %llu, links %u (path: %s)\n", swarn->errstr, 326 "length %llu, links %u (path: %s)\n", swarn->errstr,
326 swarn->logical, swarn->dev->name, 327 swarn->logical, rcu_str_deref(swarn->dev->name),
327 (unsigned long long)swarn->sector, root, inum, offset, 328 (unsigned long long)swarn->sector, root, inum, offset,
328 min(isize - offset, (u64)PAGE_SIZE), nlink, 329 min(isize - offset, (u64)PAGE_SIZE), nlink,
329 (char *)(unsigned long)ipath->fspath->val[i]); 330 (char *)(unsigned long)ipath->fspath->val[i]);
@@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
332 return 0; 333 return 0;
333 334
334err: 335err:
335 printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 336 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
336 "%s, sector %llu, root %llu, inode %llu, offset %llu: path " 337 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
337 "resolving failed with ret=%d\n", swarn->errstr, 338 "resolving failed with ret=%d\n", swarn->errstr,
338 swarn->logical, swarn->dev->name, 339 swarn->logical, rcu_str_deref(swarn->dev->name),
339 (unsigned long long)swarn->sector, root, inum, offset, ret); 340 (unsigned long long)swarn->sector, root, inum, offset, ret);
340 341
341 free_ipath(ipath); 342 free_ipath(ipath);
@@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
390 do { 391 do {
391 ret = tree_backref_for_extent(&ptr, eb, ei, item_size, 392 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
392 &ref_root, &ref_level); 393 &ref_root, &ref_level);
393 printk(KERN_WARNING 394 printk_in_rcu(KERN_WARNING
394 "btrfs: %s at logical %llu on dev %s, " 395 "btrfs: %s at logical %llu on dev %s, "
395 "sector %llu: metadata %s (level %d) in tree " 396 "sector %llu: metadata %s (level %d) in tree "
396 "%llu\n", errstr, swarn.logical, dev->name, 397 "%llu\n", errstr, swarn.logical,
398 rcu_str_deref(dev->name),
397 (unsigned long long)swarn.sector, 399 (unsigned long long)swarn.sector,
398 ref_level ? "node" : "leaf", 400 ref_level ? "node" : "leaf",
399 ret < 0 ? -1 : ref_level, 401 ret < 0 ? -1 : ref_level,
@@ -580,9 +582,11 @@ out:
580 spin_lock(&sdev->stat_lock); 582 spin_lock(&sdev->stat_lock);
581 ++sdev->stat.uncorrectable_errors; 583 ++sdev->stat.uncorrectable_errors;
582 spin_unlock(&sdev->stat_lock); 584 spin_unlock(&sdev->stat_lock);
583 printk_ratelimited(KERN_ERR 585
586 printk_ratelimited_in_rcu(KERN_ERR
584 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", 587 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
585 (unsigned long long)fixup->logical, sdev->dev->name); 588 (unsigned long long)fixup->logical,
589 rcu_str_deref(sdev->dev->name));
586 } 590 }
587 591
588 btrfs_free_path(path); 592 btrfs_free_path(path);
@@ -936,18 +940,20 @@ corrected_error:
936 spin_lock(&sdev->stat_lock); 940 spin_lock(&sdev->stat_lock);
937 sdev->stat.corrected_errors++; 941 sdev->stat.corrected_errors++;
938 spin_unlock(&sdev->stat_lock); 942 spin_unlock(&sdev->stat_lock);
939 printk_ratelimited(KERN_ERR 943 printk_ratelimited_in_rcu(KERN_ERR
940 "btrfs: fixed up error at logical %llu on dev %s\n", 944 "btrfs: fixed up error at logical %llu on dev %s\n",
941 (unsigned long long)logical, sdev->dev->name); 945 (unsigned long long)logical,
946 rcu_str_deref(sdev->dev->name));
942 } 947 }
943 } else { 948 } else {
944did_not_correct_error: 949did_not_correct_error:
945 spin_lock(&sdev->stat_lock); 950 spin_lock(&sdev->stat_lock);
946 sdev->stat.uncorrectable_errors++; 951 sdev->stat.uncorrectable_errors++;
947 spin_unlock(&sdev->stat_lock); 952 spin_unlock(&sdev->stat_lock);
948 printk_ratelimited(KERN_ERR 953 printk_ratelimited_in_rcu(KERN_ERR
949 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", 954 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
950 (unsigned long long)logical, sdev->dev->name); 955 (unsigned long long)logical,
956 rcu_str_deref(sdev->dev->name));
951 } 957 }
952 958
953out: 959out:
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 96eb9fef7bd2..0eb9a4da069e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -54,6 +54,7 @@
54#include "version.h" 54#include "version.h"
55#include "export.h" 55#include "export.h"
56#include "compression.h" 56#include "compression.h"
57#include "rcu-string.h"
57 58
58#define CREATE_TRACE_POINTS 59#define CREATE_TRACE_POINTS
59#include <trace/events/btrfs.h> 60#include <trace/events/btrfs.h>
@@ -1482,12 +1483,44 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
1482 "error %d\n", btrfs_ino(inode), ret); 1483 "error %d\n", btrfs_ino(inode), ret);
1483} 1484}
1484 1485
1486static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1487{
1488 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
1489 struct btrfs_fs_devices *cur_devices;
1490 struct btrfs_device *dev, *first_dev = NULL;
1491 struct list_head *head;
1492 struct rcu_string *name;
1493
1494 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1495 cur_devices = fs_info->fs_devices;
1496 while (cur_devices) {
1497 head = &cur_devices->devices;
1498 list_for_each_entry(dev, head, dev_list) {
1499 if (!first_dev || dev->devid < first_dev->devid)
1500 first_dev = dev;
1501 }
1502 cur_devices = cur_devices->seed;
1503 }
1504
1505 if (first_dev) {
1506 rcu_read_lock();
1507 name = rcu_dereference(first_dev->name);
1508 seq_escape(m, name->str, " \t\n\\");
1509 rcu_read_unlock();
1510 } else {
1511 WARN_ON(1);
1512 }
1513 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1514 return 0;
1515}
1516
1485static const struct super_operations btrfs_super_ops = { 1517static const struct super_operations btrfs_super_ops = {
1486 .drop_inode = btrfs_drop_inode, 1518 .drop_inode = btrfs_drop_inode,
1487 .evict_inode = btrfs_evict_inode, 1519 .evict_inode = btrfs_evict_inode,
1488 .put_super = btrfs_put_super, 1520 .put_super = btrfs_put_super,
1489 .sync_fs = btrfs_sync_fs, 1521 .sync_fs = btrfs_sync_fs,
1490 .show_options = btrfs_show_options, 1522 .show_options = btrfs_show_options,
1523 .show_devname = btrfs_show_devname,
1491 .write_inode = btrfs_write_inode, 1524 .write_inode = btrfs_write_inode,
1492 .dirty_inode = btrfs_fs_dirty_inode, 1525 .dirty_inode = btrfs_fs_dirty_inode,
1493 .alloc_inode = btrfs_alloc_inode, 1526 .alloc_inode = btrfs_alloc_inode,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 1791c6e3d834..b72b068183ec 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -100,6 +100,10 @@ loop:
100 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 100 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
101 cur_trans = fs_info->running_transaction; 101 cur_trans = fs_info->running_transaction;
102 goto loop; 102 goto loop;
103 } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
104 spin_unlock(&root->fs_info->trans_lock);
105 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
106 return -EROFS;
103 } 107 }
104 108
105 atomic_set(&cur_trans->num_writers, 1); 109 atomic_set(&cur_trans->num_writers, 1);
@@ -1213,14 +1217,20 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1213 1217
1214 1218
1215static void cleanup_transaction(struct btrfs_trans_handle *trans, 1219static void cleanup_transaction(struct btrfs_trans_handle *trans,
1216 struct btrfs_root *root) 1220 struct btrfs_root *root, int err)
1217{ 1221{
1218 struct btrfs_transaction *cur_trans = trans->transaction; 1222 struct btrfs_transaction *cur_trans = trans->transaction;
1219 1223
1220 WARN_ON(trans->use_count > 1); 1224 WARN_ON(trans->use_count > 1);
1221 1225
1226 btrfs_abort_transaction(trans, root, err);
1227
1222 spin_lock(&root->fs_info->trans_lock); 1228 spin_lock(&root->fs_info->trans_lock);
1223 list_del_init(&cur_trans->list); 1229 list_del_init(&cur_trans->list);
1230 if (cur_trans == root->fs_info->running_transaction) {
1231 root->fs_info->running_transaction = NULL;
1232 root->fs_info->trans_no_join = 0;
1233 }
1224 spin_unlock(&root->fs_info->trans_lock); 1234 spin_unlock(&root->fs_info->trans_lock);
1225 1235
1226 btrfs_cleanup_one_transaction(trans->transaction, root); 1236 btrfs_cleanup_one_transaction(trans->transaction, root);
@@ -1526,7 +1536,7 @@ cleanup_transaction:
1526// WARN_ON(1); 1536// WARN_ON(1);
1527 if (current->journal_info == trans) 1537 if (current->journal_info == trans)
1528 current->journal_info = NULL; 1538 current->journal_info = NULL;
1529 cleanup_transaction(trans, root); 1539 cleanup_transaction(trans, root, ret);
1530 1540
1531 return ret; 1541 return ret;
1532} 1542}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7782020996fe..8a3d2594b807 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -35,6 +35,7 @@
35#include "volumes.h" 35#include "volumes.h"
36#include "async-thread.h" 36#include "async-thread.h"
37#include "check-integrity.h" 37#include "check-integrity.h"
38#include "rcu-string.h"
38 39
39static int init_first_rw_device(struct btrfs_trans_handle *trans, 40static int init_first_rw_device(struct btrfs_trans_handle *trans,
40 struct btrfs_root *root, 41 struct btrfs_root *root,
@@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
64 device = list_entry(fs_devices->devices.next, 65 device = list_entry(fs_devices->devices.next,
65 struct btrfs_device, dev_list); 66 struct btrfs_device, dev_list);
66 list_del(&device->dev_list); 67 list_del(&device->dev_list);
67 kfree(device->name); 68 rcu_string_free(device->name);
68 kfree(device); 69 kfree(device);
69 } 70 }
70 kfree(fs_devices); 71 kfree(fs_devices);
@@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path,
334{ 335{
335 struct btrfs_device *device; 336 struct btrfs_device *device;
336 struct btrfs_fs_devices *fs_devices; 337 struct btrfs_fs_devices *fs_devices;
338 struct rcu_string *name;
337 u64 found_transid = btrfs_super_generation(disk_super); 339 u64 found_transid = btrfs_super_generation(disk_super);
338 char *name;
339 340
340 fs_devices = find_fsid(disk_super->fsid); 341 fs_devices = find_fsid(disk_super->fsid);
341 if (!fs_devices) { 342 if (!fs_devices) {
@@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path,
369 memcpy(device->uuid, disk_super->dev_item.uuid, 370 memcpy(device->uuid, disk_super->dev_item.uuid,
370 BTRFS_UUID_SIZE); 371 BTRFS_UUID_SIZE);
371 spin_lock_init(&device->io_lock); 372 spin_lock_init(&device->io_lock);
372 device->name = kstrdup(path, GFP_NOFS); 373
373 if (!device->name) { 374 name = rcu_string_strdup(path, GFP_NOFS);
375 if (!name) {
374 kfree(device); 376 kfree(device);
375 return -ENOMEM; 377 return -ENOMEM;
376 } 378 }
379 rcu_assign_pointer(device->name, name);
377 INIT_LIST_HEAD(&device->dev_alloc_list); 380 INIT_LIST_HEAD(&device->dev_alloc_list);
378 381
379 /* init readahead state */ 382 /* init readahead state */
@@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path,
390 393
391 device->fs_devices = fs_devices; 394 device->fs_devices = fs_devices;
392 fs_devices->num_devices++; 395 fs_devices->num_devices++;
393 } else if (!device->name || strcmp(device->name, path)) { 396 } else if (!device->name || strcmp(device->name->str, path)) {
394 name = kstrdup(path, GFP_NOFS); 397 name = rcu_string_strdup(path, GFP_NOFS);
395 if (!name) 398 if (!name)
396 return -ENOMEM; 399 return -ENOMEM;
397 kfree(device->name); 400 rcu_string_free(device->name);
398 device->name = name; 401 rcu_assign_pointer(device->name, name);
399 if (device->missing) { 402 if (device->missing) {
400 fs_devices->missing_devices--; 403 fs_devices->missing_devices--;
401 device->missing = 0; 404 device->missing = 0;
@@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
430 433
431 /* We have held the volume lock, it is safe to get the devices. */ 434 /* We have held the volume lock, it is safe to get the devices. */
432 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 435 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
436 struct rcu_string *name;
437
433 device = kzalloc(sizeof(*device), GFP_NOFS); 438 device = kzalloc(sizeof(*device), GFP_NOFS);
434 if (!device) 439 if (!device)
435 goto error; 440 goto error;
436 441
437 device->name = kstrdup(orig_dev->name, GFP_NOFS); 442 /*
438 if (!device->name) { 443 * This is ok to do without rcu read locked because we hold the
444 * uuid mutex so nothing we touch in here is going to disappear.
445 */
446 name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
447 if (!name) {
439 kfree(device); 448 kfree(device);
440 goto error; 449 goto error;
441 } 450 }
451 rcu_assign_pointer(device->name, name);
442 452
443 device->devid = orig_dev->devid; 453 device->devid = orig_dev->devid;
444 device->work.func = pending_bios_fn; 454 device->work.func = pending_bios_fn;
@@ -491,7 +501,7 @@ again:
491 } 501 }
492 list_del_init(&device->dev_list); 502 list_del_init(&device->dev_list);
493 fs_devices->num_devices--; 503 fs_devices->num_devices--;
494 kfree(device->name); 504 rcu_string_free(device->name);
495 kfree(device); 505 kfree(device);
496 } 506 }
497 507
@@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work)
516 if (device->bdev) 526 if (device->bdev)
517 blkdev_put(device->bdev, device->mode); 527 blkdev_put(device->bdev, device->mode);
518 528
519 kfree(device->name); 529 rcu_string_free(device->name);
520 kfree(device); 530 kfree(device);
521} 531}
522 532
@@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
540 mutex_lock(&fs_devices->device_list_mutex); 550 mutex_lock(&fs_devices->device_list_mutex);
541 list_for_each_entry(device, &fs_devices->devices, dev_list) { 551 list_for_each_entry(device, &fs_devices->devices, dev_list) {
542 struct btrfs_device *new_device; 552 struct btrfs_device *new_device;
553 struct rcu_string *name;
543 554
544 if (device->bdev) 555 if (device->bdev)
545 fs_devices->open_devices--; 556 fs_devices->open_devices--;
@@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
555 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 566 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
556 BUG_ON(!new_device); /* -ENOMEM */ 567 BUG_ON(!new_device); /* -ENOMEM */
557 memcpy(new_device, device, sizeof(*new_device)); 568 memcpy(new_device, device, sizeof(*new_device));
558 new_device->name = kstrdup(device->name, GFP_NOFS); 569
559 BUG_ON(device->name && !new_device->name); /* -ENOMEM */ 570 /* Safe because we are under uuid_mutex */
571 name = rcu_string_strdup(device->name->str, GFP_NOFS);
572 BUG_ON(device->name && !name); /* -ENOMEM */
573 rcu_assign_pointer(new_device->name, name);
560 new_device->bdev = NULL; 574 new_device->bdev = NULL;
561 new_device->writeable = 0; 575 new_device->writeable = 0;
562 new_device->in_fs_metadata = 0; 576 new_device->in_fs_metadata = 0;
@@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
621 if (!device->name) 635 if (!device->name)
622 continue; 636 continue;
623 637
624 bdev = blkdev_get_by_path(device->name, flags, holder); 638 bdev = blkdev_get_by_path(device->name->str, flags, holder);
625 if (IS_ERR(bdev)) { 639 if (IS_ERR(bdev)) {
626 printk(KERN_INFO "open %s failed\n", device->name); 640 printk(KERN_INFO "open %s failed\n", device->name->str);
627 goto error; 641 goto error;
628 } 642 }
629 filemap_write_and_wait(bdev->bd_inode->i_mapping); 643 filemap_write_and_wait(bdev->bd_inode->i_mapping);
@@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1632 struct block_device *bdev; 1646 struct block_device *bdev;
1633 struct list_head *devices; 1647 struct list_head *devices;
1634 struct super_block *sb = root->fs_info->sb; 1648 struct super_block *sb = root->fs_info->sb;
1649 struct rcu_string *name;
1635 u64 total_bytes; 1650 u64 total_bytes;
1636 int seeding_dev = 0; 1651 int seeding_dev = 0;
1637 int ret = 0; 1652 int ret = 0;
@@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1671 goto error; 1686 goto error;
1672 } 1687 }
1673 1688
1674 device->name = kstrdup(device_path, GFP_NOFS); 1689 name = rcu_string_strdup(device_path, GFP_NOFS);
1675 if (!device->name) { 1690 if (!name) {
1676 kfree(device); 1691 kfree(device);
1677 ret = -ENOMEM; 1692 ret = -ENOMEM;
1678 goto error; 1693 goto error;
1679 } 1694 }
1695 rcu_assign_pointer(device->name, name);
1680 1696
1681 ret = find_next_devid(root, &device->devid); 1697 ret = find_next_devid(root, &device->devid);
1682 if (ret) { 1698 if (ret) {
1683 kfree(device->name); 1699 rcu_string_free(device->name);
1684 kfree(device); 1700 kfree(device);
1685 goto error; 1701 goto error;
1686 } 1702 }
1687 1703
1688 trans = btrfs_start_transaction(root, 0); 1704 trans = btrfs_start_transaction(root, 0);
1689 if (IS_ERR(trans)) { 1705 if (IS_ERR(trans)) {
1690 kfree(device->name); 1706 rcu_string_free(device->name);
1691 kfree(device); 1707 kfree(device);
1692 ret = PTR_ERR(trans); 1708 ret = PTR_ERR(trans);
1693 goto error; 1709 goto error;
@@ -1796,7 +1812,7 @@ error_trans:
1796 unlock_chunks(root); 1812 unlock_chunks(root);
1797 btrfs_abort_transaction(trans, root, ret); 1813 btrfs_abort_transaction(trans, root, ret);
1798 btrfs_end_transaction(trans, root); 1814 btrfs_end_transaction(trans, root);
1799 kfree(device->name); 1815 rcu_string_free(device->name);
1800 kfree(device); 1816 kfree(device);
1801error: 1817error:
1802 blkdev_put(bdev, FMODE_EXCL); 1818 blkdev_put(bdev, FMODE_EXCL);
@@ -4204,10 +4220,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4204 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; 4220 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
4205 dev = bbio->stripes[dev_nr].dev; 4221 dev = bbio->stripes[dev_nr].dev;
4206 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { 4222 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
4223#ifdef DEBUG
4224 struct rcu_string *name;
4225
4226 rcu_read_lock();
4227 name = rcu_dereference(dev->name);
4207 pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " 4228 pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
4208 "(%s id %llu), size=%u\n", rw, 4229 "(%s id %llu), size=%u\n", rw,
4209 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, 4230 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
4210 dev->name, dev->devid, bio->bi_size); 4231 name->str, dev->devid, bio->bi_size);
4232 rcu_read_unlock();
4233#endif
4211 bio->bi_bdev = dev->bdev; 4234 bio->bi_bdev = dev->bdev;
4212 if (async_submit) 4235 if (async_submit)
4213 schedule_bio(root, dev, rw, bio); 4236 schedule_bio(root, dev, rw, bio);
@@ -4694,8 +4717,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
4694 key.offset = device->devid; 4717 key.offset = device->devid;
4695 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); 4718 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
4696 if (ret) { 4719 if (ret) {
4697 printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", 4720 printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
4698 device->name, (unsigned long long)device->devid); 4721 rcu_str_deref(device->name),
4722 (unsigned long long)device->devid);
4699 __btrfs_reset_dev_stats(device); 4723 __btrfs_reset_dev_stats(device);
4700 device->dev_stats_valid = 1; 4724 device->dev_stats_valid = 1;
4701 btrfs_release_path(path); 4725 btrfs_release_path(path);
@@ -4747,8 +4771,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4747 BUG_ON(!path); 4771 BUG_ON(!path);
4748 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); 4772 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
4749 if (ret < 0) { 4773 if (ret < 0) {
4750 printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", 4774 printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
4751 ret, device->name); 4775 ret, rcu_str_deref(device->name));
4752 goto out; 4776 goto out;
4753 } 4777 }
4754 4778
@@ -4757,8 +4781,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4757 /* need to delete old one and insert a new one */ 4781 /* need to delete old one and insert a new one */
4758 ret = btrfs_del_item(trans, dev_root, path); 4782 ret = btrfs_del_item(trans, dev_root, path);
4759 if (ret != 0) { 4783 if (ret != 0) {
4760 printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", 4784 printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
4761 device->name, ret); 4785 rcu_str_deref(device->name), ret);
4762 goto out; 4786 goto out;
4763 } 4787 }
4764 ret = 1; 4788 ret = 1;
@@ -4770,8 +4794,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4770 ret = btrfs_insert_empty_item(trans, dev_root, path, 4794 ret = btrfs_insert_empty_item(trans, dev_root, path,
4771 &key, sizeof(*ptr)); 4795 &key, sizeof(*ptr));
4772 if (ret < 0) { 4796 if (ret < 0) {
4773 printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", 4797 printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
4774 device->name, ret); 4798 rcu_str_deref(device->name), ret);
4775 goto out; 4799 goto out;
4776 } 4800 }
4777 } 4801 }
@@ -4823,9 +4847,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4823{ 4847{
4824 if (!dev->dev_stats_valid) 4848 if (!dev->dev_stats_valid)
4825 return; 4849 return;
4826 printk_ratelimited(KERN_ERR 4850 printk_ratelimited_in_rcu(KERN_ERR
4827 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4851 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4828 dev->name, 4852 rcu_str_deref(dev->name),
4829 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4853 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4830 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4854 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4831 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), 4855 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
@@ -4837,8 +4861,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4837 4861
4838static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) 4862static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
4839{ 4863{
4840 printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4864 printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4841 dev->name, 4865 rcu_str_deref(dev->name),
4842 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4866 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4843 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4867 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4844 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), 4868 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3406a88ca83e..74366f27a76b 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -58,7 +58,7 @@ struct btrfs_device {
58 /* the mode sent to blkdev_get */ 58 /* the mode sent to blkdev_get */
59 fmode_t mode; 59 fmode_t mode;
60 60
61 char *name; 61 struct rcu_string *name;
62 62
63 /* the internal btrfs device id */ 63 /* the internal btrfs device id */
64 u64 devid; 64 u64 devid;