aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-06-15 19:04:37 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-06-15 19:04:37 -0400
commit 718f58ad61810b7d4a6c9178185495f762e90807 (patch)
tree 6f61597bea9b8dadeee13a888b969d021e5ec05d /fs
parent 424d54d2dca03805942055e5b19926d33a7d1e31 (diff)
parent 9c106405ddf893fcd04cd46555464417d2df8451 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason:

"The dates look like I had to rebase this morning because there was a compiler warning for a printk arg that I had missed earlier. These are all fixes, including one to prevent using stale pointers for device names, and lots of fixes around transaction abort cleanups (Josef, Liu Bo). Jan Schmidt also sent in a number of fixes for the new reference number tracking code. Liu Bo beat me to updating the MAINTAINERS file. Since he thought to also fix the git url, I kept his commit."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (24 commits)
  Btrfs: update MAINTAINERS info for BTRFS FILE SYSTEM
  Btrfs: destroy the items of the delayed inodes in error handling routine
  Btrfs: make sure that we've made everything in pinned tree clean
  Btrfs: avoid memory leak of extent state in error handling routine
  Btrfs: do not resize a seeding device
  Btrfs: fix missing inherited flag in rename
  Btrfs: fix incompat flags setting
  Btrfs: fix defrag regression
  Btrfs: call filemap_fdatawrite twice for compression
  Btrfs: keep inode pinned when compressing writes
  Btrfs: implement ->show_devname
  Btrfs: use rcu to protect device->name
  Btrfs: unlock everything properly in the error case for nocow
  Btrfs: fix btrfs_destroy_marked_extents
  Btrfs: abort the transaction if the commit fails
  Btrfs: wake up transaction waiters when aborting a transaction
  Btrfs: fix locking in btrfs_destroy_delayed_refs
  Btrfs: pass locked_page into extent_clear_unlock_delalloc if theres an error
  Btrfs: fix race in tree mod log addition
  Btrfs: add btrfs_next_old_leaf
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/backref.c 17
-rw-r--r-- fs/btrfs/btrfs_inode.h 1
-rw-r--r-- fs/btrfs/check-integrity.c 16
-rw-r--r-- fs/btrfs/ctree.c 86
-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/delayed-inode.c 18
-rw-r--r-- fs/btrfs/delayed-inode.h 3
-rw-r--r-- fs/btrfs/disk-io.c 76
-rw-r--r-- fs/btrfs/extent_io.c 7
-rw-r--r-- fs/btrfs/inode.c 73
-rw-r--r-- fs/btrfs/ioctl.c 117
-rw-r--r-- fs/btrfs/ordered-data.c 22
-rw-r--r-- fs/btrfs/rcu-string.h 56
-rw-r--r-- fs/btrfs/scrub.c 30
-rw-r--r-- fs/btrfs/super.c 33
-rw-r--r-- fs/btrfs/transaction.c 14
-rw-r--r-- fs/btrfs/volumes.c 92
-rw-r--r-- fs/btrfs/volumes.h 2
18 files changed, 476 insertions, 189 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 3f75895c919b..8f7d1237b7a0 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -179,7 +179,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
179 179
180static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, 180static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
181 struct ulist *parents, int level, 181 struct ulist *parents, int level,
182 struct btrfs_key *key, u64 wanted_disk_byte, 182 struct btrfs_key *key, u64 time_seq,
183 u64 wanted_disk_byte,
183 const u64 *extent_item_pos) 184 const u64 *extent_item_pos)
184{ 185{
185 int ret; 186 int ret;
@@ -212,7 +213,7 @@ add_parent:
212 */ 213 */
213 while (1) { 214 while (1) {
214 eie = NULL; 215 eie = NULL;
215 ret = btrfs_next_leaf(root, path); 216 ret = btrfs_next_old_leaf(root, path, time_seq);
216 if (ret < 0) 217 if (ret < 0)
217 return ret; 218 return ret;
218 if (ret) 219 if (ret)
@@ -294,18 +295,10 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
294 goto out; 295 goto out;
295 } 296 }
296 297
297 if (level == 0) { 298 if (level == 0)
298 if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) {
299 ret = btrfs_next_leaf(root, path);
300 if (ret)
301 goto out;
302 eb = path->nodes[0];
303 }
304
305 btrfs_item_key_to_cpu(eb, &key, path->slots[0]); 299 btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
306 }
307 300
308 ret = add_all_parents(root, path, parents, level, &key, 301 ret = add_all_parents(root, path, parents, level, &key, time_seq,
309 ref->wanted_disk_byte, extent_item_pos); 302 ref->wanted_disk_byte, extent_item_pos);
310out: 303out:
311 btrfs_free_path(path); 304 btrfs_free_path(path);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index e616f8872e69..12394a90d60f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -37,6 +37,7 @@
37#define BTRFS_INODE_IN_DEFRAG 3 37#define BTRFS_INODE_IN_DEFRAG 3
38#define BTRFS_INODE_DELALLOC_META_RESERVED 4 38#define BTRFS_INODE_DELALLOC_META_RESERVED 4
39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5 39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
40#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
40 41
41/* in memory btrfs inode */ 42/* in memory btrfs inode */
42struct btrfs_inode { 43struct btrfs_inode {
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 9cebb1fd6a3c..da6e9364a5e3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -93,6 +93,7 @@
93#include "print-tree.h" 93#include "print-tree.h"
94#include "locking.h" 94#include "locking.h"
95#include "check-integrity.h" 95#include "check-integrity.h"
96#include "rcu-string.h"
96 97
97#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 98#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
98#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 99#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
@@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror(
843 superblock_tmp->never_written = 0; 844 superblock_tmp->never_written = 0;
844 superblock_tmp->mirror_num = 1 + superblock_mirror_num; 845 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
845 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 846 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
846 printk(KERN_INFO "New initial S-block (bdev %p, %s)" 847 printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
847 " @%llu (%s/%llu/%d)\n", 848 " @%llu (%s/%llu/%d)\n",
848 superblock_bdev, device->name, 849 superblock_bdev,
849 (unsigned long long)dev_bytenr, 850 rcu_str_deref(device->name),
850 dev_state->name, 851 (unsigned long long)dev_bytenr,
851 (unsigned long long)dev_bytenr, 852 dev_state->name,
852 superblock_mirror_num); 853 (unsigned long long)dev_bytenr,
854 superblock_mirror_num);
853 list_add(&superblock_tmp->all_blocks_node, 855 list_add(&superblock_tmp->all_blocks_node,
854 &state->all_blocks_list); 856 &state->all_blocks_list);
855 btrfsic_block_hashtable_add(superblock_tmp, 857 btrfsic_block_hashtable_add(superblock_tmp,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d7a96cfdc50a..04b06bcf2ca9 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
467 return 0; 467 return 0;
468} 468}
469 469
470/*
471 * This allocates memory and gets a tree modification sequence number when
472 * needed.
473 *
474 * Returns 0 when no sequence number is needed, < 0 on error.
475 * Returns 1 when a sequence number was added. In this case,
476 * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
477 * after inserting into the rb tree.
478 */
470static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, 479static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
471 struct tree_mod_elem **tm_ret) 480 struct tree_mod_elem **tm_ret)
472{ 481{
@@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
491 */ 500 */
492 kfree(tm); 501 kfree(tm);
493 seq = 0; 502 seq = 0;
503 spin_unlock(&fs_info->tree_mod_seq_lock);
494 } else { 504 } else {
495 __get_tree_mod_seq(fs_info, &tm->elem); 505 __get_tree_mod_seq(fs_info, &tm->elem);
496 seq = tm->elem.seq; 506 seq = tm->elem.seq;
497 } 507 }
498 spin_unlock(&fs_info->tree_mod_seq_lock);
499 508
500 return seq; 509 return seq;
501} 510}
@@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
521 tm->slot = slot; 530 tm->slot = slot;
522 tm->generation = btrfs_node_ptr_generation(eb, slot); 531 tm->generation = btrfs_node_ptr_generation(eb, slot);
523 532
524 return __tree_mod_log_insert(fs_info, tm); 533 ret = __tree_mod_log_insert(fs_info, tm);
534 spin_unlock(&fs_info->tree_mod_seq_lock);
535 return ret;
525} 536}
526 537
527static noinline int 538static noinline int
@@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
559 tm->move.nr_items = nr_items; 570 tm->move.nr_items = nr_items;
560 tm->op = MOD_LOG_MOVE_KEYS; 571 tm->op = MOD_LOG_MOVE_KEYS;
561 572
562 return __tree_mod_log_insert(fs_info, tm); 573 ret = __tree_mod_log_insert(fs_info, tm);
574 spin_unlock(&fs_info->tree_mod_seq_lock);
575 return ret;
563} 576}
564 577
565static noinline int 578static noinline int
@@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
580 tm->generation = btrfs_header_generation(old_root); 593 tm->generation = btrfs_header_generation(old_root);
581 tm->op = MOD_LOG_ROOT_REPLACE; 594 tm->op = MOD_LOG_ROOT_REPLACE;
582 595
583 return __tree_mod_log_insert(fs_info, tm); 596 ret = __tree_mod_log_insert(fs_info, tm);
597 spin_unlock(&fs_info->tree_mod_seq_lock);
598 return ret;
584} 599}
585 600
586static struct tree_mod_elem * 601static struct tree_mod_elem *
@@ -1023,6 +1038,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1023 looped = 1; 1038 looped = 1;
1024 } 1039 }
1025 1040
1041 /* if there's no old root to return, return what we found instead */
1042 if (!found)
1043 found = tm;
1044
1026 return found; 1045 return found;
1027} 1046}
1028 1047
@@ -1143,22 +1162,36 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1143 return eb_rewin; 1162 return eb_rewin;
1144} 1163}
1145 1164
1165/*
1166 * get_old_root() rewinds the state of @root's root node to the given @time_seq
1167 * value. If there are no changes, the current root->root_node is returned. If
1168 * anything changed in between, there's a fresh buffer allocated on which the
1169 * rewind operations are done. In any case, the returned buffer is read locked.
1170 * Returns NULL on error (with no locks held).
1171 */
1146static inline struct extent_buffer * 1172static inline struct extent_buffer *
1147get_old_root(struct btrfs_root *root, u64 time_seq) 1173get_old_root(struct btrfs_root *root, u64 time_seq)
1148{ 1174{
1149 struct tree_mod_elem *tm; 1175 struct tree_mod_elem *tm;
1150 struct extent_buffer *eb; 1176 struct extent_buffer *eb;
1151 struct tree_mod_root *old_root; 1177 struct tree_mod_root *old_root = NULL;
1152 u64 old_generation; 1178 u64 old_generation;
1179 u64 logical;
1153 1180
1181 eb = btrfs_read_lock_root_node(root);
1154 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); 1182 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
1155 if (!tm) 1183 if (!tm)
1156 return root->node; 1184 return root->node;
1157 1185
1158 old_root = &tm->old_root; 1186 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1159 old_generation = tm->generation; 1187 old_root = &tm->old_root;
1188 old_generation = tm->generation;
1189 logical = old_root->logical;
1190 } else {
1191 logical = root->node->start;
1192 }
1160 1193
1161 tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); 1194 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
1162 /* 1195 /*
1163 * there was an item in the log when __tree_mod_log_oldest_root 1196 * there was an item in the log when __tree_mod_log_oldest_root
1164 * returned. this one must not go away, because the time_seq passed to 1197 * returned. this one must not go away, because the time_seq passed to
@@ -1166,22 +1199,25 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1166 */ 1199 */
1167 BUG_ON(!tm); 1200 BUG_ON(!tm);
1168 1201
1169 if (old_root->logical == root->node->start) { 1202 if (old_root)
1170 /* there are logged operations for the current root */
1171 eb = btrfs_clone_extent_buffer(root->node);
1172 } else {
1173 /* there's a root replace operation for the current root */
1174 eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, 1203 eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
1175 root->nodesize); 1204 root->nodesize);
1205 else
1206 eb = btrfs_clone_extent_buffer(root->node);
1207 btrfs_tree_read_unlock(root->node);
1208 free_extent_buffer(root->node);
1209 if (!eb)
1210 return NULL;
1211 btrfs_tree_read_lock(eb);
1212 if (old_root) {
1176 btrfs_set_header_bytenr(eb, eb->start); 1213 btrfs_set_header_bytenr(eb, eb->start);
1177 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); 1214 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
1178 btrfs_set_header_owner(eb, root->root_key.objectid); 1215 btrfs_set_header_owner(eb, root->root_key.objectid);
1216 btrfs_set_header_level(eb, old_root->level);
1217 btrfs_set_header_generation(eb, old_generation);
1179 } 1218 }
1180 if (!eb)
1181 return NULL;
1182 btrfs_set_header_level(eb, old_root->level);
1183 btrfs_set_header_generation(eb, old_generation);
1184 __tree_mod_log_rewind(eb, time_seq, tm); 1219 __tree_mod_log_rewind(eb, time_seq, tm);
1220 extent_buffer_get(eb);
1185 1221
1186 return eb; 1222 return eb;
1187} 1223}
@@ -1650,8 +1686,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1650 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1686 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
1651 return 0; 1687 return 0;
1652 1688
1653 btrfs_header_nritems(mid);
1654
1655 left = read_node_slot(root, parent, pslot - 1); 1689 left = read_node_slot(root, parent, pslot - 1);
1656 if (left) { 1690 if (left) {
1657 btrfs_tree_lock(left); 1691 btrfs_tree_lock(left);
@@ -1681,7 +1715,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1681 wret = push_node_left(trans, root, left, mid, 1); 1715 wret = push_node_left(trans, root, left, mid, 1);
1682 if (wret < 0) 1716 if (wret < 0)
1683 ret = wret; 1717 ret = wret;
1684 btrfs_header_nritems(mid);
1685 } 1718 }
1686 1719
1687 /* 1720 /*
@@ -2615,9 +2648,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2615 2648
2616again: 2649again:
2617 b = get_old_root(root, time_seq); 2650 b = get_old_root(root, time_seq);
2618 extent_buffer_get(b);
2619 level = btrfs_header_level(b); 2651 level = btrfs_header_level(b);
2620 btrfs_tree_read_lock(b);
2621 p->locks[level] = BTRFS_READ_LOCK; 2652 p->locks[level] = BTRFS_READ_LOCK;
2622 2653
2623 while (b) { 2654 while (b) {
@@ -5001,6 +5032,12 @@ next:
5001 */ 5032 */
5002int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 5033int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
5003{ 5034{
5035 return btrfs_next_old_leaf(root, path, 0);
5036}
5037
5038int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
5039 u64 time_seq)
5040{
5004 int slot; 5041 int slot;
5005 int level; 5042 int level;
5006 struct extent_buffer *c; 5043 struct extent_buffer *c;
@@ -5025,7 +5062,10 @@ again:
5025 path->keep_locks = 1; 5062 path->keep_locks = 1;
5026 path->leave_spinning = 1; 5063 path->leave_spinning = 1;
5027 5064
5028 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5065 if (time_seq)
5066 ret = btrfs_search_old_slot(root, &key, path, time_seq);
5067 else
5068 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5029 path->keep_locks = 0; 5069 path->keep_locks = 0;
5030 5070
5031 if (ret < 0) 5071 if (ret < 0)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0236d03c6732..8b73b2d4deb7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2753,6 +2753,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2753} 2753}
2754 2754
2755int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 2755int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
2756int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
2757 u64 time_seq);
2756static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) 2758static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
2757{ 2759{
2758 ++p->slots[0]; 2760 ++p->slots[0];
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index c18d0442ae6d..2399f4086915 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1879,3 +1879,21 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1879 } 1879 }
1880 } 1880 }
1881} 1881}
1882
1883void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
1884{
1885 struct btrfs_delayed_root *delayed_root;
1886 struct btrfs_delayed_node *curr_node, *prev_node;
1887
1888 delayed_root = btrfs_get_delayed_root(root);
1889
1890 curr_node = btrfs_first_delayed_node(delayed_root);
1891 while (curr_node) {
1892 __btrfs_kill_delayed_node(curr_node);
1893
1894 prev_node = curr_node;
1895 curr_node = btrfs_next_delayed_node(curr_node);
1896 btrfs_release_delayed_node(prev_node);
1897 }
1898}
1899
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 7083d08b2a21..f5aa4023d3e1 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -124,6 +124,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev);
124/* Used for drop dead root */ 124/* Used for drop dead root */
125void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); 125void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
126 126
127/* Used for clean the transaction */
128void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
129
127/* Used for readdir() */ 130/* Used for readdir() */
128void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, 131void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
129 struct list_head *del_list); 132 struct list_head *del_list);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7ae51decf6d3..e1890b1d3075 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,7 @@
44#include "free-space-cache.h" 44#include "free-space-cache.h"
45#include "inode-map.h" 45#include "inode-map.h"
46#include "check-integrity.h" 46#include "check-integrity.h"
47#include "rcu-string.h"
47 48
48static struct extent_io_ops btree_extent_io_ops; 49static struct extent_io_ops btree_extent_io_ops;
49static void end_workqueue_fn(struct btrfs_work *work); 50static void end_workqueue_fn(struct btrfs_work *work);
@@ -2118,7 +2119,7 @@ int open_ctree(struct super_block *sb,
2118 2119
2119 features = btrfs_super_incompat_flags(disk_super); 2120 features = btrfs_super_incompat_flags(disk_super);
2120 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 2121 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
2121 if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) 2122 if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
2122 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2123 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2123 2124
2124 /* 2125 /*
@@ -2575,8 +2576,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
2575 struct btrfs_device *device = (struct btrfs_device *) 2576 struct btrfs_device *device = (struct btrfs_device *)
2576 bh->b_private; 2577 bh->b_private;
2577 2578
2578 printk_ratelimited(KERN_WARNING "lost page write due to " 2579 printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
2579 "I/O error on %s\n", device->name); 2580 "I/O error on %s\n",
2581 rcu_str_deref(device->name));
2580 /* note, we dont' set_buffer_write_io_error because we have 2582 /* note, we dont' set_buffer_write_io_error because we have
2581 * our own ways of dealing with the IO errors 2583 * our own ways of dealing with the IO errors
2582 */ 2584 */
@@ -2749,8 +2751,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
2749 wait_for_completion(&device->flush_wait); 2751 wait_for_completion(&device->flush_wait);
2750 2752
2751 if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 2753 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
2752 printk("btrfs: disabling barriers on dev %s\n", 2754 printk_in_rcu("btrfs: disabling barriers on dev %s\n",
2753 device->name); 2755 rcu_str_deref(device->name));
2754 device->nobarriers = 1; 2756 device->nobarriers = 1;
2755 } 2757 }
2756 if (!bio_flagged(bio, BIO_UPTODATE)) { 2758 if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3400,7 +3402,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3400 3402
3401 delayed_refs = &trans->delayed_refs; 3403 delayed_refs = &trans->delayed_refs;
3402 3404
3403again:
3404 spin_lock(&delayed_refs->lock); 3405 spin_lock(&delayed_refs->lock);
3405 if (delayed_refs->num_entries == 0) { 3406 if (delayed_refs->num_entries == 0) {
3406 spin_unlock(&delayed_refs->lock); 3407 spin_unlock(&delayed_refs->lock);
@@ -3408,31 +3409,36 @@ again:
3408 return ret; 3409 return ret;
3409 } 3410 }
3410 3411
3411 node = rb_first(&delayed_refs->root); 3412 while ((node = rb_first(&delayed_refs->root)) != NULL) {
3412 while (node) {
3413 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3413 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
3414 node = rb_next(node);
3415
3416 ref->in_tree = 0;
3417 rb_erase(&ref->rb_node, &delayed_refs->root);
3418 delayed_refs->num_entries--;
3419 3414
3420 atomic_set(&ref->refs, 1); 3415 atomic_set(&ref->refs, 1);
3421 if (btrfs_delayed_ref_is_head(ref)) { 3416 if (btrfs_delayed_ref_is_head(ref)) {
3422 struct btrfs_delayed_ref_head *head; 3417 struct btrfs_delayed_ref_head *head;
3423 3418
3424 head = btrfs_delayed_node_to_head(ref); 3419 head = btrfs_delayed_node_to_head(ref);
3425 spin_unlock(&delayed_refs->lock); 3420 if (!mutex_trylock(&head->mutex)) {
3426 mutex_lock(&head->mutex); 3421 atomic_inc(&ref->refs);
3422 spin_unlock(&delayed_refs->lock);
3423
3424 /* Need to wait for the delayed ref to run */
3425 mutex_lock(&head->mutex);
3426 mutex_unlock(&head->mutex);
3427 btrfs_put_delayed_ref(ref);
3428
3429 continue;
3430 }
3431
3427 kfree(head->extent_op); 3432 kfree(head->extent_op);
3428 delayed_refs->num_heads--; 3433 delayed_refs->num_heads--;
3429 if (list_empty(&head->cluster)) 3434 if (list_empty(&head->cluster))
3430 delayed_refs->num_heads_ready--; 3435 delayed_refs->num_heads_ready--;
3431 list_del_init(&head->cluster); 3436 list_del_init(&head->cluster);
3432 mutex_unlock(&head->mutex);
3433 btrfs_put_delayed_ref(ref);
3434 goto again;
3435 } 3437 }
3438 ref->in_tree = 0;
3439 rb_erase(&ref->rb_node, &delayed_refs->root);
3440 delayed_refs->num_entries--;
3441
3436 spin_unlock(&delayed_refs->lock); 3442 spin_unlock(&delayed_refs->lock);
3437 btrfs_put_delayed_ref(ref); 3443 btrfs_put_delayed_ref(ref);
3438 3444
@@ -3520,11 +3526,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3520 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, 3526 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
3521 offset >> PAGE_CACHE_SHIFT); 3527 offset >> PAGE_CACHE_SHIFT);
3522 spin_unlock(&dirty_pages->buffer_lock); 3528 spin_unlock(&dirty_pages->buffer_lock);
3523 if (eb) { 3529 if (eb)
3524 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, 3530 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
3525 &eb->bflags); 3531 &eb->bflags);
3526 atomic_set(&eb->refs, 1);
3527 }
3528 if (PageWriteback(page)) 3532 if (PageWriteback(page))
3529 end_page_writeback(page); 3533 end_page_writeback(page);
3530 3534
@@ -3538,8 +3542,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3538 spin_unlock_irq(&page->mapping->tree_lock); 3542 spin_unlock_irq(&page->mapping->tree_lock);
3539 } 3543 }
3540 3544
3541 page->mapping->a_ops->invalidatepage(page, 0);
3542 unlock_page(page); 3545 unlock_page(page);
3546 page_cache_release(page);
3543 } 3547 }
3544 } 3548 }
3545 3549
@@ -3553,8 +3557,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
3553 u64 start; 3557 u64 start;
3554 u64 end; 3558 u64 end;
3555 int ret; 3559 int ret;
3560 bool loop = true;
3556 3561
3557 unpin = pinned_extents; 3562 unpin = pinned_extents;
3563again:
3558 while (1) { 3564 while (1) {
3559 ret = find_first_extent_bit(unpin, 0, &start, &end, 3565 ret = find_first_extent_bit(unpin, 0, &start, &end,
3560 EXTENT_DIRTY); 3566 EXTENT_DIRTY);
@@ -3572,6 +3578,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
3572 cond_resched(); 3578 cond_resched();
3573 } 3579 }
3574 3580
3581 if (loop) {
3582 if (unpin == &root->fs_info->freed_extents[0])
3583 unpin = &root->fs_info->freed_extents[1];
3584 else
3585 unpin = &root->fs_info->freed_extents[0];
3586 loop = false;
3587 goto again;
3588 }
3589
3575 return 0; 3590 return 0;
3576} 3591}
3577 3592
@@ -3585,21 +3600,23 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3585 /* FIXME: cleanup wait for commit */ 3600 /* FIXME: cleanup wait for commit */
3586 cur_trans->in_commit = 1; 3601 cur_trans->in_commit = 1;
3587 cur_trans->blocked = 1; 3602 cur_trans->blocked = 1;
3588 if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 3603 wake_up(&root->fs_info->transaction_blocked_wait);
3589 wake_up(&root->fs_info->transaction_blocked_wait);
3590 3604
3591 cur_trans->blocked = 0; 3605 cur_trans->blocked = 0;
3592 if (waitqueue_active(&root->fs_info->transaction_wait)) 3606 wake_up(&root->fs_info->transaction_wait);
3593 wake_up(&root->fs_info->transaction_wait);
3594 3607
3595 cur_trans->commit_done = 1; 3608 cur_trans->commit_done = 1;
3596 if (waitqueue_active(&cur_trans->commit_wait)) 3609 wake_up(&cur_trans->commit_wait);
3597 wake_up(&cur_trans->commit_wait); 3610
3611 btrfs_destroy_delayed_inodes(root);
3612 btrfs_assert_delayed_root_empty(root);
3598 3613
3599 btrfs_destroy_pending_snapshots(cur_trans); 3614 btrfs_destroy_pending_snapshots(cur_trans);
3600 3615
3601 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, 3616 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
3602 EXTENT_DIRTY); 3617 EXTENT_DIRTY);
3618 btrfs_destroy_pinned_extent(root,
3619 root->fs_info->pinned_extents);
3603 3620
3604 /* 3621 /*
3605 memset(cur_trans, 0, sizeof(*cur_trans)); 3622 memset(cur_trans, 0, sizeof(*cur_trans));
@@ -3648,6 +3665,9 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3648 if (waitqueue_active(&t->commit_wait)) 3665 if (waitqueue_active(&t->commit_wait))
3649 wake_up(&t->commit_wait); 3666 wake_up(&t->commit_wait);
3650 3667
3668 btrfs_destroy_delayed_inodes(root);
3669 btrfs_assert_delayed_root_empty(root);
3670
3651 btrfs_destroy_pending_snapshots(t); 3671 btrfs_destroy_pending_snapshots(t);
3652 3672
3653 btrfs_destroy_delalloc_inodes(root); 3673 btrfs_destroy_delalloc_inodes(root);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2c8f7b204617..aaa12c1eb348 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -20,6 +20,7 @@
20#include "volumes.h" 20#include "volumes.h"
21#include "check-integrity.h" 21#include "check-integrity.h"
22#include "locking.h" 22#include "locking.h"
23#include "rcu-string.h"
23 24
24static struct kmem_cache *extent_state_cache; 25static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache; 26static struct kmem_cache *extent_buffer_cache;
@@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
1917 return -EIO; 1918 return -EIO;
1918 } 1919 }
1919 1920
1920 printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " 1921 printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
1921 "sector %llu)\n", page->mapping->host->i_ino, start, 1922 "(dev %s sector %llu)\n", page->mapping->host->i_ino,
1922 dev->name, sector); 1923 start, rcu_str_deref(dev->name), sector);
1923 1924
1924 bio_put(bio); 1925 bio_put(bio);
1925 return 0; 1926 return 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f6ab6f5e635a..a4f02501da40 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode,
830 if (IS_ERR(trans)) { 830 if (IS_ERR(trans)) {
831 extent_clear_unlock_delalloc(inode, 831 extent_clear_unlock_delalloc(inode,
832 &BTRFS_I(inode)->io_tree, 832 &BTRFS_I(inode)->io_tree,
833 start, end, NULL, 833 start, end, locked_page,
834 EXTENT_CLEAR_UNLOCK_PAGE | 834 EXTENT_CLEAR_UNLOCK_PAGE |
835 EXTENT_CLEAR_UNLOCK | 835 EXTENT_CLEAR_UNLOCK |
836 EXTENT_CLEAR_DELALLOC | 836 EXTENT_CLEAR_DELALLOC |
@@ -963,7 +963,7 @@ out:
963out_unlock: 963out_unlock:
964 extent_clear_unlock_delalloc(inode, 964 extent_clear_unlock_delalloc(inode,
965 &BTRFS_I(inode)->io_tree, 965 &BTRFS_I(inode)->io_tree,
966 start, end, NULL, 966 start, end, locked_page,
967 EXTENT_CLEAR_UNLOCK_PAGE | 967 EXTENT_CLEAR_UNLOCK_PAGE |
968 EXTENT_CLEAR_UNLOCK | 968 EXTENT_CLEAR_UNLOCK |
969 EXTENT_CLEAR_DELALLOC | 969 EXTENT_CLEAR_DELALLOC |
@@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work)
986 compress_file_range(async_cow->inode, async_cow->locked_page, 986 compress_file_range(async_cow->inode, async_cow->locked_page,
987 async_cow->start, async_cow->end, async_cow, 987 async_cow->start, async_cow->end, async_cow,
988 &num_added); 988 &num_added);
989 if (num_added == 0) 989 if (num_added == 0) {
990 iput(async_cow->inode);
990 async_cow->inode = NULL; 991 async_cow->inode = NULL;
992 }
991} 993}
992 994
993/* 995/*
@@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
1020{ 1022{
1021 struct async_cow *async_cow; 1023 struct async_cow *async_cow;
1022 async_cow = container_of(work, struct async_cow, work); 1024 async_cow = container_of(work, struct async_cow, work);
1025 if (async_cow->inode)
1026 iput(async_cow->inode);
1023 kfree(async_cow); 1027 kfree(async_cow);
1024} 1028}
1025 1029
@@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1038 while (start < end) { 1042 while (start < end) {
1039 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); 1043 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
1040 BUG_ON(!async_cow); /* -ENOMEM */ 1044 BUG_ON(!async_cow); /* -ENOMEM */
1041 async_cow->inode = inode; 1045 async_cow->inode = igrab(inode);
1042 async_cow->root = root; 1046 async_cow->root = root;
1043 async_cow->locked_page = locked_page; 1047 async_cow->locked_page = locked_page;
1044 async_cow->start = start; 1048 async_cow->start = start;
@@ -1136,8 +1140,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1136 u64 ino = btrfs_ino(inode); 1140 u64 ino = btrfs_ino(inode);
1137 1141
1138 path = btrfs_alloc_path(); 1142 path = btrfs_alloc_path();
1139 if (!path) 1143 if (!path) {
1144 extent_clear_unlock_delalloc(inode,
1145 &BTRFS_I(inode)->io_tree,
1146 start, end, locked_page,
1147 EXTENT_CLEAR_UNLOCK_PAGE |
1148 EXTENT_CLEAR_UNLOCK |
1149 EXTENT_CLEAR_DELALLOC |
1150 EXTENT_CLEAR_DIRTY |
1151 EXTENT_SET_WRITEBACK |
1152 EXTENT_END_WRITEBACK);
1140 return -ENOMEM; 1153 return -ENOMEM;
1154 }
1141 1155
1142 nolock = btrfs_is_free_space_inode(root, inode); 1156 nolock = btrfs_is_free_space_inode(root, inode);
1143 1157
@@ -1147,6 +1161,15 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1147 trans = btrfs_join_transaction(root); 1161 trans = btrfs_join_transaction(root);
1148 1162
1149 if (IS_ERR(trans)) { 1163 if (IS_ERR(trans)) {
1164 extent_clear_unlock_delalloc(inode,
1165 &BTRFS_I(inode)->io_tree,
1166 start, end, locked_page,
1167 EXTENT_CLEAR_UNLOCK_PAGE |
1168 EXTENT_CLEAR_UNLOCK |
1169 EXTENT_CLEAR_DELALLOC |
1170 EXTENT_CLEAR_DIRTY |
1171 EXTENT_SET_WRITEBACK |
1172 EXTENT_END_WRITEBACK);
1150 btrfs_free_path(path); 1173 btrfs_free_path(path);
1151 return PTR_ERR(trans); 1174 return PTR_ERR(trans);
1152 } 1175 }
@@ -1327,8 +1350,11 @@ out_check:
1327 } 1350 }
1328 btrfs_release_path(path); 1351 btrfs_release_path(path);
1329 1352
1330 if (cur_offset <= end && cow_start == (u64)-1) 1353 if (cur_offset <= end && cow_start == (u64)-1) {
1331 cow_start = cur_offset; 1354 cow_start = cur_offset;
1355 cur_offset = end;
1356 }
1357
1332 if (cow_start != (u64)-1) { 1358 if (cow_start != (u64)-1) {
1333 ret = cow_file_range(inode, locked_page, cow_start, end, 1359 ret = cow_file_range(inode, locked_page, cow_start, end,
1334 page_started, nr_written, 1); 1360 page_started, nr_written, 1);
@@ -1347,6 +1373,17 @@ error:
1347 if (!ret) 1373 if (!ret)
1348 ret = err; 1374 ret = err;
1349 1375
1376 if (ret && cur_offset < end)
1377 extent_clear_unlock_delalloc(inode,
1378 &BTRFS_I(inode)->io_tree,
1379 cur_offset, end, locked_page,
1380 EXTENT_CLEAR_UNLOCK_PAGE |
1381 EXTENT_CLEAR_UNLOCK |
1382 EXTENT_CLEAR_DELALLOC |
1383 EXTENT_CLEAR_DIRTY |
1384 EXTENT_SET_WRITEBACK |
1385 EXTENT_END_WRITEBACK);
1386
1350 btrfs_free_path(path); 1387 btrfs_free_path(path);
1351 return ret; 1388 return ret;
1352} 1389}
@@ -1361,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1361 int ret; 1398 int ret;
1362 struct btrfs_root *root = BTRFS_I(inode)->root; 1399 struct btrfs_root *root = BTRFS_I(inode)->root;
1363 1400
1364 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) 1401 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {
1365 ret = run_delalloc_nocow(inode, locked_page, start, end, 1402 ret = run_delalloc_nocow(inode, locked_page, start, end,
1366 page_started, 1, nr_written); 1403 page_started, 1, nr_written);
1367 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) 1404 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {
1368 ret = run_delalloc_nocow(inode, locked_page, start, end, 1405 ret = run_delalloc_nocow(inode, locked_page, start, end,
1369 page_started, 0, nr_written); 1406 page_started, 0, nr_written);
1370 else if (!btrfs_test_opt(root, COMPRESS) && 1407 } else if (!btrfs_test_opt(root, COMPRESS) &&
1371 !(BTRFS_I(inode)->force_compress) && 1408 !(BTRFS_I(inode)->force_compress) &&
1372 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) 1409 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {
1373 ret = cow_file_range(inode, locked_page, start, end, 1410 ret = cow_file_range(inode, locked_page, start, end,
1374 page_started, nr_written, 1); 1411 page_started, nr_written, 1);
1375 else 1412 } else {
1413 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1414 &BTRFS_I(inode)->runtime_flags);
1376 ret = cow_file_range_async(inode, locked_page, start, end, 1415 ret = cow_file_range_async(inode, locked_page, start, end,
1377 page_started, nr_written); 1416 page_started, nr_written);
1417 }
1378 return ret; 1418 return ret;
1379} 1419}
1380 1420
@@ -7054,10 +7094,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode)
7054 else 7094 else
7055 b_inode->flags &= ~BTRFS_INODE_NODATACOW; 7095 b_inode->flags &= ~BTRFS_INODE_NODATACOW;
7056 7096
7057 if (b_dir->flags & BTRFS_INODE_COMPRESS) 7097 if (b_dir->flags & BTRFS_INODE_COMPRESS) {
7058 b_inode->flags |= BTRFS_INODE_COMPRESS; 7098 b_inode->flags |= BTRFS_INODE_COMPRESS;
7059 else 7099 b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS;
7060 b_inode->flags &= ~BTRFS_INODE_COMPRESS; 7100 } else {
7101 b_inode->flags &= ~(BTRFS_INODE_COMPRESS |
7102 BTRFS_INODE_NOCOMPRESS);
7103 }
7061} 7104}
7062 7105
7063static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, 7106static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 24b776c08d99..58adbd0356d6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -52,6 +52,7 @@
52#include "locking.h" 52#include "locking.h"
53#include "inode-map.h" 53#include "inode-map.h"
54#include "backref.h" 54#include "backref.h"
55#include "rcu-string.h"
55 56
56/* Mask out flags that are inappropriate for the given type of inode. */ 57/* Mask out flags that are inappropriate for the given type of inode. */
57static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 58static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -785,39 +786,57 @@ none:
785 return -ENOENT; 786 return -ENOENT;
786} 787}
787 788
788/* 789static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
789 * Validaty check of prev em and next em:
790 * 1) no prev/next em
791 * 2) prev/next em is an hole/inline extent
792 */
793static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
794{ 790{
795 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 791 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
796 struct extent_map *prev = NULL, *next = NULL; 792 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
797 int ret = 0; 793 struct extent_map *em;
794 u64 len = PAGE_CACHE_SIZE;
798 795
796 /*
797 * hopefully we have this extent in the tree already, try without
798 * the full extent lock
799 */
799 read_lock(&em_tree->lock); 800 read_lock(&em_tree->lock);
800 prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1); 801 em = lookup_extent_mapping(em_tree, start, len);
801 next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
802 read_unlock(&em_tree->lock); 802 read_unlock(&em_tree->lock);
803 803
804 if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) && 804 if (!em) {
805 (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)) 805 /* get the big lock and read metadata off disk */
806 ret = 1; 806 lock_extent(io_tree, start, start + len - 1);
807 free_extent_map(prev); 807 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
808 free_extent_map(next); 808 unlock_extent(io_tree, start, start + len - 1);
809
810 if (IS_ERR(em))
811 return NULL;
812 }
813
814 return em;
815}
816
817static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
818{
819 struct extent_map *next;
820 bool ret = true;
821
822 /* this is the last extent */
823 if (em->start + em->len >= i_size_read(inode))
824 return false;
809 825
826 next = defrag_lookup_extent(inode, em->start + em->len);
827 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
828 ret = false;
829
830 free_extent_map(next);
810 return ret; 831 return ret;
811} 832}
812 833
813static int should_defrag_range(struct inode *inode, u64 start, u64 len, 834static int should_defrag_range(struct inode *inode, u64 start, int thresh,
814 int thresh, u64 *last_len, u64 *skip, 835 u64 *last_len, u64 *skip, u64 *defrag_end)
815 u64 *defrag_end)
816{ 836{
817 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 837 struct extent_map *em;
818 struct extent_map *em = NULL;
819 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
820 int ret = 1; 838 int ret = 1;
839 bool next_mergeable = true;
821 840
822 /* 841 /*
823 * make sure that once we start defragging an extent, we keep on 842 * make sure that once we start defragging an extent, we keep on
@@ -828,23 +847,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
828 847
829 *skip = 0; 848 *skip = 0;
830 849
831 /* 850 em = defrag_lookup_extent(inode, start);
832 * hopefully we have this extent in the tree already, try without 851 if (!em)
833 * the full extent lock 852 return 0;
834 */
835 read_lock(&em_tree->lock);
836 em = lookup_extent_mapping(em_tree, start, len);
837 read_unlock(&em_tree->lock);
838
839 if (!em) {
840 /* get the big lock and read metadata off disk */
841 lock_extent(io_tree, start, start + len - 1);
842 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
843 unlock_extent(io_tree, start, start + len - 1);
844
845 if (IS_ERR(em))
846 return 0;
847 }
848 853
849 /* this will cover holes, and inline extents */ 854 /* this will cover holes, and inline extents */
850 if (em->block_start >= EXTENT_MAP_LAST_BYTE) { 855 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
@@ -852,18 +857,15 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
852 goto out; 857 goto out;
853 } 858 }
854 859
855 /* If we have nothing to merge with us, just skip. */ 860 next_mergeable = defrag_check_next_extent(inode, em);
856 if (check_adjacent_extents(inode, em)) {
857 ret = 0;
858 goto out;
859 }
860 861
861 /* 862 /*
862 * we hit a real extent, if it is big don't bother defragging it again 863 * we hit a real extent, if it is big or the next extent is not a
864 * real extent, don't bother defragging it
863 */ 865 */
864 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) 866 if ((*last_len == 0 || *last_len >= thresh) &&
867 (em->len >= thresh || !next_mergeable))
865 ret = 0; 868 ret = 0;
866
867out: 869out:
868 /* 870 /*
869 * last_len ends up being a counter of how many bytes we've defragged. 871 * last_len ends up being a counter of how many bytes we've defragged.
@@ -1142,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1142 break; 1144 break;
1143 1145
1144 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1146 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1145 PAGE_CACHE_SIZE, extent_thresh, 1147 extent_thresh, &last_len, &skip,
1146 &last_len, &skip, &defrag_end)) { 1148 &defrag_end)) {
1147 unsigned long next; 1149 unsigned long next;
1148 /* 1150 /*
1149 * the should_defrag function tells us how much to skip 1151 * the should_defrag function tells us how much to skip
@@ -1304,6 +1306,13 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1304 ret = -EINVAL; 1306 ret = -EINVAL;
1305 goto out_free; 1307 goto out_free;
1306 } 1308 }
1309 if (device->fs_devices && device->fs_devices->seeding) {
1310 printk(KERN_INFO "btrfs: resizer unable to apply on "
1311 "seeding device %llu\n", devid);
1312 ret = -EINVAL;
1313 goto out_free;
1314 }
1315
1307 if (!strcmp(sizestr, "max")) 1316 if (!strcmp(sizestr, "max"))
1308 new_size = device->bdev->bd_inode->i_size; 1317 new_size = device->bdev->bd_inode->i_size;
1309 else { 1318 else {
@@ -1345,8 +1354,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1345 do_div(new_size, root->sectorsize); 1354 do_div(new_size, root->sectorsize);
1346 new_size *= root->sectorsize; 1355 new_size *= root->sectorsize;
1347 1356
1348 printk(KERN_INFO "btrfs: new size for %s is %llu\n", 1357 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
1349 device->name, (unsigned long long)new_size); 1358 rcu_str_deref(device->name),
1359 (unsigned long long)new_size);
1350 1360
1351 if (new_size > old_size) { 1361 if (new_size > old_size) {
1352 trans = btrfs_start_transaction(root, 0); 1362 trans = btrfs_start_transaction(root, 0);
@@ -2264,7 +2274,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2264 di_args->total_bytes = dev->total_bytes; 2274 di_args->total_bytes = dev->total_bytes;
2265 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2275 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2266 if (dev->name) { 2276 if (dev->name) {
2267 strncpy(di_args->path, dev->name, sizeof(di_args->path)); 2277 struct rcu_string *name;
2278
2279 rcu_read_lock();
2280 name = rcu_dereference(dev->name);
2281 strncpy(di_args->path, name->str, sizeof(di_args->path));
2282 rcu_read_unlock();
2268 di_args->path[sizeof(di_args->path) - 1] = 0; 2283 di_args->path[sizeof(di_args->path) - 1] = 0;
2269 } else { 2284 } else {
2270 di_args->path[0] = '\0'; 2285 di_args->path[0] = '\0';
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9e138cdc36c5..643335a4fe3c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
627 /* start IO across the range first to instantiate any delalloc 627 /* start IO across the range first to instantiate any delalloc
628 * extents 628 * extents
629 */ 629 */
630 filemap_write_and_wait_range(inode->i_mapping, start, orig_end); 630 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
631
632 /*
633 * So with compression we will find and lock a dirty page and clear the
634 * first one as dirty, setup an async extent, and immediately return
635 * with the entire range locked but with nobody actually marked with
636 * writeback. So we can't just filemap_write_and_wait_range() and
637 * expect it to work since it will just kick off a thread to do the
638 * actual work. So we need to call filemap_fdatawrite_range _again_
639 * since it will wait on the page lock, which won't be unlocked until
640 * after the pages have been marked as writeback and so we're good to go
641 * from there. We have to do this otherwise we'll miss the ordered
642 * extents and that results in badness. Please Josef, do not think you
643 * know better and pull this out at some point in the future, it is
644 * right and you are wrong.
645 */
646 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
647 &BTRFS_I(inode)->runtime_flags))
648 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
649
650 filemap_fdatawait_range(inode->i_mapping, start, orig_end);
631 651
632 end = orig_end; 652 end = orig_end;
633 found = 0; 653 found = 0;
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
new file mode 100644
index 000000000000..9e111e4576d4
--- /dev/null
+++ b/fs/btrfs/rcu-string.h
@@ -0,0 +1,56 @@
1/*
2 * Copyright (C) 2012 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19struct rcu_string {
20 struct rcu_head rcu;
21 char str[0];
22};
23
24static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
25{
26 size_t len = strlen(src) + 1;
27 struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) +
28 (len * sizeof(char)), mask);
29 if (!ret)
30 return ret;
31 strncpy(ret->str, src, len);
32 return ret;
33}
34
35static inline void rcu_string_free(struct rcu_string *str)
36{
37 if (str)
38 kfree_rcu(str, rcu);
39}
40
41#define printk_in_rcu(fmt, ...) do { \
42 rcu_read_lock(); \
43 printk(fmt, __VA_ARGS__); \
44 rcu_read_unlock(); \
45} while (0)
46
47#define printk_ratelimited_in_rcu(fmt, ...) do { \
48 rcu_read_lock(); \
49 printk_ratelimited(fmt, __VA_ARGS__); \
50 rcu_read_unlock(); \
51} while (0)
52
53#define rcu_str_deref(rcu_str) ({ \
54 struct rcu_string *__str = rcu_dereference(rcu_str); \
55 __str->str; \
56})
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a38cfa4f251e..b223620cd5a6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -26,6 +26,7 @@
26#include "backref.h" 26#include "backref.h"
27#include "extent_io.h" 27#include "extent_io.h"
28#include "check-integrity.h" 28#include "check-integrity.h"
29#include "rcu-string.h"
29 30
30/* 31/*
31 * This is only the first step towards a full-features scrub. It reads all 32 * This is only the first step towards a full-features scrub. It reads all
@@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
320 * hold all of the paths here 321 * hold all of the paths here
321 */ 322 */
322 for (i = 0; i < ipath->fspath->elem_cnt; ++i) 323 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
323 printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 324 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
324 "%s, sector %llu, root %llu, inode %llu, offset %llu, " 325 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
325 "length %llu, links %u (path: %s)\n", swarn->errstr, 326 "length %llu, links %u (path: %s)\n", swarn->errstr,
326 swarn->logical, swarn->dev->name, 327 swarn->logical, rcu_str_deref(swarn->dev->name),
327 (unsigned long long)swarn->sector, root, inum, offset, 328 (unsigned long long)swarn->sector, root, inum, offset,
328 min(isize - offset, (u64)PAGE_SIZE), nlink, 329 min(isize - offset, (u64)PAGE_SIZE), nlink,
329 (char *)(unsigned long)ipath->fspath->val[i]); 330 (char *)(unsigned long)ipath->fspath->val[i]);
@@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
332 return 0; 333 return 0;
333 334
334err: 335err:
335 printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 336 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
336 "%s, sector %llu, root %llu, inode %llu, offset %llu: path " 337 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
337 "resolving failed with ret=%d\n", swarn->errstr, 338 "resolving failed with ret=%d\n", swarn->errstr,
338 swarn->logical, swarn->dev->name, 339 swarn->logical, rcu_str_deref(swarn->dev->name),
339 (unsigned long long)swarn->sector, root, inum, offset, ret); 340 (unsigned long long)swarn->sector, root, inum, offset, ret);
340 341
341 free_ipath(ipath); 342 free_ipath(ipath);
@@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
390 do { 391 do {
391 ret = tree_backref_for_extent(&ptr, eb, ei, item_size, 392 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
392 &ref_root, &ref_level); 393 &ref_root, &ref_level);
393 printk(KERN_WARNING 394 printk_in_rcu(KERN_WARNING
394 "btrfs: %s at logical %llu on dev %s, " 395 "btrfs: %s at logical %llu on dev %s, "
395 "sector %llu: metadata %s (level %d) in tree " 396 "sector %llu: metadata %s (level %d) in tree "
396 "%llu\n", errstr, swarn.logical, dev->name, 397 "%llu\n", errstr, swarn.logical,
398 rcu_str_deref(dev->name),
397 (unsigned long long)swarn.sector, 399 (unsigned long long)swarn.sector,
398 ref_level ? "node" : "leaf", 400 ref_level ? "node" : "leaf",
399 ret < 0 ? -1 : ref_level, 401 ret < 0 ? -1 : ref_level,
@@ -580,9 +582,11 @@ out:
580 spin_lock(&sdev->stat_lock); 582 spin_lock(&sdev->stat_lock);
581 ++sdev->stat.uncorrectable_errors; 583 ++sdev->stat.uncorrectable_errors;
582 spin_unlock(&sdev->stat_lock); 584 spin_unlock(&sdev->stat_lock);
583 printk_ratelimited(KERN_ERR 585
586 printk_ratelimited_in_rcu(KERN_ERR
584 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", 587 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
585 (unsigned long long)fixup->logical, sdev->dev->name); 588 (unsigned long long)fixup->logical,
589 rcu_str_deref(sdev->dev->name));
586 } 590 }
587 591
588 btrfs_free_path(path); 592 btrfs_free_path(path);
@@ -936,18 +940,20 @@ corrected_error:
936 spin_lock(&sdev->stat_lock); 940 spin_lock(&sdev->stat_lock);
937 sdev->stat.corrected_errors++; 941 sdev->stat.corrected_errors++;
938 spin_unlock(&sdev->stat_lock); 942 spin_unlock(&sdev->stat_lock);
939 printk_ratelimited(KERN_ERR 943 printk_ratelimited_in_rcu(KERN_ERR
940 "btrfs: fixed up error at logical %llu on dev %s\n", 944 "btrfs: fixed up error at logical %llu on dev %s\n",
941 (unsigned long long)logical, sdev->dev->name); 945 (unsigned long long)logical,
946 rcu_str_deref(sdev->dev->name));
942 } 947 }
943 } else { 948 } else {
944did_not_correct_error: 949did_not_correct_error:
945 spin_lock(&sdev->stat_lock); 950 spin_lock(&sdev->stat_lock);
946 sdev->stat.uncorrectable_errors++; 951 sdev->stat.uncorrectable_errors++;
947 spin_unlock(&sdev->stat_lock); 952 spin_unlock(&sdev->stat_lock);
948 printk_ratelimited(KERN_ERR 953 printk_ratelimited_in_rcu(KERN_ERR
949 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", 954 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
950 (unsigned long long)logical, sdev->dev->name); 955 (unsigned long long)logical,
956 rcu_str_deref(sdev->dev->name));
951 } 957 }
952 958
953out: 959out:
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 96eb9fef7bd2..0eb9a4da069e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -54,6 +54,7 @@
54#include "version.h" 54#include "version.h"
55#include "export.h" 55#include "export.h"
56#include "compression.h" 56#include "compression.h"
57#include "rcu-string.h"
57 58
58#define CREATE_TRACE_POINTS 59#define CREATE_TRACE_POINTS
59#include <trace/events/btrfs.h> 60#include <trace/events/btrfs.h>
@@ -1482,12 +1483,44 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
1482 "error %d\n", btrfs_ino(inode), ret); 1483 "error %d\n", btrfs_ino(inode), ret);
1483} 1484}
1484 1485
1486static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1487{
1488 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
1489 struct btrfs_fs_devices *cur_devices;
1490 struct btrfs_device *dev, *first_dev = NULL;
1491 struct list_head *head;
1492 struct rcu_string *name;
1493
1494 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1495 cur_devices = fs_info->fs_devices;
1496 while (cur_devices) {
1497 head = &cur_devices->devices;
1498 list_for_each_entry(dev, head, dev_list) {
1499 if (!first_dev || dev->devid < first_dev->devid)
1500 first_dev = dev;
1501 }
1502 cur_devices = cur_devices->seed;
1503 }
1504
1505 if (first_dev) {
1506 rcu_read_lock();
1507 name = rcu_dereference(first_dev->name);
1508 seq_escape(m, name->str, " \t\n\\");
1509 rcu_read_unlock();
1510 } else {
1511 WARN_ON(1);
1512 }
1513 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1514 return 0;
1515}
1516
1485static const struct super_operations btrfs_super_ops = { 1517static const struct super_operations btrfs_super_ops = {
1486 .drop_inode = btrfs_drop_inode, 1518 .drop_inode = btrfs_drop_inode,
1487 .evict_inode = btrfs_evict_inode, 1519 .evict_inode = btrfs_evict_inode,
1488 .put_super = btrfs_put_super, 1520 .put_super = btrfs_put_super,
1489 .sync_fs = btrfs_sync_fs, 1521 .sync_fs = btrfs_sync_fs,
1490 .show_options = btrfs_show_options, 1522 .show_options = btrfs_show_options,
1523 .show_devname = btrfs_show_devname,
1491 .write_inode = btrfs_write_inode, 1524 .write_inode = btrfs_write_inode,
1492 .dirty_inode = btrfs_fs_dirty_inode, 1525 .dirty_inode = btrfs_fs_dirty_inode,
1493 .alloc_inode = btrfs_alloc_inode, 1526 .alloc_inode = btrfs_alloc_inode,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 1791c6e3d834..b72b068183ec 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -100,6 +100,10 @@ loop:
100 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 100 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
101 cur_trans = fs_info->running_transaction; 101 cur_trans = fs_info->running_transaction;
102 goto loop; 102 goto loop;
103 } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
104 spin_unlock(&root->fs_info->trans_lock);
105 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
106 return -EROFS;
103 } 107 }
104 108
105 atomic_set(&cur_trans->num_writers, 1); 109 atomic_set(&cur_trans->num_writers, 1);
@@ -1213,14 +1217,20 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1213 1217
1214 1218
1215static void cleanup_transaction(struct btrfs_trans_handle *trans, 1219static void cleanup_transaction(struct btrfs_trans_handle *trans,
1216 struct btrfs_root *root) 1220 struct btrfs_root *root, int err)
1217{ 1221{
1218 struct btrfs_transaction *cur_trans = trans->transaction; 1222 struct btrfs_transaction *cur_trans = trans->transaction;
1219 1223
1220 WARN_ON(trans->use_count > 1); 1224 WARN_ON(trans->use_count > 1);
1221 1225
1226 btrfs_abort_transaction(trans, root, err);
1227
1222 spin_lock(&root->fs_info->trans_lock); 1228 spin_lock(&root->fs_info->trans_lock);
1223 list_del_init(&cur_trans->list); 1229 list_del_init(&cur_trans->list);
1230 if (cur_trans == root->fs_info->running_transaction) {
1231 root->fs_info->running_transaction = NULL;
1232 root->fs_info->trans_no_join = 0;
1233 }
1224 spin_unlock(&root->fs_info->trans_lock); 1234 spin_unlock(&root->fs_info->trans_lock);
1225 1235
1226 btrfs_cleanup_one_transaction(trans->transaction, root); 1236 btrfs_cleanup_one_transaction(trans->transaction, root);
@@ -1526,7 +1536,7 @@ cleanup_transaction:
1526// WARN_ON(1); 1536// WARN_ON(1);
1527 if (current->journal_info == trans) 1537 if (current->journal_info == trans)
1528 current->journal_info = NULL; 1538 current->journal_info = NULL;
1529 cleanup_transaction(trans, root); 1539 cleanup_transaction(trans, root, ret);
1530 1540
1531 return ret; 1541 return ret;
1532} 1542}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7782020996fe..8a3d2594b807 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -35,6 +35,7 @@
35#include "volumes.h" 35#include "volumes.h"
36#include "async-thread.h" 36#include "async-thread.h"
37#include "check-integrity.h" 37#include "check-integrity.h"
38#include "rcu-string.h"
38 39
39static int init_first_rw_device(struct btrfs_trans_handle *trans, 40static int init_first_rw_device(struct btrfs_trans_handle *trans,
40 struct btrfs_root *root, 41 struct btrfs_root *root,
@@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
64 device = list_entry(fs_devices->devices.next, 65 device = list_entry(fs_devices->devices.next,
65 struct btrfs_device, dev_list); 66 struct btrfs_device, dev_list);
66 list_del(&device->dev_list); 67 list_del(&device->dev_list);
67 kfree(device->name); 68 rcu_string_free(device->name);
68 kfree(device); 69 kfree(device);
69 } 70 }
70 kfree(fs_devices); 71 kfree(fs_devices);
@@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path,
334{ 335{
335 struct btrfs_device *device; 336 struct btrfs_device *device;
336 struct btrfs_fs_devices *fs_devices; 337 struct btrfs_fs_devices *fs_devices;
338 struct rcu_string *name;
337 u64 found_transid = btrfs_super_generation(disk_super); 339 u64 found_transid = btrfs_super_generation(disk_super);
338 char *name;
339 340
340 fs_devices = find_fsid(disk_super->fsid); 341 fs_devices = find_fsid(disk_super->fsid);
341 if (!fs_devices) { 342 if (!fs_devices) {
@@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path,
369 memcpy(device->uuid, disk_super->dev_item.uuid, 370 memcpy(device->uuid, disk_super->dev_item.uuid,
370 BTRFS_UUID_SIZE); 371 BTRFS_UUID_SIZE);
371 spin_lock_init(&device->io_lock); 372 spin_lock_init(&device->io_lock);
372 device->name = kstrdup(path, GFP_NOFS); 373
373 if (!device->name) { 374 name = rcu_string_strdup(path, GFP_NOFS);
375 if (!name) {
374 kfree(device); 376 kfree(device);
375 return -ENOMEM; 377 return -ENOMEM;
376 } 378 }
379 rcu_assign_pointer(device->name, name);
377 INIT_LIST_HEAD(&device->dev_alloc_list); 380 INIT_LIST_HEAD(&device->dev_alloc_list);
378 381
379 /* init readahead state */ 382 /* init readahead state */
@@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path,
390 393
391 device->fs_devices = fs_devices; 394 device->fs_devices = fs_devices;
392 fs_devices->num_devices++; 395 fs_devices->num_devices++;
393 } else if (!device->name || strcmp(device->name, path)) { 396 } else if (!device->name || strcmp(device->name->str, path)) {
394 name = kstrdup(path, GFP_NOFS); 397 name = rcu_string_strdup(path, GFP_NOFS);
395 if (!name) 398 if (!name)
396 return -ENOMEM; 399 return -ENOMEM;
397 kfree(device->name); 400 rcu_string_free(device->name);
398 device->name = name; 401 rcu_assign_pointer(device->name, name);
399 if (device->missing) { 402 if (device->missing) {
400 fs_devices->missing_devices--; 403 fs_devices->missing_devices--;
401 device->missing = 0; 404 device->missing = 0;
@@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
430 433
431 /* We have held the volume lock, it is safe to get the devices. */ 434 /* We have held the volume lock, it is safe to get the devices. */
432 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 435 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
436 struct rcu_string *name;
437
433 device = kzalloc(sizeof(*device), GFP_NOFS); 438 device = kzalloc(sizeof(*device), GFP_NOFS);
434 if (!device) 439 if (!device)
435 goto error; 440 goto error;
436 441
437 device->name = kstrdup(orig_dev->name, GFP_NOFS); 442 /*
438 if (!device->name) { 443 * This is ok to do without rcu read locked because we hold the
444 * uuid mutex so nothing we touch in here is going to disappear.
445 */
446 name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
447 if (!name) {
439 kfree(device); 448 kfree(device);
440 goto error; 449 goto error;
441 } 450 }
451 rcu_assign_pointer(device->name, name);
442 452
443 device->devid = orig_dev->devid; 453 device->devid = orig_dev->devid;
444 device->work.func = pending_bios_fn; 454 device->work.func = pending_bios_fn;
@@ -491,7 +501,7 @@ again:
491 } 501 }
492 list_del_init(&device->dev_list); 502 list_del_init(&device->dev_list);
493 fs_devices->num_devices--; 503 fs_devices->num_devices--;
494 kfree(device->name); 504 rcu_string_free(device->name);
495 kfree(device); 505 kfree(device);
496 } 506 }
497 507
@@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work)
516 if (device->bdev) 526 if (device->bdev)
517 blkdev_put(device->bdev, device->mode); 527 blkdev_put(device->bdev, device->mode);
518 528
519 kfree(device->name); 529 rcu_string_free(device->name);
520 kfree(device); 530 kfree(device);
521} 531}
522 532
@@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
540 mutex_lock(&fs_devices->device_list_mutex); 550 mutex_lock(&fs_devices->device_list_mutex);
541 list_for_each_entry(device, &fs_devices->devices, dev_list) { 551 list_for_each_entry(device, &fs_devices->devices, dev_list) {
542 struct btrfs_device *new_device; 552 struct btrfs_device *new_device;
553 struct rcu_string *name;
543 554
544 if (device->bdev) 555 if (device->bdev)
545 fs_devices->open_devices--; 556 fs_devices->open_devices--;
@@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
555 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 566 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
556 BUG_ON(!new_device); /* -ENOMEM */ 567 BUG_ON(!new_device); /* -ENOMEM */
557 memcpy(new_device, device, sizeof(*new_device)); 568 memcpy(new_device, device, sizeof(*new_device));
558 new_device->name = kstrdup(device->name, GFP_NOFS); 569
559 BUG_ON(device->name && !new_device->name); /* -ENOMEM */ 570 /* Safe because we are under uuid_mutex */
571 name = rcu_string_strdup(device->name->str, GFP_NOFS);
572 BUG_ON(device->name && !name); /* -ENOMEM */
573 rcu_assign_pointer(new_device->name, name);
560 new_device->bdev = NULL; 574 new_device->bdev = NULL;
561 new_device->writeable = 0; 575 new_device->writeable = 0;
562 new_device->in_fs_metadata = 0; 576 new_device->in_fs_metadata = 0;
@@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
621 if (!device->name) 635 if (!device->name)
622 continue; 636 continue;
623 637
624 bdev = blkdev_get_by_path(device->name, flags, holder); 638 bdev = blkdev_get_by_path(device->name->str, flags, holder);
625 if (IS_ERR(bdev)) { 639 if (IS_ERR(bdev)) {
626 printk(KERN_INFO "open %s failed\n", device->name); 640 printk(KERN_INFO "open %s failed\n", device->name->str);
627 goto error; 641 goto error;
628 } 642 }
629 filemap_write_and_wait(bdev->bd_inode->i_mapping); 643 filemap_write_and_wait(bdev->bd_inode->i_mapping);
@@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1632 struct block_device *bdev; 1646 struct block_device *bdev;
1633 struct list_head *devices; 1647 struct list_head *devices;
1634 struct super_block *sb = root->fs_info->sb; 1648 struct super_block *sb = root->fs_info->sb;
1649 struct rcu_string *name;
1635 u64 total_bytes; 1650 u64 total_bytes;
1636 int seeding_dev = 0; 1651 int seeding_dev = 0;
1637 int ret = 0; 1652 int ret = 0;
@@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1671 goto error; 1686 goto error;
1672 } 1687 }
1673 1688
1674 device->name = kstrdup(device_path, GFP_NOFS); 1689 name = rcu_string_strdup(device_path, GFP_NOFS);
1675 if (!device->name) { 1690 if (!name) {
1676 kfree(device); 1691 kfree(device);
1677 ret = -ENOMEM; 1692 ret = -ENOMEM;
1678 goto error; 1693 goto error;
1679 } 1694 }
1695 rcu_assign_pointer(device->name, name);
1680 1696
1681 ret = find_next_devid(root, &device->devid); 1697 ret = find_next_devid(root, &device->devid);
1682 if (ret) { 1698 if (ret) {
1683 kfree(device->name); 1699 rcu_string_free(device->name);
1684 kfree(device); 1700 kfree(device);
1685 goto error; 1701 goto error;
1686 } 1702 }
1687 1703
1688 trans = btrfs_start_transaction(root, 0); 1704 trans = btrfs_start_transaction(root, 0);
1689 if (IS_ERR(trans)) { 1705 if (IS_ERR(trans)) {
1690 kfree(device->name); 1706 rcu_string_free(device->name);
1691 kfree(device); 1707 kfree(device);
1692 ret = PTR_ERR(trans); 1708 ret = PTR_ERR(trans);
1693 goto error; 1709 goto error;
@@ -1796,7 +1812,7 @@ error_trans:
1796 unlock_chunks(root); 1812 unlock_chunks(root);
1797 btrfs_abort_transaction(trans, root, ret); 1813 btrfs_abort_transaction(trans, root, ret);
1798 btrfs_end_transaction(trans, root); 1814 btrfs_end_transaction(trans, root);
1799 kfree(device->name); 1815 rcu_string_free(device->name);
1800 kfree(device); 1816 kfree(device);
1801error: 1817error:
1802 blkdev_put(bdev, FMODE_EXCL); 1818 blkdev_put(bdev, FMODE_EXCL);
@@ -4204,10 +4220,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4204 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; 4220 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
4205 dev = bbio->stripes[dev_nr].dev; 4221 dev = bbio->stripes[dev_nr].dev;
4206 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { 4222 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
4223#ifdef DEBUG
4224 struct rcu_string *name;
4225
4226 rcu_read_lock();
4227 name = rcu_dereference(dev->name);
4207 pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " 4228 pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
4208 "(%s id %llu), size=%u\n", rw, 4229 "(%s id %llu), size=%u\n", rw,
4209 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, 4230 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
4210 dev->name, dev->devid, bio->bi_size); 4231 name->str, dev->devid, bio->bi_size);
4232 rcu_read_unlock();
4233#endif
4211 bio->bi_bdev = dev->bdev; 4234 bio->bi_bdev = dev->bdev;
4212 if (async_submit) 4235 if (async_submit)
4213 schedule_bio(root, dev, rw, bio); 4236 schedule_bio(root, dev, rw, bio);
@@ -4694,8 +4717,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
4694 key.offset = device->devid; 4717 key.offset = device->devid;
4695 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); 4718 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
4696 if (ret) { 4719 if (ret) {
4697 printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", 4720 printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
4698 device->name, (unsigned long long)device->devid); 4721 rcu_str_deref(device->name),
4722 (unsigned long long)device->devid);
4699 __btrfs_reset_dev_stats(device); 4723 __btrfs_reset_dev_stats(device);
4700 device->dev_stats_valid = 1; 4724 device->dev_stats_valid = 1;
4701 btrfs_release_path(path); 4725 btrfs_release_path(path);
@@ -4747,8 +4771,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4747 BUG_ON(!path); 4771 BUG_ON(!path);
4748 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); 4772 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
4749 if (ret < 0) { 4773 if (ret < 0) {
4750 printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", 4774 printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
4751 ret, device->name); 4775 ret, rcu_str_deref(device->name));
4752 goto out; 4776 goto out;
4753 } 4777 }
4754 4778
@@ -4757,8 +4781,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4757 /* need to delete old one and insert a new one */ 4781 /* need to delete old one and insert a new one */
4758 ret = btrfs_del_item(trans, dev_root, path); 4782 ret = btrfs_del_item(trans, dev_root, path);
4759 if (ret != 0) { 4783 if (ret != 0) {
4760 printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", 4784 printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
4761 device->name, ret); 4785 rcu_str_deref(device->name), ret);
4762 goto out; 4786 goto out;
4763 } 4787 }
4764 ret = 1; 4788 ret = 1;
@@ -4770,8 +4794,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4770 ret = btrfs_insert_empty_item(trans, dev_root, path, 4794 ret = btrfs_insert_empty_item(trans, dev_root, path,
4771 &key, sizeof(*ptr)); 4795 &key, sizeof(*ptr));
4772 if (ret < 0) { 4796 if (ret < 0) {
4773 printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", 4797 printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
4774 device->name, ret); 4798 rcu_str_deref(device->name), ret);
4775 goto out; 4799 goto out;
4776 } 4800 }
4777 } 4801 }
@@ -4823,9 +4847,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4823{ 4847{
4824 if (!dev->dev_stats_valid) 4848 if (!dev->dev_stats_valid)
4825 return; 4849 return;
4826 printk_ratelimited(KERN_ERR 4850 printk_ratelimited_in_rcu(KERN_ERR
4827 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4851 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4828 dev->name, 4852 rcu_str_deref(dev->name),
4829 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4853 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4830 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4854 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4831 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), 4855 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
@@ -4837,8 +4861,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4837 4861
4838static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) 4862static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
4839{ 4863{
4840 printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4864 printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4841 dev->name, 4865 rcu_str_deref(dev->name),
4842 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4866 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4843 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4867 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4844 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), 4868 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3406a88ca83e..74366f27a76b 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -58,7 +58,7 @@ struct btrfs_device {
58 /* the mode sent to blkdev_get */ 58 /* the mode sent to blkdev_get */
59 fmode_t mode; 59 fmode_t mode;
60 60
61 char *name; 61 struct rcu_string *name;
62 62
63 /* the internal btrfs device id */ 63 /* the internal btrfs device id */
64 u64 devid; 64 u64 devid;