about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
author Chris Mason <clm@fb.com> 2015-03-25 13:52:48 -0400
committer Chris Mason <clm@fb.com> 2015-03-25 13:52:48 -0400
commit fc4c3c872f44bf425963feba57eb9c3f8ac2d7eb (patch)
tree 0d49c3e4d8f64a4cc0d3f42f37430fc60007e28b /fs/btrfs/disk-io.c
parent 9deed229fa8a83bb5cd713b2d2a8e5c022a4b45b (diff)
parent a4f3d2c4efe2628329249b64fd5799468e025b9d (diff)
Merge branch 'cleanups-post-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux into for-linus-4.1
Signed-off-by: Chris Mason <clm@fb.com>
Conflicts: fs/btrfs/disk-io.c
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 554
1 files changed, 301 insertions, 253 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6aaaf987fd31..23c49ab2de4c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -54,7 +54,7 @@
54#include <asm/cpufeature.h> 54#include <asm/cpufeature.h>
55#endif 55#endif
56 56
57static struct extent_io_ops btree_extent_io_ops; 57static const struct extent_io_ops btree_extent_io_ops;
58static void end_workqueue_fn(struct btrfs_work *work); 58static void end_workqueue_fn(struct btrfs_work *work);
59static void free_fs_root(struct btrfs_root *root); 59static void free_fs_root(struct btrfs_root *root);
60static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 60static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
@@ -274,10 +274,11 @@ void btrfs_csum_final(u32 crc, char *result)
274 * compute the csum for a btree block, and either verify it or write it 274 * compute the csum for a btree block, and either verify it or write it
275 * into the csum field of the block. 275 * into the csum field of the block.
276 */ 276 */
277static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, 277static int csum_tree_block(struct btrfs_fs_info *fs_info,
278 struct extent_buffer *buf,
278 int verify) 279 int verify)
279{ 280{
280 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 281 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
281 char *result = NULL; 282 char *result = NULL;
282 unsigned long len; 283 unsigned long len;
283 unsigned long cur_len; 284 unsigned long cur_len;
@@ -321,7 +322,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
321 printk_ratelimited(KERN_WARNING 322 printk_ratelimited(KERN_WARNING
322 "BTRFS: %s checksum verify failed on %llu wanted %X found %X " 323 "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
323 "level %d\n", 324 "level %d\n",
324 root->fs_info->sb->s_id, buf->start, 325 fs_info->sb->s_id, buf->start,
325 val, found, btrfs_header_level(buf)); 326 val, found, btrfs_header_level(buf));
326 if (result != (char *)&inline_result) 327 if (result != (char *)&inline_result)
327 kfree(result); 328 kfree(result);
@@ -501,7 +502,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
501 * we only fill in the checksum field in the first page of a multi-page block 502 * we only fill in the checksum field in the first page of a multi-page block
502 */ 503 */
503 504
504static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) 505static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
505{ 506{
506 u64 start = page_offset(page); 507 u64 start = page_offset(page);
507 u64 found_start; 508 u64 found_start;
@@ -513,14 +514,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
513 found_start = btrfs_header_bytenr(eb); 514 found_start = btrfs_header_bytenr(eb);
514 if (WARN_ON(found_start != start || !PageUptodate(page))) 515 if (WARN_ON(found_start != start || !PageUptodate(page)))
515 return 0; 516 return 0;
516 csum_tree_block(root, eb, 0); 517 csum_tree_block(fs_info, eb, 0);
517 return 0; 518 return 0;
518} 519}
519 520
520static int check_tree_block_fsid(struct btrfs_root *root, 521static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
521 struct extent_buffer *eb) 522 struct extent_buffer *eb)
522{ 523{
523 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 524 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
524 u8 fsid[BTRFS_UUID_SIZE]; 525 u8 fsid[BTRFS_UUID_SIZE];
525 int ret = 1; 526 int ret = 1;
526 527
@@ -640,7 +641,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
640 ret = -EIO; 641 ret = -EIO;
641 goto err; 642 goto err;
642 } 643 }
643 if (check_tree_block_fsid(root, eb)) { 644 if (check_tree_block_fsid(root->fs_info, eb)) {
644 printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n", 645 printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
645 eb->fs_info->sb->s_id, eb->start); 646 eb->fs_info->sb->s_id, eb->start);
646 ret = -EIO; 647 ret = -EIO;
@@ -657,7 +658,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
657 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), 658 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
658 eb, found_level); 659 eb, found_level);
659 660
660 ret = csum_tree_block(root, eb, 1); 661 ret = csum_tree_block(root->fs_info, eb, 1);
661 if (ret) { 662 if (ret) {
662 ret = -EIO; 663 ret = -EIO;
663 goto err; 664 goto err;
@@ -882,7 +883,7 @@ static int btree_csum_one_bio(struct bio *bio)
882 883
883 bio_for_each_segment_all(bvec, bio, i) { 884 bio_for_each_segment_all(bvec, bio, i) {
884 root = BTRFS_I(bvec->bv_page->mapping->host)->root; 885 root = BTRFS_I(bvec->bv_page->mapping->host)->root;
885 ret = csum_dirty_buffer(root, bvec->bv_page); 886 ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
886 if (ret) 887 if (ret)
887 break; 888 break;
888 } 889 }
@@ -1119,10 +1120,10 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
1119 return 0; 1120 return 0;
1120} 1121}
1121 1122
1122struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 1123struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
1123 u64 bytenr) 1124 u64 bytenr)
1124{ 1125{
1125 return find_extent_buffer(root->fs_info, bytenr); 1126 return find_extent_buffer(fs_info, bytenr);
1126} 1127}
1127 1128
1128struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 1129struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
@@ -1165,11 +1166,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1165 1166
1166} 1167}
1167 1168
1168void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1169void clean_tree_block(struct btrfs_trans_handle *trans,
1170 struct btrfs_fs_info *fs_info,
1169 struct extent_buffer *buf) 1171 struct extent_buffer *buf)
1170{ 1172{
1171 struct btrfs_fs_info *fs_info = root->fs_info;
1172
1173 if (btrfs_header_generation(buf) == 1173 if (btrfs_header_generation(buf) ==
1174 fs_info->running_transaction->transid) { 1174 fs_info->running_transaction->transid) {
1175 btrfs_assert_tree_locked(buf); 1175 btrfs_assert_tree_locked(buf);
@@ -2146,6 +2146,267 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
2146 } 2146 }
2147} 2147}
2148 2148
2149static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
2150{
2151 mutex_init(&fs_info->scrub_lock);
2152 atomic_set(&fs_info->scrubs_running, 0);
2153 atomic_set(&fs_info->scrub_pause_req, 0);
2154 atomic_set(&fs_info->scrubs_paused, 0);
2155 atomic_set(&fs_info->scrub_cancel_req, 0);
2156 init_waitqueue_head(&fs_info->scrub_pause_wait);
2157 fs_info->scrub_workers_refcnt = 0;
2158}
2159
2160static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
2161{
2162 spin_lock_init(&fs_info->balance_lock);
2163 mutex_init(&fs_info->balance_mutex);
2164 atomic_set(&fs_info->balance_running, 0);
2165 atomic_set(&fs_info->balance_pause_req, 0);
2166 atomic_set(&fs_info->balance_cancel_req, 0);
2167 fs_info->balance_ctl = NULL;
2168 init_waitqueue_head(&fs_info->balance_wait_q);
2169}
2170
2171static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info,
2172 struct btrfs_root *tree_root)
2173{
2174 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
2175 set_nlink(fs_info->btree_inode, 1);
2176 /*
2177 * we set the i_size on the btree inode to the max possible int.
2178 * the real end of the address space is determined by all of
2179 * the devices in the system
2180 */
2181 fs_info->btree_inode->i_size = OFFSET_MAX;
2182 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
2183
2184 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
2185 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
2186 fs_info->btree_inode->i_mapping);
2187 BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
2188 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
2189
2190 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
2191
2192 BTRFS_I(fs_info->btree_inode)->root = tree_root;
2193 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
2194 sizeof(struct btrfs_key));
2195 set_bit(BTRFS_INODE_DUMMY,
2196 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2197 btrfs_insert_inode_hash(fs_info->btree_inode);
2198}
2199
2200static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
2201{
2202 fs_info->dev_replace.lock_owner = 0;
2203 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2204 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
2205 mutex_init(&fs_info->dev_replace.lock_management_lock);
2206 mutex_init(&fs_info->dev_replace.lock);
2207 init_waitqueue_head(&fs_info->replace_wait);
2208}
2209
2210static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
2211{
2212 spin_lock_init(&fs_info->qgroup_lock);
2213 mutex_init(&fs_info->qgroup_ioctl_lock);
2214 fs_info->qgroup_tree = RB_ROOT;
2215 fs_info->qgroup_op_tree = RB_ROOT;
2216 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2217 fs_info->qgroup_seq = 1;
2218 fs_info->quota_enabled = 0;
2219 fs_info->pending_quota_state = 0;
2220 fs_info->qgroup_ulist = NULL;
2221 mutex_init(&fs_info->qgroup_rescan_lock);
2222}
2223
2224static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
2225 struct btrfs_fs_devices *fs_devices)
2226{
2227 int max_active = fs_info->thread_pool_size;
2228 unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
2229
2230 fs_info->workers =
2231 btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
2232 max_active, 16);
2233
2234 fs_info->delalloc_workers =
2235 btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
2236
2237 fs_info->flush_workers =
2238 btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
2239
2240 fs_info->caching_workers =
2241 btrfs_alloc_workqueue("cache", flags, max_active, 0);
2242
2243 /*
2244 * a higher idle thresh on the submit workers makes it much more
2245 * likely that bios will be send down in a sane order to the
2246 * devices
2247 */
2248 fs_info->submit_workers =
2249 btrfs_alloc_workqueue("submit", flags,
2250 min_t(u64, fs_devices->num_devices,
2251 max_active), 64);
2252
2253 fs_info->fixup_workers =
2254 btrfs_alloc_workqueue("fixup", flags, 1, 0);
2255
2256 /*
2257 * endios are largely parallel and should have a very
2258 * low idle thresh
2259 */
2260 fs_info->endio_workers =
2261 btrfs_alloc_workqueue("endio", flags, max_active, 4);
2262 fs_info->endio_meta_workers =
2263 btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
2264 fs_info->endio_meta_write_workers =
2265 btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
2266 fs_info->endio_raid56_workers =
2267 btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
2268 fs_info->endio_repair_workers =
2269 btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
2270 fs_info->rmw_workers =
2271 btrfs_alloc_workqueue("rmw", flags, max_active, 2);
2272 fs_info->endio_write_workers =
2273 btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
2274 fs_info->endio_freespace_worker =
2275 btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
2276 fs_info->delayed_workers =
2277 btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
2278 fs_info->readahead_workers =
2279 btrfs_alloc_workqueue("readahead", flags, max_active, 2);
2280 fs_info->qgroup_rescan_workers =
2281 btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
2282 fs_info->extent_workers =
2283 btrfs_alloc_workqueue("extent-refs", flags,
2284 min_t(u64, fs_devices->num_devices,
2285 max_active), 8);
2286
2287 if (!(fs_info->workers && fs_info->delalloc_workers &&
2288 fs_info->submit_workers && fs_info->flush_workers &&
2289 fs_info->endio_workers && fs_info->endio_meta_workers &&
2290 fs_info->endio_meta_write_workers &&
2291 fs_info->endio_repair_workers &&
2292 fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
2293 fs_info->endio_freespace_worker && fs_info->rmw_workers &&
2294 fs_info->caching_workers && fs_info->readahead_workers &&
2295 fs_info->fixup_workers && fs_info->delayed_workers &&
2296 fs_info->extent_workers &&
2297 fs_info->qgroup_rescan_workers)) {
2298 return -ENOMEM;
2299 }
2300
2301 return 0;
2302}
2303
2304static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
2305 struct btrfs_fs_devices *fs_devices)
2306{
2307 int ret;
2308 struct btrfs_root *tree_root = fs_info->tree_root;
2309 struct btrfs_root *log_tree_root;
2310 struct btrfs_super_block *disk_super = fs_info->super_copy;
2311 u64 bytenr = btrfs_super_log_root(disk_super);
2312
2313 if (fs_devices->rw_devices == 0) {
2314 printk(KERN_WARNING "BTRFS: log replay required "
2315 "on RO media\n");
2316 return -EIO;
2317 }
2318
2319 log_tree_root = btrfs_alloc_root(fs_info);
2320 if (!log_tree_root)
2321 return -ENOMEM;
2322
2323 __setup_root(tree_root->nodesize, tree_root->sectorsize,
2324 tree_root->stripesize, log_tree_root, fs_info,
2325 BTRFS_TREE_LOG_OBJECTID);
2326
2327 log_tree_root->node = read_tree_block(tree_root, bytenr,
2328 fs_info->generation + 1);
2329 if (!log_tree_root->node ||
2330 !extent_buffer_uptodate(log_tree_root->node)) {
2331 printk(KERN_ERR "BTRFS: failed to read log tree\n");
2332 free_extent_buffer(log_tree_root->node);
2333 kfree(log_tree_root);
2334 return -EIO;
2335 }
2336 /* returns with log_tree_root freed on success */
2337 ret = btrfs_recover_log_trees(log_tree_root);
2338 if (ret) {
2339 btrfs_error(tree_root->fs_info, ret,
2340 "Failed to recover log tree");
2341 free_extent_buffer(log_tree_root->node);
2342 kfree(log_tree_root);
2343 return ret;
2344 }
2345
2346 if (fs_info->sb->s_flags & MS_RDONLY) {
2347 ret = btrfs_commit_super(tree_root);
2348 if (ret)
2349 return ret;
2350 }
2351
2352 return 0;
2353}
2354
2355static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
2356 struct btrfs_root *tree_root)
2357{
2358 struct btrfs_root *root;
2359 struct btrfs_key location;
2360 int ret;
2361
2362 location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
2363 location.type = BTRFS_ROOT_ITEM_KEY;
2364 location.offset = 0;
2365
2366 root = btrfs_read_tree_root(tree_root, &location);
2367 if (IS_ERR(root))
2368 return PTR_ERR(root);
2369 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2370 fs_info->extent_root = root;
2371
2372 location.objectid = BTRFS_DEV_TREE_OBJECTID;
2373 root = btrfs_read_tree_root(tree_root, &location);
2374 if (IS_ERR(root))
2375 return PTR_ERR(root);
2376 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2377 fs_info->dev_root = root;
2378 btrfs_init_devices_late(fs_info);
2379
2380 location.objectid = BTRFS_CSUM_TREE_OBJECTID;
2381 root = btrfs_read_tree_root(tree_root, &location);
2382 if (IS_ERR(root))
2383 return PTR_ERR(root);
2384 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2385 fs_info->csum_root = root;
2386
2387 location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
2388 root = btrfs_read_tree_root(tree_root, &location);
2389 if (!IS_ERR(root)) {
2390 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2391 fs_info->quota_enabled = 1;
2392 fs_info->pending_quota_state = 1;
2393 fs_info->quota_root = root;
2394 }
2395
2396 location.objectid = BTRFS_UUID_TREE_OBJECTID;
2397 root = btrfs_read_tree_root(tree_root, &location);
2398 if (IS_ERR(root)) {
2399 ret = PTR_ERR(root);
2400 if (ret != -ENOENT)
2401 return ret;
2402 } else {
2403 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2404 fs_info->uuid_root = root;
2405 }
2406
2407 return 0;
2408}
2409
2149int open_ctree(struct super_block *sb, 2410int open_ctree(struct super_block *sb,
2150 struct btrfs_fs_devices *fs_devices, 2411 struct btrfs_fs_devices *fs_devices,
2151 char *options) 2412 char *options)
@@ -2160,21 +2421,12 @@ int open_ctree(struct super_block *sb,
2160 struct btrfs_super_block *disk_super; 2421 struct btrfs_super_block *disk_super;
2161 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2422 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2162 struct btrfs_root *tree_root; 2423 struct btrfs_root *tree_root;
2163 struct btrfs_root *extent_root;
2164 struct btrfs_root *csum_root;
2165 struct btrfs_root *chunk_root; 2424 struct btrfs_root *chunk_root;
2166 struct btrfs_root *dev_root;
2167 struct btrfs_root *quota_root;
2168 struct btrfs_root *uuid_root;
2169 struct btrfs_root *log_tree_root;
2170 int ret; 2425 int ret;
2171 int err = -EINVAL; 2426 int err = -EINVAL;
2172 int num_backups_tried = 0; 2427 int num_backups_tried = 0;
2173 int backup_index = 0; 2428 int backup_index = 0;
2174 int max_active; 2429 int max_active;
2175 int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
2176 bool create_uuid_tree;
2177 bool check_uuid_tree;
2178 2430
2179 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); 2431 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
2180 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); 2432 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2294,55 +2546,18 @@ int open_ctree(struct super_block *sb,
2294 } 2546 }
2295 btrfs_init_delayed_root(fs_info->delayed_root); 2547 btrfs_init_delayed_root(fs_info->delayed_root);
2296 2548
2297 mutex_init(&fs_info->scrub_lock); 2549 btrfs_init_scrub(fs_info);
2298 atomic_set(&fs_info->scrubs_running, 0);
2299 atomic_set(&fs_info->scrub_pause_req, 0);
2300 atomic_set(&fs_info->scrubs_paused, 0);
2301 atomic_set(&fs_info->scrub_cancel_req, 0);
2302 init_waitqueue_head(&fs_info->replace_wait);
2303 init_waitqueue_head(&fs_info->scrub_pause_wait);
2304 fs_info->scrub_workers_refcnt = 0;
2305#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 2550#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
2306 fs_info->check_integrity_print_mask = 0; 2551 fs_info->check_integrity_print_mask = 0;
2307#endif 2552#endif
2308 2553 btrfs_init_balance(fs_info);
2309 spin_lock_init(&fs_info->balance_lock);
2310 mutex_init(&fs_info->balance_mutex);
2311 atomic_set(&fs_info->balance_running, 0);
2312 atomic_set(&fs_info->balance_pause_req, 0);
2313 atomic_set(&fs_info->balance_cancel_req, 0);
2314 fs_info->balance_ctl = NULL;
2315 init_waitqueue_head(&fs_info->balance_wait_q);
2316 btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work); 2554 btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
2317 2555
2318 sb->s_blocksize = 4096; 2556 sb->s_blocksize = 4096;
2319 sb->s_blocksize_bits = blksize_bits(4096); 2557 sb->s_blocksize_bits = blksize_bits(4096);
2320 sb->s_bdi = &fs_info->bdi; 2558 sb->s_bdi = &fs_info->bdi;
2321 2559
2322 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; 2560 btrfs_init_btree_inode(fs_info, tree_root);
2323 set_nlink(fs_info->btree_inode, 1);
2324 /*
2325 * we set the i_size on the btree inode to the max possible int.
2326 * the real end of the address space is determined by all of
2327 * the devices in the system
2328 */
2329 fs_info->btree_inode->i_size = OFFSET_MAX;
2330 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
2331
2332 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
2333 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
2334 fs_info->btree_inode->i_mapping);
2335 BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
2336 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
2337
2338 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
2339
2340 BTRFS_I(fs_info->btree_inode)->root = tree_root;
2341 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
2342 sizeof(struct btrfs_key));
2343 set_bit(BTRFS_INODE_DUMMY,
2344 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2345 btrfs_insert_inode_hash(fs_info->btree_inode);
2346 2561
2347 spin_lock_init(&fs_info->block_group_cache_lock); 2562 spin_lock_init(&fs_info->block_group_cache_lock);
2348 fs_info->block_group_cache_tree = RB_ROOT; 2563 fs_info->block_group_cache_tree = RB_ROOT;
@@ -2367,22 +2582,9 @@ int open_ctree(struct super_block *sb,
2367 init_rwsem(&fs_info->cleanup_work_sem); 2582 init_rwsem(&fs_info->cleanup_work_sem);
2368 init_rwsem(&fs_info->subvol_sem); 2583 init_rwsem(&fs_info->subvol_sem);
2369 sema_init(&fs_info->uuid_tree_rescan_sem, 1); 2584 sema_init(&fs_info->uuid_tree_rescan_sem, 1);
2370 fs_info->dev_replace.lock_owner = 0;
2371 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2372 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
2373 mutex_init(&fs_info->dev_replace.lock_management_lock);
2374 mutex_init(&fs_info->dev_replace.lock);
2375 2585
2376 spin_lock_init(&fs_info->qgroup_lock); 2586 btrfs_init_dev_replace_locks(fs_info);
2377 mutex_init(&fs_info->qgroup_ioctl_lock); 2587 btrfs_init_qgroup(fs_info);
2378 fs_info->qgroup_tree = RB_ROOT;
2379 fs_info->qgroup_op_tree = RB_ROOT;
2380 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2381 fs_info->qgroup_seq = 1;
2382 fs_info->quota_enabled = 0;
2383 fs_info->pending_quota_state = 0;
2384 fs_info->qgroup_ulist = NULL;
2385 mutex_init(&fs_info->qgroup_rescan_lock);
2386 2588
2387 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 2589 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
2388 btrfs_init_free_cluster(&fs_info->data_alloc_cluster); 2590 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2554,75 +2756,9 @@ int open_ctree(struct super_block *sb,
2554 2756
2555 max_active = fs_info->thread_pool_size; 2757 max_active = fs_info->thread_pool_size;
2556 2758
2557 fs_info->workers = 2759 ret = btrfs_init_workqueues(fs_info, fs_devices);
2558 btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, 2760 if (ret) {
2559 max_active, 16); 2761 err = ret;
2560
2561 fs_info->delalloc_workers =
2562 btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
2563
2564 fs_info->flush_workers =
2565 btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
2566
2567 fs_info->caching_workers =
2568 btrfs_alloc_workqueue("cache", flags, max_active, 0);
2569
2570 /*
2571 * a higher idle thresh on the submit workers makes it much more
2572 * likely that bios will be send down in a sane order to the
2573 * devices
2574 */
2575 fs_info->submit_workers =
2576 btrfs_alloc_workqueue("submit", flags,
2577 min_t(u64, fs_devices->num_devices,
2578 max_active), 64);
2579
2580 fs_info->fixup_workers =
2581 btrfs_alloc_workqueue("fixup", flags, 1, 0);
2582
2583 /*
2584 * endios are largely parallel and should have a very
2585 * low idle thresh
2586 */
2587 fs_info->endio_workers =
2588 btrfs_alloc_workqueue("endio", flags, max_active, 4);
2589 fs_info->endio_meta_workers =
2590 btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
2591 fs_info->endio_meta_write_workers =
2592 btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
2593 fs_info->endio_raid56_workers =
2594 btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
2595 fs_info->endio_repair_workers =
2596 btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
2597 fs_info->rmw_workers =
2598 btrfs_alloc_workqueue("rmw", flags, max_active, 2);
2599 fs_info->endio_write_workers =
2600 btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
2601 fs_info->endio_freespace_worker =
2602 btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
2603 fs_info->delayed_workers =
2604 btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
2605 fs_info->readahead_workers =
2606 btrfs_alloc_workqueue("readahead", flags, max_active, 2);
2607 fs_info->qgroup_rescan_workers =
2608 btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
2609 fs_info->extent_workers =
2610 btrfs_alloc_workqueue("extent-refs", flags,
2611 min_t(u64, fs_devices->num_devices,
2612 max_active), 8);
2613
2614 if (!(fs_info->workers && fs_info->delalloc_workers &&
2615 fs_info->submit_workers && fs_info->flush_workers &&
2616 fs_info->endio_workers && fs_info->endio_meta_workers &&
2617 fs_info->endio_meta_write_workers &&
2618 fs_info->endio_repair_workers &&
2619 fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
2620 fs_info->endio_freespace_worker && fs_info->rmw_workers &&
2621 fs_info->caching_workers && fs_info->readahead_workers &&
2622 fs_info->fixup_workers && fs_info->delayed_workers &&
2623 fs_info->extent_workers &&
2624 fs_info->qgroup_rescan_workers)) {
2625 err = -ENOMEM;
2626 goto fail_sb_buffer; 2762 goto fail_sb_buffer;
2627 } 2763 }
2628 2764
@@ -2688,7 +2824,7 @@ int open_ctree(struct super_block *sb,
2688 * keep the device that is marked to be the target device for the 2824 * keep the device that is marked to be the target device for the
2689 * dev_replace procedure 2825 * dev_replace procedure
2690 */ 2826 */
2691 btrfs_close_extra_devices(fs_info, fs_devices, 0); 2827 btrfs_close_extra_devices(fs_devices, 0);
2692 2828
2693 if (!fs_devices->latest_bdev) { 2829 if (!fs_devices->latest_bdev) {
2694 printk(KERN_ERR "BTRFS: failed to read devices on %s\n", 2830 printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
@@ -2714,61 +2850,9 @@ retry_root_backup:
2714 tree_root->commit_root = btrfs_root_node(tree_root); 2850 tree_root->commit_root = btrfs_root_node(tree_root);
2715 btrfs_set_root_refs(&tree_root->root_item, 1); 2851 btrfs_set_root_refs(&tree_root->root_item, 1);
2716 2852
2717 location.objectid = BTRFS_EXTENT_TREE_OBJECTID; 2853 ret = btrfs_read_roots(fs_info, tree_root);
2718 location.type = BTRFS_ROOT_ITEM_KEY; 2854 if (ret)
2719 location.offset = 0;
2720
2721 extent_root = btrfs_read_tree_root(tree_root, &location);
2722 if (IS_ERR(extent_root)) {
2723 ret = PTR_ERR(extent_root);
2724 goto recovery_tree_root;
2725 }
2726 set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state);
2727 fs_info->extent_root = extent_root;
2728
2729 location.objectid = BTRFS_DEV_TREE_OBJECTID;
2730 dev_root = btrfs_read_tree_root(tree_root, &location);
2731 if (IS_ERR(dev_root)) {
2732 ret = PTR_ERR(dev_root);
2733 goto recovery_tree_root;
2734 }
2735 set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state);
2736 fs_info->dev_root = dev_root;
2737 btrfs_init_devices_late(fs_info);
2738
2739 location.objectid = BTRFS_CSUM_TREE_OBJECTID;
2740 csum_root = btrfs_read_tree_root(tree_root, &location);
2741 if (IS_ERR(csum_root)) {
2742 ret = PTR_ERR(csum_root);
2743 goto recovery_tree_root; 2855 goto recovery_tree_root;
2744 }
2745 set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state);
2746 fs_info->csum_root = csum_root;
2747
2748 location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
2749 quota_root = btrfs_read_tree_root(tree_root, &location);
2750 if (!IS_ERR(quota_root)) {
2751 set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state);
2752 fs_info->quota_enabled = 1;
2753 fs_info->pending_quota_state = 1;
2754 fs_info->quota_root = quota_root;
2755 }
2756
2757 location.objectid = BTRFS_UUID_TREE_OBJECTID;
2758 uuid_root = btrfs_read_tree_root(tree_root, &location);
2759 if (IS_ERR(uuid_root)) {
2760 ret = PTR_ERR(uuid_root);
2761 if (ret != -ENOENT)
2762 goto recovery_tree_root;
2763 create_uuid_tree = true;
2764 check_uuid_tree = false;
2765 } else {
2766 set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state);
2767 fs_info->uuid_root = uuid_root;
2768 create_uuid_tree = false;
2769 check_uuid_tree =
2770 generation != btrfs_super_uuid_tree_generation(disk_super);
2771 }
2772 2856
2773 fs_info->generation = generation; 2857 fs_info->generation = generation;
2774 fs_info->last_trans_committed = generation; 2858 fs_info->last_trans_committed = generation;
@@ -2792,7 +2876,7 @@ retry_root_backup:
2792 goto fail_block_groups; 2876 goto fail_block_groups;
2793 } 2877 }
2794 2878
2795 btrfs_close_extra_devices(fs_info, fs_devices, 1); 2879 btrfs_close_extra_devices(fs_devices, 1);
2796 2880
2797 ret = btrfs_sysfs_add_one(fs_info); 2881 ret = btrfs_sysfs_add_one(fs_info);
2798 if (ret) { 2882 if (ret) {
@@ -2806,7 +2890,7 @@ retry_root_backup:
2806 goto fail_sysfs; 2890 goto fail_sysfs;
2807 } 2891 }
2808 2892
2809 ret = btrfs_read_block_groups(extent_root); 2893 ret = btrfs_read_block_groups(fs_info->extent_root);
2810 if (ret) { 2894 if (ret) {
2811 printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret); 2895 printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
2812 goto fail_sysfs; 2896 goto fail_sysfs;
@@ -2864,48 +2948,11 @@ retry_root_backup:
2864 2948
2865 /* do not make disk changes in broken FS */ 2949 /* do not make disk changes in broken FS */
2866 if (btrfs_super_log_root(disk_super) != 0) { 2950 if (btrfs_super_log_root(disk_super) != 0) {
2867 u64 bytenr = btrfs_super_log_root(disk_super); 2951 ret = btrfs_replay_log(fs_info, fs_devices);
2868
2869 if (fs_devices->rw_devices == 0) {
2870 printk(KERN_WARNING "BTRFS: log replay required "
2871 "on RO media\n");
2872 err = -EIO;
2873 goto fail_qgroup;
2874 }
2875
2876 log_tree_root = btrfs_alloc_root(fs_info);
2877 if (!log_tree_root) {
2878 err = -ENOMEM;
2879 goto fail_qgroup;
2880 }
2881
2882 __setup_root(nodesize, sectorsize, stripesize,
2883 log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
2884
2885 log_tree_root->node = read_tree_block(tree_root, bytenr,
2886 generation + 1);
2887 if (!log_tree_root->node ||
2888 !extent_buffer_uptodate(log_tree_root->node)) {
2889 printk(KERN_ERR "BTRFS: failed to read log tree\n");
2890 free_extent_buffer(log_tree_root->node);
2891 kfree(log_tree_root);
2892 goto fail_qgroup;
2893 }
2894 /* returns with log_tree_root freed on success */
2895 ret = btrfs_recover_log_trees(log_tree_root);
2896 if (ret) { 2952 if (ret) {
2897 btrfs_error(tree_root->fs_info, ret, 2953 err = ret;
2898 "Failed to recover log tree");
2899 free_extent_buffer(log_tree_root->node);
2900 kfree(log_tree_root);
2901 goto fail_qgroup; 2954 goto fail_qgroup;
2902 } 2955 }
2903
2904 if (sb->s_flags & MS_RDONLY) {
2905 ret = btrfs_commit_super(tree_root);
2906 if (ret)
2907 goto fail_qgroup;
2908 }
2909 } 2956 }
2910 2957
2911 ret = btrfs_find_orphan_roots(tree_root); 2958 ret = btrfs_find_orphan_roots(tree_root);
@@ -2966,7 +3013,7 @@ retry_root_backup:
2966 3013
2967 btrfs_qgroup_rescan_resume(fs_info); 3014 btrfs_qgroup_rescan_resume(fs_info);
2968 3015
2969 if (create_uuid_tree) { 3016 if (!fs_info->uuid_root) {
2970 pr_info("BTRFS: creating UUID tree\n"); 3017 pr_info("BTRFS: creating UUID tree\n");
2971 ret = btrfs_create_uuid_tree(fs_info); 3018 ret = btrfs_create_uuid_tree(fs_info);
2972 if (ret) { 3019 if (ret) {
@@ -2975,8 +3022,9 @@ retry_root_backup:
2975 close_ctree(tree_root); 3022 close_ctree(tree_root);
2976 return ret; 3023 return ret;
2977 } 3024 }
2978 } else if (check_uuid_tree || 3025 } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
2979 btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) { 3026 fs_info->generation !=
3027 btrfs_super_uuid_tree_generation(disk_super)) {
2980 pr_info("BTRFS: checking UUID tree\n"); 3028 pr_info("BTRFS: checking UUID tree\n");
2981 ret = btrfs_check_uuid_tree(fs_info); 3029 ret = btrfs_check_uuid_tree(fs_info);
2982 if (ret) { 3030 if (ret) {
@@ -3668,7 +3716,7 @@ void close_ctree(struct btrfs_root *root)
3668 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 3716 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3669 ret = btrfs_commit_super(root); 3717 ret = btrfs_commit_super(root);
3670 if (ret) 3718 if (ret)
3671 btrfs_err(root->fs_info, "commit super ret %d", ret); 3719 btrfs_err(fs_info, "commit super ret %d", ret);
3672 } 3720 }
3673 3721
3674 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) 3722 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
@@ -3680,10 +3728,10 @@ void close_ctree(struct btrfs_root *root)
3680 fs_info->closing = 2; 3728 fs_info->closing = 2;
3681 smp_mb(); 3729 smp_mb();
3682 3730
3683 btrfs_free_qgroup_config(root->fs_info); 3731 btrfs_free_qgroup_config(fs_info);
3684 3732
3685 if (percpu_counter_sum(&fs_info->delalloc_bytes)) { 3733 if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
3686 btrfs_info(root->fs_info, "at unmount delalloc count %lld", 3734 btrfs_info(fs_info, "at unmount delalloc count %lld",
3687 percpu_counter_sum(&fs_info->delalloc_bytes)); 3735 percpu_counter_sum(&fs_info->delalloc_bytes));
3688 } 3736 }
3689 3737
@@ -4134,7 +4182,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
4134 4182
4135 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); 4183 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
4136 while (start <= end) { 4184 while (start <= end) {
4137 eb = btrfs_find_tree_block(root, start); 4185 eb = btrfs_find_tree_block(root->fs_info, start);
4138 start += root->nodesize; 4186 start += root->nodesize;
4139 if (!eb) 4187 if (!eb)
4140 continue; 4188 continue;
@@ -4285,7 +4333,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
4285 return 0; 4333 return 0;
4286} 4334}
4287 4335
4288static struct extent_io_ops btree_extent_io_ops = { 4336static const struct extent_io_ops btree_extent_io_ops = {
4289 .readpage_end_io_hook = btree_readpage_end_io_hook, 4337 .readpage_end_io_hook = btree_readpage_end_io_hook,
4290 .readpage_io_failed_hook = btree_io_failed_hook, 4338 .readpage_io_failed_hook = btree_io_failed_hook,
4291 .submit_bio_hook = btree_submit_bio_hook, 4339 .submit_bio_hook = btree_submit_bio_hook,