diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
| -rw-r--r-- | fs/btrfs/disk-io.c | 2343 |
1 files changed, 2343 insertions, 0 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c new file mode 100644 index 000000000000..81a313874ae5 --- /dev/null +++ b/fs/btrfs/disk-io.c | |||
| @@ -0,0 +1,2343 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/version.h> | ||
| 20 | #include <linux/fs.h> | ||
| 21 | #include <linux/blkdev.h> | ||
| 22 | #include <linux/scatterlist.h> | ||
| 23 | #include <linux/swap.h> | ||
| 24 | #include <linux/radix-tree.h> | ||
| 25 | #include <linux/writeback.h> | ||
| 26 | #include <linux/buffer_head.h> | ||
| 27 | #include <linux/workqueue.h> | ||
| 28 | #include <linux/kthread.h> | ||
| 29 | #include <linux/freezer.h> | ||
| 30 | #include "compat.h" | ||
| 31 | #include "crc32c.h" | ||
| 32 | #include "ctree.h" | ||
| 33 | #include "disk-io.h" | ||
| 34 | #include "transaction.h" | ||
| 35 | #include "btrfs_inode.h" | ||
| 36 | #include "volumes.h" | ||
| 37 | #include "print-tree.h" | ||
| 38 | #include "async-thread.h" | ||
| 39 | #include "locking.h" | ||
| 40 | #include "ref-cache.h" | ||
| 41 | #include "tree-log.h" | ||
| 42 | |||
/* forward declaration of the extent_io callback table for the btree inode */
static struct extent_io_ops btree_extent_io_ops;
/*
 * forward declaration: end_workqueue_bio() installs this as the work
 * function before queueing an end_io_wq on a worker pool
 */
static void end_workqueue_fn(struct btrfs_work *work);
| 45 | |||
/*
 * end_io_wq structs are used to do processing in task context when an IO is
 * complete.  This is used during reads to verify checksums, and it is used
 * by writes to insert metadata for new file extents after IO is complete.
 *
 * One is allocated per bio by btrfs_bio_wq_end_io() and queued on a worker
 * pool by end_workqueue_bio().
 */
struct end_io_wq {
	/* the bio whose completion is being deferred */
	struct bio *bio;
	/* the bio's bi_end_io as it was before btrfs_bio_wq_end_io() */
	bio_end_io_t *end_io;
	/* the bio's bi_private as it was before btrfs_bio_wq_end_io() */
	void *private;
	struct btrfs_fs_info *info;
	/* completion status handed to end_workqueue_bio() */
	int error;
	/* non-zero selects the metadata endio worker pools */
	int metadata;
	/* not touched by the code visible in this part of the file */
	struct list_head list;
	struct btrfs_work work;
};
| 61 | |||
/*
 * async submit bios are used to offload expensive checksumming
 * onto the worker threads.  They checksum file and metadata bios
 * just before they are sent down the IO stack.
 *
 * Filled in by btrfs_wq_submit_bio() and released by run_one_async_free().
 */
struct async_submit_bio {
	struct inode *inode;
	struct bio *bio;
	/* not touched by the code visible in this part of the file */
	struct list_head list;
	/* called from run_one_async_start() */
	extent_submit_bio_hook_t *submit_bio_start;
	/* called from run_one_async_done() */
	extent_submit_bio_hook_t *submit_bio_done;
	/* rw, mirror_num and bio_flags are passed unchanged to both hooks */
	int rw;
	int mirror_num;
	unsigned long bio_flags;
	struct btrfs_work work;
};
| 78 | |||
| 79 | /* | ||
| 80 | * extents on the btree inode are pretty simple, there's one extent | ||
| 81 | * that covers the entire device | ||
| 82 | */ | ||
| 83 | static struct extent_map *btree_get_extent(struct inode *inode, | ||
| 84 | struct page *page, size_t page_offset, u64 start, u64 len, | ||
| 85 | int create) | ||
| 86 | { | ||
| 87 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 88 | struct extent_map *em; | ||
| 89 | int ret; | ||
| 90 | |||
| 91 | spin_lock(&em_tree->lock); | ||
| 92 | em = lookup_extent_mapping(em_tree, start, len); | ||
| 93 | if (em) { | ||
| 94 | em->bdev = | ||
| 95 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
| 96 | spin_unlock(&em_tree->lock); | ||
| 97 | goto out; | ||
| 98 | } | ||
| 99 | spin_unlock(&em_tree->lock); | ||
| 100 | |||
| 101 | em = alloc_extent_map(GFP_NOFS); | ||
| 102 | if (!em) { | ||
| 103 | em = ERR_PTR(-ENOMEM); | ||
| 104 | goto out; | ||
| 105 | } | ||
| 106 | em->start = 0; | ||
| 107 | em->len = (u64)-1; | ||
| 108 | em->block_len = (u64)-1; | ||
| 109 | em->block_start = 0; | ||
| 110 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
| 111 | |||
| 112 | spin_lock(&em_tree->lock); | ||
| 113 | ret = add_extent_mapping(em_tree, em); | ||
| 114 | if (ret == -EEXIST) { | ||
| 115 | u64 failed_start = em->start; | ||
| 116 | u64 failed_len = em->len; | ||
| 117 | |||
| 118 | free_extent_map(em); | ||
| 119 | em = lookup_extent_mapping(em_tree, start, len); | ||
| 120 | if (em) { | ||
| 121 | ret = 0; | ||
| 122 | } else { | ||
| 123 | em = lookup_extent_mapping(em_tree, failed_start, | ||
| 124 | failed_len); | ||
| 125 | ret = -EIO; | ||
| 126 | } | ||
| 127 | } else if (ret) { | ||
| 128 | free_extent_map(em); | ||
| 129 | em = NULL; | ||
| 130 | } | ||
| 131 | spin_unlock(&em_tree->lock); | ||
| 132 | |||
| 133 | if (ret) | ||
| 134 | em = ERR_PTR(ret); | ||
| 135 | out: | ||
| 136 | return em; | ||
| 137 | } | ||
| 138 | |||
| 139 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) | ||
| 140 | { | ||
| 141 | return btrfs_crc32c(seed, data, len); | ||
| 142 | } | ||
| 143 | |||
| 144 | void btrfs_csum_final(u32 crc, char *result) | ||
| 145 | { | ||
| 146 | *(__le32 *)result = ~cpu_to_le32(crc); | ||
| 147 | } | ||
| 148 | |||
| 149 | /* | ||
| 150 | * compute the csum for a btree block, and either verify it or write it | ||
| 151 | * into the csum field of the block. | ||
| 152 | */ | ||
| 153 | static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | ||
| 154 | int verify) | ||
| 155 | { | ||
| 156 | u16 csum_size = | ||
| 157 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
| 158 | char *result = NULL; | ||
| 159 | unsigned long len; | ||
| 160 | unsigned long cur_len; | ||
| 161 | unsigned long offset = BTRFS_CSUM_SIZE; | ||
| 162 | char *map_token = NULL; | ||
| 163 | char *kaddr; | ||
| 164 | unsigned long map_start; | ||
| 165 | unsigned long map_len; | ||
| 166 | int err; | ||
| 167 | u32 crc = ~(u32)0; | ||
| 168 | unsigned long inline_result; | ||
| 169 | |||
| 170 | len = buf->len - offset; | ||
| 171 | while (len > 0) { | ||
| 172 | err = map_private_extent_buffer(buf, offset, 32, | ||
| 173 | &map_token, &kaddr, | ||
| 174 | &map_start, &map_len, KM_USER0); | ||
| 175 | if (err) | ||
| 176 | return 1; | ||
| 177 | cur_len = min(len, map_len - (offset - map_start)); | ||
| 178 | crc = btrfs_csum_data(root, kaddr + offset - map_start, | ||
| 179 | crc, cur_len); | ||
| 180 | len -= cur_len; | ||
| 181 | offset += cur_len; | ||
| 182 | unmap_extent_buffer(buf, map_token, KM_USER0); | ||
| 183 | } | ||
| 184 | if (csum_size > sizeof(inline_result)) { | ||
| 185 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); | ||
| 186 | if (!result) | ||
| 187 | return 1; | ||
| 188 | } else { | ||
| 189 | result = (char *)&inline_result; | ||
| 190 | } | ||
| 191 | |||
| 192 | btrfs_csum_final(crc, result); | ||
| 193 | |||
| 194 | if (verify) { | ||
| 195 | if (memcmp_extent_buffer(buf, result, 0, csum_size)) { | ||
| 196 | u32 val; | ||
| 197 | u32 found = 0; | ||
| 198 | memcpy(&found, result, csum_size); | ||
| 199 | |||
| 200 | read_extent_buffer(buf, &val, 0, csum_size); | ||
| 201 | printk(KERN_INFO "btrfs: %s checksum verify failed " | ||
| 202 | "on %llu wanted %X found %X level %d\n", | ||
| 203 | root->fs_info->sb->s_id, | ||
| 204 | buf->start, val, found, btrfs_header_level(buf)); | ||
| 205 | if (result != (char *)&inline_result) | ||
| 206 | kfree(result); | ||
| 207 | return 1; | ||
| 208 | } | ||
| 209 | } else { | ||
| 210 | write_extent_buffer(buf, result, 0, csum_size); | ||
| 211 | } | ||
| 212 | if (result != (char *)&inline_result) | ||
| 213 | kfree(result); | ||
| 214 | return 0; | ||
| 215 | } | ||
| 216 | |||
| 217 | /* | ||
| 218 | * we can't consider a given block up to date unless the transid of the | ||
| 219 | * block matches the transid in the parent node's pointer. This is how we | ||
| 220 | * detect blocks that either didn't get written at all or got written | ||
| 221 | * in the wrong place. | ||
| 222 | */ | ||
| 223 | static int verify_parent_transid(struct extent_io_tree *io_tree, | ||
| 224 | struct extent_buffer *eb, u64 parent_transid) | ||
| 225 | { | ||
| 226 | int ret; | ||
| 227 | |||
| 228 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | ||
| 229 | return 0; | ||
| 230 | |||
| 231 | lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); | ||
| 232 | if (extent_buffer_uptodate(io_tree, eb) && | ||
| 233 | btrfs_header_generation(eb) == parent_transid) { | ||
| 234 | ret = 0; | ||
| 235 | goto out; | ||
| 236 | } | ||
| 237 | printk("parent transid verify failed on %llu wanted %llu found %llu\n", | ||
| 238 | (unsigned long long)eb->start, | ||
| 239 | (unsigned long long)parent_transid, | ||
| 240 | (unsigned long long)btrfs_header_generation(eb)); | ||
| 241 | ret = 1; | ||
| 242 | clear_extent_buffer_uptodate(io_tree, eb); | ||
| 243 | out: | ||
| 244 | unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, | ||
| 245 | GFP_NOFS); | ||
| 246 | return ret; | ||
| 247 | } | ||
| 248 | |||
| 249 | /* | ||
| 250 | * helper to read a given tree block, doing retries as required when | ||
| 251 | * the checksums don't match and we have alternate mirrors to try. | ||
| 252 | */ | ||
| 253 | static int btree_read_extent_buffer_pages(struct btrfs_root *root, | ||
| 254 | struct extent_buffer *eb, | ||
| 255 | u64 start, u64 parent_transid) | ||
| 256 | { | ||
| 257 | struct extent_io_tree *io_tree; | ||
| 258 | int ret; | ||
| 259 | int num_copies = 0; | ||
| 260 | int mirror_num = 0; | ||
| 261 | |||
| 262 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | ||
| 263 | while (1) { | ||
| 264 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | ||
| 265 | btree_get_extent, mirror_num); | ||
| 266 | if (!ret && | ||
| 267 | !verify_parent_transid(io_tree, eb, parent_transid)) | ||
| 268 | return ret; | ||
| 269 | |||
| 270 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | ||
| 271 | eb->start, eb->len); | ||
| 272 | if (num_copies == 1) | ||
| 273 | return ret; | ||
| 274 | |||
| 275 | mirror_num++; | ||
| 276 | if (mirror_num > num_copies) | ||
| 277 | return ret; | ||
| 278 | } | ||
| 279 | return -EIO; | ||
| 280 | } | ||
| 281 | |||
| 282 | /* | ||
| 283 | * checksum a dirty tree block before IO. This has extra checks to make sure | ||
| 284 | * we only fill in the checksum field in the first page of a multi-page block | ||
| 285 | */ | ||
| 286 | |||
| 287 | static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | ||
| 288 | { | ||
| 289 | struct extent_io_tree *tree; | ||
| 290 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
| 291 | u64 found_start; | ||
| 292 | int found_level; | ||
| 293 | unsigned long len; | ||
| 294 | struct extent_buffer *eb; | ||
| 295 | int ret; | ||
| 296 | |||
| 297 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 298 | |||
| 299 | if (page->private == EXTENT_PAGE_PRIVATE) | ||
| 300 | goto out; | ||
| 301 | if (!page->private) | ||
| 302 | goto out; | ||
| 303 | len = page->private >> 2; | ||
| 304 | WARN_ON(len == 0); | ||
| 305 | |||
| 306 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | ||
| 307 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | ||
| 308 | btrfs_header_generation(eb)); | ||
| 309 | BUG_ON(ret); | ||
| 310 | found_start = btrfs_header_bytenr(eb); | ||
| 311 | if (found_start != start) { | ||
| 312 | WARN_ON(1); | ||
| 313 | goto err; | ||
| 314 | } | ||
| 315 | if (eb->first_page != page) { | ||
| 316 | WARN_ON(1); | ||
| 317 | goto err; | ||
| 318 | } | ||
| 319 | if (!PageUptodate(page)) { | ||
| 320 | WARN_ON(1); | ||
| 321 | goto err; | ||
| 322 | } | ||
| 323 | found_level = btrfs_header_level(eb); | ||
| 324 | |||
| 325 | csum_tree_block(root, eb, 0); | ||
| 326 | err: | ||
| 327 | free_extent_buffer(eb); | ||
| 328 | out: | ||
| 329 | return 0; | ||
| 330 | } | ||
| 331 | |||
| 332 | static int check_tree_block_fsid(struct btrfs_root *root, | ||
| 333 | struct extent_buffer *eb) | ||
| 334 | { | ||
| 335 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
| 336 | u8 fsid[BTRFS_UUID_SIZE]; | ||
| 337 | int ret = 1; | ||
| 338 | |||
| 339 | read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb), | ||
| 340 | BTRFS_FSID_SIZE); | ||
| 341 | while (fs_devices) { | ||
| 342 | if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { | ||
| 343 | ret = 0; | ||
| 344 | break; | ||
| 345 | } | ||
| 346 | fs_devices = fs_devices->seed; | ||
| 347 | } | ||
| 348 | return ret; | ||
| 349 | } | ||
| 350 | |||
| 351 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | ||
| 352 | struct extent_state *state) | ||
| 353 | { | ||
| 354 | struct extent_io_tree *tree; | ||
| 355 | u64 found_start; | ||
| 356 | int found_level; | ||
| 357 | unsigned long len; | ||
| 358 | struct extent_buffer *eb; | ||
| 359 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
| 360 | int ret = 0; | ||
| 361 | |||
| 362 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 363 | if (page->private == EXTENT_PAGE_PRIVATE) | ||
| 364 | goto out; | ||
| 365 | if (!page->private) | ||
| 366 | goto out; | ||
| 367 | |||
| 368 | len = page->private >> 2; | ||
| 369 | WARN_ON(len == 0); | ||
| 370 | |||
| 371 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | ||
| 372 | |||
| 373 | found_start = btrfs_header_bytenr(eb); | ||
| 374 | if (found_start != start) { | ||
| 375 | printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", | ||
| 376 | (unsigned long long)found_start, | ||
| 377 | (unsigned long long)eb->start); | ||
| 378 | ret = -EIO; | ||
| 379 | goto err; | ||
| 380 | } | ||
| 381 | if (eb->first_page != page) { | ||
| 382 | printk(KERN_INFO "btrfs bad first page %lu %lu\n", | ||
| 383 | eb->first_page->index, page->index); | ||
| 384 | WARN_ON(1); | ||
| 385 | ret = -EIO; | ||
| 386 | goto err; | ||
| 387 | } | ||
| 388 | if (check_tree_block_fsid(root, eb)) { | ||
| 389 | printk(KERN_INFO "btrfs bad fsid on block %llu\n", | ||
| 390 | (unsigned long long)eb->start); | ||
| 391 | ret = -EIO; | ||
| 392 | goto err; | ||
| 393 | } | ||
| 394 | found_level = btrfs_header_level(eb); | ||
| 395 | |||
| 396 | ret = csum_tree_block(root, eb, 1); | ||
| 397 | if (ret) | ||
| 398 | ret = -EIO; | ||
| 399 | |||
| 400 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | ||
| 401 | end = eb->start + end - 1; | ||
| 402 | err: | ||
| 403 | free_extent_buffer(eb); | ||
| 404 | out: | ||
| 405 | return ret; | ||
| 406 | } | ||
| 407 | |||
| 408 | static void end_workqueue_bio(struct bio *bio, int err) | ||
| 409 | { | ||
| 410 | struct end_io_wq *end_io_wq = bio->bi_private; | ||
| 411 | struct btrfs_fs_info *fs_info; | ||
| 412 | |||
| 413 | fs_info = end_io_wq->info; | ||
| 414 | end_io_wq->error = err; | ||
| 415 | end_io_wq->work.func = end_workqueue_fn; | ||
| 416 | end_io_wq->work.flags = 0; | ||
| 417 | |||
| 418 | if (bio->bi_rw & (1 << BIO_RW)) { | ||
| 419 | if (end_io_wq->metadata) | ||
| 420 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | ||
| 421 | &end_io_wq->work); | ||
| 422 | else | ||
| 423 | btrfs_queue_worker(&fs_info->endio_write_workers, | ||
| 424 | &end_io_wq->work); | ||
| 425 | } else { | ||
| 426 | if (end_io_wq->metadata) | ||
| 427 | btrfs_queue_worker(&fs_info->endio_meta_workers, | ||
| 428 | &end_io_wq->work); | ||
| 429 | else | ||
| 430 | btrfs_queue_worker(&fs_info->endio_workers, | ||
| 431 | &end_io_wq->work); | ||
| 432 | } | ||
| 433 | } | ||
| 434 | |||
| 435 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | ||
| 436 | int metadata) | ||
| 437 | { | ||
| 438 | struct end_io_wq *end_io_wq; | ||
| 439 | end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); | ||
| 440 | if (!end_io_wq) | ||
| 441 | return -ENOMEM; | ||
| 442 | |||
| 443 | end_io_wq->private = bio->bi_private; | ||
| 444 | end_io_wq->end_io = bio->bi_end_io; | ||
| 445 | end_io_wq->info = info; | ||
| 446 | end_io_wq->error = 0; | ||
| 447 | end_io_wq->bio = bio; | ||
| 448 | end_io_wq->metadata = metadata; | ||
| 449 | |||
| 450 | bio->bi_private = end_io_wq; | ||
| 451 | bio->bi_end_io = end_workqueue_bio; | ||
| 452 | return 0; | ||
| 453 | } | ||
| 454 | |||
| 455 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) | ||
| 456 | { | ||
| 457 | unsigned long limit = min_t(unsigned long, | ||
| 458 | info->workers.max_workers, | ||
| 459 | info->fs_devices->open_devices); | ||
| 460 | return 256 * limit; | ||
| 461 | } | ||
| 462 | |||
| 463 | int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) | ||
| 464 | { | ||
| 465 | return atomic_read(&info->nr_async_bios) > | ||
| 466 | btrfs_async_submit_limit(info); | ||
| 467 | } | ||
| 468 | |||
| 469 | static void run_one_async_start(struct btrfs_work *work) | ||
| 470 | { | ||
| 471 | struct btrfs_fs_info *fs_info; | ||
| 472 | struct async_submit_bio *async; | ||
| 473 | |||
| 474 | async = container_of(work, struct async_submit_bio, work); | ||
| 475 | fs_info = BTRFS_I(async->inode)->root->fs_info; | ||
| 476 | async->submit_bio_start(async->inode, async->rw, async->bio, | ||
| 477 | async->mirror_num, async->bio_flags); | ||
| 478 | } | ||
| 479 | |||
| 480 | static void run_one_async_done(struct btrfs_work *work) | ||
| 481 | { | ||
| 482 | struct btrfs_fs_info *fs_info; | ||
| 483 | struct async_submit_bio *async; | ||
| 484 | int limit; | ||
| 485 | |||
| 486 | async = container_of(work, struct async_submit_bio, work); | ||
| 487 | fs_info = BTRFS_I(async->inode)->root->fs_info; | ||
| 488 | |||
| 489 | limit = btrfs_async_submit_limit(fs_info); | ||
| 490 | limit = limit * 2 / 3; | ||
| 491 | |||
| 492 | atomic_dec(&fs_info->nr_async_submits); | ||
| 493 | |||
| 494 | if (atomic_read(&fs_info->nr_async_submits) < limit && | ||
| 495 | waitqueue_active(&fs_info->async_submit_wait)) | ||
| 496 | wake_up(&fs_info->async_submit_wait); | ||
| 497 | |||
| 498 | async->submit_bio_done(async->inode, async->rw, async->bio, | ||
| 499 | async->mirror_num, async->bio_flags); | ||
| 500 | } | ||
| 501 | |||
| 502 | static void run_one_async_free(struct btrfs_work *work) | ||
| 503 | { | ||
| 504 | struct async_submit_bio *async; | ||
| 505 | |||
| 506 | async = container_of(work, struct async_submit_bio, work); | ||
| 507 | kfree(async); | ||
| 508 | } | ||
| 509 | |||
| 510 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | ||
| 511 | int rw, struct bio *bio, int mirror_num, | ||
| 512 | unsigned long bio_flags, | ||
| 513 | extent_submit_bio_hook_t *submit_bio_start, | ||
| 514 | extent_submit_bio_hook_t *submit_bio_done) | ||
| 515 | { | ||
| 516 | struct async_submit_bio *async; | ||
| 517 | |||
| 518 | async = kmalloc(sizeof(*async), GFP_NOFS); | ||
| 519 | if (!async) | ||
| 520 | return -ENOMEM; | ||
| 521 | |||
| 522 | async->inode = inode; | ||
| 523 | async->rw = rw; | ||
| 524 | async->bio = bio; | ||
| 525 | async->mirror_num = mirror_num; | ||
| 526 | async->submit_bio_start = submit_bio_start; | ||
| 527 | async->submit_bio_done = submit_bio_done; | ||
| 528 | |||
| 529 | async->work.func = run_one_async_start; | ||
| 530 | async->work.ordered_func = run_one_async_done; | ||
| 531 | async->work.ordered_free = run_one_async_free; | ||
| 532 | |||
| 533 | async->work.flags = 0; | ||
| 534 | async->bio_flags = bio_flags; | ||
| 535 | |||
| 536 | atomic_inc(&fs_info->nr_async_submits); | ||
| 537 | btrfs_queue_worker(&fs_info->workers, &async->work); | ||
| 538 | #if 0 | ||
| 539 | int limit = btrfs_async_submit_limit(fs_info); | ||
| 540 | if (atomic_read(&fs_info->nr_async_submits) > limit) { | ||
| 541 | wait_event_timeout(fs_info->async_submit_wait, | ||
| 542 | (atomic_read(&fs_info->nr_async_submits) < limit), | ||
| 543 | HZ/10); | ||
| 544 | |||
| 545 | wait_event_timeout(fs_info->async_submit_wait, | ||
| 546 | (atomic_read(&fs_info->nr_async_bios) < limit), | ||
| 547 | HZ/10); | ||
| 548 | } | ||
| 549 | #endif | ||
| 550 | while (atomic_read(&fs_info->async_submit_draining) && | ||
| 551 | atomic_read(&fs_info->nr_async_submits)) { | ||
| 552 | wait_event(fs_info->async_submit_wait, | ||
| 553 | (atomic_read(&fs_info->nr_async_submits) == 0)); | ||
| 554 | } | ||
| 555 | |||
| 556 | return 0; | ||
| 557 | } | ||
| 558 | |||
| 559 | static int btree_csum_one_bio(struct bio *bio) | ||
| 560 | { | ||
| 561 | struct bio_vec *bvec = bio->bi_io_vec; | ||
| 562 | int bio_index = 0; | ||
| 563 | struct btrfs_root *root; | ||
| 564 | |||
| 565 | WARN_ON(bio->bi_vcnt <= 0); | ||
| 566 | while (bio_index < bio->bi_vcnt) { | ||
| 567 | root = BTRFS_I(bvec->bv_page->mapping->host)->root; | ||
| 568 | csum_dirty_buffer(root, bvec->bv_page); | ||
| 569 | bio_index++; | ||
| 570 | bvec++; | ||
| 571 | } | ||
| 572 | return 0; | ||
| 573 | } | ||
| 574 | |||
/*
 * submit_bio_start hook used by btree_submit_bio_hook() for writes.
 * Only @bio is used; always returns 0.
 */
static int __btree_submit_bio_start(struct inode *inode, int rw,
				    struct bio *bio, int mirror_num,
				    unsigned long bio_flags)
{
	/*
	 * this runs from the async submission worker: checksum the tree
	 * block pages here; the bio is mapped later by the done hook
	 */
	btree_csum_one_bio(bio);

	return 0;
}
| 586 | |||
| 587 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | ||
| 588 | int mirror_num, unsigned long bio_flags) | ||
| 589 | { | ||
| 590 | /* | ||
| 591 | * when we're called for a write, we're already in the async | ||
| 592 | * submission context. Just jump into btrfs_map_bio | ||
| 593 | */ | ||
| 594 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); | ||
| 595 | } | ||
| 596 | |||
| 597 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | ||
| 598 | int mirror_num, unsigned long bio_flags) | ||
| 599 | { | ||
| 600 | int ret; | ||
| 601 | |||
| 602 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, | ||
| 603 | bio, 1); | ||
| 604 | BUG_ON(ret); | ||
| 605 | |||
| 606 | if (!(rw & (1 << BIO_RW))) { | ||
| 607 | /* | ||
| 608 | * called for a read, do the setup so that checksum validation | ||
| 609 | * can happen in the async kernel threads | ||
| 610 | */ | ||
| 611 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | ||
| 612 | mirror_num, 0); | ||
| 613 | } | ||
| 614 | /* | ||
| 615 | * kthread helpers are used to submit writes so that checksumming | ||
| 616 | * can happen in parallel across all CPUs | ||
| 617 | */ | ||
| 618 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
| 619 | inode, rw, bio, mirror_num, 0, | ||
| 620 | __btree_submit_bio_start, | ||
| 621 | __btree_submit_bio_done); | ||
| 622 | } | ||
| 623 | |||
| 624 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | ||
| 625 | { | ||
| 626 | struct extent_io_tree *tree; | ||
| 627 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 628 | |||
| 629 | if (current->flags & PF_MEMALLOC) { | ||
| 630 | redirty_page_for_writepage(wbc, page); | ||
| 631 | unlock_page(page); | ||
| 632 | return 0; | ||
| 633 | } | ||
| 634 | return extent_write_full_page(tree, page, btree_get_extent, wbc); | ||
| 635 | } | ||
| 636 | |||
| 637 | static int btree_writepages(struct address_space *mapping, | ||
| 638 | struct writeback_control *wbc) | ||
| 639 | { | ||
| 640 | struct extent_io_tree *tree; | ||
| 641 | tree = &BTRFS_I(mapping->host)->io_tree; | ||
| 642 | if (wbc->sync_mode == WB_SYNC_NONE) { | ||
| 643 | u64 num_dirty; | ||
| 644 | u64 start = 0; | ||
| 645 | unsigned long thresh = 32 * 1024 * 1024; | ||
| 646 | |||
| 647 | if (wbc->for_kupdate) | ||
| 648 | return 0; | ||
| 649 | |||
| 650 | num_dirty = count_range_bits(tree, &start, (u64)-1, | ||
| 651 | thresh, EXTENT_DIRTY); | ||
| 652 | if (num_dirty < thresh) | ||
| 653 | return 0; | ||
| 654 | } | ||
| 655 | return extent_writepages(tree, mapping, btree_get_extent, wbc); | ||
| 656 | } | ||
| 657 | |||
| 658 | static int btree_readpage(struct file *file, struct page *page) | ||
| 659 | { | ||
| 660 | struct extent_io_tree *tree; | ||
| 661 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 662 | return extent_read_full_page(tree, page, btree_get_extent); | ||
| 663 | } | ||
| 664 | |||
| 665 | static int btree_releasepage(struct page *page, gfp_t gfp_flags) | ||
| 666 | { | ||
| 667 | struct extent_io_tree *tree; | ||
| 668 | struct extent_map_tree *map; | ||
| 669 | int ret; | ||
| 670 | |||
| 671 | if (PageWriteback(page) || PageDirty(page)) | ||
| 672 | return 0; | ||
| 673 | |||
| 674 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 675 | map = &BTRFS_I(page->mapping->host)->extent_tree; | ||
| 676 | |||
| 677 | ret = try_release_extent_state(map, tree, page, gfp_flags); | ||
| 678 | if (!ret) | ||
| 679 | return 0; | ||
| 680 | |||
| 681 | ret = try_release_extent_buffer(tree, page); | ||
| 682 | if (ret == 1) { | ||
| 683 | ClearPagePrivate(page); | ||
| 684 | set_page_private(page, 0); | ||
| 685 | page_cache_release(page); | ||
| 686 | } | ||
| 687 | |||
| 688 | return ret; | ||
| 689 | } | ||
| 690 | |||
| 691 | static void btree_invalidatepage(struct page *page, unsigned long offset) | ||
| 692 | { | ||
| 693 | struct extent_io_tree *tree; | ||
| 694 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 695 | extent_invalidatepage(tree, page, offset); | ||
| 696 | btree_releasepage(page, GFP_NOFS); | ||
| 697 | if (PagePrivate(page)) { | ||
| 698 | printk(KERN_WARNING "btrfs warning page private not zero " | ||
| 699 | "on page %llu\n", (unsigned long long)page_offset(page)); | ||
| 700 | ClearPagePrivate(page); | ||
| 701 | set_page_private(page, 0); | ||
| 702 | page_cache_release(page); | ||
| 703 | } | ||
| 704 | } | ||
| 705 | |||
/*
 * Disabled buffer_head based version of btree_writepage(), excluded from
 * the build by #if 0.  It would not compile if re-enabled as it stands:
 * it reuses the name of the live btree_writepage() in this file, hands a
 * buffer_head to csum_tree_block() (which takes an extent_buffer here),
 * and refers to btree_get_block, which is not defined in the visible part
 * of this file.
 */
#if 0
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct buffer_head *bh;
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	struct buffer_head *head;
	if (!page_has_buffers(page)) {
		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
					(1 << BH_Dirty)|(1 << BH_Uptodate));
	}
	head = page_buffers(page);
	bh = head;
	/* checksum every dirty buffer attached to the page */
	do {
		if (buffer_dirty(bh))
			csum_tree_block(root, bh, 0);
		bh = bh->b_this_page;
	} while (bh != head);
	return block_write_full_page(page, btree_get_block, wbc);
}
#endif
| 726 | |||
/*
 * Address space operations table built from the btree_* page cache hooks
 * defined above; only sync_page uses a helper from outside this file.
 */
static struct address_space_operations btree_aops = {
	.readpage	= btree_readpage,
	.writepage	= btree_writepage,
	.writepages	= btree_writepages,
	.releasepage	= btree_releasepage,
	.invalidatepage = btree_invalidatepage,
	.sync_page	= block_sync_page,
};
| 735 | |||
| 736 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | ||
| 737 | u64 parent_transid) | ||
| 738 | { | ||
| 739 | struct extent_buffer *buf = NULL; | ||
| 740 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 741 | int ret = 0; | ||
| 742 | |||
| 743 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 744 | if (!buf) | ||
| 745 | return 0; | ||
| 746 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, | ||
| 747 | buf, 0, 0, btree_get_extent, 0); | ||
| 748 | free_extent_buffer(buf); | ||
| 749 | return ret; | ||
| 750 | } | ||
| 751 | |||
| 752 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | ||
| 753 | u64 bytenr, u32 blocksize) | ||
| 754 | { | ||
| 755 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 756 | struct extent_buffer *eb; | ||
| 757 | eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, | ||
| 758 | bytenr, blocksize, GFP_NOFS); | ||
| 759 | return eb; | ||
| 760 | } | ||
| 761 | |||
| 762 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | ||
| 763 | u64 bytenr, u32 blocksize) | ||
| 764 | { | ||
| 765 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 766 | struct extent_buffer *eb; | ||
| 767 | |||
| 768 | eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, | ||
| 769 | bytenr, blocksize, NULL, GFP_NOFS); | ||
| 770 | return eb; | ||
| 771 | } | ||
| 772 | |||
| 773 | |||
| 774 | int btrfs_write_tree_block(struct extent_buffer *buf) | ||
| 775 | { | ||
| 776 | return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, | ||
| 777 | buf->start + buf->len - 1, WB_SYNC_ALL); | ||
| 778 | } | ||
| 779 | |||
| 780 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | ||
| 781 | { | ||
| 782 | return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, | ||
| 783 | buf->start, buf->start + buf->len - 1); | ||
| 784 | } | ||
| 785 | |||
| 786 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | ||
| 787 | u32 blocksize, u64 parent_transid) | ||
| 788 | { | ||
| 789 | struct extent_buffer *buf = NULL; | ||
| 790 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 791 | struct extent_io_tree *io_tree; | ||
| 792 | int ret; | ||
| 793 | |||
| 794 | io_tree = &BTRFS_I(btree_inode)->io_tree; | ||
| 795 | |||
| 796 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 797 | if (!buf) | ||
| 798 | return NULL; | ||
| 799 | |||
| 800 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | ||
| 801 | |||
| 802 | if (ret == 0) | ||
| 803 | buf->flags |= EXTENT_UPTODATE; | ||
| 804 | else | ||
| 805 | WARN_ON(1); | ||
| 806 | return buf; | ||
| 807 | |||
| 808 | } | ||
| 809 | |||
| 810 | int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | ||
| 811 | struct extent_buffer *buf) | ||
| 812 | { | ||
| 813 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 814 | if (btrfs_header_generation(buf) == | ||
| 815 | root->fs_info->running_transaction->transid) { | ||
| 816 | WARN_ON(!btrfs_tree_locked(buf)); | ||
| 817 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | ||
| 818 | buf); | ||
| 819 | } | ||
| 820 | return 0; | ||
| 821 | } | ||
| 822 | |||
| 823 | static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | ||
| 824 | u32 stripesize, struct btrfs_root *root, | ||
| 825 | struct btrfs_fs_info *fs_info, | ||
| 826 | u64 objectid) | ||
| 827 | { | ||
| 828 | root->node = NULL; | ||
| 829 | root->commit_root = NULL; | ||
| 830 | root->ref_tree = NULL; | ||
| 831 | root->sectorsize = sectorsize; | ||
| 832 | root->nodesize = nodesize; | ||
| 833 | root->leafsize = leafsize; | ||
| 834 | root->stripesize = stripesize; | ||
| 835 | root->ref_cows = 0; | ||
| 836 | root->track_dirty = 0; | ||
| 837 | |||
| 838 | root->fs_info = fs_info; | ||
| 839 | root->objectid = objectid; | ||
| 840 | root->last_trans = 0; | ||
| 841 | root->highest_inode = 0; | ||
| 842 | root->last_inode_alloc = 0; | ||
| 843 | root->name = NULL; | ||
| 844 | root->in_sysfs = 0; | ||
| 845 | |||
| 846 | INIT_LIST_HEAD(&root->dirty_list); | ||
| 847 | INIT_LIST_HEAD(&root->orphan_list); | ||
| 848 | INIT_LIST_HEAD(&root->dead_list); | ||
| 849 | spin_lock_init(&root->node_lock); | ||
| 850 | spin_lock_init(&root->list_lock); | ||
| 851 | mutex_init(&root->objectid_mutex); | ||
| 852 | mutex_init(&root->log_mutex); | ||
| 853 | extent_io_tree_init(&root->dirty_log_pages, | ||
| 854 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 855 | |||
| 856 | btrfs_leaf_ref_tree_init(&root->ref_tree_struct); | ||
| 857 | root->ref_tree = &root->ref_tree_struct; | ||
| 858 | |||
| 859 | memset(&root->root_key, 0, sizeof(root->root_key)); | ||
| 860 | memset(&root->root_item, 0, sizeof(root->root_item)); | ||
| 861 | memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); | ||
| 862 | memset(&root->root_kobj, 0, sizeof(root->root_kobj)); | ||
| 863 | root->defrag_trans_start = fs_info->generation; | ||
| 864 | init_completion(&root->kobj_unregister); | ||
| 865 | root->defrag_running = 0; | ||
| 866 | root->defrag_level = 0; | ||
| 867 | root->root_key.objectid = objectid; | ||
| 868 | root->anon_super.s_root = NULL; | ||
| 869 | root->anon_super.s_dev = 0; | ||
| 870 | INIT_LIST_HEAD(&root->anon_super.s_list); | ||
| 871 | INIT_LIST_HEAD(&root->anon_super.s_instances); | ||
| 872 | init_rwsem(&root->anon_super.s_umount); | ||
| 873 | |||
| 874 | return 0; | ||
| 875 | } | ||
| 876 | |||
| 877 | static int find_and_setup_root(struct btrfs_root *tree_root, | ||
| 878 | struct btrfs_fs_info *fs_info, | ||
| 879 | u64 objectid, | ||
| 880 | struct btrfs_root *root) | ||
| 881 | { | ||
| 882 | int ret; | ||
| 883 | u32 blocksize; | ||
| 884 | u64 generation; | ||
| 885 | |||
| 886 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
| 887 | tree_root->sectorsize, tree_root->stripesize, | ||
| 888 | root, fs_info, objectid); | ||
| 889 | ret = btrfs_find_last_root(tree_root, objectid, | ||
| 890 | &root->root_item, &root->root_key); | ||
| 891 | BUG_ON(ret); | ||
| 892 | |||
| 893 | generation = btrfs_root_generation(&root->root_item); | ||
| 894 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | ||
| 895 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | ||
| 896 | blocksize, generation); | ||
| 897 | BUG_ON(!root->node); | ||
| 898 | return 0; | ||
| 899 | } | ||
| 900 | |||
| 901 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 902 | struct btrfs_fs_info *fs_info) | ||
| 903 | { | ||
| 904 | struct extent_buffer *eb; | ||
| 905 | struct btrfs_root *log_root_tree = fs_info->log_root_tree; | ||
| 906 | u64 start = 0; | ||
| 907 | u64 end = 0; | ||
| 908 | int ret; | ||
| 909 | |||
| 910 | if (!log_root_tree) | ||
| 911 | return 0; | ||
| 912 | |||
| 913 | while (1) { | ||
| 914 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, | ||
| 915 | 0, &start, &end, EXTENT_DIRTY); | ||
| 916 | if (ret) | ||
| 917 | break; | ||
| 918 | |||
| 919 | clear_extent_dirty(&log_root_tree->dirty_log_pages, | ||
| 920 | start, end, GFP_NOFS); | ||
| 921 | } | ||
| 922 | eb = fs_info->log_root_tree->node; | ||
| 923 | |||
| 924 | WARN_ON(btrfs_header_level(eb) != 0); | ||
| 925 | WARN_ON(btrfs_header_nritems(eb) != 0); | ||
| 926 | |||
| 927 | ret = btrfs_free_reserved_extent(fs_info->tree_root, | ||
| 928 | eb->start, eb->len); | ||
| 929 | BUG_ON(ret); | ||
| 930 | |||
| 931 | free_extent_buffer(eb); | ||
| 932 | kfree(fs_info->log_root_tree); | ||
| 933 | fs_info->log_root_tree = NULL; | ||
| 934 | return 0; | ||
| 935 | } | ||
| 936 | |||
| 937 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 938 | struct btrfs_fs_info *fs_info) | ||
| 939 | { | ||
| 940 | struct btrfs_root *root; | ||
| 941 | struct btrfs_root *tree_root = fs_info->tree_root; | ||
| 942 | |||
| 943 | root = kzalloc(sizeof(*root), GFP_NOFS); | ||
| 944 | if (!root) | ||
| 945 | return -ENOMEM; | ||
| 946 | |||
| 947 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
| 948 | tree_root->sectorsize, tree_root->stripesize, | ||
| 949 | root, fs_info, BTRFS_TREE_LOG_OBJECTID); | ||
| 950 | |||
| 951 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; | ||
| 952 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 953 | root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; | ||
| 954 | root->ref_cows = 0; | ||
| 955 | |||
| 956 | root->node = btrfs_alloc_free_block(trans, root, root->leafsize, | ||
| 957 | 0, BTRFS_TREE_LOG_OBJECTID, | ||
| 958 | trans->transid, 0, 0, 0); | ||
| 959 | |||
| 960 | btrfs_set_header_nritems(root->node, 0); | ||
| 961 | btrfs_set_header_level(root->node, 0); | ||
| 962 | btrfs_set_header_bytenr(root->node, root->node->start); | ||
| 963 | btrfs_set_header_generation(root->node, trans->transid); | ||
| 964 | btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID); | ||
| 965 | |||
| 966 | write_extent_buffer(root->node, root->fs_info->fsid, | ||
| 967 | (unsigned long)btrfs_header_fsid(root->node), | ||
| 968 | BTRFS_FSID_SIZE); | ||
| 969 | btrfs_mark_buffer_dirty(root->node); | ||
| 970 | btrfs_tree_unlock(root->node); | ||
| 971 | fs_info->log_root_tree = root; | ||
| 972 | return 0; | ||
| 973 | } | ||
| 974 | |||
| 975 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | ||
| 976 | struct btrfs_key *location) | ||
| 977 | { | ||
| 978 | struct btrfs_root *root; | ||
| 979 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | ||
| 980 | struct btrfs_path *path; | ||
| 981 | struct extent_buffer *l; | ||
| 982 | u64 highest_inode; | ||
| 983 | u64 generation; | ||
| 984 | u32 blocksize; | ||
| 985 | int ret = 0; | ||
| 986 | |||
| 987 | root = kzalloc(sizeof(*root), GFP_NOFS); | ||
| 988 | if (!root) | ||
| 989 | return ERR_PTR(-ENOMEM); | ||
| 990 | if (location->offset == (u64)-1) { | ||
| 991 | ret = find_and_setup_root(tree_root, fs_info, | ||
| 992 | location->objectid, root); | ||
| 993 | if (ret) { | ||
| 994 | kfree(root); | ||
| 995 | return ERR_PTR(ret); | ||
| 996 | } | ||
| 997 | goto insert; | ||
| 998 | } | ||
| 999 | |||
| 1000 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
| 1001 | tree_root->sectorsize, tree_root->stripesize, | ||
| 1002 | root, fs_info, location->objectid); | ||
| 1003 | |||
| 1004 | path = btrfs_alloc_path(); | ||
| 1005 | BUG_ON(!path); | ||
| 1006 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | ||
| 1007 | if (ret != 0) { | ||
| 1008 | if (ret > 0) | ||
| 1009 | ret = -ENOENT; | ||
| 1010 | goto out; | ||
| 1011 | } | ||
| 1012 | l = path->nodes[0]; | ||
| 1013 | read_extent_buffer(l, &root->root_item, | ||
| 1014 | btrfs_item_ptr_offset(l, path->slots[0]), | ||
| 1015 | sizeof(root->root_item)); | ||
| 1016 | memcpy(&root->root_key, location, sizeof(*location)); | ||
| 1017 | ret = 0; | ||
| 1018 | out: | ||
| 1019 | btrfs_release_path(root, path); | ||
| 1020 | btrfs_free_path(path); | ||
| 1021 | if (ret) { | ||
| 1022 | kfree(root); | ||
| 1023 | return ERR_PTR(ret); | ||
| 1024 | } | ||
| 1025 | generation = btrfs_root_generation(&root->root_item); | ||
| 1026 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | ||
| 1027 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | ||
| 1028 | blocksize, generation); | ||
| 1029 | BUG_ON(!root->node); | ||
| 1030 | insert: | ||
| 1031 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 1032 | root->ref_cows = 1; | ||
| 1033 | ret = btrfs_find_highest_inode(root, &highest_inode); | ||
| 1034 | if (ret == 0) { | ||
| 1035 | root->highest_inode = highest_inode; | ||
| 1036 | root->last_inode_alloc = highest_inode; | ||
| 1037 | } | ||
| 1038 | } | ||
| 1039 | return root; | ||
| 1040 | } | ||
| 1041 | |||
| 1042 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
| 1043 | u64 root_objectid) | ||
| 1044 | { | ||
| 1045 | struct btrfs_root *root; | ||
| 1046 | |||
| 1047 | if (root_objectid == BTRFS_ROOT_TREE_OBJECTID) | ||
| 1048 | return fs_info->tree_root; | ||
| 1049 | if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) | ||
| 1050 | return fs_info->extent_root; | ||
| 1051 | |||
| 1052 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
| 1053 | (unsigned long)root_objectid); | ||
| 1054 | return root; | ||
| 1055 | } | ||
| 1056 | |||
| 1057 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | ||
| 1058 | struct btrfs_key *location) | ||
| 1059 | { | ||
| 1060 | struct btrfs_root *root; | ||
| 1061 | int ret; | ||
| 1062 | |||
| 1063 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | ||
| 1064 | return fs_info->tree_root; | ||
| 1065 | if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) | ||
| 1066 | return fs_info->extent_root; | ||
| 1067 | if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) | ||
| 1068 | return fs_info->chunk_root; | ||
| 1069 | if (location->objectid == BTRFS_DEV_TREE_OBJECTID) | ||
| 1070 | return fs_info->dev_root; | ||
| 1071 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) | ||
| 1072 | return fs_info->csum_root; | ||
| 1073 | |||
| 1074 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
| 1075 | (unsigned long)location->objectid); | ||
| 1076 | if (root) | ||
| 1077 | return root; | ||
| 1078 | |||
| 1079 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | ||
| 1080 | if (IS_ERR(root)) | ||
| 1081 | return root; | ||
| 1082 | |||
| 1083 | set_anon_super(&root->anon_super, NULL); | ||
| 1084 | |||
| 1085 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | ||
| 1086 | (unsigned long)root->root_key.objectid, | ||
| 1087 | root); | ||
| 1088 | if (ret) { | ||
| 1089 | free_extent_buffer(root->node); | ||
| 1090 | kfree(root); | ||
| 1091 | return ERR_PTR(ret); | ||
| 1092 | } | ||
| 1093 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | ||
| 1094 | ret = btrfs_find_dead_roots(fs_info->tree_root, | ||
| 1095 | root->root_key.objectid, root); | ||
| 1096 | BUG_ON(ret); | ||
| 1097 | btrfs_orphan_cleanup(root); | ||
| 1098 | } | ||
| 1099 | return root; | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | ||
| 1103 | struct btrfs_key *location, | ||
| 1104 | const char *name, int namelen) | ||
| 1105 | { | ||
| 1106 | struct btrfs_root *root; | ||
| 1107 | int ret; | ||
| 1108 | |||
| 1109 | root = btrfs_read_fs_root_no_name(fs_info, location); | ||
| 1110 | if (!root) | ||
| 1111 | return NULL; | ||
| 1112 | |||
| 1113 | if (root->in_sysfs) | ||
| 1114 | return root; | ||
| 1115 | |||
| 1116 | ret = btrfs_set_root_name(root, name, namelen); | ||
| 1117 | if (ret) { | ||
| 1118 | free_extent_buffer(root->node); | ||
| 1119 | kfree(root); | ||
| 1120 | return ERR_PTR(ret); | ||
| 1121 | } | ||
| 1122 | #if 0 | ||
| 1123 | ret = btrfs_sysfs_add_root(root); | ||
| 1124 | if (ret) { | ||
| 1125 | free_extent_buffer(root->node); | ||
| 1126 | kfree(root->name); | ||
| 1127 | kfree(root); | ||
| 1128 | return ERR_PTR(ret); | ||
| 1129 | } | ||
| 1130 | #endif | ||
| 1131 | root->in_sysfs = 1; | ||
| 1132 | return root; | ||
| 1133 | } | ||
| 1134 | |||
| 1135 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) | ||
| 1136 | { | ||
| 1137 | struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; | ||
| 1138 | int ret = 0; | ||
| 1139 | struct list_head *cur; | ||
| 1140 | struct btrfs_device *device; | ||
| 1141 | struct backing_dev_info *bdi; | ||
| 1142 | #if 0 | ||
| 1143 | if ((bdi_bits & (1 << BDI_write_congested)) && | ||
| 1144 | btrfs_congested_async(info, 0)) | ||
| 1145 | return 1; | ||
| 1146 | #endif | ||
| 1147 | list_for_each(cur, &info->fs_devices->devices) { | ||
| 1148 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
| 1149 | if (!device->bdev) | ||
| 1150 | continue; | ||
| 1151 | bdi = blk_get_backing_dev_info(device->bdev); | ||
| 1152 | if (bdi && bdi_congested(bdi, bdi_bits)) { | ||
| 1153 | ret = 1; | ||
| 1154 | break; | ||
| 1155 | } | ||
| 1156 | } | ||
| 1157 | return ret; | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | /* | ||
| 1161 | * this unplugs every device on the box, and it is only used when page | ||
| 1162 | * is null | ||
| 1163 | */ | ||
| 1164 | static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
| 1165 | { | ||
| 1166 | struct list_head *cur; | ||
| 1167 | struct btrfs_device *device; | ||
| 1168 | struct btrfs_fs_info *info; | ||
| 1169 | |||
| 1170 | info = (struct btrfs_fs_info *)bdi->unplug_io_data; | ||
| 1171 | list_for_each(cur, &info->fs_devices->devices) { | ||
| 1172 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
| 1173 | if (!device->bdev) | ||
| 1174 | continue; | ||
| 1175 | |||
| 1176 | bdi = blk_get_backing_dev_info(device->bdev); | ||
| 1177 | if (bdi->unplug_io_fn) | ||
| 1178 | bdi->unplug_io_fn(bdi, page); | ||
| 1179 | } | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | ||
| 1183 | { | ||
| 1184 | struct inode *inode; | ||
| 1185 | struct extent_map_tree *em_tree; | ||
| 1186 | struct extent_map *em; | ||
| 1187 | struct address_space *mapping; | ||
| 1188 | u64 offset; | ||
| 1189 | |||
| 1190 | /* the generic O_DIRECT read code does this */ | ||
| 1191 | if (1 || !page) { | ||
| 1192 | __unplug_io_fn(bdi, page); | ||
| 1193 | return; | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | /* | ||
| 1197 | * page->mapping may change at any time. Get a consistent copy | ||
| 1198 | * and use that for everything below | ||
| 1199 | */ | ||
| 1200 | smp_mb(); | ||
| 1201 | mapping = page->mapping; | ||
| 1202 | if (!mapping) | ||
| 1203 | return; | ||
| 1204 | |||
| 1205 | inode = mapping->host; | ||
| 1206 | |||
| 1207 | /* | ||
| 1208 | * don't do the expensive searching for a small number of | ||
| 1209 | * devices | ||
| 1210 | */ | ||
| 1211 | if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) { | ||
| 1212 | __unplug_io_fn(bdi, page); | ||
| 1213 | return; | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | offset = page_offset(page); | ||
| 1217 | |||
| 1218 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 1219 | spin_lock(&em_tree->lock); | ||
| 1220 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | ||
| 1221 | spin_unlock(&em_tree->lock); | ||
| 1222 | if (!em) { | ||
| 1223 | __unplug_io_fn(bdi, page); | ||
| 1224 | return; | ||
| 1225 | } | ||
| 1226 | |||
| 1227 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
| 1228 | free_extent_map(em); | ||
| 1229 | __unplug_io_fn(bdi, page); | ||
| 1230 | return; | ||
| 1231 | } | ||
| 1232 | offset = offset - em->start; | ||
| 1233 | btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
| 1234 | em->block_start + offset, page); | ||
| 1235 | free_extent_map(em); | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | ||
| 1239 | { | ||
| 1240 | bdi_init(bdi); | ||
| 1241 | bdi->ra_pages = default_backing_dev_info.ra_pages; | ||
| 1242 | bdi->state = 0; | ||
| 1243 | bdi->capabilities = default_backing_dev_info.capabilities; | ||
| 1244 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | ||
| 1245 | bdi->unplug_io_data = info; | ||
| 1246 | bdi->congested_fn = btrfs_congested_fn; | ||
| 1247 | bdi->congested_data = info; | ||
| 1248 | return 0; | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | static int bio_ready_for_csum(struct bio *bio) | ||
| 1252 | { | ||
| 1253 | u64 length = 0; | ||
| 1254 | u64 buf_len = 0; | ||
| 1255 | u64 start = 0; | ||
| 1256 | struct page *page; | ||
| 1257 | struct extent_io_tree *io_tree = NULL; | ||
| 1258 | struct btrfs_fs_info *info = NULL; | ||
| 1259 | struct bio_vec *bvec; | ||
| 1260 | int i; | ||
| 1261 | int ret; | ||
| 1262 | |||
| 1263 | bio_for_each_segment(bvec, bio, i) { | ||
| 1264 | page = bvec->bv_page; | ||
| 1265 | if (page->private == EXTENT_PAGE_PRIVATE) { | ||
| 1266 | length += bvec->bv_len; | ||
| 1267 | continue; | ||
| 1268 | } | ||
| 1269 | if (!page->private) { | ||
| 1270 | length += bvec->bv_len; | ||
| 1271 | continue; | ||
| 1272 | } | ||
| 1273 | length = bvec->bv_len; | ||
| 1274 | buf_len = page->private >> 2; | ||
| 1275 | start = page_offset(page) + bvec->bv_offset; | ||
| 1276 | io_tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
| 1277 | info = BTRFS_I(page->mapping->host)->root->fs_info; | ||
| 1278 | } | ||
| 1279 | /* are we fully contained in this bio? */ | ||
| 1280 | if (buf_len <= length) | ||
| 1281 | return 1; | ||
| 1282 | |||
| 1283 | ret = extent_range_uptodate(io_tree, start + length, | ||
| 1284 | start + buf_len - 1); | ||
| 1285 | if (ret == 1) | ||
| 1286 | return ret; | ||
| 1287 | return ret; | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | /* | ||
| 1291 | * called by the kthread helper functions to finally call the bio end_io | ||
| 1292 | * functions. This is where read checksum verification actually happens | ||
| 1293 | */ | ||
| 1294 | static void end_workqueue_fn(struct btrfs_work *work) | ||
| 1295 | { | ||
| 1296 | struct bio *bio; | ||
| 1297 | struct end_io_wq *end_io_wq; | ||
| 1298 | struct btrfs_fs_info *fs_info; | ||
| 1299 | int error; | ||
| 1300 | |||
| 1301 | end_io_wq = container_of(work, struct end_io_wq, work); | ||
| 1302 | bio = end_io_wq->bio; | ||
| 1303 | fs_info = end_io_wq->info; | ||
| 1304 | |||
| 1305 | /* metadata bio reads are special because the whole tree block must | ||
| 1306 | * be checksummed at once. This makes sure the entire block is in | ||
| 1307 | * ram and up to date before trying to verify things. For | ||
| 1308 | * blocksize <= pagesize, it is basically a noop | ||
| 1309 | */ | ||
| 1310 | if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata && | ||
| 1311 | !bio_ready_for_csum(bio)) { | ||
| 1312 | btrfs_queue_worker(&fs_info->endio_meta_workers, | ||
| 1313 | &end_io_wq->work); | ||
| 1314 | return; | ||
| 1315 | } | ||
| 1316 | error = end_io_wq->error; | ||
| 1317 | bio->bi_private = end_io_wq->private; | ||
| 1318 | bio->bi_end_io = end_io_wq->end_io; | ||
| 1319 | kfree(end_io_wq); | ||
| 1320 | bio_endio(bio, error); | ||
| 1321 | } | ||
| 1322 | |||
| 1323 | static int cleaner_kthread(void *arg) | ||
| 1324 | { | ||
| 1325 | struct btrfs_root *root = arg; | ||
| 1326 | |||
| 1327 | do { | ||
| 1328 | smp_mb(); | ||
| 1329 | if (root->fs_info->closing) | ||
| 1330 | break; | ||
| 1331 | |||
| 1332 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | ||
| 1333 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
| 1334 | btrfs_clean_old_snapshots(root); | ||
| 1335 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 1336 | |||
| 1337 | if (freezing(current)) { | ||
| 1338 | refrigerator(); | ||
| 1339 | } else { | ||
| 1340 | smp_mb(); | ||
| 1341 | if (root->fs_info->closing) | ||
| 1342 | break; | ||
| 1343 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1344 | schedule(); | ||
| 1345 | __set_current_state(TASK_RUNNING); | ||
| 1346 | } | ||
| 1347 | } while (!kthread_should_stop()); | ||
| 1348 | return 0; | ||
| 1349 | } | ||
| 1350 | |||
| 1351 | static int transaction_kthread(void *arg) | ||
| 1352 | { | ||
| 1353 | struct btrfs_root *root = arg; | ||
| 1354 | struct btrfs_trans_handle *trans; | ||
| 1355 | struct btrfs_transaction *cur; | ||
| 1356 | unsigned long now; | ||
| 1357 | unsigned long delay; | ||
| 1358 | int ret; | ||
| 1359 | |||
| 1360 | do { | ||
| 1361 | smp_mb(); | ||
| 1362 | if (root->fs_info->closing) | ||
| 1363 | break; | ||
| 1364 | |||
| 1365 | delay = HZ * 30; | ||
| 1366 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | ||
| 1367 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
| 1368 | |||
| 1369 | if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { | ||
| 1370 | printk(KERN_INFO "btrfs: total reference cache " | ||
| 1371 | "size %llu\n", | ||
| 1372 | root->fs_info->total_ref_cache_size); | ||
| 1373 | } | ||
| 1374 | |||
| 1375 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 1376 | cur = root->fs_info->running_transaction; | ||
| 1377 | if (!cur) { | ||
| 1378 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1379 | goto sleep; | ||
| 1380 | } | ||
| 1381 | |||
| 1382 | now = get_seconds(); | ||
| 1383 | if (now < cur->start_time || now - cur->start_time < 30) { | ||
| 1384 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1385 | delay = HZ * 5; | ||
| 1386 | goto sleep; | ||
| 1387 | } | ||
| 1388 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1389 | trans = btrfs_start_transaction(root, 1); | ||
| 1390 | ret = btrfs_commit_transaction(trans, root); | ||
| 1391 | sleep: | ||
| 1392 | wake_up_process(root->fs_info->cleaner_kthread); | ||
| 1393 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
| 1394 | |||
| 1395 | if (freezing(current)) { | ||
| 1396 | refrigerator(); | ||
| 1397 | } else { | ||
| 1398 | if (root->fs_info->closing) | ||
| 1399 | break; | ||
| 1400 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1401 | schedule_timeout(delay); | ||
| 1402 | __set_current_state(TASK_RUNNING); | ||
| 1403 | } | ||
| 1404 | } while (!kthread_should_stop()); | ||
| 1405 | return 0; | ||
| 1406 | } | ||
| 1407 | |||
| 1408 | struct btrfs_root *open_ctree(struct super_block *sb, | ||
| 1409 | struct btrfs_fs_devices *fs_devices, | ||
| 1410 | char *options) | ||
| 1411 | { | ||
| 1412 | u32 sectorsize; | ||
| 1413 | u32 nodesize; | ||
| 1414 | u32 leafsize; | ||
| 1415 | u32 blocksize; | ||
| 1416 | u32 stripesize; | ||
| 1417 | u64 generation; | ||
| 1418 | u64 features; | ||
| 1419 | struct btrfs_key location; | ||
| 1420 | struct buffer_head *bh; | ||
| 1421 | struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1422 | GFP_NOFS); | ||
| 1423 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1424 | GFP_NOFS); | ||
| 1425 | struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1426 | GFP_NOFS); | ||
| 1427 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), | ||
| 1428 | GFP_NOFS); | ||
| 1429 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1430 | GFP_NOFS); | ||
| 1431 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1432 | GFP_NOFS); | ||
| 1433 | struct btrfs_root *log_tree_root; | ||
| 1434 | |||
| 1435 | int ret; | ||
| 1436 | int err = -EINVAL; | ||
| 1437 | |||
| 1438 | struct btrfs_super_block *disk_super; | ||
| 1439 | |||
| 1440 | if (!extent_root || !tree_root || !fs_info || | ||
| 1441 | !chunk_root || !dev_root || !csum_root) { | ||
| 1442 | err = -ENOMEM; | ||
| 1443 | goto fail; | ||
| 1444 | } | ||
| 1445 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); | ||
| 1446 | INIT_LIST_HEAD(&fs_info->trans_list); | ||
| 1447 | INIT_LIST_HEAD(&fs_info->dead_roots); | ||
| 1448 | INIT_LIST_HEAD(&fs_info->hashers); | ||
| 1449 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | ||
| 1450 | spin_lock_init(&fs_info->hash_lock); | ||
| 1451 | spin_lock_init(&fs_info->delalloc_lock); | ||
| 1452 | spin_lock_init(&fs_info->new_trans_lock); | ||
| 1453 | spin_lock_init(&fs_info->ref_cache_lock); | ||
| 1454 | |||
| 1455 | init_completion(&fs_info->kobj_unregister); | ||
| 1456 | fs_info->tree_root = tree_root; | ||
| 1457 | fs_info->extent_root = extent_root; | ||
| 1458 | fs_info->csum_root = csum_root; | ||
| 1459 | fs_info->chunk_root = chunk_root; | ||
| 1460 | fs_info->dev_root = dev_root; | ||
| 1461 | fs_info->fs_devices = fs_devices; | ||
| 1462 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | ||
| 1463 | INIT_LIST_HEAD(&fs_info->space_info); | ||
| 1464 | btrfs_mapping_init(&fs_info->mapping_tree); | ||
| 1465 | atomic_set(&fs_info->nr_async_submits, 0); | ||
| 1466 | atomic_set(&fs_info->async_delalloc_pages, 0); | ||
| 1467 | atomic_set(&fs_info->async_submit_draining, 0); | ||
| 1468 | atomic_set(&fs_info->nr_async_bios, 0); | ||
| 1469 | atomic_set(&fs_info->throttles, 0); | ||
| 1470 | atomic_set(&fs_info->throttle_gen, 0); | ||
| 1471 | fs_info->sb = sb; | ||
| 1472 | fs_info->max_extent = (u64)-1; | ||
| 1473 | fs_info->max_inline = 8192 * 1024; | ||
| 1474 | setup_bdi(fs_info, &fs_info->bdi); | ||
| 1475 | fs_info->btree_inode = new_inode(sb); | ||
| 1476 | fs_info->btree_inode->i_ino = 1; | ||
| 1477 | fs_info->btree_inode->i_nlink = 1; | ||
| 1478 | |||
| 1479 | fs_info->thread_pool_size = min_t(unsigned long, | ||
| 1480 | num_online_cpus() + 2, 8); | ||
| 1481 | |||
| 1482 | INIT_LIST_HEAD(&fs_info->ordered_extents); | ||
| 1483 | spin_lock_init(&fs_info->ordered_extent_lock); | ||
| 1484 | |||
| 1485 | sb->s_blocksize = 4096; | ||
| 1486 | sb->s_blocksize_bits = blksize_bits(4096); | ||
| 1487 | |||
| 1488 | /* | ||
| 1489 | * we set the i_size on the btree inode to the max possible int. | ||
| 1490 | * the real end of the address space is determined by all of | ||
| 1491 | * the devices in the system | ||
| 1492 | */ | ||
| 1493 | fs_info->btree_inode->i_size = OFFSET_MAX; | ||
| 1494 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; | ||
| 1495 | fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; | ||
| 1496 | |||
| 1497 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, | ||
| 1498 | fs_info->btree_inode->i_mapping, | ||
| 1499 | GFP_NOFS); | ||
| 1500 | extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, | ||
| 1501 | GFP_NOFS); | ||
| 1502 | |||
| 1503 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | ||
| 1504 | |||
| 1505 | spin_lock_init(&fs_info->block_group_cache_lock); | ||
| 1506 | fs_info->block_group_cache_tree.rb_node = NULL; | ||
| 1507 | |||
| 1508 | extent_io_tree_init(&fs_info->pinned_extents, | ||
| 1509 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 1510 | extent_io_tree_init(&fs_info->pending_del, | ||
| 1511 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 1512 | extent_io_tree_init(&fs_info->extent_ins, | ||
| 1513 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 1514 | fs_info->do_barriers = 1; | ||
| 1515 | |||
| 1516 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | ||
| 1517 | btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); | ||
| 1518 | btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); | ||
| 1519 | |||
| 1520 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
| 1521 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
| 1522 | sizeof(struct btrfs_key)); | ||
| 1523 | insert_inode_hash(fs_info->btree_inode); | ||
| 1524 | |||
| 1525 | mutex_init(&fs_info->trans_mutex); | ||
| 1526 | mutex_init(&fs_info->tree_log_mutex); | ||
| 1527 | mutex_init(&fs_info->drop_mutex); | ||
| 1528 | mutex_init(&fs_info->extent_ins_mutex); | ||
| 1529 | mutex_init(&fs_info->pinned_mutex); | ||
| 1530 | mutex_init(&fs_info->chunk_mutex); | ||
| 1531 | mutex_init(&fs_info->transaction_kthread_mutex); | ||
| 1532 | mutex_init(&fs_info->cleaner_mutex); | ||
| 1533 | mutex_init(&fs_info->volume_mutex); | ||
| 1534 | mutex_init(&fs_info->tree_reloc_mutex); | ||
| 1535 | init_waitqueue_head(&fs_info->transaction_throttle); | ||
| 1536 | init_waitqueue_head(&fs_info->transaction_wait); | ||
| 1537 | init_waitqueue_head(&fs_info->async_submit_wait); | ||
| 1538 | init_waitqueue_head(&fs_info->tree_log_wait); | ||
| 1539 | atomic_set(&fs_info->tree_log_commit, 0); | ||
| 1540 | atomic_set(&fs_info->tree_log_writers, 0); | ||
| 1541 | fs_info->tree_log_transid = 0; | ||
| 1542 | |||
| 1543 | __setup_root(4096, 4096, 4096, 4096, tree_root, | ||
| 1544 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | ||
| 1545 | |||
| 1546 | |||
| 1547 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | ||
| 1548 | if (!bh) | ||
| 1549 | goto fail_iput; | ||
| 1550 | |||
| 1551 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | ||
| 1552 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | ||
| 1553 | sizeof(fs_info->super_for_commit)); | ||
| 1554 | brelse(bh); | ||
| 1555 | |||
| 1556 | memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); | ||
| 1557 | |||
| 1558 | disk_super = &fs_info->super_copy; | ||
| 1559 | if (!btrfs_super_root(disk_super)) | ||
| 1560 | goto fail_iput; | ||
| 1561 | |||
| 1562 | ret = btrfs_parse_options(tree_root, options); | ||
| 1563 | if (ret) { | ||
| 1564 | err = ret; | ||
| 1565 | goto fail_iput; | ||
| 1566 | } | ||
| 1567 | |||
| 1568 | features = btrfs_super_incompat_flags(disk_super) & | ||
| 1569 | ~BTRFS_FEATURE_INCOMPAT_SUPP; | ||
| 1570 | if (features) { | ||
| 1571 | printk(KERN_ERR "BTRFS: couldn't mount because of " | ||
| 1572 | "unsupported optional features (%Lx).\n", | ||
| 1573 | features); | ||
| 1574 | err = -EINVAL; | ||
| 1575 | goto fail_iput; | ||
| 1576 | } | ||
| 1577 | |||
| 1578 | features = btrfs_super_compat_ro_flags(disk_super) & | ||
| 1579 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | ||
| 1580 | if (!(sb->s_flags & MS_RDONLY) && features) { | ||
| 1581 | printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " | ||
| 1582 | "unsupported option features (%Lx).\n", | ||
| 1583 | features); | ||
| 1584 | err = -EINVAL; | ||
| 1585 | goto fail_iput; | ||
| 1586 | } | ||
| 1587 | |||
| 1588 | /* | ||
| 1589 | * we need to start all the end_io workers up front because the | ||
| 1590 | * queue work function gets called at interrupt time, and so it | ||
| 1591 | * cannot dynamically grow. | ||
| 1592 | */ | ||
| 1593 | btrfs_init_workers(&fs_info->workers, "worker", | ||
| 1594 | fs_info->thread_pool_size); | ||
| 1595 | |||
| 1596 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", | ||
| 1597 | fs_info->thread_pool_size); | ||
| 1598 | |||
| 1599 | btrfs_init_workers(&fs_info->submit_workers, "submit", | ||
| 1600 | min_t(u64, fs_devices->num_devices, | ||
| 1601 | fs_info->thread_pool_size)); | ||
| 1602 | |||
| 1603 | /* a higher idle thresh on the submit workers makes it much more | ||
| 1604 | * likely that bios will be send down in a sane order to the | ||
| 1605 | * devices | ||
| 1606 | */ | ||
| 1607 | fs_info->submit_workers.idle_thresh = 64; | ||
| 1608 | |||
| 1609 | fs_info->workers.idle_thresh = 16; | ||
| 1610 | fs_info->workers.ordered = 1; | ||
| 1611 | |||
| 1612 | fs_info->delalloc_workers.idle_thresh = 2; | ||
| 1613 | fs_info->delalloc_workers.ordered = 1; | ||
| 1614 | |||
| 1615 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); | ||
| 1616 | btrfs_init_workers(&fs_info->endio_workers, "endio", | ||
| 1617 | fs_info->thread_pool_size); | ||
| 1618 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", | ||
| 1619 | fs_info->thread_pool_size); | ||
| 1620 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | ||
| 1621 | "endio-meta-write", fs_info->thread_pool_size); | ||
| 1622 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | ||
| 1623 | fs_info->thread_pool_size); | ||
| 1624 | |||
| 1625 | /* | ||
| 1626 | * endios are largely parallel and should have a very | ||
| 1627 | * low idle thresh | ||
| 1628 | */ | ||
| 1629 | fs_info->endio_workers.idle_thresh = 4; | ||
| 1630 | fs_info->endio_write_workers.idle_thresh = 64; | ||
| 1631 | fs_info->endio_meta_write_workers.idle_thresh = 64; | ||
| 1632 | |||
| 1633 | btrfs_start_workers(&fs_info->workers, 1); | ||
| 1634 | btrfs_start_workers(&fs_info->submit_workers, 1); | ||
| 1635 | btrfs_start_workers(&fs_info->delalloc_workers, 1); | ||
| 1636 | btrfs_start_workers(&fs_info->fixup_workers, 1); | ||
| 1637 | btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); | ||
| 1638 | btrfs_start_workers(&fs_info->endio_meta_workers, | ||
| 1639 | fs_info->thread_pool_size); | ||
| 1640 | btrfs_start_workers(&fs_info->endio_meta_write_workers, | ||
| 1641 | fs_info->thread_pool_size); | ||
| 1642 | btrfs_start_workers(&fs_info->endio_write_workers, | ||
| 1643 | fs_info->thread_pool_size); | ||
| 1644 | |||
| 1645 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | ||
| 1646 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | ||
| 1647 | 4 * 1024 * 1024 / PAGE_CACHE_SIZE); | ||
| 1648 | |||
| 1649 | nodesize = btrfs_super_nodesize(disk_super); | ||
| 1650 | leafsize = btrfs_super_leafsize(disk_super); | ||
| 1651 | sectorsize = btrfs_super_sectorsize(disk_super); | ||
| 1652 | stripesize = btrfs_super_stripesize(disk_super); | ||
| 1653 | tree_root->nodesize = nodesize; | ||
| 1654 | tree_root->leafsize = leafsize; | ||
| 1655 | tree_root->sectorsize = sectorsize; | ||
| 1656 | tree_root->stripesize = stripesize; | ||
| 1657 | |||
| 1658 | sb->s_blocksize = sectorsize; | ||
| 1659 | sb->s_blocksize_bits = blksize_bits(sectorsize); | ||
| 1660 | |||
| 1661 | if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, | ||
| 1662 | sizeof(disk_super->magic))) { | ||
| 1663 | printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); | ||
| 1664 | goto fail_sb_buffer; | ||
| 1665 | } | ||
| 1666 | |||
| 1667 | mutex_lock(&fs_info->chunk_mutex); | ||
| 1668 | ret = btrfs_read_sys_array(tree_root); | ||
| 1669 | mutex_unlock(&fs_info->chunk_mutex); | ||
| 1670 | if (ret) { | ||
| 1671 | printk(KERN_WARNING "btrfs: failed to read the system " | ||
| 1672 | "array on %s\n", sb->s_id); | ||
| 1673 | goto fail_sys_array; | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | blocksize = btrfs_level_size(tree_root, | ||
| 1677 | btrfs_super_chunk_root_level(disk_super)); | ||
| 1678 | generation = btrfs_super_chunk_root_generation(disk_super); | ||
| 1679 | |||
| 1680 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | ||
| 1681 | chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); | ||
| 1682 | |||
| 1683 | chunk_root->node = read_tree_block(chunk_root, | ||
| 1684 | btrfs_super_chunk_root(disk_super), | ||
| 1685 | blocksize, generation); | ||
| 1686 | BUG_ON(!chunk_root->node); | ||
| 1687 | |||
| 1688 | read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, | ||
| 1689 | (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), | ||
| 1690 | BTRFS_UUID_SIZE); | ||
| 1691 | |||
| 1692 | mutex_lock(&fs_info->chunk_mutex); | ||
| 1693 | ret = btrfs_read_chunk_tree(chunk_root); | ||
| 1694 | mutex_unlock(&fs_info->chunk_mutex); | ||
| 1695 | if (ret) { | ||
| 1696 | printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", | ||
| 1697 | sb->s_id); | ||
| 1698 | goto fail_chunk_root; | ||
| 1699 | } | ||
| 1700 | |||
| 1701 | btrfs_close_extra_devices(fs_devices); | ||
| 1702 | |||
| 1703 | blocksize = btrfs_level_size(tree_root, | ||
| 1704 | btrfs_super_root_level(disk_super)); | ||
| 1705 | generation = btrfs_super_generation(disk_super); | ||
| 1706 | |||
| 1707 | tree_root->node = read_tree_block(tree_root, | ||
| 1708 | btrfs_super_root(disk_super), | ||
| 1709 | blocksize, generation); | ||
| 1710 | if (!tree_root->node) | ||
| 1711 | goto fail_chunk_root; | ||
| 1712 | |||
| 1713 | |||
| 1714 | ret = find_and_setup_root(tree_root, fs_info, | ||
| 1715 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); | ||
| 1716 | if (ret) | ||
| 1717 | goto fail_tree_root; | ||
| 1718 | extent_root->track_dirty = 1; | ||
| 1719 | |||
| 1720 | ret = find_and_setup_root(tree_root, fs_info, | ||
| 1721 | BTRFS_DEV_TREE_OBJECTID, dev_root); | ||
| 1722 | dev_root->track_dirty = 1; | ||
| 1723 | |||
| 1724 | if (ret) | ||
| 1725 | goto fail_extent_root; | ||
| 1726 | |||
| 1727 | ret = find_and_setup_root(tree_root, fs_info, | ||
| 1728 | BTRFS_CSUM_TREE_OBJECTID, csum_root); | ||
| 1729 | if (ret) | ||
| 1730 | goto fail_extent_root; | ||
| 1731 | |||
| 1732 | csum_root->track_dirty = 1; | ||
| 1733 | |||
| 1734 | btrfs_read_block_groups(extent_root); | ||
| 1735 | |||
| 1736 | fs_info->generation = generation; | ||
| 1737 | fs_info->last_trans_committed = generation; | ||
| 1738 | fs_info->data_alloc_profile = (u64)-1; | ||
| 1739 | fs_info->metadata_alloc_profile = (u64)-1; | ||
| 1740 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
| 1741 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | ||
| 1742 | "btrfs-cleaner"); | ||
| 1743 | if (!fs_info->cleaner_kthread) | ||
| 1744 | goto fail_csum_root; | ||
| 1745 | |||
| 1746 | fs_info->transaction_kthread = kthread_run(transaction_kthread, | ||
| 1747 | tree_root, | ||
| 1748 | "btrfs-transaction"); | ||
| 1749 | if (!fs_info->transaction_kthread) | ||
| 1750 | goto fail_cleaner; | ||
| 1751 | |||
| 1752 | if (btrfs_super_log_root(disk_super) != 0) { | ||
| 1753 | u64 bytenr = btrfs_super_log_root(disk_super); | ||
| 1754 | |||
| 1755 | if (fs_devices->rw_devices == 0) { | ||
| 1756 | printk(KERN_WARNING "Btrfs log replay required " | ||
| 1757 | "on RO media\n"); | ||
| 1758 | err = -EIO; | ||
| 1759 | goto fail_trans_kthread; | ||
| 1760 | } | ||
| 1761 | blocksize = | ||
| 1762 | btrfs_level_size(tree_root, | ||
| 1763 | btrfs_super_log_root_level(disk_super)); | ||
| 1764 | |||
| 1765 | log_tree_root = kzalloc(sizeof(struct btrfs_root), | ||
| 1766 | GFP_NOFS); | ||
| 1767 | |||
| 1768 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | ||
| 1769 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); | ||
| 1770 | |||
| 1771 | log_tree_root->node = read_tree_block(tree_root, bytenr, | ||
| 1772 | blocksize, | ||
| 1773 | generation + 1); | ||
| 1774 | ret = btrfs_recover_log_trees(log_tree_root); | ||
| 1775 | BUG_ON(ret); | ||
| 1776 | |||
| 1777 | if (sb->s_flags & MS_RDONLY) { | ||
| 1778 | ret = btrfs_commit_super(tree_root); | ||
| 1779 | BUG_ON(ret); | ||
| 1780 | } | ||
| 1781 | } | ||
| 1782 | |||
| 1783 | if (!(sb->s_flags & MS_RDONLY)) { | ||
| 1784 | ret = btrfs_cleanup_reloc_trees(tree_root); | ||
| 1785 | BUG_ON(ret); | ||
| 1786 | } | ||
| 1787 | |||
| 1788 | location.objectid = BTRFS_FS_TREE_OBJECTID; | ||
| 1789 | location.type = BTRFS_ROOT_ITEM_KEY; | ||
| 1790 | location.offset = (u64)-1; | ||
| 1791 | |||
| 1792 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | ||
| 1793 | if (!fs_info->fs_root) | ||
| 1794 | goto fail_trans_kthread; | ||
| 1795 | return tree_root; | ||
| 1796 | |||
| 1797 | fail_trans_kthread: | ||
| 1798 | kthread_stop(fs_info->transaction_kthread); | ||
| 1799 | fail_cleaner: | ||
| 1800 | kthread_stop(fs_info->cleaner_kthread); | ||
| 1801 | |||
| 1802 | /* | ||
| 1803 | * make sure we're done with the btree inode before we stop our | ||
| 1804 | * kthreads | ||
| 1805 | */ | ||
| 1806 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); | ||
| 1807 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
| 1808 | |||
| 1809 | fail_csum_root: | ||
| 1810 | free_extent_buffer(csum_root->node); | ||
| 1811 | fail_extent_root: | ||
| 1812 | free_extent_buffer(extent_root->node); | ||
| 1813 | fail_tree_root: | ||
| 1814 | free_extent_buffer(tree_root->node); | ||
| 1815 | fail_chunk_root: | ||
| 1816 | free_extent_buffer(chunk_root->node); | ||
| 1817 | fail_sys_array: | ||
| 1818 | free_extent_buffer(dev_root->node); | ||
| 1819 | fail_sb_buffer: | ||
| 1820 | btrfs_stop_workers(&fs_info->fixup_workers); | ||
| 1821 | btrfs_stop_workers(&fs_info->delalloc_workers); | ||
| 1822 | btrfs_stop_workers(&fs_info->workers); | ||
| 1823 | btrfs_stop_workers(&fs_info->endio_workers); | ||
| 1824 | btrfs_stop_workers(&fs_info->endio_meta_workers); | ||
| 1825 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | ||
| 1826 | btrfs_stop_workers(&fs_info->endio_write_workers); | ||
| 1827 | btrfs_stop_workers(&fs_info->submit_workers); | ||
| 1828 | fail_iput: | ||
| 1829 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
| 1830 | iput(fs_info->btree_inode); | ||
| 1831 | fail: | ||
| 1832 | btrfs_close_devices(fs_info->fs_devices); | ||
| 1833 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | ||
| 1834 | |||
| 1835 | kfree(extent_root); | ||
| 1836 | kfree(tree_root); | ||
| 1837 | bdi_destroy(&fs_info->bdi); | ||
| 1838 | kfree(fs_info); | ||
| 1839 | kfree(chunk_root); | ||
| 1840 | kfree(dev_root); | ||
| 1841 | kfree(csum_root); | ||
| 1842 | return ERR_PTR(err); | ||
| 1843 | } | ||
| 1844 | |||
| 1845 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | ||
| 1846 | { | ||
| 1847 | char b[BDEVNAME_SIZE]; | ||
| 1848 | |||
| 1849 | if (uptodate) { | ||
| 1850 | set_buffer_uptodate(bh); | ||
| 1851 | } else { | ||
| 1852 | if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { | ||
| 1853 | printk(KERN_WARNING "lost page write due to " | ||
| 1854 | "I/O error on %s\n", | ||
| 1855 | bdevname(bh->b_bdev, b)); | ||
| 1856 | } | ||
| 1857 | /* note, we dont' set_buffer_write_io_error because we have | ||
| 1858 | * our own ways of dealing with the IO errors | ||
| 1859 | */ | ||
| 1860 | clear_buffer_uptodate(bh); | ||
| 1861 | } | ||
| 1862 | unlock_buffer(bh); | ||
| 1863 | put_bh(bh); | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) | ||
| 1867 | { | ||
| 1868 | struct buffer_head *bh; | ||
| 1869 | struct buffer_head *latest = NULL; | ||
| 1870 | struct btrfs_super_block *super; | ||
| 1871 | int i; | ||
| 1872 | u64 transid = 0; | ||
| 1873 | u64 bytenr; | ||
| 1874 | |||
| 1875 | /* we would like to check all the supers, but that would make | ||
| 1876 | * a btrfs mount succeed after a mkfs from a different FS. | ||
| 1877 | * So, we need to add a special mount option to scan for | ||
| 1878 | * later supers, using BTRFS_SUPER_MIRROR_MAX instead | ||
| 1879 | */ | ||
| 1880 | for (i = 0; i < 1; i++) { | ||
| 1881 | bytenr = btrfs_sb_offset(i); | ||
| 1882 | if (bytenr + 4096 >= i_size_read(bdev->bd_inode)) | ||
| 1883 | break; | ||
| 1884 | bh = __bread(bdev, bytenr / 4096, 4096); | ||
| 1885 | if (!bh) | ||
| 1886 | continue; | ||
| 1887 | |||
| 1888 | super = (struct btrfs_super_block *)bh->b_data; | ||
| 1889 | if (btrfs_super_bytenr(super) != bytenr || | ||
| 1890 | strncmp((char *)(&super->magic), BTRFS_MAGIC, | ||
| 1891 | sizeof(super->magic))) { | ||
| 1892 | brelse(bh); | ||
| 1893 | continue; | ||
| 1894 | } | ||
| 1895 | |||
| 1896 | if (!latest || btrfs_super_generation(super) > transid) { | ||
| 1897 | brelse(latest); | ||
| 1898 | latest = bh; | ||
| 1899 | transid = btrfs_super_generation(super); | ||
| 1900 | } else { | ||
| 1901 | brelse(bh); | ||
| 1902 | } | ||
| 1903 | } | ||
| 1904 | return latest; | ||
| 1905 | } | ||
| 1906 | |||
| 1907 | static int write_dev_supers(struct btrfs_device *device, | ||
| 1908 | struct btrfs_super_block *sb, | ||
| 1909 | int do_barriers, int wait, int max_mirrors) | ||
| 1910 | { | ||
| 1911 | struct buffer_head *bh; | ||
| 1912 | int i; | ||
| 1913 | int ret; | ||
| 1914 | int errors = 0; | ||
| 1915 | u32 crc; | ||
| 1916 | u64 bytenr; | ||
| 1917 | int last_barrier = 0; | ||
| 1918 | |||
| 1919 | if (max_mirrors == 0) | ||
| 1920 | max_mirrors = BTRFS_SUPER_MIRROR_MAX; | ||
| 1921 | |||
| 1922 | /* make sure only the last submit_bh does a barrier */ | ||
| 1923 | if (do_barriers) { | ||
| 1924 | for (i = 0; i < max_mirrors; i++) { | ||
| 1925 | bytenr = btrfs_sb_offset(i); | ||
| 1926 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= | ||
| 1927 | device->total_bytes) | ||
| 1928 | break; | ||
| 1929 | last_barrier = i; | ||
| 1930 | } | ||
| 1931 | } | ||
| 1932 | |||
| 1933 | for (i = 0; i < max_mirrors; i++) { | ||
| 1934 | bytenr = btrfs_sb_offset(i); | ||
| 1935 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) | ||
| 1936 | break; | ||
| 1937 | |||
| 1938 | if (wait) { | ||
| 1939 | bh = __find_get_block(device->bdev, bytenr / 4096, | ||
| 1940 | BTRFS_SUPER_INFO_SIZE); | ||
| 1941 | BUG_ON(!bh); | ||
| 1942 | brelse(bh); | ||
| 1943 | wait_on_buffer(bh); | ||
| 1944 | if (buffer_uptodate(bh)) { | ||
| 1945 | brelse(bh); | ||
| 1946 | continue; | ||
| 1947 | } | ||
| 1948 | } else { | ||
| 1949 | btrfs_set_super_bytenr(sb, bytenr); | ||
| 1950 | |||
| 1951 | crc = ~(u32)0; | ||
| 1952 | crc = btrfs_csum_data(NULL, (char *)sb + | ||
| 1953 | BTRFS_CSUM_SIZE, crc, | ||
| 1954 | BTRFS_SUPER_INFO_SIZE - | ||
| 1955 | BTRFS_CSUM_SIZE); | ||
| 1956 | btrfs_csum_final(crc, sb->csum); | ||
| 1957 | |||
| 1958 | bh = __getblk(device->bdev, bytenr / 4096, | ||
| 1959 | BTRFS_SUPER_INFO_SIZE); | ||
| 1960 | memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); | ||
| 1961 | |||
| 1962 | set_buffer_uptodate(bh); | ||
| 1963 | get_bh(bh); | ||
| 1964 | lock_buffer(bh); | ||
| 1965 | bh->b_end_io = btrfs_end_buffer_write_sync; | ||
| 1966 | } | ||
| 1967 | |||
| 1968 | if (i == last_barrier && do_barriers && device->barriers) { | ||
| 1969 | ret = submit_bh(WRITE_BARRIER, bh); | ||
| 1970 | if (ret == -EOPNOTSUPP) { | ||
| 1971 | printk("btrfs: disabling barriers on dev %s\n", | ||
| 1972 | device->name); | ||
| 1973 | set_buffer_uptodate(bh); | ||
| 1974 | device->barriers = 0; | ||
| 1975 | get_bh(bh); | ||
| 1976 | lock_buffer(bh); | ||
| 1977 | ret = submit_bh(WRITE, bh); | ||
| 1978 | } | ||
| 1979 | } else { | ||
| 1980 | ret = submit_bh(WRITE, bh); | ||
| 1981 | } | ||
| 1982 | |||
| 1983 | if (!ret && wait) { | ||
| 1984 | wait_on_buffer(bh); | ||
| 1985 | if (!buffer_uptodate(bh)) | ||
| 1986 | errors++; | ||
| 1987 | } else if (ret) { | ||
| 1988 | errors++; | ||
| 1989 | } | ||
| 1990 | if (wait) | ||
| 1991 | brelse(bh); | ||
| 1992 | } | ||
| 1993 | return errors < i ? 0 : -1; | ||
| 1994 | } | ||
| 1995 | |||
| 1996 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | ||
| 1997 | { | ||
| 1998 | struct list_head *cur; | ||
| 1999 | struct list_head *head = &root->fs_info->fs_devices->devices; | ||
| 2000 | struct btrfs_device *dev; | ||
| 2001 | struct btrfs_super_block *sb; | ||
| 2002 | struct btrfs_dev_item *dev_item; | ||
| 2003 | int ret; | ||
| 2004 | int do_barriers; | ||
| 2005 | int max_errors; | ||
| 2006 | int total_errors = 0; | ||
| 2007 | u64 flags; | ||
| 2008 | |||
| 2009 | max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; | ||
| 2010 | do_barriers = !btrfs_test_opt(root, NOBARRIER); | ||
| 2011 | |||
| 2012 | sb = &root->fs_info->super_for_commit; | ||
| 2013 | dev_item = &sb->dev_item; | ||
| 2014 | list_for_each(cur, head) { | ||
| 2015 | dev = list_entry(cur, struct btrfs_device, dev_list); | ||
| 2016 | if (!dev->bdev) { | ||
| 2017 | total_errors++; | ||
| 2018 | continue; | ||
| 2019 | } | ||
| 2020 | if (!dev->in_fs_metadata || !dev->writeable) | ||
| 2021 | continue; | ||
| 2022 | |||
| 2023 | btrfs_set_stack_device_generation(dev_item, 0); | ||
| 2024 | btrfs_set_stack_device_type(dev_item, dev->type); | ||
| 2025 | btrfs_set_stack_device_id(dev_item, dev->devid); | ||
| 2026 | btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); | ||
| 2027 | btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); | ||
| 2028 | btrfs_set_stack_device_io_align(dev_item, dev->io_align); | ||
| 2029 | btrfs_set_stack_device_io_width(dev_item, dev->io_width); | ||
| 2030 | btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); | ||
| 2031 | memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); | ||
| 2032 | memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); | ||
| 2033 | |||
| 2034 | flags = btrfs_super_flags(sb); | ||
| 2035 | btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); | ||
| 2036 | |||
| 2037 | ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors); | ||
| 2038 | if (ret) | ||
| 2039 | total_errors++; | ||
| 2040 | } | ||
| 2041 | if (total_errors > max_errors) { | ||
| 2042 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", | ||
| 2043 | total_errors); | ||
| 2044 | BUG(); | ||
| 2045 | } | ||
| 2046 | |||
| 2047 | total_errors = 0; | ||
| 2048 | list_for_each(cur, head) { | ||
| 2049 | dev = list_entry(cur, struct btrfs_device, dev_list); | ||
| 2050 | if (!dev->bdev) | ||
| 2051 | continue; | ||
| 2052 | if (!dev->in_fs_metadata || !dev->writeable) | ||
| 2053 | continue; | ||
| 2054 | |||
| 2055 | ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors); | ||
| 2056 | if (ret) | ||
| 2057 | total_errors++; | ||
| 2058 | } | ||
| 2059 | if (total_errors > max_errors) { | ||
| 2060 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", | ||
| 2061 | total_errors); | ||
| 2062 | BUG(); | ||
| 2063 | } | ||
| 2064 | return 0; | ||
| 2065 | } | ||
| 2066 | |||
/*
 * Write the super block to all devices.
 *
 * Thin wrapper around write_all_supers(); @trans is accepted but not used.
 * Returns whatever write_all_supers() returns.
 */
int write_ctree_super(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, int max_mirrors)
{
	return write_all_supers(root, max_mirrors);
}
| 2075 | |||
| 2076 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | ||
| 2077 | { | ||
| 2078 | radix_tree_delete(&fs_info->fs_roots_radix, | ||
| 2079 | (unsigned long)root->root_key.objectid); | ||
| 2080 | if (root->anon_super.s_dev) { | ||
| 2081 | down_write(&root->anon_super.s_umount); | ||
| 2082 | kill_anon_super(&root->anon_super); | ||
| 2083 | } | ||
| 2084 | if (root->node) | ||
| 2085 | free_extent_buffer(root->node); | ||
| 2086 | if (root->commit_root) | ||
| 2087 | free_extent_buffer(root->commit_root); | ||
| 2088 | kfree(root->name); | ||
| 2089 | kfree(root); | ||
| 2090 | return 0; | ||
| 2091 | } | ||
| 2092 | |||
| 2093 | static int del_fs_roots(struct btrfs_fs_info *fs_info) | ||
| 2094 | { | ||
| 2095 | int ret; | ||
| 2096 | struct btrfs_root *gang[8]; | ||
| 2097 | int i; | ||
| 2098 | |||
| 2099 | while (1) { | ||
| 2100 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, | ||
| 2101 | (void **)gang, 0, | ||
| 2102 | ARRAY_SIZE(gang)); | ||
| 2103 | if (!ret) | ||
| 2104 | break; | ||
| 2105 | for (i = 0; i < ret; i++) | ||
| 2106 | btrfs_free_fs_root(fs_info, gang[i]); | ||
| 2107 | } | ||
| 2108 | return 0; | ||
| 2109 | } | ||
| 2110 | |||
| 2111 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | ||
| 2112 | { | ||
| 2113 | u64 root_objectid = 0; | ||
| 2114 | struct btrfs_root *gang[8]; | ||
| 2115 | int i; | ||
| 2116 | int ret; | ||
| 2117 | |||
| 2118 | while (1) { | ||
| 2119 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, | ||
| 2120 | (void **)gang, root_objectid, | ||
| 2121 | ARRAY_SIZE(gang)); | ||
| 2122 | if (!ret) | ||
| 2123 | break; | ||
| 2124 | for (i = 0; i < ret; i++) { | ||
| 2125 | root_objectid = gang[i]->root_key.objectid; | ||
| 2126 | ret = btrfs_find_dead_roots(fs_info->tree_root, | ||
| 2127 | root_objectid, gang[i]); | ||
| 2128 | BUG_ON(ret); | ||
| 2129 | btrfs_orphan_cleanup(gang[i]); | ||
| 2130 | } | ||
| 2131 | root_objectid++; | ||
| 2132 | } | ||
| 2133 | return 0; | ||
| 2134 | } | ||
| 2135 | |||
| 2136 | int btrfs_commit_super(struct btrfs_root *root) | ||
| 2137 | { | ||
| 2138 | struct btrfs_trans_handle *trans; | ||
| 2139 | int ret; | ||
| 2140 | |||
| 2141 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
| 2142 | btrfs_clean_old_snapshots(root); | ||
| 2143 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 2144 | trans = btrfs_start_transaction(root, 1); | ||
| 2145 | ret = btrfs_commit_transaction(trans, root); | ||
| 2146 | BUG_ON(ret); | ||
| 2147 | /* run commit again to drop the original snapshot */ | ||
| 2148 | trans = btrfs_start_transaction(root, 1); | ||
| 2149 | btrfs_commit_transaction(trans, root); | ||
| 2150 | ret = btrfs_write_and_wait_transaction(NULL, root); | ||
| 2151 | BUG_ON(ret); | ||
| 2152 | |||
| 2153 | ret = write_ctree_super(NULL, root, 0); | ||
| 2154 | return ret; | ||
| 2155 | } | ||
| 2156 | |||
| 2157 | int close_ctree(struct btrfs_root *root) | ||
| 2158 | { | ||
| 2159 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2160 | int ret; | ||
| 2161 | |||
| 2162 | fs_info->closing = 1; | ||
| 2163 | smp_mb(); | ||
| 2164 | |||
| 2165 | kthread_stop(root->fs_info->transaction_kthread); | ||
| 2166 | kthread_stop(root->fs_info->cleaner_kthread); | ||
| 2167 | |||
| 2168 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | ||
| 2169 | ret = btrfs_commit_super(root); | ||
| 2170 | if (ret) | ||
| 2171 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
| 2172 | } | ||
| 2173 | |||
| 2174 | if (fs_info->delalloc_bytes) { | ||
| 2175 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | ||
| 2176 | fs_info->delalloc_bytes); | ||
| 2177 | } | ||
| 2178 | if (fs_info->total_ref_cache_size) { | ||
| 2179 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", | ||
| 2180 | (unsigned long long)fs_info->total_ref_cache_size); | ||
| 2181 | } | ||
| 2182 | |||
| 2183 | if (fs_info->extent_root->node) | ||
| 2184 | free_extent_buffer(fs_info->extent_root->node); | ||
| 2185 | |||
| 2186 | if (fs_info->tree_root->node) | ||
| 2187 | free_extent_buffer(fs_info->tree_root->node); | ||
| 2188 | |||
| 2189 | if (root->fs_info->chunk_root->node) | ||
| 2190 | free_extent_buffer(root->fs_info->chunk_root->node); | ||
| 2191 | |||
| 2192 | if (root->fs_info->dev_root->node) | ||
| 2193 | free_extent_buffer(root->fs_info->dev_root->node); | ||
| 2194 | |||
| 2195 | if (root->fs_info->csum_root->node) | ||
| 2196 | free_extent_buffer(root->fs_info->csum_root->node); | ||
| 2197 | |||
| 2198 | btrfs_free_block_groups(root->fs_info); | ||
| 2199 | |||
| 2200 | del_fs_roots(fs_info); | ||
| 2201 | |||
| 2202 | iput(fs_info->btree_inode); | ||
| 2203 | |||
| 2204 | btrfs_stop_workers(&fs_info->fixup_workers); | ||
| 2205 | btrfs_stop_workers(&fs_info->delalloc_workers); | ||
| 2206 | btrfs_stop_workers(&fs_info->workers); | ||
| 2207 | btrfs_stop_workers(&fs_info->endio_workers); | ||
| 2208 | btrfs_stop_workers(&fs_info->endio_meta_workers); | ||
| 2209 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | ||
| 2210 | btrfs_stop_workers(&fs_info->endio_write_workers); | ||
| 2211 | btrfs_stop_workers(&fs_info->submit_workers); | ||
| 2212 | |||
| 2213 | #if 0 | ||
| 2214 | while (!list_empty(&fs_info->hashers)) { | ||
| 2215 | struct btrfs_hasher *hasher; | ||
| 2216 | hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, | ||
| 2217 | hashers); | ||
| 2218 | list_del(&hasher->hashers); | ||
| 2219 | crypto_free_hash(&fs_info->hash_tfm); | ||
| 2220 | kfree(hasher); | ||
| 2221 | } | ||
| 2222 | #endif | ||
| 2223 | btrfs_close_devices(fs_info->fs_devices); | ||
| 2224 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | ||
| 2225 | |||
| 2226 | bdi_destroy(&fs_info->bdi); | ||
| 2227 | |||
| 2228 | kfree(fs_info->extent_root); | ||
| 2229 | kfree(fs_info->tree_root); | ||
| 2230 | kfree(fs_info->chunk_root); | ||
| 2231 | kfree(fs_info->dev_root); | ||
| 2232 | kfree(fs_info->csum_root); | ||
| 2233 | return 0; | ||
| 2234 | } | ||
| 2235 | |||
| 2236 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) | ||
| 2237 | { | ||
| 2238 | int ret; | ||
| 2239 | struct inode *btree_inode = buf->first_page->mapping->host; | ||
| 2240 | |||
| 2241 | ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); | ||
| 2242 | if (!ret) | ||
| 2243 | return ret; | ||
| 2244 | |||
| 2245 | ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, | ||
| 2246 | parent_transid); | ||
| 2247 | return !ret; | ||
| 2248 | } | ||
| 2249 | |||
| 2250 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf) | ||
| 2251 | { | ||
| 2252 | struct inode *btree_inode = buf->first_page->mapping->host; | ||
| 2253 | return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, | ||
| 2254 | buf); | ||
| 2255 | } | ||
| 2256 | |||
| 2257 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | ||
| 2258 | { | ||
| 2259 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | ||
| 2260 | u64 transid = btrfs_header_generation(buf); | ||
| 2261 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 2262 | |||
| 2263 | WARN_ON(!btrfs_tree_locked(buf)); | ||
| 2264 | if (transid != root->fs_info->generation) { | ||
| 2265 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " | ||
| 2266 | "found %llu running %llu\n", | ||
| 2267 | (unsigned long long)buf->start, | ||
| 2268 | (unsigned long long)transid, | ||
| 2269 | (unsigned long long)root->fs_info->generation); | ||
| 2270 | WARN_ON(1); | ||
| 2271 | } | ||
| 2272 | set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); | ||
| 2273 | } | ||
| 2274 | |||
| 2275 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | ||
| 2276 | { | ||
| 2277 | /* | ||
| 2278 | * looks as though older kernels can get into trouble with | ||
| 2279 | * this code, they end up stuck in balance_dirty_pages forever | ||
| 2280 | */ | ||
| 2281 | struct extent_io_tree *tree; | ||
| 2282 | u64 num_dirty; | ||
| 2283 | u64 start = 0; | ||
| 2284 | unsigned long thresh = 32 * 1024 * 1024; | ||
| 2285 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | ||
| 2286 | |||
| 2287 | if (current_is_pdflush() || current->flags & PF_MEMALLOC) | ||
| 2288 | return; | ||
| 2289 | |||
| 2290 | num_dirty = count_range_bits(tree, &start, (u64)-1, | ||
| 2291 | thresh, EXTENT_DIRTY); | ||
| 2292 | if (num_dirty > thresh) { | ||
| 2293 | balance_dirty_pages_ratelimited_nr( | ||
| 2294 | root->fs_info->btree_inode->i_mapping, 1); | ||
| 2295 | } | ||
| 2296 | return; | ||
| 2297 | } | ||
| 2298 | |||
| 2299 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | ||
| 2300 | { | ||
| 2301 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | ||
| 2302 | int ret; | ||
| 2303 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | ||
| 2304 | if (ret == 0) | ||
| 2305 | buf->flags |= EXTENT_UPTODATE; | ||
| 2306 | return ret; | ||
| 2307 | } | ||
| 2308 | |||
| 2309 | int btree_lock_page_hook(struct page *page) | ||
| 2310 | { | ||
| 2311 | struct inode *inode = page->mapping->host; | ||
| 2312 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2313 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 2314 | struct extent_buffer *eb; | ||
| 2315 | unsigned long len; | ||
| 2316 | u64 bytenr = page_offset(page); | ||
| 2317 | |||
| 2318 | if (page->private == EXTENT_PAGE_PRIVATE) | ||
| 2319 | goto out; | ||
| 2320 | |||
| 2321 | len = page->private >> 2; | ||
| 2322 | eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); | ||
| 2323 | if (!eb) | ||
| 2324 | goto out; | ||
| 2325 | |||
| 2326 | btrfs_tree_lock(eb); | ||
| 2327 | spin_lock(&root->fs_info->hash_lock); | ||
| 2328 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | ||
| 2329 | spin_unlock(&root->fs_info->hash_lock); | ||
| 2330 | btrfs_tree_unlock(eb); | ||
| 2331 | free_extent_buffer(eb); | ||
| 2332 | out: | ||
| 2333 | lock_page(page); | ||
| 2334 | return 0; | ||
| 2335 | } | ||
| 2336 | |||
| 2337 | static struct extent_io_ops btree_extent_io_ops = { | ||
| 2338 | .write_cache_pages_lock_hook = btree_lock_page_hook, | ||
| 2339 | .readpage_end_io_hook = btree_readpage_end_io_hook, | ||
| 2340 | .submit_bio_hook = btree_submit_bio_hook, | ||
| 2341 | /* note we're sharing with inode.c for the merge bio hook */ | ||
| 2342 | .merge_bio_hook = btrfs_merge_bio_hook, | ||
| 2343 | }; | ||
