diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/extent-tree.c | 50 | ||||
| -rw-r--r-- | fs/btrfs/free-space-cache.c | 296 | ||||
| -rw-r--r-- | fs/btrfs/free-space-cache.h | 2 |
3 files changed, 345 insertions, 3 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d5455a2bf60b..9a325e465ad9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -421,7 +421,9 @@ err: | |||
| 421 | return 0; | 421 | return 0; |
| 422 | } | 422 | } |
| 423 | 423 | ||
| 424 | static int cache_block_group(struct btrfs_block_group_cache *cache) | 424 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
| 425 | struct btrfs_trans_handle *trans, | ||
| 426 | int load_cache_only) | ||
| 425 | { | 427 | { |
| 426 | struct btrfs_fs_info *fs_info = cache->fs_info; | 428 | struct btrfs_fs_info *fs_info = cache->fs_info; |
| 427 | struct btrfs_caching_control *caching_ctl; | 429 | struct btrfs_caching_control *caching_ctl; |
| @@ -432,6 +434,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache) | |||
| 432 | if (cache->cached != BTRFS_CACHE_NO) | 434 | if (cache->cached != BTRFS_CACHE_NO) |
| 433 | return 0; | 435 | return 0; |
| 434 | 436 | ||
| 437 | /* | ||
| 438 | * We can't do the read from on-disk cache during a commit since we need | ||
| 439 | * to have the normal tree locking. | ||
| 440 | */ | ||
| 441 | if (!trans->transaction->in_commit) { | ||
| 442 | spin_lock(&cache->lock); | ||
| 443 | if (cache->cached != BTRFS_CACHE_NO) { | ||
| 444 | spin_unlock(&cache->lock); | ||
| 445 | return 0; | ||
| 446 | } | ||
| 447 | cache->cached = BTRFS_CACHE_STARTED; | ||
| 448 | spin_unlock(&cache->lock); | ||
| 449 | |||
| 450 | ret = load_free_space_cache(fs_info, cache); | ||
| 451 | |||
| 452 | spin_lock(&cache->lock); | ||
| 453 | if (ret == 1) { | ||
| 454 | cache->cached = BTRFS_CACHE_FINISHED; | ||
| 455 | cache->last_byte_to_unpin = (u64)-1; | ||
| 456 | } else { | ||
| 457 | cache->cached = BTRFS_CACHE_NO; | ||
| 458 | } | ||
| 459 | spin_unlock(&cache->lock); | ||
| 460 | if (ret == 1) | ||
| 461 | return 0; | ||
| 462 | } | ||
| 463 | |||
| 464 | if (load_cache_only) | ||
| 465 | return 0; | ||
| 466 | |||
| 435 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | 467 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); |
| 436 | BUG_ON(!caching_ctl); | 468 | BUG_ON(!caching_ctl); |
| 437 | 469 | ||
| @@ -3984,6 +4016,14 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 3984 | factor = 2; | 4016 | factor = 2; |
| 3985 | else | 4017 | else |
| 3986 | factor = 1; | 4018 | factor = 1; |
| 4019 | /* | ||
| 4020 | * If this block group has free space cache written out, we | ||
| 4021 | * need to make sure to load it if we are removing space. This | ||
| 4022 | * is because we need the unpinning stage to actually add the | ||
| 4023 | * space back to the block group, otherwise we will leak space. | ||
| 4024 | */ | ||
| 4025 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | ||
| 4026 | cache_block_group(cache, trans, 1); | ||
| 3987 | 4027 | ||
| 3988 | byte_in_group = bytenr - cache->key.objectid; | 4028 | byte_in_group = bytenr - cache->key.objectid; |
| 3989 | WARN_ON(byte_in_group > cache->key.offset); | 4029 | WARN_ON(byte_in_group > cache->key.offset); |
| @@ -4828,6 +4868,10 @@ have_block_group: | |||
| 4828 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 4868 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
| 4829 | u64 free_percent; | 4869 | u64 free_percent; |
| 4830 | 4870 | ||
| 4871 | ret = cache_block_group(block_group, trans, 1); | ||
| 4872 | if (block_group->cached == BTRFS_CACHE_FINISHED) | ||
| 4873 | goto have_block_group; | ||
| 4874 | |||
| 4831 | free_percent = btrfs_block_group_used(&block_group->item); | 4875 | free_percent = btrfs_block_group_used(&block_group->item); |
| 4832 | free_percent *= 100; | 4876 | free_percent *= 100; |
| 4833 | free_percent = div64_u64(free_percent, | 4877 | free_percent = div64_u64(free_percent, |
| @@ -4848,7 +4892,7 @@ have_block_group: | |||
| 4848 | if (loop > LOOP_CACHING_NOWAIT || | 4892 | if (loop > LOOP_CACHING_NOWAIT || |
| 4849 | (loop > LOOP_FIND_IDEAL && | 4893 | (loop > LOOP_FIND_IDEAL && |
| 4850 | atomic_read(&space_info->caching_threads) < 2)) { | 4894 | atomic_read(&space_info->caching_threads) < 2)) { |
| 4851 | ret = cache_block_group(block_group); | 4895 | ret = cache_block_group(block_group, trans, 0); |
| 4852 | BUG_ON(ret); | 4896 | BUG_ON(ret); |
| 4853 | } | 4897 | } |
| 4854 | found_uncached_bg = true; | 4898 | found_uncached_bg = true; |
| @@ -5405,7 +5449,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
| 5405 | u64 num_bytes = ins->offset; | 5449 | u64 num_bytes = ins->offset; |
| 5406 | 5450 | ||
| 5407 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 5451 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
| 5408 | cache_block_group(block_group); | 5452 | cache_block_group(block_group, trans, 0); |
| 5409 | caching_ctl = get_caching_control(block_group); | 5453 | caching_ctl = get_caching_control(block_group); |
| 5410 | 5454 | ||
| 5411 | if (!caching_ctl) { | 5455 | if (!caching_ctl) { |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 7f972e59cc04..baa193423fb8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -187,6 +187,302 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
| 187 | return btrfs_update_inode(trans, root, inode); | 187 | return btrfs_update_inode(trans, root, inode); |
| 188 | } | 188 | } |
| 189 | 189 | ||
| 190 | static int readahead_cache(struct inode *inode) | ||
| 191 | { | ||
| 192 | struct file_ra_state *ra; | ||
| 193 | unsigned long last_index; | ||
| 194 | |||
| 195 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | ||
| 196 | if (!ra) | ||
| 197 | return -ENOMEM; | ||
| 198 | |||
| 199 | file_ra_state_init(ra, inode->i_mapping); | ||
| 200 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | ||
| 201 | |||
| 202 | page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); | ||
| 203 | |||
| 204 | kfree(ra); | ||
| 205 | |||
| 206 | return 0; | ||
| 207 | } | ||
| 208 | |||
| 209 | int load_free_space_cache(struct btrfs_fs_info *fs_info, | ||
| 210 | struct btrfs_block_group_cache *block_group) | ||
| 211 | { | ||
| 212 | struct btrfs_root *root = fs_info->tree_root; | ||
| 213 | struct inode *inode; | ||
| 214 | struct btrfs_free_space_header *header; | ||
| 215 | struct extent_buffer *leaf; | ||
| 216 | struct page *page; | ||
| 217 | struct btrfs_path *path; | ||
| 218 | u32 *checksums = NULL, *crc; | ||
| 219 | char *disk_crcs = NULL; | ||
| 220 | struct btrfs_key key; | ||
| 221 | struct list_head bitmaps; | ||
| 222 | u64 num_entries; | ||
| 223 | u64 num_bitmaps; | ||
| 224 | u64 generation; | ||
| 225 | u32 cur_crc = ~(u32)0; | ||
| 226 | pgoff_t index = 0; | ||
| 227 | unsigned long first_page_offset; | ||
| 228 | int num_checksums; | ||
| 229 | int ret = 0; | ||
| 230 | |||
| 231 | /* | ||
| 232 | * If we're unmounting then just return, since this does a search on the | ||
| 233 | * normal root and not the commit root and we could deadlock. | ||
| 234 | */ | ||
| 235 | smp_mb(); | ||
| 236 | if (fs_info->closing) | ||
| 237 | return 0; | ||
| 238 | |||
| 239 | /* | ||
| 240 | * If this block group has been marked to be cleared for one reason or | ||
| 241 | * another then we can't trust the on disk cache, so just return. | ||
| 242 | */ | ||
| 243 | spin_lock(&block_group->lock); | ||
| 244 | if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { | ||
| 245 | printk(KERN_ERR "not reading block group %llu, dcs is %d\n", block_group->key.objectid, | ||
| 246 | block_group->disk_cache_state); | ||
| 247 | spin_unlock(&block_group->lock); | ||
| 248 | return 0; | ||
| 249 | } | ||
| 250 | spin_unlock(&block_group->lock); | ||
| 251 | |||
| 252 | INIT_LIST_HEAD(&bitmaps); | ||
| 253 | |||
| 254 | path = btrfs_alloc_path(); | ||
| 255 | if (!path) | ||
| 256 | return 0; | ||
| 257 | |||
| 258 | inode = lookup_free_space_inode(root, block_group, path); | ||
| 259 | if (IS_ERR(inode)) { | ||
| 260 | btrfs_free_path(path); | ||
| 261 | return 0; | ||
| 262 | } | ||
| 263 | |||
| 264 | /* Nothing in the space cache, goodbye */ | ||
| 265 | if (!i_size_read(inode)) { | ||
| 266 | btrfs_free_path(path); | ||
| 267 | goto out; | ||
| 268 | } | ||
| 269 | |||
| 270 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | ||
| 271 | key.offset = block_group->key.objectid; | ||
| 272 | key.type = 0; | ||
| 273 | |||
| 274 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 275 | if (ret) { | ||
| 276 | btrfs_free_path(path); | ||
| 277 | goto out; | ||
| 278 | } | ||
| 279 | |||
| 280 | leaf = path->nodes[0]; | ||
| 281 | header = btrfs_item_ptr(leaf, path->slots[0], | ||
| 282 | struct btrfs_free_space_header); | ||
| 283 | num_entries = btrfs_free_space_entries(leaf, header); | ||
| 284 | num_bitmaps = btrfs_free_space_bitmaps(leaf, header); | ||
| 285 | generation = btrfs_free_space_generation(leaf, header); | ||
| 286 | btrfs_free_path(path); | ||
| 287 | |||
| 288 | if (BTRFS_I(inode)->generation != generation) { | ||
| 289 | printk(KERN_ERR "btrfs: free space inode generation (%llu) did" | ||
| 290 | " not match free space cache generation (%llu) for " | ||
| 291 | "block group %llu\n", | ||
| 292 | (unsigned long long)BTRFS_I(inode)->generation, | ||
| 293 | (unsigned long long)generation, | ||
| 294 | (unsigned long long)block_group->key.objectid); | ||
| 295 | goto out; | ||
| 296 | } | ||
| 297 | |||
| 298 | if (!num_entries) | ||
| 299 | goto out; | ||
| 300 | |||
| 301 | /* Setup everything for doing checksumming */ | ||
| 302 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | ||
| 303 | checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
| 304 | if (!checksums) | ||
| 305 | goto out; | ||
| 306 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | ||
| 307 | disk_crcs = kzalloc(first_page_offset, GFP_NOFS); | ||
| 308 | if (!disk_crcs) | ||
| 309 | goto out; | ||
| 310 | |||
| 311 | ret = readahead_cache(inode); | ||
| 312 | if (ret) { | ||
| 313 | ret = 0; | ||
| 314 | goto out; | ||
| 315 | } | ||
| 316 | |||
| 317 | while (1) { | ||
| 318 | struct btrfs_free_space_entry *entry; | ||
| 319 | struct btrfs_free_space *e; | ||
| 320 | void *addr; | ||
| 321 | unsigned long offset = 0; | ||
| 322 | unsigned long start_offset = 0; | ||
| 323 | int need_loop = 0; | ||
| 324 | |||
| 325 | if (!num_entries && !num_bitmaps) | ||
| 326 | break; | ||
| 327 | |||
| 328 | if (index == 0) { | ||
| 329 | start_offset = first_page_offset; | ||
| 330 | offset = start_offset; | ||
| 331 | } | ||
| 332 | |||
| 333 | page = grab_cache_page(inode->i_mapping, index); | ||
| 334 | if (!page) { | ||
| 335 | ret = 0; | ||
| 336 | goto free_cache; | ||
| 337 | } | ||
| 338 | |||
| 339 | if (!PageUptodate(page)) { | ||
| 340 | btrfs_readpage(NULL, page); | ||
| 341 | lock_page(page); | ||
| 342 | if (!PageUptodate(page)) { | ||
| 343 | unlock_page(page); | ||
| 344 | page_cache_release(page); | ||
| 345 | printk(KERN_ERR "btrfs: error reading free " | ||
| 346 | "space cache: %llu\n", | ||
| 347 | (unsigned long long) | ||
| 348 | block_group->key.objectid); | ||
| 349 | goto free_cache; | ||
| 350 | } | ||
| 351 | } | ||
| 352 | addr = kmap(page); | ||
| 353 | |||
| 354 | if (index == 0) { | ||
| 355 | u64 *gen; | ||
| 356 | |||
| 357 | memcpy(disk_crcs, addr, first_page_offset); | ||
| 358 | gen = addr + (sizeof(u32) * num_checksums); | ||
| 359 | if (*gen != BTRFS_I(inode)->generation) { | ||
| 360 | printk(KERN_ERR "btrfs: space cache generation" | ||
| 361 | " (%llu) does not match inode (%llu) " | ||
| 362 | "for block group %llu\n", | ||
| 363 | (unsigned long long)*gen, | ||
| 364 | (unsigned long long) | ||
| 365 | BTRFS_I(inode)->generation, | ||
| 366 | (unsigned long long) | ||
| 367 | block_group->key.objectid); | ||
| 368 | kunmap(page); | ||
| 369 | unlock_page(page); | ||
| 370 | page_cache_release(page); | ||
| 371 | goto free_cache; | ||
| 372 | } | ||
| 373 | crc = (u32 *)disk_crcs; | ||
| 374 | } | ||
| 375 | entry = addr + start_offset; | ||
| 376 | |||
| 377 | /* First lets check our crc before we do anything fun */ | ||
| 378 | cur_crc = ~(u32)0; | ||
| 379 | cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, | ||
| 380 | PAGE_CACHE_SIZE - start_offset); | ||
| 381 | btrfs_csum_final(cur_crc, (char *)&cur_crc); | ||
| 382 | if (cur_crc != *crc) { | ||
| 383 | printk(KERN_ERR "btrfs: crc mismatch for page %lu in " | ||
| 384 | "block group %llu\n", index, | ||
| 385 | (unsigned long long)block_group->key.objectid); | ||
| 386 | kunmap(page); | ||
| 387 | unlock_page(page); | ||
| 388 | page_cache_release(page); | ||
| 389 | goto free_cache; | ||
| 390 | } | ||
| 391 | crc++; | ||
| 392 | |||
| 393 | while (1) { | ||
| 394 | if (!num_entries) | ||
| 395 | break; | ||
| 396 | |||
| 397 | need_loop = 1; | ||
| 398 | e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | ||
| 399 | if (!e) { | ||
| 400 | kunmap(page); | ||
| 401 | unlock_page(page); | ||
| 402 | page_cache_release(page); | ||
| 403 | goto free_cache; | ||
| 404 | } | ||
| 405 | |||
| 406 | e->offset = le64_to_cpu(entry->offset); | ||
| 407 | e->bytes = le64_to_cpu(entry->bytes); | ||
| 408 | if (!e->bytes) { | ||
| 409 | kunmap(page); | ||
| 410 | kfree(e); | ||
| 411 | unlock_page(page); | ||
| 412 | page_cache_release(page); | ||
| 413 | goto free_cache; | ||
| 414 | } | ||
| 415 | |||
| 416 | if (entry->type == BTRFS_FREE_SPACE_EXTENT) { | ||
| 417 | spin_lock(&block_group->tree_lock); | ||
| 418 | ret = link_free_space(block_group, e); | ||
| 419 | spin_unlock(&block_group->tree_lock); | ||
| 420 | BUG_ON(ret); | ||
| 421 | } else { | ||
| 422 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | ||
| 423 | if (!e->bitmap) { | ||
| 424 | kunmap(page); | ||
| 425 | kfree(e); | ||
| 426 | unlock_page(page); | ||
| 427 | page_cache_release(page); | ||
| 428 | goto free_cache; | ||
| 429 | } | ||
| 430 | spin_lock(&block_group->tree_lock); | ||
| 431 | ret = link_free_space(block_group, e); | ||
| 432 | block_group->total_bitmaps++; | ||
| 433 | recalculate_thresholds(block_group); | ||
| 434 | spin_unlock(&block_group->tree_lock); | ||
| 435 | list_add_tail(&e->list, &bitmaps); | ||
| 436 | } | ||
| 437 | |||
| 438 | num_entries--; | ||
| 439 | offset += sizeof(struct btrfs_free_space_entry); | ||
| 440 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
| 441 | PAGE_CACHE_SIZE) | ||
| 442 | break; | ||
| 443 | entry++; | ||
| 444 | } | ||
| 445 | |||
| 446 | /* | ||
| 447 | * We read an entry out of this page, we need to move on to the | ||
| 448 | * next page. | ||
| 449 | */ | ||
| 450 | if (need_loop) { | ||
| 451 | kunmap(page); | ||
| 452 | goto next; | ||
| 453 | } | ||
| 454 | |||
| 455 | /* | ||
| 456 | * We add the bitmaps at the end of the entries in order that | ||
| 457 | * the bitmap entries are added to the cache. | ||
| 458 | */ | ||
| 459 | e = list_entry(bitmaps.next, struct btrfs_free_space, list); | ||
| 460 | list_del_init(&e->list); | ||
| 461 | memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); | ||
| 462 | kunmap(page); | ||
| 463 | num_bitmaps--; | ||
| 464 | next: | ||
| 465 | unlock_page(page); | ||
| 466 | page_cache_release(page); | ||
| 467 | index++; | ||
| 468 | } | ||
| 469 | |||
| 470 | ret = 1; | ||
| 471 | out: | ||
| 472 | kfree(checksums); | ||
| 473 | kfree(disk_crcs); | ||
| 474 | iput(inode); | ||
| 475 | return ret; | ||
| 476 | |||
| 477 | free_cache: | ||
| 478 | /* This cache is bogus, make sure it gets cleared */ | ||
| 479 | spin_lock(&block_group->lock); | ||
| 480 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | ||
| 481 | spin_unlock(&block_group->lock); | ||
| 482 | btrfs_remove_free_space_cache(block_group); | ||
| 483 | goto out; | ||
| 484 | } | ||
| 485 | |||
| 190 | int btrfs_write_out_cache(struct btrfs_root *root, | 486 | int btrfs_write_out_cache(struct btrfs_root *root, |
| 191 | struct btrfs_trans_handle *trans, | 487 | struct btrfs_trans_handle *trans, |
| 192 | struct btrfs_block_group_cache *block_group, | 488 | struct btrfs_block_group_cache *block_group, |
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 189f740bd3c0..e49ca5c321b5 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
| @@ -39,6 +39,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
| 39 | struct btrfs_trans_handle *trans, | 39 | struct btrfs_trans_handle *trans, |
| 40 | struct btrfs_path *path, | 40 | struct btrfs_path *path, |
| 41 | struct inode *inode); | 41 | struct inode *inode); |
| 42 | int load_free_space_cache(struct btrfs_fs_info *fs_info, | ||
| 43 | struct btrfs_block_group_cache *block_group); | ||
| 42 | int btrfs_write_out_cache(struct btrfs_root *root, | 44 | int btrfs_write_out_cache(struct btrfs_root *root, |
| 43 | struct btrfs_trans_handle *trans, | 45 | struct btrfs_trans_handle *trans, |
| 44 | struct btrfs_block_group_cache *block_group, | 46 | struct btrfs_block_group_cache *block_group, |
