aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-08-01 11:27:23 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:06 -0400
commit: f87f057b49ee52cf5c627ab27a706e3252767c9f (patch)
tree: 4f8cc04e91ae836f4dd1b2151f47cbd1bd1b9367 /fs
parent: 492bb6deee3416ad792dcd8584ebd95c463af1a6 (diff)
Btrfs: Improve and cleanup locking done by walk_down_tree
While dropping snapshots, walk_down_tree does most of the work of checking reference counts and limiting tree traversal to just the blocks that we are freeing. It dropped and held the allocation mutex in strange and confusing ways; this commit changes it to only hold the mutex while actually freeing a block. The rest of the checks around reference counts should be safe without the lock because we only allow one process in btrfs_drop_snapshot at a time. Other processes dropping reference counts should not drop the count to 1 because their tree roots already have an extra ref on the block.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/extent-tree.c 85
-rw-r--r-- fs/btrfs/file.c 13
-rw-r--r-- fs/btrfs/ioctl.c 6
3 files changed, 70 insertions, 34 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b003b4364ddb..58bceeeda9c5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2333,8 +2333,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
2333 leaf_owner = btrfs_header_owner(leaf); 2333 leaf_owner = btrfs_header_owner(leaf);
2334 leaf_generation = btrfs_header_generation(leaf); 2334 leaf_generation = btrfs_header_generation(leaf);
2335 2335
2336 mutex_unlock(&root->fs_info->alloc_mutex);
2337
2338 for (i = 0; i < nritems; i++) { 2336 for (i = 0; i < nritems; i++) {
2339 u64 disk_bytenr; 2337 u64 disk_bytenr;
2340 cond_resched(); 2338 cond_resched();
@@ -2362,8 +2360,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
2362 mutex_unlock(&root->fs_info->alloc_mutex); 2360 mutex_unlock(&root->fs_info->alloc_mutex);
2363 BUG_ON(ret); 2361 BUG_ON(ret);
2364 } 2362 }
2365
2366 mutex_lock(&root->fs_info->alloc_mutex);
2367 return 0; 2363 return 0;
2368} 2364}
2369 2365
@@ -2375,7 +2371,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2375 int ret; 2371 int ret;
2376 struct btrfs_extent_info *info = ref->extents; 2372 struct btrfs_extent_info *info = ref->extents;
2377 2373
2378 mutex_unlock(&root->fs_info->alloc_mutex);
2379 for (i = 0; i < ref->nritems; i++) { 2374 for (i = 0; i < ref->nritems; i++) {
2380 mutex_lock(&root->fs_info->alloc_mutex); 2375 mutex_lock(&root->fs_info->alloc_mutex);
2381 ret = __btrfs_free_extent(trans, root, 2376 ret = __btrfs_free_extent(trans, root,
@@ -2386,7 +2381,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2386 BUG_ON(ret); 2381 BUG_ON(ret);
2387 info++; 2382 info++;
2388 } 2383 }
2389 mutex_lock(&root->fs_info->alloc_mutex);
2390 2384
2391 return 0; 2385 return 0;
2392} 2386}
@@ -2440,10 +2434,39 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2440 u32 *refs) 2434 u32 *refs)
2441{ 2435{
2442 int ret; 2436 int ret;
2443 mutex_unlock(&root->fs_info->alloc_mutex); 2437
2444 ret = lookup_extent_ref(NULL, root, start, len, refs); 2438 ret = lookup_extent_ref(NULL, root, start, len, refs);
2439 BUG_ON(ret);
2440
2441#if 0 // some debugging code in case we see problems here
2442 /* if the refs count is one, it won't get increased again. But
2443 * if the ref count is > 1, someone may be decreasing it at
2444 * the same time we are.
2445 */
2446 if (*refs != 1) {
2447 struct extent_buffer *eb = NULL;
2448 eb = btrfs_find_create_tree_block(root, start, len);
2449 if (eb)
2450 btrfs_tree_lock(eb);
2451
2452 mutex_lock(&root->fs_info->alloc_mutex);
2453 ret = lookup_extent_ref(NULL, root, start, len, refs);
2454 BUG_ON(ret);
2455 mutex_unlock(&root->fs_info->alloc_mutex);
2456
2457 if (eb) {
2458 btrfs_tree_unlock(eb);
2459 free_extent_buffer(eb);
2460 }
2461 if (*refs == 1) {
2462 printk("block %llu went down to one during drop_snap\n",
2463 (unsigned long long)start);
2464 }
2465
2466 }
2467#endif
2468
2445 cond_resched(); 2469 cond_resched();
2446 mutex_lock(&root->fs_info->alloc_mutex);
2447 return ret; 2470 return ret;
2448} 2471}
2449 2472
@@ -2467,8 +2490,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2467 int ret; 2490 int ret;
2468 u32 refs; 2491 u32 refs;
2469 2492
2470 mutex_lock(&root->fs_info->alloc_mutex);
2471
2472 WARN_ON(*level < 0); 2493 WARN_ON(*level < 0);
2473 WARN_ON(*level >= BTRFS_MAX_LEVEL); 2494 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2474 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, 2495 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
@@ -2507,13 +2528,21 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2507 root_owner = btrfs_header_owner(parent); 2528 root_owner = btrfs_header_owner(parent);
2508 root_gen = btrfs_header_generation(parent); 2529 root_gen = btrfs_header_generation(parent);
2509 path->slots[*level]++; 2530 path->slots[*level]++;
2531
2532 mutex_lock(&root->fs_info->alloc_mutex);
2510 ret = __btrfs_free_extent(trans, root, bytenr, 2533 ret = __btrfs_free_extent(trans, root, bytenr,
2511 blocksize, root_owner, 2534 blocksize, root_owner,
2512 root_gen, 0, 0, 1); 2535 root_gen, 0, 0, 1);
2513 BUG_ON(ret); 2536 BUG_ON(ret);
2537 mutex_unlock(&root->fs_info->alloc_mutex);
2514 continue; 2538 continue;
2515 } 2539 }
2516 2540 /*
2541 * at this point, we have a single ref, and since the
2542 * only place referencing this extent is a dead root
2543 * the reference count should never go higher.
2544 * So, we don't need to check it again
2545 */
2517 if (*level == 1) { 2546 if (*level == 1) {
2518 struct btrfs_key key; 2547 struct btrfs_key key;
2519 btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); 2548 btrfs_node_key_to_cpu(cur, &key, path->slots[*level]);
@@ -2533,33 +2562,23 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2533 next = btrfs_find_tree_block(root, bytenr, blocksize); 2562 next = btrfs_find_tree_block(root, bytenr, blocksize);
2534 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { 2563 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2535 free_extent_buffer(next); 2564 free_extent_buffer(next);
2536 mutex_unlock(&root->fs_info->alloc_mutex);
2537 2565
2538 if (path->slots[*level] == 0) 2566 if (path->slots[*level] == 0)
2539 reada_walk_down(root, cur, path->slots[*level]); 2567 reada_walk_down(root, cur, path->slots[*level]);
2540 next = read_tree_block(root, bytenr, blocksize, 2568 next = read_tree_block(root, bytenr, blocksize,
2541 ptr_gen); 2569 ptr_gen);
2542 cond_resched(); 2570 cond_resched();
2543 mutex_lock(&root->fs_info->alloc_mutex); 2571#if 0
2544 2572 /*
2545 /* we've dropped the lock, double check */ 2573 * this is a debugging check and can go away
2574 * the ref should never go all the way down to 1
2575 * at this point
2576 */
2546 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, 2577 ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
2547 &refs); 2578 &refs);
2548 BUG_ON(ret); 2579 BUG_ON(ret);
2549 if (refs != 1) { 2580 WARN_ON(refs != 1);
2550 parent = path->nodes[*level]; 2581#endif
2551 root_owner = btrfs_header_owner(parent);
2552 root_gen = btrfs_header_generation(parent);
2553
2554 path->slots[*level]++;
2555 free_extent_buffer(next);
2556 ret = __btrfs_free_extent(trans, root, bytenr,
2557 blocksize,
2558 root_owner,
2559 root_gen, 0, 0, 1);
2560 BUG_ON(ret);
2561 continue;
2562 }
2563 } 2582 }
2564 WARN_ON(*level <= 0); 2583 WARN_ON(*level <= 0);
2565 if (path->nodes[*level-1]) 2584 if (path->nodes[*level-1])
@@ -2584,6 +2603,8 @@ out:
2584 root_owner = btrfs_header_owner(parent); 2603 root_owner = btrfs_header_owner(parent);
2585 root_gen = btrfs_header_generation(parent); 2604 root_gen = btrfs_header_generation(parent);
2586 2605
2606
2607 mutex_lock(&root->fs_info->alloc_mutex);
2587 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 2608 ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
2588 root_owner, root_gen, 0, 0, 1); 2609 root_owner, root_gen, 0, 0, 1);
2589 free_extent_buffer(path->nodes[*level]); 2610 free_extent_buffer(path->nodes[*level]);
@@ -2591,6 +2612,7 @@ out:
2591 *level += 1; 2612 *level += 1;
2592 BUG_ON(ret); 2613 BUG_ON(ret);
2593 mutex_unlock(&root->fs_info->alloc_mutex); 2614 mutex_unlock(&root->fs_info->alloc_mutex);
2615
2594 cond_resched(); 2616 cond_resched();
2595 return 0; 2617 return 0;
2596} 2618}
@@ -2834,6 +2856,11 @@ again:
2834 } 2856 }
2835 set_page_extent_mapped(page); 2857 set_page_extent_mapped(page);
2836 2858
2859 /*
2860 * make sure page_mkwrite is called for this page if userland
2861 * wants to change it from mmap
2862 */
2863 clear_page_dirty_for_io(page);
2837 2864
2838 set_extent_delalloc(io_tree, page_start, 2865 set_extent_delalloc(io_tree, page_start,
2839 page_end, GFP_NOFS); 2866 page_end, GFP_NOFS);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c78f184ee5cc..8915f2dc1bce 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -338,6 +338,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
338 btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); 338 btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1);
339 BUG_ON(err); 339 BUG_ON(err);
340 mutex_unlock(&BTRFS_I(inode)->extent_mutex); 340 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
341
342 /*
343 * an ugly way to do all the prop accounting around
344 * the page bits and mapping tags
345 */
346 set_page_writeback(pages[0]);
347 end_page_writeback(pages[0]);
341 did_inline = 1; 348 did_inline = 1;
342 } 349 }
343 if (end_pos > isize) { 350 if (end_pos > isize) {
@@ -833,11 +840,7 @@ again:
833 start_pos, last_pos - 1, GFP_NOFS); 840 start_pos, last_pos - 1, GFP_NOFS);
834 } 841 }
835 for (i = 0; i < num_pages; i++) { 842 for (i = 0; i < num_pages; i++) {
836#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) 843 clear_page_dirty_for_io(pages[i]);
837 ClearPageDirty(pages[i]);
838#else
839 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
840#endif
841 set_page_extent_mapped(pages[i]); 844 set_page_extent_mapped(pages[i]);
842 WARN_ON(!PageLocked(pages[i])); 845 WARN_ON(!PageLocked(pages[i]));
843 } 846 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5e627746c4e8..224da287b3ed 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -268,6 +268,12 @@ again:
268 } 268 }
269 set_page_extent_mapped(page); 269 set_page_extent_mapped(page);
270 270
271 /*
272 * this makes sure page_mkwrite is called on the
273 * page if it is dirtied again later
274 */
275 clear_page_dirty_for_io(page);
276
271 set_extent_delalloc(io_tree, page_start, 277 set_extent_delalloc(io_tree, page_start,
272 page_end, GFP_NOFS); 278 page_end, GFP_NOFS);
273 279