aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Gui Hecheng <guihc.fnst@cn.fujitsu.com> 2014-11-10 02:36:08 -0500
committer: Chris Mason <clm@fb.com> 2014-11-20 20:20:08 -0500
commit: 321592427c0146126aadfab8a9b663de1875c9f4 (patch)
tree: 7cf8d8427168e4410b369d67555a0c5c3efd96aa /fs
parent: 5f5bc6b1e2d5a6f827bc860ef2dc5b6f365d1339 (diff)
btrfs: fix deadlock while running replace and defrag concurrently
This can be reproduced by fstests: btrfs/070

The scenario is like the following:

replace worker thread               defrag thread
---------------------               -------------
copy_nocow_pages_worker             btrfs_defrag_file
  copy_nocow_pages_for_inode          ...
                                      btrfs_writepages
  |A| lock_extent_bits                  extent_write_cache_pages
                                    |B|   lock_page
                                          __extent_writepage
                                          ...
                                            writepage_delalloc
                                              find_lock_delalloc_range
                                    |B|         lock_extent_bits
  find_or_create_page
    pagecache_get_page
  |A| lock_page

This leads to an ABBA pattern deadlock. To fix it,

  o we just change it to an AABB pattern, which means to
    @unlock_extent_bits() before we @lock_page(), and in this way the
    @extent_read_full_page_nolock() is no longer in a locked context,
    so change it back to @extent_read_full_page() to regain protection.

  o Since we @unlock_extent_bits() earlier, then before
    @write_page_nocow(), the extent may not really point at the physical
    block we want, so we have to check it before write.

Signed-off-by: Gui Hecheng <guihc.fnst@cn.fujitsu.com>
Tested-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/scrub.c 90
1 files changed, 60 insertions, 30 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index efa083113827..4325bb0111d9 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3310,6 +3310,50 @@ out:
3310 scrub_pending_trans_workers_dec(sctx); 3310 scrub_pending_trans_workers_dec(sctx);
3311} 3311}
3312 3312
/*
 * check_extent_to_block - verify that a file range still maps onto the
 * expected block range.
 *
 * @inode:   inode whose io_tree is locked and whose extent map is queried
 * @start:   start of the file range to check
 * @len:     length of the range
 * @logical: block address the range is expected to be backed by
 *
 * The range [start, start + len - 1] is locked in the inode's io_tree for
 * the duration of the check and is unlocked again on every return path.
 *
 * Returns 0 when no ordered extent is found in the range and the extent map
 * returned for it fully contains [logical, logical + len); 1 when an ordered
 * extent is found or the extent map does not contain that block range; and
 * the PTR_ERR() value when btrfs_get_extent() returns an error pointer.
 *
 * NOTE(review): lockend is computed as start + len - 1, so this assumes
 * len > 0 -- confirm against callers.
 */
3313static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
3314 u64 logical)
3315{
3316 struct extent_state *cached_state = NULL;
3317 struct btrfs_ordered_extent *ordered;
3318 struct extent_io_tree *io_tree;
3319 struct extent_map *em;
3320 u64 lockstart = start, lockend = start + len - 1;
3321 int ret = 0;
3322
3323 io_tree = &BTRFS_I(inode)->io_tree;
3324
/* Hold the extent range lock across both lookups below. */
3325 lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
3326 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
3327 if (ordered) {
/* Only existence matters here; drop the reference from the lookup. */
3328 btrfs_put_ordered_extent(ordered);
3329 ret = 1;
3330 goto out_unlock;
3331 }
3332
3333 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
3334 if (IS_ERR(em)) {
3335 ret = PTR_ERR(em);
3336 goto out_unlock;
3337 }
3338
3339 /*
3340 * This extent does not actually cover the logical extent anymore,
3341 * move on to the next inode.
3342 */
3343 if (em->block_start > logical ||
3344 em->block_start + em->block_len < logical + len) {
3345 free_extent_map(em);
3346 ret = 1;
3347 goto out_unlock;
3348 }
3349 free_extent_map(em);
3350
/* Single exit: every path that took the range lock releases it here. */
3351out_unlock:
3352 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
3353 GFP_NOFS);
3354 return ret;
3355}
3356
3313static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, 3357static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3314 struct scrub_copy_nocow_ctx *nocow_ctx) 3358 struct scrub_copy_nocow_ctx *nocow_ctx)
3315{ 3359{
@@ -3318,13 +3362,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3318 struct inode *inode; 3362 struct inode *inode;
3319 struct page *page; 3363 struct page *page;
3320 struct btrfs_root *local_root; 3364 struct btrfs_root *local_root;
3321 struct btrfs_ordered_extent *ordered;
3322 struct extent_map *em;
3323 struct extent_state *cached_state = NULL;
3324 struct extent_io_tree *io_tree; 3365 struct extent_io_tree *io_tree;
3325 u64 physical_for_dev_replace; 3366 u64 physical_for_dev_replace;
3367 u64 nocow_ctx_logical;
3326 u64 len = nocow_ctx->len; 3368 u64 len = nocow_ctx->len;
3327 u64 lockstart = offset, lockend = offset + len - 1;
3328 unsigned long index; 3369 unsigned long index;
3329 int srcu_index; 3370 int srcu_index;
3330 int ret = 0; 3371 int ret = 0;
@@ -3356,30 +3397,13 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3356 3397
3357 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; 3398 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3358 io_tree = &BTRFS_I(inode)->io_tree; 3399 io_tree = &BTRFS_I(inode)->io_tree;
3400 nocow_ctx_logical = nocow_ctx->logical;
3359 3401
3360 lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state); 3402 ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
3361 ordered = btrfs_lookup_ordered_range(inode, lockstart, len); 3403 if (ret) {
3362 if (ordered) { 3404 ret = ret > 0 ? 0 : ret;
3363 btrfs_put_ordered_extent(ordered); 3405 goto out;
3364 goto out_unlock;
3365 }
3366
3367 em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
3368 if (IS_ERR(em)) {
3369 ret = PTR_ERR(em);
3370 goto out_unlock;
3371 }
3372
3373 /*
3374 * This extent does not actually cover the logical extent anymore,
3375 * move on to the next inode.
3376 */
3377 if (em->block_start > nocow_ctx->logical ||
3378 em->block_start + em->block_len < nocow_ctx->logical + len) {
3379 free_extent_map(em);
3380 goto out_unlock;
3381 } 3406 }
3382 free_extent_map(em);
3383 3407
3384 while (len >= PAGE_CACHE_SIZE) { 3408 while (len >= PAGE_CACHE_SIZE) {
3385 index = offset >> PAGE_CACHE_SHIFT; 3409 index = offset >> PAGE_CACHE_SHIFT;
@@ -3396,7 +3420,7 @@ again:
3396 goto next_page; 3420 goto next_page;
3397 } else { 3421 } else {
3398 ClearPageError(page); 3422 ClearPageError(page);
3399 err = extent_read_full_page_nolock(io_tree, page, 3423 err = extent_read_full_page(io_tree, page,
3400 btrfs_get_extent, 3424 btrfs_get_extent,
3401 nocow_ctx->mirror_num); 3425 nocow_ctx->mirror_num);
3402 if (err) { 3426 if (err) {
@@ -3421,6 +3445,14 @@ again:
3421 goto next_page; 3445 goto next_page;
3422 } 3446 }
3423 } 3447 }
3448
3449 ret = check_extent_to_block(inode, offset, len,
3450 nocow_ctx_logical);
3451 if (ret) {
3452 ret = ret > 0 ? 0 : ret;
3453 goto next_page;
3454 }
3455
3424 err = write_page_nocow(nocow_ctx->sctx, 3456 err = write_page_nocow(nocow_ctx->sctx,
3425 physical_for_dev_replace, page); 3457 physical_for_dev_replace, page);
3426 if (err) 3458 if (err)
@@ -3434,12 +3466,10 @@ next_page:
3434 3466
3435 offset += PAGE_CACHE_SIZE; 3467 offset += PAGE_CACHE_SIZE;
3436 physical_for_dev_replace += PAGE_CACHE_SIZE; 3468 physical_for_dev_replace += PAGE_CACHE_SIZE;
3469 nocow_ctx_logical += PAGE_CACHE_SIZE;
3437 len -= PAGE_CACHE_SIZE; 3470 len -= PAGE_CACHE_SIZE;
3438 } 3471 }
3439 ret = COPY_COMPLETE; 3472 ret = COPY_COMPLETE;
3440out_unlock:
3441 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
3442 GFP_NOFS);
3443out: 3473out:
3444 mutex_unlock(&inode->i_mutex); 3474 mutex_unlock(&inode->i_mutex);
3445 iput(inode); 3475 iput(inode);