author     Gui Hecheng <guihc.fnst@cn.fujitsu.com>    2014-11-10 02:36:08 -0500
committer  Chris Mason <clm@fb.com>                   2014-11-20 20:20:08 -0500
commit     321592427c0146126aadfab8a9b663de1875c9f4 (patch)
tree       7cf8d8427168e4410b369d67555a0c5c3efd96aa /fs/btrfs/scrub.c
parent     5f5bc6b1e2d5a6f827bc860ef2dc5b6f365d1339 (diff)
btrfs: fix dead lock while running replace and defrag concurrently
This can be reproduced by fstests: btrfs/070
The scenario is like the following:
replace worker thread                defrag thread
---------------------                -------------
copy_nocow_pages_worker              btrfs_defrag_file
  copy_nocow_pages_for_inode           ...
                                       btrfs_writepages
|A| lock_extent_bits                     extent_write_cache_pages
                                 |B|       lock_page
                                             __extent_writepage
        ...                                    writepage_delalloc
                                                 find_lock_delalloc_range
                                 |B|               lock_extent_bits
  find_or_create_page
    pagecache_get_page
|A| lock_page
This leads to an ABBA pattern deadlock. To fix it,
o we change it to an AABB pattern, i.e. @unlock_extent_bits() before we
  @lock_page(); since @extent_read_full_page_nolock() then no longer runs
  in a locked context, change it back to @extent_read_full_page() to
  regain protection.
o since we @unlock_extent_bits() earlier, by the time we reach
  @write_page_nocow() the extent may no longer point at the physical
  block we want, so we have to check it again before the write.
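
For illustration only (not from the patch): a minimal userspace sketch of
the two orderings, with pthread mutexes standing in for the extent lock (A)
and the page lock (B); every name in it is hypothetical.

/*
 * Illustration only, not part of the patch: the two lock orderings
 * modeled in userspace.  Build with: gcc -pthread sketch.c
 */
#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t extent_lock = PTHREAD_MUTEX_INITIALIZER;	/* A */
static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;	/* B */

/* Old, deadlock-prone ordering: waits for B while still holding A. */
void *replace_worker_abba(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&extent_lock);	/* A */
	pthread_mutex_lock(&page_lock);		/* B: may block forever */
	/* ... copy the nocow pages ... */
	pthread_mutex_unlock(&page_lock);
	pthread_mutex_unlock(&extent_lock);
	return NULL;
}

/* Fixed AABB ordering: A is fully released before B is taken. */
void *replace_worker_aabb(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&extent_lock);	/* A */
	/* ... look up the extent ... */
	pthread_mutex_unlock(&extent_lock);

	pthread_mutex_lock(&page_lock);		/* B */
	/*
	 * The extent may have changed while A was dropped, so it must
	 * be validated again (the job of check_extent_to_block() in
	 * this patch) before writing to the target block.
	 */
	pthread_mutex_unlock(&page_lock);
	return NULL;
}

/* Models the defrag side: takes B first, then waits for A. */
void *defrag_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&page_lock);		/* B */
	pthread_mutex_lock(&extent_lock);	/* A */
	/* ... write out the delalloc range ... */
	pthread_mutex_unlock(&extent_lock);
	pthread_mutex_unlock(&page_lock);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	/*
	 * Swapping in replace_worker_abba() here can deadlock: t1 holds
	 * A waiting for B while t2 holds B waiting for A.  The AABB
	 * variant cannot, because no thread ever holds A while waiting
	 * for B.
	 */
	pthread_create(&t1, NULL, replace_worker_aabb, NULL);
	pthread_create(&t2, NULL, defrag_thread, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}

The second bullet follows directly from the first: once A is dropped before
B is taken, the extent can change in the unlocked window, which is why the
patch re-validates the mapping with check_extent_to_block() before every
write_page_nocow().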
Signed-off-by: Gui Hecheng <guihc.fnst@cn.fujitsu.com>
Tested-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--   fs/btrfs/scrub.c   90
1 file changed, 60 insertions(+), 30 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index efa083113827..4325bb0111d9 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3310,6 +3310,50 @@ out:
 	scrub_pending_trans_workers_dec(sctx);
 }
 
+static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
+				 u64 logical)
+{
+	struct extent_state *cached_state = NULL;
+	struct btrfs_ordered_extent *ordered;
+	struct extent_io_tree *io_tree;
+	struct extent_map *em;
+	u64 lockstart = start, lockend = start + len - 1;
+	int ret = 0;
+
+	io_tree = &BTRFS_I(inode)->io_tree;
+
+	lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
+	if (ordered) {
+		btrfs_put_ordered_extent(ordered);
+		ret = 1;
+		goto out_unlock;
+	}
+
+	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto out_unlock;
+	}
+
+	/*
+	 * This extent does not actually cover the logical extent anymore,
+	 * move on to the next inode.
+	 */
+	if (em->block_start > logical ||
+	    em->block_start + em->block_len < logical + len) {
+		free_extent_map(em);
+		ret = 1;
+		goto out_unlock;
+	}
+	free_extent_map(em);
+
+out_unlock:
+	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
+			     GFP_NOFS);
+	return ret;
+}
+
 static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 				      struct scrub_copy_nocow_ctx *nocow_ctx)
 {
@@ -3318,13 +3362,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 	struct inode *inode;
 	struct page *page;
 	struct btrfs_root *local_root;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_map *em;
-	struct extent_state *cached_state = NULL;
 	struct extent_io_tree *io_tree;
 	u64 physical_for_dev_replace;
+	u64 nocow_ctx_logical;
 	u64 len = nocow_ctx->len;
-	u64 lockstart = offset, lockend = offset + len - 1;
 	unsigned long index;
 	int srcu_index;
 	int ret = 0;
@@ -3356,30 +3397,13 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 
 	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
 	io_tree = &BTRFS_I(inode)->io_tree;
+	nocow_ctx_logical = nocow_ctx->logical;
 
-	lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
-	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
-	if (ordered) {
-		btrfs_put_ordered_extent(ordered);
-		goto out_unlock;
-	}
-
-	em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
-	if (IS_ERR(em)) {
-		ret = PTR_ERR(em);
-		goto out_unlock;
-	}
-
-	/*
-	 * This extent does not actually cover the logical extent anymore,
-	 * move on to the next inode.
-	 */
-	if (em->block_start > nocow_ctx->logical ||
-	    em->block_start + em->block_len < nocow_ctx->logical + len) {
-		free_extent_map(em);
-		goto out_unlock;
-	}
-	free_extent_map(em);
+	ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
+	if (ret) {
+		ret = ret > 0 ? 0 : ret;
+		goto out;
+	}
 
 	while (len >= PAGE_CACHE_SIZE) {
 		index = offset >> PAGE_CACHE_SHIFT;
@@ -3396,7 +3420,7 @@ again:
 				goto next_page;
 		} else {
 			ClearPageError(page);
-			err = extent_read_full_page_nolock(io_tree, page,
+			err = extent_read_full_page(io_tree, page,
 							   btrfs_get_extent,
 							   nocow_ctx->mirror_num);
 			if (err) {
@@ -3421,6 +3445,14 @@ again:
 				goto next_page;
 			}
 		}
+
+		ret = check_extent_to_block(inode, offset, len,
+					    nocow_ctx_logical);
+		if (ret) {
+			ret = ret > 0 ? 0 : ret;
+			goto next_page;
+		}
+
 		err = write_page_nocow(nocow_ctx->sctx,
 				       physical_for_dev_replace, page);
 		if (err)
@@ -3434,12 +3466,10 @@ next_page:
 
 		offset += PAGE_CACHE_SIZE;
 		physical_for_dev_replace += PAGE_CACHE_SIZE;
+		nocow_ctx_logical += PAGE_CACHE_SIZE;
 		len -= PAGE_CACHE_SIZE;
 	}
 	ret = COPY_COMPLETE;
-out_unlock:
-	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
-			     GFP_NOFS);
 out:
 	mutex_unlock(&inode->i_mutex);
 	iput(inode);