diff options
author | Miao Xie <miaox@cn.fujitsu.com> | 2012-02-16 02:01:24 -0500 |
---|---|---|
committer | David Sterba <dsterba@suse.cz> | 2012-02-16 11:23:16 -0500 |
commit | 600a45e1d5e376f679ff9ecc4ce9452710a6d27c (patch) | |
tree | c4bf84eee1db132b5ff18700b95da8426a909f2f /fs | |
parent | 013bd4c336ad0d30e9e41f9cff0dbc1858934e75 (diff) |
Btrfs: fix deadlock on page lock when doing auto-defragment
When I ran xfstests in a loop on a btrfs filesystem mounted with the
autodefrag option, a deadlock happened.
Steps to reproduce:
[tty0]
# export MOUNT_OPTIONS="-o autodefrag"
# export TEST_DEV=<partition1>
# export TEST_DIR=<mountpoint1>
# export SCRATCH_DEV=<partition2>
# export SCRATCH_MNT=<mountpoint2>
# while [ 1 ]
> do
> ./check 091 127 263
> sleep 1
> done
[tty1]
# while [ 1 ]
> do
> echo 3 > /proc/sys/vm/drop_caches
> done
Several hours later, the test processes hang, and a deadlock occurs
on a page lock.
The reason is the following sequence of events, which involves three tasks
(the auto defrag task, the flush thread, and the test task):
btrfs_writepages()
add ordered extent
(including page 1, 2)
set page 1 writeback
set page 2 writeback
endio_fn()
end page 2 writeback
release page 2
lock page 1
alloc and lock page 2
page 2 is not uptodate
btrfs_readpage()
start ordered extent()
btrfs_writepages()
try to lock page 1
so a deadlock happens.
Fix this bug by unlocking the page that is under writeback, and re-locking it
after the writeback ends.
Signed-off-by: Miao Xie <miax@cn.fujitsu.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ioctl.c | 53 |
1 files changed, 29 insertions, 24 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0b06a5ca8afc..e9bdb8b783e5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -862,6 +862,7 @@ static int cluster_pages_for_defrag(struct inode *inode, | |||
862 | int i_done; | 862 | int i_done; |
863 | struct btrfs_ordered_extent *ordered; | 863 | struct btrfs_ordered_extent *ordered; |
864 | struct extent_state *cached_state = NULL; | 864 | struct extent_state *cached_state = NULL; |
865 | struct extent_io_tree *tree; | ||
865 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | 866 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); |
866 | 867 | ||
867 | if (isize == 0) | 868 | if (isize == 0) |
@@ -872,18 +873,34 @@ static int cluster_pages_for_defrag(struct inode *inode, | |||
872 | num_pages << PAGE_CACHE_SHIFT); | 873 | num_pages << PAGE_CACHE_SHIFT); |
873 | if (ret) | 874 | if (ret) |
874 | return ret; | 875 | return ret; |
875 | again: | ||
876 | ret = 0; | ||
877 | i_done = 0; | 876 | i_done = 0; |
877 | tree = &BTRFS_I(inode)->io_tree; | ||
878 | 878 | ||
879 | /* step one, lock all the pages */ | 879 | /* step one, lock all the pages */ |
880 | for (i = 0; i < num_pages; i++) { | 880 | for (i = 0; i < num_pages; i++) { |
881 | struct page *page; | 881 | struct page *page; |
882 | again: | ||
882 | page = find_or_create_page(inode->i_mapping, | 883 | page = find_or_create_page(inode->i_mapping, |
883 | start_index + i, mask); | 884 | start_index + i, mask); |
884 | if (!page) | 885 | if (!page) |
885 | break; | 886 | break; |
886 | 887 | ||
888 | page_start = page_offset(page); | ||
889 | page_end = page_start + PAGE_CACHE_SIZE - 1; | ||
890 | while (1) { | ||
891 | lock_extent(tree, page_start, page_end, GFP_NOFS); | ||
892 | ordered = btrfs_lookup_ordered_extent(inode, | ||
893 | page_start); | ||
894 | unlock_extent(tree, page_start, page_end, GFP_NOFS); | ||
895 | if (!ordered) | ||
896 | break; | ||
897 | |||
898 | unlock_page(page); | ||
899 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
900 | btrfs_put_ordered_extent(ordered); | ||
901 | lock_page(page); | ||
902 | } | ||
903 | |||
887 | if (!PageUptodate(page)) { | 904 | if (!PageUptodate(page)) { |
888 | btrfs_readpage(NULL, page); | 905 | btrfs_readpage(NULL, page); |
889 | lock_page(page); | 906 | lock_page(page); |
@@ -894,15 +911,22 @@ again: | |||
894 | break; | 911 | break; |
895 | } | 912 | } |
896 | } | 913 | } |
914 | |||
897 | isize = i_size_read(inode); | 915 | isize = i_size_read(inode); |
898 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; | 916 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; |
899 | if (!isize || page->index > file_end || | 917 | if (!isize || page->index > file_end) { |
900 | page->mapping != inode->i_mapping) { | ||
901 | /* whoops, we blew past eof, skip this page */ | 918 | /* whoops, we blew past eof, skip this page */ |
902 | unlock_page(page); | 919 | unlock_page(page); |
903 | page_cache_release(page); | 920 | page_cache_release(page); |
904 | break; | 921 | break; |
905 | } | 922 | } |
923 | |||
924 | if (page->mapping != inode->i_mapping) { | ||
925 | unlock_page(page); | ||
926 | page_cache_release(page); | ||
927 | goto again; | ||
928 | } | ||
929 | |||
906 | pages[i] = page; | 930 | pages[i] = page; |
907 | i_done++; | 931 | i_done++; |
908 | } | 932 | } |
@@ -925,25 +949,6 @@ again: | |||
925 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | 949 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
926 | page_start, page_end - 1, 0, &cached_state, | 950 | page_start, page_end - 1, 0, &cached_state, |
927 | GFP_NOFS); | 951 | GFP_NOFS); |
928 | ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1); | ||
929 | if (ordered && | ||
930 | ordered->file_offset + ordered->len > page_start && | ||
931 | ordered->file_offset < page_end) { | ||
932 | btrfs_put_ordered_extent(ordered); | ||
933 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
934 | page_start, page_end - 1, | ||
935 | &cached_state, GFP_NOFS); | ||
936 | for (i = 0; i < i_done; i++) { | ||
937 | unlock_page(pages[i]); | ||
938 | page_cache_release(pages[i]); | ||
939 | } | ||
940 | btrfs_wait_ordered_range(inode, page_start, | ||
941 | page_end - page_start); | ||
942 | goto again; | ||
943 | } | ||
944 | if (ordered) | ||
945 | btrfs_put_ordered_extent(ordered); | ||
946 | |||
947 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, | 952 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, |
948 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | 953 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
949 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, | 954 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, |