aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2012-02-16 02:01:24 -0500
committerDavid Sterba <dsterba@suse.cz>2012-02-16 11:23:16 -0500
commit600a45e1d5e376f679ff9ecc4ce9452710a6d27c (patch)
treec4bf84eee1db132b5ff18700b95da8426a909f2f /fs
parent013bd4c336ad0d30e9e41f9cff0dbc1858934e75 (diff)
Btrfs: fix deadlock on page lock when doing auto-defragment
When I ran xfstests repeatedly on an auto-defragment btrfs, a deadlock happened. Steps to reproduce: [tty0] # export MOUNT_OPTIONS="-o autodefrag" # export TEST_DEV=<partition1> # export TEST_DIR=<mountpoint1> # export SCRATCH_DEV=<partition2> # export SCRATCH_MNT=<mountpoint2> # while [ 1 ] > do > ./check 091 127 263 > sleep 1 > done [tty1] # while [ 1 ] > do > echo 3 > /proc/sys/vm/drop_caches > done Several hours later, the test processes hang, and the deadlock happens on a page lock. The reason is as follows: Auto defrag task Flush thread Test task btrfs_writepages() add ordered extent (including page 1, 2) set page 1 writeback set page 2 writeback endio_fn() end page 2 writeback release page 2 lock page 1 alloc and lock page 2 page 2 is not uptodate btrfs_readpage() start ordered extent() btrfs_writepages() try to lock page 1 so the deadlock happens. Fix this bug by unlocking the page which is under writeback, and re-locking it after the writeback ends. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ioctl.c53
1 file changed, 29 insertions(+), 24 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0b06a5ca8afc..e9bdb8b783e5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -862,6 +862,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
862 int i_done; 862 int i_done;
863 struct btrfs_ordered_extent *ordered; 863 struct btrfs_ordered_extent *ordered;
864 struct extent_state *cached_state = NULL; 864 struct extent_state *cached_state = NULL;
865 struct extent_io_tree *tree;
865 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 866 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
866 867
867 if (isize == 0) 868 if (isize == 0)
@@ -872,18 +873,34 @@ static int cluster_pages_for_defrag(struct inode *inode,
872 num_pages << PAGE_CACHE_SHIFT); 873 num_pages << PAGE_CACHE_SHIFT);
873 if (ret) 874 if (ret)
874 return ret; 875 return ret;
875again:
876 ret = 0;
877 i_done = 0; 876 i_done = 0;
877 tree = &BTRFS_I(inode)->io_tree;
878 878
879 /* step one, lock all the pages */ 879 /* step one, lock all the pages */
880 for (i = 0; i < num_pages; i++) { 880 for (i = 0; i < num_pages; i++) {
881 struct page *page; 881 struct page *page;
882again:
882 page = find_or_create_page(inode->i_mapping, 883 page = find_or_create_page(inode->i_mapping,
883 start_index + i, mask); 884 start_index + i, mask);
884 if (!page) 885 if (!page)
885 break; 886 break;
886 887
888 page_start = page_offset(page);
889 page_end = page_start + PAGE_CACHE_SIZE - 1;
890 while (1) {
891 lock_extent(tree, page_start, page_end, GFP_NOFS);
892 ordered = btrfs_lookup_ordered_extent(inode,
893 page_start);
894 unlock_extent(tree, page_start, page_end, GFP_NOFS);
895 if (!ordered)
896 break;
897
898 unlock_page(page);
899 btrfs_start_ordered_extent(inode, ordered, 1);
900 btrfs_put_ordered_extent(ordered);
901 lock_page(page);
902 }
903
887 if (!PageUptodate(page)) { 904 if (!PageUptodate(page)) {
888 btrfs_readpage(NULL, page); 905 btrfs_readpage(NULL, page);
889 lock_page(page); 906 lock_page(page);
@@ -894,15 +911,22 @@ again:
894 break; 911 break;
895 } 912 }
896 } 913 }
914
897 isize = i_size_read(inode); 915 isize = i_size_read(inode);
898 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 916 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
899 if (!isize || page->index > file_end || 917 if (!isize || page->index > file_end) {
900 page->mapping != inode->i_mapping) {
901 /* whoops, we blew past eof, skip this page */ 918 /* whoops, we blew past eof, skip this page */
902 unlock_page(page); 919 unlock_page(page);
903 page_cache_release(page); 920 page_cache_release(page);
904 break; 921 break;
905 } 922 }
923
924 if (page->mapping != inode->i_mapping) {
925 unlock_page(page);
926 page_cache_release(page);
927 goto again;
928 }
929
906 pages[i] = page; 930 pages[i] = page;
907 i_done++; 931 i_done++;
908 } 932 }
@@ -925,25 +949,6 @@ again:
925 lock_extent_bits(&BTRFS_I(inode)->io_tree, 949 lock_extent_bits(&BTRFS_I(inode)->io_tree,
926 page_start, page_end - 1, 0, &cached_state, 950 page_start, page_end - 1, 0, &cached_state,
927 GFP_NOFS); 951 GFP_NOFS);
928 ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
929 if (ordered &&
930 ordered->file_offset + ordered->len > page_start &&
931 ordered->file_offset < page_end) {
932 btrfs_put_ordered_extent(ordered);
933 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
934 page_start, page_end - 1,
935 &cached_state, GFP_NOFS);
936 for (i = 0; i < i_done; i++) {
937 unlock_page(pages[i]);
938 page_cache_release(pages[i]);
939 }
940 btrfs_wait_ordered_range(inode, page_start,
941 page_end - page_start);
942 goto again;
943 }
944 if (ordered)
945 btrfs_put_ordered_extent(ordered);
946
947 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, 952 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
948 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 953 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
949 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, 954 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,