author		Xin Zhong <xin.zhong@intel.com>		2010-12-09 04:30:14 -0500
committer	Chris Mason <chris.mason@oracle.com>	2010-12-10 16:29:10 -0500
commit		914ee295af418e936ec20a08c1663eaabe4cd07a (patch)
tree		2e880ea913b8cbadf22b2288b5c278c2e5eadb60 /fs
parent		f106e82caaa0d943e47cacc184f5b40d538e0044 (diff)
Btrfs: pwrite blocked when writing from the mmaped buffer of the same page
This problem was found in MeeGo testing:
http://bugs.meego.com/show_bug.cgi?id=6672

A file on btrfs is mmapped and the mmapped buffer is passed to pwrite() to write
to the same page of the same file. In btrfs_file_aio_write(), the pages are
locked by prepare_pages(), so when btrfs_copy_from_user() is called, a page
fault occurs and the same page has to be locked again in filemap_fault(), which
blocks the write. The fix is to move iov_iter_fault_in_readable() before
prepare_pages() so that the page fault happens before the pages are locked, and
to disable page faults in the critical region in btrfs_copy_from_user().
Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Zhong, Xin <xin.zhong@intel.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
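For context, the blocked write described above can be exercised from userspace with a small program along the following lines. This is a minimal reproducer sketch, not the original MeeGo test case; the btrfs mount point and the 4 KiB page size are assumptions.

```c
/*
 * Sketch: pwrite() from an mmap'ed buffer of the same page of the same
 * btrfs file.  Path and sizes are illustrative assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/mnt/btrfs/testfile";	/* assumed btrfs mount */
	int fd = open(path, O_RDWR | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Populate the first page of the file. */
	char buf[4096];
	memset(buf, 'a', sizeof(buf));
	if (pwrite(fd, buf, sizeof(buf), 0) != (ssize_t)sizeof(buf)) {
		perror("pwrite (setup)");
		return 1;
	}

	/* Map that same first page. */
	char *map = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * Write the mapped buffer back to the same offset of the same
	 * file.  Copying from 'map' faults the page in via filemap_fault()
	 * while btrfs_file_aio_write() already holds the page lock, which
	 * is what blocked before this fix.
	 */
	if (pwrite(fd, map, 4096, 0) != 4096)
		perror("pwrite (from mmap)");

	munmap(map, 4096);
	close(fd);
	return 0;
}
```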
Diffstat (limited to 'fs')
-rw-r--r--	fs/btrfs/file.c	| 92
1 file changed, 60 insertions(+), 32 deletions(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c1faded5fca0..66836d85763b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                                          struct page **prepared_pages,
                                          struct iov_iter *i)
 {
-        size_t copied;
+        size_t copied = 0;
         int pg = 0;
         int offset = pos & (PAGE_CACHE_SIZE - 1);
+        int total_copied = 0;
 
         while (write_bytes > 0) {
                 size_t count = min_t(size_t,
                                      PAGE_CACHE_SIZE - offset, write_bytes);
                 struct page *page = prepared_pages[pg];
-again:
-                if (unlikely(iov_iter_fault_in_readable(i, count)))
-                        return -EFAULT;
-
-                /* Copy data from userspace to the current page */
-                copied = iov_iter_copy_from_user(page, i, offset, count);
+                /*
+                 * Copy data from userspace to the current page
+                 *
+                 * Disable pagefault to avoid recursive lock since
+                 * the pages are already locked
+                 */
+                pagefault_disable();
+                copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+                pagefault_enable();
 
                 /* Flush processor's dcache for this page */
                 flush_dcache_page(page);
                 iov_iter_advance(i, copied);
                 write_bytes -= copied;
+                total_copied += copied;
 
+                /* Return to btrfs_file_aio_write to fault page */
                 if (unlikely(copied == 0)) {
-                        count = min_t(size_t, PAGE_CACHE_SIZE - offset,
-                                      iov_iter_single_seg_count(i));
-                        goto again;
+                        break;
                 }
 
                 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ again:
                         offset = 0;
                 }
         }
-        return 0;
+        return total_copied;
 }
 
 /*
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
         unsigned long last_index;
         int will_write;
         int buffered = 0;
+        int copied = 0;
+        int dirty_pages = 0;
 
         will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
                       (file->f_flags & O_DIRECT));
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                 WARN_ON(num_pages > nrptrs);
                 memset(pages, 0, sizeof(struct page *) * nrptrs);
 
-                ret = btrfs_delalloc_reserve_space(inode, write_bytes);
+                /*
+                 * Fault pages before locking them in prepare_pages
+                 * to avoid recursive lock
+                 */
+                if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+                        ret = -EFAULT;
+                        goto out;
+                }
+
+                ret = btrfs_delalloc_reserve_space(inode,
+                                        num_pages << PAGE_CACHE_SHIFT);
                 if (ret)
                         goto out;
 
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                                     pos, first_index, last_index,
                                     write_bytes);
                 if (ret) {
-                        btrfs_delalloc_release_space(inode, write_bytes);
+                        btrfs_delalloc_release_space(inode,
+                                        num_pages << PAGE_CACHE_SHIFT);
                         goto out;
                 }
 
-                ret = btrfs_copy_from_user(pos, num_pages,
+                copied = btrfs_copy_from_user(pos, num_pages,
                                            write_bytes, pages, &i);
-                if (ret == 0) {
+                dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+                                        PAGE_CACHE_SHIFT;
+
+                if (num_pages > dirty_pages) {
+                        if (copied > 0)
+                                atomic_inc(
+                                        &BTRFS_I(inode)->outstanding_extents);
+                        btrfs_delalloc_release_space(inode,
+                                        (num_pages - dirty_pages) <<
+                                        PAGE_CACHE_SHIFT);
+                }
+
+                if (copied > 0) {
                         dirty_and_release_pages(NULL, root, file, pages,
-                                                num_pages, pos, write_bytes);
+                                                dirty_pages, pos, copied);
                 }
 
                 btrfs_drop_pages(pages, num_pages);
-                if (ret) {
-                        btrfs_delalloc_release_space(inode, write_bytes);
-                        goto out;
-                }
 
-                if (will_write) {
-                        filemap_fdatawrite_range(inode->i_mapping, pos,
-                                                 pos + write_bytes - 1);
-                } else {
-                        balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-                                                           num_pages);
-                        if (num_pages <
-                            (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-                                btrfs_btree_balance_dirty(root, 1);
-                        btrfs_throttle(root);
+                if (copied > 0) {
+                        if (will_write) {
+                                filemap_fdatawrite_range(inode->i_mapping, pos,
+                                                         pos + copied - 1);
+                        } else {
+                                balance_dirty_pages_ratelimited_nr(
+                                                        inode->i_mapping,
+                                                        dirty_pages);
+                                if (dirty_pages <
+                                    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+                                        btrfs_btree_balance_dirty(root, 1);
+                                btrfs_throttle(root);
+                        }
                 }
 
-                pos += write_bytes;
-                num_written += write_bytes;
+                pos += copied;
+                num_written += copied;
 
                 cond_resched();
         }
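A note on the reservation accounting in the last hunk: the write path now reserves delalloc space for all num_pages up front and, when btrfs_copy_from_user() returns a short count, releases the reservation for the pages that were never dirtied. The sketch below just replays that arithmetic with assumed numbers (4 KiB pages, a 3-page write where only 5000 bytes were copied before the first fault); it is not part of the kernel patch.

```c
#include <stdio.h>

#define PAGE_CACHE_SHIFT 12			/* assumed 4 KiB pages */
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

int main(void)
{
	unsigned long num_pages = 3;	/* pages reserved up front          */
	unsigned long copied = 5000;	/* bytes the copy actually returned */

	/* Round the copied byte count up to whole pages, as in the patch. */
	unsigned long dirty_pages =
		(copied + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

	printf("dirty_pages = %lu\n", dirty_pages);	/* prints 2 */
	if (num_pages > dirty_pages)
		printf("release %lu bytes of reservation\n",
		       (num_pages - dirty_pages) << PAGE_CACHE_SHIFT);
	return 0;
}
```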