diff options
author | Josef Bacik <josef@redhat.com> | 2011-09-30 15:23:54 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2011-09-30 15:23:54 -0400 |
commit | b6316429af7f365f307dfd2b6a7a42f2563aef19 (patch) | |
tree | f445576e448d3287d56650775bb7d0bc5a7d37c7 /fs/btrfs | |
parent | b6f3409b2197e8fcedb43e6600e37b7cfbe0715b (diff) |
Btrfs: force a page fault if we have a short copy on a page boundary
A user reported a problem where ceph was getting into 100% CPU usage while doing
some writing. It turns out it's because we were doing a short write on a page
that was not uptodate, which means we'd fall back to one page at a time and
fault the page in. The problem is that our position is on the page boundary, so
our fault-in logic wasn't actually reading the page, and we'd just spin forever
or until the page got read in by somebody else. This patch forces a readpage if
we end up doing a short copy. Alexandre could reproduce this easily with ceph
and reports it fixes his problem. I also wrote a reproducer that no longer hangs
my box with this patch. Thanks,
Reported-and-tested-by: Alexandre Oliva <aoliva@redhat.com>
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/file.c | 24 |
1 files changed, 16 insertions, 8 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 98d95bb5f253..e73051099368 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1036,11 +1036,13 @@ out: | |||
1036 | * on error we return an unlocked page and the error value | 1036 | * on error we return an unlocked page and the error value |
1037 | * on success we return a locked page and 0 | 1037 | * on success we return a locked page and 0 |
1038 | */ | 1038 | */ |
1039 | static int prepare_uptodate_page(struct page *page, u64 pos) | 1039 | static int prepare_uptodate_page(struct page *page, u64 pos, |
1040 | bool force_uptodate) | ||
1040 | { | 1041 | { |
1041 | int ret = 0; | 1042 | int ret = 0; |
1042 | 1043 | ||
1043 | if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { | 1044 | if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) && |
1045 | !PageUptodate(page)) { | ||
1044 | ret = btrfs_readpage(NULL, page); | 1046 | ret = btrfs_readpage(NULL, page); |
1045 | if (ret) | 1047 | if (ret) |
1046 | return ret; | 1048 | return ret; |
@@ -1061,7 +1063,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos) | |||
1061 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | 1063 | static noinline int prepare_pages(struct btrfs_root *root, struct file *file, |
1062 | struct page **pages, size_t num_pages, | 1064 | struct page **pages, size_t num_pages, |
1063 | loff_t pos, unsigned long first_index, | 1065 | loff_t pos, unsigned long first_index, |
1064 | size_t write_bytes) | 1066 | size_t write_bytes, bool force_uptodate) |
1065 | { | 1067 | { |
1066 | struct extent_state *cached_state = NULL; | 1068 | struct extent_state *cached_state = NULL; |
1067 | int i; | 1069 | int i; |
@@ -1086,10 +1088,11 @@ again: | |||
1086 | } | 1088 | } |
1087 | 1089 | ||
1088 | if (i == 0) | 1090 | if (i == 0) |
1089 | err = prepare_uptodate_page(pages[i], pos); | 1091 | err = prepare_uptodate_page(pages[i], pos, |
1092 | force_uptodate); | ||
1090 | if (i == num_pages - 1) | 1093 | if (i == num_pages - 1) |
1091 | err = prepare_uptodate_page(pages[i], | 1094 | err = prepare_uptodate_page(pages[i], |
1092 | pos + write_bytes); | 1095 | pos + write_bytes, false); |
1093 | if (err) { | 1096 | if (err) { |
1094 | page_cache_release(pages[i]); | 1097 | page_cache_release(pages[i]); |
1095 | faili = i - 1; | 1098 | faili = i - 1; |
@@ -1158,6 +1161,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1158 | size_t num_written = 0; | 1161 | size_t num_written = 0; |
1159 | int nrptrs; | 1162 | int nrptrs; |
1160 | int ret = 0; | 1163 | int ret = 0; |
1164 | bool force_page_uptodate = false; | ||
1161 | 1165 | ||
1162 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / | 1166 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
1163 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 1167 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
@@ -1200,7 +1204,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1200 | * contents of pages from loop to loop | 1204 | * contents of pages from loop to loop |
1201 | */ | 1205 | */ |
1202 | ret = prepare_pages(root, file, pages, num_pages, | 1206 | ret = prepare_pages(root, file, pages, num_pages, |
1203 | pos, first_index, write_bytes); | 1207 | pos, first_index, write_bytes, |
1208 | force_page_uptodate); | ||
1204 | if (ret) { | 1209 | if (ret) { |
1205 | btrfs_delalloc_release_space(inode, | 1210 | btrfs_delalloc_release_space(inode, |
1206 | num_pages << PAGE_CACHE_SHIFT); | 1211 | num_pages << PAGE_CACHE_SHIFT); |
@@ -1217,12 +1222,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1217 | if (copied < write_bytes) | 1222 | if (copied < write_bytes) |
1218 | nrptrs = 1; | 1223 | nrptrs = 1; |
1219 | 1224 | ||
1220 | if (copied == 0) | 1225 | if (copied == 0) { |
1226 | force_page_uptodate = true; | ||
1221 | dirty_pages = 0; | 1227 | dirty_pages = 0; |
1222 | else | 1228 | } else { |
1229 | force_page_uptodate = false; | ||
1223 | dirty_pages = (copied + offset + | 1230 | dirty_pages = (copied + offset + |
1224 | PAGE_CACHE_SIZE - 1) >> | 1231 | PAGE_CACHE_SIZE - 1) >> |
1225 | PAGE_CACHE_SHIFT; | 1232 | PAGE_CACHE_SHIFT; |
1233 | } | ||
1226 | 1234 | ||
1227 | /* | 1235 | /* |
1228 | * If we had a short copy we need to release the excess delaloc | 1236 | * If we had a short copy we need to release the excess delaloc |