aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2011-09-30 15:23:54 -0400
committerChris Mason <chris.mason@oracle.com>2011-09-30 15:23:54 -0400
commitb6316429af7f365f307dfd2b6a7a42f2563aef19 (patch)
treef445576e448d3287d56650775bb7d0bc5a7d37c7 /fs/btrfs/file.c
parentb6f3409b2197e8fcedb43e6600e37b7cfbe0715b (diff)
Btrfs: force a page fault if we have a shorty copy on a page boundary
A user reported a problem where ceph was getting into 100% cpu usage while doing some writing. It turns out it's because we were doing a short write on a not uptodate page, which means we'd fall back at one page at a time and fault the page in. The problem is our position is on the page boundary, so our fault in logic wasn't actually reading the page, so we'd just spin forever or until the page got read in by somebody else. This will force a readpage if we end up doing a short copy. Alexandre could reproduce this easily with ceph and reports it fixes his problem. I also wrote a reproducer that no longer hangs my box with this patch. Thanks, Reported-and-tested-by: Alexandre Oliva <aoliva@redhat.com> Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c24
1 files changed, 16 insertions, 8 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 98d95bb5f253..e73051099368 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1036,11 +1036,13 @@ out:
1036 * on error we return an unlocked page and the error value 1036 * on error we return an unlocked page and the error value
1037 * on success we return a locked page and 0 1037 * on success we return a locked page and 0
1038 */ 1038 */
1039static int prepare_uptodate_page(struct page *page, u64 pos) 1039static int prepare_uptodate_page(struct page *page, u64 pos,
1040 bool force_uptodate)
1040{ 1041{
1041 int ret = 0; 1042 int ret = 0;
1042 1043
1043 if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { 1044 if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
1045 !PageUptodate(page)) {
1044 ret = btrfs_readpage(NULL, page); 1046 ret = btrfs_readpage(NULL, page);
1045 if (ret) 1047 if (ret)
1046 return ret; 1048 return ret;
@@ -1061,7 +1063,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
1061static noinline int prepare_pages(struct btrfs_root *root, struct file *file, 1063static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1062 struct page **pages, size_t num_pages, 1064 struct page **pages, size_t num_pages,
1063 loff_t pos, unsigned long first_index, 1065 loff_t pos, unsigned long first_index,
1064 size_t write_bytes) 1066 size_t write_bytes, bool force_uptodate)
1065{ 1067{
1066 struct extent_state *cached_state = NULL; 1068 struct extent_state *cached_state = NULL;
1067 int i; 1069 int i;
@@ -1086,10 +1088,11 @@ again:
1086 } 1088 }
1087 1089
1088 if (i == 0) 1090 if (i == 0)
1089 err = prepare_uptodate_page(pages[i], pos); 1091 err = prepare_uptodate_page(pages[i], pos,
1092 force_uptodate);
1090 if (i == num_pages - 1) 1093 if (i == num_pages - 1)
1091 err = prepare_uptodate_page(pages[i], 1094 err = prepare_uptodate_page(pages[i],
1092 pos + write_bytes); 1095 pos + write_bytes, false);
1093 if (err) { 1096 if (err) {
1094 page_cache_release(pages[i]); 1097 page_cache_release(pages[i]);
1095 faili = i - 1; 1098 faili = i - 1;
@@ -1158,6 +1161,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1158 size_t num_written = 0; 1161 size_t num_written = 0;
1159 int nrptrs; 1162 int nrptrs;
1160 int ret = 0; 1163 int ret = 0;
1164 bool force_page_uptodate = false;
1161 1165
1162 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 1166 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
1163 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 1167 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -1200,7 +1204,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1200 * contents of pages from loop to loop 1204 * contents of pages from loop to loop
1201 */ 1205 */
1202 ret = prepare_pages(root, file, pages, num_pages, 1206 ret = prepare_pages(root, file, pages, num_pages,
1203 pos, first_index, write_bytes); 1207 pos, first_index, write_bytes,
1208 force_page_uptodate);
1204 if (ret) { 1209 if (ret) {
1205 btrfs_delalloc_release_space(inode, 1210 btrfs_delalloc_release_space(inode,
1206 num_pages << PAGE_CACHE_SHIFT); 1211 num_pages << PAGE_CACHE_SHIFT);
@@ -1217,12 +1222,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1217 if (copied < write_bytes) 1222 if (copied < write_bytes)
1218 nrptrs = 1; 1223 nrptrs = 1;
1219 1224
1220 if (copied == 0) 1225 if (copied == 0) {
1226 force_page_uptodate = true;
1221 dirty_pages = 0; 1227 dirty_pages = 0;
1222 else 1228 } else {
1229 force_page_uptodate = false;
1223 dirty_pages = (copied + offset + 1230 dirty_pages = (copied + offset +
1224 PAGE_CACHE_SIZE - 1) >> 1231 PAGE_CACHE_SIZE - 1) >>
1225 PAGE_CACHE_SHIFT; 1232 PAGE_CACHE_SHIFT;
1233 }
1226 1234
1227 /* 1235 /*
1228 * If we had a short copy we need to release the excess delaloc 1236 * If we had a short copy we need to release the excess delaloc