aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChris Mason <chris.mason@fusionio.com>2013-03-26 13:07:00 -0400
committerChris Mason <chris.mason@fusionio.com>2013-03-26 13:19:14 -0400
commit4adaa611020fa6ac65b0ac8db78276af4ec04e63 (patch)
treeb478ee3f9a16714521f5ced528ff2ce7afd71b8e /fs
parent1dd05682b3ef6e70409e130bfd83e91770801589 (diff)
Btrfs: fix race between mmap writes and compression
Btrfs uses page_mkwrite to ensure stable pages during crc calculations and mmap workloads. We call clear_page_dirty_for_io before we do any crcs, and this forces any application with the file mapped to wait for the crc to finish before it is allowed to change the file. With compression on, the clear_page_dirty_for_io step is happening after we've compressed the pages. This means the applications might be changing the pages while we are compressing them, and some of those modifications might not hit the disk. This commit adds the clear_page_dirty_for_io before compression starts and makes sure to redirty the page if we have to fallback to uncompressed IO as well. Signed-off-by: Chris Mason <chris.mason@fusionio.com> Reported-by: Alexandre Oliva <oliva@gnu.org> cc: stable@vger.kernel.org
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/inode.c14
3 files changed, 49 insertions, 0 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5af6461..cdee391fc7bf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1257 GFP_NOFS); 1257 GFP_NOFS);
1258} 1258}
1259 1259
1260int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1261{
1262 unsigned long index = start >> PAGE_CACHE_SHIFT;
1263 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1264 struct page *page;
1265
1266 while (index <= end_index) {
1267 page = find_get_page(inode->i_mapping, index);
1268 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1269 clear_page_dirty_for_io(page);
1270 page_cache_release(page);
1271 index++;
1272 }
1273 return 0;
1274}
1275
1276int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1277{
1278 unsigned long index = start >> PAGE_CACHE_SHIFT;
1279 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1280 struct page *page;
1281
1282 while (index <= end_index) {
1283 page = find_get_page(inode->i_mapping, index);
1284 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1285 account_page_redirty(page);
1286 __set_page_dirty_nobuffers(page);
1287 page_cache_release(page);
1288 index++;
1289 }
1290 return 0;
1291}
1292
1260/* 1293/*
1261 * helper function to set both pages and extents in the tree writeback 1294 * helper function to set both pages and extents in the tree writeback
1262 */ 1295 */
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6068a1985560..258c92156857 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
325 unsigned long *map_len); 325 unsigned long *map_len);
326int extent_range_uptodate(struct extent_io_tree *tree, 326int extent_range_uptodate(struct extent_io_tree *tree,
327 u64 start, u64 end); 327 u64 start, u64 end);
328int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
329int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
328int extent_clear_unlock_delalloc(struct inode *inode, 330int extent_clear_unlock_delalloc(struct inode *inode,
329 struct extent_io_tree *tree, 331 struct extent_io_tree *tree,
330 u64 start, u64 end, struct page *locked_page, 332 u64 start, u64 end, struct page *locked_page,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1f26888825e2..6a6e13c53086 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode,
353 int i; 353 int i;
354 int will_compress; 354 int will_compress;
355 int compress_type = root->fs_info->compress_type; 355 int compress_type = root->fs_info->compress_type;
356 int redirty = 0;
356 357
357 /* if this is a small write inside eof, kick off a defrag */ 358 /* if this is a small write inside eof, kick off a defrag */
358 if ((end - start + 1) < 16 * 1024 && 359 if ((end - start + 1) < 16 * 1024 &&
@@ -415,6 +416,17 @@ again:
415 if (BTRFS_I(inode)->force_compress) 416 if (BTRFS_I(inode)->force_compress)
416 compress_type = BTRFS_I(inode)->force_compress; 417 compress_type = BTRFS_I(inode)->force_compress;
417 418
419 /*
420 * we need to call clear_page_dirty_for_io on each
421 * page in the range. Otherwise applications with the file
422 * mmap'd can wander in and change the page contents while
423 * we are compressing them.
424 *
425 * If the compression fails for any reason, we set the pages
426 * dirty again later on.
427 */
428 extent_range_clear_dirty_for_io(inode, start, end);
429 redirty = 1;
418 ret = btrfs_compress_pages(compress_type, 430 ret = btrfs_compress_pages(compress_type,
419 inode->i_mapping, start, 431 inode->i_mapping, start,
420 total_compressed, pages, 432 total_compressed, pages,
@@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed:
554 __set_page_dirty_nobuffers(locked_page); 566 __set_page_dirty_nobuffers(locked_page);
555 /* unlocked later on in the async handlers */ 567 /* unlocked later on in the async handlers */
556 } 568 }
569 if (redirty)
570 extent_range_redirty_for_io(inode, start, end);
557 add_async_extent(async_cow, start, end - start + 1, 571 add_async_extent(async_cow, start, end - start + 1,
558 0, NULL, 0, BTRFS_COMPRESS_NONE); 572 0, NULL, 0, BTRFS_COMPRESS_NONE);
559 *num_added += 1; 573 *num_added += 1;