Btrfs: fix race between mmap writes and compression

Btrfs uses page_mkwrite to ensure stable pages during crc calculations and mmap workloads. We call clear_page_dirty_for_io before we do any crcs, and this forces any application with the file mapped to wait for the crc to finish before it is allowed to change the file.

With compression on, the clear_page_dirty_for_io step is happening after we've compressed the pages. This means the applications might be changing the pages while we are compressing them, and some of those modifications might not hit the disk.

This commit adds the clear_page_dirty_for_io before compression starts and makes sure to redirty the page if we have to fall back to uncompressed IO as well.

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Reported-by: Alexandre Oliva <oliva@gnu.org>
cc: stable@vger.kernel.org
author: Chris Mason <chris.mason@fusionio.com> 2013-03-26 13:07:00 -0400
committer: Chris Mason <chris.mason@fusionio.com> 2013-03-26 13:19:14 -0400
commit: 4adaa611020fa6ac65b0ac8db78276af4ec04e63 (patch)
tree: b478ee3f9a16714521f5ced528ff2ce7afd71b8e /fs
parent: 1dd05682b3ef6e70409e130bfd83e91770801589 (diff)
3 files changed, 49 insertions, 0 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5af6461..cdee391fc7bf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
                                GFP_NOFS);
 }
+/*
+ * Call clear_page_dirty_for_io() on every page of @inode's mapping whose
+ * index lies between (@start >> PAGE_CACHE_SHIFT) and
+ * (@end >> PAGE_CACHE_SHIFT), both ends inclusive.
+ *
+ * Called ahead of compression so that applications with the file mmap'd
+ * cannot change the page contents while they are being compressed.
+ *
+ * A page that cannot be found in the mapping trips BUG_ON().
+ * Always returns 0.
+ */
+int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
+{
+        unsigned long last = end >> PAGE_CACHE_SHIFT;
+        unsigned long idx;
+
+        for (idx = start >> PAGE_CACHE_SHIFT; idx <= last; idx++) {
+                struct page *pg = find_get_page(inode->i_mapping, idx);
+
+                BUG_ON(!pg); /* Pages should be in the extent_io_tree */
+                clear_page_dirty_for_io(pg);
+                /* pairs with the find_get_page() lookup above */
+                page_cache_release(pg);
+        }
+        return 0;
+}
+/*
+ * Mark dirty again every page of @inode's mapping whose index lies between
+ * (@start >> PAGE_CACHE_SHIFT) and (@end >> PAGE_CACHE_SHIFT), both ends
+ * inclusive.  Each page goes through account_page_redirty() followed by
+ * __set_page_dirty_nobuffers().
+ *
+ * Called when a range whose dirty bits were cleared for compression ends
+ * up being written uncompressed instead.
+ *
+ * A page that cannot be found in the mapping trips BUG_ON().
+ * Always returns 0.
+ */
+int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
+{
+        unsigned long last = end >> PAGE_CACHE_SHIFT;
+        unsigned long idx;
+
+        for (idx = start >> PAGE_CACHE_SHIFT; idx <= last; idx++) {
+                struct page *pg = find_get_page(inode->i_mapping, idx);
+
+                BUG_ON(!pg); /* Pages should be in the extent_io_tree */
+                account_page_redirty(pg);
+                __set_page_dirty_nobuffers(pg);
+                /* pairs with the find_get_page() lookup above */
+                page_cache_release(pg);
+        }
+        return 0;
+}
 /*
 * helper function to set both pages and extents in the tree writeback
 */
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6068a1985560..258c92156857 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
                      unsigned long *map_len);
 int extent_range_uptodate(struct extent_io_tree *tree,
                          u64 start, u64 end);
+int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
+int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 int extent_clear_unlock_delalloc(struct inode *inode,
                                struct extent_io_tree *tree,
                                u64 start, u64 end, struct page *locked_page,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1f26888825e2..6a6e13c53086 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode,
        int i;
        int will_compress;
        int compress_type = root->fs_info->compress_type;
+        int redirty = 0;
        /* if this is a small write inside eof, kick off a defrag */
        if ((end - start + 1) < 16 * 1024 &&
@@ -415,6 +416,17 @@ again:
                if (BTRFS_I(inode)->force_compress)
                        compress_type = BTRFS_I(inode)->force_compress;
+                /*
+                 * we need to call clear_page_dirty_for_io on each
+                 * page in the range.  Otherwise applications with the file
+                 * mmap'd can wander in and change the page contents while
+                 * we are compressing them.
+                 *
+                 * If the compression fails for any reason, we set the pages
+                 * dirty again later on.
+                 */
+                extent_range_clear_dirty_for_io(inode, start, end);
+                redirty = 1;
                ret = btrfs_compress_pages(compress_type,
                                           inode->i_mapping, start,
                                           total_compressed, pages,
@@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed:
                        __set_page_dirty_nobuffers(locked_page);
                        /* unlocked later on in the async handlers */
                }
+                if (redirty)
+                        extent_range_redirty_for_io(inode, start, end);
                add_async_extent(async_cow, start, end - start + 1,
                                 0, NULL, 0, BTRFS_COMPRESS_NONE);
                *num_added += 1;
author	Chris Mason <chris.mason@fusionio.com>	2013-03-26 13:07:00 -0400
committer	Chris Mason <chris.mason@fusionio.com>	2013-03-26 13:19:14 -0400
commit	4adaa611020fa6ac65b0ac8db78276af4ec04e63 (patch)
tree	b478ee3f9a16714521f5ced528ff2ce7afd71b8e /fs
parent	1dd05682b3ef6e70409e130bfd83e91770801589 (diff)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f173c5af6461..cdee391fc7bf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c
@@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1257	GFP_NOFS);	1257	GFP_NOFS);
1258	}	1258	}
1259		1259
		1260	int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
		1261	{
		1262	unsigned long index = start >> PAGE_CACHE_SHIFT;
		1263	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
		1264	struct page *page;
		1265
		1266	while (index <= end_index) {
		1267	page = find_get_page(inode->i_mapping, index);
		1268	BUG_ON(!page); /* Pages should be in the extent_io_tree */
		1269	clear_page_dirty_for_io(page);
		1270	page_cache_release(page);
		1271	index++;
		1272	}
		1273	return 0;
		1274	}
		1275
		1276	int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
		1277	{
		1278	unsigned long index = start >> PAGE_CACHE_SHIFT;
		1279	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
		1280	struct page *page;
		1281
		1282	while (index <= end_index) {
		1283	page = find_get_page(inode->i_mapping, index);
		1284	BUG_ON(!page); /* Pages should be in the extent_io_tree */
		1285	account_page_redirty(page);
		1286	__set_page_dirty_nobuffers(page);
		1287	page_cache_release(page);
		1288	index++;
		1289	}
		1290	return 0;
		1291	}
		1292
1260	/*	1293	/*
1261	* helper function to set both pages and extents in the tree writeback	1294	* helper function to set both pages and extents in the tree writeback
1262	*/	1295	*/


diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6068a1985560..258c92156857 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h
@@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
325	unsigned long *map_len);	325	unsigned long *map_len);
326	int extent_range_uptodate(struct extent_io_tree *tree,	326	int extent_range_uptodate(struct extent_io_tree *tree,
327	u64 start, u64 end);	327	u64 start, u64 end);
		328	int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
		329	int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
328	int extent_clear_unlock_delalloc(struct inode *inode,	330	int extent_clear_unlock_delalloc(struct inode *inode,
329	struct extent_io_tree *tree,	331	struct extent_io_tree *tree,
330	u64 start, u64 end, struct page *locked_page,	332	u64 start, u64 end, struct page *locked_page,


diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1f26888825e2..6a6e13c53086 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c
@@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode,
353	int i;	353	int i;
354	int will_compress;	354	int will_compress;
355	int compress_type = root->fs_info->compress_type;	355	int compress_type = root->fs_info->compress_type;
		356	int redirty = 0;
356		357
357	/* if this is a small write inside eof, kick off a defrag */	358	/* if this is a small write inside eof, kick off a defrag */
358	if ((end - start + 1) < 16 * 1024 &&	359	if ((end - start + 1) < 16 * 1024 &&
@@ -415,6 +416,17 @@ again:
415	if (BTRFS_I(inode)->force_compress)	416	if (BTRFS_I(inode)->force_compress)
416	compress_type = BTRFS_I(inode)->force_compress;	417	compress_type = BTRFS_I(inode)->force_compress;
417		418
		419	/*
		420	* we need to call clear_page_dirty_for_io on each
		421	* page in the range. Otherwise applications with the file
		422	* mmap'd can wander in and change the page contents while
		423	* we are compressing them.
		424	*
		425	* If the compression fails for any reason, we set the pages
		426	* dirty again later on.
		427	*/
		428	extent_range_clear_dirty_for_io(inode, start, end);
		429	redirty = 1;
418	ret = btrfs_compress_pages(compress_type,	430	ret = btrfs_compress_pages(compress_type,
419	inode->i_mapping, start,	431	inode->i_mapping, start,
420	total_compressed, pages,	432	total_compressed, pages,
@@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed:
554	__set_page_dirty_nobuffers(locked_page);	566	__set_page_dirty_nobuffers(locked_page);
555	/* unlocked later on in the async handlers */	567	/* unlocked later on in the async handlers */
556	}	568	}
		569	if (redirty)
		570	extent_range_redirty_for_io(inode, start, end);
557	add_async_extent(async_cow, start, end - start + 1,	571	add_async_extent(async_cow, start, end - start + 1,
558	0, NULL, 0, BTRFS_COMPRESS_NONE);	572	0, NULL, 0, BTRFS_COMPRESS_NONE);
559	*num_added += 1;	573	*num_added += 1;