aboutsummaryrefslogtreecommitdiffstats
path: root/mm/truncate.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2014-10-01 21:49:18 -0400
committerTheodore Ts'o <tytso@mit.edu>2014-10-01 21:49:18 -0400
commit90a8020278c1598fafd071736a0846b38510309c (patch)
tree2ab461b549a2b5f6b933895b1e61eb98627bba94 /mm/truncate.c
parentf6e63f90809946d410c42045577cb159fedabf8c (diff)
vfs: fix data corruption when blocksize < pagesize for mmaped data
->page_mkwrite() is used by filesystems to allocate blocks under a page which is becoming writably mmapped in some process' address space. This allows a filesystem to return a page fault if there is not enough space available, the user exceeds quota, or a similar problem happens, rather than silently discarding data later when writepage is called.

However, VFS fails to call ->page_mkwrite() in all the cases where filesystems need it when blocksize < pagesize. For example, when blocksize = 1024 and pagesize = 4096, the following is problematic:

  ftruncate(fd, 0);
  pwrite(fd, buf, 1024, 0);
  map = mmap(NULL, 1024, PROT_WRITE, MAP_SHARED, fd, 0);
  map[0] = 'a';         ----> page_mkwrite() for index 0 is called
  ftruncate(fd, 10000); /* or even pwrite(fd, buf, 1, 10000) */
  mremap(map, 1024, 10000, 0);
  map[4095] = 'a';      ----> no page_mkwrite() called

At the moment ->page_mkwrite() is called, the filesystem can allocate only one block for the page because i_size == 1024. Otherwise it would create blocks beyond i_size, which is generally undesirable. But later, at ->writepage() time, we also need to store data at offset 4095, but we don't have a block allocated for it.

This patch introduces a helper function filesystems can use to have ->page_mkwrite() called at all the necessary moments.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
Diffstat (limited to 'mm/truncate.c')
-rw-r--r--mm/truncate.c57
1 files changed, 57 insertions, 0 deletions
diff --git a/mm/truncate.c b/mm/truncate.c
index 96d167372d89..261eaf6e5a19 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
20#include <linux/buffer_head.h> /* grr. try_to_release_page, 20#include <linux/buffer_head.h> /* grr. try_to_release_page,
21 do_invalidatepage */ 21 do_invalidatepage */
22#include <linux/cleancache.h> 22#include <linux/cleancache.h>
23#include <linux/rmap.h>
23#include "internal.h" 24#include "internal.h"
24 25
25static void clear_exceptional_entry(struct address_space *mapping, 26static void clear_exceptional_entry(struct address_space *mapping,
@@ -719,12 +720,68 @@ EXPORT_SYMBOL(truncate_pagecache);
719 */ 720 */
720void truncate_setsize(struct inode *inode, loff_t newsize) 721void truncate_setsize(struct inode *inode, loff_t newsize)
721{ 722{
/* Remember the size before it is overwritten so an extension can be detected below. */
723 loff_t oldsize = inode->i_size;
724
722 i_size_write(inode, newsize); 725 i_size_write(inode, newsize);
/*
 * Only when the file grew: notify the page cache. This runs after
 * i_size_write(), so the helper sees the already-updated i_size.
 */
726 if (newsize > oldsize)
727 pagecache_isize_extended(inode, oldsize, newsize);
/* Called unconditionally, for both shrinking and growing size changes. */
723 truncate_pagecache(inode, newsize); 728 truncate_pagecache(inode, newsize);
724} 729}
725EXPORT_SYMBOL(truncate_setsize); 730EXPORT_SYMBOL(truncate_setsize);
726 731
727/** 732/**
733 * pagecache_isize_extended - update pagecache after extension of i_size
734 * @inode: inode for which i_size was extended
735 * @from: original inode size
736 * @to: new inode size
737 *
738 * Handle extension of inode size either caused by extending truncate or by
739 * write starting after current i_size. We mark the page straddling current
740 * i_size RO so that page_mkwrite() is called on the nearest write access to
741 * the page. This way filesystem can be sure that page_mkwrite() is called on
742 * the page before user writes to the page via mmap after the i_size has been
743 * changed.
744 *
745 * The function must be called after i_size is updated so that page fault
746 * coming after we unlock the page will already see the new i_size.
747 * The function must be called while we still hold i_mutex - this not only
748 * makes sure i_size is stable but also that userspace cannot observe new
749 * i_size value before we are prepared to store mmap writes at new inode size.
750 */
751void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
752{
/* Block size in bytes, computed from the inode's i_blkbits shift. */
753 int bsize = 1 << inode->i_blkbits;
754 loff_t rounded_from;
755 struct page *page;
756 pgoff_t index;
757
/*
 * Diagnostics for the calling contract stated in the kernel-doc:
 * i_mutex is held and @to does not exceed the already-updated i_size.
 * Neither check returns early; execution continues either way.
 */
758 WARN_ON(!mutex_is_locked(&inode->i_mutex));
759 WARN_ON(to > inode->i_size);
760
/* Size did not grow, or block size equals page size: nothing to fix up. */
761 if (from >= to || bsize == PAGE_CACHE_SIZE)
762 return;
763 /* Page straddling @from will not have any hole block created? */
/*
 * rounded_from is @from rounded up to a block boundary. Return when @to
 * does not go past that boundary, or when that boundary is also a page
 * boundary (low PAGE_CACHE_SIZE bits of rounded_from are all zero).
 */
764 rounded_from = round_up(from, bsize);
765 if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
766 return;
767
/* Page cache index of the page that contains the old size @from. */
768 index = from >> PAGE_CACHE_SHIFT;
769 page = find_lock_page(inode->i_mapping, index);
770 /* Page not cached? Nothing to do */
771 if (!page)
772 return;
773 /*
774 * See clear_page_dirty_for_io() for details why set_page_dirty()
775 * is needed.
 *
 * page_mkclean() is the call that carries out the read-only marking
 * described in the kernel-doc; when it returns non-zero the page is
 * marked dirty again.
776 */
777 if (page_mkclean(page))
778 set_page_dirty(page);
/* Undo find_lock_page(): unlock the page, then release it. */
779 unlock_page(page);
780 page_cache_release(page);
781}
782EXPORT_SYMBOL(pagecache_isize_extended);
783
784/**
728 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched 785 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
729 * @inode: inode 786 * @inode: inode
730 * @lstart: offset of beginning of hole 787 * @lstart: offset of beginning of hole