aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2007-07-17 07:03:04 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-17 13:22:59 -0400
commita32ea1e1f925399e0d81ca3f7394a44a6dafa12c (patch)
treefade44f4d7baf5695a856ad73e6b98f0d6edf9de
parente21ea246bce5bb93dd822de420172ec280aed492 (diff)
Fix read/truncate race
do_generic_mapping_read currently samples the i_size at the start and doesn't do so again unless it needs to call ->readpage to load a page. After ->readpage it has to re-sample i_size as a truncate may have caused that page to be filled with zeros, and the read() call should not see these. However there are other activities that might cause ->readpage to be called on a page between the time that do_generic_mapping_read samples i_size and when it finds that it has an uptodate page. These include at least read-ahead and possibly another thread performing a read. So do_generic_mapping_read must sample i_size *after* it has an uptodate page. Thus the current sampling at the start and after a read can be replaced with a sampling before the copy-out. The same change applied to __generic_file_splice_read. Note that this fixes any race with truncate_complete_page, but does not fix a possible race with truncate_partial_page. If a partial truncate happens after do_generic_mapping_read samples i_size and before the copy_out, the nuls that truncate_partial_page place in the page could be copied out incorrectly. I think the best fix for that is to *not* zero out parts of the page in truncate_partial_page, but rather to zero out the tail of a page when increasing i_size. Signed-off-by: Neil Brown <neilb@suse.de> Cc: Jens Axboe <jens.axboe@oracle.com> Acked-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/filemap.c72
1 files changed, 28 insertions, 44 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 100b99c2d504..5d5449f3d41c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -867,13 +867,11 @@ void do_generic_mapping_read(struct address_space *mapping,
867{ 867{
868 struct inode *inode = mapping->host; 868 struct inode *inode = mapping->host;
869 unsigned long index; 869 unsigned long index;
870 unsigned long end_index;
871 unsigned long offset; 870 unsigned long offset;
872 unsigned long last_index; 871 unsigned long last_index;
873 unsigned long next_index; 872 unsigned long next_index;
874 unsigned long prev_index; 873 unsigned long prev_index;
875 unsigned int prev_offset; 874 unsigned int prev_offset;
876 loff_t isize;
877 struct page *cached_page; 875 struct page *cached_page;
878 int error; 876 int error;
879 struct file_ra_state ra = *_ra; 877 struct file_ra_state ra = *_ra;
@@ -886,27 +884,12 @@ void do_generic_mapping_read(struct address_space *mapping,
886 last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 884 last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
887 offset = *ppos & ~PAGE_CACHE_MASK; 885 offset = *ppos & ~PAGE_CACHE_MASK;
888 886
889 isize = i_size_read(inode);
890 if (!isize)
891 goto out;
892
893 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
894 for (;;) { 887 for (;;) {
895 struct page *page; 888 struct page *page;
889 unsigned long end_index;
890 loff_t isize;
896 unsigned long nr, ret; 891 unsigned long nr, ret;
897 892
898 /* nr is the maximum number of bytes to copy from this page */
899 nr = PAGE_CACHE_SIZE;
900 if (index >= end_index) {
901 if (index > end_index)
902 goto out;
903 nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
904 if (nr <= offset) {
905 goto out;
906 }
907 }
908 nr = nr - offset;
909
910 cond_resched(); 893 cond_resched();
911 if (index == next_index) 894 if (index == next_index)
912 next_index = page_cache_readahead(mapping, &ra, filp, 895 next_index = page_cache_readahead(mapping, &ra, filp,
@@ -921,6 +904,32 @@ find_page:
921 if (!PageUptodate(page)) 904 if (!PageUptodate(page))
922 goto page_not_up_to_date; 905 goto page_not_up_to_date;
923page_ok: 906page_ok:
907 /*
908 * i_size must be checked after we know the page is Uptodate.
909 *
910 * Checking i_size after the check allows us to calculate
911 * the correct value for "nr", which means the zero-filled
912 * part of the page is not copied back to userspace (unless
913 * another truncate extends the file - this is desired though).
914 */
915
916 isize = i_size_read(inode);
917 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
918 if (unlikely(!isize || index > end_index)) {
919 page_cache_release(page);
920 goto out;
921 }
922
923 /* nr is the maximum number of bytes to copy from this page */
924 nr = PAGE_CACHE_SIZE;
925 if (index == end_index) {
926 nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
927 if (nr <= offset) {
928 page_cache_release(page);
929 goto out;
930 }
931 }
932 nr = nr - offset;
924 933
925 /* If users can be writing to this page using arbitrary 934 /* If users can be writing to this page using arbitrary
926 * virtual addresses, take care about potential aliasing 935 * virtual addresses, take care about potential aliasing
@@ -1007,31 +1016,6 @@ readpage:
1007 unlock_page(page); 1016 unlock_page(page);
1008 } 1017 }
1009 1018
1010 /*
1011 * i_size must be checked after we have done ->readpage.
1012 *
1013 * Checking i_size after the readpage allows us to calculate
1014 * the correct value for "nr", which means the zero-filled
1015 * part of the page is not copied back to userspace (unless
1016 * another truncate extends the file - this is desired though).
1017 */
1018 isize = i_size_read(inode);
1019 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
1020 if (unlikely(!isize || index > end_index)) {
1021 page_cache_release(page);
1022 goto out;
1023 }
1024
1025 /* nr is the maximum number of bytes to copy from this page */
1026 nr = PAGE_CACHE_SIZE;
1027 if (index == end_index) {
1028 nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
1029 if (nr <= offset) {
1030 page_cache_release(page);
1031 goto out;
1032 }
1033 }
1034 nr = nr - offset;
1035 goto page_ok; 1019 goto page_ok;
1036 1020
1037readpage_error: 1021readpage_error: