diff options
author | NeilBrown <neilb@suse.de> | 2007-07-17 07:03:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-17 13:22:59 -0400 |
commit | a32ea1e1f925399e0d81ca3f7394a44a6dafa12c (patch) | |
tree | fade44f4d7baf5695a856ad73e6b98f0d6edf9de | |
parent | e21ea246bce5bb93dd822de420172ec280aed492 (diff) |
Fix read/truncate race
do_generic_mapping_read currently samples the i_size at the start and doesn't
do so again unless it needs to call ->readpage to load a page. After
->readpage it has to re-sample i_size as a truncate may have caused that page
to be filled with zeros, and the read() call should not see these.
However there are other activities that might cause ->readpage to be called on
a page between the time that do_generic_mapping_read samples i_size and when
it finds that it has an uptodate page. These include at least read-ahead and
possibly another thread performing a read.
So do_generic_mapping_read must sample i_size *after* it has an uptodate page.
Thus the current sampling at the start and after a read can be replaced with
a sampling before the copy-out.
The same change applied to __generic_file_splice_read.
Note that this fixes any race with truncate_complete_page, but does not fix a
possible race with truncate_partial_page. If a partial truncate happens after
do_generic_mapping_read samples i_size and before the copy_out, the nuls that
truncate_partial_page place in the page could be copied out incorrectly.
I think the best fix for that is to *not* zero out parts of the page in
truncate_partial_page, but rather to zero out the tail of a page when
increasing i_size.
Signed-off-by: Neil Brown <neilb@suse.de>
Cc: Jens Axboe <jens.axboe@oracle.com>
Acked-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/filemap.c | 72 |
1 files changed, 28 insertions, 44 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 100b99c2d504..5d5449f3d41c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -867,13 +867,11 @@ void do_generic_mapping_read(struct address_space *mapping, | |||
867 | { | 867 | { |
868 | struct inode *inode = mapping->host; | 868 | struct inode *inode = mapping->host; |
869 | unsigned long index; | 869 | unsigned long index; |
870 | unsigned long end_index; | ||
871 | unsigned long offset; | 870 | unsigned long offset; |
872 | unsigned long last_index; | 871 | unsigned long last_index; |
873 | unsigned long next_index; | 872 | unsigned long next_index; |
874 | unsigned long prev_index; | 873 | unsigned long prev_index; |
875 | unsigned int prev_offset; | 874 | unsigned int prev_offset; |
876 | loff_t isize; | ||
877 | struct page *cached_page; | 875 | struct page *cached_page; |
878 | int error; | 876 | int error; |
879 | struct file_ra_state ra = *_ra; | 877 | struct file_ra_state ra = *_ra; |
@@ -886,27 +884,12 @@ void do_generic_mapping_read(struct address_space *mapping, | |||
886 | last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; | 884 | last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; |
887 | offset = *ppos & ~PAGE_CACHE_MASK; | 885 | offset = *ppos & ~PAGE_CACHE_MASK; |
888 | 886 | ||
889 | isize = i_size_read(inode); | ||
890 | if (!isize) | ||
891 | goto out; | ||
892 | |||
893 | end_index = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
894 | for (;;) { | 887 | for (;;) { |
895 | struct page *page; | 888 | struct page *page; |
889 | unsigned long end_index; | ||
890 | loff_t isize; | ||
896 | unsigned long nr, ret; | 891 | unsigned long nr, ret; |
897 | 892 | ||
898 | /* nr is the maximum number of bytes to copy from this page */ | ||
899 | nr = PAGE_CACHE_SIZE; | ||
900 | if (index >= end_index) { | ||
901 | if (index > end_index) | ||
902 | goto out; | ||
903 | nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; | ||
904 | if (nr <= offset) { | ||
905 | goto out; | ||
906 | } | ||
907 | } | ||
908 | nr = nr - offset; | ||
909 | |||
910 | cond_resched(); | 893 | cond_resched(); |
911 | if (index == next_index) | 894 | if (index == next_index) |
912 | next_index = page_cache_readahead(mapping, &ra, filp, | 895 | next_index = page_cache_readahead(mapping, &ra, filp, |
@@ -921,6 +904,32 @@ find_page: | |||
921 | if (!PageUptodate(page)) | 904 | if (!PageUptodate(page)) |
922 | goto page_not_up_to_date; | 905 | goto page_not_up_to_date; |
923 | page_ok: | 906 | page_ok: |
907 | /* | ||
908 | * i_size must be checked after we know the page is Uptodate. | ||
909 | * | ||
910 | * Checking i_size after the check allows us to calculate | ||
911 | * the correct value for "nr", which means the zero-filled | ||
912 | * part of the page is not copied back to userspace (unless | ||
913 | * another truncate extends the file - this is desired though). | ||
914 | */ | ||
915 | |||
916 | isize = i_size_read(inode); | ||
917 | end_index = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
918 | if (unlikely(!isize || index > end_index)) { | ||
919 | page_cache_release(page); | ||
920 | goto out; | ||
921 | } | ||
922 | |||
923 | /* nr is the maximum number of bytes to copy from this page */ | ||
924 | nr = PAGE_CACHE_SIZE; | ||
925 | if (index == end_index) { | ||
926 | nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; | ||
927 | if (nr <= offset) { | ||
928 | page_cache_release(page); | ||
929 | goto out; | ||
930 | } | ||
931 | } | ||
932 | nr = nr - offset; | ||
924 | 933 | ||
925 | /* If users can be writing to this page using arbitrary | 934 | /* If users can be writing to this page using arbitrary |
926 | * virtual addresses, take care about potential aliasing | 935 | * virtual addresses, take care about potential aliasing |
@@ -1007,31 +1016,6 @@ readpage: | |||
1007 | unlock_page(page); | 1016 | unlock_page(page); |
1008 | } | 1017 | } |
1009 | 1018 | ||
1010 | /* | ||
1011 | * i_size must be checked after we have done ->readpage. | ||
1012 | * | ||
1013 | * Checking i_size after the readpage allows us to calculate | ||
1014 | * the correct value for "nr", which means the zero-filled | ||
1015 | * part of the page is not copied back to userspace (unless | ||
1016 | * another truncate extends the file - this is desired though). | ||
1017 | */ | ||
1018 | isize = i_size_read(inode); | ||
1019 | end_index = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
1020 | if (unlikely(!isize || index > end_index)) { | ||
1021 | page_cache_release(page); | ||
1022 | goto out; | ||
1023 | } | ||
1024 | |||
1025 | /* nr is the maximum number of bytes to copy from this page */ | ||
1026 | nr = PAGE_CACHE_SIZE; | ||
1027 | if (index == end_index) { | ||
1028 | nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; | ||
1029 | if (nr <= offset) { | ||
1030 | page_cache_release(page); | ||
1031 | goto out; | ||
1032 | } | ||
1033 | } | ||
1034 | nr = nr - offset; | ||
1035 | goto page_ok; | 1019 | goto page_ok; |
1036 | 1020 | ||
1037 | readpage_error: | 1021 | readpage_error: |