aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Morton <akpm@osdl.org>2007-10-16 04:24:54 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-16 12:42:54 -0400
commit6814d7a91289ceb143285975e244a8f00fd3a830 (patch)
treee439e5d802d6ebaa73bac8b08c311ca60d4c8ee4
parent4b49643fbb3fa8bf4910f82be02d45e94e8972a4 (diff)
Revert "[PATCH] generic_file_buffered_write(): deadlock on vectored write"
This reverts commit 6527c2bdf1f833cc18e8f42bd97973d583e4aa83, which fixed the following bug: When prefaulting in the pages in generic_file_buffered_write(), we only faulted in the pages for the first segment of the iovec. If a second or subsequent segment described a mmapping of the page into which we're write()ing, and that page is not up-to-date, the fault handler tries to lock the already-locked page (to bring it up to date) and deadlocks. An exploit for this bug is in writev-deadlock-demo.c, in http://www.zip.com.au/~akpm/linux/patches/stuff/ext3-tools.tar.gz. (These demos assume blocksize < PAGE_CACHE_SIZE). The problem with this fix is that it takes the kernel back to doing a single prepare_write()/commit_write() per iovec segment. So in the worst case we'll run prepare_write+commit_write 1024 times where we previously would have run it once. The other problem with the fix is that it didn't fix all the locking problems. <insert numbers obtained via ext3-tools's writev-speed.c here> And apparently this change killed NFS overwrite performance, because, I suppose, it talks to the server for each prepare_write+commit_write. So just back that patch out - we'll be fixing the deadlock by other means. Nick says: also it only ever actually papered over the bug, because after faulting in the pages, they might be unmapped or reclaimed. Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/filemap.c18
1 file changed, 7 insertions, 11 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index caaaa7adfdf9..4bf7d1ab6c2a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1865,21 +1865,14 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
1865 do { 1865 do {
1866 unsigned long index; 1866 unsigned long index;
1867 unsigned long offset; 1867 unsigned long offset;
1868 unsigned long maxlen;
1868 size_t copied; 1869 size_t copied;
1869 1870
1870 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ 1871 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
1871 index = pos >> PAGE_CACHE_SHIFT; 1872 index = pos >> PAGE_CACHE_SHIFT;
1872 bytes = PAGE_CACHE_SIZE - offset; 1873 bytes = PAGE_CACHE_SIZE - offset;
1873 1874 if (bytes > count)
1874 /* Limit the size of the copy to the caller's write size */ 1875 bytes = count;
1875 bytes = min(bytes, count);
1876
1877 /*
1878 * Limit the size of the copy to that of the current segment,
1879 * because fault_in_pages_readable() doesn't know how to walk
1880 * segments.
1881 */
1882 bytes = min(bytes, cur_iov->iov_len - iov_base);
1883 1876
1884 /* 1877 /*
1885 * Bring in the user page that we will copy from _first_. 1878 * Bring in the user page that we will copy from _first_.
@@ -1887,7 +1880,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
1887 * same page as we're writing to, without it being marked 1880 * same page as we're writing to, without it being marked
1888 * up-to-date. 1881 * up-to-date.
1889 */ 1882 */
1890 fault_in_pages_readable(buf, bytes); 1883 maxlen = cur_iov->iov_len - iov_base;
1884 if (maxlen > bytes)
1885 maxlen = bytes;
1886 fault_in_pages_readable(buf, maxlen);
1891 1887
1892 page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); 1888 page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
1893 if (!page) { 1889 if (!page) {