aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2013-02-21 19:42:51 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-21 20:22:19 -0500
commit1d1d1a767206fbe5d4c69493b7e6d2a8d08cc0a0 (patch)
tree6550294916016eac01deb596331aab1770223eab
parent7d311cdab663f4f7ab3a4c0d5d484234406f8268 (diff)
mm: only enforce stable page writes if the backing device requires it
Create a helper function to check if a backing device requires stable page writes and, if so, performs the necessary wait. Then, make it so that all points in the memory manager that handle making pages writable use the helper function. This should provide stable page write support to most filesystems, while eliminating unnecessary waiting for devices that don't require the feature. Before this patchset, all filesystems would block, regardless of whether or not it was necessary. ext3 would wait, but still generate occasional checksum errors. The network filesystems were left to do their own thing, so they'd wait too. After this patchset, all the disk filesystems except ext3 and btrfs will wait only if the hardware requires it. ext3 (if necessary) snapshots pages instead of blocking, and btrfs provides its own bdi so the mm will never wait. Network filesystems haven't been touched, so either they provide their own stable page guarantees or they don't block at all. The blocking behavior is back to what it was before 3.0 if you don't have a disk requiring stable page writes. Here's the result of using dbench to test latency on ext2: 3.8.0-rc3: Operation Count AvgLat MaxLat ---------------------------------------- WriteX 109347 0.028 59.817 ReadX 347180 0.004 3.391 Flush 15514 29.828 287.283 Throughput 57.429 MB/sec 4 clients 4 procs max_latency=287.290 ms 3.8.0-rc3 + patches: WriteX 105556 0.029 4.273 ReadX 335004 0.005 4.112 Flush 14982 30.540 298.634 Throughput 55.4496 MB/sec 4 clients 4 procs max_latency=298.650 ms As you can see, the maximum write latency drops considerably with this patch enabled. The other filesystems (ext3/ext4/xfs/btrfs) behave similarly, but see the cover letter for those results. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Acked-by: Steven Whitehouse <swhiteho@redhat.com> Reviewed-by: Jan Kara <jack@suse.cz> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Artem Bityutskiy <dedekind1@gmail.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Eric Van Hensbergen <ericvh@gmail.com> Cc: Ron Minnich <rminnich@sandia.gov> Cc: Latchesar Ionkov <lucho@ionkov.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/gfs2/file.c2
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--include/linux/pagemap.h1
-rw-r--r--mm/filemap.c3
-rw-r--r--mm/page-writeback.c20
7 files changed, 27 insertions, 5 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 7a75c3e0fd58..2ea9cd44aeae 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2359,7 +2359,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2359 if (unlikely(ret < 0)) 2359 if (unlikely(ret < 0))
2360 goto out_unlock; 2360 goto out_unlock;
2361 set_page_dirty(page); 2361 set_page_dirty(page);
2362 wait_on_page_writeback(page); 2362 wait_for_stable_page(page);
2363 return 0; 2363 return 0;
2364out_unlock: 2364out_unlock:
2365 unlock_page(page); 2365 unlock_page(page);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbfe13bf5b2a..cd818d8bb221 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4968,7 +4968,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4968 0, len, NULL, 4968 0, len, NULL,
4969 ext4_bh_unmapped)) { 4969 ext4_bh_unmapped)) {
4970 /* Wait so that we don't change page under IO */ 4970 /* Wait so that we don't change page under IO */
4971 wait_on_page_writeback(page); 4971 wait_for_stable_page(page);
4972 ret = VM_FAULT_LOCKED; 4972 ret = VM_FAULT_LOCKED;
4973 goto out; 4973 goto out;
4974 } 4974 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 06b7092a3f25..2687f50d98cb 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -483,7 +483,7 @@ out:
483 gfs2_holder_uninit(&gh); 483 gfs2_holder_uninit(&gh);
484 if (ret == 0) { 484 if (ret == 0) {
485 set_page_dirty(page); 485 set_page_dirty(page);
486 wait_on_page_writeback(page); 486 wait_for_stable_page(page);
487 } 487 }
488 sb_end_pagefault(inode->i_sb); 488 sb_end_pagefault(inode->i_sb);
489 return block_page_mkwrite_return(ret); 489 return block_page_mkwrite_return(ret);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 61946883025c..bec4af6eab13 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
126 nilfs_transaction_commit(inode->i_sb); 126 nilfs_transaction_commit(inode->i_sb);
127 127
128 mapped: 128 mapped:
129 wait_on_page_writeback(page); 129 wait_for_stable_page(page);
130 out: 130 out:
131 sb_end_pagefault(inode->i_sb); 131 sb_end_pagefault(inode->i_sb);
132 return block_page_mkwrite_return(ret); 132 return block_page_mkwrite_return(ret);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 6da609d14c15..0e38e13eb249 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -414,6 +414,7 @@ static inline void wait_on_page_writeback(struct page *page)
414} 414}
415 415
416extern void end_page_writeback(struct page *page); 416extern void end_page_writeback(struct page *page);
417void wait_for_stable_page(struct page *page);
417 418
418/* 419/*
419 * Add an arbitrary waiter to a page's wait queue 420 * Add an arbitrary waiter to a page's wait queue
diff --git a/mm/filemap.c b/mm/filemap.c
index 24a7ea583f0c..c610076c30e1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1728,6 +1728,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1728 * see the dirty page and writeprotect it again. 1728 * see the dirty page and writeprotect it again.
1729 */ 1729 */
1730 set_page_dirty(page); 1730 set_page_dirty(page);
1731 wait_for_stable_page(page);
1731out: 1732out:
1732 sb_end_pagefault(inode->i_sb); 1733 sb_end_pagefault(inode->i_sb);
1733 return ret; 1734 return ret;
@@ -2274,7 +2275,7 @@ repeat:
2274 return NULL; 2275 return NULL;
2275 } 2276 }
2276found: 2277found:
2277 wait_on_page_writeback(page); 2278 wait_for_stable_page(page);
2278 return page; 2279 return page;
2279} 2280}
2280EXPORT_SYMBOL(grab_cache_page_write_begin); 2281EXPORT_SYMBOL(grab_cache_page_write_begin);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 66a0024becd9..355d5ee69058 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2290,3 +2290,23 @@ int mapping_tagged(struct address_space *mapping, int tag)
2290 return radix_tree_tagged(&mapping->page_tree, tag); 2290 return radix_tree_tagged(&mapping->page_tree, tag);
2291} 2291}
2292EXPORT_SYMBOL(mapping_tagged); 2292EXPORT_SYMBOL(mapping_tagged);
2293
2294/**
2295 * wait_for_stable_page() - wait for writeback to finish, if necessary.
2296 * @page: The page to wait on.
2297 *
2298 * This function determines if the given page is related to a backing device
2299 * that requires page contents to be held stable during writeback. If so, then
2300 * it will wait for any pending writeback to complete.
2301 */
2302void wait_for_stable_page(struct page *page)
2303{
2304 struct address_space *mapping = page_mapping(page);
2305 struct backing_dev_info *bdi = mapping->backing_dev_info;
2306
2307 if (!bdi_cap_stable_pages_required(bdi))
2308 return;
2309
2310 wait_on_page_writeback(page);
2311}
2312EXPORT_SYMBOL_GPL(wait_for_stable_page);