aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2013-07-03 18:02:05 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-03 19:07:29 -0400
commitb45972265f823ed01eae0867a176320071665787 (patch)
treeded8dcb801e71fe10e38c715a91571c745c563b5
parentd04e8acd03e5c3421ef18e3da7bc88d56179ca42 (diff)
mm: vmscan: take page buffers dirty and locked state into account
Page reclaim keeps track of dirty and under writeback pages and uses this information to determine if wait_iff_congested() should stall or if kswapd should begin writing back pages. This fails to account for buffer pages that can be under writeback but not PageWriteback, which is the case for filesystems like ext3 ordered mode. Furthermore, PageDirty buffer pages can have all the buffers clean and writepage does no IO so it should not be accounted as congested. This patch adds an address_space operation that filesystems may optionally use to check if a page is really dirty or really under writeback. An implementation for buffer_heads is added and used for block operations and ext3 in ordered mode. By default the page flags are obeyed. Credit goes to Jan Kara for identifying that the page flags alone are not sufficient for ext3 and sanity checking a number of ideas on how the problem could be addressed. Signed-off-by: Mel Gorman <mgorman@suse.de> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.cz> Cc: Rik van Riel <riel@redhat.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Jiri Slaby <jslaby@suse.cz> Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu> Cc: Zlatko Calusic <zcalusic@bitsync.net> Cc: dormando <dormando@rydia.net> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/buffer.c34
-rw-r--r--fs/ext3/inode.c1
-rw-r--r--include/linux/buffer_head.h3
-rw-r--r--include/linux/fs.h1
-rw-r--r--mm/vmscan.c10
6 files changed, 50 insertions, 0 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 431b6a04ebfd..bb43ce081d6e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1562,6 +1562,7 @@ static const struct address_space_operations def_blk_aops = {
1562 .writepages = generic_writepages, 1562 .writepages = generic_writepages,
1563 .releasepage = blkdev_releasepage, 1563 .releasepage = blkdev_releasepage,
1564 .direct_IO = blkdev_direct_IO, 1564 .direct_IO = blkdev_direct_IO,
1565 .is_dirty_writeback = buffer_check_dirty_writeback,
1565}; 1566};
1566 1567
1567const struct file_operations def_blk_fops = { 1568const struct file_operations def_blk_fops = {
diff --git a/fs/buffer.c b/fs/buffer.c
index f93392e2df12..4d7433534f5c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -83,6 +83,40 @@ void unlock_buffer(struct buffer_head *bh)
83EXPORT_SYMBOL(unlock_buffer); 83EXPORT_SYMBOL(unlock_buffer);
84 84
85/* 85/*
86 * Returns if the page has dirty or writeback buffers. If all the buffers
87 * are unlocked and clean then the PageDirty information is stale. If
88 * any of the buffers are locked, it is assumed they are locked for IO. The page must be locked.
89 */
90void buffer_check_dirty_writeback(struct page *page,
91 bool *dirty, bool *writeback)
92{
93 struct buffer_head *head, *bh;
94 *dirty = false;
95 *writeback = false;
96
97 BUG_ON(!PageLocked(page));
98
99 if (!page_has_buffers(page))
100 return;
101
102 if (PageWriteback(page))
103 *writeback = true;
104
105 head = page_buffers(page);
106 bh = head;
107 do {
108 if (buffer_locked(bh))
109 *writeback = true;
110
111 if (buffer_dirty(bh))
112 *dirty = true;
113
114 bh = bh->b_this_page;
115 } while (bh != head);
116}
117EXPORT_SYMBOL(buffer_check_dirty_writeback);
118
119/*
86 * Block until a buffer comes unlocked. This doesn't stop it 120 * Block until a buffer comes unlocked. This doesn't stop it
87 * from becoming locked again - you have to lock it yourself 121 * from becoming locked again - you have to lock it yourself
88 * if you want to preserve its state. 122 * if you want to preserve its state.
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f67668f724ba..2bd85486b879 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1985,6 +1985,7 @@ static const struct address_space_operations ext3_ordered_aops = {
1985 .direct_IO = ext3_direct_IO, 1985 .direct_IO = ext3_direct_IO,
1986 .migratepage = buffer_migrate_page, 1986 .migratepage = buffer_migrate_page,
1987 .is_partially_uptodate = block_is_partially_uptodate, 1987 .is_partially_uptodate = block_is_partially_uptodate,
1988 .is_dirty_writeback = buffer_check_dirty_writeback,
1988 .error_remove_page = generic_error_remove_page, 1989 .error_remove_page = generic_error_remove_page,
1989}; 1990};
1990 1991
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index f5a3b838ddb0..91fa9a94ae92 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -139,6 +139,9 @@ BUFFER_FNS(Prio, prio)
139 }) 139 })
140#define page_has_buffers(page) PagePrivate(page) 140#define page_has_buffers(page) PagePrivate(page)
141 141
142void buffer_check_dirty_writeback(struct page *page,
143 bool *dirty, bool *writeback);
144
142/* 145/*
143 * Declarations 146 * Declarations
144 */ 147 */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2b82c8041490..99be011e00de 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -380,6 +380,7 @@ struct address_space_operations {
380 int (*launder_page) (struct page *); 380 int (*launder_page) (struct page *);
381 int (*is_partially_uptodate) (struct page *, read_descriptor_t *, 381 int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
382 unsigned long); 382 unsigned long);
383 void (*is_dirty_writeback) (struct page *, bool *, bool *);
383 int (*error_remove_page)(struct address_space *, struct page *); 384 int (*error_remove_page)(struct address_space *, struct page *);
384 385
385 /* swapfile support */ 386 /* swapfile support */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bf4778479e3a..c85794399848 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -673,6 +673,8 @@ static enum page_references page_check_references(struct page *page,
673static void page_check_dirty_writeback(struct page *page, 673static void page_check_dirty_writeback(struct page *page,
674 bool *dirty, bool *writeback) 674 bool *dirty, bool *writeback)
675{ 675{
676 struct address_space *mapping;
677
676 /* 678 /*
677 * Anonymous pages are not handled by flushers and must be written 679 * Anonymous pages are not handled by flushers and must be written
678 * from reclaim context. Do not stall reclaim based on them 680 * from reclaim context. Do not stall reclaim based on them
@@ -686,6 +688,14 @@ static void page_check_dirty_writeback(struct page *page,
686 /* By default assume that the page flags are accurate */ 688 /* By default assume that the page flags are accurate */
687 *dirty = PageDirty(page); 689 *dirty = PageDirty(page);
688 *writeback = PageWriteback(page); 690 *writeback = PageWriteback(page);
691
692 /* Verify dirty/writeback state if the filesystem supports it */
693 if (!page_has_private(page))
694 return;
695
696 mapping = page_mapping(page);
697 if (mapping && mapping->a_ops->is_dirty_writeback)
698 mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
689} 699}
690 700
691/* 701/*