aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
author: Sha Zhengju <handai.szj@taobao.com> 2013-08-21 04:27:34 -0400
committer: Sage Weil <sage@inktank.com> 2013-08-27 19:29:44 -0400
commit: 7d6e1f5461d0c16eb6aa8d226976995856d85e4e (patch)
tree: b85d1758b3969499ff993bac4b16962f442fd559 /fs/ceph
parent: ee7289bfadda5f4ef60884547ebc9989c8fb314a (diff)
ceph: use vfs __set_page_dirty_nobuffers interface instead of doing it inside filesystem
In following patches we will begin to add memcg dirty page accounting around __set_page_dirty_{buffers,nobuffers} in the vfs layer, so we had better use the vfs interface to avoid exporting those details to filesystems.

Since the vfs set_page_dirty() should be called under the page lock, we no longer need elaborate code here to handle races, and two WARN_ON()s are added to detect such exceptions.

Thanks very much to Sage and Yan Zheng for their coaching!

I tested it in a two-server ceph environment in which one server is the client and the other is mds/osd/mon, and ran the following fsx tests from xfstests:

  ./fsx 1MB -N 50000 -p 10000 -l 1048576
  ./fsx 10MB -N 50000 -p 10000 -l 10485760
  ./fsx 100MB -N 50000 -p 10000 -l 104857600

fsx does lots of mmap-read/mmap-write/truncate operations, and the tests completed successfully without triggering any of the WARN_ON()s.

Signed-off-by: Sha Zhengju <handai.szj@taobao.com>
Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c43
1 files changed, 14 insertions, 29 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index cb78ce81d6a6..3bed7da38326 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -70,15 +70,16 @@ static int ceph_set_page_dirty(struct page *page)
70 struct address_space *mapping = page->mapping; 70 struct address_space *mapping = page->mapping;
71 struct inode *inode; 71 struct inode *inode;
72 struct ceph_inode_info *ci; 72 struct ceph_inode_info *ci;
73 int undo = 0;
74 struct ceph_snap_context *snapc; 73 struct ceph_snap_context *snapc;
74 int ret;
75 75
76 if (unlikely(!mapping)) 76 if (unlikely(!mapping))
77 return !TestSetPageDirty(page); 77 return !TestSetPageDirty(page);
78 78
79 if (TestSetPageDirty(page)) { 79 if (PageDirty(page)) {
80 dout("%p set_page_dirty %p idx %lu -- already dirty\n", 80 dout("%p set_page_dirty %p idx %lu -- already dirty\n",
81 mapping->host, page, page->index); 81 mapping->host, page, page->index);
82 BUG_ON(!PagePrivate(page));
82 return 0; 83 return 0;
83 } 84 }
84 85
@@ -107,35 +108,19 @@ static int ceph_set_page_dirty(struct page *page)
107 snapc, snapc->seq, snapc->num_snaps); 108 snapc, snapc->seq, snapc->num_snaps);
108 spin_unlock(&ci->i_ceph_lock); 109 spin_unlock(&ci->i_ceph_lock);
109 110
110 /* now adjust page */ 111 /*
111 spin_lock_irq(&mapping->tree_lock); 112 * Reference snap context in page->private. Also set
112 if (page->mapping) { /* Race with truncate? */ 113 * PagePrivate so that we get invalidatepage callback.
113 WARN_ON_ONCE(!PageUptodate(page)); 114 */
114 account_page_dirtied(page, page->mapping); 115 BUG_ON(PagePrivate(page));
115 radix_tree_tag_set(&mapping->page_tree, 116 page->private = (unsigned long)snapc;
116 page_index(page), PAGECACHE_TAG_DIRTY); 117 SetPagePrivate(page);
117
118 /*
119 * Reference snap context in page->private. Also set
120 * PagePrivate so that we get invalidatepage callback.
121 */
122 page->private = (unsigned long)snapc;
123 SetPagePrivate(page);
124 } else {
125 dout("ANON set_page_dirty %p (raced truncate?)\n", page);
126 undo = 1;
127 }
128
129 spin_unlock_irq(&mapping->tree_lock);
130
131 if (undo)
132 /* whoops, we failed to dirty the page */
133 ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
134 118
135 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 119 ret = __set_page_dirty_nobuffers(page);
120 WARN_ON(!PageLocked(page));
121 WARN_ON(!page->mapping);
136 122
137 BUG_ON(!PageDirty(page)); 123 return ret;
138 return 1;
139} 124}
140 125
141/* 126/*