aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWengang Wang <wen.gang.wang@oracle.com>2011-07-24 13:36:54 -0400
committerSunil Mushran <sunil.mushran@oracle.com>2011-07-24 13:36:54 -0400
commit5cffff9e29866a3de98c2c25135b3199491f93b0 (patch)
tree11b53cb3ad7cb44adb80f7802a62c145bf713514
parenta035bff6b82aca89c1223e2c614adc2d17ec8aa2 (diff)
ocfs2: Fix ocfs2_page_mkwrite()
This patch address two shortcomings in ocfs2_page_mkwrite(): 1. Makes the function return better VM_FAULT_* errors. 2. It handles a error that is triggered when a page is dropped from the mapping due to memory pressure. This patch locks the page to prevent that. [Patch was cleaned up by Sunil Mushran.] Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com> Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
-rw-r--r--fs/ocfs2/aops.c51
-rw-r--r--fs/ocfs2/mmap.c53
2 files changed, 67 insertions, 37 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c1efe939c774..ff98c169b631 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -863,6 +863,12 @@ struct ocfs2_write_ctxt {
863 struct page *w_target_page; 863 struct page *w_target_page;
864 864
865 /* 865 /*
866 * w_target_locked is used for page_mkwrite path indicating no unlocking
867 * against w_target_page in ocfs2_write_end_nolock.
868 */
869 unsigned int w_target_locked:1;
870
871 /*
866 * ocfs2_write_end() uses this to know what the real range to 872 * ocfs2_write_end() uses this to know what the real range to
867 * write in the target should be. 873 * write in the target should be.
868 */ 874 */
@@ -895,6 +901,24 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
895 901
896static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) 902static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
897{ 903{
904 int i;
905
906 /*
907 * w_target_locked is only set to true in the page_mkwrite() case.
908 * The intent is to allow us to lock the target page from write_begin()
909 * to write_end(). The caller must hold a ref on w_target_page.
910 */
911 if (wc->w_target_locked) {
912 BUG_ON(!wc->w_target_page);
913 for (i = 0; i < wc->w_num_pages; i++) {
914 if (wc->w_target_page == wc->w_pages[i]) {
915 wc->w_pages[i] = NULL;
916 break;
917 }
918 }
919 mark_page_accessed(wc->w_target_page);
920 page_cache_release(wc->w_target_page);
921 }
898 ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); 922 ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
899 923
900 brelse(wc->w_di_bh); 924 brelse(wc->w_di_bh);
@@ -1132,20 +1156,17 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1132 */ 1156 */
1133 lock_page(mmap_page); 1157 lock_page(mmap_page);
1134 1158
1159 /* Exit and let the caller retry */
1135 if (mmap_page->mapping != mapping) { 1160 if (mmap_page->mapping != mapping) {
1161 WARN_ON(mmap_page->mapping);
1136 unlock_page(mmap_page); 1162 unlock_page(mmap_page);
1137 /* 1163 ret = -EAGAIN;
1138 * Sanity check - the locking in
1139 * ocfs2_pagemkwrite() should ensure
1140 * that this code doesn't trigger.
1141 */
1142 ret = -EINVAL;
1143 mlog_errno(ret);
1144 goto out; 1164 goto out;
1145 } 1165 }
1146 1166
1147 page_cache_get(mmap_page); 1167 page_cache_get(mmap_page);
1148 wc->w_pages[i] = mmap_page; 1168 wc->w_pages[i] = mmap_page;
1169 wc->w_target_locked = true;
1149 } else { 1170 } else {
1150 wc->w_pages[i] = find_or_create_page(mapping, index, 1171 wc->w_pages[i] = find_or_create_page(mapping, index,
1151 GFP_NOFS); 1172 GFP_NOFS);
@@ -1160,6 +1181,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1160 wc->w_target_page = wc->w_pages[i]; 1181 wc->w_target_page = wc->w_pages[i];
1161 } 1182 }
1162out: 1183out:
1184 if (ret)
1185 wc->w_target_locked = false;
1163 return ret; 1186 return ret;
1164} 1187}
1165 1188
@@ -1817,11 +1840,23 @@ try_again:
1817 */ 1840 */
1818 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, 1841 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
1819 cluster_of_pages, mmap_page); 1842 cluster_of_pages, mmap_page);
1820 if (ret) { 1843 if (ret && ret != -EAGAIN) {
1821 mlog_errno(ret); 1844 mlog_errno(ret);
1822 goto out_quota; 1845 goto out_quota;
1823 } 1846 }
1824 1847
1848 /*
1849 * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
1850 * the target page. In this case, we exit with no error and no target
1851 * page. This will trigger the caller, page_mkwrite(), to re-try
1852 * the operation.
1853 */
1854 if (ret == -EAGAIN) {
1855 BUG_ON(wc->w_target_page);
1856 ret = 0;
1857 goto out_quota;
1858 }
1859
1825 ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, 1860 ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
1826 len); 1861 len);
1827 if (ret) { 1862 if (ret) {
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 3e9393ca39eb..9cd41083e991 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -61,7 +61,7 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
61static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, 61static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
62 struct page *page) 62 struct page *page)
63{ 63{
64 int ret; 64 int ret = VM_FAULT_NOPAGE;
65 struct inode *inode = file->f_path.dentry->d_inode; 65 struct inode *inode = file->f_path.dentry->d_inode;
66 struct address_space *mapping = inode->i_mapping; 66 struct address_space *mapping = inode->i_mapping;
67 loff_t pos = page_offset(page); 67 loff_t pos = page_offset(page);
@@ -71,32 +71,25 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
71 void *fsdata; 71 void *fsdata;
72 loff_t size = i_size_read(inode); 72 loff_t size = i_size_read(inode);
73 73
74 /*
75 * Another node might have truncated while we were waiting on
76 * cluster locks.
77 * We don't check size == 0 before the shift. This is borrowed
78 * from do_generic_file_read.
79 */
80 last_index = (size - 1) >> PAGE_CACHE_SHIFT; 74 last_index = (size - 1) >> PAGE_CACHE_SHIFT;
81 if (unlikely(!size || page->index > last_index)) {
82 ret = -EINVAL;
83 goto out;
84 }
85 75
86 /* 76 /*
87 * The i_size check above doesn't catch the case where nodes 77 * There are cases that lead to the page no longer bebongs to the
88 * truncated and then re-extended the file. We'll re-check the 78 * mapping.
89 * page mapping after taking the page lock inside of 79 * 1) pagecache truncates locally due to memory pressure.
90 * ocfs2_write_begin_nolock(). 80 * 2) pagecache truncates when another is taking EX lock against
81 * inode lock. see ocfs2_data_convert_worker.
82 *
83 * The i_size check doesn't catch the case where nodes truncated and
84 * then re-extended the file. We'll re-check the page mapping after
85 * taking the page lock inside of ocfs2_write_begin_nolock().
86 *
87 * Let VM retry with these cases.
91 */ 88 */
92 if (!PageUptodate(page) || page->mapping != inode->i_mapping) { 89 if ((page->mapping != inode->i_mapping) ||
93 /* 90 (!PageUptodate(page)) ||
94 * the page has been umapped in ocfs2_data_downconvert_worker. 91 (page_offset(page) >= size))
95 * So return 0 here and let VFS retry.
96 */
97 ret = 0;
98 goto out; 92 goto out;
99 }
100 93
101 /* 94 /*
102 * Call ocfs2_write_begin() and ocfs2_write_end() to take 95 * Call ocfs2_write_begin() and ocfs2_write_end() to take
@@ -116,17 +109,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
116 if (ret) { 109 if (ret) {
117 if (ret != -ENOSPC) 110 if (ret != -ENOSPC)
118 mlog_errno(ret); 111 mlog_errno(ret);
112 if (ret == -ENOMEM)
113 ret = VM_FAULT_OOM;
114 else
115 ret = VM_FAULT_SIGBUS;
119 goto out; 116 goto out;
120 } 117 }
121 118
122 ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, 119 if (!locked_page) {
123 fsdata); 120 ret = VM_FAULT_NOPAGE;
124 if (ret < 0) {
125 mlog_errno(ret);
126 goto out; 121 goto out;
127 } 122 }
123 ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
124 fsdata);
128 BUG_ON(ret != len); 125 BUG_ON(ret != len);
129 ret = 0; 126 ret = VM_FAULT_LOCKED;
130out: 127out:
131 return ret; 128 return ret;
132} 129}
@@ -168,8 +165,6 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
168 165
169out: 166out:
170 ocfs2_unblock_signals(&oldset); 167 ocfs2_unblock_signals(&oldset);
171 if (ret)
172 ret = VM_FAULT_SIGBUS;
173 return ret; 168 return ret;
174} 169}
175 170