diff options
author | Wengang Wang <wen.gang.wang@oracle.com> | 2011-07-24 13:36:54 -0400 |
---|---|---|
committer | Sunil Mushran <sunil.mushran@oracle.com> | 2011-07-24 13:36:54 -0400 |
commit | 5cffff9e29866a3de98c2c25135b3199491f93b0 (patch) | |
tree | 11b53cb3ad7cb44adb80f7802a62c145bf713514 | |
parent | a035bff6b82aca89c1223e2c614adc2d17ec8aa2 (diff) |
ocfs2: Fix ocfs2_page_mkwrite()
This patch address two shortcomings in ocfs2_page_mkwrite():
1. Makes the function return better VM_FAULT_* errors.
2. It handles a error that is triggered when a page is dropped from the mapping
due to memory pressure. This patch locks the page to prevent that.
[Patch was cleaned up by Sunil Mushran.]
Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
-rw-r--r-- | fs/ocfs2/aops.c | 51 | ||||
-rw-r--r-- | fs/ocfs2/mmap.c | 53 |
2 files changed, 67 insertions, 37 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c1efe939c774..ff98c169b631 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -863,6 +863,12 @@ struct ocfs2_write_ctxt { | |||
863 | struct page *w_target_page; | 863 | struct page *w_target_page; |
864 | 864 | ||
865 | /* | 865 | /* |
866 | * w_target_locked is used for page_mkwrite path indicating no unlocking | ||
867 | * against w_target_page in ocfs2_write_end_nolock. | ||
868 | */ | ||
869 | unsigned int w_target_locked:1; | ||
870 | |||
871 | /* | ||
866 | * ocfs2_write_end() uses this to know what the real range to | 872 | * ocfs2_write_end() uses this to know what the real range to |
867 | * write in the target should be. | 873 | * write in the target should be. |
868 | */ | 874 | */ |
@@ -895,6 +901,24 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) | |||
895 | 901 | ||
896 | static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) | 902 | static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) |
897 | { | 903 | { |
904 | int i; | ||
905 | |||
906 | /* | ||
907 | * w_target_locked is only set to true in the page_mkwrite() case. | ||
908 | * The intent is to allow us to lock the target page from write_begin() | ||
909 | * to write_end(). The caller must hold a ref on w_target_page. | ||
910 | */ | ||
911 | if (wc->w_target_locked) { | ||
912 | BUG_ON(!wc->w_target_page); | ||
913 | for (i = 0; i < wc->w_num_pages; i++) { | ||
914 | if (wc->w_target_page == wc->w_pages[i]) { | ||
915 | wc->w_pages[i] = NULL; | ||
916 | break; | ||
917 | } | ||
918 | } | ||
919 | mark_page_accessed(wc->w_target_page); | ||
920 | page_cache_release(wc->w_target_page); | ||
921 | } | ||
898 | ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); | 922 | ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); |
899 | 923 | ||
900 | brelse(wc->w_di_bh); | 924 | brelse(wc->w_di_bh); |
@@ -1132,20 +1156,17 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, | |||
1132 | */ | 1156 | */ |
1133 | lock_page(mmap_page); | 1157 | lock_page(mmap_page); |
1134 | 1158 | ||
1159 | /* Exit and let the caller retry */ | ||
1135 | if (mmap_page->mapping != mapping) { | 1160 | if (mmap_page->mapping != mapping) { |
1161 | WARN_ON(mmap_page->mapping); | ||
1136 | unlock_page(mmap_page); | 1162 | unlock_page(mmap_page); |
1137 | /* | 1163 | ret = -EAGAIN; |
1138 | * Sanity check - the locking in | ||
1139 | * ocfs2_pagemkwrite() should ensure | ||
1140 | * that this code doesn't trigger. | ||
1141 | */ | ||
1142 | ret = -EINVAL; | ||
1143 | mlog_errno(ret); | ||
1144 | goto out; | 1164 | goto out; |
1145 | } | 1165 | } |
1146 | 1166 | ||
1147 | page_cache_get(mmap_page); | 1167 | page_cache_get(mmap_page); |
1148 | wc->w_pages[i] = mmap_page; | 1168 | wc->w_pages[i] = mmap_page; |
1169 | wc->w_target_locked = true; | ||
1149 | } else { | 1170 | } else { |
1150 | wc->w_pages[i] = find_or_create_page(mapping, index, | 1171 | wc->w_pages[i] = find_or_create_page(mapping, index, |
1151 | GFP_NOFS); | 1172 | GFP_NOFS); |
@@ -1160,6 +1181,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, | |||
1160 | wc->w_target_page = wc->w_pages[i]; | 1181 | wc->w_target_page = wc->w_pages[i]; |
1161 | } | 1182 | } |
1162 | out: | 1183 | out: |
1184 | if (ret) | ||
1185 | wc->w_target_locked = false; | ||
1163 | return ret; | 1186 | return ret; |
1164 | } | 1187 | } |
1165 | 1188 | ||
@@ -1817,11 +1840,23 @@ try_again: | |||
1817 | */ | 1840 | */ |
1818 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, | 1841 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, |
1819 | cluster_of_pages, mmap_page); | 1842 | cluster_of_pages, mmap_page); |
1820 | if (ret) { | 1843 | if (ret && ret != -EAGAIN) { |
1821 | mlog_errno(ret); | 1844 | mlog_errno(ret); |
1822 | goto out_quota; | 1845 | goto out_quota; |
1823 | } | 1846 | } |
1824 | 1847 | ||
1848 | /* | ||
1849 | * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock | ||
1850 | * the target page. In this case, we exit with no error and no target | ||
1851 | * page. This will trigger the caller, page_mkwrite(), to re-try | ||
1852 | * the operation. | ||
1853 | */ | ||
1854 | if (ret == -EAGAIN) { | ||
1855 | BUG_ON(wc->w_target_page); | ||
1856 | ret = 0; | ||
1857 | goto out_quota; | ||
1858 | } | ||
1859 | |||
1825 | ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, | 1860 | ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, |
1826 | len); | 1861 | len); |
1827 | if (ret) { | 1862 | if (ret) { |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 3e9393ca39eb..9cd41083e991 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -61,7 +61,7 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) | |||
61 | static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | 61 | static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, |
62 | struct page *page) | 62 | struct page *page) |
63 | { | 63 | { |
64 | int ret; | 64 | int ret = VM_FAULT_NOPAGE; |
65 | struct inode *inode = file->f_path.dentry->d_inode; | 65 | struct inode *inode = file->f_path.dentry->d_inode; |
66 | struct address_space *mapping = inode->i_mapping; | 66 | struct address_space *mapping = inode->i_mapping; |
67 | loff_t pos = page_offset(page); | 67 | loff_t pos = page_offset(page); |
@@ -71,32 +71,25 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | |||
71 | void *fsdata; | 71 | void *fsdata; |
72 | loff_t size = i_size_read(inode); | 72 | loff_t size = i_size_read(inode); |
73 | 73 | ||
74 | /* | ||
75 | * Another node might have truncated while we were waiting on | ||
76 | * cluster locks. | ||
77 | * We don't check size == 0 before the shift. This is borrowed | ||
78 | * from do_generic_file_read. | ||
79 | */ | ||
80 | last_index = (size - 1) >> PAGE_CACHE_SHIFT; | 74 | last_index = (size - 1) >> PAGE_CACHE_SHIFT; |
81 | if (unlikely(!size || page->index > last_index)) { | ||
82 | ret = -EINVAL; | ||
83 | goto out; | ||
84 | } | ||
85 | 75 | ||
86 | /* | 76 | /* |
87 | * The i_size check above doesn't catch the case where nodes | 77 | * There are cases that lead to the page no longer bebongs to the |
88 | * truncated and then re-extended the file. We'll re-check the | 78 | * mapping. |
89 | * page mapping after taking the page lock inside of | 79 | * 1) pagecache truncates locally due to memory pressure. |
90 | * ocfs2_write_begin_nolock(). | 80 | * 2) pagecache truncates when another is taking EX lock against |
81 | * inode lock. see ocfs2_data_convert_worker. | ||
82 | * | ||
83 | * The i_size check doesn't catch the case where nodes truncated and | ||
84 | * then re-extended the file. We'll re-check the page mapping after | ||
85 | * taking the page lock inside of ocfs2_write_begin_nolock(). | ||
86 | * | ||
87 | * Let VM retry with these cases. | ||
91 | */ | 88 | */ |
92 | if (!PageUptodate(page) || page->mapping != inode->i_mapping) { | 89 | if ((page->mapping != inode->i_mapping) || |
93 | /* | 90 | (!PageUptodate(page)) || |
94 | * the page has been umapped in ocfs2_data_downconvert_worker. | 91 | (page_offset(page) >= size)) |
95 | * So return 0 here and let VFS retry. | ||
96 | */ | ||
97 | ret = 0; | ||
98 | goto out; | 92 | goto out; |
99 | } | ||
100 | 93 | ||
101 | /* | 94 | /* |
102 | * Call ocfs2_write_begin() and ocfs2_write_end() to take | 95 | * Call ocfs2_write_begin() and ocfs2_write_end() to take |
@@ -116,17 +109,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | |||
116 | if (ret) { | 109 | if (ret) { |
117 | if (ret != -ENOSPC) | 110 | if (ret != -ENOSPC) |
118 | mlog_errno(ret); | 111 | mlog_errno(ret); |
112 | if (ret == -ENOMEM) | ||
113 | ret = VM_FAULT_OOM; | ||
114 | else | ||
115 | ret = VM_FAULT_SIGBUS; | ||
119 | goto out; | 116 | goto out; |
120 | } | 117 | } |
121 | 118 | ||
122 | ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, | 119 | if (!locked_page) { |
123 | fsdata); | 120 | ret = VM_FAULT_NOPAGE; |
124 | if (ret < 0) { | ||
125 | mlog_errno(ret); | ||
126 | goto out; | 121 | goto out; |
127 | } | 122 | } |
123 | ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, | ||
124 | fsdata); | ||
128 | BUG_ON(ret != len); | 125 | BUG_ON(ret != len); |
129 | ret = 0; | 126 | ret = VM_FAULT_LOCKED; |
130 | out: | 127 | out: |
131 | return ret; | 128 | return ret; |
132 | } | 129 | } |
@@ -168,8 +165,6 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
168 | 165 | ||
169 | out: | 166 | out: |
170 | ocfs2_unblock_signals(&oldset); | 167 | ocfs2_unblock_signals(&oldset); |
171 | if (ret) | ||
172 | ret = VM_FAULT_SIGBUS; | ||
173 | return ret; | 168 | return ret; |
174 | } | 169 | } |
175 | 170 | ||