diff options
Diffstat (limited to 'fs/ocfs2/buffer_head_io.c')
-rw-r--r-- | fs/ocfs2/buffer_head_io.c | 95 |
1 files changed, 72 insertions, 23 deletions
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 9a24adf9be6e..c9037414f4f6 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
100 | mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", | 100 | mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", |
101 | (unsigned long long)block, nr, flags, inode); | 101 | (unsigned long long)block, nr, flags, inode); |
102 | 102 | ||
103 | BUG_ON((flags & OCFS2_BH_READAHEAD) && | ||
104 | (!inode || !(flags & OCFS2_BH_CACHED))); | ||
105 | |||
103 | if (osb == NULL || osb->sb == NULL || bhs == NULL) { | 106 | if (osb == NULL || osb->sb == NULL || bhs == NULL) { |
104 | status = -EINVAL; | 107 | status = -EINVAL; |
105 | mlog_errno(status); | 108 | mlog_errno(status); |
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
140 | bh = bhs[i]; | 143 | bh = bhs[i]; |
141 | ignore_cache = 0; | 144 | ignore_cache = 0; |
142 | 145 | ||
146 | /* There are three read-ahead cases here which we need to | ||
147 | * be concerned with. All three assume a buffer has | ||
148 | * previously been submitted with OCFS2_BH_READAHEAD | ||
149 | * and it hasn't yet completed I/O. | ||
150 | * | ||
151 | * 1) The current request is sync to disk. This rarely | ||
152 | * happens these days, and never when performance | ||
153 | * matters - the code can just wait on the buffer | ||
154 | * lock and re-submit. | ||
155 | * | ||
156 | * 2) The current request is cached, but not | ||
157 | * readahead. ocfs2_buffer_uptodate() will return | ||
158 | * false anyway, so we'll wind up waiting on the | ||
159 | * buffer lock to do I/O. We re-check the request | ||
160 | * after getting the lock to avoid a re-submit. | ||
161 | * | ||
162 | * 3) The current request is readahead (and so must | ||
163 | * also be a caching one). We short circuit if the | ||
164 | * buffer is locked (under I/O) and if it's in the | ||
165 | * uptodate cache. The re-check from #2 catches the | ||
166 | * case that the previous read-ahead completes just | ||
167 | * before our is-it-in-flight check. | ||
168 | */ | ||
169 | |||
143 | if (flags & OCFS2_BH_CACHED && | 170 | if (flags & OCFS2_BH_CACHED && |
144 | !ocfs2_buffer_uptodate(inode, bh)) { | 171 | !ocfs2_buffer_uptodate(inode, bh)) { |
145 | mlog(ML_UPTODATE, | 172 | mlog(ML_UPTODATE, |
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
169 | continue; | 196 | continue; |
170 | } | 197 | } |
171 | 198 | ||
199 | /* A read-ahead request was made - if the | ||
200 | * buffer is already under read-ahead from a | ||
201 | * previously submitted request then we are | ||
202 | * done here. */ | ||
203 | if ((flags & OCFS2_BH_READAHEAD) | ||
204 | && ocfs2_buffer_read_ahead(inode, bh)) | ||
205 | continue; | ||
206 | |||
172 | lock_buffer(bh); | 207 | lock_buffer(bh); |
173 | if (buffer_jbd(bh)) { | 208 | if (buffer_jbd(bh)) { |
174 | #ifdef CATCH_BH_JBD_RACES | 209 | #ifdef CATCH_BH_JBD_RACES |
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
181 | continue; | 216 | continue; |
182 | #endif | 217 | #endif |
183 | } | 218 | } |
219 | |||
220 | /* Re-check ocfs2_buffer_uptodate() as a | ||
221 | * previously read-ahead buffer may have | ||
222 | * completed I/O while we were waiting for the | ||
223 | * buffer lock. */ | ||
224 | if ((flags & OCFS2_BH_CACHED) | ||
225 | && !(flags & OCFS2_BH_READAHEAD) | ||
226 | && ocfs2_buffer_uptodate(inode, bh)) { | ||
227 | unlock_buffer(bh); | ||
228 | continue; | ||
229 | } | ||
230 | |||
184 | clear_buffer_uptodate(bh); | 231 | clear_buffer_uptodate(bh); |
185 | get_bh(bh); /* for end_buffer_read_sync() */ | 232 | get_bh(bh); /* for end_buffer_read_sync() */ |
186 | bh->b_end_io = end_buffer_read_sync; | 233 | bh->b_end_io = end_buffer_read_sync; |
187 | if (flags & OCFS2_BH_READAHEAD) | 234 | submit_bh(READ, bh); |
188 | submit_bh(READA, bh); | ||
189 | else | ||
190 | submit_bh(READ, bh); | ||
191 | continue; | 235 | continue; |
192 | } | 236 | } |
193 | } | 237 | } |
@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
197 | for (i = (nr - 1); i >= 0; i--) { | 241 | for (i = (nr - 1); i >= 0; i--) { |
198 | bh = bhs[i]; | 242 | bh = bhs[i]; |
199 | 243 | ||
200 | /* We know this can't have changed as we hold the | 244 | if (!(flags & OCFS2_BH_READAHEAD)) { |
201 | * inode sem. Avoid doing any work on the bh if the | 245 | /* We know this can't have changed as we hold the |
202 | * journal has it. */ | 246 | * inode sem. Avoid doing any work on the bh if the |
203 | if (!buffer_jbd(bh)) | 247 | * journal has it. */ |
204 | wait_on_buffer(bh); | 248 | if (!buffer_jbd(bh)) |
205 | 249 | wait_on_buffer(bh); | |
206 | if (!buffer_uptodate(bh)) { | 250 | |
207 | /* Status won't be cleared from here on out, | 251 | if (!buffer_uptodate(bh)) { |
208 | * so we can safely record this and loop back | 252 | /* Status won't be cleared from here on out, |
209 | * to cleanup the other buffers. Don't need to | 253 | * so we can safely record this and loop back |
210 | * remove the clustered uptodate information | 254 | * to cleanup the other buffers. Don't need to |
211 | * for this bh as it's not marked locally | 255 | * remove the clustered uptodate information |
212 | * uptodate. */ | 256 | * for this bh as it's not marked locally |
213 | status = -EIO; | 257 | * uptodate. */ |
214 | brelse(bh); | 258 | status = -EIO; |
215 | bhs[i] = NULL; | 259 | brelse(bh); |
216 | continue; | 260 | bhs[i] = NULL; |
261 | continue; | ||
262 | } | ||
217 | } | 263 | } |
218 | 264 | ||
265 | /* Always set the buffer in the cache, even if it was | ||
266 | * a forced read, or read-ahead which hasn't yet | ||
267 | * completed. */ | ||
219 | if (inode) | 268 | if (inode) |
220 | ocfs2_set_buffer_uptodate(inode, bh); | 269 | ocfs2_set_buffer_uptodate(inode, bh); |
221 | } | 270 | } |
222 | if (inode) | 271 | if (inode) |
223 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 272 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
224 | 273 | ||
225 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", | 274 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", |
226 | (unsigned long long)block, nr, | 275 | (unsigned long long)block, nr, |
227 | (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); | 276 | (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); |
228 | 277 | ||
229 | bail: | 278 | bail: |
230 | 279 | ||