aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/buffer_head_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/buffer_head_io.c')
-rw-r--r--fs/ocfs2/buffer_head_io.c95
1 files changed, 72 insertions, 23 deletions
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 9a24adf9be6e..c9037414f4f6 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
100 mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", 100 mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
101 (unsigned long long)block, nr, flags, inode); 101 (unsigned long long)block, nr, flags, inode);
102 102
103 BUG_ON((flags & OCFS2_BH_READAHEAD) &&
104 (!inode || !(flags & OCFS2_BH_CACHED)));
105
103 if (osb == NULL || osb->sb == NULL || bhs == NULL) { 106 if (osb == NULL || osb->sb == NULL || bhs == NULL) {
104 status = -EINVAL; 107 status = -EINVAL;
105 mlog_errno(status); 108 mlog_errno(status);
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
140 bh = bhs[i]; 143 bh = bhs[i];
141 ignore_cache = 0; 144 ignore_cache = 0;
142 145
146 /* There are three read-ahead cases here which we need to
147 * be concerned with. All three assume a buffer has
148 * previously been submitted with OCFS2_BH_READAHEAD
149 * and it hasn't yet completed I/O.
150 *
151 * 1) The current request is sync to disk. This rarely
152 * happens these days, and never when performance
153 * matters - the code can just wait on the buffer
154 * lock and re-submit.
155 *
156 * 2) The current request is cached, but not
157 * readahead. ocfs2_buffer_uptodate() will return
158 * false anyway, so we'll wind up waiting on the
159 * buffer lock to do I/O. We re-check the request
160 * with after getting the lock to avoid a re-submit.
161 *
162 * 3) The current request is readahead (and so must
163 * also be a caching one). We short circuit if the
164 * buffer is locked (under I/O) and if it's in the
165 * uptodate cache. The re-check from #2 catches the
166 * case that the previous read-ahead completes just
167 * before our is-it-in-flight check.
168 */
169
143 if (flags & OCFS2_BH_CACHED && 170 if (flags & OCFS2_BH_CACHED &&
144 !ocfs2_buffer_uptodate(inode, bh)) { 171 !ocfs2_buffer_uptodate(inode, bh)) {
145 mlog(ML_UPTODATE, 172 mlog(ML_UPTODATE,
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
169 continue; 196 continue;
170 } 197 }
171 198
199 /* A read-ahead request was made - if the
200 * buffer is already under read-ahead from a
201 * previously submitted request than we are
202 * done here. */
203 if ((flags & OCFS2_BH_READAHEAD)
204 && ocfs2_buffer_read_ahead(inode, bh))
205 continue;
206
172 lock_buffer(bh); 207 lock_buffer(bh);
173 if (buffer_jbd(bh)) { 208 if (buffer_jbd(bh)) {
174#ifdef CATCH_BH_JBD_RACES 209#ifdef CATCH_BH_JBD_RACES
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
181 continue; 216 continue;
182#endif 217#endif
183 } 218 }
219
220 /* Re-check ocfs2_buffer_uptodate() as a
221 * previously read-ahead buffer may have
222 * completed I/O while we were waiting for the
223 * buffer lock. */
224 if ((flags & OCFS2_BH_CACHED)
225 && !(flags & OCFS2_BH_READAHEAD)
226 && ocfs2_buffer_uptodate(inode, bh)) {
227 unlock_buffer(bh);
228 continue;
229 }
230
184 clear_buffer_uptodate(bh); 231 clear_buffer_uptodate(bh);
185 get_bh(bh); /* for end_buffer_read_sync() */ 232 get_bh(bh); /* for end_buffer_read_sync() */
186 bh->b_end_io = end_buffer_read_sync; 233 bh->b_end_io = end_buffer_read_sync;
187 if (flags & OCFS2_BH_READAHEAD) 234 submit_bh(READ, bh);
188 submit_bh(READA, bh);
189 else
190 submit_bh(READ, bh);
191 continue; 235 continue;
192 } 236 }
193 } 237 }
@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
197 for (i = (nr - 1); i >= 0; i--) { 241 for (i = (nr - 1); i >= 0; i--) {
198 bh = bhs[i]; 242 bh = bhs[i];
199 243
200 /* We know this can't have changed as we hold the 244 if (!(flags & OCFS2_BH_READAHEAD)) {
201 * inode sem. Avoid doing any work on the bh if the 245 /* We know this can't have changed as we hold the
202 * journal has it. */ 246 * inode sem. Avoid doing any work on the bh if the
203 if (!buffer_jbd(bh)) 247 * journal has it. */
204 wait_on_buffer(bh); 248 if (!buffer_jbd(bh))
205 249 wait_on_buffer(bh);
206 if (!buffer_uptodate(bh)) { 250
207 /* Status won't be cleared from here on out, 251 if (!buffer_uptodate(bh)) {
208 * so we can safely record this and loop back 252 /* Status won't be cleared from here on out,
209 * to cleanup the other buffers. Don't need to 253 * so we can safely record this and loop back
210 * remove the clustered uptodate information 254 * to cleanup the other buffers. Don't need to
211 * for this bh as it's not marked locally 255 * remove the clustered uptodate information
212 * uptodate. */ 256 * for this bh as it's not marked locally
213 status = -EIO; 257 * uptodate. */
214 brelse(bh); 258 status = -EIO;
215 bhs[i] = NULL; 259 brelse(bh);
216 continue; 260 bhs[i] = NULL;
261 continue;
262 }
217 } 263 }
218 264
265 /* Always set the buffer in the cache, even if it was
266 * a forced read, or read-ahead which hasn't yet
267 * completed. */
219 if (inode) 268 if (inode)
220 ocfs2_set_buffer_uptodate(inode, bh); 269 ocfs2_set_buffer_uptodate(inode, bh);
221 } 270 }
222 if (inode) 271 if (inode)
223 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 272 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
224 273
225 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 274 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
226 (unsigned long long)block, nr, 275 (unsigned long long)block, nr,
227 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); 276 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
228 277
229bail: 278bail:
230 279