aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Josef Bacik <josef@redhat.com> 2010-05-23 11:00:54 -0400
committer Chris Mason <chris.mason@oracle.com> 2010-05-25 10:34:55 -0400
commit 66f998f611897319b555364cefd5d6e88a205866 (patch)
tree 3d2a46624bf6cf1ea1645cc8dad975af858dc114
parent 3fd0a5585eb98e074fb9934549c8d85c49756c0d (diff)
fs: allow short direct-io reads to be completed via buffered IO
This is similar to what already happens in the write case. If we have a short read while doing O_DIRECT, instead of just returning, fall through and try to read the rest via buffered IO. BTRFS needs this because if we encounter a compressed or inline extent during DIO, we need to fall back to buffered IO. If the extent is compressed, we need to read the entire thing into memory and decompress it into the user's pages. I have tested this with fsx and everything works great. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- mm/filemap.c 36
1 files changed, 31 insertions, 5 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 140ebda9640f..829ac9cdbd70 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1263,7 +1263,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1263{ 1263{
1264 struct file *filp = iocb->ki_filp; 1264 struct file *filp = iocb->ki_filp;
1265 ssize_t retval; 1265 ssize_t retval;
1266 unsigned long seg; 1266 unsigned long seg = 0;
1267 size_t count; 1267 size_t count;
1268 loff_t *ppos = &iocb->ki_pos; 1268 loff_t *ppos = &iocb->ki_pos;
1269 1269
@@ -1290,21 +1290,47 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1290 retval = mapping->a_ops->direct_IO(READ, iocb, 1290 retval = mapping->a_ops->direct_IO(READ, iocb,
1291 iov, pos, nr_segs); 1291 iov, pos, nr_segs);
1292 } 1292 }
1293 if (retval > 0) 1293 if (retval > 0) {
1294 *ppos = pos + retval; 1294 *ppos = pos + retval;
1295 if (retval) { 1295 count -= retval;
1296 }
1297
1298 /*
1299 * Btrfs can have a short DIO read if we encounter
1300 * compressed extents, so if there was an error, or if
1301 * we've already read everything we wanted to, or if
1302 * there was a short read because we hit EOF, go ahead
1303 * and return. Otherwise fallthrough to buffered io for
1304 * the rest of the read.
1305 */
1306 if (retval < 0 || !count || *ppos >= size) {
1296 file_accessed(filp); 1307 file_accessed(filp);
1297 goto out; 1308 goto out;
1298 } 1309 }
1299 } 1310 }
1300 } 1311 }
1301 1312
1313 count = retval;
1302 for (seg = 0; seg < nr_segs; seg++) { 1314 for (seg = 0; seg < nr_segs; seg++) {
1303 read_descriptor_t desc; 1315 read_descriptor_t desc;
1316 loff_t offset = 0;
1317
1318 /*
1319 * If we did a short DIO read we need to skip the section of the
1320 * iov that we've already read data into.
1321 */
1322 if (count) {
1323 if (count > iov[seg].iov_len) {
1324 count -= iov[seg].iov_len;
1325 continue;
1326 }
1327 offset = count;
1328 count = 0;
1329 }
1304 1330
1305 desc.written = 0; 1331 desc.written = 0;
1306 desc.arg.buf = iov[seg].iov_base; 1332 desc.arg.buf = iov[seg].iov_base + offset;
1307 desc.count = iov[seg].iov_len; 1333 desc.count = iov[seg].iov_len - offset;
1308 if (desc.count == 0) 1334 if (desc.count == 0)
1309 continue; 1335 continue;
1310 desc.error = 0; 1336 desc.error = 0;