diff options
author | Josef Bacik <josef@redhat.com> | 2010-05-23 11:00:54 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2010-05-25 10:34:55 -0400 |
commit | 66f998f611897319b555364cefd5d6e88a205866 (patch) | |
tree | 3d2a46624bf6cf1ea1645cc8dad975af858dc114 /mm | |
parent | 3fd0a5585eb98e074fb9934549c8d85c49756c0d (diff) |
fs: allow short direct-io reads to be completed via buffered IO
This is similar to what already happens in the write case. If we have a short
read while doing O_DIRECT, instead of just returning, fall through and try to
read the rest via buffered IO. BTRFS needs this because if we encounter a
compressed or inline extent during DIO, we need to fall back on buffered IO. If the
extent is compressed we need to read the entire thing into memory and
decompress it into the user's pages. I have tested this with fsx and everything
works great. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 36 |
1 files changed, 31 insertions, 5 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 140ebda9640f..829ac9cdbd70 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1263,7 +1263,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1263 | { | 1263 | { |
1264 | struct file *filp = iocb->ki_filp; | 1264 | struct file *filp = iocb->ki_filp; |
1265 | ssize_t retval; | 1265 | ssize_t retval; |
1266 | unsigned long seg; | 1266 | unsigned long seg = 0; |
1267 | size_t count; | 1267 | size_t count; |
1268 | loff_t *ppos = &iocb->ki_pos; | 1268 | loff_t *ppos = &iocb->ki_pos; |
1269 | 1269 | ||
@@ -1290,21 +1290,47 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1290 | retval = mapping->a_ops->direct_IO(READ, iocb, | 1290 | retval = mapping->a_ops->direct_IO(READ, iocb, |
1291 | iov, pos, nr_segs); | 1291 | iov, pos, nr_segs); |
1292 | } | 1292 | } |
1293 | if (retval > 0) | 1293 | if (retval > 0) { |
1294 | *ppos = pos + retval; | 1294 | *ppos = pos + retval; |
1295 | if (retval) { | 1295 | count -= retval; |
1296 | } | ||
1297 | |||
1298 | /* | ||
1299 | * Btrfs can have a short DIO read if we encounter | ||
1300 | * compressed extents, so if there was an error, or if | ||
1301 | * we've already read everything we wanted to, or if | ||
1302 | * there was a short read because we hit EOF, go ahead | ||
1303 | * and return. Otherwise fallthrough to buffered io for | ||
1304 | * the rest of the read. | ||
1305 | */ | ||
1306 | if (retval < 0 || !count || *ppos >= size) { | ||
1296 | file_accessed(filp); | 1307 | file_accessed(filp); |
1297 | goto out; | 1308 | goto out; |
1298 | } | 1309 | } |
1299 | } | 1310 | } |
1300 | } | 1311 | } |
1301 | 1312 | ||
1313 | count = retval; | ||
1302 | for (seg = 0; seg < nr_segs; seg++) { | 1314 | for (seg = 0; seg < nr_segs; seg++) { |
1303 | read_descriptor_t desc; | 1315 | read_descriptor_t desc; |
1316 | loff_t offset = 0; | ||
1317 | |||
1318 | /* | ||
1319 | * If we did a short DIO read we need to skip the section of the | ||
1320 | * iov that we've already read data into. | ||
1321 | */ | ||
1322 | if (count) { | ||
1323 | if (count > iov[seg].iov_len) { | ||
1324 | count -= iov[seg].iov_len; | ||
1325 | continue; | ||
1326 | } | ||
1327 | offset = count; | ||
1328 | count = 0; | ||
1329 | } | ||
1304 | 1330 | ||
1305 | desc.written = 0; | 1331 | desc.written = 0; |
1306 | desc.arg.buf = iov[seg].iov_base; | 1332 | desc.arg.buf = iov[seg].iov_base + offset; |
1307 | desc.count = iov[seg].iov_len; | 1333 | desc.count = iov[seg].iov_len - offset; |
1308 | if (desc.count == 0) | 1334 | if (desc.count == 0) |
1309 | continue; | 1335 | continue; |
1310 | desc.error = 0; | 1336 | desc.error = 0; |