aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2011-06-01 19:08:44 -0400
committer: Sage Weil <sage@newdream.net> 2011-06-08 00:34:14 -0400
commit: c3cd62839aaa2cdb2b99687c9e44f1b300a4aece (patch)
tree: fc7823426f29f44911c93394fb8a3e43d0c91846
parent: 2584547230ae49b8de91ab3bb5e0a81898905b45 (diff)
ceph: fix short sync reads from the OSD
If we get a short read from the OSD because the object is small, we need to zero the remainder of the buffer. For O_DIRECT reads, the attempted range is not trimmed to i_size by the VFS, so we were actually looping indefinitely. Fix by trimming by i_size, and then unconditionally zeroing the trailing range. Reported-by: Jeff Wu <cpwu@tnsoft.com.cn> Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- fs/ceph/file.c 28
1 files changed, 15 insertions, 13 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8c5ac4e72832..b654f403139e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -283,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file)
283static int striped_read(struct inode *inode, 283static int striped_read(struct inode *inode,
284 u64 off, u64 len, 284 u64 off, u64 len,
285 struct page **pages, int num_pages, 285 struct page **pages, int num_pages,
286 int *checkeof, bool align_to_pages, 286 int *checkeof, bool o_direct,
287 unsigned long buf_align) 287 unsigned long buf_align)
288{ 288{
289 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 289 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -308,7 +308,7 @@ static int striped_read(struct inode *inode,
308 io_align = off & ~PAGE_MASK; 308 io_align = off & ~PAGE_MASK;
309 309
310more: 310more:
311 if (align_to_pages) 311 if (o_direct)
312 page_align = (pos - io_align + buf_align) & ~PAGE_MASK; 312 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
313 else 313 else
314 page_align = pos & ~PAGE_MASK; 314 page_align = pos & ~PAGE_MASK;
@@ -346,20 +346,22 @@ more:
346 } 346 }
347 347
348 if (was_short) { 348 if (was_short) {
349 /* was original extent fully inside i_size? */ 349 /* did we bounce off eof? */
350 if (pos + left <= inode->i_size) { 350 if (pos + left > inode->i_size)
351 dout("zero tail\n"); 351 *checkeof = 1;
352 ceph_zero_page_vector_range(page_off + read, len - read, 352
353 /* zero trailing bytes (inside i_size) */
354 if (left > 0 && pos < inode->i_size) {
355 if (pos + left > inode->i_size)
356 left = inode->i_size - pos;
357
358 dout("zero tail %d\n", left);
359 ceph_zero_page_vector_range(page_off + read, left,
353 pages); 360 pages);
354 read = len; 361 read += left;
355 goto out;
356 } 362 }
357
358 /* check i_size */
359 *checkeof = 1;
360 } 363 }
361 364
362out:
363 if (ret >= 0) 365 if (ret >= 0)
364 ret = read; 366 ret = read;
365 dout("striped_read returns %d\n", ret); 367 dout("striped_read returns %d\n", ret);
@@ -659,7 +661,7 @@ out:
659 661
660 /* hit EOF or hole? */ 662 /* hit EOF or hole? */
661 if (statret == 0 && *ppos < inode->i_size) { 663 if (statret == 0 && *ppos < inode->i_size) {
662 dout("aio_read sync_read hit hole, reading more\n"); 664 dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
663 read += ret; 665 read += ret;
664 base += ret; 666 base += ret;
665 len -= ret; 667 len -= ret;