diff options
author | Sage Weil <sage@newdream.net> | 2011-06-01 19:08:44 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2011-06-08 00:34:14 -0400 |
commit | c3cd62839aaa2cdb2b99687c9e44f1b300a4aece (patch) | |
tree | fc7823426f29f44911c93394fb8a3e43d0c91846 /fs/ceph | |
parent | 2584547230ae49b8de91ab3bb5e0a81898905b45 (diff) |
ceph: fix short sync reads from the OSD
If we get a short read from the OSD because the object is small, we need to
zero the remainder of the buffer. For O_DIRECT reads, the attempted range
is not trimmed to i_size by the VFS, so we were actually looping
indefinitely.
Fix by trimming to i_size, and then unconditionally zeroing the trailing
range.
Reported-by: Jeff Wu <cpwu@tnsoft.com.cn>
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/file.c | 28 |
1 files changed, 15 insertions, 13 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8c5ac4e72832..b654f403139e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -283,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file) | |||
283 | static int striped_read(struct inode *inode, | 283 | static int striped_read(struct inode *inode, |
284 | u64 off, u64 len, | 284 | u64 off, u64 len, |
285 | struct page **pages, int num_pages, | 285 | struct page **pages, int num_pages, |
286 | int *checkeof, bool align_to_pages, | 286 | int *checkeof, bool o_direct, |
287 | unsigned long buf_align) | 287 | unsigned long buf_align) |
288 | { | 288 | { |
289 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 289 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
@@ -308,7 +308,7 @@ static int striped_read(struct inode *inode, | |||
308 | io_align = off & ~PAGE_MASK; | 308 | io_align = off & ~PAGE_MASK; |
309 | 309 | ||
310 | more: | 310 | more: |
311 | if (align_to_pages) | 311 | if (o_direct) |
312 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | 312 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; |
313 | else | 313 | else |
314 | page_align = pos & ~PAGE_MASK; | 314 | page_align = pos & ~PAGE_MASK; |
@@ -346,20 +346,22 @@ more: | |||
346 | } | 346 | } |
347 | 347 | ||
348 | if (was_short) { | 348 | if (was_short) { |
349 | /* was original extent fully inside i_size? */ | 349 | /* did we bounce off eof? */ |
350 | if (pos + left <= inode->i_size) { | 350 | if (pos + left > inode->i_size) |
351 | dout("zero tail\n"); | 351 | *checkeof = 1; |
352 | ceph_zero_page_vector_range(page_off + read, len - read, | 352 | |
353 | /* zero trailing bytes (inside i_size) */ | ||
354 | if (left > 0 && pos < inode->i_size) { | ||
355 | if (pos + left > inode->i_size) | ||
356 | left = inode->i_size - pos; | ||
357 | |||
358 | dout("zero tail %d\n", left); | ||
359 | ceph_zero_page_vector_range(page_off + read, left, | ||
353 | pages); | 360 | pages); |
354 | read = len; | 361 | read += left; |
355 | goto out; | ||
356 | } | 362 | } |
357 | |||
358 | /* check i_size */ | ||
359 | *checkeof = 1; | ||
360 | } | 363 | } |
361 | 364 | ||
362 | out: | ||
363 | if (ret >= 0) | 365 | if (ret >= 0) |
364 | ret = read; | 366 | ret = read; |
365 | dout("striped_read returns %d\n", ret); | 367 | dout("striped_read returns %d\n", ret); |
@@ -659,7 +661,7 @@ out: | |||
659 | 661 | ||
660 | /* hit EOF or hole? */ | 662 | /* hit EOF or hole? */ |
661 | if (statret == 0 && *ppos < inode->i_size) { | 663 | if (statret == 0 && *ppos < inode->i_size) { |
662 | dout("aio_read sync_read hit hole, reading more\n"); | 664 | dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size); |
663 | read += ret; | 665 | read += ret; |
664 | base += ret; | 666 | base += ret; |
665 | len -= ret; | 667 | len -= ret; |