diff options
| author | Sage Weil <sage@newdream.net> | 2011-06-01 19:08:44 -0400 |
|---|---|---|
| committer | Sage Weil <sage@newdream.net> | 2011-06-08 00:34:14 -0400 |
| commit | c3cd62839aaa2cdb2b99687c9e44f1b300a4aece (patch) | |
| tree | fc7823426f29f44911c93394fb8a3e43d0c91846 | |
| parent | 2584547230ae49b8de91ab3bb5e0a81898905b45 (diff) | |
ceph: fix short sync reads from the OSD
If we get a short read from the OSD because the object is small, we need to
zero the remainder of the buffer. For O_DIRECT reads, the attempted range
is not trimmed to i_size by the VFS, so we were actually looping
indefinitely.
Fix by trimming by i_size, and then unconditionally zeroing the trailing
range.
Reported-by: Jeff Wu <cpwu@tnsoft.com.cn>
Signed-off-by: Sage Weil <sage@newdream.net>
| -rw-r--r-- | fs/ceph/file.c | 28 |
1 files changed, 15 insertions, 13 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8c5ac4e72832..b654f403139e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -283,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file) | |||
| 283 | static int striped_read(struct inode *inode, | 283 | static int striped_read(struct inode *inode, |
| 284 | u64 off, u64 len, | 284 | u64 off, u64 len, |
| 285 | struct page **pages, int num_pages, | 285 | struct page **pages, int num_pages, |
| 286 | int *checkeof, bool align_to_pages, | 286 | int *checkeof, bool o_direct, |
| 287 | unsigned long buf_align) | 287 | unsigned long buf_align) |
| 288 | { | 288 | { |
| 289 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 289 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| @@ -308,7 +308,7 @@ static int striped_read(struct inode *inode, | |||
| 308 | io_align = off & ~PAGE_MASK; | 308 | io_align = off & ~PAGE_MASK; |
| 309 | 309 | ||
| 310 | more: | 310 | more: |
| 311 | if (align_to_pages) | 311 | if (o_direct) |
| 312 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | 312 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; |
| 313 | else | 313 | else |
| 314 | page_align = pos & ~PAGE_MASK; | 314 | page_align = pos & ~PAGE_MASK; |
| @@ -346,20 +346,22 @@ more: | |||
| 346 | } | 346 | } |
| 347 | 347 | ||
| 348 | if (was_short) { | 348 | if (was_short) { |
| 349 | /* was original extent fully inside i_size? */ | 349 | /* did we bounce off eof? */ |
| 350 | if (pos + left <= inode->i_size) { | 350 | if (pos + left > inode->i_size) |
| 351 | dout("zero tail\n"); | 351 | *checkeof = 1; |
| 352 | ceph_zero_page_vector_range(page_off + read, len - read, | 352 | |
| 353 | /* zero trailing bytes (inside i_size) */ | ||
| 354 | if (left > 0 && pos < inode->i_size) { | ||
| 355 | if (pos + left > inode->i_size) | ||
| 356 | left = inode->i_size - pos; | ||
| 357 | |||
| 358 | dout("zero tail %d\n", left); | ||
| 359 | ceph_zero_page_vector_range(page_off + read, left, | ||
| 353 | pages); | 360 | pages); |
| 354 | read = len; | 361 | read += left; |
| 355 | goto out; | ||
| 356 | } | 362 | } |
| 357 | |||
| 358 | /* check i_size */ | ||
| 359 | *checkeof = 1; | ||
| 360 | } | 363 | } |
| 361 | 364 | ||
| 362 | out: | ||
| 363 | if (ret >= 0) | 365 | if (ret >= 0) |
| 364 | ret = read; | 366 | ret = read; |
| 365 | dout("striped_read returns %d\n", ret); | 367 | dout("striped_read returns %d\n", ret); |
| @@ -659,7 +661,7 @@ out: | |||
| 659 | 661 | ||
| 660 | /* hit EOF or hole? */ | 662 | /* hit EOF or hole? */ |
| 661 | if (statret == 0 && *ppos < inode->i_size) { | 663 | if (statret == 0 && *ppos < inode->i_size) { |
| 662 | dout("aio_read sync_read hit hole, reading more\n"); | 664 | dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size); |
| 663 | read += ret; | 665 | read += ret; |
| 664 | base += ret; | 666 | base += ret; |
| 665 | len -= ret; | 667 | len -= ret; |
