aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Sage Weil <sage@newdream.net> 2010-02-09 17:04:02 -0500
committer Sage Weil <sage@newdream.net> 2010-02-11 14:48:53 -0500
commit 6a026589ba333185c466c906376fe022a27a53f9 (patch)
tree 7c536163e2192928978b79d5a6bbd79dd5993777
parent 68c283236a1e0772e1a469dd2ffc17afc300b07b (diff)
ceph: fix sync read eof check deadlock
If a sync read gets a short result from the OSD, it may need to do a getattr to see if it is short due to reaching end-of-file. The getattr was being done while holding a reference to FILE_RD, which can lead to a deadlock if the MDS is revoking that capability bit and can't process the getattr until it does.

We fix this by setting a flag if EOF size validation is needed, and doing the getattr in ceph_aio_read, after the RD cap ref is dropped. If the read needs to be continued, we loop and continue traversing the file.

Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- fs/ceph/file.c 39
1 files changed, 26 insertions, 13 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index bbf1ccf2d56e..2c4ae4441cab 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -431,7 +431,8 @@ static void zero_page_vector_range(int off, int len, struct page **pages)
431 */ 431 */
432static int striped_read(struct inode *inode, 432static int striped_read(struct inode *inode,
433 u64 off, u64 len, 433 u64 off, u64 len,
434 struct page **pages, int num_pages) 434 struct page **pages, int num_pages,
435 int *checkeof)
435{ 436{
436 struct ceph_client *client = ceph_inode_to_client(inode); 437 struct ceph_client *client = ceph_inode_to_client(inode);
437 struct ceph_inode_info *ci = ceph_inode(inode); 438 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -497,15 +498,7 @@ more:
497 } 498 }
498 499
499 /* check i_size */ 500 /* check i_size */
500 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); 501 *checkeof = 1;
501 if (ret < 0)
502 goto out;
503
504 /* hit EOF? */
505 if (pos >= inode->i_size)
506 goto out;
507
508 goto more;
509 } 502 }
510 503
511out: 504out:
@@ -522,7 +515,7 @@ out:
522 * If the read spans object boundary, just do multiple reads. 515 * If the read spans object boundary, just do multiple reads.
523 */ 516 */
524static ssize_t ceph_sync_read(struct file *file, char __user *data, 517static ssize_t ceph_sync_read(struct file *file, char __user *data,
525 unsigned len, loff_t *poff) 518 unsigned len, loff_t *poff, int *checkeof)
526{ 519{
527 struct inode *inode = file->f_dentry->d_inode; 520 struct inode *inode = file->f_dentry->d_inode;
528 struct page **pages; 521 struct page **pages;
@@ -552,7 +545,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
552 if (ret < 0) 545 if (ret < 0)
553 goto done; 546 goto done;
554 547
555 ret = striped_read(inode, off, len, pages, num_pages); 548 ret = striped_read(inode, off, len, pages, num_pages, checkeof);
556 549
557 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) 550 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
558 ret = copy_page_vector_to_user(pages, data, off, ret); 551 ret = copy_page_vector_to_user(pages, data, off, ret);
@@ -746,11 +739,14 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
746 size_t len = iov->iov_len; 739 size_t len = iov->iov_len;
747 struct inode *inode = filp->f_dentry->d_inode; 740 struct inode *inode = filp->f_dentry->d_inode;
748 struct ceph_inode_info *ci = ceph_inode(inode); 741 struct ceph_inode_info *ci = ceph_inode(inode);
742 void *base = iov->iov_base;
749 ssize_t ret; 743 ssize_t ret;
750 int got = 0; 744 int got = 0;
745 int checkeof = 0, read = 0;
751 746
752 dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", 747 dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
753 inode, ceph_vinop(inode), pos, (unsigned)len, inode); 748 inode, ceph_vinop(inode), pos, (unsigned)len, inode);
749again:
754 __ceph_do_pending_vmtruncate(inode); 750 __ceph_do_pending_vmtruncate(inode);
755 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, 751 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE,
756 &got, -1); 752 &got, -1);
@@ -764,7 +760,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
764 (iocb->ki_filp->f_flags & O_DIRECT) || 760 (iocb->ki_filp->f_flags & O_DIRECT) ||
765 (inode->i_sb->s_flags & MS_SYNCHRONOUS)) 761 (inode->i_sb->s_flags & MS_SYNCHRONOUS))
766 /* hmm, this isn't really async... */ 762 /* hmm, this isn't really async... */
767 ret = ceph_sync_read(filp, iov->iov_base, len, ppos); 763 ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
768 else 764 else
769 ret = generic_file_aio_read(iocb, iov, nr_segs, pos); 765 ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
770 766
@@ -772,6 +768,23 @@ out:
772 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 768 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
773 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); 769 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
774 ceph_put_cap_refs(ci, got); 770 ceph_put_cap_refs(ci, got);
771
772 if (checkeof && ret >= 0) {
773 int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
774
775 /* hit EOF or hole? */
776 if (statret == 0 && *ppos < inode->i_size) {
777 dout("aio_read sync_read hit hole, reading more\n");
778 read += ret;
779 base += ret;
780 len -= ret;
781 checkeof = 0;
782 goto again;
783 }
784 }
785 if (ret >= 0)
786 ret += read;
787
775 return ret; 788 return ret;
776} 789}
777 790