aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Yan, Zheng <zheng.z.yan@intel.com> 2013-04-12 09:45:42 -0400
committer: Sage Weil <sage@inktank.com> 2013-05-02 00:18:55 -0400
commit: 1ac0fc8adfc725660ee53a953b06855f64f8e792 (patch)
tree: eeaaaf8462b98de6775c5df323cc714ada1b95d2
parent: 03d254edebe51949a569c38df6b4b05b7f3c50f9 (diff)
ceph: fix race between writepages and truncate
ceph_writepages_start() reads inode->i_size in two places. It can get different values between successive reads, because truncate can change inode->i_size at any time. The race can lead to a mismatch between the data length of the osd request and the pages marked as writeback. When the osd request finishes, it clears the writeback state of pages according to its data length, so some pages can be left in the writeback state forever. The fix is to read inode->i_size only once, save its value to a local variable, and use the local variable wherever i_size is needed. Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> Reviewed-by: Alex Elder <elder@inktank.com>
-rw-r--r-- fs/ceph/addr.c 14
1 files changed, 7 insertions, 7 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 27d62070a8e9..2d6466b5fe82 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -671,7 +671,7 @@ static int ceph_writepages_start(struct address_space *mapping,
671 unsigned wsize = 1 << inode->i_blkbits; 671 unsigned wsize = 1 << inode->i_blkbits;
672 struct ceph_osd_request *req = NULL; 672 struct ceph_osd_request *req = NULL;
673 int do_sync; 673 int do_sync;
674 u64 snap_size = 0; 674 u64 snap_size;
675 675
676 /* 676 /*
677 * Include a 'sync' in the OSD request if this is a data 677 * Include a 'sync' in the OSD request if this is a data
@@ -717,6 +717,7 @@ static int ceph_writepages_start(struct address_space *mapping,
717retry: 717retry:
718 /* find oldest snap context with dirty data */ 718 /* find oldest snap context with dirty data */
719 ceph_put_snap_context(snapc); 719 ceph_put_snap_context(snapc);
720 snap_size = 0;
720 snapc = get_oldest_context(inode, &snap_size); 721 snapc = get_oldest_context(inode, &snap_size);
721 if (!snapc) { 722 if (!snapc) {
722 /* hmm, why does writepages get called when there 723 /* hmm, why does writepages get called when there
@@ -724,6 +725,8 @@ retry:
724 dout(" no snap context with dirty data?\n"); 725 dout(" no snap context with dirty data?\n");
725 goto out; 726 goto out;
726 } 727 }
728 if (snap_size == 0)
729 snap_size = i_size_read(inode);
727 dout(" oldest snapc is %p seq %lld (%d snaps)\n", 730 dout(" oldest snapc is %p seq %lld (%d snaps)\n",
728 snapc, snapc->seq, snapc->num_snaps); 731 snapc, snapc->seq, snapc->num_snaps);
729 if (last_snapc && snapc != last_snapc) { 732 if (last_snapc && snapc != last_snapc) {
@@ -795,11 +798,8 @@ get_more_pages:
795 dout("waiting on writeback %p\n", page); 798 dout("waiting on writeback %p\n", page);
796 wait_on_page_writeback(page); 799 wait_on_page_writeback(page);
797 } 800 }
798 if ((snap_size && page_offset(page) > snap_size) || 801 if (page_offset(page) >= snap_size) {
799 (!snap_size && 802 dout("%p page eof %llu\n", page, snap_size);
800 page_offset(page) > i_size_read(inode))) {
801 dout("%p page eof %llu\n", page, snap_size ?
802 snap_size : i_size_read(inode));
803 done = 1; 803 done = 1;
804 unlock_page(page); 804 unlock_page(page);
805 break; 805 break;
@@ -911,7 +911,7 @@ get_more_pages:
911 /* Format the osd request message and submit the write */ 911 /* Format the osd request message and submit the write */
912 912
913 offset = page_offset(pages[0]); 913 offset = page_offset(pages[0]);
914 len = min((snap_size ? snap_size : i_size_read(inode)) - offset, 914 len = min(snap_size - offset,
915 (u64)locked_pages << PAGE_CACHE_SHIFT); 915 (u64)locked_pages << PAGE_CACHE_SHIFT);
916 dout("writepages got %d pages at %llu~%llu\n", 916 dout("writepages got %d pages at %llu~%llu\n",
917 locked_pages, offset, len); 917 locked_pages, offset, len);