diff options
author | Yan, Zheng <zyan@redhat.com> | 2015-10-27 06:36:06 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2015-11-02 17:36:48 -0500 |
commit | 68cd5b4b7612c2956d8553dfb39490b29f32566d (patch) | |
tree | e66461c4e1241a3dc5797a650a16683470185140 /fs/ceph | |
parent | 4c06ace81a60636dec358c288ef6aaf3aa6dc599 (diff) |
ceph: make fsync() wait unsafe requests that created/modified inode
If we get an unsafe reply for a request that created/modified an inode,
add the unsafe request to a list in the newly created/modified
inode, so that fsync() can wait for these unsafe requests.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/caps.c | 71 | ||||
-rw-r--r-- | fs/ceph/inode.c | 1 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 14 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 3 | ||||
-rw-r--r-- | fs/ceph/super.h | 1 |
5 files changed, 53 insertions, 37 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 349315332040..c69e1253b47b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1970,49 +1970,46 @@ out: | |||
1970 | } | 1970 | } |
1971 | 1971 | ||
1972 | /* | 1972 | /* |
1973 | * wait for any uncommitted directory operations to commit. | 1973 | * wait for any unsafe requests to complete. |
1974 | */ | 1974 | */ |
1975 | static int unsafe_dirop_wait(struct inode *inode) | 1975 | static int unsafe_request_wait(struct inode *inode) |
1976 | { | 1976 | { |
1977 | struct ceph_inode_info *ci = ceph_inode(inode); | 1977 | struct ceph_inode_info *ci = ceph_inode(inode); |
1978 | struct list_head *head = &ci->i_unsafe_dirops; | 1978 | struct ceph_mds_request *req1 = NULL, *req2 = NULL; |
1979 | struct ceph_mds_request *req; | 1979 | int ret, err = 0; |
1980 | u64 last_tid; | ||
1981 | int ret = 0; | ||
1982 | |||
1983 | if (!S_ISDIR(inode->i_mode)) | ||
1984 | return 0; | ||
1985 | 1980 | ||
1986 | spin_lock(&ci->i_unsafe_lock); | 1981 | spin_lock(&ci->i_unsafe_lock); |
1987 | if (list_empty(head)) | 1982 | if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { |
1988 | goto out; | 1983 | req1 = list_last_entry(&ci->i_unsafe_dirops, |
1989 | 1984 | struct ceph_mds_request, | |
1990 | req = list_last_entry(head, struct ceph_mds_request, | 1985 | r_unsafe_dir_item); |
1991 | r_unsafe_dir_item); | 1986 | ceph_mdsc_get_request(req1); |
1992 | last_tid = req->r_tid; | 1987 | } |
1993 | 1988 | if (!list_empty(&ci->i_unsafe_iops)) { | |
1994 | do { | 1989 | req2 = list_last_entry(&ci->i_unsafe_iops, |
1995 | ceph_mdsc_get_request(req); | 1990 | struct ceph_mds_request, |
1996 | spin_unlock(&ci->i_unsafe_lock); | 1991 | r_unsafe_target_item); |
1992 | ceph_mdsc_get_request(req2); | ||
1993 | } | ||
1994 | spin_unlock(&ci->i_unsafe_lock); | ||
1997 | 1995 | ||
1998 | dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n", | 1996 | dout("unsafe_requeset_wait %p wait on tid %llu %llu\n", |
1999 | inode, req->r_tid, last_tid); | 1997 | inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); |
2000 | ret = !wait_for_completion_timeout(&req->r_safe_completion, | 1998 | if (req1) { |
2001 | ceph_timeout_jiffies(req->r_timeout)); | 1999 | ret = !wait_for_completion_timeout(&req1->r_safe_completion, |
2000 | ceph_timeout_jiffies(req1->r_timeout)); | ||
2002 | if (ret) | 2001 | if (ret) |
2003 | ret = -EIO; /* timed out */ | 2002 | err = -EIO; |
2004 | 2003 | ceph_mdsc_put_request(req1); | |
2005 | ceph_mdsc_put_request(req); | 2004 | } |
2006 | 2005 | if (req2) { | |
2007 | spin_lock(&ci->i_unsafe_lock); | 2006 | ret = !wait_for_completion_timeout(&req2->r_safe_completion, |
2008 | if (ret || list_empty(head)) | 2007 | ceph_timeout_jiffies(req2->r_timeout)); |
2009 | break; | 2008 | if (ret) |
2010 | req = list_first_entry(head, struct ceph_mds_request, | 2009 | err = -EIO; |
2011 | r_unsafe_dir_item); | 2010 | ceph_mdsc_put_request(req2); |
2012 | } while (req->r_tid < last_tid); | 2011 | } |
2013 | out: | 2012 | return err; |
2014 | spin_unlock(&ci->i_unsafe_lock); | ||
2015 | return ret; | ||
2016 | } | 2013 | } |
2017 | 2014 | ||
2018 | int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) | 2015 | int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) |
@@ -2038,7 +2035,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
2038 | dirty = try_flush_caps(inode, &flush_tid); | 2035 | dirty = try_flush_caps(inode, &flush_tid); |
2039 | dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); | 2036 | dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); |
2040 | 2037 | ||
2041 | ret = unsafe_dirop_wait(inode); | 2038 | ret = unsafe_request_wait(inode); |
2042 | 2039 | ||
2043 | /* | 2040 | /* |
2044 | * only wait on non-file metadata writeback (the mds | 2041 | * only wait on non-file metadata writeback (the mds |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 96d2bd829902..498dcfa2dcdb 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -452,6 +452,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
452 | 452 | ||
453 | INIT_LIST_HEAD(&ci->i_unsafe_writes); | 453 | INIT_LIST_HEAD(&ci->i_unsafe_writes); |
454 | INIT_LIST_HEAD(&ci->i_unsafe_dirops); | 454 | INIT_LIST_HEAD(&ci->i_unsafe_dirops); |
455 | INIT_LIST_HEAD(&ci->i_unsafe_iops); | ||
455 | spin_lock_init(&ci->i_unsafe_lock); | 456 | spin_lock_init(&ci->i_unsafe_lock); |
456 | 457 | ||
457 | ci->i_snap_realm = NULL; | 458 | ci->i_snap_realm = NULL; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1e47a3d1d12f..89838a226fe9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -666,6 +666,12 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
666 | list_del_init(&req->r_unsafe_dir_item); | 666 | list_del_init(&req->r_unsafe_dir_item); |
667 | spin_unlock(&ci->i_unsafe_lock); | 667 | spin_unlock(&ci->i_unsafe_lock); |
668 | } | 668 | } |
669 | if (req->r_target_inode && req->r_got_unsafe) { | ||
670 | struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); | ||
671 | spin_lock(&ci->i_unsafe_lock); | ||
672 | list_del_init(&req->r_unsafe_target_item); | ||
673 | spin_unlock(&ci->i_unsafe_lock); | ||
674 | } | ||
669 | 675 | ||
670 | if (req->r_unsafe_dir) { | 676 | if (req->r_unsafe_dir) { |
671 | iput(req->r_unsafe_dir); | 677 | iput(req->r_unsafe_dir); |
@@ -1707,6 +1713,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
1707 | req->r_started = jiffies; | 1713 | req->r_started = jiffies; |
1708 | req->r_resend_mds = -1; | 1714 | req->r_resend_mds = -1; |
1709 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); | 1715 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); |
1716 | INIT_LIST_HEAD(&req->r_unsafe_target_item); | ||
1710 | req->r_fmode = -1; | 1717 | req->r_fmode = -1; |
1711 | kref_init(&req->r_kref); | 1718 | kref_init(&req->r_kref); |
1712 | INIT_LIST_HEAD(&req->r_wait); | 1719 | INIT_LIST_HEAD(&req->r_wait); |
@@ -2529,6 +2536,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2529 | up_read(&mdsc->snap_rwsem); | 2536 | up_read(&mdsc->snap_rwsem); |
2530 | if (realm) | 2537 | if (realm) |
2531 | ceph_put_snap_realm(mdsc, realm); | 2538 | ceph_put_snap_realm(mdsc, realm); |
2539 | |||
2540 | if (err == 0 && req->r_got_unsafe && req->r_target_inode) { | ||
2541 | struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); | ||
2542 | spin_lock(&ci->i_unsafe_lock); | ||
2543 | list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops); | ||
2544 | spin_unlock(&ci->i_unsafe_lock); | ||
2545 | } | ||
2532 | out_err: | 2546 | out_err: |
2533 | mutex_lock(&mdsc->mutex); | 2547 | mutex_lock(&mdsc->mutex); |
2534 | if (!req->r_aborted) { | 2548 | if (!req->r_aborted) { |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index f575eafe2261..ccf11ef0ca87 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -236,6 +236,9 @@ struct ceph_mds_request { | |||
236 | struct inode *r_unsafe_dir; | 236 | struct inode *r_unsafe_dir; |
237 | struct list_head r_unsafe_dir_item; | 237 | struct list_head r_unsafe_dir_item; |
238 | 238 | ||
239 | /* unsafe requests that modify the target inode */ | ||
240 | struct list_head r_unsafe_target_item; | ||
241 | |||
239 | struct ceph_mds_session *r_session; | 242 | struct ceph_mds_session *r_session; |
240 | 243 | ||
241 | int r_attempts; /* resend attempts */ | 244 | int r_attempts; /* resend attempts */ |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2f2460d23a06..75b7d125ce66 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -342,6 +342,7 @@ struct ceph_inode_info { | |||
342 | 342 | ||
343 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ | 343 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ |
344 | struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */ | 344 | struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */ |
345 | struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ | ||
345 | spinlock_t i_unsafe_lock; | 346 | spinlock_t i_unsafe_lock; |
346 | 347 | ||
347 | struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ | 348 | struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ |