aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSage Weil <sage@newdream.net>2011-05-24 14:46:31 -0400
committerSage Weil <sage@newdream.net>2011-05-24 14:52:12 -0400
commitdb3540522e955c1ebb391f4f5324dff4f20ecd09 (patch)
tree8c25b07caa8614345c71f09e8872e60b68af4c31
parentcd634fb6eec72ef8e6dd677546b8d0ffdd2501eb (diff)
ceph: fix cap flush race reentrancy
In e9964c10 we changed cap flushing to do a delicate dance because some inodes on the cap_dirty list could be in a migrating state (got EXPORT but not IMPORT) in which we couldn't actually flush and move from dirty->flushing, breaking the while (!empty) { process first } loop structure. It worked for a single sync thread, but was not reentrant and triggered infinite loops when multiple syncers came along. Instead, move inodes with dirty caps to a separate cap_dirty_migrating list when in the limbo export-but-no-import state, allowing us to go back to the simple loop structure (which was reentrant). This is cleaner and more robust. Audited the cap_dirty users and this looks fine: list_empty(&ci->i_dirty_item) is still a reliable indicator of whether we have dirty caps (which list we're on is irrelevant) and list_del_init() calls still do the right thing. Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r--fs/ceph/caps.c58
-rw-r--r--fs/ceph/mds_client.c1
-rw-r--r--fs/ceph/mds_client.h1
3 files changed, 31 insertions, 29 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 591202bc9668..1f72b00447c4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2635,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2635 struct ceph_mds_session *session, 2635 struct ceph_mds_session *session,
2636 int *open_target_sessions) 2636 int *open_target_sessions)
2637{ 2637{
2638 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
2638 struct ceph_inode_info *ci = ceph_inode(inode); 2639 struct ceph_inode_info *ci = ceph_inode(inode);
2639 int mds = session->s_mds; 2640 int mds = session->s_mds;
2640 unsigned mseq = le32_to_cpu(ex->migrate_seq); 2641 unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2671,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2671 * export targets, so that we get the matching IMPORT 2672 * export targets, so that we get the matching IMPORT
2672 */ 2673 */
2673 *open_target_sessions = 1; 2674 *open_target_sessions = 1;
2675
2676 /*
2677 * we can't flush dirty caps that we've seen the
2678 * EXPORT but no IMPORT for
2679 */
2680 spin_lock(&mdsc->cap_dirty_lock);
2681 if (!list_empty(&ci->i_dirty_item)) {
2682 dout(" moving %p to cap_dirty_migrating\n",
2683 inode);
2684 list_move(&ci->i_dirty_item,
2685 &mdsc->cap_dirty_migrating);
2686 }
2687 spin_unlock(&mdsc->cap_dirty_lock);
2674 } 2688 }
2675 __ceph_remove_cap(cap); 2689 __ceph_remove_cap(cap);
2676 } 2690 }
@@ -2708,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2708 ci->i_cap_exporting_issued = 0; 2722 ci->i_cap_exporting_issued = 0;
2709 ci->i_cap_exporting_mseq = 0; 2723 ci->i_cap_exporting_mseq = 0;
2710 ci->i_cap_exporting_mds = -1; 2724 ci->i_cap_exporting_mds = -1;
2725
2726 spin_lock(&mdsc->cap_dirty_lock);
2727 if (!list_empty(&ci->i_dirty_item)) {
2728 dout(" moving %p back to cap_dirty\n", inode);
2729 list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
2730 }
2731 spin_unlock(&mdsc->cap_dirty_lock);
2711 } else { 2732 } else {
2712 dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", 2733 dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
2713 inode, ci, mds, mseq); 2734 inode, ci, mds, mseq);
@@ -2911,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
2911 */ 2932 */
2912void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) 2933void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2913{ 2934{
2914 struct ceph_inode_info *ci, *nci = NULL; 2935 struct ceph_inode_info *ci;
2915 struct inode *inode, *ninode = NULL; 2936 struct inode *inode;
2916 struct list_head *p, *n;
2917 2937
2918 dout("flush_dirty_caps\n"); 2938 dout("flush_dirty_caps\n");
2919 spin_lock(&mdsc->cap_dirty_lock); 2939 spin_lock(&mdsc->cap_dirty_lock);
2920 list_for_each_safe(p, n, &mdsc->cap_dirty) { 2940 while (!list_empty(&mdsc->cap_dirty)) {
2921 if (nci) { 2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
2922 ci = nci; 2942 i_dirty_item);
2923 inode = ninode; 2943 inode = igrab(&ci->vfs_inode);
2924 ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; 2944 dout("flush_dirty_caps %p\n", inode);
2925 dout("flush_dirty_caps inode %p (was next inode)\n",
2926 inode);
2927 } else {
2928 ci = list_entry(p, struct ceph_inode_info,
2929 i_dirty_item);
2930 inode = igrab(&ci->vfs_inode);
2931 BUG_ON(!inode);
2932 dout("flush_dirty_caps inode %p\n", inode);
2933 }
2934 if (n != &mdsc->cap_dirty) {
2935 nci = list_entry(n, struct ceph_inode_info,
2936 i_dirty_item);
2937 ninode = igrab(&nci->vfs_inode);
2938 BUG_ON(!ninode);
2939 nci->i_ceph_flags |= CEPH_I_NOFLUSH;
2940 dout("flush_dirty_caps next inode %p, noflush\n",
2941 ninode);
2942 } else {
2943 nci = NULL;
2944 ninode = NULL;
2945 }
2946 spin_unlock(&mdsc->cap_dirty_lock); 2945 spin_unlock(&mdsc->cap_dirty_lock);
2947 if (inode) { 2946 if (inode) {
2948 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, 2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
@@ -2952,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2952 spin_lock(&mdsc->cap_dirty_lock); 2951 spin_lock(&mdsc->cap_dirty_lock);
2953 } 2952 }
2954 spin_unlock(&mdsc->cap_dirty_lock); 2953 spin_unlock(&mdsc->cap_dirty_lock);
2954 dout("flush_dirty_caps done\n");
2955} 2955}
2956 2956
2957/* 2957/*
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c12d2e9a0ec6..79743d146be6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3004,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
3004 spin_lock_init(&mdsc->snap_flush_lock); 3004 spin_lock_init(&mdsc->snap_flush_lock);
3005 mdsc->cap_flush_seq = 0; 3005 mdsc->cap_flush_seq = 0;
3006 INIT_LIST_HEAD(&mdsc->cap_dirty); 3006 INIT_LIST_HEAD(&mdsc->cap_dirty);
3007 INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
3007 mdsc->num_cap_flushing = 0; 3008 mdsc->num_cap_flushing = 0;
3008 spin_lock_init(&mdsc->cap_dirty_lock); 3009 spin_lock_init(&mdsc->cap_dirty_lock);
3009 init_waitqueue_head(&mdsc->cap_flushing_wq); 3010 init_waitqueue_head(&mdsc->cap_flushing_wq);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4e3a9cc0bba6..7d8a0d662d56 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -278,6 +278,7 @@ struct ceph_mds_client {
278 278
279 u64 cap_flush_seq; 279 u64 cap_flush_seq;
280 struct list_head cap_dirty; /* inodes with dirty caps */ 280 struct list_head cap_dirty; /* inodes with dirty caps */
281 struct list_head cap_dirty_migrating; /* ...that are migration... */
281 int num_cap_flushing; /* # caps we are flushing */ 282 int num_cap_flushing; /* # caps we are flushing */
282 spinlock_t cap_dirty_lock; /* protects above items */ 283 spinlock_t cap_dirty_lock; /* protects above items */
283 wait_queue_head_t cap_flushing_wq; 284 wait_queue_head_t cap_flushing_wq;