diff options
author | Sage Weil <sage@newdream.net> | 2011-05-24 14:46:31 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2011-05-24 14:52:12 -0400 |
commit | db3540522e955c1ebb391f4f5324dff4f20ecd09 (patch) | |
tree | 8c25b07caa8614345c71f09e8872e60b68af4c31 /fs | |
parent | cd634fb6eec72ef8e6dd677546b8d0ffdd2501eb (diff) |
ceph: fix cap flush race reentrancy
In e9964c10 we changed cap flushing to do a delicate dance because some
inodes on the cap_dirty list could be in a migrating state (got EXPORT but
not IMPORT) in which we couldn't actually flush and move from
dirty->flushing, breaking the while (!empty) { process first } loop
structure. It worked for a single sync thread, but was not reentrant and
triggered infinite loops when multiple syncers came along.
Instead, move inodes with dirty caps to a separate cap_dirty_migrating list
when in the limbo export-but-no-import state, allowing us to go back to
the simple loop structure (which was reentrant). This is cleaner and more
robust.
Audited the cap_dirty users and this looks fine:
list_empty(&ci->i_dirty_item) is still a reliable indicator of whether we
have dirty caps (which list we're on is irrelevant) and list_del_init()
calls still do the right thing.
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/caps.c | 58 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 1 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 1 |
3 files changed, 31 insertions, 29 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 591202bc9668..1f72b00447c4 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2635,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2635 | struct ceph_mds_session *session, | 2635 | struct ceph_mds_session *session, |
2636 | int *open_target_sessions) | 2636 | int *open_target_sessions) |
2637 | { | 2637 | { |
2638 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||
2638 | struct ceph_inode_info *ci = ceph_inode(inode); | 2639 | struct ceph_inode_info *ci = ceph_inode(inode); |
2639 | int mds = session->s_mds; | 2640 | int mds = session->s_mds; |
2640 | unsigned mseq = le32_to_cpu(ex->migrate_seq); | 2641 | unsigned mseq = le32_to_cpu(ex->migrate_seq); |
@@ -2671,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2671 | * export targets, so that we get the matching IMPORT | 2672 | * export targets, so that we get the matching IMPORT |
2672 | */ | 2673 | */ |
2673 | *open_target_sessions = 1; | 2674 | *open_target_sessions = 1; |
2675 | |||
2676 | /* | ||
2677 | * we can't flush dirty caps that we've seen the | ||
2678 | * EXPORT but no IMPORT for | ||
2679 | */ | ||
2680 | spin_lock(&mdsc->cap_dirty_lock); | ||
2681 | if (!list_empty(&ci->i_dirty_item)) { | ||
2682 | dout(" moving %p to cap_dirty_migrating\n", | ||
2683 | inode); | ||
2684 | list_move(&ci->i_dirty_item, | ||
2685 | &mdsc->cap_dirty_migrating); | ||
2686 | } | ||
2687 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2674 | } | 2688 | } |
2675 | __ceph_remove_cap(cap); | 2689 | __ceph_remove_cap(cap); |
2676 | } | 2690 | } |
@@ -2708,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
2708 | ci->i_cap_exporting_issued = 0; | 2722 | ci->i_cap_exporting_issued = 0; |
2709 | ci->i_cap_exporting_mseq = 0; | 2723 | ci->i_cap_exporting_mseq = 0; |
2710 | ci->i_cap_exporting_mds = -1; | 2724 | ci->i_cap_exporting_mds = -1; |
2725 | |||
2726 | spin_lock(&mdsc->cap_dirty_lock); | ||
2727 | if (!list_empty(&ci->i_dirty_item)) { | ||
2728 | dout(" moving %p back to cap_dirty\n", inode); | ||
2729 | list_move(&ci->i_dirty_item, &mdsc->cap_dirty); | ||
2730 | } | ||
2731 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2711 | } else { | 2732 | } else { |
2712 | dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", | 2733 | dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", |
2713 | inode, ci, mds, mseq); | 2734 | inode, ci, mds, mseq); |
@@ -2911,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) | |||
2911 | */ | 2932 | */ |
2912 | void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) | 2933 | void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) |
2913 | { | 2934 | { |
2914 | struct ceph_inode_info *ci, *nci = NULL; | 2935 | struct ceph_inode_info *ci; |
2915 | struct inode *inode, *ninode = NULL; | 2936 | struct inode *inode; |
2916 | struct list_head *p, *n; | ||
2917 | 2937 | ||
2918 | dout("flush_dirty_caps\n"); | 2938 | dout("flush_dirty_caps\n"); |
2919 | spin_lock(&mdsc->cap_dirty_lock); | 2939 | spin_lock(&mdsc->cap_dirty_lock); |
2920 | list_for_each_safe(p, n, &mdsc->cap_dirty) { | 2940 | while (!list_empty(&mdsc->cap_dirty)) { |
2921 | if (nci) { | 2941 | ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, |
2922 | ci = nci; | 2942 | i_dirty_item); |
2923 | inode = ninode; | 2943 | inode = igrab(&ci->vfs_inode); |
2924 | ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; | 2944 | dout("flush_dirty_caps %p\n", inode); |
2925 | dout("flush_dirty_caps inode %p (was next inode)\n", | ||
2926 | inode); | ||
2927 | } else { | ||
2928 | ci = list_entry(p, struct ceph_inode_info, | ||
2929 | i_dirty_item); | ||
2930 | inode = igrab(&ci->vfs_inode); | ||
2931 | BUG_ON(!inode); | ||
2932 | dout("flush_dirty_caps inode %p\n", inode); | ||
2933 | } | ||
2934 | if (n != &mdsc->cap_dirty) { | ||
2935 | nci = list_entry(n, struct ceph_inode_info, | ||
2936 | i_dirty_item); | ||
2937 | ninode = igrab(&nci->vfs_inode); | ||
2938 | BUG_ON(!ninode); | ||
2939 | nci->i_ceph_flags |= CEPH_I_NOFLUSH; | ||
2940 | dout("flush_dirty_caps next inode %p, noflush\n", | ||
2941 | ninode); | ||
2942 | } else { | ||
2943 | nci = NULL; | ||
2944 | ninode = NULL; | ||
2945 | } | ||
2946 | spin_unlock(&mdsc->cap_dirty_lock); | 2945 | spin_unlock(&mdsc->cap_dirty_lock); |
2947 | if (inode) { | 2946 | if (inode) { |
2948 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, | 2947 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, |
@@ -2952,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) | |||
2952 | spin_lock(&mdsc->cap_dirty_lock); | 2951 | spin_lock(&mdsc->cap_dirty_lock); |
2953 | } | 2952 | } |
2954 | spin_unlock(&mdsc->cap_dirty_lock); | 2953 | spin_unlock(&mdsc->cap_dirty_lock); |
2954 | dout("flush_dirty_caps done\n"); | ||
2955 | } | 2955 | } |
2956 | 2956 | ||
2957 | /* | 2957 | /* |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c12d2e9a0ec6..79743d146be6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -3004,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) | |||
3004 | spin_lock_init(&mdsc->snap_flush_lock); | 3004 | spin_lock_init(&mdsc->snap_flush_lock); |
3005 | mdsc->cap_flush_seq = 0; | 3005 | mdsc->cap_flush_seq = 0; |
3006 | INIT_LIST_HEAD(&mdsc->cap_dirty); | 3006 | INIT_LIST_HEAD(&mdsc->cap_dirty); |
3007 | INIT_LIST_HEAD(&mdsc->cap_dirty_migrating); | ||
3007 | mdsc->num_cap_flushing = 0; | 3008 | mdsc->num_cap_flushing = 0; |
3008 | spin_lock_init(&mdsc->cap_dirty_lock); | 3009 | spin_lock_init(&mdsc->cap_dirty_lock); |
3009 | init_waitqueue_head(&mdsc->cap_flushing_wq); | 3010 | init_waitqueue_head(&mdsc->cap_flushing_wq); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 4e3a9cc0bba6..7d8a0d662d56 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -278,6 +278,7 @@ struct ceph_mds_client { | |||
278 | 278 | ||
279 | u64 cap_flush_seq; | 279 | u64 cap_flush_seq; |
280 | struct list_head cap_dirty; /* inodes with dirty caps */ | 280 | struct list_head cap_dirty; /* inodes with dirty caps */ |
281 | struct list_head cap_dirty_migrating; /* ...that are migration... */ | ||
281 | int num_cap_flushing; /* # caps we are flushing */ | 282 | int num_cap_flushing; /* # caps we are flushing */ |
282 | spinlock_t cap_dirty_lock; /* protects above items */ | 283 | spinlock_t cap_dirty_lock; /* protects above items */ |
283 | wait_queue_head_t cap_flushing_wq; | 284 | wait_queue_head_t cap_flushing_wq; |