aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/caps.c
diff options
context:
space:
mode:
authorSage Weil <sage@newdream.net>2010-02-16 14:39:45 -0500
committerSage Weil <sage@newdream.net>2010-02-17 13:02:47 -0500
commit7c1332b8cb5b27656cf6ab1f5fe808a8eb8bb2c0 (patch)
treef990ab6b339a88896f41a6b3541d0676684c935d /fs/ceph/caps.c
parent85ccce43a3fc15a40ded6ae1603e3f68a17f4d24 (diff)
ceph: fix iterate_caps removal race
We need to be able to iterate over all caps on a session with a possibly slow callback on each cap. To allow this, we used to prevent cap reordering while we were iterating. However, we were not safe from races with removal: removing the 'next' cap would make the next pointer from list_for_each_entry_safe be invalid, and cause a lock up or similar badness. Instead, we keep an iterator pointer in the session pointing to the current cap. As before, we avoid reordering. For removal, if the cap isn't the current cap we are iterating over, we are fine. If it is, we clear cap->ci (to mark the cap as pending removal) but leave it in the session list. In iterate_caps, we can safely finish removal and get the next cap pointer. While we're at it, clean up put_cap to not take a cap reservation context, as it was never used. Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r--fs/ceph/caps.c47
1 files changed, 24 insertions, 23 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f94b56faba3b..4958a2ef3e04 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -266,12 +266,11 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx)
266 return cap; 266 return cap;
267} 267}
268 268
269static void put_cap(struct ceph_cap *cap, 269void ceph_put_cap(struct ceph_cap *cap)
270 struct ceph_cap_reservation *ctx)
271{ 270{
272 spin_lock(&caps_list_lock); 271 spin_lock(&caps_list_lock);
273 dout("put_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", 272 dout("put_cap %p %d = %d used + %d resv + %d avail\n",
274 ctx, ctx ? ctx->count : 0, caps_total_count, caps_use_count, 273 cap, caps_total_count, caps_use_count,
275 caps_reserve_count, caps_avail_count); 274 caps_reserve_count, caps_avail_count);
276 caps_use_count--; 275 caps_use_count--;
277 /* 276 /*
@@ -282,12 +281,7 @@ static void put_cap(struct ceph_cap *cap,
282 caps_total_count--; 281 caps_total_count--;
283 kmem_cache_free(ceph_cap_cachep, cap); 282 kmem_cache_free(ceph_cap_cachep, cap);
284 } else { 283 } else {
285 if (ctx) { 284 caps_avail_count++;
286 ctx->count++;
287 caps_reserve_count++;
288 } else {
289 caps_avail_count++;
290 }
291 list_add(&cap->caps_item, &caps_list); 285 list_add(&cap->caps_item, &caps_list);
292 } 286 }
293 287
@@ -709,7 +703,7 @@ static void __touch_cap(struct ceph_cap *cap)
709 struct ceph_mds_session *s = cap->session; 703 struct ceph_mds_session *s = cap->session;
710 704
711 spin_lock(&s->s_cap_lock); 705 spin_lock(&s->s_cap_lock);
712 if (!s->s_iterating_caps) { 706 if (s->s_cap_iterator == NULL) {
713 dout("__touch_cap %p cap %p mds%d\n", &cap->ci->vfs_inode, cap, 707 dout("__touch_cap %p cap %p mds%d\n", &cap->ci->vfs_inode, cap,
714 s->s_mds); 708 s->s_mds);
715 list_move_tail(&cap->session_caps, &s->s_caps); 709 list_move_tail(&cap->session_caps, &s->s_caps);
@@ -865,8 +859,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
865 * caller should hold i_lock, and session s_mutex. 859 * caller should hold i_lock, and session s_mutex.
866 * returns true if this is the last cap. if so, caller should iput. 860 * returns true if this is the last cap. if so, caller should iput.
867 */ 861 */
868void __ceph_remove_cap(struct ceph_cap *cap, 862void __ceph_remove_cap(struct ceph_cap *cap)
869 struct ceph_cap_reservation *ctx)
870{ 863{
871 struct ceph_mds_session *session = cap->session; 864 struct ceph_mds_session *session = cap->session;
872 struct ceph_inode_info *ci = cap->ci; 865 struct ceph_inode_info *ci = cap->ci;
@@ -874,19 +867,27 @@ void __ceph_remove_cap(struct ceph_cap *cap,
874 867
875 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); 868 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
876 869
877 /* remove from session list */
878 spin_lock(&session->s_cap_lock);
879 list_del_init(&cap->session_caps);
880 session->s_nr_caps--;
881 spin_unlock(&session->s_cap_lock);
882
883 /* remove from inode list */ 870 /* remove from inode list */
884 rb_erase(&cap->ci_node, &ci->i_caps); 871 rb_erase(&cap->ci_node, &ci->i_caps);
885 cap->session = NULL; 872 cap->ci = NULL;
886 if (ci->i_auth_cap == cap) 873 if (ci->i_auth_cap == cap)
887 ci->i_auth_cap = NULL; 874 ci->i_auth_cap = NULL;
888 875
889 put_cap(cap, ctx); 876 /* remove from session list */
877 spin_lock(&session->s_cap_lock);
878 if (session->s_cap_iterator == cap) {
879 /* not yet, we are iterating over this very cap */
880 dout("__ceph_remove_cap delaying %p removal from session %p\n",
881 cap, cap->session);
882 } else {
883 list_del_init(&cap->session_caps);
884 session->s_nr_caps--;
885 cap->session = NULL;
886 }
887 spin_unlock(&session->s_cap_lock);
888
889 if (cap->session == NULL)
890 ceph_put_cap(cap);
890 891
891 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { 892 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
892 struct ceph_snap_realm *realm = ci->i_snap_realm; 893 struct ceph_snap_realm *realm = ci->i_snap_realm;
@@ -1022,7 +1023,7 @@ void ceph_queue_caps_release(struct inode *inode)
1022 } 1023 }
1023 spin_unlock(&session->s_cap_lock); 1024 spin_unlock(&session->s_cap_lock);
1024 p = rb_next(p); 1025 p = rb_next(p);
1025 __ceph_remove_cap(cap, NULL); 1026 __ceph_remove_cap(cap);
1026 1027
1027 } 1028 }
1028 spin_unlock(&inode->i_lock); 1029 spin_unlock(&inode->i_lock);
@@ -2521,7 +2522,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2521 ci->i_cap_exporting_mseq = mseq; 2522 ci->i_cap_exporting_mseq = mseq;
2522 ci->i_cap_exporting_issued = cap->issued; 2523 ci->i_cap_exporting_issued = cap->issued;
2523 } 2524 }
2524 __ceph_remove_cap(cap, NULL); 2525 __ceph_remove_cap(cap);
2525 } else { 2526 } else {
2526 WARN_ON(!cap); 2527 WARN_ON(!cap);
2527 } 2528 }