diff options
author | Yan, Zheng <zyan@redhat.com> | 2014-12-23 02:30:54 -0500 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2015-02-19 05:31:38 -0500 |
commit | 982d6011bc30a26e8a3d546e0e7fc7db2c255d85 (patch) | |
tree | 3d1ee68f03edac2d64b7683baf15821cf14bdef7 /fs/ceph/snap.c | |
parent | 1487a688d8ea596e6710b0d256300ab10ce99284 (diff) |
ceph: improve reference tracking for snaprealm
When a snaprealm is created, its initial reference count is zero.
But in some rare cases, the newly created snaprealm is not referenced
by anyone. This causes a snaprealm with zero reference count to never be freed.
The fix is to set the reference count of a newly created snaprealm to 1. The
reference is returned to the function that requested creation of the snaprealm.
When that function finishes its job, it releases the reference.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph/snap.c')
-rw-r--r-- | fs/ceph/snap.c | 54 |
1 files changed, 38 insertions, 16 deletions
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index ce35fbd4ba5d..a97e39f09ba6 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -70,13 +70,11 @@ void ceph_get_snap_realm(struct ceph_mds_client *mdsc, | |||
70 | * safe. we do need to protect against concurrent empty list | 70 | * safe. we do need to protect against concurrent empty list |
71 | * additions, however. | 71 | * additions, however. |
72 | */ | 72 | */ |
73 | if (atomic_read(&realm->nref) == 0) { | 73 | if (atomic_inc_return(&realm->nref) == 1) { |
74 | spin_lock(&mdsc->snap_empty_lock); | 74 | spin_lock(&mdsc->snap_empty_lock); |
75 | list_del_init(&realm->empty_item); | 75 | list_del_init(&realm->empty_item); |
76 | spin_unlock(&mdsc->snap_empty_lock); | 76 | spin_unlock(&mdsc->snap_empty_lock); |
77 | } | 77 | } |
78 | |||
79 | atomic_inc(&realm->nref); | ||
80 | } | 78 | } |
81 | 79 | ||
82 | static void __insert_snap_realm(struct rb_root *root, | 80 | static void __insert_snap_realm(struct rb_root *root, |
@@ -116,7 +114,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( | |||
116 | if (!realm) | 114 | if (!realm) |
117 | return ERR_PTR(-ENOMEM); | 115 | return ERR_PTR(-ENOMEM); |
118 | 116 | ||
119 | atomic_set(&realm->nref, 0); /* tree does not take a ref */ | 117 | atomic_set(&realm->nref, 1); /* for caller */ |
120 | realm->ino = ino; | 118 | realm->ino = ino; |
121 | INIT_LIST_HEAD(&realm->children); | 119 | INIT_LIST_HEAD(&realm->children); |
122 | INIT_LIST_HEAD(&realm->child_item); | 120 | INIT_LIST_HEAD(&realm->child_item); |
@@ -134,8 +132,8 @@ static struct ceph_snap_realm *ceph_create_snap_realm( | |||
134 | * | 132 | * |
135 | * caller must hold snap_rwsem for write. | 133 | * caller must hold snap_rwsem for write. |
136 | */ | 134 | */ |
137 | struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, | 135 | static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, |
138 | u64 ino) | 136 | u64 ino) |
139 | { | 137 | { |
140 | struct rb_node *n = mdsc->snap_realms.rb_node; | 138 | struct rb_node *n = mdsc->snap_realms.rb_node; |
141 | struct ceph_snap_realm *r; | 139 | struct ceph_snap_realm *r; |
@@ -154,6 +152,16 @@ struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, | |||
154 | return NULL; | 152 | return NULL; |
155 | } | 153 | } |
156 | 154 | ||
155 | struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, | ||
156 | u64 ino) | ||
157 | { | ||
158 | struct ceph_snap_realm *r; | ||
159 | r = __lookup_snap_realm(mdsc, ino); | ||
160 | if (r) | ||
161 | ceph_get_snap_realm(mdsc, r); | ||
162 | return r; | ||
163 | } | ||
164 | |||
157 | static void __put_snap_realm(struct ceph_mds_client *mdsc, | 165 | static void __put_snap_realm(struct ceph_mds_client *mdsc, |
158 | struct ceph_snap_realm *realm); | 166 | struct ceph_snap_realm *realm); |
159 | 167 | ||
@@ -273,7 +281,6 @@ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, | |||
273 | } | 281 | } |
274 | realm->parent_ino = parentino; | 282 | realm->parent_ino = parentino; |
275 | realm->parent = parent; | 283 | realm->parent = parent; |
276 | ceph_get_snap_realm(mdsc, parent); | ||
277 | list_add(&realm->child_item, &parent->children); | 284 | list_add(&realm->child_item, &parent->children); |
278 | return 1; | 285 | return 1; |
279 | } | 286 | } |
@@ -631,12 +638,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) | |||
631 | * Caller must hold snap_rwsem for write. | 638 | * Caller must hold snap_rwsem for write. |
632 | */ | 639 | */ |
633 | int ceph_update_snap_trace(struct ceph_mds_client *mdsc, | 640 | int ceph_update_snap_trace(struct ceph_mds_client *mdsc, |
634 | void *p, void *e, bool deletion) | 641 | void *p, void *e, bool deletion, |
642 | struct ceph_snap_realm **realm_ret) | ||
635 | { | 643 | { |
636 | struct ceph_mds_snap_realm *ri; /* encoded */ | 644 | struct ceph_mds_snap_realm *ri; /* encoded */ |
637 | __le64 *snaps; /* encoded */ | 645 | __le64 *snaps; /* encoded */ |
638 | __le64 *prior_parent_snaps; /* encoded */ | 646 | __le64 *prior_parent_snaps; /* encoded */ |
639 | struct ceph_snap_realm *realm; | 647 | struct ceph_snap_realm *realm = NULL; |
648 | struct ceph_snap_realm *first_realm = NULL; | ||
640 | int invalidate = 0; | 649 | int invalidate = 0; |
641 | int err = -ENOMEM; | 650 | int err = -ENOMEM; |
642 | LIST_HEAD(dirty_realms); | 651 | LIST_HEAD(dirty_realms); |
@@ -704,13 +713,18 @@ more: | |||
704 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, | 713 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, |
705 | realm, invalidate, p, e); | 714 | realm, invalidate, p, e); |
706 | 715 | ||
707 | if (p < e) | ||
708 | goto more; | ||
709 | |||
710 | /* invalidate when we reach the _end_ (root) of the trace */ | 716 | /* invalidate when we reach the _end_ (root) of the trace */ |
711 | if (invalidate) | 717 | if (invalidate && p >= e) |
712 | rebuild_snap_realms(realm); | 718 | rebuild_snap_realms(realm); |
713 | 719 | ||
720 | if (!first_realm) | ||
721 | first_realm = realm; | ||
722 | else | ||
723 | ceph_put_snap_realm(mdsc, realm); | ||
724 | |||
725 | if (p < e) | ||
726 | goto more; | ||
727 | |||
714 | /* | 728 | /* |
715 | * queue cap snaps _after_ we've built the new snap contexts, | 729 | * queue cap snaps _after_ we've built the new snap contexts, |
716 | * so that i_head_snapc can be set appropriately. | 730 | * so that i_head_snapc can be set appropriately. |
@@ -721,12 +735,21 @@ more: | |||
721 | queue_realm_cap_snaps(realm); | 735 | queue_realm_cap_snaps(realm); |
722 | } | 736 | } |
723 | 737 | ||
738 | if (realm_ret) | ||
739 | *realm_ret = first_realm; | ||
740 | else | ||
741 | ceph_put_snap_realm(mdsc, first_realm); | ||
742 | |||
724 | __cleanup_empty_realms(mdsc); | 743 | __cleanup_empty_realms(mdsc); |
725 | return 0; | 744 | return 0; |
726 | 745 | ||
727 | bad: | 746 | bad: |
728 | err = -EINVAL; | 747 | err = -EINVAL; |
729 | fail: | 748 | fail: |
749 | if (realm && !IS_ERR(realm)) | ||
750 | ceph_put_snap_realm(mdsc, realm); | ||
751 | if (first_realm) | ||
752 | ceph_put_snap_realm(mdsc, first_realm); | ||
730 | pr_err("update_snap_trace error %d\n", err); | 753 | pr_err("update_snap_trace error %d\n", err); |
731 | return err; | 754 | return err; |
732 | } | 755 | } |
@@ -844,7 +867,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
844 | if (IS_ERR(realm)) | 867 | if (IS_ERR(realm)) |
845 | goto out; | 868 | goto out; |
846 | } | 869 | } |
847 | ceph_get_snap_realm(mdsc, realm); | ||
848 | 870 | ||
849 | dout("splitting snap_realm %llx %p\n", realm->ino, realm); | 871 | dout("splitting snap_realm %llx %p\n", realm->ino, realm); |
850 | for (i = 0; i < num_split_inos; i++) { | 872 | for (i = 0; i < num_split_inos; i++) { |
@@ -905,7 +927,7 @@ skip_inode: | |||
905 | /* we may have taken some of the old realm's children. */ | 927 | /* we may have taken some of the old realm's children. */ |
906 | for (i = 0; i < num_split_realms; i++) { | 928 | for (i = 0; i < num_split_realms; i++) { |
907 | struct ceph_snap_realm *child = | 929 | struct ceph_snap_realm *child = |
908 | ceph_lookup_snap_realm(mdsc, | 930 | __lookup_snap_realm(mdsc, |
909 | le64_to_cpu(split_realms[i])); | 931 | le64_to_cpu(split_realms[i])); |
910 | if (!child) | 932 | if (!child) |
911 | continue; | 933 | continue; |
@@ -918,7 +940,7 @@ skip_inode: | |||
918 | * snap, we can avoid queueing cap_snaps. | 940 | * snap, we can avoid queueing cap_snaps. |
919 | */ | 941 | */ |
920 | ceph_update_snap_trace(mdsc, p, e, | 942 | ceph_update_snap_trace(mdsc, p, e, |
921 | op == CEPH_SNAP_OP_DESTROY); | 943 | op == CEPH_SNAP_OP_DESTROY, NULL); |
922 | 944 | ||
923 | if (op == CEPH_SNAP_OP_SPLIT) | 945 | if (op == CEPH_SNAP_OP_SPLIT) |
924 | /* we took a reference when we created the realm, above */ | 946 | /* we took a reference when we created the realm, above */ |