diff options
author | Kurt Hackel <kurt.hackel@oracle.com> | 2006-12-01 17:47:20 -0500 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-02-07 14:53:07 -0500 |
commit | ba2bf2185121db74e075c703fbf986761733dd1d (patch) | |
tree | f7b90fa14db61fb6fc5d92d393b1d837e58a9faa /fs/ocfs2/dlm/dlmthread.c | |
parent | 5331be090567d9335476f876b2d85427cd7c4426 (diff) |
ocfs2_dlm: fix cluster-wide refcounting of lock resources
This was previously broken and migration of some locks had to be temporarily
disabled. We use a new (and backward-incompatible) set of network messages
to account for all references to a lock resource held across the cluster.
Once these are all freed, the master node may then free the lock resource
memory once its local references are dropped.
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlm/dlmthread.c')
-rw-r--r-- | fs/ocfs2/dlm/dlmthread.c | 167 |
1 files changed, 77 insertions, 90 deletions
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 0c822f3ffb05..620eb824ce1d 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -54,9 +54,6 @@ | |||
54 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
55 | 55 | ||
56 | static int dlm_thread(void *data); | 56 | static int dlm_thread(void *data); |
57 | static void dlm_purge_lockres_now(struct dlm_ctxt *dlm, | ||
58 | struct dlm_lock_resource *lockres); | ||
59 | |||
60 | static void dlm_flush_asts(struct dlm_ctxt *dlm); | 57 | static void dlm_flush_asts(struct dlm_ctxt *dlm); |
61 | 58 | ||
62 | #define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) | 59 | #define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) |
@@ -82,14 +79,33 @@ repeat: | |||
82 | current->state = TASK_RUNNING; | 79 | current->state = TASK_RUNNING; |
83 | } | 80 | } |
84 | 81 | ||
85 | 82 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res) | |
86 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | ||
87 | { | 83 | { |
88 | if (list_empty(&res->granted) && | 84 | if (list_empty(&res->granted) && |
89 | list_empty(&res->converting) && | 85 | list_empty(&res->converting) && |
90 | list_empty(&res->blocked) && | 86 | list_empty(&res->blocked)) |
91 | list_empty(&res->dirty)) | 87 | return 0; |
92 | return 1; | 88 | return 1; |
89 | } | ||
90 | |||
91 | /* "unused": the lockres has no locks, is not on the dirty list, | ||
92 | * has no inflight locks (in the gap between mastery and acquiring | ||
93 | * the first lock), and has no bits in its refmap. | ||
94 | * truly ready to be freed. */ | ||
95 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | ||
96 | { | ||
97 | if (!__dlm_lockres_has_locks(res) && | ||
98 | list_empty(&res->dirty)) { | ||
99 | /* try not to scan the bitmap unless the first two | ||
100 | * conditions are already true */ | ||
101 | int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | ||
102 | if (bit >= O2NM_MAX_NODES) { | ||
103 | /* since the bit for dlm->node_num is not | ||
104 | * set, inflight_locks better be zero */ | ||
105 | BUG_ON(res->inflight_locks != 0); | ||
106 | return 1; | ||
107 | } | ||
108 | } | ||
93 | return 0; | 109 | return 0; |
94 | } | 110 | } |
95 | 111 | ||
@@ -106,46 +122,21 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
106 | assert_spin_locked(&res->spinlock); | 122 | assert_spin_locked(&res->spinlock); |
107 | 123 | ||
108 | if (__dlm_lockres_unused(res)){ | 124 | if (__dlm_lockres_unused(res)){ |
109 | /* For now, just keep any resource we master */ | ||
110 | if (res->owner == dlm->node_num) | ||
111 | { | ||
112 | if (!list_empty(&res->purge)) { | ||
113 | mlog(0, "we master %s:%.*s, but it is on " | ||
114 | "the purge list. Removing\n", | ||
115 | dlm->name, res->lockname.len, | ||
116 | res->lockname.name); | ||
117 | list_del_init(&res->purge); | ||
118 | dlm->purge_count--; | ||
119 | } | ||
120 | return; | ||
121 | } | ||
122 | |||
123 | if (list_empty(&res->purge)) { | 125 | if (list_empty(&res->purge)) { |
124 | mlog(0, "putting lockres %.*s from purge list\n", | 126 | mlog(0, "putting lockres %.*s:%p onto purge list\n", |
125 | res->lockname.len, res->lockname.name); | 127 | res->lockname.len, res->lockname.name, res); |
126 | 128 | ||
127 | res->last_used = jiffies; | 129 | res->last_used = jiffies; |
130 | dlm_lockres_get(res); | ||
128 | list_add_tail(&res->purge, &dlm->purge_list); | 131 | list_add_tail(&res->purge, &dlm->purge_list); |
129 | dlm->purge_count++; | 132 | dlm->purge_count++; |
130 | |||
131 | /* if this node is not the owner, there is | ||
132 | * no way to keep track of who the owner could be. | ||
133 | * unhash it to avoid serious problems. */ | ||
134 | if (res->owner != dlm->node_num) { | ||
135 | mlog(0, "%s:%.*s: doing immediate " | ||
136 | "purge of lockres owned by %u\n", | ||
137 | dlm->name, res->lockname.len, | ||
138 | res->lockname.name, res->owner); | ||
139 | |||
140 | dlm_purge_lockres_now(dlm, res); | ||
141 | } | ||
142 | } | 133 | } |
143 | } else if (!list_empty(&res->purge)) { | 134 | } else if (!list_empty(&res->purge)) { |
144 | mlog(0, "removing lockres %.*s from purge list, " | 135 | mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n", |
145 | "owner=%u\n", res->lockname.len, res->lockname.name, | 136 | res->lockname.len, res->lockname.name, res, res->owner); |
146 | res->owner); | ||
147 | 137 | ||
148 | list_del_init(&res->purge); | 138 | list_del_init(&res->purge); |
139 | dlm_lockres_put(res); | ||
149 | dlm->purge_count--; | 140 | dlm->purge_count--; |
150 | } | 141 | } |
151 | } | 142 | } |
@@ -163,68 +154,60 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
163 | spin_unlock(&dlm->spinlock); | 154 | spin_unlock(&dlm->spinlock); |
164 | } | 155 | } |
165 | 156 | ||
166 | /* TODO: Eventual API: Called with the dlm spinlock held, may drop it | 157 | int dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) |
167 | * to do migration, but will re-acquire before exit. */ | ||
168 | void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres) | ||
169 | { | 158 | { |
170 | int master; | 159 | int master; |
171 | int ret; | 160 | int ret = 0; |
172 | |||
173 | spin_lock(&lockres->spinlock); | ||
174 | master = lockres->owner == dlm->node_num; | ||
175 | spin_unlock(&lockres->spinlock); | ||
176 | |||
177 | mlog(0, "purging lockres %.*s, master = %d\n", lockres->lockname.len, | ||
178 | lockres->lockname.name, master); | ||
179 | 161 | ||
180 | /* Non master is the easy case -- no migration required, just | 162 | spin_lock(&res->spinlock); |
181 | * quit. */ | 163 | if (!__dlm_lockres_unused(res)) { |
164 | spin_unlock(&res->spinlock); | ||
165 | mlog(0, "%s:%.*s: tried to purge but not unused\n", | ||
166 | dlm->name, res->lockname.len, res->lockname.name); | ||
167 | return -ENOTEMPTY; | ||
168 | } | ||
169 | master = (res->owner == dlm->node_num); | ||
182 | if (!master) | 170 | if (!master) |
183 | goto finish; | 171 | res->state |= DLM_LOCK_RES_DROPPING_REF; |
184 | 172 | spin_unlock(&res->spinlock); | |
185 | /* Wheee! Migrate lockres here! */ | ||
186 | spin_unlock(&dlm->spinlock); | ||
187 | again: | ||
188 | 173 | ||
189 | ret = dlm_migrate_lockres(dlm, lockres, O2NM_MAX_NODES); | 174 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, |
190 | if (ret == -ENOTEMPTY) { | 175 | res->lockname.name, master); |
191 | mlog(ML_ERROR, "lockres %.*s still has local locks!\n", | ||
192 | lockres->lockname.len, lockres->lockname.name); | ||
193 | 176 | ||
194 | BUG(); | 177 | if (!master) { |
195 | } else if (ret < 0) { | 178 | /* drop spinlock to do messaging, retake below */ |
196 | mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n", | 179 | spin_unlock(&dlm->spinlock); |
197 | lockres->lockname.len, lockres->lockname.name); | 180 | /* clear our bit from the master's refmap, ignore errors */ |
198 | msleep(100); | 181 | ret = dlm_drop_lockres_ref(dlm, res); |
199 | goto again; | 182 | if (ret < 0) { |
183 | mlog_errno(ret); | ||
184 | if (!dlm_is_host_down(ret)) | ||
185 | BUG(); | ||
186 | } | ||
187 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", | ||
188 | dlm->name, res->lockname.len, res->lockname.name, ret); | ||
189 | spin_lock(&dlm->spinlock); | ||
200 | } | 190 | } |
201 | 191 | ||
202 | spin_lock(&dlm->spinlock); | 192 | if (!list_empty(&res->purge)) { |
203 | 193 | mlog(0, "removing lockres %.*s:%p from purgelist, " | |
204 | finish: | 194 | "master = %d\n", res->lockname.len, res->lockname.name, |
205 | if (!list_empty(&lockres->purge)) { | 195 | res, master); |
206 | list_del_init(&lockres->purge); | 196 | list_del_init(&res->purge); |
197 | dlm_lockres_put(res); | ||
207 | dlm->purge_count--; | 198 | dlm->purge_count--; |
208 | } | 199 | } |
209 | __dlm_unhash_lockres(lockres); | 200 | __dlm_unhash_lockres(res); |
210 | } | ||
211 | |||
212 | /* make an unused lockres go away immediately. | ||
213 | * as soon as the dlm spinlock is dropped, this lockres | ||
214 | * will not be found. kfree still happens on last put. */ | ||
215 | static void dlm_purge_lockres_now(struct dlm_ctxt *dlm, | ||
216 | struct dlm_lock_resource *lockres) | ||
217 | { | ||
218 | assert_spin_locked(&dlm->spinlock); | ||
219 | assert_spin_locked(&lockres->spinlock); | ||
220 | |||
221 | BUG_ON(!__dlm_lockres_unused(lockres)); | ||
222 | 201 | ||
223 | if (!list_empty(&lockres->purge)) { | 202 | /* lockres is not in the hash now. drop the flag and wake up |
224 | list_del_init(&lockres->purge); | 203 | * any processes waiting in dlm_get_lock_resource. */ |
225 | dlm->purge_count--; | 204 | if (!master) { |
205 | spin_lock(&res->spinlock); | ||
206 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | ||
207 | spin_unlock(&res->spinlock); | ||
208 | wake_up(&res->wq); | ||
226 | } | 209 | } |
227 | __dlm_unhash_lockres(lockres); | 210 | return 0; |
228 | } | 211 | } |
229 | 212 | ||
230 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, | 213 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, |
@@ -268,13 +251,17 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
268 | break; | 251 | break; |
269 | } | 252 | } |
270 | 253 | ||
254 | mlog(0, "removing lockres %.*s:%p from purgelist\n", | ||
255 | lockres->lockname.len, lockres->lockname.name, lockres); | ||
271 | list_del_init(&lockres->purge); | 256 | list_del_init(&lockres->purge); |
257 | dlm_lockres_put(lockres); | ||
272 | dlm->purge_count--; | 258 | dlm->purge_count--; |
273 | 259 | ||
274 | /* This may drop and reacquire the dlm spinlock if it | 260 | /* This may drop and reacquire the dlm spinlock if it |
275 | * has to do migration. */ | 261 | * has to do migration. */ |
276 | mlog(0, "calling dlm_purge_lockres!\n"); | 262 | mlog(0, "calling dlm_purge_lockres!\n"); |
277 | dlm_purge_lockres(dlm, lockres); | 263 | if (dlm_purge_lockres(dlm, lockres)) |
264 | BUG(); | ||
278 | mlog(0, "DONE calling dlm_purge_lockres!\n"); | 265 | mlog(0, "DONE calling dlm_purge_lockres!\n"); |
279 | 266 | ||
280 | /* Avoid adding any scheduling latencies */ | 267 | /* Avoid adding any scheduling latencies */ |