diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-08 13:37:22 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-08 13:37:22 -0500 |
commit | 5986a2ec35836a878350c54af4bd91b1de6abc59 (patch) | |
tree | 2efe068e124071ca30a5f1886402b890d7ba429e | |
parent | 43187902cbfafe73ede0144166b741fb0f7d04e1 (diff) | |
parent | ff05d1c4643dd4260eb699396043d7e8009c0de4 (diff) |
Merge branch 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/mfasheh/ocfs2: (22 commits)
configfs: Zero terminate data in configfs attribute writes.
[PATCH] ocfs2 heartbeat: clean up bio submission code
ocfs2: introduce sc->sc_send_lock to protect outbound messages
[PATCH] ocfs2: drop INET from Kconfig, not needed
ocfs2_dlm: Add timeout to dlm join domain
ocfs2_dlm: Silence some messages during join domain
ocfs2_dlm: disallow a domain join if node maps mismatch
ocfs2_dlm: Ensure correct ordering of set/clear refmap bit on lockres
ocfs2: Binds listener to the configured ip address
ocfs2_dlm: Calling post handler function in assert master handler
ocfs2: Added post handler callable function in o2net message handler
ocfs2_dlm: Cookies in locks not being printed correctly in error messages
ocfs2_dlm: Silence a failed convert
ocfs2_dlm: wake up sleepers on the lockres waitqueue
ocfs2_dlm: Dlm dispatch was stopping too early
ocfs2_dlm: Drop inflight refmap even if no locks found on the lockres
ocfs2_dlm: Flush dlm workqueue before starting to migrate
ocfs2_dlm: Fix migrate lockres handler queue scanning
ocfs2_dlm: Make dlmunlock() wait for migration to complete
ocfs2_dlm: Fixes race between migrate and dirty
...
-rw-r--r-- | fs/Kconfig | 1 | ||||
-rw-r--r-- | fs/configfs/file.c | 9 | ||||
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 158 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 35 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.h | 6 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 12 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmast.c | 14 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 130 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmconvert.c | 40 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmdebug.c | 30 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 253 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmlock.c | 7 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 579 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 182 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmthread.c | 200 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmunlock.c | 15 | ||||
-rw-r--r-- | fs/ocfs2/vote.c | 8 |
17 files changed, 1211 insertions, 468 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 8cd2417a14db..5e8e9d9ccb33 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -426,7 +426,6 @@ config OCFS2_FS | |||
426 | select CONFIGFS_FS | 426 | select CONFIGFS_FS |
427 | select JBD | 427 | select JBD |
428 | select CRC32 | 428 | select CRC32 |
429 | select INET | ||
430 | help | 429 | help |
431 | OCFS2 is a general purpose extent based shared disk cluster file | 430 | OCFS2 is a general purpose extent based shared disk cluster file |
432 | system with many similarities to ext3. It supports 64 bit inode | 431 | system with many similarities to ext3. It supports 64 bit inode |
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 2a7cb086e80c..d98be5e01328 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
@@ -162,14 +162,17 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size | |||
162 | int error; | 162 | int error; |
163 | 163 | ||
164 | if (!buffer->page) | 164 | if (!buffer->page) |
165 | buffer->page = (char *)get_zeroed_page(GFP_KERNEL); | 165 | buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0); |
166 | if (!buffer->page) | 166 | if (!buffer->page) |
167 | return -ENOMEM; | 167 | return -ENOMEM; |
168 | 168 | ||
169 | if (count > PAGE_SIZE) | 169 | if (count >= PAGE_SIZE) |
170 | count = PAGE_SIZE; | 170 | count = PAGE_SIZE - 1; |
171 | error = copy_from_user(buffer->page,buf,count); | 171 | error = copy_from_user(buffer->page,buf,count); |
172 | buffer->needs_read_fill = 1; | 172 | buffer->needs_read_fill = 1; |
173 | /* if buf is assumed to contain a string, terminate it by \0, | ||
174 | * so e.g. sscanf() can scan the string easily */ | ||
175 | buffer->page[count] = 0; | ||
173 | return error ? -EFAULT : count; | 176 | return error ? -EFAULT : count; |
174 | } | 177 | } |
175 | 178 | ||
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 277ca67a2ad6..5a9779bb9236 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -184,10 +184,9 @@ static void o2hb_disarm_write_timeout(struct o2hb_region *reg) | |||
184 | flush_scheduled_work(); | 184 | flush_scheduled_work(); |
185 | } | 185 | } |
186 | 186 | ||
187 | static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc, | 187 | static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) |
188 | unsigned int num_ios) | ||
189 | { | 188 | { |
190 | atomic_set(&wc->wc_num_reqs, num_ios); | 189 | atomic_set(&wc->wc_num_reqs, 1); |
191 | init_completion(&wc->wc_io_complete); | 190 | init_completion(&wc->wc_io_complete); |
192 | wc->wc_error = 0; | 191 | wc->wc_error = 0; |
193 | } | 192 | } |
@@ -212,6 +211,7 @@ static void o2hb_wait_on_io(struct o2hb_region *reg, | |||
212 | struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping; | 211 | struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping; |
213 | 212 | ||
214 | blk_run_address_space(mapping); | 213 | blk_run_address_space(mapping); |
214 | o2hb_bio_wait_dec(wc, 1); | ||
215 | 215 | ||
216 | wait_for_completion(&wc->wc_io_complete); | 216 | wait_for_completion(&wc->wc_io_complete); |
217 | } | 217 | } |
@@ -231,6 +231,7 @@ static int o2hb_bio_end_io(struct bio *bio, | |||
231 | return 1; | 231 | return 1; |
232 | 232 | ||
233 | o2hb_bio_wait_dec(wc, 1); | 233 | o2hb_bio_wait_dec(wc, 1); |
234 | bio_put(bio); | ||
234 | return 0; | 235 | return 0; |
235 | } | 236 | } |
236 | 237 | ||
@@ -238,23 +239,22 @@ static int o2hb_bio_end_io(struct bio *bio, | |||
238 | * start_slot. */ | 239 | * start_slot. */ |
239 | static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, | 240 | static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, |
240 | struct o2hb_bio_wait_ctxt *wc, | 241 | struct o2hb_bio_wait_ctxt *wc, |
241 | unsigned int start_slot, | 242 | unsigned int *current_slot, |
242 | unsigned int num_slots) | 243 | unsigned int max_slots) |
243 | { | 244 | { |
244 | int i, nr_vecs, len, first_page, last_page; | 245 | int len, current_page; |
245 | unsigned int vec_len, vec_start; | 246 | unsigned int vec_len, vec_start; |
246 | unsigned int bits = reg->hr_block_bits; | 247 | unsigned int bits = reg->hr_block_bits; |
247 | unsigned int spp = reg->hr_slots_per_page; | 248 | unsigned int spp = reg->hr_slots_per_page; |
249 | unsigned int cs = *current_slot; | ||
248 | struct bio *bio; | 250 | struct bio *bio; |
249 | struct page *page; | 251 | struct page *page; |
250 | 252 | ||
251 | nr_vecs = (num_slots + spp - 1) / spp; | ||
252 | |||
253 | /* Testing has shown this allocation to take long enough under | 253 | /* Testing has shown this allocation to take long enough under |
254 | * GFP_KERNEL that the local node can get fenced. It would be | 254 | * GFP_KERNEL that the local node can get fenced. It would be |
255 | * nicest if we could pre-allocate these bios and avoid this | 255 | * nicest if we could pre-allocate these bios and avoid this |
256 | * all together. */ | 256 | * all together. */ |
257 | bio = bio_alloc(GFP_ATOMIC, nr_vecs); | 257 | bio = bio_alloc(GFP_ATOMIC, 16); |
258 | if (!bio) { | 258 | if (!bio) { |
259 | mlog(ML_ERROR, "Could not alloc slots BIO!\n"); | 259 | mlog(ML_ERROR, "Could not alloc slots BIO!\n"); |
260 | bio = ERR_PTR(-ENOMEM); | 260 | bio = ERR_PTR(-ENOMEM); |
@@ -262,137 +262,53 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, | |||
262 | } | 262 | } |
263 | 263 | ||
264 | /* Must put everything in 512 byte sectors for the bio... */ | 264 | /* Must put everything in 512 byte sectors for the bio... */ |
265 | bio->bi_sector = (reg->hr_start_block + start_slot) << (bits - 9); | 265 | bio->bi_sector = (reg->hr_start_block + cs) << (bits - 9); |
266 | bio->bi_bdev = reg->hr_bdev; | 266 | bio->bi_bdev = reg->hr_bdev; |
267 | bio->bi_private = wc; | 267 | bio->bi_private = wc; |
268 | bio->bi_end_io = o2hb_bio_end_io; | 268 | bio->bi_end_io = o2hb_bio_end_io; |
269 | 269 | ||
270 | first_page = start_slot / spp; | 270 | vec_start = (cs << bits) % PAGE_CACHE_SIZE; |
271 | last_page = first_page + nr_vecs; | 271 | while(cs < max_slots) { |
272 | vec_start = (start_slot << bits) % PAGE_CACHE_SIZE; | 272 | current_page = cs / spp; |
273 | for(i = first_page; i < last_page; i++) { | 273 | page = reg->hr_slot_data[current_page]; |
274 | page = reg->hr_slot_data[i]; | ||
275 | 274 | ||
276 | vec_len = PAGE_CACHE_SIZE; | 275 | vec_len = min(PAGE_CACHE_SIZE, |
277 | /* last page might be short */ | 276 | (max_slots-cs) * (PAGE_CACHE_SIZE/spp) ); |
278 | if (((i + 1) * spp) > (start_slot + num_slots)) | ||
279 | vec_len = ((num_slots + start_slot) % spp) << bits; | ||
280 | vec_len -= vec_start; | ||
281 | 277 | ||
282 | mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n", | 278 | mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n", |
283 | i, vec_len, vec_start); | 279 | current_page, vec_len, vec_start); |
284 | 280 | ||
285 | len = bio_add_page(bio, page, vec_len, vec_start); | 281 | len = bio_add_page(bio, page, vec_len, vec_start); |
286 | if (len != vec_len) { | 282 | if (len != vec_len) break; |
287 | bio_put(bio); | ||
288 | bio = ERR_PTR(-EIO); | ||
289 | |||
290 | mlog(ML_ERROR, "Error adding page to bio i = %d, " | ||
291 | "vec_len = %u, len = %d\n, start = %u\n", | ||
292 | i, vec_len, len, vec_start); | ||
293 | goto bail; | ||
294 | } | ||
295 | 283 | ||
284 | cs += vec_len / (PAGE_CACHE_SIZE/spp); | ||
296 | vec_start = 0; | 285 | vec_start = 0; |
297 | } | 286 | } |
298 | 287 | ||
299 | bail: | 288 | bail: |
289 | *current_slot = cs; | ||
300 | return bio; | 290 | return bio; |
301 | } | 291 | } |
302 | 292 | ||
303 | /* | ||
304 | * Compute the maximum number of sectors the bdev can handle in one bio, | ||
305 | * as a power of two. | ||
306 | * | ||
307 | * Stolen from oracleasm, thanks Joel! | ||
308 | */ | ||
309 | static int compute_max_sectors(struct block_device *bdev) | ||
310 | { | ||
311 | int max_pages, max_sectors, pow_two_sectors; | ||
312 | |||
313 | struct request_queue *q; | ||
314 | |||
315 | q = bdev_get_queue(bdev); | ||
316 | max_pages = q->max_sectors >> (PAGE_SHIFT - 9); | ||
317 | if (max_pages > BIO_MAX_PAGES) | ||
318 | max_pages = BIO_MAX_PAGES; | ||
319 | if (max_pages > q->max_phys_segments) | ||
320 | max_pages = q->max_phys_segments; | ||
321 | if (max_pages > q->max_hw_segments) | ||
322 | max_pages = q->max_hw_segments; | ||
323 | max_pages--; /* Handle I/Os that straddle a page */ | ||
324 | |||
325 | if (max_pages) { | ||
326 | max_sectors = max_pages << (PAGE_SHIFT - 9); | ||
327 | } else { | ||
328 | /* If BIO contains 1 or less than 1 page. */ | ||
329 | max_sectors = q->max_sectors; | ||
330 | } | ||
331 | /* Why is fls() 1-based???? */ | ||
332 | pow_two_sectors = 1 << (fls(max_sectors) - 1); | ||
333 | |||
334 | return pow_two_sectors; | ||
335 | } | ||
336 | |||
337 | static inline void o2hb_compute_request_limits(struct o2hb_region *reg, | ||
338 | unsigned int num_slots, | ||
339 | unsigned int *num_bios, | ||
340 | unsigned int *slots_per_bio) | ||
341 | { | ||
342 | unsigned int max_sectors, io_sectors; | ||
343 | |||
344 | max_sectors = compute_max_sectors(reg->hr_bdev); | ||
345 | |||
346 | io_sectors = num_slots << (reg->hr_block_bits - 9); | ||
347 | |||
348 | *num_bios = (io_sectors + max_sectors - 1) / max_sectors; | ||
349 | *slots_per_bio = max_sectors >> (reg->hr_block_bits - 9); | ||
350 | |||
351 | mlog(ML_HB_BIO, "My io size is %u sectors for %u slots. This " | ||
352 | "device can handle %u sectors of I/O\n", io_sectors, num_slots, | ||
353 | max_sectors); | ||
354 | mlog(ML_HB_BIO, "Will need %u bios holding %u slots each\n", | ||
355 | *num_bios, *slots_per_bio); | ||
356 | } | ||
357 | |||
358 | static int o2hb_read_slots(struct o2hb_region *reg, | 293 | static int o2hb_read_slots(struct o2hb_region *reg, |
359 | unsigned int max_slots) | 294 | unsigned int max_slots) |
360 | { | 295 | { |
361 | unsigned int num_bios, slots_per_bio, start_slot, num_slots; | 296 | unsigned int current_slot=0; |
362 | int i, status; | 297 | int status; |
363 | struct o2hb_bio_wait_ctxt wc; | 298 | struct o2hb_bio_wait_ctxt wc; |
364 | struct bio **bios; | ||
365 | struct bio *bio; | 299 | struct bio *bio; |
366 | 300 | ||
367 | o2hb_compute_request_limits(reg, max_slots, &num_bios, &slots_per_bio); | 301 | o2hb_bio_wait_init(&wc); |
368 | 302 | ||
369 | bios = kcalloc(num_bios, sizeof(struct bio *), GFP_KERNEL); | 303 | while(current_slot < max_slots) { |
370 | if (!bios) { | 304 | bio = o2hb_setup_one_bio(reg, &wc, &current_slot, max_slots); |
371 | status = -ENOMEM; | ||
372 | mlog_errno(status); | ||
373 | return status; | ||
374 | } | ||
375 | |||
376 | o2hb_bio_wait_init(&wc, num_bios); | ||
377 | |||
378 | num_slots = slots_per_bio; | ||
379 | for(i = 0; i < num_bios; i++) { | ||
380 | start_slot = i * slots_per_bio; | ||
381 | |||
382 | /* adjust num_slots at last bio */ | ||
383 | if (max_slots < (start_slot + num_slots)) | ||
384 | num_slots = max_slots - start_slot; | ||
385 | |||
386 | bio = o2hb_setup_one_bio(reg, &wc, start_slot, num_slots); | ||
387 | if (IS_ERR(bio)) { | 305 | if (IS_ERR(bio)) { |
388 | o2hb_bio_wait_dec(&wc, num_bios - i); | ||
389 | |||
390 | status = PTR_ERR(bio); | 306 | status = PTR_ERR(bio); |
391 | mlog_errno(status); | 307 | mlog_errno(status); |
392 | goto bail_and_wait; | 308 | goto bail_and_wait; |
393 | } | 309 | } |
394 | bios[i] = bio; | ||
395 | 310 | ||
311 | atomic_inc(&wc.wc_num_reqs); | ||
396 | submit_bio(READ, bio); | 312 | submit_bio(READ, bio); |
397 | } | 313 | } |
398 | 314 | ||
@@ -403,38 +319,30 @@ bail_and_wait: | |||
403 | if (wc.wc_error && !status) | 319 | if (wc.wc_error && !status) |
404 | status = wc.wc_error; | 320 | status = wc.wc_error; |
405 | 321 | ||
406 | if (bios) { | ||
407 | for(i = 0; i < num_bios; i++) | ||
408 | if (bios[i]) | ||
409 | bio_put(bios[i]); | ||
410 | kfree(bios); | ||
411 | } | ||
412 | |||
413 | return status; | 322 | return status; |
414 | } | 323 | } |
415 | 324 | ||
416 | static int o2hb_issue_node_write(struct o2hb_region *reg, | 325 | static int o2hb_issue_node_write(struct o2hb_region *reg, |
417 | struct bio **write_bio, | ||
418 | struct o2hb_bio_wait_ctxt *write_wc) | 326 | struct o2hb_bio_wait_ctxt *write_wc) |
419 | { | 327 | { |
420 | int status; | 328 | int status; |
421 | unsigned int slot; | 329 | unsigned int slot; |
422 | struct bio *bio; | 330 | struct bio *bio; |
423 | 331 | ||
424 | o2hb_bio_wait_init(write_wc, 1); | 332 | o2hb_bio_wait_init(write_wc); |
425 | 333 | ||
426 | slot = o2nm_this_node(); | 334 | slot = o2nm_this_node(); |
427 | 335 | ||
428 | bio = o2hb_setup_one_bio(reg, write_wc, slot, 1); | 336 | bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1); |
429 | if (IS_ERR(bio)) { | 337 | if (IS_ERR(bio)) { |
430 | status = PTR_ERR(bio); | 338 | status = PTR_ERR(bio); |
431 | mlog_errno(status); | 339 | mlog_errno(status); |
432 | goto bail; | 340 | goto bail; |
433 | } | 341 | } |
434 | 342 | ||
343 | atomic_inc(&write_wc->wc_num_reqs); | ||
435 | submit_bio(WRITE, bio); | 344 | submit_bio(WRITE, bio); |
436 | 345 | ||
437 | *write_bio = bio; | ||
438 | status = 0; | 346 | status = 0; |
439 | bail: | 347 | bail: |
440 | return status; | 348 | return status; |
@@ -826,7 +734,6 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
826 | { | 734 | { |
827 | int i, ret, highest_node, change = 0; | 735 | int i, ret, highest_node, change = 0; |
828 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 736 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
829 | struct bio *write_bio; | ||
830 | struct o2hb_bio_wait_ctxt write_wc; | 737 | struct o2hb_bio_wait_ctxt write_wc; |
831 | 738 | ||
832 | ret = o2nm_configured_node_map(configured_nodes, | 739 | ret = o2nm_configured_node_map(configured_nodes, |
@@ -864,7 +771,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
864 | 771 | ||
865 | /* And fire off the write. Note that we don't wait on this I/O | 772 | /* And fire off the write. Note that we don't wait on this I/O |
866 | * until later. */ | 773 | * until later. */ |
867 | ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); | 774 | ret = o2hb_issue_node_write(reg, &write_wc); |
868 | if (ret < 0) { | 775 | if (ret < 0) { |
869 | mlog_errno(ret); | 776 | mlog_errno(ret); |
870 | return ret; | 777 | return ret; |
@@ -882,7 +789,6 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
882 | * people we find in our steady state have seen us. | 789 | * people we find in our steady state have seen us. |
883 | */ | 790 | */ |
884 | o2hb_wait_on_io(reg, &write_wc); | 791 | o2hb_wait_on_io(reg, &write_wc); |
885 | bio_put(write_bio); | ||
886 | if (write_wc.wc_error) { | 792 | if (write_wc.wc_error) { |
887 | /* Do not re-arm the write timeout on I/O error - we | 793 | /* Do not re-arm the write timeout on I/O error - we |
888 | * can't be sure that the new block ever made it to | 794 | * can't be sure that the new block ever made it to |
@@ -943,7 +849,6 @@ static int o2hb_thread(void *data) | |||
943 | { | 849 | { |
944 | int i, ret; | 850 | int i, ret; |
945 | struct o2hb_region *reg = data; | 851 | struct o2hb_region *reg = data; |
946 | struct bio *write_bio; | ||
947 | struct o2hb_bio_wait_ctxt write_wc; | 852 | struct o2hb_bio_wait_ctxt write_wc; |
948 | struct timeval before_hb, after_hb; | 853 | struct timeval before_hb, after_hb; |
949 | unsigned int elapsed_msec; | 854 | unsigned int elapsed_msec; |
@@ -993,10 +898,9 @@ static int o2hb_thread(void *data) | |||
993 | * | 898 | * |
994 | * XXX: Should we skip this on unclean_stop? */ | 899 | * XXX: Should we skip this on unclean_stop? */ |
995 | o2hb_prepare_block(reg, 0); | 900 | o2hb_prepare_block(reg, 0); |
996 | ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); | 901 | ret = o2hb_issue_node_write(reg, &write_wc); |
997 | if (ret == 0) { | 902 | if (ret == 0) { |
998 | o2hb_wait_on_io(reg, &write_wc); | 903 | o2hb_wait_on_io(reg, &write_wc); |
999 | bio_put(write_bio); | ||
1000 | } else { | 904 | } else { |
1001 | mlog_errno(ret); | 905 | mlog_errno(ret); |
1002 | } | 906 | } |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ae4ff4a6636b..1718215fc018 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -556,6 +556,8 @@ static void o2net_register_callbacks(struct sock *sk, | |||
556 | sk->sk_data_ready = o2net_data_ready; | 556 | sk->sk_data_ready = o2net_data_ready; |
557 | sk->sk_state_change = o2net_state_change; | 557 | sk->sk_state_change = o2net_state_change; |
558 | 558 | ||
559 | mutex_init(&sc->sc_send_lock); | ||
560 | |||
559 | write_unlock_bh(&sk->sk_callback_lock); | 561 | write_unlock_bh(&sk->sk_callback_lock); |
560 | } | 562 | } |
561 | 563 | ||
@@ -688,6 +690,7 @@ static void o2net_handler_put(struct o2net_msg_handler *nmh) | |||
688 | * be given to the handler if their payload is longer than the max. */ | 690 | * be given to the handler if their payload is longer than the max. */ |
689 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, | 691 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, |
690 | o2net_msg_handler_func *func, void *data, | 692 | o2net_msg_handler_func *func, void *data, |
693 | o2net_post_msg_handler_func *post_func, | ||
691 | struct list_head *unreg_list) | 694 | struct list_head *unreg_list) |
692 | { | 695 | { |
693 | struct o2net_msg_handler *nmh = NULL; | 696 | struct o2net_msg_handler *nmh = NULL; |
@@ -722,6 +725,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, | |||
722 | 725 | ||
723 | nmh->nh_func = func; | 726 | nmh->nh_func = func; |
724 | nmh->nh_func_data = data; | 727 | nmh->nh_func_data = data; |
728 | nmh->nh_post_func = post_func; | ||
725 | nmh->nh_msg_type = msg_type; | 729 | nmh->nh_msg_type = msg_type; |
726 | nmh->nh_max_len = max_len; | 730 | nmh->nh_max_len = max_len; |
727 | nmh->nh_key = key; | 731 | nmh->nh_key = key; |
@@ -856,10 +860,12 @@ static void o2net_sendpage(struct o2net_sock_container *sc, | |||
856 | ssize_t ret; | 860 | ssize_t ret; |
857 | 861 | ||
858 | 862 | ||
863 | mutex_lock(&sc->sc_send_lock); | ||
859 | ret = sc->sc_sock->ops->sendpage(sc->sc_sock, | 864 | ret = sc->sc_sock->ops->sendpage(sc->sc_sock, |
860 | virt_to_page(kmalloced_virt), | 865 | virt_to_page(kmalloced_virt), |
861 | (long)kmalloced_virt & ~PAGE_MASK, | 866 | (long)kmalloced_virt & ~PAGE_MASK, |
862 | size, MSG_DONTWAIT); | 867 | size, MSG_DONTWAIT); |
868 | mutex_unlock(&sc->sc_send_lock); | ||
863 | if (ret != size) { | 869 | if (ret != size) { |
864 | mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT | 870 | mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT |
865 | " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); | 871 | " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret); |
@@ -974,8 +980,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
974 | 980 | ||
975 | /* finally, convert the message header to network byte-order | 981 | /* finally, convert the message header to network byte-order |
976 | * and send */ | 982 | * and send */ |
983 | mutex_lock(&sc->sc_send_lock); | ||
977 | ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen, | 984 | ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen, |
978 | sizeof(struct o2net_msg) + caller_bytes); | 985 | sizeof(struct o2net_msg) + caller_bytes); |
986 | mutex_unlock(&sc->sc_send_lock); | ||
979 | msglog(msg, "sending returned %d\n", ret); | 987 | msglog(msg, "sending returned %d\n", ret); |
980 | if (ret < 0) { | 988 | if (ret < 0) { |
981 | mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret); | 989 | mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret); |
@@ -1049,6 +1057,7 @@ static int o2net_process_message(struct o2net_sock_container *sc, | |||
1049 | int ret = 0, handler_status; | 1057 | int ret = 0, handler_status; |
1050 | enum o2net_system_error syserr; | 1058 | enum o2net_system_error syserr; |
1051 | struct o2net_msg_handler *nmh = NULL; | 1059 | struct o2net_msg_handler *nmh = NULL; |
1060 | void *ret_data = NULL; | ||
1052 | 1061 | ||
1053 | msglog(hdr, "processing message\n"); | 1062 | msglog(hdr, "processing message\n"); |
1054 | 1063 | ||
@@ -1101,17 +1110,26 @@ static int o2net_process_message(struct o2net_sock_container *sc, | |||
1101 | sc->sc_msg_type = be16_to_cpu(hdr->msg_type); | 1110 | sc->sc_msg_type = be16_to_cpu(hdr->msg_type); |
1102 | handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + | 1111 | handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + |
1103 | be16_to_cpu(hdr->data_len), | 1112 | be16_to_cpu(hdr->data_len), |
1104 | nmh->nh_func_data); | 1113 | nmh->nh_func_data, &ret_data); |
1105 | do_gettimeofday(&sc->sc_tv_func_stop); | 1114 | do_gettimeofday(&sc->sc_tv_func_stop); |
1106 | 1115 | ||
1107 | out_respond: | 1116 | out_respond: |
1108 | /* this destroys the hdr, so don't use it after this */ | 1117 | /* this destroys the hdr, so don't use it after this */ |
1118 | mutex_lock(&sc->sc_send_lock); | ||
1109 | ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr, | 1119 | ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr, |
1110 | handler_status); | 1120 | handler_status); |
1121 | mutex_unlock(&sc->sc_send_lock); | ||
1111 | hdr = NULL; | 1122 | hdr = NULL; |
1112 | mlog(0, "sending handler status %d, syserr %d returned %d\n", | 1123 | mlog(0, "sending handler status %d, syserr %d returned %d\n", |
1113 | handler_status, syserr, ret); | 1124 | handler_status, syserr, ret); |
1114 | 1125 | ||
1126 | if (nmh) { | ||
1127 | BUG_ON(ret_data != NULL && nmh->nh_post_func == NULL); | ||
1128 | if (nmh->nh_post_func) | ||
1129 | (nmh->nh_post_func)(handler_status, nmh->nh_func_data, | ||
1130 | ret_data); | ||
1131 | } | ||
1132 | |||
1115 | out: | 1133 | out: |
1116 | if (nmh) | 1134 | if (nmh) |
1117 | o2net_handler_put(nmh); | 1135 | o2net_handler_put(nmh); |
@@ -1795,13 +1813,13 @@ out: | |||
1795 | ready(sk, bytes); | 1813 | ready(sk, bytes); |
1796 | } | 1814 | } |
1797 | 1815 | ||
1798 | static int o2net_open_listening_sock(__be16 port) | 1816 | static int o2net_open_listening_sock(__be32 addr, __be16 port) |
1799 | { | 1817 | { |
1800 | struct socket *sock = NULL; | 1818 | struct socket *sock = NULL; |
1801 | int ret; | 1819 | int ret; |
1802 | struct sockaddr_in sin = { | 1820 | struct sockaddr_in sin = { |
1803 | .sin_family = PF_INET, | 1821 | .sin_family = PF_INET, |
1804 | .sin_addr = { .s_addr = (__force u32)htonl(INADDR_ANY) }, | 1822 | .sin_addr = { .s_addr = (__force u32)addr }, |
1805 | .sin_port = (__force u16)port, | 1823 | .sin_port = (__force u16)port, |
1806 | }; | 1824 | }; |
1807 | 1825 | ||
@@ -1824,15 +1842,15 @@ static int o2net_open_listening_sock(__be16 port) | |||
1824 | sock->sk->sk_reuse = 1; | 1842 | sock->sk->sk_reuse = 1; |
1825 | ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); | 1843 | ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); |
1826 | if (ret < 0) { | 1844 | if (ret < 0) { |
1827 | mlog(ML_ERROR, "unable to bind socket to port %d, ret=%d\n", | 1845 | mlog(ML_ERROR, "unable to bind socket at %u.%u.%u.%u:%u, " |
1828 | ntohs(port), ret); | 1846 | "ret=%d\n", NIPQUAD(addr), ntohs(port), ret); |
1829 | goto out; | 1847 | goto out; |
1830 | } | 1848 | } |
1831 | 1849 | ||
1832 | ret = sock->ops->listen(sock, 64); | 1850 | ret = sock->ops->listen(sock, 64); |
1833 | if (ret < 0) { | 1851 | if (ret < 0) { |
1834 | mlog(ML_ERROR, "unable to listen on port %d, ret=%d\n", | 1852 | mlog(ML_ERROR, "unable to listen on %u.%u.%u.%u:%u, ret=%d\n", |
1835 | ntohs(port), ret); | 1853 | NIPQUAD(addr), ntohs(port), ret); |
1836 | } | 1854 | } |
1837 | 1855 | ||
1838 | out: | 1856 | out: |
@@ -1865,7 +1883,8 @@ int o2net_start_listening(struct o2nm_node *node) | |||
1865 | return -ENOMEM; /* ? */ | 1883 | return -ENOMEM; /* ? */ |
1866 | } | 1884 | } |
1867 | 1885 | ||
1868 | ret = o2net_open_listening_sock(node->nd_ipv4_port); | 1886 | ret = o2net_open_listening_sock(node->nd_ipv4_address, |
1887 | node->nd_ipv4_port); | ||
1869 | if (ret) { | 1888 | if (ret) { |
1870 | destroy_workqueue(o2net_wq); | 1889 | destroy_workqueue(o2net_wq); |
1871 | o2net_wq = NULL; | 1890 | o2net_wq = NULL; |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index 21a4e43df836..da880fc215f0 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
@@ -50,7 +50,10 @@ struct o2net_msg | |||
50 | __u8 buf[0]; | 50 | __u8 buf[0]; |
51 | }; | 51 | }; |
52 | 52 | ||
53 | typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data); | 53 | typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data, |
54 | void **ret_data); | ||
55 | typedef void (o2net_post_msg_handler_func)(int status, void *data, | ||
56 | void *ret_data); | ||
54 | 57 | ||
55 | #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) | 58 | #define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg)) |
56 | 59 | ||
@@ -99,6 +102,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *vec, | |||
99 | 102 | ||
100 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, | 103 | int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, |
101 | o2net_msg_handler_func *func, void *data, | 104 | o2net_msg_handler_func *func, void *data, |
105 | o2net_post_msg_handler_func *post_func, | ||
102 | struct list_head *unreg_list); | 106 | struct list_head *unreg_list); |
103 | void o2net_unregister_handler_list(struct list_head *list); | 107 | void o2net_unregister_handler_list(struct list_head *list); |
104 | 108 | ||
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index b700dc9624d1..4dae5df5e467 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -38,6 +38,12 @@ | |||
38 | * locking semantics of the file system using the protocol. It should | 38 | * locking semantics of the file system using the protocol. It should |
39 | * be somewhere else, I'm sure, but right now it isn't. | 39 | * be somewhere else, I'm sure, but right now it isn't. |
40 | * | 40 | * |
41 | * New in version 7: | ||
42 | * - DLM join domain includes the live nodemap | ||
43 | * | ||
44 | * New in version 6: | ||
45 | * - DLM lockres remote refcount fixes. | ||
46 | * | ||
41 | * New in version 5: | 47 | * New in version 5: |
42 | * - Network timeout checking protocol | 48 | * - Network timeout checking protocol |
43 | * | 49 | * |
@@ -51,7 +57,7 @@ | |||
51 | * - full 64 bit i_size in the metadata lock lvbs | 57 | * - full 64 bit i_size in the metadata lock lvbs |
52 | * - introduction of "rw" lock and pushing meta/data locking down | 58 | * - introduction of "rw" lock and pushing meta/data locking down |
53 | */ | 59 | */ |
54 | #define O2NET_PROTOCOL_VERSION 5ULL | 60 | #define O2NET_PROTOCOL_VERSION 7ULL |
55 | struct o2net_handshake { | 61 | struct o2net_handshake { |
56 | __be64 protocol_version; | 62 | __be64 protocol_version; |
57 | __be64 connector_id; | 63 | __be64 connector_id; |
@@ -149,6 +155,8 @@ struct o2net_sock_container { | |||
149 | struct timeval sc_tv_func_stop; | 155 | struct timeval sc_tv_func_stop; |
150 | u32 sc_msg_key; | 156 | u32 sc_msg_key; |
151 | u16 sc_msg_type; | 157 | u16 sc_msg_type; |
158 | |||
159 | struct mutex sc_send_lock; | ||
152 | }; | 160 | }; |
153 | 161 | ||
154 | struct o2net_msg_handler { | 162 | struct o2net_msg_handler { |
@@ -158,6 +166,8 @@ struct o2net_msg_handler { | |||
158 | u32 nh_key; | 166 | u32 nh_key; |
159 | o2net_msg_handler_func *nh_func; | 167 | o2net_msg_handler_func *nh_func; |
160 | o2net_msg_handler_func *nh_func_data; | 168 | o2net_msg_handler_func *nh_func_data; |
169 | o2net_post_msg_handler_func | ||
170 | *nh_post_func; | ||
161 | struct kref nh_kref; | 171 | struct kref nh_kref; |
162 | struct list_head nh_unregister_item; | 172 | struct list_head nh_unregister_item; |
163 | }; | 173 | }; |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 681046d51393..241cad342a48 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -263,7 +263,8 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
263 | 263 | ||
264 | 264 | ||
265 | 265 | ||
266 | int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | 266 | int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, |
267 | void **ret_data) | ||
267 | { | 268 | { |
268 | int ret; | 269 | int ret; |
269 | unsigned int locklen; | 270 | unsigned int locklen; |
@@ -311,8 +312,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | |||
311 | past->type != DLM_BAST) { | 312 | past->type != DLM_BAST) { |
312 | mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" | 313 | mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" |
313 | "name=%.*s\n", past->type, | 314 | "name=%.*s\n", past->type, |
314 | dlm_get_lock_cookie_node(cookie), | 315 | dlm_get_lock_cookie_node(be64_to_cpu(cookie)), |
315 | dlm_get_lock_cookie_seq(cookie), | 316 | dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), |
316 | locklen, name); | 317 | locklen, name); |
317 | ret = DLM_IVLOCKID; | 318 | ret = DLM_IVLOCKID; |
318 | goto leave; | 319 | goto leave; |
@@ -323,8 +324,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | |||
323 | mlog(0, "got %sast for unknown lockres! " | 324 | mlog(0, "got %sast for unknown lockres! " |
324 | "cookie=%u:%llu, name=%.*s, namelen=%u\n", | 325 | "cookie=%u:%llu, name=%.*s, namelen=%u\n", |
325 | past->type == DLM_AST ? "" : "b", | 326 | past->type == DLM_AST ? "" : "b", |
326 | dlm_get_lock_cookie_node(cookie), | 327 | dlm_get_lock_cookie_node(be64_to_cpu(cookie)), |
327 | dlm_get_lock_cookie_seq(cookie), | 328 | dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), |
328 | locklen, name, locklen); | 329 | locklen, name, locklen); |
329 | ret = DLM_IVLOCKID; | 330 | ret = DLM_IVLOCKID; |
330 | goto leave; | 331 | goto leave; |
@@ -369,7 +370,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data) | |||
369 | 370 | ||
370 | mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " | 371 | mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " |
371 | "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", | 372 | "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", |
372 | dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie), | 373 | dlm_get_lock_cookie_node(be64_to_cpu(cookie)), |
374 | dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), | ||
373 | locklen, name, locklen); | 375 | locklen, name, locklen); |
374 | 376 | ||
375 | ret = DLM_NORMAL; | 377 | ret = DLM_NORMAL; |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 6b6ff76538c5..e90b92f9ece1 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -180,6 +180,11 @@ struct dlm_assert_master_priv | |||
180 | unsigned ignore_higher:1; | 180 | unsigned ignore_higher:1; |
181 | }; | 181 | }; |
182 | 182 | ||
183 | struct dlm_deref_lockres_priv | ||
184 | { | ||
185 | struct dlm_lock_resource *deref_res; | ||
186 | u8 deref_node; | ||
187 | }; | ||
183 | 188 | ||
184 | struct dlm_work_item | 189 | struct dlm_work_item |
185 | { | 190 | { |
@@ -191,6 +196,7 @@ struct dlm_work_item | |||
191 | struct dlm_request_all_locks_priv ral; | 196 | struct dlm_request_all_locks_priv ral; |
192 | struct dlm_mig_lockres_priv ml; | 197 | struct dlm_mig_lockres_priv ml; |
193 | struct dlm_assert_master_priv am; | 198 | struct dlm_assert_master_priv am; |
199 | struct dlm_deref_lockres_priv dl; | ||
194 | } u; | 200 | } u; |
195 | }; | 201 | }; |
196 | 202 | ||
@@ -222,6 +228,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm, | |||
222 | #define DLM_LOCK_RES_DIRTY 0x00000008 | 228 | #define DLM_LOCK_RES_DIRTY 0x00000008 |
223 | #define DLM_LOCK_RES_IN_PROGRESS 0x00000010 | 229 | #define DLM_LOCK_RES_IN_PROGRESS 0x00000010 |
224 | #define DLM_LOCK_RES_MIGRATING 0x00000020 | 230 | #define DLM_LOCK_RES_MIGRATING 0x00000020 |
231 | #define DLM_LOCK_RES_DROPPING_REF 0x00000040 | ||
232 | #define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000 | ||
233 | #define DLM_LOCK_RES_SETREF_INPROG 0x00002000 | ||
225 | 234 | ||
226 | /* max milliseconds to wait to sync up a network failure with a node death */ | 235 | /* max milliseconds to wait to sync up a network failure with a node death */ |
227 | #define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) | 236 | #define DLM_NODE_DEATH_WAIT_MAX (5 * 1000) |
@@ -265,6 +274,8 @@ struct dlm_lock_resource | |||
265 | u8 owner; //node which owns the lock resource, or unknown | 274 | u8 owner; //node which owns the lock resource, or unknown |
266 | u16 state; | 275 | u16 state; |
267 | char lvb[DLM_LVB_LEN]; | 276 | char lvb[DLM_LVB_LEN]; |
277 | unsigned int inflight_locks; | ||
278 | unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
268 | }; | 279 | }; |
269 | 280 | ||
270 | struct dlm_migratable_lock | 281 | struct dlm_migratable_lock |
@@ -367,7 +378,7 @@ enum { | |||
367 | DLM_CONVERT_LOCK_MSG, /* 504 */ | 378 | DLM_CONVERT_LOCK_MSG, /* 504 */ |
368 | DLM_PROXY_AST_MSG, /* 505 */ | 379 | DLM_PROXY_AST_MSG, /* 505 */ |
369 | DLM_UNLOCK_LOCK_MSG, /* 506 */ | 380 | DLM_UNLOCK_LOCK_MSG, /* 506 */ |
370 | DLM_UNUSED_MSG2, /* 507 */ | 381 | DLM_DEREF_LOCKRES_MSG, /* 507 */ |
371 | DLM_MIGRATE_REQUEST_MSG, /* 508 */ | 382 | DLM_MIGRATE_REQUEST_MSG, /* 508 */ |
372 | DLM_MIG_LOCKRES_MSG, /* 509 */ | 383 | DLM_MIG_LOCKRES_MSG, /* 509 */ |
373 | DLM_QUERY_JOIN_MSG, /* 510 */ | 384 | DLM_QUERY_JOIN_MSG, /* 510 */ |
@@ -417,6 +428,9 @@ struct dlm_master_request | |||
417 | u8 name[O2NM_MAX_NAME_LEN]; | 428 | u8 name[O2NM_MAX_NAME_LEN]; |
418 | }; | 429 | }; |
419 | 430 | ||
431 | #define DLM_ASSERT_RESPONSE_REASSERT 0x00000001 | ||
432 | #define DLM_ASSERT_RESPONSE_MASTERY_REF 0x00000002 | ||
433 | |||
420 | #define DLM_ASSERT_MASTER_MLE_CLEANUP 0x00000001 | 434 | #define DLM_ASSERT_MASTER_MLE_CLEANUP 0x00000001 |
421 | #define DLM_ASSERT_MASTER_REQUERY 0x00000002 | 435 | #define DLM_ASSERT_MASTER_REQUERY 0x00000002 |
422 | #define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004 | 436 | #define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004 |
@@ -430,6 +444,8 @@ struct dlm_assert_master | |||
430 | u8 name[O2NM_MAX_NAME_LEN]; | 444 | u8 name[O2NM_MAX_NAME_LEN]; |
431 | }; | 445 | }; |
432 | 446 | ||
447 | #define DLM_MIGRATE_RESPONSE_MASTERY_REF 0x00000001 | ||
448 | |||
433 | struct dlm_migrate_request | 449 | struct dlm_migrate_request |
434 | { | 450 | { |
435 | u8 master; | 451 | u8 master; |
@@ -609,12 +625,16 @@ struct dlm_begin_reco | |||
609 | }; | 625 | }; |
610 | 626 | ||
611 | 627 | ||
628 | #define BITS_PER_BYTE 8 | ||
629 | #define BITS_TO_BYTES(bits) (((bits)+BITS_PER_BYTE-1)/BITS_PER_BYTE) | ||
630 | |||
612 | struct dlm_query_join_request | 631 | struct dlm_query_join_request |
613 | { | 632 | { |
614 | u8 node_idx; | 633 | u8 node_idx; |
615 | u8 pad1[2]; | 634 | u8 pad1[2]; |
616 | u8 name_len; | 635 | u8 name_len; |
617 | u8 domain[O2NM_MAX_NAME_LEN]; | 636 | u8 domain[O2NM_MAX_NAME_LEN]; |
637 | u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)]; | ||
618 | }; | 638 | }; |
619 | 639 | ||
620 | struct dlm_assert_joined | 640 | struct dlm_assert_joined |
@@ -648,6 +668,16 @@ struct dlm_finalize_reco | |||
648 | __be32 pad2; | 668 | __be32 pad2; |
649 | }; | 669 | }; |
650 | 670 | ||
671 | struct dlm_deref_lockres | ||
672 | { | ||
673 | u32 pad1; | ||
674 | u16 pad2; | ||
675 | u8 node_idx; | ||
676 | u8 namelen; | ||
677 | |||
678 | u8 name[O2NM_MAX_NAME_LEN]; | ||
679 | }; | ||
680 | |||
651 | static inline enum dlm_status | 681 | static inline enum dlm_status |
652 | __dlm_lockres_state_to_status(struct dlm_lock_resource *res) | 682 | __dlm_lockres_state_to_status(struct dlm_lock_resource *res) |
653 | { | 683 | { |
@@ -688,16 +718,20 @@ void dlm_lock_put(struct dlm_lock *lock); | |||
688 | void dlm_lock_attach_lockres(struct dlm_lock *lock, | 718 | void dlm_lock_attach_lockres(struct dlm_lock *lock, |
689 | struct dlm_lock_resource *res); | 719 | struct dlm_lock_resource *res); |
690 | 720 | ||
691 | int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data); | 721 | int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
692 | int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data); | 722 | void **ret_data); |
693 | int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data); | 723 | int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
724 | void **ret_data); | ||
725 | int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, | ||
726 | void **ret_data); | ||
694 | 727 | ||
695 | void dlm_revert_pending_convert(struct dlm_lock_resource *res, | 728 | void dlm_revert_pending_convert(struct dlm_lock_resource *res, |
696 | struct dlm_lock *lock); | 729 | struct dlm_lock *lock); |
697 | void dlm_revert_pending_lock(struct dlm_lock_resource *res, | 730 | void dlm_revert_pending_lock(struct dlm_lock_resource *res, |
698 | struct dlm_lock *lock); | 731 | struct dlm_lock *lock); |
699 | 732 | ||
700 | int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data); | 733 | int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
734 | void **ret_data); | ||
701 | void dlm_commit_pending_cancel(struct dlm_lock_resource *res, | 735 | void dlm_commit_pending_cancel(struct dlm_lock_resource *res, |
702 | struct dlm_lock *lock); | 736 | struct dlm_lock *lock); |
703 | void dlm_commit_pending_unlock(struct dlm_lock_resource *res, | 737 | void dlm_commit_pending_unlock(struct dlm_lock_resource *res, |
@@ -721,8 +755,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
721 | struct dlm_lock_resource *res); | 755 | struct dlm_lock_resource *res); |
722 | void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | 756 | void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, |
723 | struct dlm_lock_resource *res); | 757 | struct dlm_lock_resource *res); |
724 | void dlm_purge_lockres(struct dlm_ctxt *dlm, | ||
725 | struct dlm_lock_resource *lockres); | ||
726 | static inline void dlm_lockres_get(struct dlm_lock_resource *res) | 758 | static inline void dlm_lockres_get(struct dlm_lock_resource *res) |
727 | { | 759 | { |
728 | /* This is called on every lookup, so it might be worth | 760 | /* This is called on every lookup, so it might be worth |
@@ -733,6 +765,10 @@ void dlm_lockres_put(struct dlm_lock_resource *res); | |||
733 | void __dlm_unhash_lockres(struct dlm_lock_resource *res); | 765 | void __dlm_unhash_lockres(struct dlm_lock_resource *res); |
734 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, | 766 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, |
735 | struct dlm_lock_resource *res); | 767 | struct dlm_lock_resource *res); |
768 | struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, | ||
769 | const char *name, | ||
770 | unsigned int len, | ||
771 | unsigned int hash); | ||
736 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | 772 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, |
737 | const char *name, | 773 | const char *name, |
738 | unsigned int len, | 774 | unsigned int len, |
@@ -753,6 +789,47 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
753 | const char *name, | 789 | const char *name, |
754 | unsigned int namelen); | 790 | unsigned int namelen); |
755 | 791 | ||
792 | #define dlm_lockres_set_refmap_bit(bit,res) \ | ||
793 | __dlm_lockres_set_refmap_bit(bit,res,__FILE__,__LINE__) | ||
794 | #define dlm_lockres_clear_refmap_bit(bit,res) \ | ||
795 | __dlm_lockres_clear_refmap_bit(bit,res,__FILE__,__LINE__) | ||
796 | |||
797 | static inline void __dlm_lockres_set_refmap_bit(int bit, | ||
798 | struct dlm_lock_resource *res, | ||
799 | const char *file, | ||
800 | int line) | ||
801 | { | ||
802 | //printk("%s:%d:%.*s: setting bit %d\n", file, line, | ||
803 | // res->lockname.len, res->lockname.name, bit); | ||
804 | set_bit(bit, res->refmap); | ||
805 | } | ||
806 | |||
807 | static inline void __dlm_lockres_clear_refmap_bit(int bit, | ||
808 | struct dlm_lock_resource *res, | ||
809 | const char *file, | ||
810 | int line) | ||
811 | { | ||
812 | //printk("%s:%d:%.*s: clearing bit %d\n", file, line, | ||
813 | // res->lockname.len, res->lockname.name, bit); | ||
814 | clear_bit(bit, res->refmap); | ||
815 | } | ||
816 | |||
817 | void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | ||
818 | struct dlm_lock_resource *res, | ||
819 | const char *file, | ||
820 | int line); | ||
821 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | ||
822 | struct dlm_lock_resource *res, | ||
823 | int new_lockres, | ||
824 | const char *file, | ||
825 | int line); | ||
826 | #define dlm_lockres_drop_inflight_ref(d,r) \ | ||
827 | __dlm_lockres_drop_inflight_ref(d,r,__FILE__,__LINE__) | ||
828 | #define dlm_lockres_grab_inflight_ref(d,r) \ | ||
829 | __dlm_lockres_grab_inflight_ref(d,r,0,__FILE__,__LINE__) | ||
830 | #define dlm_lockres_grab_inflight_ref_new(d,r) \ | ||
831 | __dlm_lockres_grab_inflight_ref(d,r,1,__FILE__,__LINE__) | ||
832 | |||
756 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 833 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
757 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 834 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
758 | void dlm_do_local_ast(struct dlm_ctxt *dlm, | 835 | void dlm_do_local_ast(struct dlm_ctxt *dlm, |
@@ -801,10 +878,7 @@ int dlm_heartbeat_init(struct dlm_ctxt *dlm); | |||
801 | void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); | 878 | void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data); |
802 | void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); | 879 | void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data); |
803 | 880 | ||
804 | int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); | 881 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); |
805 | int dlm_migrate_lockres(struct dlm_ctxt *dlm, | ||
806 | struct dlm_lock_resource *res, | ||
807 | u8 target); | ||
808 | int dlm_finish_migration(struct dlm_ctxt *dlm, | 882 | int dlm_finish_migration(struct dlm_ctxt *dlm, |
809 | struct dlm_lock_resource *res, | 883 | struct dlm_lock_resource *res, |
810 | u8 old_master); | 884 | u8 old_master); |
@@ -812,15 +886,27 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm, | |||
812 | struct dlm_lock_resource *res); | 886 | struct dlm_lock_resource *res); |
813 | void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res); | 887 | void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res); |
814 | 888 | ||
815 | int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data); | 889 | int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, |
816 | int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data); | 890 | void **ret_data); |
817 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data); | 891 | int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, |
818 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data); | 892 | void **ret_data); |
819 | int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data); | 893 | void dlm_assert_master_post_handler(int status, void *data, void *ret_data); |
820 | int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data); | 894 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
821 | int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data); | 895 | void **ret_data); |
822 | int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data); | 896 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, |
823 | int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data); | 897 | void **ret_data); |
898 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | ||
899 | void **ret_data); | ||
900 | int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, | ||
901 | void **ret_data); | ||
902 | int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data, | ||
903 | void **ret_data); | ||
904 | int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, | ||
905 | void **ret_data); | ||
906 | int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, | ||
907 | void **ret_data); | ||
908 | int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, | ||
909 | void **ret_data); | ||
824 | int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | 910 | int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, |
825 | u8 nodenum, u8 *real_master); | 911 | u8 nodenum, u8 *real_master); |
826 | 912 | ||
@@ -856,10 +942,12 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) | |||
856 | int dlm_init_mle_cache(void); | 942 | int dlm_init_mle_cache(void); |
857 | void dlm_destroy_mle_cache(void); | 943 | void dlm_destroy_mle_cache(void); |
858 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); | 944 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); |
945 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, | ||
946 | struct dlm_lock_resource *res); | ||
859 | void dlm_clean_master_list(struct dlm_ctxt *dlm, | 947 | void dlm_clean_master_list(struct dlm_ctxt *dlm, |
860 | u8 dead_node); | 948 | u8 dead_node); |
861 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 949 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
862 | 950 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res); | |
863 | int __dlm_lockres_unused(struct dlm_lock_resource *res); | 951 | int __dlm_lockres_unused(struct dlm_lock_resource *res); |
864 | 952 | ||
865 | static inline const char * dlm_lock_mode_name(int mode) | 953 | static inline const char * dlm_lock_mode_name(int mode) |
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index c764dc8e40a2..ecb4d997221e 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c | |||
@@ -286,8 +286,8 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, | |||
286 | __dlm_print_one_lock_resource(res); | 286 | __dlm_print_one_lock_resource(res); |
287 | mlog(ML_ERROR, "converting a remote lock that is already " | 287 | mlog(ML_ERROR, "converting a remote lock that is already " |
288 | "converting! (cookie=%u:%llu, conv=%d)\n", | 288 | "converting! (cookie=%u:%llu, conv=%d)\n", |
289 | dlm_get_lock_cookie_node(lock->ml.cookie), | 289 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
290 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 290 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
291 | lock->ml.convert_type); | 291 | lock->ml.convert_type); |
292 | status = DLM_DENIED; | 292 | status = DLM_DENIED; |
293 | goto bail; | 293 | goto bail; |
@@ -418,7 +418,8 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, | |||
418 | * returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS, | 418 | * returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS, |
419 | * status from __dlmconvert_master | 419 | * status from __dlmconvert_master |
420 | */ | 420 | */ |
421 | int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | 421 | int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
422 | void **ret_data) | ||
422 | { | 423 | { |
423 | struct dlm_ctxt *dlm = data; | 424 | struct dlm_ctxt *dlm = data; |
424 | struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; | 425 | struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; |
@@ -428,7 +429,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | |||
428 | struct dlm_lockstatus *lksb; | 429 | struct dlm_lockstatus *lksb; |
429 | enum dlm_status status = DLM_NORMAL; | 430 | enum dlm_status status = DLM_NORMAL; |
430 | u32 flags; | 431 | u32 flags; |
431 | int call_ast = 0, kick_thread = 0, ast_reserved = 0; | 432 | int call_ast = 0, kick_thread = 0, ast_reserved = 0, wake = 0; |
432 | 433 | ||
433 | if (!dlm_grab(dlm)) { | 434 | if (!dlm_grab(dlm)) { |
434 | dlm_error(DLM_REJECTED); | 435 | dlm_error(DLM_REJECTED); |
@@ -479,25 +480,14 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | |||
479 | } | 480 | } |
480 | lock = NULL; | 481 | lock = NULL; |
481 | } | 482 | } |
482 | if (!lock) { | ||
483 | __dlm_print_one_lock_resource(res); | ||
484 | list_for_each(iter, &res->granted) { | ||
485 | lock = list_entry(iter, struct dlm_lock, list); | ||
486 | if (lock->ml.node == cnv->node_idx) { | ||
487 | mlog(ML_ERROR, "There is something here " | ||
488 | "for node %u, lock->ml.cookie=%llu, " | ||
489 | "cnv->cookie=%llu\n", cnv->node_idx, | ||
490 | (unsigned long long)lock->ml.cookie, | ||
491 | (unsigned long long)cnv->cookie); | ||
492 | break; | ||
493 | } | ||
494 | } | ||
495 | lock = NULL; | ||
496 | } | ||
497 | spin_unlock(&res->spinlock); | 483 | spin_unlock(&res->spinlock); |
498 | if (!lock) { | 484 | if (!lock) { |
499 | status = DLM_IVLOCKID; | 485 | status = DLM_IVLOCKID; |
500 | dlm_error(status); | 486 | mlog(ML_ERROR, "did not find lock to convert on grant queue! " |
487 | "cookie=%u:%llu\n", | ||
488 | dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)), | ||
489 | dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie))); | ||
490 | __dlm_print_one_lock_resource(res); | ||
501 | goto leave; | 491 | goto leave; |
502 | } | 492 | } |
503 | 493 | ||
@@ -524,8 +514,11 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | |||
524 | cnv->requested_type, | 514 | cnv->requested_type, |
525 | &call_ast, &kick_thread); | 515 | &call_ast, &kick_thread); |
526 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; | 516 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; |
517 | wake = 1; | ||
527 | } | 518 | } |
528 | spin_unlock(&res->spinlock); | 519 | spin_unlock(&res->spinlock); |
520 | if (wake) | ||
521 | wake_up(&res->wq); | ||
529 | 522 | ||
530 | if (status != DLM_NORMAL) { | 523 | if (status != DLM_NORMAL) { |
531 | if (status != DLM_NOTQUEUED) | 524 | if (status != DLM_NOTQUEUED) |
@@ -534,12 +527,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data) | |||
534 | } | 527 | } |
535 | 528 | ||
536 | leave: | 529 | leave: |
537 | if (!lock) | 530 | if (lock) |
538 | mlog(ML_ERROR, "did not find lock to convert on grant queue! " | ||
539 | "cookie=%u:%llu\n", | ||
540 | dlm_get_lock_cookie_node(cnv->cookie), | ||
541 | dlm_get_lock_cookie_seq(cnv->cookie)); | ||
542 | else | ||
543 | dlm_lock_put(lock); | 531 | dlm_lock_put(lock); |
544 | 532 | ||
545 | /* either queue the ast or release it, if reserved */ | 533 | /* either queue the ast or release it, if reserved */ |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 3f6c8d88f7af..64239b37e5d4 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -53,6 +53,23 @@ void dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
53 | spin_unlock(&res->spinlock); | 53 | spin_unlock(&res->spinlock); |
54 | } | 54 | } |
55 | 55 | ||
56 | static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) | ||
57 | { | ||
58 | int bit; | ||
59 | assert_spin_locked(&res->spinlock); | ||
60 | |||
61 | mlog(ML_NOTICE, " refmap nodes: [ "); | ||
62 | bit = 0; | ||
63 | while (1) { | ||
64 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); | ||
65 | if (bit >= O2NM_MAX_NODES) | ||
66 | break; | ||
67 | printk("%u ", bit); | ||
68 | bit++; | ||
69 | } | ||
70 | printk("], inflight=%u\n", res->inflight_locks); | ||
71 | } | ||
72 | |||
56 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | 73 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) |
57 | { | 74 | { |
58 | struct list_head *iter2; | 75 | struct list_head *iter2; |
@@ -65,6 +82,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
65 | res->owner, res->state); | 82 | res->owner, res->state); |
66 | mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", | 83 | mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", |
67 | res->last_used, list_empty(&res->purge) ? "no" : "yes"); | 84 | res->last_used, list_empty(&res->purge) ? "no" : "yes"); |
85 | dlm_print_lockres_refmap(res); | ||
68 | mlog(ML_NOTICE, " granted queue: \n"); | 86 | mlog(ML_NOTICE, " granted queue: \n"); |
69 | list_for_each(iter2, &res->granted) { | 87 | list_for_each(iter2, &res->granted) { |
70 | lock = list_entry(iter2, struct dlm_lock, list); | 88 | lock = list_entry(iter2, struct dlm_lock, list); |
@@ -72,8 +90,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
72 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | 90 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " |
73 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | 91 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", |
74 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | 92 | lock->ml.type, lock->ml.convert_type, lock->ml.node, |
75 | dlm_get_lock_cookie_node(lock->ml.cookie), | 93 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
76 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 94 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
77 | list_empty(&lock->ast_list) ? 'y' : 'n', | 95 | list_empty(&lock->ast_list) ? 'y' : 'n', |
78 | lock->ast_pending ? 'y' : 'n', | 96 | lock->ast_pending ? 'y' : 'n', |
79 | list_empty(&lock->bast_list) ? 'y' : 'n', | 97 | list_empty(&lock->bast_list) ? 'y' : 'n', |
@@ -87,8 +105,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
87 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | 105 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " |
88 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | 106 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", |
89 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | 107 | lock->ml.type, lock->ml.convert_type, lock->ml.node, |
90 | dlm_get_lock_cookie_node(lock->ml.cookie), | 108 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
91 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 109 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
92 | list_empty(&lock->ast_list) ? 'y' : 'n', | 110 | list_empty(&lock->ast_list) ? 'y' : 'n', |
93 | lock->ast_pending ? 'y' : 'n', | 111 | lock->ast_pending ? 'y' : 'n', |
94 | list_empty(&lock->bast_list) ? 'y' : 'n', | 112 | list_empty(&lock->bast_list) ? 'y' : 'n', |
@@ -102,8 +120,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | |||
102 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | 120 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " |
103 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | 121 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", |
104 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | 122 | lock->ml.type, lock->ml.convert_type, lock->ml.node, |
105 | dlm_get_lock_cookie_node(lock->ml.cookie), | 123 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
106 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 124 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
107 | list_empty(&lock->ast_list) ? 'y' : 'n', | 125 | list_empty(&lock->ast_list) ? 'y' : 'n', |
108 | lock->ast_pending ? 'y' : 'n', | 126 | lock->ast_pending ? 'y' : 'n', |
109 | list_empty(&lock->bast_list) ? 'y' : 'n', | 127 | list_empty(&lock->bast_list) ? 'y' : 'n', |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index f0b25f2dd205..6087c4749fee 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -48,6 +48,36 @@ | |||
48 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) | 48 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) |
49 | #include "cluster/masklog.h" | 49 | #include "cluster/masklog.h" |
50 | 50 | ||
51 | /* | ||
52 | * ocfs2 node maps are array of long int, which limits to send them freely | ||
53 | * across the wire due to endianness issues. To workaround this, we convert | ||
54 | * long ints to byte arrays. Following 3 routines are helper functions to | ||
55 | * set/test/copy bits within those array of bytes | ||
56 | */ | ||
57 | static inline void byte_set_bit(u8 nr, u8 map[]) | ||
58 | { | ||
59 | map[nr >> 3] |= (1UL << (nr & 7)); | ||
60 | } | ||
61 | |||
62 | static inline int byte_test_bit(u8 nr, u8 map[]) | ||
63 | { | ||
64 | return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0; | ||
65 | } | ||
66 | |||
67 | static inline void byte_copymap(u8 dmap[], unsigned long smap[], | ||
68 | unsigned int sz) | ||
69 | { | ||
70 | unsigned int nn; | ||
71 | |||
72 | if (!sz) | ||
73 | return; | ||
74 | |||
75 | memset(dmap, 0, ((sz + 7) >> 3)); | ||
76 | for (nn = 0 ; nn < sz; nn++) | ||
77 | if (test_bit(nn, smap)) | ||
78 | byte_set_bit(nn, dmap); | ||
79 | } | ||
80 | |||
51 | static void dlm_free_pagevec(void **vec, int pages) | 81 | static void dlm_free_pagevec(void **vec, int pages) |
52 | { | 82 | { |
53 | while (pages--) | 83 | while (pages--) |
@@ -95,10 +125,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
95 | 125 | ||
96 | #define DLM_DOMAIN_BACKOFF_MS 200 | 126 | #define DLM_DOMAIN_BACKOFF_MS 200 |
97 | 127 | ||
98 | static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data); | 128 | static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, |
99 | static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data); | 129 | void **ret_data); |
100 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data); | 130 | static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, |
101 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data); | 131 | void **ret_data); |
132 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | ||
133 | void **ret_data); | ||
134 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | ||
135 | void **ret_data); | ||
102 | 136 | ||
103 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); | 137 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); |
104 | 138 | ||
@@ -125,10 +159,10 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm, | |||
125 | hlist_add_head(&res->hash_node, bucket); | 159 | hlist_add_head(&res->hash_node, bucket); |
126 | } | 160 | } |
127 | 161 | ||
128 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | 162 | struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, |
129 | const char *name, | 163 | const char *name, |
130 | unsigned int len, | 164 | unsigned int len, |
131 | unsigned int hash) | 165 | unsigned int hash) |
132 | { | 166 | { |
133 | struct hlist_head *bucket; | 167 | struct hlist_head *bucket; |
134 | struct hlist_node *list; | 168 | struct hlist_node *list; |
@@ -154,6 +188,37 @@ struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | |||
154 | return NULL; | 188 | return NULL; |
155 | } | 189 | } |
156 | 190 | ||
191 | /* intended to be called by functions which do not care about lock | ||
192 | * resources which are being purged (most net _handler functions). | ||
193 | * this will return NULL for any lock resource which is found but | ||
194 | * currently in the process of dropping its mastery reference. | ||
195 | * use __dlm_lookup_lockres_full when you need the lock resource | ||
196 | * regardless (e.g. dlm_get_lock_resource) */ | ||
197 | struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, | ||
198 | const char *name, | ||
199 | unsigned int len, | ||
200 | unsigned int hash) | ||
201 | { | ||
202 | struct dlm_lock_resource *res = NULL; | ||
203 | |||
204 | mlog_entry("%.*s\n", len, name); | ||
205 | |||
206 | assert_spin_locked(&dlm->spinlock); | ||
207 | |||
208 | res = __dlm_lookup_lockres_full(dlm, name, len, hash); | ||
209 | if (res) { | ||
210 | spin_lock(&res->spinlock); | ||
211 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { | ||
212 | spin_unlock(&res->spinlock); | ||
213 | dlm_lockres_put(res); | ||
214 | return NULL; | ||
215 | } | ||
216 | spin_unlock(&res->spinlock); | ||
217 | } | ||
218 | |||
219 | return res; | ||
220 | } | ||
221 | |||
157 | struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, | 222 | struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, |
158 | const char *name, | 223 | const char *name, |
159 | unsigned int len) | 224 | unsigned int len) |
@@ -330,43 +395,60 @@ static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) | |||
330 | wake_up(&dlm_domain_events); | 395 | wake_up(&dlm_domain_events); |
331 | } | 396 | } |
332 | 397 | ||
333 | static void dlm_migrate_all_locks(struct dlm_ctxt *dlm) | 398 | static int dlm_migrate_all_locks(struct dlm_ctxt *dlm) |
334 | { | 399 | { |
335 | int i; | 400 | int i, num, n, ret = 0; |
336 | struct dlm_lock_resource *res; | 401 | struct dlm_lock_resource *res; |
402 | struct hlist_node *iter; | ||
403 | struct hlist_head *bucket; | ||
404 | int dropped; | ||
337 | 405 | ||
338 | mlog(0, "Migrating locks from domain %s\n", dlm->name); | 406 | mlog(0, "Migrating locks from domain %s\n", dlm->name); |
339 | restart: | 407 | |
408 | num = 0; | ||
340 | spin_lock(&dlm->spinlock); | 409 | spin_lock(&dlm->spinlock); |
341 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | 410 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
342 | while (!hlist_empty(dlm_lockres_hash(dlm, i))) { | 411 | redo_bucket: |
343 | res = hlist_entry(dlm_lockres_hash(dlm, i)->first, | 412 | n = 0; |
344 | struct dlm_lock_resource, hash_node); | 413 | bucket = dlm_lockres_hash(dlm, i); |
345 | /* need reference when manually grabbing lockres */ | 414 | iter = bucket->first; |
415 | while (iter) { | ||
416 | n++; | ||
417 | res = hlist_entry(iter, struct dlm_lock_resource, | ||
418 | hash_node); | ||
346 | dlm_lockres_get(res); | 419 | dlm_lockres_get(res); |
347 | /* this should unhash the lockres | 420 | /* migrate, if necessary. this will drop the dlm |
348 | * and exit with dlm->spinlock */ | 421 | * spinlock and retake it if it does migration. */ |
349 | mlog(0, "purging res=%p\n", res); | 422 | dropped = dlm_empty_lockres(dlm, res); |
350 | if (dlm_lockres_is_dirty(dlm, res)) { | 423 | |
351 | /* HACK! this should absolutely go. | 424 | spin_lock(&res->spinlock); |
352 | * need to figure out why some empty | 425 | __dlm_lockres_calc_usage(dlm, res); |
353 | * lockreses are still marked dirty */ | 426 | iter = res->hash_node.next; |
354 | mlog(ML_ERROR, "lockres %.*s dirty!\n", | 427 | spin_unlock(&res->spinlock); |
355 | res->lockname.len, res->lockname.name); | 428 | |
356 | |||
357 | spin_unlock(&dlm->spinlock); | ||
358 | dlm_kick_thread(dlm, res); | ||
359 | wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res)); | ||
360 | dlm_lockres_put(res); | ||
361 | goto restart; | ||
362 | } | ||
363 | dlm_purge_lockres(dlm, res); | ||
364 | dlm_lockres_put(res); | 429 | dlm_lockres_put(res); |
430 | |||
431 | cond_resched_lock(&dlm->spinlock); | ||
432 | |||
433 | if (dropped) | ||
434 | goto redo_bucket; | ||
365 | } | 435 | } |
436 | num += n; | ||
437 | mlog(0, "%s: touched %d lockreses in bucket %d " | ||
438 | "(tot=%d)\n", dlm->name, n, i, num); | ||
366 | } | 439 | } |
367 | spin_unlock(&dlm->spinlock); | 440 | spin_unlock(&dlm->spinlock); |
368 | 441 | wake_up(&dlm->dlm_thread_wq); | |
442 | |||
443 | /* let the dlm thread take care of purging, keep scanning until | ||
444 | * nothing remains in the hash */ | ||
445 | if (num) { | ||
446 | mlog(0, "%s: %d lock resources in hash last pass\n", | ||
447 | dlm->name, num); | ||
448 | ret = -EAGAIN; | ||
449 | } | ||
369 | mlog(0, "DONE Migrating locks from domain %s\n", dlm->name); | 450 | mlog(0, "DONE Migrating locks from domain %s\n", dlm->name); |
451 | return ret; | ||
370 | } | 452 | } |
371 | 453 | ||
372 | static int dlm_no_joining_node(struct dlm_ctxt *dlm) | 454 | static int dlm_no_joining_node(struct dlm_ctxt *dlm) |
@@ -418,7 +500,8 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm) | |||
418 | printk("\n"); | 500 | printk("\n"); |
419 | } | 501 | } |
420 | 502 | ||
421 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data) | 503 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, |
504 | void **ret_data) | ||
422 | { | 505 | { |
423 | struct dlm_ctxt *dlm = data; | 506 | struct dlm_ctxt *dlm = data; |
424 | unsigned int node; | 507 | unsigned int node; |
@@ -571,7 +654,9 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
571 | /* We changed dlm state, notify the thread */ | 654 | /* We changed dlm state, notify the thread */ |
572 | dlm_kick_thread(dlm, NULL); | 655 | dlm_kick_thread(dlm, NULL); |
573 | 656 | ||
574 | dlm_migrate_all_locks(dlm); | 657 | while (dlm_migrate_all_locks(dlm)) { |
658 | mlog(0, "%s: more migration to do\n", dlm->name); | ||
659 | } | ||
575 | dlm_mark_domain_leaving(dlm); | 660 | dlm_mark_domain_leaving(dlm); |
576 | dlm_leave_domain(dlm); | 661 | dlm_leave_domain(dlm); |
577 | dlm_complete_dlm_shutdown(dlm); | 662 | dlm_complete_dlm_shutdown(dlm); |
@@ -580,11 +665,13 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
580 | } | 665 | } |
581 | EXPORT_SYMBOL_GPL(dlm_unregister_domain); | 666 | EXPORT_SYMBOL_GPL(dlm_unregister_domain); |
582 | 667 | ||
583 | static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | 668 | static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, |
669 | void **ret_data) | ||
584 | { | 670 | { |
585 | struct dlm_query_join_request *query; | 671 | struct dlm_query_join_request *query; |
586 | enum dlm_query_join_response response; | 672 | enum dlm_query_join_response response; |
587 | struct dlm_ctxt *dlm = NULL; | 673 | struct dlm_ctxt *dlm = NULL; |
674 | u8 nodenum; | ||
588 | 675 | ||
589 | query = (struct dlm_query_join_request *) msg->buf; | 676 | query = (struct dlm_query_join_request *) msg->buf; |
590 | 677 | ||
@@ -608,6 +695,28 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | |||
608 | 695 | ||
609 | spin_lock(&dlm_domain_lock); | 696 | spin_lock(&dlm_domain_lock); |
610 | dlm = __dlm_lookup_domain_full(query->domain, query->name_len); | 697 | dlm = __dlm_lookup_domain_full(query->domain, query->name_len); |
698 | if (!dlm) | ||
699 | goto unlock_respond; | ||
700 | |||
701 | /* | ||
702 | * There is a small window where the joining node may not see the | ||
703 | * node(s) that just left but still part of the cluster. DISALLOW | ||
704 | * join request if joining node has different node map. | ||
705 | */ | ||
706 | nodenum=0; | ||
707 | while (nodenum < O2NM_MAX_NODES) { | ||
708 | if (test_bit(nodenum, dlm->domain_map)) { | ||
709 | if (!byte_test_bit(nodenum, query->node_map)) { | ||
710 | mlog(0, "disallow join as node %u does not " | ||
711 | "have node %u in its nodemap\n", | ||
712 | query->node_idx, nodenum); | ||
713 | response = JOIN_DISALLOW; | ||
714 | goto unlock_respond; | ||
715 | } | ||
716 | } | ||
717 | nodenum++; | ||
718 | } | ||
719 | |||
611 | /* Once the dlm ctxt is marked as leaving then we don't want | 720 | /* Once the dlm ctxt is marked as leaving then we don't want |
612 | * to be put in someone's domain map. | 721 | * to be put in someone's domain map. |
613 | * Also, explicitly disallow joining at certain troublesome | 722 | * Also, explicitly disallow joining at certain troublesome |
@@ -626,15 +735,15 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | |||
626 | /* Disallow parallel joins. */ | 735 | /* Disallow parallel joins. */ |
627 | response = JOIN_DISALLOW; | 736 | response = JOIN_DISALLOW; |
628 | } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { | 737 | } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { |
629 | mlog(ML_NOTICE, "node %u trying to join, but recovery " | 738 | mlog(0, "node %u trying to join, but recovery " |
630 | "is ongoing.\n", bit); | 739 | "is ongoing.\n", bit); |
631 | response = JOIN_DISALLOW; | 740 | response = JOIN_DISALLOW; |
632 | } else if (test_bit(bit, dlm->recovery_map)) { | 741 | } else if (test_bit(bit, dlm->recovery_map)) { |
633 | mlog(ML_NOTICE, "node %u trying to join, but it " | 742 | mlog(0, "node %u trying to join, but it " |
634 | "still needs recovery.\n", bit); | 743 | "still needs recovery.\n", bit); |
635 | response = JOIN_DISALLOW; | 744 | response = JOIN_DISALLOW; |
636 | } else if (test_bit(bit, dlm->domain_map)) { | 745 | } else if (test_bit(bit, dlm->domain_map)) { |
637 | mlog(ML_NOTICE, "node %u trying to join, but it " | 746 | mlog(0, "node %u trying to join, but it " |
638 | "is still in the domain! needs recovery?\n", | 747 | "is still in the domain! needs recovery?\n", |
639 | bit); | 748 | bit); |
640 | response = JOIN_DISALLOW; | 749 | response = JOIN_DISALLOW; |
@@ -649,6 +758,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) | |||
649 | 758 | ||
650 | spin_unlock(&dlm->spinlock); | 759 | spin_unlock(&dlm->spinlock); |
651 | } | 760 | } |
761 | unlock_respond: | ||
652 | spin_unlock(&dlm_domain_lock); | 762 | spin_unlock(&dlm_domain_lock); |
653 | 763 | ||
654 | respond: | 764 | respond: |
@@ -657,7 +767,8 @@ respond: | |||
657 | return response; | 767 | return response; |
658 | } | 768 | } |
659 | 769 | ||
660 | static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data) | 770 | static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, |
771 | void **ret_data) | ||
661 | { | 772 | { |
662 | struct dlm_assert_joined *assert; | 773 | struct dlm_assert_joined *assert; |
663 | struct dlm_ctxt *dlm = NULL; | 774 | struct dlm_ctxt *dlm = NULL; |
@@ -694,7 +805,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data) | |||
694 | return 0; | 805 | return 0; |
695 | } | 806 | } |
696 | 807 | ||
697 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data) | 808 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
809 | void **ret_data) | ||
698 | { | 810 | { |
699 | struct dlm_cancel_join *cancel; | 811 | struct dlm_cancel_join *cancel; |
700 | struct dlm_ctxt *dlm = NULL; | 812 | struct dlm_ctxt *dlm = NULL; |
@@ -796,6 +908,9 @@ static int dlm_request_join(struct dlm_ctxt *dlm, | |||
796 | join_msg.name_len = strlen(dlm->name); | 908 | join_msg.name_len = strlen(dlm->name); |
797 | memcpy(join_msg.domain, dlm->name, join_msg.name_len); | 909 | memcpy(join_msg.domain, dlm->name, join_msg.name_len); |
798 | 910 | ||
911 | /* copy live node map to join message */ | ||
912 | byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); | ||
913 | |||
799 | status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, | 914 | status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, |
800 | sizeof(join_msg), node, &retval); | 915 | sizeof(join_msg), node, &retval); |
801 | if (status < 0 && status != -ENOPROTOOPT) { | 916 | if (status < 0 && status != -ENOPROTOOPT) { |
@@ -1036,98 +1151,106 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) | |||
1036 | status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key, | 1151 | status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key, |
1037 | sizeof(struct dlm_master_request), | 1152 | sizeof(struct dlm_master_request), |
1038 | dlm_master_request_handler, | 1153 | dlm_master_request_handler, |
1039 | dlm, &dlm->dlm_domain_handlers); | 1154 | dlm, NULL, &dlm->dlm_domain_handlers); |
1040 | if (status) | 1155 | if (status) |
1041 | goto bail; | 1156 | goto bail; |
1042 | 1157 | ||
1043 | status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key, | 1158 | status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key, |
1044 | sizeof(struct dlm_assert_master), | 1159 | sizeof(struct dlm_assert_master), |
1045 | dlm_assert_master_handler, | 1160 | dlm_assert_master_handler, |
1046 | dlm, &dlm->dlm_domain_handlers); | 1161 | dlm, dlm_assert_master_post_handler, |
1162 | &dlm->dlm_domain_handlers); | ||
1047 | if (status) | 1163 | if (status) |
1048 | goto bail; | 1164 | goto bail; |
1049 | 1165 | ||
1050 | status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key, | 1166 | status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key, |
1051 | sizeof(struct dlm_create_lock), | 1167 | sizeof(struct dlm_create_lock), |
1052 | dlm_create_lock_handler, | 1168 | dlm_create_lock_handler, |
1053 | dlm, &dlm->dlm_domain_handlers); | 1169 | dlm, NULL, &dlm->dlm_domain_handlers); |
1054 | if (status) | 1170 | if (status) |
1055 | goto bail; | 1171 | goto bail; |
1056 | 1172 | ||
1057 | status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key, | 1173 | status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key, |
1058 | DLM_CONVERT_LOCK_MAX_LEN, | 1174 | DLM_CONVERT_LOCK_MAX_LEN, |
1059 | dlm_convert_lock_handler, | 1175 | dlm_convert_lock_handler, |
1060 | dlm, &dlm->dlm_domain_handlers); | 1176 | dlm, NULL, &dlm->dlm_domain_handlers); |
1061 | if (status) | 1177 | if (status) |
1062 | goto bail; | 1178 | goto bail; |
1063 | 1179 | ||
1064 | status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key, | 1180 | status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key, |
1065 | DLM_UNLOCK_LOCK_MAX_LEN, | 1181 | DLM_UNLOCK_LOCK_MAX_LEN, |
1066 | dlm_unlock_lock_handler, | 1182 | dlm_unlock_lock_handler, |
1067 | dlm, &dlm->dlm_domain_handlers); | 1183 | dlm, NULL, &dlm->dlm_domain_handlers); |
1068 | if (status) | 1184 | if (status) |
1069 | goto bail; | 1185 | goto bail; |
1070 | 1186 | ||
1071 | status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key, | 1187 | status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key, |
1072 | DLM_PROXY_AST_MAX_LEN, | 1188 | DLM_PROXY_AST_MAX_LEN, |
1073 | dlm_proxy_ast_handler, | 1189 | dlm_proxy_ast_handler, |
1074 | dlm, &dlm->dlm_domain_handlers); | 1190 | dlm, NULL, &dlm->dlm_domain_handlers); |
1075 | if (status) | 1191 | if (status) |
1076 | goto bail; | 1192 | goto bail; |
1077 | 1193 | ||
1078 | status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key, | 1194 | status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key, |
1079 | sizeof(struct dlm_exit_domain), | 1195 | sizeof(struct dlm_exit_domain), |
1080 | dlm_exit_domain_handler, | 1196 | dlm_exit_domain_handler, |
1081 | dlm, &dlm->dlm_domain_handlers); | 1197 | dlm, NULL, &dlm->dlm_domain_handlers); |
1198 | if (status) | ||
1199 | goto bail; | ||
1200 | |||
1201 | status = o2net_register_handler(DLM_DEREF_LOCKRES_MSG, dlm->key, | ||
1202 | sizeof(struct dlm_deref_lockres), | ||
1203 | dlm_deref_lockres_handler, | ||
1204 | dlm, NULL, &dlm->dlm_domain_handlers); | ||
1082 | if (status) | 1205 | if (status) |
1083 | goto bail; | 1206 | goto bail; |
1084 | 1207 | ||
1085 | status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key, | 1208 | status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key, |
1086 | sizeof(struct dlm_migrate_request), | 1209 | sizeof(struct dlm_migrate_request), |
1087 | dlm_migrate_request_handler, | 1210 | dlm_migrate_request_handler, |
1088 | dlm, &dlm->dlm_domain_handlers); | 1211 | dlm, NULL, &dlm->dlm_domain_handlers); |
1089 | if (status) | 1212 | if (status) |
1090 | goto bail; | 1213 | goto bail; |
1091 | 1214 | ||
1092 | status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key, | 1215 | status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key, |
1093 | DLM_MIG_LOCKRES_MAX_LEN, | 1216 | DLM_MIG_LOCKRES_MAX_LEN, |
1094 | dlm_mig_lockres_handler, | 1217 | dlm_mig_lockres_handler, |
1095 | dlm, &dlm->dlm_domain_handlers); | 1218 | dlm, NULL, &dlm->dlm_domain_handlers); |
1096 | if (status) | 1219 | if (status) |
1097 | goto bail; | 1220 | goto bail; |
1098 | 1221 | ||
1099 | status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key, | 1222 | status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key, |
1100 | sizeof(struct dlm_master_requery), | 1223 | sizeof(struct dlm_master_requery), |
1101 | dlm_master_requery_handler, | 1224 | dlm_master_requery_handler, |
1102 | dlm, &dlm->dlm_domain_handlers); | 1225 | dlm, NULL, &dlm->dlm_domain_handlers); |
1103 | if (status) | 1226 | if (status) |
1104 | goto bail; | 1227 | goto bail; |
1105 | 1228 | ||
1106 | status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key, | 1229 | status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key, |
1107 | sizeof(struct dlm_lock_request), | 1230 | sizeof(struct dlm_lock_request), |
1108 | dlm_request_all_locks_handler, | 1231 | dlm_request_all_locks_handler, |
1109 | dlm, &dlm->dlm_domain_handlers); | 1232 | dlm, NULL, &dlm->dlm_domain_handlers); |
1110 | if (status) | 1233 | if (status) |
1111 | goto bail; | 1234 | goto bail; |
1112 | 1235 | ||
1113 | status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key, | 1236 | status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key, |
1114 | sizeof(struct dlm_reco_data_done), | 1237 | sizeof(struct dlm_reco_data_done), |
1115 | dlm_reco_data_done_handler, | 1238 | dlm_reco_data_done_handler, |
1116 | dlm, &dlm->dlm_domain_handlers); | 1239 | dlm, NULL, &dlm->dlm_domain_handlers); |
1117 | if (status) | 1240 | if (status) |
1118 | goto bail; | 1241 | goto bail; |
1119 | 1242 | ||
1120 | status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key, | 1243 | status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key, |
1121 | sizeof(struct dlm_begin_reco), | 1244 | sizeof(struct dlm_begin_reco), |
1122 | dlm_begin_reco_handler, | 1245 | dlm_begin_reco_handler, |
1123 | dlm, &dlm->dlm_domain_handlers); | 1246 | dlm, NULL, &dlm->dlm_domain_handlers); |
1124 | if (status) | 1247 | if (status) |
1125 | goto bail; | 1248 | goto bail; |
1126 | 1249 | ||
1127 | status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key, | 1250 | status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key, |
1128 | sizeof(struct dlm_finalize_reco), | 1251 | sizeof(struct dlm_finalize_reco), |
1129 | dlm_finalize_reco_handler, | 1252 | dlm_finalize_reco_handler, |
1130 | dlm, &dlm->dlm_domain_handlers); | 1253 | dlm, NULL, &dlm->dlm_domain_handlers); |
1131 | if (status) | 1254 | if (status) |
1132 | goto bail; | 1255 | goto bail; |
1133 | 1256 | ||
@@ -1141,6 +1264,8 @@ bail: | |||
1141 | static int dlm_join_domain(struct dlm_ctxt *dlm) | 1264 | static int dlm_join_domain(struct dlm_ctxt *dlm) |
1142 | { | 1265 | { |
1143 | int status; | 1266 | int status; |
1267 | unsigned int backoff; | ||
1268 | unsigned int total_backoff = 0; | ||
1144 | 1269 | ||
1145 | BUG_ON(!dlm); | 1270 | BUG_ON(!dlm); |
1146 | 1271 | ||
@@ -1172,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
1172 | } | 1297 | } |
1173 | 1298 | ||
1174 | do { | 1299 | do { |
1175 | unsigned int backoff; | ||
1176 | status = dlm_try_to_join_domain(dlm); | 1300 | status = dlm_try_to_join_domain(dlm); |
1177 | 1301 | ||
1178 | /* If we're racing another node to the join, then we | 1302 | /* If we're racing another node to the join, then we |
1179 | * need to back off temporarily and let them | 1303 | * need to back off temporarily and let them |
1180 | * complete. */ | 1304 | * complete. */ |
1305 | #define DLM_JOIN_TIMEOUT_MSECS 90000 | ||
1181 | if (status == -EAGAIN) { | 1306 | if (status == -EAGAIN) { |
1182 | if (signal_pending(current)) { | 1307 | if (signal_pending(current)) { |
1183 | status = -ERESTARTSYS; | 1308 | status = -ERESTARTSYS; |
1184 | goto bail; | 1309 | goto bail; |
1185 | } | 1310 | } |
1186 | 1311 | ||
1312 | if (total_backoff > | ||
1313 | msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) { | ||
1314 | status = -ERESTARTSYS; | ||
1315 | mlog(ML_NOTICE, "Timed out joining dlm domain " | ||
1316 | "%s after %u msecs\n", dlm->name, | ||
1317 | jiffies_to_msecs(total_backoff)); | ||
1318 | goto bail; | ||
1319 | } | ||
1320 | |||
1187 | /* | 1321 | /* |
1188 | * <chip> After you! | 1322 | * <chip> After you! |
1189 | * <dale> No, after you! | 1323 | * <dale> No, after you! |
@@ -1193,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
1193 | */ | 1327 | */ |
1194 | backoff = (unsigned int)(jiffies & 0x3); | 1328 | backoff = (unsigned int)(jiffies & 0x3); |
1195 | backoff *= DLM_DOMAIN_BACKOFF_MS; | 1329 | backoff *= DLM_DOMAIN_BACKOFF_MS; |
1330 | total_backoff += backoff; | ||
1196 | mlog(0, "backoff %d\n", backoff); | 1331 | mlog(0, "backoff %d\n", backoff); |
1197 | msleep(backoff); | 1332 | msleep(backoff); |
1198 | } | 1333 | } |
@@ -1421,21 +1556,21 @@ static int dlm_register_net_handlers(void) | |||
1421 | status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, | 1556 | status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, |
1422 | sizeof(struct dlm_query_join_request), | 1557 | sizeof(struct dlm_query_join_request), |
1423 | dlm_query_join_handler, | 1558 | dlm_query_join_handler, |
1424 | NULL, &dlm_join_handlers); | 1559 | NULL, NULL, &dlm_join_handlers); |
1425 | if (status) | 1560 | if (status) |
1426 | goto bail; | 1561 | goto bail; |
1427 | 1562 | ||
1428 | status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, | 1563 | status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, |
1429 | sizeof(struct dlm_assert_joined), | 1564 | sizeof(struct dlm_assert_joined), |
1430 | dlm_assert_joined_handler, | 1565 | dlm_assert_joined_handler, |
1431 | NULL, &dlm_join_handlers); | 1566 | NULL, NULL, &dlm_join_handlers); |
1432 | if (status) | 1567 | if (status) |
1433 | goto bail; | 1568 | goto bail; |
1434 | 1569 | ||
1435 | status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, | 1570 | status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY, |
1436 | sizeof(struct dlm_cancel_join), | 1571 | sizeof(struct dlm_cancel_join), |
1437 | dlm_cancel_join_handler, | 1572 | dlm_cancel_join_handler, |
1438 | NULL, &dlm_join_handlers); | 1573 | NULL, NULL, &dlm_join_handlers); |
1439 | 1574 | ||
1440 | bail: | 1575 | bail: |
1441 | if (status < 0) | 1576 | if (status < 0) |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index e5ca3db197f6..52578d907d9a 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -163,6 +163,10 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, | |||
163 | kick_thread = 1; | 163 | kick_thread = 1; |
164 | } | 164 | } |
165 | } | 165 | } |
166 | /* reduce the inflight count, this may result in the lockres | ||
167 | * being purged below during calc_usage */ | ||
168 | if (lock->ml.node == dlm->node_num) | ||
169 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
166 | 170 | ||
167 | spin_unlock(&res->spinlock); | 171 | spin_unlock(&res->spinlock); |
168 | wake_up(&res->wq); | 172 | wake_up(&res->wq); |
@@ -437,7 +441,8 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, | |||
437 | * held on exit: none | 441 | * held on exit: none |
438 | * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED | 442 | * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED |
439 | */ | 443 | */ |
440 | int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data) | 444 | int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
445 | void **ret_data) | ||
441 | { | 446 | { |
442 | struct dlm_ctxt *dlm = data; | 447 | struct dlm_ctxt *dlm = data; |
443 | struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf; | 448 | struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf; |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 0ad872055cb3..77e4e6169a0d 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -99,9 +99,10 @@ static void dlm_mle_node_up(struct dlm_ctxt *dlm, | |||
99 | int idx); | 99 | int idx); |
100 | 100 | ||
101 | static void dlm_assert_master_worker(struct dlm_work_item *item, void *data); | 101 | static void dlm_assert_master_worker(struct dlm_work_item *item, void *data); |
102 | static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, | 102 | static int dlm_do_assert_master(struct dlm_ctxt *dlm, |
103 | unsigned int namelen, void *nodemap, | 103 | struct dlm_lock_resource *res, |
104 | u32 flags); | 104 | void *nodemap, u32 flags); |
105 | static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data); | ||
105 | 106 | ||
106 | static inline int dlm_mle_equal(struct dlm_ctxt *dlm, | 107 | static inline int dlm_mle_equal(struct dlm_ctxt *dlm, |
107 | struct dlm_master_list_entry *mle, | 108 | struct dlm_master_list_entry *mle, |
@@ -237,7 +238,8 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, | |||
237 | struct dlm_master_list_entry **mle, | 238 | struct dlm_master_list_entry **mle, |
238 | char *name, unsigned int namelen); | 239 | char *name, unsigned int namelen); |
239 | 240 | ||
240 | static int dlm_do_master_request(struct dlm_master_list_entry *mle, int to); | 241 | static int dlm_do_master_request(struct dlm_lock_resource *res, |
242 | struct dlm_master_list_entry *mle, int to); | ||
241 | 243 | ||
242 | 244 | ||
243 | static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm, | 245 | static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm, |
@@ -687,6 +689,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
687 | INIT_LIST_HEAD(&res->purge); | 689 | INIT_LIST_HEAD(&res->purge); |
688 | atomic_set(&res->asts_reserved, 0); | 690 | atomic_set(&res->asts_reserved, 0); |
689 | res->migration_pending = 0; | 691 | res->migration_pending = 0; |
692 | res->inflight_locks = 0; | ||
690 | 693 | ||
691 | kref_init(&res->refs); | 694 | kref_init(&res->refs); |
692 | 695 | ||
@@ -700,6 +703,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
700 | res->last_used = 0; | 703 | res->last_used = 0; |
701 | 704 | ||
702 | memset(res->lvb, 0, DLM_LVB_LEN); | 705 | memset(res->lvb, 0, DLM_LVB_LEN); |
706 | memset(res->refmap, 0, sizeof(res->refmap)); | ||
703 | } | 707 | } |
704 | 708 | ||
705 | struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | 709 | struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, |
@@ -722,6 +726,42 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
722 | return res; | 726 | return res; |
723 | } | 727 | } |
724 | 728 | ||
729 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | ||
730 | struct dlm_lock_resource *res, | ||
731 | int new_lockres, | ||
732 | const char *file, | ||
733 | int line) | ||
734 | { | ||
735 | if (!new_lockres) | ||
736 | assert_spin_locked(&res->spinlock); | ||
737 | |||
738 | if (!test_bit(dlm->node_num, res->refmap)) { | ||
739 | BUG_ON(res->inflight_locks != 0); | ||
740 | dlm_lockres_set_refmap_bit(dlm->node_num, res); | ||
741 | } | ||
742 | res->inflight_locks++; | ||
743 | mlog(0, "%s:%.*s: inflight++: now %u\n", | ||
744 | dlm->name, res->lockname.len, res->lockname.name, | ||
745 | res->inflight_locks); | ||
746 | } | ||
747 | |||
748 | void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | ||
749 | struct dlm_lock_resource *res, | ||
750 | const char *file, | ||
751 | int line) | ||
752 | { | ||
753 | assert_spin_locked(&res->spinlock); | ||
754 | |||
755 | BUG_ON(res->inflight_locks == 0); | ||
756 | res->inflight_locks--; | ||
757 | mlog(0, "%s:%.*s: inflight--: now %u\n", | ||
758 | dlm->name, res->lockname.len, res->lockname.name, | ||
759 | res->inflight_locks); | ||
760 | if (res->inflight_locks == 0) | ||
761 | dlm_lockres_clear_refmap_bit(dlm->node_num, res); | ||
762 | wake_up(&res->wq); | ||
763 | } | ||
764 | |||
725 | /* | 765 | /* |
726 | * lookup a lock resource by name. | 766 | * lookup a lock resource by name. |
727 | * may already exist in the hashtable. | 767 | * may already exist in the hashtable. |
@@ -752,6 +792,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | |||
752 | unsigned int hash; | 792 | unsigned int hash; |
753 | int tries = 0; | 793 | int tries = 0; |
754 | int bit, wait_on_recovery = 0; | 794 | int bit, wait_on_recovery = 0; |
795 | int drop_inflight_if_nonlocal = 0; | ||
755 | 796 | ||
756 | BUG_ON(!lockid); | 797 | BUG_ON(!lockid); |
757 | 798 | ||
@@ -761,9 +802,30 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | |||
761 | 802 | ||
762 | lookup: | 803 | lookup: |
763 | spin_lock(&dlm->spinlock); | 804 | spin_lock(&dlm->spinlock); |
764 | tmpres = __dlm_lookup_lockres(dlm, lockid, namelen, hash); | 805 | tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); |
765 | if (tmpres) { | 806 | if (tmpres) { |
807 | int dropping_ref = 0; | ||
808 | |||
809 | spin_lock(&tmpres->spinlock); | ||
810 | if (tmpres->owner == dlm->node_num) { | ||
811 | BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); | ||
812 | dlm_lockres_grab_inflight_ref(dlm, tmpres); | ||
813 | } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) | ||
814 | dropping_ref = 1; | ||
815 | spin_unlock(&tmpres->spinlock); | ||
766 | spin_unlock(&dlm->spinlock); | 816 | spin_unlock(&dlm->spinlock); |
817 | |||
818 | /* wait until done messaging the master, drop our ref to allow | ||
819 | * the lockres to be purged, start over. */ | ||
820 | if (dropping_ref) { | ||
821 | spin_lock(&tmpres->spinlock); | ||
822 | __dlm_wait_on_lockres_flags(tmpres, DLM_LOCK_RES_DROPPING_REF); | ||
823 | spin_unlock(&tmpres->spinlock); | ||
824 | dlm_lockres_put(tmpres); | ||
825 | tmpres = NULL; | ||
826 | goto lookup; | ||
827 | } | ||
828 | |||
767 | mlog(0, "found in hash!\n"); | 829 | mlog(0, "found in hash!\n"); |
768 | if (res) | 830 | if (res) |
769 | dlm_lockres_put(res); | 831 | dlm_lockres_put(res); |
@@ -793,6 +855,7 @@ lookup: | |||
793 | spin_lock(&res->spinlock); | 855 | spin_lock(&res->spinlock); |
794 | dlm_change_lockres_owner(dlm, res, dlm->node_num); | 856 | dlm_change_lockres_owner(dlm, res, dlm->node_num); |
795 | __dlm_insert_lockres(dlm, res); | 857 | __dlm_insert_lockres(dlm, res); |
858 | dlm_lockres_grab_inflight_ref(dlm, res); | ||
796 | spin_unlock(&res->spinlock); | 859 | spin_unlock(&res->spinlock); |
797 | spin_unlock(&dlm->spinlock); | 860 | spin_unlock(&dlm->spinlock); |
798 | /* lockres still marked IN_PROGRESS */ | 861 | /* lockres still marked IN_PROGRESS */ |
@@ -805,29 +868,40 @@ lookup: | |||
805 | /* if we found a block, wait for lock to be mastered by another node */ | 868 | /* if we found a block, wait for lock to be mastered by another node */ |
806 | blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen); | 869 | blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen); |
807 | if (blocked) { | 870 | if (blocked) { |
871 | int mig; | ||
808 | if (mle->type == DLM_MLE_MASTER) { | 872 | if (mle->type == DLM_MLE_MASTER) { |
809 | mlog(ML_ERROR, "master entry for nonexistent lock!\n"); | 873 | mlog(ML_ERROR, "master entry for nonexistent lock!\n"); |
810 | BUG(); | 874 | BUG(); |
811 | } else if (mle->type == DLM_MLE_MIGRATION) { | 875 | } |
812 | /* migration is in progress! */ | 876 | mig = (mle->type == DLM_MLE_MIGRATION); |
813 | /* the good news is that we now know the | 877 | /* if there is a migration in progress, let the migration |
814 | * "current" master (mle->master). */ | 878 | * finish before continuing. we can wait for the absence |
815 | 879 | * of the MIGRATION mle: either the migrate finished or | |
880 | * one of the nodes died and the mle was cleaned up. | ||
881 | * if there is a BLOCK here, but it already has a master | ||
882 | * set, we are too late. the master does not have a ref | ||
883 | * for us in the refmap. detach the mle and drop it. | ||
884 | * either way, go back to the top and start over. */ | ||
885 | if (mig || mle->master != O2NM_MAX_NODES) { | ||
886 | BUG_ON(mig && mle->master == dlm->node_num); | ||
887 | /* we arrived too late. the master does not | ||
888 | * have a ref for us. retry. */ | ||
889 | mlog(0, "%s:%.*s: late on %s\n", | ||
890 | dlm->name, namelen, lockid, | ||
891 | mig ? "MIGRATION" : "BLOCK"); | ||
816 | spin_unlock(&dlm->master_lock); | 892 | spin_unlock(&dlm->master_lock); |
817 | assert_spin_locked(&dlm->spinlock); | ||
818 | |||
819 | /* set the lockres owner and hash it */ | ||
820 | spin_lock(&res->spinlock); | ||
821 | dlm_set_lockres_owner(dlm, res, mle->master); | ||
822 | __dlm_insert_lockres(dlm, res); | ||
823 | spin_unlock(&res->spinlock); | ||
824 | spin_unlock(&dlm->spinlock); | 893 | spin_unlock(&dlm->spinlock); |
825 | 894 | ||
826 | /* master is known, detach */ | 895 | /* master is known, detach */ |
827 | dlm_mle_detach_hb_events(dlm, mle); | 896 | if (!mig) |
897 | dlm_mle_detach_hb_events(dlm, mle); | ||
828 | dlm_put_mle(mle); | 898 | dlm_put_mle(mle); |
829 | mle = NULL; | 899 | mle = NULL; |
830 | goto wake_waiters; | 900 | /* this is lame, but we cant wait on either |
901 | * the mle or lockres waitqueue here */ | ||
902 | if (mig) | ||
903 | msleep(100); | ||
904 | goto lookup; | ||
831 | } | 905 | } |
832 | } else { | 906 | } else { |
833 | /* go ahead and try to master lock on this node */ | 907 | /* go ahead and try to master lock on this node */ |
@@ -858,6 +932,13 @@ lookup: | |||
858 | 932 | ||
859 | /* finally add the lockres to its hash bucket */ | 933 | /* finally add the lockres to its hash bucket */ |
860 | __dlm_insert_lockres(dlm, res); | 934 | __dlm_insert_lockres(dlm, res); |
935 | /* since this lockres is new it doesnt not require the spinlock */ | ||
936 | dlm_lockres_grab_inflight_ref_new(dlm, res); | ||
937 | |||
938 | /* if this node does not become the master make sure to drop | ||
939 | * this inflight reference below */ | ||
940 | drop_inflight_if_nonlocal = 1; | ||
941 | |||
861 | /* get an extra ref on the mle in case this is a BLOCK | 942 | /* get an extra ref on the mle in case this is a BLOCK |
862 | * if so, the creator of the BLOCK may try to put the last | 943 | * if so, the creator of the BLOCK may try to put the last |
863 | * ref at this time in the assert master handler, so we | 944 | * ref at this time in the assert master handler, so we |
@@ -910,7 +991,7 @@ redo_request: | |||
910 | ret = -EINVAL; | 991 | ret = -EINVAL; |
911 | dlm_node_iter_init(mle->vote_map, &iter); | 992 | dlm_node_iter_init(mle->vote_map, &iter); |
912 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { | 993 | while ((nodenum = dlm_node_iter_next(&iter)) >= 0) { |
913 | ret = dlm_do_master_request(mle, nodenum); | 994 | ret = dlm_do_master_request(res, mle, nodenum); |
914 | if (ret < 0) | 995 | if (ret < 0) |
915 | mlog_errno(ret); | 996 | mlog_errno(ret); |
916 | if (mle->master != O2NM_MAX_NODES) { | 997 | if (mle->master != O2NM_MAX_NODES) { |
@@ -960,6 +1041,8 @@ wait: | |||
960 | 1041 | ||
961 | wake_waiters: | 1042 | wake_waiters: |
962 | spin_lock(&res->spinlock); | 1043 | spin_lock(&res->spinlock); |
1044 | if (res->owner != dlm->node_num && drop_inflight_if_nonlocal) | ||
1045 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
963 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; | 1046 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; |
964 | spin_unlock(&res->spinlock); | 1047 | spin_unlock(&res->spinlock); |
965 | wake_up(&res->wq); | 1048 | wake_up(&res->wq); |
@@ -998,7 +1081,7 @@ recheck: | |||
998 | /* this will cause the master to re-assert across | 1081 | /* this will cause the master to re-assert across |
999 | * the whole cluster, freeing up mles */ | 1082 | * the whole cluster, freeing up mles */ |
1000 | if (res->owner != dlm->node_num) { | 1083 | if (res->owner != dlm->node_num) { |
1001 | ret = dlm_do_master_request(mle, res->owner); | 1084 | ret = dlm_do_master_request(res, mle, res->owner); |
1002 | if (ret < 0) { | 1085 | if (ret < 0) { |
1003 | /* give recovery a chance to run */ | 1086 | /* give recovery a chance to run */ |
1004 | mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); | 1087 | mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret); |
@@ -1062,6 +1145,8 @@ recheck: | |||
1062 | * now tell other nodes that I am | 1145 | * now tell other nodes that I am |
1063 | * mastering this. */ | 1146 | * mastering this. */ |
1064 | mle->master = dlm->node_num; | 1147 | mle->master = dlm->node_num; |
1148 | /* ref was grabbed in get_lock_resource | ||
1149 | * will be dropped in dlmlock_master */ | ||
1065 | assert = 1; | 1150 | assert = 1; |
1066 | sleep = 0; | 1151 | sleep = 0; |
1067 | } | 1152 | } |
@@ -1087,7 +1172,8 @@ recheck: | |||
1087 | (atomic_read(&mle->woken) == 1), | 1172 | (atomic_read(&mle->woken) == 1), |
1088 | timeo); | 1173 | timeo); |
1089 | if (res->owner == O2NM_MAX_NODES) { | 1174 | if (res->owner == O2NM_MAX_NODES) { |
1090 | mlog(0, "waiting again\n"); | 1175 | mlog(0, "%s:%.*s: waiting again\n", dlm->name, |
1176 | res->lockname.len, res->lockname.name); | ||
1091 | goto recheck; | 1177 | goto recheck; |
1092 | } | 1178 | } |
1093 | mlog(0, "done waiting, master is %u\n", res->owner); | 1179 | mlog(0, "done waiting, master is %u\n", res->owner); |
@@ -1100,8 +1186,7 @@ recheck: | |||
1100 | m = dlm->node_num; | 1186 | m = dlm->node_num; |
1101 | mlog(0, "about to master %.*s here, this=%u\n", | 1187 | mlog(0, "about to master %.*s here, this=%u\n", |
1102 | res->lockname.len, res->lockname.name, m); | 1188 | res->lockname.len, res->lockname.name, m); |
1103 | ret = dlm_do_assert_master(dlm, res->lockname.name, | 1189 | ret = dlm_do_assert_master(dlm, res, mle->vote_map, 0); |
1104 | res->lockname.len, mle->vote_map, 0); | ||
1105 | if (ret) { | 1190 | if (ret) { |
1106 | /* This is a failure in the network path, | 1191 | /* This is a failure in the network path, |
1107 | * not in the response to the assert_master | 1192 | * not in the response to the assert_master |
@@ -1117,6 +1202,8 @@ recheck: | |||
1117 | 1202 | ||
1118 | /* set the lockres owner */ | 1203 | /* set the lockres owner */ |
1119 | spin_lock(&res->spinlock); | 1204 | spin_lock(&res->spinlock); |
1205 | /* mastery reference obtained either during | ||
1206 | * assert_master_handler or in get_lock_resource */ | ||
1120 | dlm_change_lockres_owner(dlm, res, m); | 1207 | dlm_change_lockres_owner(dlm, res, m); |
1121 | spin_unlock(&res->spinlock); | 1208 | spin_unlock(&res->spinlock); |
1122 | 1209 | ||
@@ -1283,7 +1370,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, | |||
1283 | * | 1370 | * |
1284 | */ | 1371 | */ |
1285 | 1372 | ||
1286 | static int dlm_do_master_request(struct dlm_master_list_entry *mle, int to) | 1373 | static int dlm_do_master_request(struct dlm_lock_resource *res, |
1374 | struct dlm_master_list_entry *mle, int to) | ||
1287 | { | 1375 | { |
1288 | struct dlm_ctxt *dlm = mle->dlm; | 1376 | struct dlm_ctxt *dlm = mle->dlm; |
1289 | struct dlm_master_request request; | 1377 | struct dlm_master_request request; |
@@ -1339,6 +1427,9 @@ again: | |||
1339 | case DLM_MASTER_RESP_YES: | 1427 | case DLM_MASTER_RESP_YES: |
1340 | set_bit(to, mle->response_map); | 1428 | set_bit(to, mle->response_map); |
1341 | mlog(0, "node %u is the master, response=YES\n", to); | 1429 | mlog(0, "node %u is the master, response=YES\n", to); |
1430 | mlog(0, "%s:%.*s: master node %u now knows I have a " | ||
1431 | "reference\n", dlm->name, res->lockname.len, | ||
1432 | res->lockname.name, to); | ||
1342 | mle->master = to; | 1433 | mle->master = to; |
1343 | break; | 1434 | break; |
1344 | case DLM_MASTER_RESP_NO: | 1435 | case DLM_MASTER_RESP_NO: |
@@ -1379,7 +1470,8 @@ out: | |||
1379 | * | 1470 | * |
1380 | * if possible, TRIM THIS DOWN!!! | 1471 | * if possible, TRIM THIS DOWN!!! |
1381 | */ | 1472 | */ |
1382 | int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data) | 1473 | int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, |
1474 | void **ret_data) | ||
1383 | { | 1475 | { |
1384 | u8 response = DLM_MASTER_RESP_MAYBE; | 1476 | u8 response = DLM_MASTER_RESP_MAYBE; |
1385 | struct dlm_ctxt *dlm = data; | 1477 | struct dlm_ctxt *dlm = data; |
@@ -1417,10 +1509,11 @@ way_up_top: | |||
1417 | 1509 | ||
1418 | /* take care of the easy cases up front */ | 1510 | /* take care of the easy cases up front */ |
1419 | spin_lock(&res->spinlock); | 1511 | spin_lock(&res->spinlock); |
1420 | if (res->state & DLM_LOCK_RES_RECOVERING) { | 1512 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
1513 | DLM_LOCK_RES_MIGRATING)) { | ||
1421 | spin_unlock(&res->spinlock); | 1514 | spin_unlock(&res->spinlock); |
1422 | mlog(0, "returning DLM_MASTER_RESP_ERROR since res is " | 1515 | mlog(0, "returning DLM_MASTER_RESP_ERROR since res is " |
1423 | "being recovered\n"); | 1516 | "being recovered/migrated\n"); |
1424 | response = DLM_MASTER_RESP_ERROR; | 1517 | response = DLM_MASTER_RESP_ERROR; |
1425 | if (mle) | 1518 | if (mle) |
1426 | kmem_cache_free(dlm_mle_cache, mle); | 1519 | kmem_cache_free(dlm_mle_cache, mle); |
@@ -1428,8 +1521,10 @@ way_up_top: | |||
1428 | } | 1521 | } |
1429 | 1522 | ||
1430 | if (res->owner == dlm->node_num) { | 1523 | if (res->owner == dlm->node_num) { |
1524 | mlog(0, "%s:%.*s: setting bit %u in refmap\n", | ||
1525 | dlm->name, namelen, name, request->node_idx); | ||
1526 | dlm_lockres_set_refmap_bit(request->node_idx, res); | ||
1431 | spin_unlock(&res->spinlock); | 1527 | spin_unlock(&res->spinlock); |
1432 | // mlog(0, "this node is the master\n"); | ||
1433 | response = DLM_MASTER_RESP_YES; | 1528 | response = DLM_MASTER_RESP_YES; |
1434 | if (mle) | 1529 | if (mle) |
1435 | kmem_cache_free(dlm_mle_cache, mle); | 1530 | kmem_cache_free(dlm_mle_cache, mle); |
@@ -1477,7 +1572,6 @@ way_up_top: | |||
1477 | mlog(0, "node %u is master, but trying to migrate to " | 1572 | mlog(0, "node %u is master, but trying to migrate to " |
1478 | "node %u.\n", tmpmle->master, tmpmle->new_master); | 1573 | "node %u.\n", tmpmle->master, tmpmle->new_master); |
1479 | if (tmpmle->master == dlm->node_num) { | 1574 | if (tmpmle->master == dlm->node_num) { |
1480 | response = DLM_MASTER_RESP_YES; | ||
1481 | mlog(ML_ERROR, "no owner on lockres, but this " | 1575 | mlog(ML_ERROR, "no owner on lockres, but this " |
1482 | "node is trying to migrate it to %u?!\n", | 1576 | "node is trying to migrate it to %u?!\n", |
1483 | tmpmle->new_master); | 1577 | tmpmle->new_master); |
@@ -1494,6 +1588,10 @@ way_up_top: | |||
1494 | * go back and clean the mles on any | 1588 | * go back and clean the mles on any |
1495 | * other nodes */ | 1589 | * other nodes */ |
1496 | dispatch_assert = 1; | 1590 | dispatch_assert = 1; |
1591 | dlm_lockres_set_refmap_bit(request->node_idx, res); | ||
1592 | mlog(0, "%s:%.*s: setting bit %u in refmap\n", | ||
1593 | dlm->name, namelen, name, | ||
1594 | request->node_idx); | ||
1497 | } else | 1595 | } else |
1498 | response = DLM_MASTER_RESP_NO; | 1596 | response = DLM_MASTER_RESP_NO; |
1499 | } else { | 1597 | } else { |
@@ -1607,17 +1705,24 @@ send_response: | |||
1607 | * can periodically run all locks owned by this node | 1705 | * can periodically run all locks owned by this node |
1608 | * and re-assert across the cluster... | 1706 | * and re-assert across the cluster... |
1609 | */ | 1707 | */ |
1610 | static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname, | 1708 | int dlm_do_assert_master(struct dlm_ctxt *dlm, |
1611 | unsigned int namelen, void *nodemap, | 1709 | struct dlm_lock_resource *res, |
1612 | u32 flags) | 1710 | void *nodemap, u32 flags) |
1613 | { | 1711 | { |
1614 | struct dlm_assert_master assert; | 1712 | struct dlm_assert_master assert; |
1615 | int to, tmpret; | 1713 | int to, tmpret; |
1616 | struct dlm_node_iter iter; | 1714 | struct dlm_node_iter iter; |
1617 | int ret = 0; | 1715 | int ret = 0; |
1618 | int reassert; | 1716 | int reassert; |
1717 | const char *lockname = res->lockname.name; | ||
1718 | unsigned int namelen = res->lockname.len; | ||
1619 | 1719 | ||
1620 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); | 1720 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); |
1721 | |||
1722 | spin_lock(&res->spinlock); | ||
1723 | res->state |= DLM_LOCK_RES_SETREF_INPROG; | ||
1724 | spin_unlock(&res->spinlock); | ||
1725 | |||
1621 | again: | 1726 | again: |
1622 | reassert = 0; | 1727 | reassert = 0; |
1623 | 1728 | ||
@@ -1647,6 +1752,7 @@ again: | |||
1647 | mlog(0, "link to %d went down!\n", to); | 1752 | mlog(0, "link to %d went down!\n", to); |
1648 | /* any nonzero status return will do */ | 1753 | /* any nonzero status return will do */ |
1649 | ret = tmpret; | 1754 | ret = tmpret; |
1755 | r = 0; | ||
1650 | } else if (r < 0) { | 1756 | } else if (r < 0) { |
1651 | /* ok, something horribly messed. kill thyself. */ | 1757 | /* ok, something horribly messed. kill thyself. */ |
1652 | mlog(ML_ERROR,"during assert master of %.*s to %u, " | 1758 | mlog(ML_ERROR,"during assert master of %.*s to %u, " |
@@ -1661,17 +1767,39 @@ again: | |||
1661 | spin_unlock(&dlm->master_lock); | 1767 | spin_unlock(&dlm->master_lock); |
1662 | spin_unlock(&dlm->spinlock); | 1768 | spin_unlock(&dlm->spinlock); |
1663 | BUG(); | 1769 | BUG(); |
1664 | } else if (r == EAGAIN) { | 1770 | } |
1771 | |||
1772 | if (r & DLM_ASSERT_RESPONSE_REASSERT && | ||
1773 | !(r & DLM_ASSERT_RESPONSE_MASTERY_REF)) { | ||
1774 | mlog(ML_ERROR, "%.*s: very strange, " | ||
1775 | "master MLE but no lockres on %u\n", | ||
1776 | namelen, lockname, to); | ||
1777 | } | ||
1778 | |||
1779 | if (r & DLM_ASSERT_RESPONSE_REASSERT) { | ||
1665 | mlog(0, "%.*s: node %u create mles on other " | 1780 | mlog(0, "%.*s: node %u create mles on other " |
1666 | "nodes and requests a re-assert\n", | 1781 | "nodes and requests a re-assert\n", |
1667 | namelen, lockname, to); | 1782 | namelen, lockname, to); |
1668 | reassert = 1; | 1783 | reassert = 1; |
1669 | } | 1784 | } |
1785 | if (r & DLM_ASSERT_RESPONSE_MASTERY_REF) { | ||
1786 | mlog(0, "%.*s: node %u has a reference to this " | ||
1787 | "lockres, set the bit in the refmap\n", | ||
1788 | namelen, lockname, to); | ||
1789 | spin_lock(&res->spinlock); | ||
1790 | dlm_lockres_set_refmap_bit(to, res); | ||
1791 | spin_unlock(&res->spinlock); | ||
1792 | } | ||
1670 | } | 1793 | } |
1671 | 1794 | ||
1672 | if (reassert) | 1795 | if (reassert) |
1673 | goto again; | 1796 | goto again; |
1674 | 1797 | ||
1798 | spin_lock(&res->spinlock); | ||
1799 | res->state &= ~DLM_LOCK_RES_SETREF_INPROG; | ||
1800 | spin_unlock(&res->spinlock); | ||
1801 | wake_up(&res->wq); | ||
1802 | |||
1675 | return ret; | 1803 | return ret; |
1676 | } | 1804 | } |
1677 | 1805 | ||
@@ -1684,7 +1812,8 @@ again: | |||
1684 | * | 1812 | * |
1685 | * if possible, TRIM THIS DOWN!!! | 1813 | * if possible, TRIM THIS DOWN!!! |
1686 | */ | 1814 | */ |
1687 | int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data) | 1815 | int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, |
1816 | void **ret_data) | ||
1688 | { | 1817 | { |
1689 | struct dlm_ctxt *dlm = data; | 1818 | struct dlm_ctxt *dlm = data; |
1690 | struct dlm_master_list_entry *mle = NULL; | 1819 | struct dlm_master_list_entry *mle = NULL; |
@@ -1693,7 +1822,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data) | |||
1693 | char *name; | 1822 | char *name; |
1694 | unsigned int namelen, hash; | 1823 | unsigned int namelen, hash; |
1695 | u32 flags; | 1824 | u32 flags; |
1696 | int master_request = 0; | 1825 | int master_request = 0, have_lockres_ref = 0; |
1697 | int ret = 0; | 1826 | int ret = 0; |
1698 | 1827 | ||
1699 | if (!dlm_grab(dlm)) | 1828 | if (!dlm_grab(dlm)) |
@@ -1851,6 +1980,7 @@ ok: | |||
1851 | spin_unlock(&mle->spinlock); | 1980 | spin_unlock(&mle->spinlock); |
1852 | 1981 | ||
1853 | if (res) { | 1982 | if (res) { |
1983 | int wake = 0; | ||
1854 | spin_lock(&res->spinlock); | 1984 | spin_lock(&res->spinlock); |
1855 | if (mle->type == DLM_MLE_MIGRATION) { | 1985 | if (mle->type == DLM_MLE_MIGRATION) { |
1856 | mlog(0, "finishing off migration of lockres %.*s, " | 1986 | mlog(0, "finishing off migration of lockres %.*s, " |
@@ -1858,12 +1988,16 @@ ok: | |||
1858 | res->lockname.len, res->lockname.name, | 1988 | res->lockname.len, res->lockname.name, |
1859 | dlm->node_num, mle->new_master); | 1989 | dlm->node_num, mle->new_master); |
1860 | res->state &= ~DLM_LOCK_RES_MIGRATING; | 1990 | res->state &= ~DLM_LOCK_RES_MIGRATING; |
1991 | wake = 1; | ||
1861 | dlm_change_lockres_owner(dlm, res, mle->new_master); | 1992 | dlm_change_lockres_owner(dlm, res, mle->new_master); |
1862 | BUG_ON(res->state & DLM_LOCK_RES_DIRTY); | 1993 | BUG_ON(res->state & DLM_LOCK_RES_DIRTY); |
1863 | } else { | 1994 | } else { |
1864 | dlm_change_lockres_owner(dlm, res, mle->master); | 1995 | dlm_change_lockres_owner(dlm, res, mle->master); |
1865 | } | 1996 | } |
1866 | spin_unlock(&res->spinlock); | 1997 | spin_unlock(&res->spinlock); |
1998 | have_lockres_ref = 1; | ||
1999 | if (wake) | ||
2000 | wake_up(&res->wq); | ||
1867 | } | 2001 | } |
1868 | 2002 | ||
1869 | /* master is known, detach if not already detached. | 2003 | /* master is known, detach if not already detached. |
@@ -1913,12 +2047,28 @@ ok: | |||
1913 | 2047 | ||
1914 | done: | 2048 | done: |
1915 | ret = 0; | 2049 | ret = 0; |
1916 | if (res) | 2050 | if (res) { |
1917 | dlm_lockres_put(res); | 2051 | spin_lock(&res->spinlock); |
2052 | res->state |= DLM_LOCK_RES_SETREF_INPROG; | ||
2053 | spin_unlock(&res->spinlock); | ||
2054 | *ret_data = (void *)res; | ||
2055 | } | ||
1918 | dlm_put(dlm); | 2056 | dlm_put(dlm); |
1919 | if (master_request) { | 2057 | if (master_request) { |
1920 | mlog(0, "need to tell master to reassert\n"); | 2058 | mlog(0, "need to tell master to reassert\n"); |
1921 | ret = EAGAIN; // positive. negative would shoot down the node. | 2059 | /* positive. negative would shoot down the node. */ |
2060 | ret |= DLM_ASSERT_RESPONSE_REASSERT; | ||
2061 | if (!have_lockres_ref) { | ||
2062 | mlog(ML_ERROR, "strange, got assert from %u, MASTER " | ||
2063 | "mle present here for %s:%.*s, but no lockres!\n", | ||
2064 | assert->node_idx, dlm->name, namelen, name); | ||
2065 | } | ||
2066 | } | ||
2067 | if (have_lockres_ref) { | ||
2068 | /* let the master know we have a reference to the lockres */ | ||
2069 | ret |= DLM_ASSERT_RESPONSE_MASTERY_REF; | ||
2070 | mlog(0, "%s:%.*s: got assert from %u, need a ref\n", | ||
2071 | dlm->name, namelen, name, assert->node_idx); | ||
1922 | } | 2072 | } |
1923 | return ret; | 2073 | return ret; |
1924 | 2074 | ||
@@ -1929,11 +2079,25 @@ kill: | |||
1929 | __dlm_print_one_lock_resource(res); | 2079 | __dlm_print_one_lock_resource(res); |
1930 | spin_unlock(&res->spinlock); | 2080 | spin_unlock(&res->spinlock); |
1931 | spin_unlock(&dlm->spinlock); | 2081 | spin_unlock(&dlm->spinlock); |
1932 | dlm_lockres_put(res); | 2082 | *ret_data = (void *)res; |
1933 | dlm_put(dlm); | 2083 | dlm_put(dlm); |
1934 | return -EINVAL; | 2084 | return -EINVAL; |
1935 | } | 2085 | } |
1936 | 2086 | ||
2087 | void dlm_assert_master_post_handler(int status, void *data, void *ret_data) | ||
2088 | { | ||
2089 | struct dlm_lock_resource *res = (struct dlm_lock_resource *)ret_data; | ||
2090 | |||
2091 | if (ret_data) { | ||
2092 | spin_lock(&res->spinlock); | ||
2093 | res->state &= ~DLM_LOCK_RES_SETREF_INPROG; | ||
2094 | spin_unlock(&res->spinlock); | ||
2095 | wake_up(&res->wq); | ||
2096 | dlm_lockres_put(res); | ||
2097 | } | ||
2098 | return; | ||
2099 | } | ||
2100 | |||
1937 | int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, | 2101 | int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, |
1938 | struct dlm_lock_resource *res, | 2102 | struct dlm_lock_resource *res, |
1939 | int ignore_higher, u8 request_from, u32 flags) | 2103 | int ignore_higher, u8 request_from, u32 flags) |
@@ -2023,9 +2187,7 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data) | |||
2023 | * even if one or more nodes die */ | 2187 | * even if one or more nodes die */ |
2024 | mlog(0, "worker about to master %.*s here, this=%u\n", | 2188 | mlog(0, "worker about to master %.*s here, this=%u\n", |
2025 | res->lockname.len, res->lockname.name, dlm->node_num); | 2189 | res->lockname.len, res->lockname.name, dlm->node_num); |
2026 | ret = dlm_do_assert_master(dlm, res->lockname.name, | 2190 | ret = dlm_do_assert_master(dlm, res, nodemap, flags); |
2027 | res->lockname.len, | ||
2028 | nodemap, flags); | ||
2029 | if (ret < 0) { | 2191 | if (ret < 0) { |
2030 | /* no need to restart, we are done */ | 2192 | /* no need to restart, we are done */ |
2031 | if (!dlm_is_host_down(ret)) | 2193 | if (!dlm_is_host_down(ret)) |
@@ -2097,14 +2259,180 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, | |||
2097 | return ret; | 2259 | return ret; |
2098 | } | 2260 | } |
2099 | 2261 | ||
2262 | /* | ||
2263 | * DLM_DEREF_LOCKRES_MSG | ||
2264 | */ | ||
2265 | |||
2266 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | ||
2267 | { | ||
2268 | struct dlm_deref_lockres deref; | ||
2269 | int ret = 0, r; | ||
2270 | const char *lockname; | ||
2271 | unsigned int namelen; | ||
2272 | |||
2273 | lockname = res->lockname.name; | ||
2274 | namelen = res->lockname.len; | ||
2275 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); | ||
2276 | |||
2277 | mlog(0, "%s:%.*s: sending deref to %d\n", | ||
2278 | dlm->name, namelen, lockname, res->owner); | ||
2279 | memset(&deref, 0, sizeof(deref)); | ||
2280 | deref.node_idx = dlm->node_num; | ||
2281 | deref.namelen = namelen; | ||
2282 | memcpy(deref.name, lockname, namelen); | ||
2283 | |||
2284 | ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, | ||
2285 | &deref, sizeof(deref), res->owner, &r); | ||
2286 | if (ret < 0) | ||
2287 | mlog_errno(ret); | ||
2288 | else if (r < 0) { | ||
2289 | /* BAD. other node says I did not have a ref. */ | ||
2290 | mlog(ML_ERROR,"while dropping ref on %s:%.*s " | ||
2291 | "(master=%u) got %d.\n", dlm->name, namelen, | ||
2292 | lockname, res->owner, r); | ||
2293 | dlm_print_one_lock_resource(res); | ||
2294 | BUG(); | ||
2295 | } | ||
2296 | return ret; | ||
2297 | } | ||
2298 | |||
2299 | int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | ||
2300 | void **ret_data) | ||
2301 | { | ||
2302 | struct dlm_ctxt *dlm = data; | ||
2303 | struct dlm_deref_lockres *deref = (struct dlm_deref_lockres *)msg->buf; | ||
2304 | struct dlm_lock_resource *res = NULL; | ||
2305 | char *name; | ||
2306 | unsigned int namelen; | ||
2307 | int ret = -EINVAL; | ||
2308 | u8 node; | ||
2309 | unsigned int hash; | ||
2310 | struct dlm_work_item *item; | ||
2311 | int cleared = 0; | ||
2312 | int dispatch = 0; | ||
2313 | |||
2314 | if (!dlm_grab(dlm)) | ||
2315 | return 0; | ||
2316 | |||
2317 | name = deref->name; | ||
2318 | namelen = deref->namelen; | ||
2319 | node = deref->node_idx; | ||
2320 | |||
2321 | if (namelen > DLM_LOCKID_NAME_MAX) { | ||
2322 | mlog(ML_ERROR, "Invalid name length!"); | ||
2323 | goto done; | ||
2324 | } | ||
2325 | if (deref->node_idx >= O2NM_MAX_NODES) { | ||
2326 | mlog(ML_ERROR, "Invalid node number: %u\n", node); | ||
2327 | goto done; | ||
2328 | } | ||
2329 | |||
2330 | hash = dlm_lockid_hash(name, namelen); | ||
2331 | |||
2332 | spin_lock(&dlm->spinlock); | ||
2333 | res = __dlm_lookup_lockres_full(dlm, name, namelen, hash); | ||
2334 | if (!res) { | ||
2335 | spin_unlock(&dlm->spinlock); | ||
2336 | mlog(ML_ERROR, "%s:%.*s: bad lockres name\n", | ||
2337 | dlm->name, namelen, name); | ||
2338 | goto done; | ||
2339 | } | ||
2340 | spin_unlock(&dlm->spinlock); | ||
2341 | |||
2342 | spin_lock(&res->spinlock); | ||
2343 | if (res->state & DLM_LOCK_RES_SETREF_INPROG) | ||
2344 | dispatch = 1; | ||
2345 | else { | ||
2346 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); | ||
2347 | if (test_bit(node, res->refmap)) { | ||
2348 | dlm_lockres_clear_refmap_bit(node, res); | ||
2349 | cleared = 1; | ||
2350 | } | ||
2351 | } | ||
2352 | spin_unlock(&res->spinlock); | ||
2353 | |||
2354 | if (!dispatch) { | ||
2355 | if (cleared) | ||
2356 | dlm_lockres_calc_usage(dlm, res); | ||
2357 | else { | ||
2358 | mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref " | ||
2359 | "but it is already dropped!\n", dlm->name, | ||
2360 | res->lockname.len, res->lockname.name, node); | ||
2361 | __dlm_print_one_lock_resource(res); | ||
2362 | } | ||
2363 | ret = 0; | ||
2364 | goto done; | ||
2365 | } | ||
2366 | |||
2367 | item = kzalloc(sizeof(*item), GFP_NOFS); | ||
2368 | if (!item) { | ||
2369 | ret = -ENOMEM; | ||
2370 | mlog_errno(ret); | ||
2371 | goto done; | ||
2372 | } | ||
2373 | |||
2374 | dlm_init_work_item(dlm, item, dlm_deref_lockres_worker, NULL); | ||
2375 | item->u.dl.deref_res = res; | ||
2376 | item->u.dl.deref_node = node; | ||
2377 | |||
2378 | spin_lock(&dlm->work_lock); | ||
2379 | list_add_tail(&item->list, &dlm->work_list); | ||
2380 | spin_unlock(&dlm->work_lock); | ||
2381 | |||
2382 | queue_work(dlm->dlm_worker, &dlm->dispatched_work); | ||
2383 | return 0; | ||
2384 | |||
2385 | done: | ||
2386 | if (res) | ||
2387 | dlm_lockres_put(res); | ||
2388 | dlm_put(dlm); | ||
2389 | |||
2390 | return ret; | ||
2391 | } | ||
2392 | |||
2393 | static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | ||
2394 | { | ||
2395 | struct dlm_ctxt *dlm; | ||
2396 | struct dlm_lock_resource *res; | ||
2397 | u8 node; | ||
2398 | u8 cleared = 0; | ||
2399 | |||
2400 | dlm = item->dlm; | ||
2401 | res = item->u.dl.deref_res; | ||
2402 | node = item->u.dl.deref_node; | ||
2403 | |||
2404 | spin_lock(&res->spinlock); | ||
2405 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); | ||
2406 | if (test_bit(node, res->refmap)) { | ||
2407 | __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); | ||
2408 | dlm_lockres_clear_refmap_bit(node, res); | ||
2409 | cleared = 1; | ||
2410 | } | ||
2411 | spin_unlock(&res->spinlock); | ||
2412 | |||
2413 | if (cleared) { | ||
2414 | mlog(0, "%s:%.*s node %u ref dropped in dispatch\n", | ||
2415 | dlm->name, res->lockname.len, res->lockname.name, node); | ||
2416 | dlm_lockres_calc_usage(dlm, res); | ||
2417 | } else { | ||
2418 | mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref " | ||
2419 | "but it is already dropped!\n", dlm->name, | ||
2420 | res->lockname.len, res->lockname.name, node); | ||
2421 | __dlm_print_one_lock_resource(res); | ||
2422 | } | ||
2423 | |||
2424 | dlm_lockres_put(res); | ||
2425 | } | ||
2426 | |||
2100 | 2427 | ||
2101 | /* | 2428 | /* |
2102 | * DLM_MIGRATE_LOCKRES | 2429 | * DLM_MIGRATE_LOCKRES |
2103 | */ | 2430 | */ |
2104 | 2431 | ||
2105 | 2432 | ||
2106 | int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | 2433 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, |
2107 | u8 target) | 2434 | struct dlm_lock_resource *res, |
2435 | u8 target) | ||
2108 | { | 2436 | { |
2109 | struct dlm_master_list_entry *mle = NULL; | 2437 | struct dlm_master_list_entry *mle = NULL; |
2110 | struct dlm_master_list_entry *oldmle = NULL; | 2438 | struct dlm_master_list_entry *oldmle = NULL; |
@@ -2116,7 +2444,7 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2116 | struct list_head *queue, *iter; | 2444 | struct list_head *queue, *iter; |
2117 | int i; | 2445 | int i; |
2118 | struct dlm_lock *lock; | 2446 | struct dlm_lock *lock; |
2119 | int empty = 1; | 2447 | int empty = 1, wake = 0; |
2120 | 2448 | ||
2121 | if (!dlm_grab(dlm)) | 2449 | if (!dlm_grab(dlm)) |
2122 | return -EINVAL; | 2450 | return -EINVAL; |
@@ -2241,6 +2569,7 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2241 | res->lockname.name, target); | 2569 | res->lockname.name, target); |
2242 | spin_lock(&res->spinlock); | 2570 | spin_lock(&res->spinlock); |
2243 | res->state &= ~DLM_LOCK_RES_MIGRATING; | 2571 | res->state &= ~DLM_LOCK_RES_MIGRATING; |
2572 | wake = 1; | ||
2244 | spin_unlock(&res->spinlock); | 2573 | spin_unlock(&res->spinlock); |
2245 | ret = -EINVAL; | 2574 | ret = -EINVAL; |
2246 | } | 2575 | } |
@@ -2268,6 +2597,9 @@ fail: | |||
2268 | * the lockres | 2597 | * the lockres |
2269 | */ | 2598 | */ |
2270 | 2599 | ||
2600 | /* now that remote nodes are spinning on the MIGRATING flag, | ||
2601 | * ensure that all assert_master work is flushed. */ | ||
2602 | flush_workqueue(dlm->dlm_worker); | ||
2271 | 2603 | ||
2272 | /* get an extra reference on the mle. | 2604 | /* get an extra reference on the mle. |
2273 | * otherwise the assert_master from the new | 2605 | * otherwise the assert_master from the new |
@@ -2296,6 +2628,7 @@ fail: | |||
2296 | dlm_put_mle_inuse(mle); | 2628 | dlm_put_mle_inuse(mle); |
2297 | spin_lock(&res->spinlock); | 2629 | spin_lock(&res->spinlock); |
2298 | res->state &= ~DLM_LOCK_RES_MIGRATING; | 2630 | res->state &= ~DLM_LOCK_RES_MIGRATING; |
2631 | wake = 1; | ||
2299 | spin_unlock(&res->spinlock); | 2632 | spin_unlock(&res->spinlock); |
2300 | goto leave; | 2633 | goto leave; |
2301 | } | 2634 | } |
@@ -2322,7 +2655,8 @@ fail: | |||
2322 | res->owner == target) | 2655 | res->owner == target) |
2323 | break; | 2656 | break; |
2324 | 2657 | ||
2325 | mlog(0, "timed out during migration\n"); | 2658 | mlog(0, "%s:%.*s: timed out during migration\n", |
2659 | dlm->name, res->lockname.len, res->lockname.name); | ||
2326 | /* avoid hang during shutdown when migrating lockres | 2660 | /* avoid hang during shutdown when migrating lockres |
2327 | * to a node which also goes down */ | 2661 | * to a node which also goes down */ |
2328 | if (dlm_is_node_dead(dlm, target)) { | 2662 | if (dlm_is_node_dead(dlm, target)) { |
@@ -2330,20 +2664,20 @@ fail: | |||
2330 | "target %u is no longer up, restarting\n", | 2664 | "target %u is no longer up, restarting\n", |
2331 | dlm->name, res->lockname.len, | 2665 | dlm->name, res->lockname.len, |
2332 | res->lockname.name, target); | 2666 | res->lockname.name, target); |
2333 | ret = -ERESTARTSYS; | 2667 | ret = -EINVAL; |
2668 | /* migration failed, detach and clean up mle */ | ||
2669 | dlm_mle_detach_hb_events(dlm, mle); | ||
2670 | dlm_put_mle(mle); | ||
2671 | dlm_put_mle_inuse(mle); | ||
2672 | spin_lock(&res->spinlock); | ||
2673 | res->state &= ~DLM_LOCK_RES_MIGRATING; | ||
2674 | wake = 1; | ||
2675 | spin_unlock(&res->spinlock); | ||
2676 | goto leave; | ||
2334 | } | 2677 | } |
2335 | } | 2678 | } else |
2336 | if (ret == -ERESTARTSYS) { | 2679 | mlog(0, "%s:%.*s: caught signal during migration\n", |
2337 | /* migration failed, detach and clean up mle */ | 2680 | dlm->name, res->lockname.len, res->lockname.name); |
2338 | dlm_mle_detach_hb_events(dlm, mle); | ||
2339 | dlm_put_mle(mle); | ||
2340 | dlm_put_mle_inuse(mle); | ||
2341 | spin_lock(&res->spinlock); | ||
2342 | res->state &= ~DLM_LOCK_RES_MIGRATING; | ||
2343 | spin_unlock(&res->spinlock); | ||
2344 | goto leave; | ||
2345 | } | ||
2346 | /* TODO: if node died: stop, clean up, return error */ | ||
2347 | } | 2681 | } |
2348 | 2682 | ||
2349 | /* all done, set the owner, clear the flag */ | 2683 | /* all done, set the owner, clear the flag */ |
@@ -2366,6 +2700,11 @@ leave: | |||
2366 | if (ret < 0) | 2700 | if (ret < 0) |
2367 | dlm_kick_thread(dlm, res); | 2701 | dlm_kick_thread(dlm, res); |
2368 | 2702 | ||
2703 | /* wake up waiters if the MIGRATING flag got set | ||
2704 | * but migration failed */ | ||
2705 | if (wake) | ||
2706 | wake_up(&res->wq); | ||
2707 | |||
2369 | /* TODO: cleanup */ | 2708 | /* TODO: cleanup */ |
2370 | if (mres) | 2709 | if (mres) |
2371 | free_page((unsigned long)mres); | 2710 | free_page((unsigned long)mres); |
@@ -2376,6 +2715,53 @@ leave: | |||
2376 | return ret; | 2715 | return ret; |
2377 | } | 2716 | } |
2378 | 2717 | ||
2718 | #define DLM_MIGRATION_RETRY_MS 100 | ||
2719 | |||
2720 | /* Should be called only after beginning the domain leave process. | ||
2721 | * There should not be any remaining locks on nonlocal lock resources, | ||
2722 | * and there should be no local locks left on locally mastered resources. | ||
2723 | * | ||
2724 | * Called with the dlm spinlock held, may drop it to do migration, but | ||
2725 | * will re-acquire before exit. | ||
2726 | * | ||
2727 | * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */ | ||
2728 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | ||
2729 | { | ||
2730 | int ret; | ||
2731 | int lock_dropped = 0; | ||
2732 | |||
2733 | if (res->owner != dlm->node_num) { | ||
2734 | if (!__dlm_lockres_unused(res)) { | ||
2735 | mlog(ML_ERROR, "%s:%.*s: this node is not master, " | ||
2736 | "trying to free this but locks remain\n", | ||
2737 | dlm->name, res->lockname.len, res->lockname.name); | ||
2738 | } | ||
2739 | goto leave; | ||
2740 | } | ||
2741 | |||
2742 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ | ||
2743 | spin_unlock(&dlm->spinlock); | ||
2744 | lock_dropped = 1; | ||
2745 | while (1) { | ||
2746 | ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); | ||
2747 | if (ret >= 0) | ||
2748 | break; | ||
2749 | if (ret == -ENOTEMPTY) { | ||
2750 | mlog(ML_ERROR, "lockres %.*s still has local locks!\n", | ||
2751 | res->lockname.len, res->lockname.name); | ||
2752 | BUG(); | ||
2753 | } | ||
2754 | |||
2755 | mlog(0, "lockres %.*s: migrate failed, " | ||
2756 | "retrying\n", res->lockname.len, | ||
2757 | res->lockname.name); | ||
2758 | msleep(DLM_MIGRATION_RETRY_MS); | ||
2759 | } | ||
2760 | spin_lock(&dlm->spinlock); | ||
2761 | leave: | ||
2762 | return lock_dropped; | ||
2763 | } | ||
2764 | |||
2379 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) | 2765 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) |
2380 | { | 2766 | { |
2381 | int ret; | 2767 | int ret; |
@@ -2405,7 +2791,8 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm, | |||
2405 | return can_proceed; | 2791 | return can_proceed; |
2406 | } | 2792 | } |
2407 | 2793 | ||
2408 | int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | 2794 | static int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, |
2795 | struct dlm_lock_resource *res) | ||
2409 | { | 2796 | { |
2410 | int ret; | 2797 | int ret; |
2411 | spin_lock(&res->spinlock); | 2798 | spin_lock(&res->spinlock); |
@@ -2434,8 +2821,15 @@ static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm, | |||
2434 | __dlm_lockres_reserve_ast(res); | 2821 | __dlm_lockres_reserve_ast(res); |
2435 | spin_unlock(&res->spinlock); | 2822 | spin_unlock(&res->spinlock); |
2436 | 2823 | ||
2437 | /* now flush all the pending asts.. hang out for a bit */ | 2824 | /* now flush all the pending asts */ |
2438 | dlm_kick_thread(dlm, res); | 2825 | dlm_kick_thread(dlm, res); |
2826 | /* before waiting on DIRTY, block processes which may | ||
2827 | * try to dirty the lockres before MIGRATING is set */ | ||
2828 | spin_lock(&res->spinlock); | ||
2829 | BUG_ON(res->state & DLM_LOCK_RES_BLOCK_DIRTY); | ||
2830 | res->state |= DLM_LOCK_RES_BLOCK_DIRTY; | ||
2831 | spin_unlock(&res->spinlock); | ||
2832 | /* now wait on any pending asts and the DIRTY state */ | ||
2439 | wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res)); | 2833 | wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res)); |
2440 | dlm_lockres_release_ast(dlm, res); | 2834 | dlm_lockres_release_ast(dlm, res); |
2441 | 2835 | ||
@@ -2461,6 +2855,13 @@ again: | |||
2461 | mlog(0, "trying again...\n"); | 2855 | mlog(0, "trying again...\n"); |
2462 | goto again; | 2856 | goto again; |
2463 | } | 2857 | } |
2858 | /* now that we are sure the MIGRATING state is there, drop | ||
2859 | * the unneeded state which blocked threads trying to DIRTY */ | ||
2860 | spin_lock(&res->spinlock); | ||
2861 | BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY)); | ||
2862 | BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); | ||
2863 | res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; | ||
2864 | spin_unlock(&res->spinlock); | ||
2464 | 2865 | ||
2465 | /* did the target go down or die? */ | 2866 | /* did the target go down or die? */ |
2466 | spin_lock(&dlm->spinlock); | 2867 | spin_lock(&dlm->spinlock); |
@@ -2490,7 +2891,7 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
2490 | { | 2891 | { |
2491 | struct list_head *iter, *iter2; | 2892 | struct list_head *iter, *iter2; |
2492 | struct list_head *queue = &res->granted; | 2893 | struct list_head *queue = &res->granted; |
2493 | int i; | 2894 | int i, bit; |
2494 | struct dlm_lock *lock; | 2895 | struct dlm_lock *lock; |
2495 | 2896 | ||
2496 | assert_spin_locked(&res->spinlock); | 2897 | assert_spin_locked(&res->spinlock); |
@@ -2508,12 +2909,28 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
2508 | BUG_ON(!list_empty(&lock->bast_list)); | 2909 | BUG_ON(!list_empty(&lock->bast_list)); |
2509 | BUG_ON(lock->ast_pending); | 2910 | BUG_ON(lock->ast_pending); |
2510 | BUG_ON(lock->bast_pending); | 2911 | BUG_ON(lock->bast_pending); |
2912 | dlm_lockres_clear_refmap_bit(lock->ml.node, res); | ||
2511 | list_del_init(&lock->list); | 2913 | list_del_init(&lock->list); |
2512 | dlm_lock_put(lock); | 2914 | dlm_lock_put(lock); |
2513 | } | 2915 | } |
2514 | } | 2916 | } |
2515 | queue++; | 2917 | queue++; |
2516 | } | 2918 | } |
2919 | bit = 0; | ||
2920 | while (1) { | ||
2921 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); | ||
2922 | if (bit >= O2NM_MAX_NODES) | ||
2923 | break; | ||
2924 | /* do not clear the local node reference, if there is a | ||
2925 | * process holding this, let it drop the ref itself */ | ||
2926 | if (bit != dlm->node_num) { | ||
2927 | mlog(0, "%s:%.*s: node %u had a ref to this " | ||
2928 | "migrating lockres, clearing\n", dlm->name, | ||
2929 | res->lockname.len, res->lockname.name, bit); | ||
2930 | dlm_lockres_clear_refmap_bit(bit, res); | ||
2931 | } | ||
2932 | bit++; | ||
2933 | } | ||
2517 | } | 2934 | } |
2518 | 2935 | ||
2519 | /* for now this is not too intelligent. we will | 2936 | /* for now this is not too intelligent. we will |
@@ -2601,6 +3018,16 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | |||
2601 | mlog(0, "migrate request (node %u) returned %d!\n", | 3018 | mlog(0, "migrate request (node %u) returned %d!\n", |
2602 | nodenum, status); | 3019 | nodenum, status); |
2603 | ret = status; | 3020 | ret = status; |
3021 | } else if (status == DLM_MIGRATE_RESPONSE_MASTERY_REF) { | ||
3022 | /* during the migration request we short-circuited | ||
3023 | * the mastery of the lockres. make sure we have | ||
3024 | * a mastery ref for nodenum */ | ||
3025 | mlog(0, "%s:%.*s: need ref for node %u\n", | ||
3026 | dlm->name, res->lockname.len, res->lockname.name, | ||
3027 | nodenum); | ||
3028 | spin_lock(&res->spinlock); | ||
3029 | dlm_lockres_set_refmap_bit(nodenum, res); | ||
3030 | spin_unlock(&res->spinlock); | ||
2604 | } | 3031 | } |
2605 | } | 3032 | } |
2606 | 3033 | ||
@@ -2619,7 +3046,8 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | |||
2619 | * we will have no mle in the list to start with. now we can add an mle for | 3046 | * we will have no mle in the list to start with. now we can add an mle for |
2620 | * the migration and this should be the only one found for those scanning the | 3047 | * the migration and this should be the only one found for those scanning the |
2621 | * list. */ | 3048 | * list. */ |
2622 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data) | 3049 | int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, |
3050 | void **ret_data) | ||
2623 | { | 3051 | { |
2624 | struct dlm_ctxt *dlm = data; | 3052 | struct dlm_ctxt *dlm = data; |
2625 | struct dlm_lock_resource *res = NULL; | 3053 | struct dlm_lock_resource *res = NULL; |
@@ -2745,7 +3173,13 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
2745 | /* remove it from the list so that only one | 3173 | /* remove it from the list so that only one |
2746 | * mle will be found */ | 3174 | * mle will be found */ |
2747 | list_del_init(&tmp->list); | 3175 | list_del_init(&tmp->list); |
2748 | __dlm_mle_detach_hb_events(dlm, mle); | 3176 | /* this was obviously WRONG. mle is uninited here. should be tmp. */ |
3177 | __dlm_mle_detach_hb_events(dlm, tmp); | ||
3178 | ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; | ||
3179 | mlog(0, "%s:%.*s: master=%u, newmaster=%u, " | ||
3180 | "telling master to get ref for cleared out mle " | ||
3181 | "during migration\n", dlm->name, namelen, name, | ||
3182 | master, new_master); | ||
2749 | } | 3183 | } |
2750 | spin_unlock(&tmp->spinlock); | 3184 | spin_unlock(&tmp->spinlock); |
2751 | } | 3185 | } |
@@ -2753,6 +3187,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, | |||
2753 | /* now add a migration mle to the tail of the list */ | 3187 | /* now add a migration mle to the tail of the list */ |
2754 | dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen); | 3188 | dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen); |
2755 | mle->new_master = new_master; | 3189 | mle->new_master = new_master; |
3190 | /* the new master will be sending an assert master for this. | ||
3191 | * at that point we will get the refmap reference */ | ||
2756 | mle->master = master; | 3192 | mle->master = master; |
2757 | /* do this for consistency with other mle types */ | 3193 | /* do this for consistency with other mle types */ |
2758 | set_bit(new_master, mle->maybe_map); | 3194 | set_bit(new_master, mle->maybe_map); |
@@ -2902,6 +3338,13 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2902 | clear_bit(dlm->node_num, iter.node_map); | 3338 | clear_bit(dlm->node_num, iter.node_map); |
2903 | spin_unlock(&dlm->spinlock); | 3339 | spin_unlock(&dlm->spinlock); |
2904 | 3340 | ||
3341 | /* ownership of the lockres is changing. account for the | ||
3342 | * mastery reference here since old_master will briefly have | ||
3343 | * a reference after the migration completes */ | ||
3344 | spin_lock(&res->spinlock); | ||
3345 | dlm_lockres_set_refmap_bit(old_master, res); | ||
3346 | spin_unlock(&res->spinlock); | ||
3347 | |||
2905 | mlog(0, "now time to do a migrate request to other nodes\n"); | 3348 | mlog(0, "now time to do a migrate request to other nodes\n"); |
2906 | ret = dlm_do_migrate_request(dlm, res, old_master, | 3349 | ret = dlm_do_migrate_request(dlm, res, old_master, |
2907 | dlm->node_num, &iter); | 3350 | dlm->node_num, &iter); |
@@ -2914,8 +3357,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2914 | res->lockname.len, res->lockname.name); | 3357 | res->lockname.len, res->lockname.name); |
2915 | /* this call now finishes out the nodemap | 3358 | /* this call now finishes out the nodemap |
2916 | * even if one or more nodes die */ | 3359 | * even if one or more nodes die */ |
2917 | ret = dlm_do_assert_master(dlm, res->lockname.name, | 3360 | ret = dlm_do_assert_master(dlm, res, iter.node_map, |
2918 | res->lockname.len, iter.node_map, | ||
2919 | DLM_ASSERT_MASTER_FINISH_MIGRATION); | 3361 | DLM_ASSERT_MASTER_FINISH_MIGRATION); |
2920 | if (ret < 0) { | 3362 | if (ret < 0) { |
2921 | /* no longer need to retry. all living nodes contacted. */ | 3363 | /* no longer need to retry. all living nodes contacted. */ |
@@ -2927,8 +3369,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
2927 | set_bit(old_master, iter.node_map); | 3369 | set_bit(old_master, iter.node_map); |
2928 | mlog(0, "doing assert master of %.*s back to %u\n", | 3370 | mlog(0, "doing assert master of %.*s back to %u\n", |
2929 | res->lockname.len, res->lockname.name, old_master); | 3371 | res->lockname.len, res->lockname.name, old_master); |
2930 | ret = dlm_do_assert_master(dlm, res->lockname.name, | 3372 | ret = dlm_do_assert_master(dlm, res, iter.node_map, |
2931 | res->lockname.len, iter.node_map, | ||
2932 | DLM_ASSERT_MASTER_FINISH_MIGRATION); | 3373 | DLM_ASSERT_MASTER_FINISH_MIGRATION); |
2933 | if (ret < 0) { | 3374 | if (ret < 0) { |
2934 | mlog(0, "assert master to original master failed " | 3375 | mlog(0, "assert master to original master failed " |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 367a11e9e2ed..6d4a83d50152 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -163,9 +163,6 @@ void dlm_dispatch_work(struct work_struct *work) | |||
163 | dlm_workfunc_t *workfunc; | 163 | dlm_workfunc_t *workfunc; |
164 | int tot=0; | 164 | int tot=0; |
165 | 165 | ||
166 | if (!dlm_joined(dlm)) | ||
167 | return; | ||
168 | |||
169 | spin_lock(&dlm->work_lock); | 166 | spin_lock(&dlm->work_lock); |
170 | list_splice_init(&dlm->work_list, &tmp_list); | 167 | list_splice_init(&dlm->work_list, &tmp_list); |
171 | spin_unlock(&dlm->work_lock); | 168 | spin_unlock(&dlm->work_lock); |
@@ -821,7 +818,8 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
821 | 818 | ||
822 | } | 819 | } |
823 | 820 | ||
824 | int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data) | 821 | int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data, |
822 | void **ret_data) | ||
825 | { | 823 | { |
826 | struct dlm_ctxt *dlm = data; | 824 | struct dlm_ctxt *dlm = data; |
827 | struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf; | 825 | struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf; |
@@ -978,7 +976,8 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to) | |||
978 | } | 976 | } |
979 | 977 | ||
980 | 978 | ||
981 | int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data) | 979 | int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data, |
980 | void **ret_data) | ||
982 | { | 981 | { |
983 | struct dlm_ctxt *dlm = data; | 982 | struct dlm_ctxt *dlm = data; |
984 | struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; | 983 | struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; |
@@ -1129,6 +1128,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, | |||
1129 | if (total_locks == mres_total_locks) | 1128 | if (total_locks == mres_total_locks) |
1130 | mres->flags |= DLM_MRES_ALL_DONE; | 1129 | mres->flags |= DLM_MRES_ALL_DONE; |
1131 | 1130 | ||
1131 | mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", | ||
1132 | dlm->name, res->lockname.len, res->lockname.name, | ||
1133 | orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", | ||
1134 | send_to); | ||
1135 | |||
1132 | /* send it */ | 1136 | /* send it */ |
1133 | ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres, | 1137 | ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres, |
1134 | sz, send_to, &status); | 1138 | sz, send_to, &status); |
@@ -1213,6 +1217,34 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock, | |||
1213 | return 0; | 1217 | return 0; |
1214 | } | 1218 | } |
1215 | 1219 | ||
1220 | static void dlm_add_dummy_lock(struct dlm_ctxt *dlm, | ||
1221 | struct dlm_migratable_lockres *mres) | ||
1222 | { | ||
1223 | struct dlm_lock dummy; | ||
1224 | memset(&dummy, 0, sizeof(dummy)); | ||
1225 | dummy.ml.cookie = 0; | ||
1226 | dummy.ml.type = LKM_IVMODE; | ||
1227 | dummy.ml.convert_type = LKM_IVMODE; | ||
1228 | dummy.ml.highest_blocked = LKM_IVMODE; | ||
1229 | dummy.lksb = NULL; | ||
1230 | dummy.ml.node = dlm->node_num; | ||
1231 | dlm_add_lock_to_array(&dummy, mres, DLM_BLOCKED_LIST); | ||
1232 | } | ||
1233 | |||
1234 | static inline int dlm_is_dummy_lock(struct dlm_ctxt *dlm, | ||
1235 | struct dlm_migratable_lock *ml, | ||
1236 | u8 *nodenum) | ||
1237 | { | ||
1238 | if (unlikely(ml->cookie == 0 && | ||
1239 | ml->type == LKM_IVMODE && | ||
1240 | ml->convert_type == LKM_IVMODE && | ||
1241 | ml->highest_blocked == LKM_IVMODE && | ||
1242 | ml->list == DLM_BLOCKED_LIST)) { | ||
1243 | *nodenum = ml->node; | ||
1244 | return 1; | ||
1245 | } | ||
1246 | return 0; | ||
1247 | } | ||
1216 | 1248 | ||
1217 | int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | 1249 | int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, |
1218 | struct dlm_migratable_lockres *mres, | 1250 | struct dlm_migratable_lockres *mres, |
@@ -1260,6 +1292,14 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
1260 | goto error; | 1292 | goto error; |
1261 | } | 1293 | } |
1262 | } | 1294 | } |
1295 | if (total_locks == 0) { | ||
1296 | /* send a dummy lock to indicate a mastery reference only */ | ||
1297 | mlog(0, "%s:%.*s: sending dummy lock to %u, %s\n", | ||
1298 | dlm->name, res->lockname.len, res->lockname.name, | ||
1299 | send_to, flags & DLM_MRES_RECOVERY ? "recovery" : | ||
1300 | "migration"); | ||
1301 | dlm_add_dummy_lock(dlm, mres); | ||
1302 | } | ||
1263 | /* flush any remaining locks */ | 1303 | /* flush any remaining locks */ |
1264 | ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks); | 1304 | ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks); |
1265 | if (ret < 0) | 1305 | if (ret < 0) |
@@ -1293,7 +1333,8 @@ error: | |||
1293 | * do we spin? returning an error only delays the problem really | 1333 | * do we spin? returning an error only delays the problem really |
1294 | */ | 1334 | */ |
1295 | 1335 | ||
1296 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data) | 1336 | int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, |
1337 | void **ret_data) | ||
1297 | { | 1338 | { |
1298 | struct dlm_ctxt *dlm = data; | 1339 | struct dlm_ctxt *dlm = data; |
1299 | struct dlm_migratable_lockres *mres = | 1340 | struct dlm_migratable_lockres *mres = |
@@ -1382,17 +1423,21 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data) | |||
1382 | spin_lock(&res->spinlock); | 1423 | spin_lock(&res->spinlock); |
1383 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; | 1424 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; |
1384 | spin_unlock(&res->spinlock); | 1425 | spin_unlock(&res->spinlock); |
1426 | wake_up(&res->wq); | ||
1385 | 1427 | ||
1386 | /* add an extra ref for just-allocated lockres | 1428 | /* add an extra ref for just-allocated lockres |
1387 | * otherwise the lockres will be purged immediately */ | 1429 | * otherwise the lockres will be purged immediately */ |
1388 | dlm_lockres_get(res); | 1430 | dlm_lockres_get(res); |
1389 | |||
1390 | } | 1431 | } |
1391 | 1432 | ||
1392 | /* at this point we have allocated everything we need, | 1433 | /* at this point we have allocated everything we need, |
1393 | * and we have a hashed lockres with an extra ref and | 1434 | * and we have a hashed lockres with an extra ref and |
1394 | * the proper res->state flags. */ | 1435 | * the proper res->state flags. */ |
1395 | ret = 0; | 1436 | ret = 0; |
1437 | spin_lock(&res->spinlock); | ||
1438 | /* drop this either when master requery finds a different master | ||
1439 | * or when a lock is added by the recovery worker */ | ||
1440 | dlm_lockres_grab_inflight_ref(dlm, res); | ||
1396 | if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) { | 1441 | if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) { |
1397 | /* migration cannot have an unknown master */ | 1442 | /* migration cannot have an unknown master */ |
1398 | BUG_ON(!(mres->flags & DLM_MRES_RECOVERY)); | 1443 | BUG_ON(!(mres->flags & DLM_MRES_RECOVERY)); |
@@ -1400,10 +1445,11 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data) | |||
1400 | "unknown owner.. will need to requery: " | 1445 | "unknown owner.. will need to requery: " |
1401 | "%.*s\n", mres->lockname_len, mres->lockname); | 1446 | "%.*s\n", mres->lockname_len, mres->lockname); |
1402 | } else { | 1447 | } else { |
1403 | spin_lock(&res->spinlock); | 1448 | /* take a reference now to pin the lockres, drop it |
1449 | * when locks are added in the worker */ | ||
1404 | dlm_change_lockres_owner(dlm, res, dlm->node_num); | 1450 | dlm_change_lockres_owner(dlm, res, dlm->node_num); |
1405 | spin_unlock(&res->spinlock); | ||
1406 | } | 1451 | } |
1452 | spin_unlock(&res->spinlock); | ||
1407 | 1453 | ||
1408 | /* queue up work for dlm_mig_lockres_worker */ | 1454 | /* queue up work for dlm_mig_lockres_worker */ |
1409 | dlm_grab(dlm); /* get an extra ref for the work item */ | 1455 | dlm_grab(dlm); /* get an extra ref for the work item */ |
@@ -1459,6 +1505,9 @@ again: | |||
1459 | "this node will take it.\n", | 1505 | "this node will take it.\n", |
1460 | res->lockname.len, res->lockname.name); | 1506 | res->lockname.len, res->lockname.name); |
1461 | } else { | 1507 | } else { |
1508 | spin_lock(&res->spinlock); | ||
1509 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
1510 | spin_unlock(&res->spinlock); | ||
1462 | mlog(0, "master needs to respond to sender " | 1511 | mlog(0, "master needs to respond to sender " |
1463 | "that node %u still owns %.*s\n", | 1512 | "that node %u still owns %.*s\n", |
1464 | real_master, res->lockname.len, | 1513 | real_master, res->lockname.len, |
@@ -1578,7 +1627,8 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
1578 | /* this function cannot error, so unless the sending | 1627 | /* this function cannot error, so unless the sending |
1579 | * or receiving of the message failed, the owner can | 1628 | * or receiving of the message failed, the owner can |
1580 | * be trusted */ | 1629 | * be trusted */ |
1581 | int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data) | 1630 | int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, |
1631 | void **ret_data) | ||
1582 | { | 1632 | { |
1583 | struct dlm_ctxt *dlm = data; | 1633 | struct dlm_ctxt *dlm = data; |
1584 | struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf; | 1634 | struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf; |
@@ -1660,21 +1710,38 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1660 | { | 1710 | { |
1661 | struct dlm_migratable_lock *ml; | 1711 | struct dlm_migratable_lock *ml; |
1662 | struct list_head *queue; | 1712 | struct list_head *queue; |
1713 | struct list_head *tmpq = NULL; | ||
1663 | struct dlm_lock *newlock = NULL; | 1714 | struct dlm_lock *newlock = NULL; |
1664 | struct dlm_lockstatus *lksb = NULL; | 1715 | struct dlm_lockstatus *lksb = NULL; |
1665 | int ret = 0; | 1716 | int ret = 0; |
1666 | int i, bad; | 1717 | int i, j, bad; |
1667 | struct list_head *iter; | 1718 | struct list_head *iter; |
1668 | struct dlm_lock *lock = NULL; | 1719 | struct dlm_lock *lock = NULL; |
1720 | u8 from = O2NM_MAX_NODES; | ||
1721 | unsigned int added = 0; | ||
1669 | 1722 | ||
1670 | mlog(0, "running %d locks for this lockres\n", mres->num_locks); | 1723 | mlog(0, "running %d locks for this lockres\n", mres->num_locks); |
1671 | for (i=0; i<mres->num_locks; i++) { | 1724 | for (i=0; i<mres->num_locks; i++) { |
1672 | ml = &(mres->ml[i]); | 1725 | ml = &(mres->ml[i]); |
1726 | |||
1727 | if (dlm_is_dummy_lock(dlm, ml, &from)) { | ||
1728 | /* placeholder, just need to set the refmap bit */ | ||
1729 | BUG_ON(mres->num_locks != 1); | ||
1730 | mlog(0, "%s:%.*s: dummy lock for %u\n", | ||
1731 | dlm->name, mres->lockname_len, mres->lockname, | ||
1732 | from); | ||
1733 | spin_lock(&res->spinlock); | ||
1734 | dlm_lockres_set_refmap_bit(from, res); | ||
1735 | spin_unlock(&res->spinlock); | ||
1736 | added++; | ||
1737 | break; | ||
1738 | } | ||
1673 | BUG_ON(ml->highest_blocked != LKM_IVMODE); | 1739 | BUG_ON(ml->highest_blocked != LKM_IVMODE); |
1674 | newlock = NULL; | 1740 | newlock = NULL; |
1675 | lksb = NULL; | 1741 | lksb = NULL; |
1676 | 1742 | ||
1677 | queue = dlm_list_num_to_pointer(res, ml->list); | 1743 | queue = dlm_list_num_to_pointer(res, ml->list); |
1744 | tmpq = NULL; | ||
1678 | 1745 | ||
1679 | /* if the lock is for the local node it needs to | 1746 | /* if the lock is for the local node it needs to |
1680 | * be moved to the proper location within the queue. | 1747 | * be moved to the proper location within the queue. |
@@ -1684,11 +1751,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1684 | BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); | 1751 | BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); |
1685 | 1752 | ||
1686 | spin_lock(&res->spinlock); | 1753 | spin_lock(&res->spinlock); |
1687 | list_for_each(iter, queue) { | 1754 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { |
1688 | lock = list_entry (iter, struct dlm_lock, list); | 1755 | tmpq = dlm_list_idx_to_ptr(res, j); |
1689 | if (lock->ml.cookie != ml->cookie) | 1756 | list_for_each(iter, tmpq) { |
1690 | lock = NULL; | 1757 | lock = list_entry (iter, struct dlm_lock, list); |
1691 | else | 1758 | if (lock->ml.cookie != ml->cookie) |
1759 | lock = NULL; | ||
1760 | else | ||
1761 | break; | ||
1762 | } | ||
1763 | if (lock) | ||
1692 | break; | 1764 | break; |
1693 | } | 1765 | } |
1694 | 1766 | ||
@@ -1698,12 +1770,20 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1698 | u64 c = ml->cookie; | 1770 | u64 c = ml->cookie; |
1699 | mlog(ML_ERROR, "could not find local lock " | 1771 | mlog(ML_ERROR, "could not find local lock " |
1700 | "with cookie %u:%llu!\n", | 1772 | "with cookie %u:%llu!\n", |
1701 | dlm_get_lock_cookie_node(c), | 1773 | dlm_get_lock_cookie_node(be64_to_cpu(c)), |
1702 | dlm_get_lock_cookie_seq(c)); | 1774 | dlm_get_lock_cookie_seq(be64_to_cpu(c))); |
1775 | __dlm_print_one_lock_resource(res); | ||
1703 | BUG(); | 1776 | BUG(); |
1704 | } | 1777 | } |
1705 | BUG_ON(lock->ml.node != ml->node); | 1778 | BUG_ON(lock->ml.node != ml->node); |
1706 | 1779 | ||
1780 | if (tmpq != queue) { | ||
1781 | mlog(0, "lock was on %u instead of %u for %.*s\n", | ||
1782 | j, ml->list, res->lockname.len, res->lockname.name); | ||
1783 | spin_unlock(&res->spinlock); | ||
1784 | continue; | ||
1785 | } | ||
1786 | |||
1707 | /* see NOTE above about why we do not update | 1787 | /* see NOTE above about why we do not update |
1708 | * to match the master here */ | 1788 | * to match the master here */ |
1709 | 1789 | ||
@@ -1711,6 +1791,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1711 | /* do not alter lock refcount. switching lists. */ | 1791 | /* do not alter lock refcount. switching lists. */ |
1712 | list_move_tail(&lock->list, queue); | 1792 | list_move_tail(&lock->list, queue); |
1713 | spin_unlock(&res->spinlock); | 1793 | spin_unlock(&res->spinlock); |
1794 | added++; | ||
1714 | 1795 | ||
1715 | mlog(0, "just reordered a local lock!\n"); | 1796 | mlog(0, "just reordered a local lock!\n"); |
1716 | continue; | 1797 | continue; |
@@ -1799,14 +1880,14 @@ skip_lvb: | |||
1799 | mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " | 1880 | mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " |
1800 | "exists on this lockres!\n", dlm->name, | 1881 | "exists on this lockres!\n", dlm->name, |
1801 | res->lockname.len, res->lockname.name, | 1882 | res->lockname.len, res->lockname.name, |
1802 | dlm_get_lock_cookie_node(c), | 1883 | dlm_get_lock_cookie_node(be64_to_cpu(c)), |
1803 | dlm_get_lock_cookie_seq(c)); | 1884 | dlm_get_lock_cookie_seq(be64_to_cpu(c))); |
1804 | 1885 | ||
1805 | mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, " | 1886 | mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, " |
1806 | "node=%u, cookie=%u:%llu, queue=%d\n", | 1887 | "node=%u, cookie=%u:%llu, queue=%d\n", |
1807 | ml->type, ml->convert_type, ml->node, | 1888 | ml->type, ml->convert_type, ml->node, |
1808 | dlm_get_lock_cookie_node(ml->cookie), | 1889 | dlm_get_lock_cookie_node(be64_to_cpu(ml->cookie)), |
1809 | dlm_get_lock_cookie_seq(ml->cookie), | 1890 | dlm_get_lock_cookie_seq(be64_to_cpu(ml->cookie)), |
1810 | ml->list); | 1891 | ml->list); |
1811 | 1892 | ||
1812 | __dlm_print_one_lock_resource(res); | 1893 | __dlm_print_one_lock_resource(res); |
@@ -1817,12 +1898,22 @@ skip_lvb: | |||
1817 | if (!bad) { | 1898 | if (!bad) { |
1818 | dlm_lock_get(newlock); | 1899 | dlm_lock_get(newlock); |
1819 | list_add_tail(&newlock->list, queue); | 1900 | list_add_tail(&newlock->list, queue); |
1901 | mlog(0, "%s:%.*s: added lock for node %u, " | ||
1902 | "setting refmap bit\n", dlm->name, | ||
1903 | res->lockname.len, res->lockname.name, ml->node); | ||
1904 | dlm_lockres_set_refmap_bit(ml->node, res); | ||
1905 | added++; | ||
1820 | } | 1906 | } |
1821 | spin_unlock(&res->spinlock); | 1907 | spin_unlock(&res->spinlock); |
1822 | } | 1908 | } |
1823 | mlog(0, "done running all the locks\n"); | 1909 | mlog(0, "done running all the locks\n"); |
1824 | 1910 | ||
1825 | leave: | 1911 | leave: |
1912 | /* balance the ref taken when the work was queued */ | ||
1913 | spin_lock(&res->spinlock); | ||
1914 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
1915 | spin_unlock(&res->spinlock); | ||
1916 | |||
1826 | if (ret < 0) { | 1917 | if (ret < 0) { |
1827 | mlog_errno(ret); | 1918 | mlog_errno(ret); |
1828 | if (newlock) | 1919 | if (newlock) |
@@ -1935,9 +2026,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
1935 | if (res->owner == dead_node) { | 2026 | if (res->owner == dead_node) { |
1936 | list_del_init(&res->recovering); | 2027 | list_del_init(&res->recovering); |
1937 | spin_lock(&res->spinlock); | 2028 | spin_lock(&res->spinlock); |
2029 | /* new_master has our reference from | ||
2030 | * the lock state sent during recovery */ | ||
1938 | dlm_change_lockres_owner(dlm, res, new_master); | 2031 | dlm_change_lockres_owner(dlm, res, new_master); |
1939 | res->state &= ~DLM_LOCK_RES_RECOVERING; | 2032 | res->state &= ~DLM_LOCK_RES_RECOVERING; |
1940 | if (!__dlm_lockres_unused(res)) | 2033 | if (__dlm_lockres_has_locks(res)) |
1941 | __dlm_dirty_lockres(dlm, res); | 2034 | __dlm_dirty_lockres(dlm, res); |
1942 | spin_unlock(&res->spinlock); | 2035 | spin_unlock(&res->spinlock); |
1943 | wake_up(&res->wq); | 2036 | wake_up(&res->wq); |
@@ -1977,9 +2070,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
1977 | dlm_lockres_put(res); | 2070 | dlm_lockres_put(res); |
1978 | } | 2071 | } |
1979 | spin_lock(&res->spinlock); | 2072 | spin_lock(&res->spinlock); |
2073 | /* new_master has our reference from | ||
2074 | * the lock state sent during recovery */ | ||
1980 | dlm_change_lockres_owner(dlm, res, new_master); | 2075 | dlm_change_lockres_owner(dlm, res, new_master); |
1981 | res->state &= ~DLM_LOCK_RES_RECOVERING; | 2076 | res->state &= ~DLM_LOCK_RES_RECOVERING; |
1982 | if (!__dlm_lockres_unused(res)) | 2077 | if (__dlm_lockres_has_locks(res)) |
1983 | __dlm_dirty_lockres(dlm, res); | 2078 | __dlm_dirty_lockres(dlm, res); |
1984 | spin_unlock(&res->spinlock); | 2079 | spin_unlock(&res->spinlock); |
1985 | wake_up(&res->wq); | 2080 | wake_up(&res->wq); |
@@ -2048,6 +2143,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2048 | { | 2143 | { |
2049 | struct list_head *iter, *tmpiter; | 2144 | struct list_head *iter, *tmpiter; |
2050 | struct dlm_lock *lock; | 2145 | struct dlm_lock *lock; |
2146 | unsigned int freed = 0; | ||
2051 | 2147 | ||
2052 | /* this node is the lockres master: | 2148 | /* this node is the lockres master: |
2053 | * 1) remove any stale locks for the dead node | 2149 | * 1) remove any stale locks for the dead node |
@@ -2062,6 +2158,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2062 | if (lock->ml.node == dead_node) { | 2158 | if (lock->ml.node == dead_node) { |
2063 | list_del_init(&lock->list); | 2159 | list_del_init(&lock->list); |
2064 | dlm_lock_put(lock); | 2160 | dlm_lock_put(lock); |
2161 | freed++; | ||
2065 | } | 2162 | } |
2066 | } | 2163 | } |
2067 | list_for_each_safe(iter, tmpiter, &res->converting) { | 2164 | list_for_each_safe(iter, tmpiter, &res->converting) { |
@@ -2069,6 +2166,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2069 | if (lock->ml.node == dead_node) { | 2166 | if (lock->ml.node == dead_node) { |
2070 | list_del_init(&lock->list); | 2167 | list_del_init(&lock->list); |
2071 | dlm_lock_put(lock); | 2168 | dlm_lock_put(lock); |
2169 | freed++; | ||
2072 | } | 2170 | } |
2073 | } | 2171 | } |
2074 | list_for_each_safe(iter, tmpiter, &res->blocked) { | 2172 | list_for_each_safe(iter, tmpiter, &res->blocked) { |
@@ -2076,9 +2174,23 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
2076 | if (lock->ml.node == dead_node) { | 2174 | if (lock->ml.node == dead_node) { |
2077 | list_del_init(&lock->list); | 2175 | list_del_init(&lock->list); |
2078 | dlm_lock_put(lock); | 2176 | dlm_lock_put(lock); |
2177 | freed++; | ||
2079 | } | 2178 | } |
2080 | } | 2179 | } |
2081 | 2180 | ||
2181 | if (freed) { | ||
2182 | mlog(0, "%s:%.*s: freed %u locks for dead node %u, " | ||
2183 | "dropping ref from lockres\n", dlm->name, | ||
2184 | res->lockname.len, res->lockname.name, freed, dead_node); | ||
2185 | BUG_ON(!test_bit(dead_node, res->refmap)); | ||
2186 | dlm_lockres_clear_refmap_bit(dead_node, res); | ||
2187 | } else if (test_bit(dead_node, res->refmap)) { | ||
2188 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " | ||
2189 | "no locks and had not purged before dying\n", dlm->name, | ||
2190 | res->lockname.len, res->lockname.name, dead_node); | ||
2191 | dlm_lockres_clear_refmap_bit(dead_node, res); | ||
2192 | } | ||
2193 | |||
2082 | /* do not kick thread yet */ | 2194 | /* do not kick thread yet */ |
2083 | __dlm_dirty_lockres(dlm, res); | 2195 | __dlm_dirty_lockres(dlm, res); |
2084 | } | 2196 | } |
@@ -2141,9 +2253,21 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2141 | spin_lock(&res->spinlock); | 2253 | spin_lock(&res->spinlock); |
2142 | /* zero the lvb if necessary */ | 2254 | /* zero the lvb if necessary */ |
2143 | dlm_revalidate_lvb(dlm, res, dead_node); | 2255 | dlm_revalidate_lvb(dlm, res, dead_node); |
2144 | if (res->owner == dead_node) | 2256 | if (res->owner == dead_node) { |
2257 | if (res->state & DLM_LOCK_RES_DROPPING_REF) | ||
2258 | mlog(0, "%s:%.*s: owned by " | ||
2259 | "dead node %u, this node was " | ||
2260 | "dropping its ref when it died. " | ||
2261 | "continue, dropping the flag.\n", | ||
2262 | dlm->name, res->lockname.len, | ||
2263 | res->lockname.name, dead_node); | ||
2264 | |||
2265 | /* the wake_up for this will happen when the | ||
2266 | * RECOVERING flag is dropped later */ | ||
2267 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | ||
2268 | |||
2145 | dlm_move_lockres_to_recovery_list(dlm, res); | 2269 | dlm_move_lockres_to_recovery_list(dlm, res); |
2146 | else if (res->owner == dlm->node_num) { | 2270 | } else if (res->owner == dlm->node_num) { |
2147 | dlm_free_dead_locks(dlm, res, dead_node); | 2271 | dlm_free_dead_locks(dlm, res, dead_node); |
2148 | __dlm_lockres_calc_usage(dlm, res); | 2272 | __dlm_lockres_calc_usage(dlm, res); |
2149 | } | 2273 | } |
@@ -2480,7 +2604,8 @@ retry: | |||
2480 | return ret; | 2604 | return ret; |
2481 | } | 2605 | } |
2482 | 2606 | ||
2483 | int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data) | 2607 | int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, |
2608 | void **ret_data) | ||
2484 | { | 2609 | { |
2485 | struct dlm_ctxt *dlm = data; | 2610 | struct dlm_ctxt *dlm = data; |
2486 | struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf; | 2611 | struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf; |
@@ -2608,7 +2733,8 @@ stage2: | |||
2608 | return ret; | 2733 | return ret; |
2609 | } | 2734 | } |
2610 | 2735 | ||
2611 | int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data) | 2736 | int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, |
2737 | void **ret_data) | ||
2612 | { | 2738 | { |
2613 | struct dlm_ctxt *dlm = data; | 2739 | struct dlm_ctxt *dlm = data; |
2614 | struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; | 2740 | struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf; |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 0c822f3ffb05..8ffa0916eb86 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -54,9 +54,6 @@ | |||
54 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
55 | 55 | ||
56 | static int dlm_thread(void *data); | 56 | static int dlm_thread(void *data); |
57 | static void dlm_purge_lockres_now(struct dlm_ctxt *dlm, | ||
58 | struct dlm_lock_resource *lockres); | ||
59 | |||
60 | static void dlm_flush_asts(struct dlm_ctxt *dlm); | 57 | static void dlm_flush_asts(struct dlm_ctxt *dlm); |
61 | 58 | ||
62 | #define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) | 59 | #define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) |
@@ -82,14 +79,33 @@ repeat: | |||
82 | current->state = TASK_RUNNING; | 79 | current->state = TASK_RUNNING; |
83 | } | 80 | } |
84 | 81 | ||
85 | 82 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res) | |
86 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | ||
87 | { | 83 | { |
88 | if (list_empty(&res->granted) && | 84 | if (list_empty(&res->granted) && |
89 | list_empty(&res->converting) && | 85 | list_empty(&res->converting) && |
90 | list_empty(&res->blocked) && | 86 | list_empty(&res->blocked)) |
91 | list_empty(&res->dirty)) | 87 | return 0; |
92 | return 1; | 88 | return 1; |
89 | } | ||
90 | |||
91 | /* "unused": the lockres has no locks, is not on the dirty list, | ||
92 | * has no inflight locks (in the gap between mastery and acquiring | ||
93 | * the first lock), and has no bits in its refmap. | ||
94 | * truly ready to be freed. */ | ||
95 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | ||
96 | { | ||
97 | if (!__dlm_lockres_has_locks(res) && | ||
98 | (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) { | ||
99 | /* try not to scan the bitmap unless the first two | ||
100 | * conditions are already true */ | ||
101 | int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | ||
102 | if (bit >= O2NM_MAX_NODES) { | ||
103 | /* since the bit for dlm->node_num is not | ||
104 | * set, inflight_locks better be zero */ | ||
105 | BUG_ON(res->inflight_locks != 0); | ||
106 | return 1; | ||
107 | } | ||
108 | } | ||
93 | return 0; | 109 | return 0; |
94 | } | 110 | } |
95 | 111 | ||
@@ -106,46 +122,21 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
106 | assert_spin_locked(&res->spinlock); | 122 | assert_spin_locked(&res->spinlock); |
107 | 123 | ||
108 | if (__dlm_lockres_unused(res)){ | 124 | if (__dlm_lockres_unused(res)){ |
109 | /* For now, just keep any resource we master */ | ||
110 | if (res->owner == dlm->node_num) | ||
111 | { | ||
112 | if (!list_empty(&res->purge)) { | ||
113 | mlog(0, "we master %s:%.*s, but it is on " | ||
114 | "the purge list. Removing\n", | ||
115 | dlm->name, res->lockname.len, | ||
116 | res->lockname.name); | ||
117 | list_del_init(&res->purge); | ||
118 | dlm->purge_count--; | ||
119 | } | ||
120 | return; | ||
121 | } | ||
122 | |||
123 | if (list_empty(&res->purge)) { | 125 | if (list_empty(&res->purge)) { |
124 | mlog(0, "putting lockres %.*s from purge list\n", | 126 | mlog(0, "putting lockres %.*s:%p onto purge list\n", |
125 | res->lockname.len, res->lockname.name); | 127 | res->lockname.len, res->lockname.name, res); |
126 | 128 | ||
127 | res->last_used = jiffies; | 129 | res->last_used = jiffies; |
130 | dlm_lockres_get(res); | ||
128 | list_add_tail(&res->purge, &dlm->purge_list); | 131 | list_add_tail(&res->purge, &dlm->purge_list); |
129 | dlm->purge_count++; | 132 | dlm->purge_count++; |
130 | |||
131 | /* if this node is not the owner, there is | ||
132 | * no way to keep track of who the owner could be. | ||
133 | * unhash it to avoid serious problems. */ | ||
134 | if (res->owner != dlm->node_num) { | ||
135 | mlog(0, "%s:%.*s: doing immediate " | ||
136 | "purge of lockres owned by %u\n", | ||
137 | dlm->name, res->lockname.len, | ||
138 | res->lockname.name, res->owner); | ||
139 | |||
140 | dlm_purge_lockres_now(dlm, res); | ||
141 | } | ||
142 | } | 133 | } |
143 | } else if (!list_empty(&res->purge)) { | 134 | } else if (!list_empty(&res->purge)) { |
144 | mlog(0, "removing lockres %.*s from purge list, " | 135 | mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n", |
145 | "owner=%u\n", res->lockname.len, res->lockname.name, | 136 | res->lockname.len, res->lockname.name, res, res->owner); |
146 | res->owner); | ||
147 | 137 | ||
148 | list_del_init(&res->purge); | 138 | list_del_init(&res->purge); |
139 | dlm_lockres_put(res); | ||
149 | dlm->purge_count--; | 140 | dlm->purge_count--; |
150 | } | 141 | } |
151 | } | 142 | } |
@@ -163,68 +154,65 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
163 | spin_unlock(&dlm->spinlock); | 154 | spin_unlock(&dlm->spinlock); |
164 | } | 155 | } |
165 | 156 | ||
166 | /* TODO: Eventual API: Called with the dlm spinlock held, may drop it | 157 | static int dlm_purge_lockres(struct dlm_ctxt *dlm, |
167 | * to do migration, but will re-acquire before exit. */ | 158 | struct dlm_lock_resource *res) |
168 | void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres) | ||
169 | { | 159 | { |
170 | int master; | 160 | int master; |
171 | int ret; | 161 | int ret = 0; |
172 | |||
173 | spin_lock(&lockres->spinlock); | ||
174 | master = lockres->owner == dlm->node_num; | ||
175 | spin_unlock(&lockres->spinlock); | ||
176 | 162 | ||
177 | mlog(0, "purging lockres %.*s, master = %d\n", lockres->lockname.len, | 163 | spin_lock(&res->spinlock); |
178 | lockres->lockname.name, master); | 164 | if (!__dlm_lockres_unused(res)) { |
179 | 165 | spin_unlock(&res->spinlock); | |
180 | /* Non master is the easy case -- no migration required, just | 166 | mlog(0, "%s:%.*s: tried to purge but not unused\n", |
181 | * quit. */ | 167 | dlm->name, res->lockname.len, res->lockname.name); |
168 | return -ENOTEMPTY; | ||
169 | } | ||
170 | master = (res->owner == dlm->node_num); | ||
182 | if (!master) | 171 | if (!master) |
183 | goto finish; | 172 | res->state |= DLM_LOCK_RES_DROPPING_REF; |
184 | 173 | spin_unlock(&res->spinlock); | |
185 | /* Wheee! Migrate lockres here! */ | ||
186 | spin_unlock(&dlm->spinlock); | ||
187 | again: | ||
188 | 174 | ||
189 | ret = dlm_migrate_lockres(dlm, lockres, O2NM_MAX_NODES); | 175 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, |
190 | if (ret == -ENOTEMPTY) { | 176 | res->lockname.name, master); |
191 | mlog(ML_ERROR, "lockres %.*s still has local locks!\n", | ||
192 | lockres->lockname.len, lockres->lockname.name); | ||
193 | 177 | ||
194 | BUG(); | 178 | if (!master) { |
195 | } else if (ret < 0) { | 179 | spin_lock(&res->spinlock); |
196 | mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n", | 180 | /* This ensures that clear refmap is sent after the set */ |
197 | lockres->lockname.len, lockres->lockname.name); | 181 | __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); |
198 | msleep(100); | 182 | spin_unlock(&res->spinlock); |
199 | goto again; | 183 | /* drop spinlock to do messaging, retake below */ |
184 | spin_unlock(&dlm->spinlock); | ||
185 | /* clear our bit from the master's refmap, ignore errors */ | ||
186 | ret = dlm_drop_lockres_ref(dlm, res); | ||
187 | if (ret < 0) { | ||
188 | mlog_errno(ret); | ||
189 | if (!dlm_is_host_down(ret)) | ||
190 | BUG(); | ||
191 | } | ||
192 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", | ||
193 | dlm->name, res->lockname.len, res->lockname.name, ret); | ||
194 | spin_lock(&dlm->spinlock); | ||
200 | } | 195 | } |
201 | 196 | ||
202 | spin_lock(&dlm->spinlock); | 197 | if (!list_empty(&res->purge)) { |
203 | 198 | mlog(0, "removing lockres %.*s:%p from purgelist, " | |
204 | finish: | 199 | "master = %d\n", res->lockname.len, res->lockname.name, |
205 | if (!list_empty(&lockres->purge)) { | 200 | res, master); |
206 | list_del_init(&lockres->purge); | 201 | list_del_init(&res->purge); |
202 | dlm_lockres_put(res); | ||
207 | dlm->purge_count--; | 203 | dlm->purge_count--; |
208 | } | 204 | } |
209 | __dlm_unhash_lockres(lockres); | 205 | __dlm_unhash_lockres(res); |
210 | } | ||
211 | |||
212 | /* make an unused lockres go away immediately. | ||
213 | * as soon as the dlm spinlock is dropped, this lockres | ||
214 | * will not be found. kfree still happens on last put. */ | ||
215 | static void dlm_purge_lockres_now(struct dlm_ctxt *dlm, | ||
216 | struct dlm_lock_resource *lockres) | ||
217 | { | ||
218 | assert_spin_locked(&dlm->spinlock); | ||
219 | assert_spin_locked(&lockres->spinlock); | ||
220 | 206 | ||
221 | BUG_ON(!__dlm_lockres_unused(lockres)); | 207 | /* lockres is not in the hash now. drop the flag and wake up |
222 | 208 | * any processes waiting in dlm_get_lock_resource. */ | |
223 | if (!list_empty(&lockres->purge)) { | 209 | if (!master) { |
224 | list_del_init(&lockres->purge); | 210 | spin_lock(&res->spinlock); |
225 | dlm->purge_count--; | 211 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; |
212 | spin_unlock(&res->spinlock); | ||
213 | wake_up(&res->wq); | ||
226 | } | 214 | } |
227 | __dlm_unhash_lockres(lockres); | 215 | return 0; |
228 | } | 216 | } |
229 | 217 | ||
230 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, | 218 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, |
@@ -268,13 +256,17 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
268 | break; | 256 | break; |
269 | } | 257 | } |
270 | 258 | ||
259 | mlog(0, "removing lockres %.*s:%p from purgelist\n", | ||
260 | lockres->lockname.len, lockres->lockname.name, lockres); | ||
271 | list_del_init(&lockres->purge); | 261 | list_del_init(&lockres->purge); |
262 | dlm_lockres_put(lockres); | ||
272 | dlm->purge_count--; | 263 | dlm->purge_count--; |
273 | 264 | ||
274 | /* This may drop and reacquire the dlm spinlock if it | 265 | /* This may drop and reacquire the dlm spinlock if it |
275 | * has to do migration. */ | 266 | * has to do migration. */ |
276 | mlog(0, "calling dlm_purge_lockres!\n"); | 267 | mlog(0, "calling dlm_purge_lockres!\n"); |
277 | dlm_purge_lockres(dlm, lockres); | 268 | if (dlm_purge_lockres(dlm, lockres)) |
269 | BUG(); | ||
278 | mlog(0, "DONE calling dlm_purge_lockres!\n"); | 270 | mlog(0, "DONE calling dlm_purge_lockres!\n"); |
279 | 271 | ||
280 | /* Avoid adding any scheduling latencies */ | 272 | /* Avoid adding any scheduling latencies */ |
@@ -467,12 +459,17 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | |||
467 | assert_spin_locked(&res->spinlock); | 459 | assert_spin_locked(&res->spinlock); |
468 | 460 | ||
469 | /* don't shuffle secondary queues */ | 461 | /* don't shuffle secondary queues */ |
470 | if ((res->owner == dlm->node_num) && | 462 | if ((res->owner == dlm->node_num)) { |
471 | !(res->state & DLM_LOCK_RES_DIRTY)) { | 463 | if (res->state & (DLM_LOCK_RES_MIGRATING | |
472 | /* ref for dirty_list */ | 464 | DLM_LOCK_RES_BLOCK_DIRTY)) |
473 | dlm_lockres_get(res); | 465 | return; |
474 | list_add_tail(&res->dirty, &dlm->dirty_list); | 466 | |
475 | res->state |= DLM_LOCK_RES_DIRTY; | 467 | if (list_empty(&res->dirty)) { |
468 | /* ref for dirty_list */ | ||
469 | dlm_lockres_get(res); | ||
470 | list_add_tail(&res->dirty, &dlm->dirty_list); | ||
471 | res->state |= DLM_LOCK_RES_DIRTY; | ||
472 | } | ||
476 | } | 473 | } |
477 | } | 474 | } |
478 | 475 | ||
@@ -651,7 +648,7 @@ static int dlm_thread(void *data) | |||
651 | dlm_lockres_get(res); | 648 | dlm_lockres_get(res); |
652 | 649 | ||
653 | spin_lock(&res->spinlock); | 650 | spin_lock(&res->spinlock); |
654 | res->state &= ~DLM_LOCK_RES_DIRTY; | 651 | /* We clear the DLM_LOCK_RES_DIRTY state once we shuffle lists below */ |
655 | list_del_init(&res->dirty); | 652 | list_del_init(&res->dirty); |
656 | spin_unlock(&res->spinlock); | 653 | spin_unlock(&res->spinlock); |
657 | spin_unlock(&dlm->spinlock); | 654 | spin_unlock(&dlm->spinlock); |
@@ -675,10 +672,11 @@ static int dlm_thread(void *data) | |||
675 | /* it is now ok to move lockreses in these states | 672 | /* it is now ok to move lockreses in these states |
676 | * to the dirty list, assuming that they will only be | 673 | * to the dirty list, assuming that they will only be |
677 | * dirty for a short while. */ | 674 | * dirty for a short while. */ |
675 | BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); | ||
678 | if (res->state & (DLM_LOCK_RES_IN_PROGRESS | | 676 | if (res->state & (DLM_LOCK_RES_IN_PROGRESS | |
679 | DLM_LOCK_RES_MIGRATING | | ||
680 | DLM_LOCK_RES_RECOVERING)) { | 677 | DLM_LOCK_RES_RECOVERING)) { |
681 | /* move it to the tail and keep going */ | 678 | /* move it to the tail and keep going */ |
679 | res->state &= ~DLM_LOCK_RES_DIRTY; | ||
682 | spin_unlock(&res->spinlock); | 680 | spin_unlock(&res->spinlock); |
683 | mlog(0, "delaying list shuffling for in-" | 681 | mlog(0, "delaying list shuffling for in-" |
684 | "progress lockres %.*s, state=%d\n", | 682 | "progress lockres %.*s, state=%d\n", |
@@ -699,6 +697,7 @@ static int dlm_thread(void *data) | |||
699 | 697 | ||
700 | /* called while holding lockres lock */ | 698 | /* called while holding lockres lock */ |
701 | dlm_shuffle_lists(dlm, res); | 699 | dlm_shuffle_lists(dlm, res); |
700 | res->state &= ~DLM_LOCK_RES_DIRTY; | ||
702 | spin_unlock(&res->spinlock); | 701 | spin_unlock(&res->spinlock); |
703 | 702 | ||
704 | dlm_lockres_calc_usage(dlm, res); | 703 | dlm_lockres_calc_usage(dlm, res); |
@@ -709,11 +708,8 @@ in_progress: | |||
709 | /* if the lock was in-progress, stick | 708 | /* if the lock was in-progress, stick |
710 | * it on the back of the list */ | 709 | * it on the back of the list */ |
711 | if (delay) { | 710 | if (delay) { |
712 | /* ref for dirty_list */ | ||
713 | dlm_lockres_get(res); | ||
714 | spin_lock(&res->spinlock); | 711 | spin_lock(&res->spinlock); |
715 | list_add_tail(&res->dirty, &dlm->dirty_list); | 712 | __dlm_dirty_lockres(dlm, res); |
716 | res->state |= DLM_LOCK_RES_DIRTY; | ||
717 | spin_unlock(&res->spinlock); | 713 | spin_unlock(&res->spinlock); |
718 | } | 714 | } |
719 | dlm_lockres_put(res); | 715 | dlm_lockres_put(res); |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 37be4b2e0d4a..86ca085ef324 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -147,6 +147,10 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
147 | goto leave; | 147 | goto leave; |
148 | } | 148 | } |
149 | 149 | ||
150 | if (res->state & DLM_LOCK_RES_MIGRATING) { | ||
151 | status = DLM_MIGRATING; | ||
152 | goto leave; | ||
153 | } | ||
150 | 154 | ||
151 | /* see above for what the spec says about | 155 | /* see above for what the spec says about |
152 | * LKM_CANCEL and the lock queue state */ | 156 | * LKM_CANCEL and the lock queue state */ |
@@ -244,8 +248,8 @@ leave: | |||
244 | /* this should always be coupled with list removal */ | 248 | /* this should always be coupled with list removal */ |
245 | BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK)); | 249 | BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK)); |
246 | mlog(0, "lock %u:%llu should be gone now! refs=%d\n", | 250 | mlog(0, "lock %u:%llu should be gone now! refs=%d\n", |
247 | dlm_get_lock_cookie_node(lock->ml.cookie), | 251 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), |
248 | dlm_get_lock_cookie_seq(lock->ml.cookie), | 252 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), |
249 | atomic_read(&lock->lock_refs.refcount)-1); | 253 | atomic_read(&lock->lock_refs.refcount)-1); |
250 | dlm_lock_put(lock); | 254 | dlm_lock_put(lock); |
251 | } | 255 | } |
@@ -379,7 +383,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, | |||
379 | * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID, | 383 | * returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID, |
380 | * return value from dlmunlock_master | 384 | * return value from dlmunlock_master |
381 | */ | 385 | */ |
382 | int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data) | 386 | int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, |
387 | void **ret_data) | ||
383 | { | 388 | { |
384 | struct dlm_ctxt *dlm = data; | 389 | struct dlm_ctxt *dlm = data; |
385 | struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; | 390 | struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; |
@@ -502,8 +507,8 @@ not_found: | |||
502 | if (!found) | 507 | if (!found) |
503 | mlog(ML_ERROR, "failed to find lock to unlock! " | 508 | mlog(ML_ERROR, "failed to find lock to unlock! " |
504 | "cookie=%u:%llu\n", | 509 | "cookie=%u:%llu\n", |
505 | dlm_get_lock_cookie_node(unlock->cookie), | 510 | dlm_get_lock_cookie_node(be64_to_cpu(unlock->cookie)), |
506 | dlm_get_lock_cookie_seq(unlock->cookie)); | 511 | dlm_get_lock_cookie_seq(be64_to_cpu(unlock->cookie))); |
507 | else | 512 | else |
508 | dlm_lock_put(lock); | 513 | dlm_lock_put(lock); |
509 | 514 | ||
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 0afd8b9af70f..f30e63b9910c 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c | |||
@@ -887,7 +887,7 @@ static inline int ocfs2_translate_response(int response) | |||
887 | 887 | ||
888 | static int ocfs2_handle_response_message(struct o2net_msg *msg, | 888 | static int ocfs2_handle_response_message(struct o2net_msg *msg, |
889 | u32 len, | 889 | u32 len, |
890 | void *data) | 890 | void *data, void **ret_data) |
891 | { | 891 | { |
892 | unsigned int response_id, node_num; | 892 | unsigned int response_id, node_num; |
893 | int response_status; | 893 | int response_status; |
@@ -943,7 +943,7 @@ bail: | |||
943 | 943 | ||
944 | static int ocfs2_handle_vote_message(struct o2net_msg *msg, | 944 | static int ocfs2_handle_vote_message(struct o2net_msg *msg, |
945 | u32 len, | 945 | u32 len, |
946 | void *data) | 946 | void *data, void **ret_data) |
947 | { | 947 | { |
948 | int status; | 948 | int status; |
949 | struct ocfs2_super *osb = data; | 949 | struct ocfs2_super *osb = data; |
@@ -1007,7 +1007,7 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb) | |||
1007 | osb->net_key, | 1007 | osb->net_key, |
1008 | sizeof(struct ocfs2_response_msg), | 1008 | sizeof(struct ocfs2_response_msg), |
1009 | ocfs2_handle_response_message, | 1009 | ocfs2_handle_response_message, |
1010 | osb, &osb->osb_net_handlers); | 1010 | osb, NULL, &osb->osb_net_handlers); |
1011 | if (status) { | 1011 | if (status) { |
1012 | mlog_errno(status); | 1012 | mlog_errno(status); |
1013 | goto bail; | 1013 | goto bail; |
@@ -1017,7 +1017,7 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb) | |||
1017 | osb->net_key, | 1017 | osb->net_key, |
1018 | sizeof(struct ocfs2_vote_msg), | 1018 | sizeof(struct ocfs2_vote_msg), |
1019 | ocfs2_handle_vote_message, | 1019 | ocfs2_handle_vote_message, |
1020 | osb, &osb->osb_net_handlers); | 1020 | osb, NULL, &osb->osb_net_handlers); |
1021 | if (status) { | 1021 | if (status) { |
1022 | mlog_errno(status); | 1022 | mlog_errno(status); |
1023 | goto bail; | 1023 | goto bail; |