about | summary | refs | log | tree | commit | diff | stats
path: root/fs/dlm/member.c
diff options
context:
space:
mode:
author: David Teigland <teigland@redhat.com>, 2011-11-02 15:30:58 -0400
committer: David Teigland <teigland@redhat.com>, 2012-01-04 09:56:31 -0500
commit: 60f98d1839376d30e13f3e452dce2433fad3060e (patch)
tree: b8b43859ad26519bd75a40920f6d1ca46f2d44a5 /fs/dlm/member.c
parent: 757a42719635495779462514458bbfbf12a37dac (diff)
dlm: add recovery callbacks
These new callbacks notify the dlm user about lock recovery. GFS2, and possibly others, need to be aware of when the dlm will be doing lock recovery for a failed lockspace member.

In the past, this coordination has been done between dlm and file system daemons in userspace, which then direct their kernel counterparts. These callbacks allow the same coordination directly, and more simply.

Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm/member.c')
-rw-r--r-- fs/dlm/member.c | 197
1 files changed, 125 insertions, 72 deletions
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index eebc52aae82e..862640a36d5c 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -27,7 +27,7 @@ int dlm_slots_version(struct dlm_header *h)
27} 27}
28 28
29void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, 29void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc,
30 struct dlm_member *memb) 30 struct dlm_member *memb)
31{ 31{
32 struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; 32 struct rcom_config *rf = (struct rcom_config *)rc->rc_buf;
33 33
@@ -317,59 +317,51 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
317 } 317 }
318} 318}
319 319
320static int dlm_add_member(struct dlm_ls *ls, int nodeid) 320static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node)
321{ 321{
322 struct dlm_member *memb; 322 struct dlm_member *memb;
323 int w, error; 323 int error;
324 324
325 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); 325 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
326 if (!memb) 326 if (!memb)
327 return -ENOMEM; 327 return -ENOMEM;
328 328
329 w = dlm_node_weight(ls->ls_name, nodeid); 329 error = dlm_lowcomms_connect_node(node->nodeid);
330 if (w < 0) {
331 kfree(memb);
332 return w;
333 }
334
335 error = dlm_lowcomms_connect_node(nodeid);
336 if (error < 0) { 330 if (error < 0) {
337 kfree(memb); 331 kfree(memb);
338 return error; 332 return error;
339 } 333 }
340 334
341 memb->nodeid = nodeid; 335 memb->nodeid = node->nodeid;
342 memb->weight = w; 336 memb->weight = node->weight;
337 memb->comm_seq = node->comm_seq;
343 add_ordered_member(ls, memb); 338 add_ordered_member(ls, memb);
344 ls->ls_num_nodes++; 339 ls->ls_num_nodes++;
345 return 0; 340 return 0;
346} 341}
347 342
348static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) 343static struct dlm_member *find_memb(struct list_head *head, int nodeid)
349{
350 list_move(&memb->list, &ls->ls_nodes_gone);
351 ls->ls_num_nodes--;
352}
353
354int dlm_is_member(struct dlm_ls *ls, int nodeid)
355{ 344{
356 struct dlm_member *memb; 345 struct dlm_member *memb;
357 346
358 list_for_each_entry(memb, &ls->ls_nodes, list) { 347 list_for_each_entry(memb, head, list) {
359 if (memb->nodeid == nodeid) 348 if (memb->nodeid == nodeid)
360 return 1; 349 return memb;
361 } 350 }
351 return NULL;
352}
353
354int dlm_is_member(struct dlm_ls *ls, int nodeid)
355{
356 if (find_memb(&ls->ls_nodes, nodeid))
357 return 1;
362 return 0; 358 return 0;
363} 359}
364 360
365int dlm_is_removed(struct dlm_ls *ls, int nodeid) 361int dlm_is_removed(struct dlm_ls *ls, int nodeid)
366{ 362{
367 struct dlm_member *memb; 363 if (find_memb(&ls->ls_nodes_gone, nodeid))
368 364 return 1;
369 list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
370 if (memb->nodeid == nodeid)
371 return 1;
372 }
373 return 0; 365 return 0;
374} 366}
375 367
@@ -460,10 +452,88 @@ static int ping_members(struct dlm_ls *ls)
460 return error; 452 return error;
461} 453}
462 454
455static void dlm_lsop_recover_prep(struct dlm_ls *ls)
456{
457 if (!ls->ls_ops || !ls->ls_ops->recover_prep)
458 return;
459 ls->ls_ops->recover_prep(ls->ls_ops_arg);
460}
461
462static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
463{
464 struct dlm_slot slot;
465 uint32_t seq;
466 int error;
467
468 if (!ls->ls_ops || !ls->ls_ops->recover_slot)
469 return;
470
471 /* if there is no comms connection with this node
472 or the present comms connection is newer
473 than the one when this member was added, then
474 we consider the node to have failed (versus
475 being removed due to dlm_release_lockspace) */
476
477 error = dlm_comm_seq(memb->nodeid, &seq);
478
479 if (!error && seq == memb->comm_seq)
480 return;
481
482 slot.nodeid = memb->nodeid;
483 slot.slot = memb->slot;
484
485 ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot);
486}
487
488void dlm_lsop_recover_done(struct dlm_ls *ls)
489{
490 struct dlm_member *memb;
491 struct dlm_slot *slots;
492 int i, num;
493
494 if (!ls->ls_ops || !ls->ls_ops->recover_done)
495 return;
496
497 num = ls->ls_num_nodes;
498
499 slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL);
500 if (!slots)
501 return;
502
503 i = 0;
504 list_for_each_entry(memb, &ls->ls_nodes, list) {
505 if (i == num) {
506 log_error(ls, "dlm_lsop_recover_done bad num %d", num);
507 goto out;
508 }
509 slots[i].nodeid = memb->nodeid;
510 slots[i].slot = memb->slot;
511 i++;
512 }
513
514 ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num,
515 ls->ls_slot, ls->ls_generation);
516 out:
517 kfree(slots);
518}
519
520static struct dlm_config_node *find_config_node(struct dlm_recover *rv,
521 int nodeid)
522{
523 int i;
524
525 for (i = 0; i < rv->nodes_count; i++) {
526 if (rv->nodes[i].nodeid == nodeid)
527 return &rv->nodes[i];
528 }
529 return NULL;
530}
531
463int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) 532int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
464{ 533{
465 struct dlm_member *memb, *safe; 534 struct dlm_member *memb, *safe;
466 int i, error, found, pos = 0, neg = 0, low = -1; 535 struct dlm_config_node *node;
536 int i, error, neg = 0, low = -1;
467 537
468 /* previously removed members that we've not finished removing need to 538 /* previously removed members that we've not finished removing need to
469 count as a negative change so the "neg" recovery steps will happen */ 539 count as a negative change so the "neg" recovery steps will happen */
@@ -476,46 +546,32 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
476 /* move departed members from ls_nodes to ls_nodes_gone */ 546 /* move departed members from ls_nodes to ls_nodes_gone */
477 547
478 list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { 548 list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
479 found = 0; 549 node = find_config_node(rv, memb->nodeid);
480 for (i = 0; i < rv->node_count; i++) { 550 if (node && !node->new)
481 if (memb->nodeid == rv->nodeids[i]) { 551 continue;
482 found = 1;
483 break;
484 }
485 }
486 552
487 if (!found) { 553 if (!node) {
488 neg++;
489 dlm_remove_member(ls, memb);
490 log_debug(ls, "remove member %d", memb->nodeid); 554 log_debug(ls, "remove member %d", memb->nodeid);
555 } else {
556 /* removed and re-added */
557 log_debug(ls, "remove member %d comm_seq %u %u",
558 memb->nodeid, memb->comm_seq, node->comm_seq);
491 } 559 }
492 }
493
494 /* Add an entry to ls_nodes_gone for members that were removed and
495 then added again, so that previous state for these nodes will be
496 cleared during recovery. */
497 560
498 for (i = 0; i < rv->new_count; i++) {
499 if (!dlm_is_member(ls, rv->new[i]))
500 continue;
501 log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
502
503 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
504 if (!memb)
505 return -ENOMEM;
506 memb->nodeid = rv->new[i];
507 list_add_tail(&memb->list, &ls->ls_nodes_gone);
508 neg++; 561 neg++;
562 list_move(&memb->list, &ls->ls_nodes_gone);
563 ls->ls_num_nodes--;
564 dlm_lsop_recover_slot(ls, memb);
509 } 565 }
510 566
511 /* add new members to ls_nodes */ 567 /* add new members to ls_nodes */
512 568
513 for (i = 0; i < rv->node_count; i++) { 569 for (i = 0; i < rv->nodes_count; i++) {
514 if (dlm_is_member(ls, rv->nodeids[i])) 570 node = &rv->nodes[i];
571 if (dlm_is_member(ls, node->nodeid))
515 continue; 572 continue;
516 dlm_add_member(ls, rv->nodeids[i]); 573 dlm_add_member(ls, node);
517 pos++; 574 log_debug(ls, "add member %d", node->nodeid);
518 log_debug(ls, "add member %d", rv->nodeids[i]);
519 } 575 }
520 576
521 list_for_each_entry(memb, &ls->ls_nodes, list) { 577 list_for_each_entry(memb, &ls->ls_nodes, list) {
@@ -609,21 +665,22 @@ int dlm_ls_stop(struct dlm_ls *ls)
609 665
610 if (!ls->ls_recover_begin) 666 if (!ls->ls_recover_begin)
611 ls->ls_recover_begin = jiffies; 667 ls->ls_recover_begin = jiffies;
668
669 dlm_lsop_recover_prep(ls);
612 return 0; 670 return 0;
613} 671}
614 672
615int dlm_ls_start(struct dlm_ls *ls) 673int dlm_ls_start(struct dlm_ls *ls)
616{ 674{
617 struct dlm_recover *rv = NULL, *rv_old; 675 struct dlm_recover *rv = NULL, *rv_old;
618 int *ids = NULL, *new = NULL; 676 struct dlm_config_node *nodes;
619 int error, ids_count = 0, new_count = 0; 677 int error, count;
620 678
621 rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS); 679 rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS);
622 if (!rv) 680 if (!rv)
623 return -ENOMEM; 681 return -ENOMEM;
624 682
625 error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, 683 error = dlm_config_nodes(ls->ls_name, &nodes, &count);
626 &new, &new_count);
627 if (error < 0) 684 if (error < 0)
628 goto fail; 685 goto fail;
629 686
@@ -638,10 +695,8 @@ int dlm_ls_start(struct dlm_ls *ls)
638 goto fail; 695 goto fail;
639 } 696 }
640 697
641 rv->nodeids = ids; 698 rv->nodes = nodes;
642 rv->node_count = ids_count; 699 rv->nodes_count = count;
643 rv->new = new;
644 rv->new_count = new_count;
645 rv->seq = ++ls->ls_recover_seq; 700 rv->seq = ++ls->ls_recover_seq;
646 rv_old = ls->ls_recover_args; 701 rv_old = ls->ls_recover_args;
647 ls->ls_recover_args = rv; 702 ls->ls_recover_args = rv;
@@ -649,9 +704,8 @@ int dlm_ls_start(struct dlm_ls *ls)
649 704
650 if (rv_old) { 705 if (rv_old) {
651 log_error(ls, "unused recovery %llx %d", 706 log_error(ls, "unused recovery %llx %d",
652 (unsigned long long)rv_old->seq, rv_old->node_count); 707 (unsigned long long)rv_old->seq, rv_old->nodes_count);
653 kfree(rv_old->nodeids); 708 kfree(rv_old->nodes);
654 kfree(rv_old->new);
655 kfree(rv_old); 709 kfree(rv_old);
656 } 710 }
657 711
@@ -660,8 +714,7 @@ int dlm_ls_start(struct dlm_ls *ls)
660 714
661 fail: 715 fail:
662 kfree(rv); 716 kfree(rv);
663 kfree(ids); 717 kfree(nodes);
664 kfree(new);
665 return error; 718 return error;
666} 719}
667 720