aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Teigland <teigland@redhat.com> 2008-03-18 15:22:11 -0400
committer: David Teigland <teigland@redhat.com> 2008-04-21 12:18:01 -0400
commit: d44e0fc704143624b3e88fbf8fbcfda7a83fd299 (patch)
tree: af6c5a5c6ae179051caf725e46f11ff8e2f0f7c8
parent: 761b9d3ffc953c24ceb55d8e12ff7e02b17e0484 (diff)
dlm: recover nodes that are removed and re-added
If a node is removed from a lockspace, and then added back before the dlm is notified of the removal, the dlm will not detect the removal and won't clear the old state from the node. This is fixed by using a list of added nodes so the membership recovery can detect when a newly added node is already in the member list.

Signed-off-by: David Teigland <teigland@redhat.com>
-rw-r--r-- fs/dlm/config.c 48
-rw-r--r-- fs/dlm/config.h 3
-rw-r--r-- fs/dlm/dlm_internal.h 4
-rw-r--r-- fs/dlm/member.c 34
-rw-r--r-- fs/dlm/recoverd.c 1
5 files changed, 74 insertions, 16 deletions
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 7ceaea3d983b..eac23bd288b2 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -284,6 +284,7 @@ struct node {
284 struct list_head list; /* space->members */ 284 struct list_head list; /* space->members */
285 int nodeid; 285 int nodeid;
286 int weight; 286 int weight;
287 int new;
287}; 288};
288 289
289static struct configfs_group_operations clusters_ops = { 290static struct configfs_group_operations clusters_ops = {
@@ -565,6 +566,7 @@ static struct config_item *make_node(struct config_group *g, const char *name)
565 config_item_init_type_name(&nd->item, name, &node_type); 566 config_item_init_type_name(&nd->item, name, &node_type);
566 nd->nodeid = -1; 567 nd->nodeid = -1;
567 nd->weight = 1; /* default weight of 1 if none is set */ 568 nd->weight = 1; /* default weight of 1 if none is set */
569 nd->new = 1; /* set to 0 once it's been read by dlm_nodeid_list() */
568 570
569 mutex_lock(&sp->members_lock); 571 mutex_lock(&sp->members_lock);
570 list_add(&nd->list, &sp->members); 572 list_add(&nd->list, &sp->members);
@@ -805,12 +807,13 @@ static void put_comm(struct comm *cm)
805} 807}
806 808
807/* caller must free mem */ 809/* caller must free mem */
808int dlm_nodeid_list(char *lsname, int **ids_out) 810int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
811 int **new_out, int *new_count_out)
809{ 812{
810 struct space *sp; 813 struct space *sp;
811 struct node *nd; 814 struct node *nd;
812 int i = 0, rv = 0; 815 int i = 0, rv = 0, ids_count = 0, new_count = 0;
813 int *ids; 816 int *ids, *new;
814 817
815 sp = get_space(lsname); 818 sp = get_space(lsname);
816 if (!sp) 819 if (!sp)
@@ -818,23 +821,50 @@ int dlm_nodeid_list(char *lsname, int **ids_out)
818 821
819 mutex_lock(&sp->members_lock); 822 mutex_lock(&sp->members_lock);
820 if (!sp->members_count) { 823 if (!sp->members_count) {
821 rv = 0; 824 rv = -EINVAL;
825 printk(KERN_ERR "dlm: zero members_count\n");
822 goto out; 826 goto out;
823 } 827 }
824 828
825 ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL); 829 ids_count = sp->members_count;
830
831 ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL);
826 if (!ids) { 832 if (!ids) {
827 rv = -ENOMEM; 833 rv = -ENOMEM;
828 goto out; 834 goto out;
829 } 835 }
830 836
831 rv = sp->members_count; 837 list_for_each_entry(nd, &sp->members, list) {
832 list_for_each_entry(nd, &sp->members, list)
833 ids[i++] = nd->nodeid; 838 ids[i++] = nd->nodeid;
839 if (nd->new)
840 new_count++;
841 }
842
843 if (ids_count != i)
844 printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i);
845
846 if (!new_count)
847 goto out_ids;
848
849 new = kcalloc(new_count, sizeof(int), GFP_KERNEL);
850 if (!new) {
851 kfree(ids);
852 rv = -ENOMEM;
853 goto out;
854 }
834 855
835 if (rv != i) 856 i = 0;
836 printk("bad nodeid count %d %d\n", rv, i); 857 list_for_each_entry(nd, &sp->members, list) {
858 if (nd->new) {
859 new[i++] = nd->nodeid;
860 nd->new = 0;
861 }
862 }
863 *new_count_out = new_count;
864 *new_out = new;
837 865
866 out_ids:
867 *ids_count_out = ids_count;
838 *ids_out = ids; 868 *ids_out = ids;
839 out: 869 out:
840 mutex_unlock(&sp->members_lock); 870 mutex_unlock(&sp->members_lock);
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index a3170fe22090..4f1d6fce58c5 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -35,7 +35,8 @@ extern struct dlm_config_info dlm_config;
35int dlm_config_init(void); 35int dlm_config_init(void);
36void dlm_config_exit(void); 36void dlm_config_exit(void);
37int dlm_node_weight(char *lsname, int nodeid); 37int dlm_node_weight(char *lsname, int nodeid);
38int dlm_nodeid_list(char *lsname, int **ids_out); 38int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
39 int **new_out, int *new_count_out);
39int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); 40int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
40int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); 41int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
41int dlm_our_nodeid(void); 42int dlm_our_nodeid(void);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index d30ea8b433a2..c70c8e58358f 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -133,8 +133,10 @@ struct dlm_member {
133 133
134struct dlm_recover { 134struct dlm_recover {
135 struct list_head list; 135 struct list_head list;
136 int *nodeids; 136 int *nodeids; /* nodeids of all members */
137 int node_count; 137 int node_count;
138 int *new; /* nodeids of new members */
139 int new_count;
138 uint64_t seq; 140 uint64_t seq;
139}; 141};
140 142
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index fa17f5a27883..26133f05ae3a 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -210,6 +210,23 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
210 } 210 }
211 } 211 }
212 212
213 /* Add an entry to ls_nodes_gone for members that were removed and
214 then added again, so that previous state for these nodes will be
215 cleared during recovery. */
216
217 for (i = 0; i < rv->new_count; i++) {
218 if (!dlm_is_member(ls, rv->new[i]))
219 continue;
220 log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
221
222 memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
223 if (!memb)
224 return -ENOMEM;
225 memb->nodeid = rv->new[i];
226 list_add_tail(&memb->list, &ls->ls_nodes_gone);
227 neg++;
228 }
229
213 /* add new members to ls_nodes */ 230 /* add new members to ls_nodes */
214 231
215 for (i = 0; i < rv->node_count; i++) { 232 for (i = 0; i < rv->node_count; i++) {
@@ -314,15 +331,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
314int dlm_ls_start(struct dlm_ls *ls) 331int dlm_ls_start(struct dlm_ls *ls)
315{ 332{
316 struct dlm_recover *rv = NULL, *rv_old; 333 struct dlm_recover *rv = NULL, *rv_old;
317 int *ids = NULL; 334 int *ids = NULL, *new = NULL;
318 int error, count; 335 int error, ids_count = 0, new_count = 0;
319 336
320 rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); 337 rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
321 if (!rv) 338 if (!rv)
322 return -ENOMEM; 339 return -ENOMEM;
323 340
324 error = count = dlm_nodeid_list(ls->ls_name, &ids); 341 error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count,
325 if (error <= 0) 342 &new, &new_count);
343 if (error < 0)
326 goto fail; 344 goto fail;
327 345
328 spin_lock(&ls->ls_recover_lock); 346 spin_lock(&ls->ls_recover_lock);
@@ -337,14 +355,19 @@ int dlm_ls_start(struct dlm_ls *ls)
337 } 355 }
338 356
339 rv->nodeids = ids; 357 rv->nodeids = ids;
340 rv->node_count = count; 358 rv->node_count = ids_count;
359 rv->new = new;
360 rv->new_count = new_count;
341 rv->seq = ++ls->ls_recover_seq; 361 rv->seq = ++ls->ls_recover_seq;
342 rv_old = ls->ls_recover_args; 362 rv_old = ls->ls_recover_args;
343 ls->ls_recover_args = rv; 363 ls->ls_recover_args = rv;
344 spin_unlock(&ls->ls_recover_lock); 364 spin_unlock(&ls->ls_recover_lock);
345 365
346 if (rv_old) { 366 if (rv_old) {
367 log_error(ls, "unused recovery %llx %d",
368 (unsigned long long)rv_old->seq, rv_old->node_count);
347 kfree(rv_old->nodeids); 369 kfree(rv_old->nodeids);
370 kfree(rv_old->new);
348 kfree(rv_old); 371 kfree(rv_old);
349 } 372 }
350 373
@@ -354,6 +377,7 @@ int dlm_ls_start(struct dlm_ls *ls)
354 fail: 377 fail:
355 kfree(rv); 378 kfree(rv);
356 kfree(ids); 379 kfree(ids);
380 kfree(new);
357 return error; 381 return error;
358} 382}
359 383
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 997f9531d594..fd677c8c3d3b 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -257,6 +257,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
257 if (rv) { 257 if (rv) {
258 ls_recover(ls, rv); 258 ls_recover(ls, rv);
259 kfree(rv->nodeids); 259 kfree(rv->nodeids);
260 kfree(rv->new);
260 kfree(rv); 261 kfree(rv);
261 } 262 }
262} 263}