diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch) | |
tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /fs/dlm | |
parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff) |
Patched in Tegra support.
Diffstat (limited to 'fs/dlm')
-rw-r--r-- | fs/dlm/Kconfig | 2 | ||||
-rw-r--r-- | fs/dlm/ast.c | 7 | ||||
-rw-r--r-- | fs/dlm/config.c | 206 | ||||
-rw-r--r-- | fs/dlm/config.h | 20 | ||||
-rw-r--r-- | fs/dlm/debug_fs.c | 126 | ||||
-rw-r--r-- | fs/dlm/dir.c | 295 | ||||
-rw-r--r-- | fs/dlm/dir.h | 7 | ||||
-rw-r--r-- | fs/dlm/dlm_internal.h | 183 | ||||
-rw-r--r-- | fs/dlm/lock.c | 1874 | ||||
-rw-r--r-- | fs/dlm/lock.h | 13 | ||||
-rw-r--r-- | fs/dlm/lockspace.c | 151 | ||||
-rw-r--r-- | fs/dlm/lowcomms.c | 274 | ||||
-rw-r--r-- | fs/dlm/lowcomms.h | 2 | ||||
-rw-r--r-- | fs/dlm/main.c | 2 | ||||
-rw-r--r-- | fs/dlm/member.c | 503 | ||||
-rw-r--r-- | fs/dlm/member.h | 10 | ||||
-rw-r--r-- | fs/dlm/memory.c | 8 | ||||
-rw-r--r-- | fs/dlm/netlink.c | 8 | ||||
-rw-r--r-- | fs/dlm/rcom.c | 267 | ||||
-rw-r--r-- | fs/dlm/rcom.h | 3 | ||||
-rw-r--r-- | fs/dlm/recover.c | 452 | ||||
-rw-r--r-- | fs/dlm/recover.h | 2 | ||||
-rw-r--r-- | fs/dlm/recoverd.c | 103 | ||||
-rw-r--r-- | fs/dlm/recoverd.h | 1 | ||||
-rw-r--r-- | fs/dlm/requestqueue.c | 43 | ||||
-rw-r--r-- | fs/dlm/user.c | 12 |
26 files changed, 1287 insertions, 3287 deletions
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index e4242c3f848..1897eb1b4b6 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | menuconfig DLM | 1 | menuconfig DLM |
2 | tristate "Distributed Lock Manager (DLM)" | 2 | tristate "Distributed Lock Manager (DLM)" |
3 | depends on INET | 3 | depends on EXPERIMENTAL && INET |
4 | depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n) | 4 | depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n) |
5 | select IP_SCTP | 5 | select IP_SCTP |
6 | help | 6 | help |
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 27a6ba9aaee..90e5997262e 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c | |||
@@ -15,8 +15,8 @@ | |||
15 | #include "lock.h" | 15 | #include "lock.h" |
16 | #include "user.h" | 16 | #include "user.h" |
17 | 17 | ||
18 | static uint64_t dlm_cb_seq; | 18 | static uint64_t dlm_cb_seq; |
19 | static DEFINE_SPINLOCK(dlm_cb_seq_spin); | 19 | static spinlock_t dlm_cb_seq_spin; |
20 | 20 | ||
21 | static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) | 21 | static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) |
22 | { | 22 | { |
@@ -310,7 +310,6 @@ void dlm_callback_resume(struct dlm_ls *ls) | |||
310 | } | 310 | } |
311 | mutex_unlock(&ls->ls_cb_mutex); | 311 | mutex_unlock(&ls->ls_cb_mutex); |
312 | 312 | ||
313 | if (count) | 313 | log_debug(ls, "dlm_callback_resume %d", count); |
314 | log_debug(ls, "dlm_callback_resume %d", count); | ||
315 | } | 314 | } |
316 | 315 | ||
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index a0387dd8b1f..6cf72fcc0d0 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/in.h> | 18 | #include <linux/in.h> |
19 | #include <linux/in6.h> | 19 | #include <linux/in6.h> |
20 | #include <linux/dlmconstants.h> | ||
21 | #include <net/ipv6.h> | 20 | #include <net/ipv6.h> |
22 | #include <net/sock.h> | 21 | #include <net/sock.h> |
23 | 22 | ||
@@ -37,7 +36,6 @@ | |||
37 | static struct config_group *space_list; | 36 | static struct config_group *space_list; |
38 | static struct config_group *comm_list; | 37 | static struct config_group *comm_list; |
39 | static struct dlm_comm *local_comm; | 38 | static struct dlm_comm *local_comm; |
40 | static uint32_t dlm_comm_count; | ||
41 | 39 | ||
42 | struct dlm_clusters; | 40 | struct dlm_clusters; |
43 | struct dlm_cluster; | 41 | struct dlm_cluster; |
@@ -96,6 +94,7 @@ struct dlm_cluster { | |||
96 | unsigned int cl_tcp_port; | 94 | unsigned int cl_tcp_port; |
97 | unsigned int cl_buffer_size; | 95 | unsigned int cl_buffer_size; |
98 | unsigned int cl_rsbtbl_size; | 96 | unsigned int cl_rsbtbl_size; |
97 | unsigned int cl_dirtbl_size; | ||
99 | unsigned int cl_recover_timer; | 98 | unsigned int cl_recover_timer; |
100 | unsigned int cl_toss_secs; | 99 | unsigned int cl_toss_secs; |
101 | unsigned int cl_scan_secs; | 100 | unsigned int cl_scan_secs; |
@@ -104,14 +103,13 @@ struct dlm_cluster { | |||
104 | unsigned int cl_timewarn_cs; | 103 | unsigned int cl_timewarn_cs; |
105 | unsigned int cl_waitwarn_us; | 104 | unsigned int cl_waitwarn_us; |
106 | unsigned int cl_new_rsb_count; | 105 | unsigned int cl_new_rsb_count; |
107 | unsigned int cl_recover_callbacks; | ||
108 | char cl_cluster_name[DLM_LOCKSPACE_LEN]; | ||
109 | }; | 106 | }; |
110 | 107 | ||
111 | enum { | 108 | enum { |
112 | CLUSTER_ATTR_TCP_PORT = 0, | 109 | CLUSTER_ATTR_TCP_PORT = 0, |
113 | CLUSTER_ATTR_BUFFER_SIZE, | 110 | CLUSTER_ATTR_BUFFER_SIZE, |
114 | CLUSTER_ATTR_RSBTBL_SIZE, | 111 | CLUSTER_ATTR_RSBTBL_SIZE, |
112 | CLUSTER_ATTR_DIRTBL_SIZE, | ||
115 | CLUSTER_ATTR_RECOVER_TIMER, | 113 | CLUSTER_ATTR_RECOVER_TIMER, |
116 | CLUSTER_ATTR_TOSS_SECS, | 114 | CLUSTER_ATTR_TOSS_SECS, |
117 | CLUSTER_ATTR_SCAN_SECS, | 115 | CLUSTER_ATTR_SCAN_SECS, |
@@ -120,8 +118,6 @@ enum { | |||
120 | CLUSTER_ATTR_TIMEWARN_CS, | 118 | CLUSTER_ATTR_TIMEWARN_CS, |
121 | CLUSTER_ATTR_WAITWARN_US, | 119 | CLUSTER_ATTR_WAITWARN_US, |
122 | CLUSTER_ATTR_NEW_RSB_COUNT, | 120 | CLUSTER_ATTR_NEW_RSB_COUNT, |
123 | CLUSTER_ATTR_RECOVER_CALLBACKS, | ||
124 | CLUSTER_ATTR_CLUSTER_NAME, | ||
125 | }; | 121 | }; |
126 | 122 | ||
127 | struct cluster_attribute { | 123 | struct cluster_attribute { |
@@ -130,27 +126,6 @@ struct cluster_attribute { | |||
130 | ssize_t (*store)(struct dlm_cluster *, const char *, size_t); | 126 | ssize_t (*store)(struct dlm_cluster *, const char *, size_t); |
131 | }; | 127 | }; |
132 | 128 | ||
133 | static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf) | ||
134 | { | ||
135 | return sprintf(buf, "%s\n", cl->cl_cluster_name); | ||
136 | } | ||
137 | |||
138 | static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, | ||
139 | const char *buf, size_t len) | ||
140 | { | ||
141 | strncpy(dlm_config.ci_cluster_name, buf, DLM_LOCKSPACE_LEN); | ||
142 | strncpy(cl->cl_cluster_name, buf, DLM_LOCKSPACE_LEN); | ||
143 | return len; | ||
144 | } | ||
145 | |||
146 | static struct cluster_attribute cluster_attr_cluster_name = { | ||
147 | .attr = { .ca_owner = THIS_MODULE, | ||
148 | .ca_name = "cluster_name", | ||
149 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
150 | .show = cluster_cluster_name_read, | ||
151 | .store = cluster_cluster_name_write, | ||
152 | }; | ||
153 | |||
154 | static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, | 129 | static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, |
155 | int *info_field, int check_zero, | 130 | int *info_field, int check_zero, |
156 | const char *buf, size_t len) | 131 | const char *buf, size_t len) |
@@ -187,6 +162,7 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write) | |||
187 | CLUSTER_ATTR(tcp_port, 1); | 162 | CLUSTER_ATTR(tcp_port, 1); |
188 | CLUSTER_ATTR(buffer_size, 1); | 163 | CLUSTER_ATTR(buffer_size, 1); |
189 | CLUSTER_ATTR(rsbtbl_size, 1); | 164 | CLUSTER_ATTR(rsbtbl_size, 1); |
165 | CLUSTER_ATTR(dirtbl_size, 1); | ||
190 | CLUSTER_ATTR(recover_timer, 1); | 166 | CLUSTER_ATTR(recover_timer, 1); |
191 | CLUSTER_ATTR(toss_secs, 1); | 167 | CLUSTER_ATTR(toss_secs, 1); |
192 | CLUSTER_ATTR(scan_secs, 1); | 168 | CLUSTER_ATTR(scan_secs, 1); |
@@ -195,12 +171,12 @@ CLUSTER_ATTR(protocol, 0); | |||
195 | CLUSTER_ATTR(timewarn_cs, 1); | 171 | CLUSTER_ATTR(timewarn_cs, 1); |
196 | CLUSTER_ATTR(waitwarn_us, 0); | 172 | CLUSTER_ATTR(waitwarn_us, 0); |
197 | CLUSTER_ATTR(new_rsb_count, 0); | 173 | CLUSTER_ATTR(new_rsb_count, 0); |
198 | CLUSTER_ATTR(recover_callbacks, 0); | ||
199 | 174 | ||
200 | static struct configfs_attribute *cluster_attrs[] = { | 175 | static struct configfs_attribute *cluster_attrs[] = { |
201 | [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, | 176 | [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, |
202 | [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, | 177 | [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, |
203 | [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, | 178 | [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, |
179 | [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, | ||
204 | [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, | 180 | [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, |
205 | [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, | 181 | [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, |
206 | [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, | 182 | [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, |
@@ -209,8 +185,6 @@ static struct configfs_attribute *cluster_attrs[] = { | |||
209 | [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, | 185 | [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, |
210 | [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, | 186 | [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, |
211 | [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, | 187 | [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, |
212 | [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks.attr, | ||
213 | [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name.attr, | ||
214 | NULL, | 188 | NULL, |
215 | }; | 189 | }; |
216 | 190 | ||
@@ -319,7 +293,6 @@ struct dlm_comms { | |||
319 | 293 | ||
320 | struct dlm_comm { | 294 | struct dlm_comm { |
321 | struct config_item item; | 295 | struct config_item item; |
322 | int seq; | ||
323 | int nodeid; | 296 | int nodeid; |
324 | int local; | 297 | int local; |
325 | int addr_count; | 298 | int addr_count; |
@@ -336,7 +309,6 @@ struct dlm_node { | |||
336 | int nodeid; | 309 | int nodeid; |
337 | int weight; | 310 | int weight; |
338 | int new; | 311 | int new; |
339 | int comm_seq; /* copy of cm->seq when nd->nodeid is set */ | ||
340 | }; | 312 | }; |
341 | 313 | ||
342 | static struct configfs_group_operations clusters_ops = { | 314 | static struct configfs_group_operations clusters_ops = { |
@@ -474,6 +446,7 @@ static struct config_group *make_cluster(struct config_group *g, | |||
474 | cl->cl_tcp_port = dlm_config.ci_tcp_port; | 446 | cl->cl_tcp_port = dlm_config.ci_tcp_port; |
475 | cl->cl_buffer_size = dlm_config.ci_buffer_size; | 447 | cl->cl_buffer_size = dlm_config.ci_buffer_size; |
476 | cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; | 448 | cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; |
449 | cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; | ||
477 | cl->cl_recover_timer = dlm_config.ci_recover_timer; | 450 | cl->cl_recover_timer = dlm_config.ci_recover_timer; |
478 | cl->cl_toss_secs = dlm_config.ci_toss_secs; | 451 | cl->cl_toss_secs = dlm_config.ci_toss_secs; |
479 | cl->cl_scan_secs = dlm_config.ci_scan_secs; | 452 | cl->cl_scan_secs = dlm_config.ci_scan_secs; |
@@ -482,9 +455,6 @@ static struct config_group *make_cluster(struct config_group *g, | |||
482 | cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; | 455 | cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; |
483 | cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; | 456 | cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; |
484 | cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; | 457 | cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; |
485 | cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks; | ||
486 | memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name, | ||
487 | DLM_LOCKSPACE_LEN); | ||
488 | 458 | ||
489 | space_list = &sps->ss_group; | 459 | space_list = &sps->ss_group; |
490 | comm_list = &cms->cs_group; | 460 | comm_list = &cms->cs_group; |
@@ -588,11 +558,6 @@ static struct config_item *make_comm(struct config_group *g, const char *name) | |||
588 | return ERR_PTR(-ENOMEM); | 558 | return ERR_PTR(-ENOMEM); |
589 | 559 | ||
590 | config_item_init_type_name(&cm->item, name, &comm_type); | 560 | config_item_init_type_name(&cm->item, name, &comm_type); |
591 | |||
592 | cm->seq = dlm_comm_count++; | ||
593 | if (!cm->seq) | ||
594 | cm->seq = dlm_comm_count++; | ||
595 | |||
596 | cm->nodeid = -1; | 561 | cm->nodeid = -1; |
597 | cm->local = 0; | 562 | cm->local = 0; |
598 | cm->addr_count = 0; | 563 | cm->addr_count = 0; |
@@ -750,7 +715,6 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, | |||
750 | static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) | 715 | static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) |
751 | { | 716 | { |
752 | struct sockaddr_storage *addr; | 717 | struct sockaddr_storage *addr; |
753 | int rv; | ||
754 | 718 | ||
755 | if (len != sizeof(struct sockaddr_storage)) | 719 | if (len != sizeof(struct sockaddr_storage)) |
756 | return -EINVAL; | 720 | return -EINVAL; |
@@ -763,13 +727,6 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) | |||
763 | return -ENOMEM; | 727 | return -ENOMEM; |
764 | 728 | ||
765 | memcpy(addr, buf, len); | 729 | memcpy(addr, buf, len); |
766 | |||
767 | rv = dlm_lowcomms_addr(cm->nodeid, addr, len); | ||
768 | if (rv) { | ||
769 | kfree(addr); | ||
770 | return rv; | ||
771 | } | ||
772 | |||
773 | cm->addr[cm->addr_count++] = addr; | 730 | cm->addr[cm->addr_count++] = addr; |
774 | return len; | 731 | return len; |
775 | } | 732 | } |
@@ -844,10 +801,7 @@ static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf) | |||
844 | static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, | 801 | static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, |
845 | size_t len) | 802 | size_t len) |
846 | { | 803 | { |
847 | uint32_t seq = 0; | ||
848 | nd->nodeid = simple_strtol(buf, NULL, 0); | 804 | nd->nodeid = simple_strtol(buf, NULL, 0); |
849 | dlm_comm_seq(nd->nodeid, &seq); | ||
850 | nd->comm_seq = seq; | ||
851 | return len; | 805 | return len; |
852 | } | 806 | } |
853 | 807 | ||
@@ -886,7 +840,34 @@ static void put_space(struct dlm_space *sp) | |||
886 | config_item_put(&sp->group.cg_item); | 840 | config_item_put(&sp->group.cg_item); |
887 | } | 841 | } |
888 | 842 | ||
889 | static struct dlm_comm *get_comm(int nodeid) | 843 | static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) |
844 | { | ||
845 | switch (x->ss_family) { | ||
846 | case AF_INET: { | ||
847 | struct sockaddr_in *sinx = (struct sockaddr_in *)x; | ||
848 | struct sockaddr_in *siny = (struct sockaddr_in *)y; | ||
849 | if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) | ||
850 | return 0; | ||
851 | if (sinx->sin_port != siny->sin_port) | ||
852 | return 0; | ||
853 | break; | ||
854 | } | ||
855 | case AF_INET6: { | ||
856 | struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; | ||
857 | struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; | ||
858 | if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) | ||
859 | return 0; | ||
860 | if (sinx->sin6_port != siny->sin6_port) | ||
861 | return 0; | ||
862 | break; | ||
863 | } | ||
864 | default: | ||
865 | return 0; | ||
866 | } | ||
867 | return 1; | ||
868 | } | ||
869 | |||
870 | static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | ||
890 | { | 871 | { |
891 | struct config_item *i; | 872 | struct config_item *i; |
892 | struct dlm_comm *cm = NULL; | 873 | struct dlm_comm *cm = NULL; |
@@ -900,11 +881,19 @@ static struct dlm_comm *get_comm(int nodeid) | |||
900 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { | 881 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { |
901 | cm = config_item_to_comm(i); | 882 | cm = config_item_to_comm(i); |
902 | 883 | ||
903 | if (cm->nodeid != nodeid) | 884 | if (nodeid) { |
904 | continue; | 885 | if (cm->nodeid != nodeid) |
905 | found = 1; | 886 | continue; |
906 | config_item_get(i); | 887 | found = 1; |
907 | break; | 888 | config_item_get(i); |
889 | break; | ||
890 | } else { | ||
891 | if (!cm->addr_count || !addr_compare(cm->addr[0], addr)) | ||
892 | continue; | ||
893 | found = 1; | ||
894 | config_item_get(i); | ||
895 | break; | ||
896 | } | ||
908 | } | 897 | } |
909 | mutex_unlock(&clusters_root.subsys.su_mutex); | 898 | mutex_unlock(&clusters_root.subsys.su_mutex); |
910 | 899 | ||
@@ -919,13 +908,13 @@ static void put_comm(struct dlm_comm *cm) | |||
919 | } | 908 | } |
920 | 909 | ||
921 | /* caller must free mem */ | 910 | /* caller must free mem */ |
922 | int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, | 911 | int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, |
923 | int *count_out) | 912 | int **new_out, int *new_count_out) |
924 | { | 913 | { |
925 | struct dlm_space *sp; | 914 | struct dlm_space *sp; |
926 | struct dlm_node *nd; | 915 | struct dlm_node *nd; |
927 | struct dlm_config_node *nodes, *node; | 916 | int i = 0, rv = 0, ids_count = 0, new_count = 0; |
928 | int rv, count; | 917 | int *ids, *new; |
929 | 918 | ||
930 | sp = get_space(lsname); | 919 | sp = get_space(lsname); |
931 | if (!sp) | 920 | if (!sp) |
@@ -938,40 +927,93 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, | |||
938 | goto out; | 927 | goto out; |
939 | } | 928 | } |
940 | 929 | ||
941 | count = sp->members_count; | 930 | ids_count = sp->members_count; |
942 | 931 | ||
943 | nodes = kcalloc(count, sizeof(struct dlm_config_node), GFP_NOFS); | 932 | ids = kcalloc(ids_count, sizeof(int), GFP_NOFS); |
944 | if (!nodes) { | 933 | if (!ids) { |
945 | rv = -ENOMEM; | 934 | rv = -ENOMEM; |
946 | goto out; | 935 | goto out; |
947 | } | 936 | } |
948 | 937 | ||
949 | node = nodes; | ||
950 | list_for_each_entry(nd, &sp->members, list) { | 938 | list_for_each_entry(nd, &sp->members, list) { |
951 | node->nodeid = nd->nodeid; | 939 | ids[i++] = nd->nodeid; |
952 | node->weight = nd->weight; | 940 | if (nd->new) |
953 | node->new = nd->new; | 941 | new_count++; |
954 | node->comm_seq = nd->comm_seq; | 942 | } |
955 | node++; | 943 | |
944 | if (ids_count != i) | ||
945 | printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i); | ||
946 | |||
947 | if (!new_count) | ||
948 | goto out_ids; | ||
956 | 949 | ||
957 | nd->new = 0; | 950 | new = kcalloc(new_count, sizeof(int), GFP_NOFS); |
951 | if (!new) { | ||
952 | kfree(ids); | ||
953 | rv = -ENOMEM; | ||
954 | goto out; | ||
955 | } | ||
956 | |||
957 | i = 0; | ||
958 | list_for_each_entry(nd, &sp->members, list) { | ||
959 | if (nd->new) { | ||
960 | new[i++] = nd->nodeid; | ||
961 | nd->new = 0; | ||
962 | } | ||
958 | } | 963 | } |
964 | *new_count_out = new_count; | ||
965 | *new_out = new; | ||
959 | 966 | ||
960 | *count_out = count; | 967 | out_ids: |
961 | *nodes_out = nodes; | 968 | *ids_count_out = ids_count; |
962 | rv = 0; | 969 | *ids_out = ids; |
963 | out: | 970 | out: |
964 | mutex_unlock(&sp->members_lock); | 971 | mutex_unlock(&sp->members_lock); |
965 | put_space(sp); | 972 | put_space(sp); |
966 | return rv; | 973 | return rv; |
967 | } | 974 | } |
968 | 975 | ||
969 | int dlm_comm_seq(int nodeid, uint32_t *seq) | 976 | int dlm_node_weight(char *lsname, int nodeid) |
977 | { | ||
978 | struct dlm_space *sp; | ||
979 | struct dlm_node *nd; | ||
980 | int w = -EEXIST; | ||
981 | |||
982 | sp = get_space(lsname); | ||
983 | if (!sp) | ||
984 | goto out; | ||
985 | |||
986 | mutex_lock(&sp->members_lock); | ||
987 | list_for_each_entry(nd, &sp->members, list) { | ||
988 | if (nd->nodeid != nodeid) | ||
989 | continue; | ||
990 | w = nd->weight; | ||
991 | break; | ||
992 | } | ||
993 | mutex_unlock(&sp->members_lock); | ||
994 | put_space(sp); | ||
995 | out: | ||
996 | return w; | ||
997 | } | ||
998 | |||
999 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) | ||
1000 | { | ||
1001 | struct dlm_comm *cm = get_comm(nodeid, NULL); | ||
1002 | if (!cm) | ||
1003 | return -EEXIST; | ||
1004 | if (!cm->addr_count) | ||
1005 | return -ENOENT; | ||
1006 | memcpy(addr, cm->addr[0], sizeof(*addr)); | ||
1007 | put_comm(cm); | ||
1008 | return 0; | ||
1009 | } | ||
1010 | |||
1011 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | ||
970 | { | 1012 | { |
971 | struct dlm_comm *cm = get_comm(nodeid); | 1013 | struct dlm_comm *cm = get_comm(0, addr); |
972 | if (!cm) | 1014 | if (!cm) |
973 | return -EEXIST; | 1015 | return -EEXIST; |
974 | *seq = cm->seq; | 1016 | *nodeid = cm->nodeid; |
975 | put_comm(cm); | 1017 | put_comm(cm); |
976 | return 0; | 1018 | return 0; |
977 | } | 1019 | } |
@@ -996,6 +1038,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) | |||
996 | #define DEFAULT_TCP_PORT 21064 | 1038 | #define DEFAULT_TCP_PORT 21064 |
997 | #define DEFAULT_BUFFER_SIZE 4096 | 1039 | #define DEFAULT_BUFFER_SIZE 4096 |
998 | #define DEFAULT_RSBTBL_SIZE 1024 | 1040 | #define DEFAULT_RSBTBL_SIZE 1024 |
1041 | #define DEFAULT_DIRTBL_SIZE 1024 | ||
999 | #define DEFAULT_RECOVER_TIMER 5 | 1042 | #define DEFAULT_RECOVER_TIMER 5 |
1000 | #define DEFAULT_TOSS_SECS 10 | 1043 | #define DEFAULT_TOSS_SECS 10 |
1001 | #define DEFAULT_SCAN_SECS 5 | 1044 | #define DEFAULT_SCAN_SECS 5 |
@@ -1004,13 +1047,12 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) | |||
1004 | #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ | 1047 | #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ |
1005 | #define DEFAULT_WAITWARN_US 0 | 1048 | #define DEFAULT_WAITWARN_US 0 |
1006 | #define DEFAULT_NEW_RSB_COUNT 128 | 1049 | #define DEFAULT_NEW_RSB_COUNT 128 |
1007 | #define DEFAULT_RECOVER_CALLBACKS 0 | ||
1008 | #define DEFAULT_CLUSTER_NAME "" | ||
1009 | 1050 | ||
1010 | struct dlm_config_info dlm_config = { | 1051 | struct dlm_config_info dlm_config = { |
1011 | .ci_tcp_port = DEFAULT_TCP_PORT, | 1052 | .ci_tcp_port = DEFAULT_TCP_PORT, |
1012 | .ci_buffer_size = DEFAULT_BUFFER_SIZE, | 1053 | .ci_buffer_size = DEFAULT_BUFFER_SIZE, |
1013 | .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, | 1054 | .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, |
1055 | .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, | ||
1014 | .ci_recover_timer = DEFAULT_RECOVER_TIMER, | 1056 | .ci_recover_timer = DEFAULT_RECOVER_TIMER, |
1015 | .ci_toss_secs = DEFAULT_TOSS_SECS, | 1057 | .ci_toss_secs = DEFAULT_TOSS_SECS, |
1016 | .ci_scan_secs = DEFAULT_SCAN_SECS, | 1058 | .ci_scan_secs = DEFAULT_SCAN_SECS, |
@@ -1018,8 +1060,6 @@ struct dlm_config_info dlm_config = { | |||
1018 | .ci_protocol = DEFAULT_PROTOCOL, | 1060 | .ci_protocol = DEFAULT_PROTOCOL, |
1019 | .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, | 1061 | .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, |
1020 | .ci_waitwarn_us = DEFAULT_WAITWARN_US, | 1062 | .ci_waitwarn_us = DEFAULT_WAITWARN_US, |
1021 | .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT, | 1063 | .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT |
1022 | .ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS, | ||
1023 | .ci_cluster_name = DEFAULT_CLUSTER_NAME | ||
1024 | }; | 1064 | }; |
1025 | 1065 | ||
diff --git a/fs/dlm/config.h b/fs/dlm/config.h index f30697bc278..3099d0dd26c 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -14,19 +14,13 @@ | |||
14 | #ifndef __CONFIG_DOT_H__ | 14 | #ifndef __CONFIG_DOT_H__ |
15 | #define __CONFIG_DOT_H__ | 15 | #define __CONFIG_DOT_H__ |
16 | 16 | ||
17 | struct dlm_config_node { | ||
18 | int nodeid; | ||
19 | int weight; | ||
20 | int new; | ||
21 | uint32_t comm_seq; | ||
22 | }; | ||
23 | |||
24 | #define DLM_MAX_ADDR_COUNT 3 | 17 | #define DLM_MAX_ADDR_COUNT 3 |
25 | 18 | ||
26 | struct dlm_config_info { | 19 | struct dlm_config_info { |
27 | int ci_tcp_port; | 20 | int ci_tcp_port; |
28 | int ci_buffer_size; | 21 | int ci_buffer_size; |
29 | int ci_rsbtbl_size; | 22 | int ci_rsbtbl_size; |
23 | int ci_dirtbl_size; | ||
30 | int ci_recover_timer; | 24 | int ci_recover_timer; |
31 | int ci_toss_secs; | 25 | int ci_toss_secs; |
32 | int ci_scan_secs; | 26 | int ci_scan_secs; |
@@ -35,17 +29,17 @@ struct dlm_config_info { | |||
35 | int ci_timewarn_cs; | 29 | int ci_timewarn_cs; |
36 | int ci_waitwarn_us; | 30 | int ci_waitwarn_us; |
37 | int ci_new_rsb_count; | 31 | int ci_new_rsb_count; |
38 | int ci_recover_callbacks; | ||
39 | char ci_cluster_name[DLM_LOCKSPACE_LEN]; | ||
40 | }; | 32 | }; |
41 | 33 | ||
42 | extern struct dlm_config_info dlm_config; | 34 | extern struct dlm_config_info dlm_config; |
43 | 35 | ||
44 | int dlm_config_init(void); | 36 | int dlm_config_init(void); |
45 | void dlm_config_exit(void); | 37 | void dlm_config_exit(void); |
46 | int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, | 38 | int dlm_node_weight(char *lsname, int nodeid); |
47 | int *count_out); | 39 | int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, |
48 | int dlm_comm_seq(int nodeid, uint32_t *seq); | 40 | int **new_out, int *new_count_out); |
41 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); | ||
42 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); | ||
49 | int dlm_our_nodeid(void); | 43 | int dlm_our_nodeid(void); |
50 | int dlm_our_addr(struct sockaddr_storage *addr, int num); | 44 | int dlm_our_addr(struct sockaddr_storage *addr, int num); |
51 | 45 | ||
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index b969deef9eb..59779237e2b 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c | |||
@@ -344,45 +344,6 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s) | |||
344 | return rv; | 344 | return rv; |
345 | } | 345 | } |
346 | 346 | ||
347 | static int print_format4(struct dlm_rsb *r, struct seq_file *s) | ||
348 | { | ||
349 | int our_nodeid = dlm_our_nodeid(); | ||
350 | int print_name = 1; | ||
351 | int i, rv; | ||
352 | |||
353 | lock_rsb(r); | ||
354 | |||
355 | rv = seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ", | ||
356 | r, | ||
357 | r->res_nodeid, | ||
358 | r->res_master_nodeid, | ||
359 | r->res_dir_nodeid, | ||
360 | our_nodeid, | ||
361 | r->res_toss_time, | ||
362 | r->res_flags, | ||
363 | r->res_length); | ||
364 | if (rv) | ||
365 | goto out; | ||
366 | |||
367 | for (i = 0; i < r->res_length; i++) { | ||
368 | if (!isascii(r->res_name[i]) || !isprint(r->res_name[i])) | ||
369 | print_name = 0; | ||
370 | } | ||
371 | |||
372 | seq_printf(s, "%s", print_name ? "str " : "hex"); | ||
373 | |||
374 | for (i = 0; i < r->res_length; i++) { | ||
375 | if (print_name) | ||
376 | seq_printf(s, "%c", r->res_name[i]); | ||
377 | else | ||
378 | seq_printf(s, " %02x", (unsigned char)r->res_name[i]); | ||
379 | } | ||
380 | rv = seq_printf(s, "\n"); | ||
381 | out: | ||
382 | unlock_rsb(r); | ||
383 | return rv; | ||
384 | } | ||
385 | |||
386 | struct rsbtbl_iter { | 347 | struct rsbtbl_iter { |
387 | struct dlm_rsb *rsb; | 348 | struct dlm_rsb *rsb; |
388 | unsigned bucket; | 349 | unsigned bucket; |
@@ -421,13 +382,6 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) | |||
421 | } | 382 | } |
422 | rv = print_format3(ri->rsb, seq); | 383 | rv = print_format3(ri->rsb, seq); |
423 | break; | 384 | break; |
424 | case 4: | ||
425 | if (ri->header) { | ||
426 | seq_printf(seq, "version 4 rsb 2\n"); | ||
427 | ri->header = 0; | ||
428 | } | ||
429 | rv = print_format4(ri->rsb, seq); | ||
430 | break; | ||
431 | } | 385 | } |
432 | 386 | ||
433 | return rv; | 387 | return rv; |
@@ -436,18 +390,14 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) | |||
436 | static const struct seq_operations format1_seq_ops; | 390 | static const struct seq_operations format1_seq_ops; |
437 | static const struct seq_operations format2_seq_ops; | 391 | static const struct seq_operations format2_seq_ops; |
438 | static const struct seq_operations format3_seq_ops; | 392 | static const struct seq_operations format3_seq_ops; |
439 | static const struct seq_operations format4_seq_ops; | ||
440 | 393 | ||
441 | static void *table_seq_start(struct seq_file *seq, loff_t *pos) | 394 | static void *table_seq_start(struct seq_file *seq, loff_t *pos) |
442 | { | 395 | { |
443 | struct rb_root *tree; | ||
444 | struct rb_node *node; | ||
445 | struct dlm_ls *ls = seq->private; | 396 | struct dlm_ls *ls = seq->private; |
446 | struct rsbtbl_iter *ri; | 397 | struct rsbtbl_iter *ri; |
447 | struct dlm_rsb *r; | 398 | struct dlm_rsb *r; |
448 | loff_t n = *pos; | 399 | loff_t n = *pos; |
449 | unsigned bucket, entry; | 400 | unsigned bucket, entry; |
450 | int toss = (seq->op == &format4_seq_ops); | ||
451 | 401 | ||
452 | bucket = n >> 32; | 402 | bucket = n >> 32; |
453 | entry = n & ((1LL << 32) - 1); | 403 | entry = n & ((1LL << 32) - 1); |
@@ -466,15 +416,11 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) | |||
466 | ri->format = 2; | 416 | ri->format = 2; |
467 | if (seq->op == &format3_seq_ops) | 417 | if (seq->op == &format3_seq_ops) |
468 | ri->format = 3; | 418 | ri->format = 3; |
469 | if (seq->op == &format4_seq_ops) | ||
470 | ri->format = 4; | ||
471 | |||
472 | tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; | ||
473 | 419 | ||
474 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 420 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
475 | if (!RB_EMPTY_ROOT(tree)) { | 421 | if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { |
476 | for (node = rb_first(tree); node; node = rb_next(node)) { | 422 | list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, |
477 | r = rb_entry(node, struct dlm_rsb, res_hashnode); | 423 | res_hashchain) { |
478 | if (!entry--) { | 424 | if (!entry--) { |
479 | dlm_hold_rsb(r); | 425 | dlm_hold_rsb(r); |
480 | ri->rsb = r; | 426 | ri->rsb = r; |
@@ -501,12 +447,11 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) | |||
501 | kfree(ri); | 447 | kfree(ri); |
502 | return NULL; | 448 | return NULL; |
503 | } | 449 | } |
504 | tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; | ||
505 | 450 | ||
506 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 451 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
507 | if (!RB_EMPTY_ROOT(tree)) { | 452 | if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { |
508 | node = rb_first(tree); | 453 | r = list_first_entry(&ls->ls_rsbtbl[bucket].list, |
509 | r = rb_entry(node, struct dlm_rsb, res_hashnode); | 454 | struct dlm_rsb, res_hashchain); |
510 | dlm_hold_rsb(r); | 455 | dlm_hold_rsb(r); |
511 | ri->rsb = r; | 456 | ri->rsb = r; |
512 | ri->bucket = bucket; | 457 | ri->bucket = bucket; |
@@ -522,12 +467,10 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) | |||
522 | { | 467 | { |
523 | struct dlm_ls *ls = seq->private; | 468 | struct dlm_ls *ls = seq->private; |
524 | struct rsbtbl_iter *ri = iter_ptr; | 469 | struct rsbtbl_iter *ri = iter_ptr; |
525 | struct rb_root *tree; | 470 | struct list_head *next; |
526 | struct rb_node *next; | ||
527 | struct dlm_rsb *r, *rp; | 471 | struct dlm_rsb *r, *rp; |
528 | loff_t n = *pos; | 472 | loff_t n = *pos; |
529 | unsigned bucket; | 473 | unsigned bucket; |
530 | int toss = (seq->op == &format4_seq_ops); | ||
531 | 474 | ||
532 | bucket = n >> 32; | 475 | bucket = n >> 32; |
533 | 476 | ||
@@ -537,10 +480,10 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) | |||
537 | 480 | ||
538 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 481 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
539 | rp = ri->rsb; | 482 | rp = ri->rsb; |
540 | next = rb_next(&rp->res_hashnode); | 483 | next = rp->res_hashchain.next; |
541 | 484 | ||
542 | if (next) { | 485 | if (next != &ls->ls_rsbtbl[bucket].list) { |
543 | r = rb_entry(next, struct dlm_rsb, res_hashnode); | 486 | r = list_entry(next, struct dlm_rsb, res_hashchain); |
544 | dlm_hold_rsb(r); | 487 | dlm_hold_rsb(r); |
545 | ri->rsb = r; | 488 | ri->rsb = r; |
546 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); | 489 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
@@ -566,12 +509,11 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) | |||
566 | kfree(ri); | 509 | kfree(ri); |
567 | return NULL; | 510 | return NULL; |
568 | } | 511 | } |
569 | tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep; | ||
570 | 512 | ||
571 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 513 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
572 | if (!RB_EMPTY_ROOT(tree)) { | 514 | if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { |
573 | next = rb_first(tree); | 515 | r = list_first_entry(&ls->ls_rsbtbl[bucket].list, |
574 | r = rb_entry(next, struct dlm_rsb, res_hashnode); | 516 | struct dlm_rsb, res_hashchain); |
575 | dlm_hold_rsb(r); | 517 | dlm_hold_rsb(r); |
576 | ri->rsb = r; | 518 | ri->rsb = r; |
577 | ri->bucket = bucket; | 519 | ri->bucket = bucket; |
@@ -614,17 +556,9 @@ static const struct seq_operations format3_seq_ops = { | |||
614 | .show = table_seq_show, | 556 | .show = table_seq_show, |
615 | }; | 557 | }; |
616 | 558 | ||
617 | static const struct seq_operations format4_seq_ops = { | ||
618 | .start = table_seq_start, | ||
619 | .next = table_seq_next, | ||
620 | .stop = table_seq_stop, | ||
621 | .show = table_seq_show, | ||
622 | }; | ||
623 | |||
624 | static const struct file_operations format1_fops; | 559 | static const struct file_operations format1_fops; |
625 | static const struct file_operations format2_fops; | 560 | static const struct file_operations format2_fops; |
626 | static const struct file_operations format3_fops; | 561 | static const struct file_operations format3_fops; |
627 | static const struct file_operations format4_fops; | ||
628 | 562 | ||
629 | static int table_open(struct inode *inode, struct file *file) | 563 | static int table_open(struct inode *inode, struct file *file) |
630 | { | 564 | { |
@@ -637,8 +571,6 @@ static int table_open(struct inode *inode, struct file *file) | |||
637 | ret = seq_open(file, &format2_seq_ops); | 571 | ret = seq_open(file, &format2_seq_ops); |
638 | else if (file->f_op == &format3_fops) | 572 | else if (file->f_op == &format3_fops) |
639 | ret = seq_open(file, &format3_seq_ops); | 573 | ret = seq_open(file, &format3_seq_ops); |
640 | else if (file->f_op == &format4_fops) | ||
641 | ret = seq_open(file, &format4_seq_ops); | ||
642 | 574 | ||
643 | if (ret) | 575 | if (ret) |
644 | return ret; | 576 | return ret; |
@@ -672,17 +604,16 @@ static const struct file_operations format3_fops = { | |||
672 | .release = seq_release | 604 | .release = seq_release |
673 | }; | 605 | }; |
674 | 606 | ||
675 | static const struct file_operations format4_fops = { | ||
676 | .owner = THIS_MODULE, | ||
677 | .open = table_open, | ||
678 | .read = seq_read, | ||
679 | .llseek = seq_lseek, | ||
680 | .release = seq_release | ||
681 | }; | ||
682 | |||
683 | /* | 607 | /* |
684 | * dump lkb's on the ls_waiters list | 608 | * dump lkb's on the ls_waiters list |
685 | */ | 609 | */ |
610 | |||
611 | static int waiters_open(struct inode *inode, struct file *file) | ||
612 | { | ||
613 | file->private_data = inode->i_private; | ||
614 | return 0; | ||
615 | } | ||
616 | |||
686 | static ssize_t waiters_read(struct file *file, char __user *userbuf, | 617 | static ssize_t waiters_read(struct file *file, char __user *userbuf, |
687 | size_t count, loff_t *ppos) | 618 | size_t count, loff_t *ppos) |
688 | { | 619 | { |
@@ -711,7 +642,7 @@ static ssize_t waiters_read(struct file *file, char __user *userbuf, | |||
711 | 642 | ||
712 | static const struct file_operations waiters_fops = { | 643 | static const struct file_operations waiters_fops = { |
713 | .owner = THIS_MODULE, | 644 | .owner = THIS_MODULE, |
714 | .open = simple_open, | 645 | .open = waiters_open, |
715 | .read = waiters_read, | 646 | .read = waiters_read, |
716 | .llseek = default_llseek, | 647 | .llseek = default_llseek, |
717 | }; | 648 | }; |
@@ -726,8 +657,6 @@ void dlm_delete_debug_file(struct dlm_ls *ls) | |||
726 | debugfs_remove(ls->ls_debug_locks_dentry); | 657 | debugfs_remove(ls->ls_debug_locks_dentry); |
727 | if (ls->ls_debug_all_dentry) | 658 | if (ls->ls_debug_all_dentry) |
728 | debugfs_remove(ls->ls_debug_all_dentry); | 659 | debugfs_remove(ls->ls_debug_all_dentry); |
729 | if (ls->ls_debug_toss_dentry) | ||
730 | debugfs_remove(ls->ls_debug_toss_dentry); | ||
731 | } | 660 | } |
732 | 661 | ||
733 | int dlm_create_debug_file(struct dlm_ls *ls) | 662 | int dlm_create_debug_file(struct dlm_ls *ls) |
@@ -770,19 +699,6 @@ int dlm_create_debug_file(struct dlm_ls *ls) | |||
770 | if (!ls->ls_debug_all_dentry) | 699 | if (!ls->ls_debug_all_dentry) |
771 | goto fail; | 700 | goto fail; |
772 | 701 | ||
773 | /* format 4 */ | ||
774 | |||
775 | memset(name, 0, sizeof(name)); | ||
776 | snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_toss", ls->ls_name); | ||
777 | |||
778 | ls->ls_debug_toss_dentry = debugfs_create_file(name, | ||
779 | S_IFREG | S_IRUGO, | ||
780 | dlm_root, | ||
781 | ls, | ||
782 | &format4_fops); | ||
783 | if (!ls->ls_debug_toss_dentry) | ||
784 | goto fail; | ||
785 | |||
786 | memset(name, 0, sizeof(name)); | 702 | memset(name, 0, sizeof(name)); |
787 | snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name); | 703 | snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name); |
788 | 704 | ||
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 278a75cda44..7b84c1dbc82 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c | |||
@@ -23,6 +23,50 @@ | |||
23 | #include "lock.h" | 23 | #include "lock.h" |
24 | #include "dir.h" | 24 | #include "dir.h" |
25 | 25 | ||
26 | |||
27 | static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de) | ||
28 | { | ||
29 | spin_lock(&ls->ls_recover_list_lock); | ||
30 | list_add(&de->list, &ls->ls_recover_list); | ||
31 | spin_unlock(&ls->ls_recover_list_lock); | ||
32 | } | ||
33 | |||
34 | static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) | ||
35 | { | ||
36 | int found = 0; | ||
37 | struct dlm_direntry *de; | ||
38 | |||
39 | spin_lock(&ls->ls_recover_list_lock); | ||
40 | list_for_each_entry(de, &ls->ls_recover_list, list) { | ||
41 | if (de->length == len) { | ||
42 | list_del(&de->list); | ||
43 | de->master_nodeid = 0; | ||
44 | memset(de->name, 0, len); | ||
45 | found = 1; | ||
46 | break; | ||
47 | } | ||
48 | } | ||
49 | spin_unlock(&ls->ls_recover_list_lock); | ||
50 | |||
51 | if (!found) | ||
52 | de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_NOFS); | ||
53 | return de; | ||
54 | } | ||
55 | |||
56 | void dlm_clear_free_entries(struct dlm_ls *ls) | ||
57 | { | ||
58 | struct dlm_direntry *de; | ||
59 | |||
60 | spin_lock(&ls->ls_recover_list_lock); | ||
61 | while (!list_empty(&ls->ls_recover_list)) { | ||
62 | de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, | ||
63 | list); | ||
64 | list_del(&de->list); | ||
65 | kfree(de); | ||
66 | } | ||
67 | spin_unlock(&ls->ls_recover_list_lock); | ||
68 | } | ||
69 | |||
26 | /* | 70 | /* |
27 | * We use the upper 16 bits of the hash value to select the directory node. | 71 | * We use the upper 16 bits of the hash value to select the directory node. |
28 | * Low bits are used for distribution of rsb's among hash buckets on each node. | 72 | * Low bits are used for distribution of rsb's among hash buckets on each node. |
@@ -34,53 +78,144 @@ | |||
34 | 78 | ||
35 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) | 79 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) |
36 | { | 80 | { |
37 | uint32_t node; | 81 | struct list_head *tmp; |
82 | struct dlm_member *memb = NULL; | ||
83 | uint32_t node, n = 0; | ||
84 | int nodeid; | ||
85 | |||
86 | if (ls->ls_num_nodes == 1) { | ||
87 | nodeid = dlm_our_nodeid(); | ||
88 | goto out; | ||
89 | } | ||
38 | 90 | ||
39 | if (ls->ls_num_nodes == 1) | 91 | if (ls->ls_node_array) { |
40 | return dlm_our_nodeid(); | ||
41 | else { | ||
42 | node = (hash >> 16) % ls->ls_total_weight; | 92 | node = (hash >> 16) % ls->ls_total_weight; |
43 | return ls->ls_node_array[node]; | 93 | nodeid = ls->ls_node_array[node]; |
94 | goto out; | ||
95 | } | ||
96 | |||
97 | /* make_member_array() failed to kmalloc ls_node_array... */ | ||
98 | |||
99 | node = (hash >> 16) % ls->ls_num_nodes; | ||
100 | |||
101 | list_for_each(tmp, &ls->ls_nodes) { | ||
102 | if (n++ != node) | ||
103 | continue; | ||
104 | memb = list_entry(tmp, struct dlm_member, list); | ||
105 | break; | ||
44 | } | 106 | } |
107 | |||
108 | DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n", | ||
109 | ls->ls_num_nodes, n, node);); | ||
110 | nodeid = memb->nodeid; | ||
111 | out: | ||
112 | return nodeid; | ||
45 | } | 113 | } |
46 | 114 | ||
47 | int dlm_dir_nodeid(struct dlm_rsb *r) | 115 | int dlm_dir_nodeid(struct dlm_rsb *r) |
48 | { | 116 | { |
49 | return r->res_dir_nodeid; | 117 | return dlm_hash2nodeid(r->res_ls, r->res_hash); |
50 | } | 118 | } |
51 | 119 | ||
52 | void dlm_recover_dir_nodeid(struct dlm_ls *ls) | 120 | static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len) |
53 | { | 121 | { |
54 | struct dlm_rsb *r; | 122 | uint32_t val; |
55 | 123 | ||
56 | down_read(&ls->ls_root_sem); | 124 | val = jhash(name, len, 0); |
57 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 125 | val &= (ls->ls_dirtbl_size - 1); |
58 | r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); | 126 | |
127 | return val; | ||
128 | } | ||
129 | |||
130 | static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de) | ||
131 | { | ||
132 | uint32_t bucket; | ||
133 | |||
134 | bucket = dir_hash(ls, de->name, de->length); | ||
135 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | ||
136 | } | ||
137 | |||
138 | static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name, | ||
139 | int namelen, uint32_t bucket) | ||
140 | { | ||
141 | struct dlm_direntry *de; | ||
142 | |||
143 | list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) { | ||
144 | if (de->length == namelen && !memcmp(name, de->name, namelen)) | ||
145 | goto out; | ||
146 | } | ||
147 | de = NULL; | ||
148 | out: | ||
149 | return de; | ||
150 | } | ||
151 | |||
152 | void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen) | ||
153 | { | ||
154 | struct dlm_direntry *de; | ||
155 | uint32_t bucket; | ||
156 | |||
157 | bucket = dir_hash(ls, name, namelen); | ||
158 | |||
159 | spin_lock(&ls->ls_dirtbl[bucket].lock); | ||
160 | |||
161 | de = search_bucket(ls, name, namelen, bucket); | ||
162 | |||
163 | if (!de) { | ||
164 | log_error(ls, "remove fr %u none", nodeid); | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | if (de->master_nodeid != nodeid) { | ||
169 | log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid); | ||
170 | goto out; | ||
171 | } | ||
172 | |||
173 | list_del(&de->list); | ||
174 | kfree(de); | ||
175 | out: | ||
176 | spin_unlock(&ls->ls_dirtbl[bucket].lock); | ||
177 | } | ||
178 | |||
179 | void dlm_dir_clear(struct dlm_ls *ls) | ||
180 | { | ||
181 | struct list_head *head; | ||
182 | struct dlm_direntry *de; | ||
183 | int i; | ||
184 | |||
185 | DLM_ASSERT(list_empty(&ls->ls_recover_list), ); | ||
186 | |||
187 | for (i = 0; i < ls->ls_dirtbl_size; i++) { | ||
188 | spin_lock(&ls->ls_dirtbl[i].lock); | ||
189 | head = &ls->ls_dirtbl[i].list; | ||
190 | while (!list_empty(head)) { | ||
191 | de = list_entry(head->next, struct dlm_direntry, list); | ||
192 | list_del(&de->list); | ||
193 | put_free_de(ls, de); | ||
194 | } | ||
195 | spin_unlock(&ls->ls_dirtbl[i].lock); | ||
59 | } | 196 | } |
60 | up_read(&ls->ls_root_sem); | ||
61 | } | 197 | } |
62 | 198 | ||
63 | int dlm_recover_directory(struct dlm_ls *ls) | 199 | int dlm_recover_directory(struct dlm_ls *ls) |
64 | { | 200 | { |
65 | struct dlm_member *memb; | 201 | struct dlm_member *memb; |
202 | struct dlm_direntry *de; | ||
66 | char *b, *last_name = NULL; | 203 | char *b, *last_name = NULL; |
67 | int error = -ENOMEM, last_len, nodeid, result; | 204 | int error = -ENOMEM, last_len, count = 0; |
68 | uint16_t namelen; | 205 | uint16_t namelen; |
69 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; | ||
70 | 206 | ||
71 | log_debug(ls, "dlm_recover_directory"); | 207 | log_debug(ls, "dlm_recover_directory"); |
72 | 208 | ||
73 | if (dlm_no_directory(ls)) | 209 | if (dlm_no_directory(ls)) |
74 | goto out_status; | 210 | goto out_status; |
75 | 211 | ||
212 | dlm_dir_clear(ls); | ||
213 | |||
76 | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); | 214 | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); |
77 | if (!last_name) | 215 | if (!last_name) |
78 | goto out; | 216 | goto out; |
79 | 217 | ||
80 | list_for_each_entry(memb, &ls->ls_nodes, list) { | 218 | list_for_each_entry(memb, &ls->ls_nodes, list) { |
81 | if (memb->nodeid == dlm_our_nodeid()) | ||
82 | continue; | ||
83 | |||
84 | memset(last_name, 0, DLM_RESNAME_MAXLEN); | 219 | memset(last_name, 0, DLM_RESNAME_MAXLEN); |
85 | last_len = 0; | 220 | last_len = 0; |
86 | 221 | ||
@@ -95,7 +230,7 @@ int dlm_recover_directory(struct dlm_ls *ls) | |||
95 | if (error) | 230 | if (error) |
96 | goto out_free; | 231 | goto out_free; |
97 | 232 | ||
98 | cond_resched(); | 233 | schedule(); |
99 | 234 | ||
100 | /* | 235 | /* |
101 | * pick namelen/name pairs out of received buffer | 236 | * pick namelen/name pairs out of received buffer |
@@ -132,96 +267,96 @@ int dlm_recover_directory(struct dlm_ls *ls) | |||
132 | if (namelen > DLM_RESNAME_MAXLEN) | 267 | if (namelen > DLM_RESNAME_MAXLEN) |
133 | goto out_free; | 268 | goto out_free; |
134 | 269 | ||
135 | error = dlm_master_lookup(ls, memb->nodeid, | 270 | error = -ENOMEM; |
136 | b, namelen, | 271 | de = get_free_de(ls, namelen); |
137 | DLM_LU_RECOVER_DIR, | 272 | if (!de) |
138 | &nodeid, &result); | ||
139 | if (error) { | ||
140 | log_error(ls, "recover_dir lookup %d", | ||
141 | error); | ||
142 | goto out_free; | 273 | goto out_free; |
143 | } | ||
144 | |||
145 | /* The name was found in rsbtbl, but the | ||
146 | * master nodeid is different from | ||
147 | * memb->nodeid which says it is the master. | ||
148 | * This should not happen. */ | ||
149 | |||
150 | if (result == DLM_LU_MATCH && | ||
151 | nodeid != memb->nodeid) { | ||
152 | count_bad++; | ||
153 | log_error(ls, "recover_dir lookup %d " | ||
154 | "nodeid %d memb %d bad %u", | ||
155 | result, nodeid, memb->nodeid, | ||
156 | count_bad); | ||
157 | print_hex_dump_bytes("dlm_recover_dir ", | ||
158 | DUMP_PREFIX_NONE, | ||
159 | b, namelen); | ||
160 | } | ||
161 | |||
162 | /* The name was found in rsbtbl, and the | ||
163 | * master nodeid matches memb->nodeid. */ | ||
164 | |||
165 | if (result == DLM_LU_MATCH && | ||
166 | nodeid == memb->nodeid) { | ||
167 | count_match++; | ||
168 | } | ||
169 | |||
170 | /* The name was not found in rsbtbl and was | ||
171 | * added with memb->nodeid as the master. */ | ||
172 | |||
173 | if (result == DLM_LU_ADD) { | ||
174 | count_add++; | ||
175 | } | ||
176 | 274 | ||
275 | de->master_nodeid = memb->nodeid; | ||
276 | de->length = namelen; | ||
177 | last_len = namelen; | 277 | last_len = namelen; |
278 | memcpy(de->name, b, namelen); | ||
178 | memcpy(last_name, b, namelen); | 279 | memcpy(last_name, b, namelen); |
179 | b += namelen; | 280 | b += namelen; |
180 | left -= namelen; | 281 | left -= namelen; |
282 | |||
283 | add_entry_to_hash(ls, de); | ||
181 | count++; | 284 | count++; |
182 | } | 285 | } |
183 | } | 286 | } |
184 | done: | 287 | done: |
185 | ; | 288 | ; |
186 | } | 289 | } |
187 | 290 | ||
188 | out_status: | 291 | out_status: |
189 | error = 0; | 292 | error = 0; |
190 | dlm_set_recover_status(ls, DLM_RS_DIR); | 293 | dlm_set_recover_status(ls, DLM_RS_DIR); |
191 | 294 | log_debug(ls, "dlm_recover_directory %d entries", count); | |
192 | log_debug(ls, "dlm_recover_directory %u in %u new", | ||
193 | count, count_add); | ||
194 | out_free: | 295 | out_free: |
195 | kfree(last_name); | 296 | kfree(last_name); |
196 | out: | 297 | out: |
298 | dlm_clear_free_entries(ls); | ||
197 | return error; | 299 | return error; |
198 | } | 300 | } |
199 | 301 | ||
200 | static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) | 302 | static int get_entry(struct dlm_ls *ls, int nodeid, char *name, |
303 | int namelen, int *r_nodeid) | ||
201 | { | 304 | { |
202 | struct dlm_rsb *r; | 305 | struct dlm_direntry *de, *tmp; |
203 | uint32_t hash, bucket; | 306 | uint32_t bucket; |
204 | int rv; | 307 | |
308 | bucket = dir_hash(ls, name, namelen); | ||
309 | |||
310 | spin_lock(&ls->ls_dirtbl[bucket].lock); | ||
311 | de = search_bucket(ls, name, namelen, bucket); | ||
312 | if (de) { | ||
313 | *r_nodeid = de->master_nodeid; | ||
314 | spin_unlock(&ls->ls_dirtbl[bucket].lock); | ||
315 | if (*r_nodeid == nodeid) | ||
316 | return -EEXIST; | ||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | spin_unlock(&ls->ls_dirtbl[bucket].lock); | ||
321 | |||
322 | if (namelen > DLM_RESNAME_MAXLEN) | ||
323 | return -EINVAL; | ||
324 | |||
325 | de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_NOFS); | ||
326 | if (!de) | ||
327 | return -ENOMEM; | ||
205 | 328 | ||
206 | hash = jhash(name, len, 0); | 329 | de->master_nodeid = nodeid; |
207 | bucket = hash & (ls->ls_rsbtbl_size - 1); | 330 | de->length = namelen; |
331 | memcpy(de->name, name, namelen); | ||
208 | 332 | ||
209 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 333 | spin_lock(&ls->ls_dirtbl[bucket].lock); |
210 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); | 334 | tmp = search_bucket(ls, name, namelen, bucket); |
211 | if (rv) | 335 | if (tmp) { |
212 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, | 336 | kfree(de); |
213 | name, len, &r); | 337 | de = tmp; |
214 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); | 338 | } else { |
339 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | ||
340 | } | ||
341 | *r_nodeid = de->master_nodeid; | ||
342 | spin_unlock(&ls->ls_dirtbl[bucket].lock); | ||
343 | return 0; | ||
344 | } | ||
345 | |||
346 | int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, | ||
347 | int *r_nodeid) | ||
348 | { | ||
349 | return get_entry(ls, nodeid, name, namelen, r_nodeid); | ||
350 | } | ||
215 | 351 | ||
216 | if (!rv) | 352 | static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) |
217 | return r; | 353 | { |
354 | struct dlm_rsb *r; | ||
218 | 355 | ||
219 | down_read(&ls->ls_root_sem); | 356 | down_read(&ls->ls_root_sem); |
220 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 357 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
221 | if (len == r->res_length && !memcmp(name, r->res_name, len)) { | 358 | if (len == r->res_length && !memcmp(name, r->res_name, len)) { |
222 | up_read(&ls->ls_root_sem); | 359 | up_read(&ls->ls_root_sem); |
223 | log_debug(ls, "find_rsb_root revert to root_list %s", | ||
224 | r->res_name); | ||
225 | return r; | 360 | return r; |
226 | } | 361 | } |
227 | } | 362 | } |
@@ -278,7 +413,6 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | |||
278 | be_namelen = cpu_to_be16(0); | 413 | be_namelen = cpu_to_be16(0); |
279 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | 414 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
280 | offset += sizeof(__be16); | 415 | offset += sizeof(__be16); |
281 | ls->ls_recover_dir_sent_msg++; | ||
282 | goto out; | 416 | goto out; |
283 | } | 417 | } |
284 | 418 | ||
@@ -287,7 +421,6 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | |||
287 | offset += sizeof(__be16); | 421 | offset += sizeof(__be16); |
288 | memcpy(outbuf + offset, r->res_name, r->res_length); | 422 | memcpy(outbuf + offset, r->res_name, r->res_length); |
289 | offset += r->res_length; | 423 | offset += r->res_length; |
290 | ls->ls_recover_dir_sent_res++; | ||
291 | } | 424 | } |
292 | 425 | ||
293 | /* | 426 | /* |
@@ -300,8 +433,8 @@ void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | |||
300 | be_namelen = cpu_to_be16(0xFFFF); | 433 | be_namelen = cpu_to_be16(0xFFFF); |
301 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | 434 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
302 | offset += sizeof(__be16); | 435 | offset += sizeof(__be16); |
303 | ls->ls_recover_dir_sent_msg++; | ||
304 | } | 436 | } |
437 | |||
305 | out: | 438 | out: |
306 | up_read(&ls->ls_root_sem); | 439 | up_read(&ls->ls_root_sem); |
307 | } | 440 | } |
diff --git a/fs/dlm/dir.h b/fs/dlm/dir.h index 41750634445..0b0eb1267b6 100644 --- a/fs/dlm/dir.h +++ b/fs/dlm/dir.h | |||
@@ -14,10 +14,15 @@ | |||
14 | #ifndef __DIR_DOT_H__ | 14 | #ifndef __DIR_DOT_H__ |
15 | #define __DIR_DOT_H__ | 15 | #define __DIR_DOT_H__ |
16 | 16 | ||
17 | |||
17 | int dlm_dir_nodeid(struct dlm_rsb *rsb); | 18 | int dlm_dir_nodeid(struct dlm_rsb *rsb); |
18 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash); | 19 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash); |
19 | void dlm_recover_dir_nodeid(struct dlm_ls *ls); | 20 | void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int len); |
21 | void dlm_dir_clear(struct dlm_ls *ls); | ||
22 | void dlm_clear_free_entries(struct dlm_ls *ls); | ||
20 | int dlm_recover_directory(struct dlm_ls *ls); | 23 | int dlm_recover_directory(struct dlm_ls *ls); |
24 | int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen, | ||
25 | int *r_nodeid); | ||
21 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | 26 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, |
22 | char *outbuf, int outlen, int nodeid); | 27 | char *outbuf, int outlen, int nodeid); |
23 | 28 | ||
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 77c0f70f8fe..fe2860c0244 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -38,7 +38,6 @@ | |||
38 | #include <linux/miscdevice.h> | 38 | #include <linux/miscdevice.h> |
39 | #include <linux/mutex.h> | 39 | #include <linux/mutex.h> |
40 | #include <linux/idr.h> | 40 | #include <linux/idr.h> |
41 | #include <linux/ratelimit.h> | ||
42 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
43 | 42 | ||
44 | #include <linux/dlm.h> | 43 | #include <linux/dlm.h> |
@@ -55,6 +54,8 @@ struct dlm_lkb; | |||
55 | struct dlm_rsb; | 54 | struct dlm_rsb; |
56 | struct dlm_member; | 55 | struct dlm_member; |
57 | struct dlm_rsbtable; | 56 | struct dlm_rsbtable; |
57 | struct dlm_dirtable; | ||
58 | struct dlm_direntry; | ||
58 | struct dlm_recover; | 59 | struct dlm_recover; |
59 | struct dlm_header; | 60 | struct dlm_header; |
60 | struct dlm_message; | 61 | struct dlm_message; |
@@ -73,13 +74,6 @@ do { \ | |||
73 | (ls)->ls_name , ##args); \ | 74 | (ls)->ls_name , ##args); \ |
74 | } while (0) | 75 | } while (0) |
75 | 76 | ||
76 | #define log_limit(ls, fmt, args...) \ | ||
77 | do { \ | ||
78 | if (dlm_config.ci_log_debug) \ | ||
79 | printk_ratelimited(KERN_DEBUG "dlm: %s: " fmt "\n", \ | ||
80 | (ls)->ls_name , ##args); \ | ||
81 | } while (0) | ||
82 | |||
83 | #define DLM_ASSERT(x, do) \ | 77 | #define DLM_ASSERT(x, do) \ |
84 | { \ | 78 | { \ |
85 | if (!(x)) \ | 79 | if (!(x)) \ |
@@ -96,9 +90,21 @@ do { \ | |||
96 | } | 90 | } |
97 | 91 | ||
98 | 92 | ||
93 | struct dlm_direntry { | ||
94 | struct list_head list; | ||
95 | uint32_t master_nodeid; | ||
96 | uint16_t length; | ||
97 | char name[1]; | ||
98 | }; | ||
99 | |||
100 | struct dlm_dirtable { | ||
101 | struct list_head list; | ||
102 | spinlock_t lock; | ||
103 | }; | ||
104 | |||
99 | struct dlm_rsbtable { | 105 | struct dlm_rsbtable { |
100 | struct rb_root keep; | 106 | struct list_head list; |
101 | struct rb_root toss; | 107 | struct list_head toss; |
102 | spinlock_t lock; | 108 | spinlock_t lock; |
103 | }; | 109 | }; |
104 | 110 | ||
@@ -111,10 +117,6 @@ struct dlm_member { | |||
111 | struct list_head list; | 117 | struct list_head list; |
112 | int nodeid; | 118 | int nodeid; |
113 | int weight; | 119 | int weight; |
114 | int slot; | ||
115 | int slot_prev; | ||
116 | int comm_seq; | ||
117 | uint32_t generation; | ||
118 | }; | 120 | }; |
119 | 121 | ||
120 | /* | 122 | /* |
@@ -123,8 +125,10 @@ struct dlm_member { | |||
123 | 125 | ||
124 | struct dlm_recover { | 126 | struct dlm_recover { |
125 | struct list_head list; | 127 | struct list_head list; |
126 | struct dlm_config_node *nodes; | 128 | int *nodeids; /* nodeids of all members */ |
127 | int nodes_count; | 129 | int node_count; |
130 | int *new; /* nodeids of new members */ | ||
131 | int new_count; | ||
128 | uint64_t seq; | 132 | uint64_t seq; |
129 | }; | 133 | }; |
130 | 134 | ||
@@ -257,8 +261,6 @@ struct dlm_lkb { | |||
257 | ktime_t lkb_last_cast_time; /* for debugging */ | 261 | ktime_t lkb_last_cast_time; /* for debugging */ |
258 | ktime_t lkb_last_bast_time; /* for debugging */ | 262 | ktime_t lkb_last_bast_time; /* for debugging */ |
259 | 263 | ||
260 | uint64_t lkb_recover_seq; /* from ls_recover_seq */ | ||
261 | |||
262 | char *lkb_lvbptr; | 264 | char *lkb_lvbptr; |
263 | struct dlm_lksb *lkb_lksb; /* caller's status block */ | 265 | struct dlm_lksb *lkb_lksb; /* caller's status block */ |
264 | void (*lkb_astfn) (void *astparam); | 266 | void (*lkb_astfn) (void *astparam); |
@@ -269,15 +271,6 @@ struct dlm_lkb { | |||
269 | }; | 271 | }; |
270 | }; | 272 | }; |
271 | 273 | ||
272 | /* | ||
273 | * res_master_nodeid is "normal": 0 is unset/invalid, non-zero is the real | ||
274 | * nodeid, even when nodeid is our_nodeid. | ||
275 | * | ||
276 | * res_nodeid is "odd": -1 is unset/invalid, zero means our_nodeid, | ||
277 | * greater than zero when another nodeid. | ||
278 | * | ||
279 | * (TODO: remove res_nodeid and only use res_master_nodeid) | ||
280 | */ | ||
281 | 274 | ||
282 | struct dlm_rsb { | 275 | struct dlm_rsb { |
283 | struct dlm_ls *res_ls; /* the lockspace */ | 276 | struct dlm_ls *res_ls; /* the lockspace */ |
@@ -286,19 +279,13 @@ struct dlm_rsb { | |||
286 | unsigned long res_flags; | 279 | unsigned long res_flags; |
287 | int res_length; /* length of rsb name */ | 280 | int res_length; /* length of rsb name */ |
288 | int res_nodeid; | 281 | int res_nodeid; |
289 | int res_master_nodeid; | ||
290 | int res_dir_nodeid; | ||
291 | int res_id; /* for ls_recover_idr */ | ||
292 | uint32_t res_lvbseq; | 282 | uint32_t res_lvbseq; |
293 | uint32_t res_hash; | 283 | uint32_t res_hash; |
294 | uint32_t res_bucket; /* rsbtbl */ | 284 | uint32_t res_bucket; /* rsbtbl */ |
295 | unsigned long res_toss_time; | 285 | unsigned long res_toss_time; |
296 | uint32_t res_first_lkid; | 286 | uint32_t res_first_lkid; |
297 | struct list_head res_lookup; /* lkbs waiting on first */ | 287 | struct list_head res_lookup; /* lkbs waiting on first */ |
298 | union { | 288 | struct list_head res_hashchain; /* rsbtbl */ |
299 | struct list_head res_hashchain; | ||
300 | struct rb_node res_hashnode; /* rsbtbl */ | ||
301 | }; | ||
302 | struct list_head res_grantqueue; | 289 | struct list_head res_grantqueue; |
303 | struct list_head res_convertqueue; | 290 | struct list_head res_convertqueue; |
304 | struct list_head res_waitqueue; | 291 | struct list_head res_waitqueue; |
@@ -311,21 +298,10 @@ struct dlm_rsb { | |||
311 | char res_name[DLM_RESNAME_MAXLEN+1]; | 298 | char res_name[DLM_RESNAME_MAXLEN+1]; |
312 | }; | 299 | }; |
313 | 300 | ||
314 | /* dlm_master_lookup() flags */ | ||
315 | |||
316 | #define DLM_LU_RECOVER_DIR 1 | ||
317 | #define DLM_LU_RECOVER_MASTER 2 | ||
318 | |||
319 | /* dlm_master_lookup() results */ | ||
320 | |||
321 | #define DLM_LU_MATCH 1 | ||
322 | #define DLM_LU_ADD 2 | ||
323 | |||
324 | /* find_rsb() flags */ | 301 | /* find_rsb() flags */ |
325 | 302 | ||
326 | #define R_REQUEST 0x00000001 | 303 | #define R_MASTER 1 /* only return rsb if it's a master */ |
327 | #define R_RECEIVE_REQUEST 0x00000002 | 304 | #define R_CREATE 2 /* create/add rsb if not found */ |
328 | #define R_RECEIVE_RECOVER 0x00000004 | ||
329 | 305 | ||
330 | /* rsb_flags */ | 306 | /* rsb_flags */ |
331 | 307 | ||
@@ -336,8 +312,7 @@ enum rsb_flags { | |||
336 | RSB_NEW_MASTER, | 312 | RSB_NEW_MASTER, |
337 | RSB_NEW_MASTER2, | 313 | RSB_NEW_MASTER2, |
338 | RSB_RECOVER_CONVERT, | 314 | RSB_RECOVER_CONVERT, |
339 | RSB_RECOVER_GRANT, | 315 | RSB_LOCKS_PURGED, |
340 | RSB_RECOVER_LVB_INVAL, | ||
341 | }; | 316 | }; |
342 | 317 | ||
343 | static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag) | 318 | static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag) |
@@ -359,9 +334,7 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag) | |||
359 | /* dlm_header is first element of all structs sent between nodes */ | 334 | /* dlm_header is first element of all structs sent between nodes */ |
360 | 335 | ||
361 | #define DLM_HEADER_MAJOR 0x00030000 | 336 | #define DLM_HEADER_MAJOR 0x00030000 |
362 | #define DLM_HEADER_MINOR 0x00000001 | 337 | #define DLM_HEADER_MINOR 0x00000000 |
363 | |||
364 | #define DLM_HEADER_SLOTS 0x00000001 | ||
365 | 338 | ||
366 | #define DLM_MSG 1 | 339 | #define DLM_MSG 1 |
367 | #define DLM_RCOM 2 | 340 | #define DLM_RCOM 2 |
@@ -449,34 +422,10 @@ union dlm_packet { | |||
449 | struct dlm_rcom rcom; | 422 | struct dlm_rcom rcom; |
450 | }; | 423 | }; |
451 | 424 | ||
452 | #define DLM_RSF_NEED_SLOTS 0x00000001 | ||
453 | |||
454 | /* RCOM_STATUS data */ | ||
455 | struct rcom_status { | ||
456 | __le32 rs_flags; | ||
457 | __le32 rs_unused1; | ||
458 | __le64 rs_unused2; | ||
459 | }; | ||
460 | |||
461 | /* RCOM_STATUS_REPLY data */ | ||
462 | struct rcom_config { | 425 | struct rcom_config { |
463 | __le32 rf_lvblen; | 426 | __le32 rf_lvblen; |
464 | __le32 rf_lsflags; | 427 | __le32 rf_lsflags; |
465 | 428 | __le64 rf_unused; | |
466 | /* DLM_HEADER_SLOTS adds: */ | ||
467 | __le32 rf_flags; | ||
468 | __le16 rf_our_slot; | ||
469 | __le16 rf_num_slots; | ||
470 | __le32 rf_generation; | ||
471 | __le32 rf_unused1; | ||
472 | __le64 rf_unused2; | ||
473 | }; | ||
474 | |||
475 | struct rcom_slot { | ||
476 | __le32 ro_nodeid; | ||
477 | __le16 ro_slot; | ||
478 | __le16 ro_unused1; | ||
479 | __le64 ro_unused2; | ||
480 | }; | 429 | }; |
481 | 430 | ||
482 | struct rcom_lock { | 431 | struct rcom_lock { |
@@ -499,18 +448,10 @@ struct rcom_lock { | |||
499 | char rl_lvb[0]; | 448 | char rl_lvb[0]; |
500 | }; | 449 | }; |
501 | 450 | ||
502 | /* | ||
503 | * The max number of resources per rsbtbl bucket that shrink will attempt | ||
504 | * to remove in each iteration. | ||
505 | */ | ||
506 | |||
507 | #define DLM_REMOVE_NAMES_MAX 8 | ||
508 | |||
509 | struct dlm_ls { | 451 | struct dlm_ls { |
510 | struct list_head ls_list; /* list of lockspaces */ | 452 | struct list_head ls_list; /* list of lockspaces */ |
511 | dlm_lockspace_t *ls_local_handle; | 453 | dlm_lockspace_t *ls_local_handle; |
512 | uint32_t ls_global_id; /* global unique lockspace ID */ | 454 | uint32_t ls_global_id; /* global unique lockspace ID */ |
513 | uint32_t ls_generation; | ||
514 | uint32_t ls_exflags; | 455 | uint32_t ls_exflags; |
515 | int ls_lvblen; | 456 | int ls_lvblen; |
516 | int ls_count; /* refcount of processes in | 457 | int ls_count; /* refcount of processes in |
@@ -526,6 +467,9 @@ struct dlm_ls { | |||
526 | struct dlm_rsbtable *ls_rsbtbl; | 467 | struct dlm_rsbtable *ls_rsbtbl; |
527 | uint32_t ls_rsbtbl_size; | 468 | uint32_t ls_rsbtbl_size; |
528 | 469 | ||
470 | struct dlm_dirtable *ls_dirtbl; | ||
471 | uint32_t ls_dirtbl_size; | ||
472 | |||
529 | struct mutex ls_waiters_mutex; | 473 | struct mutex ls_waiters_mutex; |
530 | struct list_head ls_waiters; /* lkbs needing a reply */ | 474 | struct list_head ls_waiters; /* lkbs needing a reply */ |
531 | 475 | ||
@@ -539,12 +483,6 @@ struct dlm_ls { | |||
539 | int ls_new_rsb_count; | 483 | int ls_new_rsb_count; |
540 | struct list_head ls_new_rsb; /* new rsb structs */ | 484 | struct list_head ls_new_rsb; /* new rsb structs */ |
541 | 485 | ||
542 | spinlock_t ls_remove_spin; | ||
543 | char ls_remove_name[DLM_RESNAME_MAXLEN+1]; | ||
544 | char *ls_remove_names[DLM_REMOVE_NAMES_MAX]; | ||
545 | int ls_remove_len; | ||
546 | int ls_remove_lens[DLM_REMOVE_NAMES_MAX]; | ||
547 | |||
548 | struct list_head ls_nodes; /* current nodes in ls */ | 486 | struct list_head ls_nodes; /* current nodes in ls */ |
549 | struct list_head ls_nodes_gone; /* dead node list, recovery */ | 487 | struct list_head ls_nodes_gone; /* dead node list, recovery */ |
550 | int ls_num_nodes; /* number of nodes in ls */ | 488 | int ls_num_nodes; /* number of nodes in ls */ |
@@ -552,11 +490,6 @@ struct dlm_ls { | |||
552 | int ls_total_weight; | 490 | int ls_total_weight; |
553 | int *ls_node_array; | 491 | int *ls_node_array; |
554 | 492 | ||
555 | int ls_slot; | ||
556 | int ls_num_slots; | ||
557 | int ls_slots_size; | ||
558 | struct dlm_slot *ls_slots; | ||
559 | |||
560 | struct dlm_rsb ls_stub_rsb; /* for returning errors */ | 493 | struct dlm_rsb ls_stub_rsb; /* for returning errors */ |
561 | struct dlm_lkb ls_stub_lkb; /* for returning errors */ | 494 | struct dlm_lkb ls_stub_lkb; /* for returning errors */ |
562 | struct dlm_message ls_stub_ms; /* for faking a reply */ | 495 | struct dlm_message ls_stub_ms; /* for faking a reply */ |
@@ -565,7 +498,6 @@ struct dlm_ls { | |||
565 | struct dentry *ls_debug_waiters_dentry; /* debugfs */ | 498 | struct dentry *ls_debug_waiters_dentry; /* debugfs */ |
566 | struct dentry *ls_debug_locks_dentry; /* debugfs */ | 499 | struct dentry *ls_debug_locks_dentry; /* debugfs */ |
567 | struct dentry *ls_debug_all_dentry; /* debugfs */ | 500 | struct dentry *ls_debug_all_dentry; /* debugfs */ |
568 | struct dentry *ls_debug_toss_dentry; /* debugfs */ | ||
569 | 501 | ||
570 | wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ | 502 | wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ |
571 | int ls_uevent_result; | 503 | int ls_uevent_result; |
@@ -594,64 +526,29 @@ struct dlm_ls { | |||
594 | struct mutex ls_requestqueue_mutex; | 526 | struct mutex ls_requestqueue_mutex; |
595 | struct dlm_rcom *ls_recover_buf; | 527 | struct dlm_rcom *ls_recover_buf; |
596 | int ls_recover_nodeid; /* for debugging */ | 528 | int ls_recover_nodeid; /* for debugging */ |
597 | unsigned int ls_recover_dir_sent_res; /* for log info */ | ||
598 | unsigned int ls_recover_dir_sent_msg; /* for log info */ | ||
599 | unsigned int ls_recover_locks_in; /* for log info */ | ||
600 | uint64_t ls_rcom_seq; | 529 | uint64_t ls_rcom_seq; |
601 | spinlock_t ls_rcom_spin; | 530 | spinlock_t ls_rcom_spin; |
602 | struct list_head ls_recover_list; | 531 | struct list_head ls_recover_list; |
603 | spinlock_t ls_recover_list_lock; | 532 | spinlock_t ls_recover_list_lock; |
604 | int ls_recover_list_count; | 533 | int ls_recover_list_count; |
605 | struct idr ls_recover_idr; | ||
606 | spinlock_t ls_recover_idr_lock; | ||
607 | wait_queue_head_t ls_wait_general; | 534 | wait_queue_head_t ls_wait_general; |
608 | wait_queue_head_t ls_recover_lock_wait; | ||
609 | struct mutex ls_clear_proc_locks; | 535 | struct mutex ls_clear_proc_locks; |
610 | 536 | ||
611 | struct list_head ls_root_list; /* root resources */ | 537 | struct list_head ls_root_list; /* root resources */ |
612 | struct rw_semaphore ls_root_sem; /* protect root_list */ | 538 | struct rw_semaphore ls_root_sem; /* protect root_list */ |
613 | 539 | ||
614 | const struct dlm_lockspace_ops *ls_ops; | ||
615 | void *ls_ops_arg; | ||
616 | |||
617 | int ls_namelen; | 540 | int ls_namelen; |
618 | char ls_name[1]; | 541 | char ls_name[1]; |
619 | }; | 542 | }; |
620 | 543 | ||
621 | /* | 544 | #define LSFL_WORK 0 |
622 | * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines | 545 | #define LSFL_RUNNING 1 |
623 | * that they should abort what they're doing so new recovery can be started. | 546 | #define LSFL_RECOVERY_STOP 2 |
624 | * | 547 | #define LSFL_RCOM_READY 3 |
625 | * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it | 548 | #define LSFL_RCOM_WAIT 4 |
626 | * should do down_write() on the in_recovery rw_semaphore. (doing down_write | 549 | #define LSFL_UEVENT_WAIT 5 |
627 | * within dlm_ls_stop causes complaints about the lock acquired/released | 550 | #define LSFL_TIMEWARN 6 |
628 | * in different contexts.) | 551 | #define LSFL_CB_DELAY 7 |
629 | * | ||
630 | * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore. | ||
631 | * It sets this after it is done with down_write() on the in_recovery | ||
632 | * rw_semaphore and clears it after it has released the rw_semaphore. | ||
633 | * | ||
634 | * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it | ||
635 | * should begin recovery of the lockspace. | ||
636 | * | ||
637 | * LSFL_RUNNING - set when normal locking activity is enabled. | ||
638 | * dlm_ls_stop() clears this to tell dlm locking routines that they should | ||
639 | * quit what they are doing so recovery can run. dlm_recoverd sets | ||
640 | * this after recovery is finished. | ||
641 | */ | ||
642 | |||
643 | #define LSFL_RECOVER_STOP 0 | ||
644 | #define LSFL_RECOVER_DOWN 1 | ||
645 | #define LSFL_RECOVER_LOCK 2 | ||
646 | #define LSFL_RECOVER_WORK 3 | ||
647 | #define LSFL_RUNNING 4 | ||
648 | |||
649 | #define LSFL_RCOM_READY 5 | ||
650 | #define LSFL_RCOM_WAIT 6 | ||
651 | #define LSFL_UEVENT_WAIT 7 | ||
652 | #define LSFL_TIMEWARN 8 | ||
653 | #define LSFL_CB_DELAY 9 | ||
654 | #define LSFL_NODIR 10 | ||
655 | 552 | ||
656 | /* much of this is just saving user space pointers associated with the | 553 | /* much of this is just saving user space pointers associated with the |
657 | lock that we pass back to the user lib with an ast */ | 554 | lock that we pass back to the user lib with an ast */ |
@@ -694,12 +591,12 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls) | |||
694 | 591 | ||
695 | static inline int dlm_recovery_stopped(struct dlm_ls *ls) | 592 | static inline int dlm_recovery_stopped(struct dlm_ls *ls) |
696 | { | 593 | { |
697 | return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); | 594 | return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); |
698 | } | 595 | } |
699 | 596 | ||
700 | static inline int dlm_no_directory(struct dlm_ls *ls) | 597 | static inline int dlm_no_directory(struct dlm_ls *ls) |
701 | { | 598 | { |
702 | return test_bit(LSFL_NODIR, &ls->ls_flags); | 599 | return (ls->ls_exflags & DLM_LSFL_NODIR) ? 1 : 0; |
703 | } | 600 | } |
704 | 601 | ||
705 | int dlm_netlink_init(void); | 602 | int dlm_netlink_init(void); |
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index a579f30f237..83b5e32514e 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -56,7 +56,6 @@ | |||
56 | L: receive_xxxx_reply() <- R: send_xxxx_reply() | 56 | L: receive_xxxx_reply() <- R: send_xxxx_reply() |
57 | */ | 57 | */ |
58 | #include <linux/types.h> | 58 | #include <linux/types.h> |
59 | #include <linux/rbtree.h> | ||
60 | #include <linux/slab.h> | 59 | #include <linux/slab.h> |
61 | #include "dlm_internal.h" | 60 | #include "dlm_internal.h" |
62 | #include <linux/dlm_device.h> | 61 | #include <linux/dlm_device.h> |
@@ -90,7 +89,6 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
90 | static int receive_extralen(struct dlm_message *ms); | 89 | static int receive_extralen(struct dlm_message *ms); |
91 | static void do_purge(struct dlm_ls *ls, int nodeid, int pid); | 90 | static void do_purge(struct dlm_ls *ls, int nodeid, int pid); |
92 | static void del_timeout(struct dlm_lkb *lkb); | 91 | static void del_timeout(struct dlm_lkb *lkb); |
93 | static void toss_rsb(struct kref *kref); | ||
94 | 92 | ||
95 | /* | 93 | /* |
96 | * Lock compatibilty matrix - thanks Steve | 94 | * Lock compatibilty matrix - thanks Steve |
@@ -161,21 +159,18 @@ static const int __quecvt_compat_matrix[8][8] = { | |||
161 | 159 | ||
162 | void dlm_print_lkb(struct dlm_lkb *lkb) | 160 | void dlm_print_lkb(struct dlm_lkb *lkb) |
163 | { | 161 | { |
164 | printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x " | 162 | printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n" |
165 | "sts %d rq %d gr %d wait_type %d wait_nodeid %d seq %llu\n", | 163 | " status %d rqmode %d grmode %d wait_type %d\n", |
166 | lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags, | 164 | lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags, |
167 | lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode, | 165 | lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode, |
168 | lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_wait_nodeid, | 166 | lkb->lkb_grmode, lkb->lkb_wait_type); |
169 | (unsigned long long)lkb->lkb_recover_seq); | ||
170 | } | 167 | } |
171 | 168 | ||
172 | static void dlm_print_rsb(struct dlm_rsb *r) | 169 | static void dlm_print_rsb(struct dlm_rsb *r) |
173 | { | 170 | { |
174 | printk(KERN_ERR "rsb: nodeid %d master %d dir %d flags %lx first %x " | 171 | printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", |
175 | "rlc %d name %s\n", | 172 | r->res_nodeid, r->res_flags, r->res_first_lkid, |
176 | r->res_nodeid, r->res_master_nodeid, r->res_dir_nodeid, | 173 | r->res_recover_locks_count, r->res_name); |
177 | r->res_flags, r->res_first_lkid, r->res_recover_locks_count, | ||
178 | r->res_name); | ||
179 | } | 174 | } |
180 | 175 | ||
181 | void dlm_dump_rsb(struct dlm_rsb *r) | 176 | void dlm_dump_rsb(struct dlm_rsb *r) |
@@ -255,6 +250,8 @@ static inline int is_process_copy(struct dlm_lkb *lkb) | |||
255 | 250 | ||
256 | static inline int is_master_copy(struct dlm_lkb *lkb) | 251 | static inline int is_master_copy(struct dlm_lkb *lkb) |
257 | { | 252 | { |
253 | if (lkb->lkb_flags & DLM_IFL_MSTCPY) | ||
254 | DLM_ASSERT(lkb->lkb_nodeid, dlm_print_lkb(lkb);); | ||
258 | return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 1 : 0; | 255 | return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 1 : 0; |
259 | } | 256 | } |
260 | 257 | ||
@@ -330,37 +327,6 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) | |||
330 | * Basic operations on rsb's and lkb's | 327 | * Basic operations on rsb's and lkb's |
331 | */ | 328 | */ |
332 | 329 | ||
333 | /* This is only called to add a reference when the code already holds | ||
334 | a valid reference to the rsb, so there's no need for locking. */ | ||
335 | |||
336 | static inline void hold_rsb(struct dlm_rsb *r) | ||
337 | { | ||
338 | kref_get(&r->res_ref); | ||
339 | } | ||
340 | |||
341 | void dlm_hold_rsb(struct dlm_rsb *r) | ||
342 | { | ||
343 | hold_rsb(r); | ||
344 | } | ||
345 | |||
346 | /* When all references to the rsb are gone it's transferred to | ||
347 | the tossed list for later disposal. */ | ||
348 | |||
349 | static void put_rsb(struct dlm_rsb *r) | ||
350 | { | ||
351 | struct dlm_ls *ls = r->res_ls; | ||
352 | uint32_t bucket = r->res_bucket; | ||
353 | |||
354 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | ||
355 | kref_put(&r->res_ref, toss_rsb); | ||
356 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); | ||
357 | } | ||
358 | |||
359 | void dlm_put_rsb(struct dlm_rsb *r) | ||
360 | { | ||
361 | put_rsb(r); | ||
362 | } | ||
363 | |||
364 | static int pre_rsb_struct(struct dlm_ls *ls) | 330 | static int pre_rsb_struct(struct dlm_ls *ls) |
365 | { | 331 | { |
366 | struct dlm_rsb *r1, *r2; | 332 | struct dlm_rsb *r1, *r2; |
@@ -414,8 +380,6 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, | |||
414 | 380 | ||
415 | r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); | 381 | r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); |
416 | list_del(&r->res_hashchain); | 382 | list_del(&r->res_hashchain); |
417 | /* Convert the empty list_head to a NULL rb_node for tree usage: */ | ||
418 | memset(&r->res_hashnode, 0, sizeof(struct rb_node)); | ||
419 | ls->ls_new_rsb_count--; | 383 | ls->ls_new_rsb_count--; |
420 | spin_unlock(&ls->ls_new_rsb_spin); | 384 | spin_unlock(&ls->ls_new_rsb_spin); |
421 | 385 | ||
@@ -424,6 +388,7 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, | |||
424 | memcpy(r->res_name, name, len); | 388 | memcpy(r->res_name, name, len); |
425 | mutex_init(&r->res_mutex); | 389 | mutex_init(&r->res_mutex); |
426 | 390 | ||
391 | INIT_LIST_HEAD(&r->res_hashchain); | ||
427 | INIT_LIST_HEAD(&r->res_lookup); | 392 | INIT_LIST_HEAD(&r->res_lookup); |
428 | INIT_LIST_HEAD(&r->res_grantqueue); | 393 | INIT_LIST_HEAD(&r->res_grantqueue); |
429 | INIT_LIST_HEAD(&r->res_convertqueue); | 394 | INIT_LIST_HEAD(&r->res_convertqueue); |
@@ -435,67 +400,59 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, | |||
435 | return 0; | 400 | return 0; |
436 | } | 401 | } |
437 | 402 | ||
438 | static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) | 403 | static int search_rsb_list(struct list_head *head, char *name, int len, |
404 | unsigned int flags, struct dlm_rsb **r_ret) | ||
439 | { | 405 | { |
440 | char maxname[DLM_RESNAME_MAXLEN]; | ||
441 | |||
442 | memset(maxname, 0, DLM_RESNAME_MAXLEN); | ||
443 | memcpy(maxname, name, nlen); | ||
444 | return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); | ||
445 | } | ||
446 | |||
447 | int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, | ||
448 | struct dlm_rsb **r_ret) | ||
449 | { | ||
450 | struct rb_node *node = tree->rb_node; | ||
451 | struct dlm_rsb *r; | 406 | struct dlm_rsb *r; |
452 | int rc; | 407 | int error = 0; |
453 | 408 | ||
454 | while (node) { | 409 | list_for_each_entry(r, head, res_hashchain) { |
455 | r = rb_entry(node, struct dlm_rsb, res_hashnode); | 410 | if (len == r->res_length && !memcmp(name, r->res_name, len)) |
456 | rc = rsb_cmp(r, name, len); | ||
457 | if (rc < 0) | ||
458 | node = node->rb_left; | ||
459 | else if (rc > 0) | ||
460 | node = node->rb_right; | ||
461 | else | ||
462 | goto found; | 411 | goto found; |
463 | } | 412 | } |
464 | *r_ret = NULL; | 413 | *r_ret = NULL; |
465 | return -EBADR; | 414 | return -EBADR; |
466 | 415 | ||
467 | found: | 416 | found: |
417 | if (r->res_nodeid && (flags & R_MASTER)) | ||
418 | error = -ENOTBLK; | ||
468 | *r_ret = r; | 419 | *r_ret = r; |
469 | return 0; | 420 | return error; |
470 | } | 421 | } |
471 | 422 | ||
472 | static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) | 423 | static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, |
424 | unsigned int flags, struct dlm_rsb **r_ret) | ||
473 | { | 425 | { |
474 | struct rb_node **newn = &tree->rb_node; | 426 | struct dlm_rsb *r; |
475 | struct rb_node *parent = NULL; | 427 | int error; |
476 | int rc; | ||
477 | |||
478 | while (*newn) { | ||
479 | struct dlm_rsb *cur = rb_entry(*newn, struct dlm_rsb, | ||
480 | res_hashnode); | ||
481 | 428 | ||
482 | parent = *newn; | 429 | error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); |
483 | rc = rsb_cmp(cur, rsb->res_name, rsb->res_length); | 430 | if (!error) { |
484 | if (rc < 0) | 431 | kref_get(&r->res_ref); |
485 | newn = &parent->rb_left; | 432 | goto out; |
486 | else if (rc > 0) | ||
487 | newn = &parent->rb_right; | ||
488 | else { | ||
489 | log_print("rsb_insert match"); | ||
490 | dlm_dump_rsb(rsb); | ||
491 | dlm_dump_rsb(cur); | ||
492 | return -EEXIST; | ||
493 | } | ||
494 | } | 433 | } |
434 | error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); | ||
435 | if (error) | ||
436 | goto out; | ||
495 | 437 | ||
496 | rb_link_node(&rsb->res_hashnode, parent, newn); | 438 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); |
497 | rb_insert_color(&rsb->res_hashnode, tree); | 439 | |
498 | return 0; | 440 | if (dlm_no_directory(ls)) |
441 | goto out; | ||
442 | |||
443 | if (r->res_nodeid == -1) { | ||
444 | rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); | ||
445 | r->res_first_lkid = 0; | ||
446 | } else if (r->res_nodeid > 0) { | ||
447 | rsb_set_flag(r, RSB_MASTER_UNCERTAIN); | ||
448 | r->res_first_lkid = 0; | ||
449 | } else { | ||
450 | DLM_ASSERT(r->res_nodeid == 0, dlm_print_rsb(r);); | ||
451 | DLM_ASSERT(!rsb_flag(r, RSB_MASTER_UNCERTAIN),); | ||
452 | } | ||
453 | out: | ||
454 | *r_ret = r; | ||
455 | return error; | ||
499 | } | 456 | } |
500 | 457 | ||
501 | /* | 458 | /* |
@@ -510,632 +467,119 @@ static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) | |||
510 | * Searching for an rsb means looking through both the normal list and toss | 467 | * Searching for an rsb means looking through both the normal list and toss |
511 | * list. When found on the toss list the rsb is moved to the normal list with | 468 | * list. When found on the toss list the rsb is moved to the normal list with |
512 | * ref count of 1; when found on normal list the ref count is incremented. | 469 | * ref count of 1; when found on normal list the ref count is incremented. |
513 | * | ||
514 | * rsb's on the keep list are being used locally and refcounted. | ||
515 | * rsb's on the toss list are not being used locally, and are not refcounted. | ||
516 | * | ||
517 | * The toss list rsb's were either | ||
518 | * - previously used locally but not any more (were on keep list, then | ||
519 | * moved to toss list when last refcount dropped) | ||
520 | * - created and put on toss list as a directory record for a lookup | ||
521 | * (we are the dir node for the res, but are not using the res right now, | ||
522 | * but some other node is) | ||
523 | * | ||
524 | * The purpose of find_rsb() is to return a refcounted rsb for local use. | ||
525 | * So, if the given rsb is on the toss list, it is moved to the keep list | ||
526 | * before being returned. | ||
527 | * | ||
528 | * toss_rsb() happens when all local usage of the rsb is done, i.e. no | ||
529 | * more refcounts exist, so the rsb is moved from the keep list to the | ||
530 | * toss list. | ||
531 | * | ||
532 | * rsb's on both keep and toss lists are used for doing a name to master | ||
533 | * lookups. rsb's that are in use locally (and being refcounted) are on | ||
534 | * the keep list, rsb's that are not in use locally (not refcounted) and | ||
535 | * only exist for name/master lookups are on the toss list. | ||
536 | * | ||
537 | * rsb's on the toss list who's dir_nodeid is not local can have stale | ||
538 | * name/master mappings. So, remote requests on such rsb's can potentially | ||
539 | * return with an error, which means the mapping is stale and needs to | ||
540 | * be updated with a new lookup. (The idea behind MASTER UNCERTAIN and | ||
541 | * first_lkid is to keep only a single outstanding request on an rsb | ||
542 | * while that rsb has a potentially stale master.) | ||
543 | */ | 470 | */ |
544 | 471 | ||
545 | static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, | 472 | static int find_rsb(struct dlm_ls *ls, char *name, int namelen, |
546 | uint32_t hash, uint32_t b, | 473 | unsigned int flags, struct dlm_rsb **r_ret) |
547 | int dir_nodeid, int from_nodeid, | ||
548 | unsigned int flags, struct dlm_rsb **r_ret) | ||
549 | { | 474 | { |
550 | struct dlm_rsb *r = NULL; | 475 | struct dlm_rsb *r = NULL; |
551 | int our_nodeid = dlm_our_nodeid(); | 476 | uint32_t hash, bucket; |
552 | int from_local = 0; | ||
553 | int from_other = 0; | ||
554 | int from_dir = 0; | ||
555 | int create = 0; | ||
556 | int error; | 477 | int error; |
557 | 478 | ||
558 | if (flags & R_RECEIVE_REQUEST) { | 479 | if (namelen > DLM_RESNAME_MAXLEN) { |
559 | if (from_nodeid == dir_nodeid) | 480 | error = -EINVAL; |
560 | from_dir = 1; | 481 | goto out; |
561 | else | ||
562 | from_other = 1; | ||
563 | } else if (flags & R_REQUEST) { | ||
564 | from_local = 1; | ||
565 | } | 482 | } |
566 | 483 | ||
567 | /* | 484 | if (dlm_no_directory(ls)) |
568 | * flags & R_RECEIVE_RECOVER is from dlm_recover_master_copy, so | 485 | flags |= R_CREATE; |
569 | * from_nodeid has sent us a lock in dlm_recover_locks, believing | ||
570 | * we're the new master. Our local recovery may not have set | ||
571 | * res_master_nodeid to our_nodeid yet, so allow either. Don't | ||
572 | * create the rsb; dlm_recover_process_copy() will handle EBADR | ||
573 | * by resending. | ||
574 | * | ||
575 | * If someone sends us a request, we are the dir node, and we do | ||
576 | * not find the rsb anywhere, then recreate it. This happens if | ||
577 | * someone sends us a request after we have removed/freed an rsb | ||
578 | * from our toss list. (They sent a request instead of lookup | ||
579 | * because they are using an rsb from their toss list.) | ||
580 | */ | ||
581 | 486 | ||
582 | if (from_local || from_dir || | 487 | hash = jhash(name, namelen, 0); |
583 | (from_other && (dir_nodeid == our_nodeid))) { | 488 | bucket = hash & (ls->ls_rsbtbl_size - 1); |
584 | create = 1; | ||
585 | } | ||
586 | 489 | ||
587 | retry: | 490 | retry: |
588 | if (create) { | 491 | if (flags & R_CREATE) { |
589 | error = pre_rsb_struct(ls); | 492 | error = pre_rsb_struct(ls); |
590 | if (error < 0) | 493 | if (error < 0) |
591 | goto out; | 494 | goto out; |
592 | } | 495 | } |
593 | 496 | ||
594 | spin_lock(&ls->ls_rsbtbl[b].lock); | 497 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
595 | |||
596 | error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); | ||
597 | if (error) | ||
598 | goto do_toss; | ||
599 | |||
600 | /* | ||
601 | * rsb is active, so we can't check master_nodeid without lock_rsb. | ||
602 | */ | ||
603 | |||
604 | kref_get(&r->res_ref); | ||
605 | error = 0; | ||
606 | goto out_unlock; | ||
607 | |||
608 | |||
609 | do_toss: | ||
610 | error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); | ||
611 | if (error) | ||
612 | goto do_new; | ||
613 | |||
614 | /* | ||
615 | * rsb found inactive (master_nodeid may be out of date unless | ||
616 | * we are the dir_nodeid or were the master) No other thread | ||
617 | * is using this rsb because it's on the toss list, so we can | ||
618 | * look at or update res_master_nodeid without lock_rsb. | ||
619 | */ | ||
620 | 498 | ||
621 | if ((r->res_master_nodeid != our_nodeid) && from_other) { | 499 | error = _search_rsb(ls, name, namelen, bucket, flags, &r); |
622 | /* our rsb was not master, and another node (not the dir node) | 500 | if (!error) |
623 | has sent us a request */ | ||
624 | log_debug(ls, "find_rsb toss from_other %d master %d dir %d %s", | ||
625 | from_nodeid, r->res_master_nodeid, dir_nodeid, | ||
626 | r->res_name); | ||
627 | error = -ENOTBLK; | ||
628 | goto out_unlock; | 501 | goto out_unlock; |
629 | } | ||
630 | |||
631 | if ((r->res_master_nodeid != our_nodeid) && from_dir) { | ||
632 | /* don't think this should ever happen */ | ||
633 | log_error(ls, "find_rsb toss from_dir %d master %d", | ||
634 | from_nodeid, r->res_master_nodeid); | ||
635 | dlm_print_rsb(r); | ||
636 | /* fix it and go on */ | ||
637 | r->res_master_nodeid = our_nodeid; | ||
638 | r->res_nodeid = 0; | ||
639 | rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); | ||
640 | r->res_first_lkid = 0; | ||
641 | } | ||
642 | |||
643 | if (from_local && (r->res_master_nodeid != our_nodeid)) { | ||
644 | /* Because we have held no locks on this rsb, | ||
645 | res_master_nodeid could have become stale. */ | ||
646 | rsb_set_flag(r, RSB_MASTER_UNCERTAIN); | ||
647 | r->res_first_lkid = 0; | ||
648 | } | ||
649 | |||
650 | rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); | ||
651 | error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); | ||
652 | goto out_unlock; | ||
653 | 502 | ||
503 | if (error == -EBADR && !(flags & R_CREATE)) | ||
504 | goto out_unlock; | ||
654 | 505 | ||
655 | do_new: | 506 | /* the rsb was found but wasn't a master copy */ |
656 | /* | 507 | if (error == -ENOTBLK) |
657 | * rsb not found | ||
658 | */ | ||
659 | |||
660 | if (error == -EBADR && !create) | ||
661 | goto out_unlock; | 508 | goto out_unlock; |
662 | 509 | ||
663 | error = get_rsb_struct(ls, name, len, &r); | 510 | error = get_rsb_struct(ls, name, namelen, &r); |
664 | if (error == -EAGAIN) { | 511 | if (error == -EAGAIN) { |
665 | spin_unlock(&ls->ls_rsbtbl[b].lock); | 512 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
666 | goto retry; | 513 | goto retry; |
667 | } | 514 | } |
668 | if (error) | 515 | if (error) |
669 | goto out_unlock; | 516 | goto out_unlock; |
670 | 517 | ||
671 | r->res_hash = hash; | 518 | r->res_hash = hash; |
672 | r->res_bucket = b; | 519 | r->res_bucket = bucket; |
673 | r->res_dir_nodeid = dir_nodeid; | 520 | r->res_nodeid = -1; |
674 | kref_init(&r->res_ref); | 521 | kref_init(&r->res_ref); |
675 | 522 | ||
676 | if (from_dir) { | 523 | /* With no directory, the master can be set immediately */ |
677 | /* want to see how often this happens */ | 524 | if (dlm_no_directory(ls)) { |
678 | log_debug(ls, "find_rsb new from_dir %d recreate %s", | 525 | int nodeid = dlm_dir_nodeid(r); |
679 | from_nodeid, r->res_name); | 526 | if (nodeid == dlm_our_nodeid()) |
680 | r->res_master_nodeid = our_nodeid; | 527 | nodeid = 0; |
681 | r->res_nodeid = 0; | 528 | r->res_nodeid = nodeid; |
682 | goto out_add; | ||
683 | } | ||
684 | |||
685 | if (from_other && (dir_nodeid != our_nodeid)) { | ||
686 | /* should never happen */ | ||
687 | log_error(ls, "find_rsb new from_other %d dir %d our %d %s", | ||
688 | from_nodeid, dir_nodeid, our_nodeid, r->res_name); | ||
689 | dlm_free_rsb(r); | ||
690 | error = -ENOTBLK; | ||
691 | goto out_unlock; | ||
692 | } | ||
693 | |||
694 | if (from_other) { | ||
695 | log_debug(ls, "find_rsb new from_other %d dir %d %s", | ||
696 | from_nodeid, dir_nodeid, r->res_name); | ||
697 | } | ||
698 | |||
699 | if (dir_nodeid == our_nodeid) { | ||
700 | /* When we are the dir nodeid, we can set the master | ||
701 | node immediately */ | ||
702 | r->res_master_nodeid = our_nodeid; | ||
703 | r->res_nodeid = 0; | ||
704 | } else { | ||
705 | /* set_master will send_lookup to dir_nodeid */ | ||
706 | r->res_master_nodeid = 0; | ||
707 | r->res_nodeid = -1; | ||
708 | } | 529 | } |
709 | 530 | list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); | |
710 | out_add: | 531 | error = 0; |
711 | error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); | ||
712 | out_unlock: | 532 | out_unlock: |
713 | spin_unlock(&ls->ls_rsbtbl[b].lock); | 533 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
714 | out: | 534 | out: |
715 | *r_ret = r; | 535 | *r_ret = r; |
716 | return error; | 536 | return error; |
717 | } | 537 | } |
718 | 538 | ||
719 | /* During recovery, other nodes can send us new MSTCPY locks (from | 539 | /* This is only called to add a reference when the code already holds |
720 | dlm_recover_locks) before we've made ourself master (in | 540 | a valid reference to the rsb, so there's no need for locking. */ |
721 | dlm_recover_masters). */ | ||
722 | 541 | ||
723 | static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len, | 542 | static inline void hold_rsb(struct dlm_rsb *r) |
724 | uint32_t hash, uint32_t b, | ||
725 | int dir_nodeid, int from_nodeid, | ||
726 | unsigned int flags, struct dlm_rsb **r_ret) | ||
727 | { | 543 | { |
728 | struct dlm_rsb *r = NULL; | ||
729 | int our_nodeid = dlm_our_nodeid(); | ||
730 | int recover = (flags & R_RECEIVE_RECOVER); | ||
731 | int error; | ||
732 | |||
733 | retry: | ||
734 | error = pre_rsb_struct(ls); | ||
735 | if (error < 0) | ||
736 | goto out; | ||
737 | |||
738 | spin_lock(&ls->ls_rsbtbl[b].lock); | ||
739 | |||
740 | error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); | ||
741 | if (error) | ||
742 | goto do_toss; | ||
743 | |||
744 | /* | ||
745 | * rsb is active, so we can't check master_nodeid without lock_rsb. | ||
746 | */ | ||
747 | |||
748 | kref_get(&r->res_ref); | 544 | kref_get(&r->res_ref); |
749 | goto out_unlock; | ||
750 | |||
751 | |||
752 | do_toss: | ||
753 | error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); | ||
754 | if (error) | ||
755 | goto do_new; | ||
756 | |||
757 | /* | ||
758 | * rsb found inactive. No other thread is using this rsb because | ||
759 | * it's on the toss list, so we can look at or update | ||
760 | * res_master_nodeid without lock_rsb. | ||
761 | */ | ||
762 | |||
763 | if (!recover && (r->res_master_nodeid != our_nodeid) && from_nodeid) { | ||
764 | /* our rsb is not master, and another node has sent us a | ||
765 | request; this should never happen */ | ||
766 | log_error(ls, "find_rsb toss from_nodeid %d master %d dir %d", | ||
767 | from_nodeid, r->res_master_nodeid, dir_nodeid); | ||
768 | dlm_print_rsb(r); | ||
769 | error = -ENOTBLK; | ||
770 | goto out_unlock; | ||
771 | } | ||
772 | |||
773 | if (!recover && (r->res_master_nodeid != our_nodeid) && | ||
774 | (dir_nodeid == our_nodeid)) { | ||
775 | /* our rsb is not master, and we are dir; may as well fix it; | ||
776 | this should never happen */ | ||
777 | log_error(ls, "find_rsb toss our %d master %d dir %d", | ||
778 | our_nodeid, r->res_master_nodeid, dir_nodeid); | ||
779 | dlm_print_rsb(r); | ||
780 | r->res_master_nodeid = our_nodeid; | ||
781 | r->res_nodeid = 0; | ||
782 | } | ||
783 | |||
784 | rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); | ||
785 | error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); | ||
786 | goto out_unlock; | ||
787 | |||
788 | |||
789 | do_new: | ||
790 | /* | ||
791 | * rsb not found | ||
792 | */ | ||
793 | |||
794 | error = get_rsb_struct(ls, name, len, &r); | ||
795 | if (error == -EAGAIN) { | ||
796 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
797 | goto retry; | ||
798 | } | ||
799 | if (error) | ||
800 | goto out_unlock; | ||
801 | |||
802 | r->res_hash = hash; | ||
803 | r->res_bucket = b; | ||
804 | r->res_dir_nodeid = dir_nodeid; | ||
805 | r->res_master_nodeid = dir_nodeid; | ||
806 | r->res_nodeid = (dir_nodeid == our_nodeid) ? 0 : dir_nodeid; | ||
807 | kref_init(&r->res_ref); | ||
808 | |||
809 | error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); | ||
810 | out_unlock: | ||
811 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
812 | out: | ||
813 | *r_ret = r; | ||
814 | return error; | ||
815 | } | 545 | } |
816 | 546 | ||
817 | static int find_rsb(struct dlm_ls *ls, char *name, int len, int from_nodeid, | 547 | void dlm_hold_rsb(struct dlm_rsb *r) |
818 | unsigned int flags, struct dlm_rsb **r_ret) | ||
819 | { | ||
820 | uint32_t hash, b; | ||
821 | int dir_nodeid; | ||
822 | |||
823 | if (len > DLM_RESNAME_MAXLEN) | ||
824 | return -EINVAL; | ||
825 | |||
826 | hash = jhash(name, len, 0); | ||
827 | b = hash & (ls->ls_rsbtbl_size - 1); | ||
828 | |||
829 | dir_nodeid = dlm_hash2nodeid(ls, hash); | ||
830 | |||
831 | if (dlm_no_directory(ls)) | ||
832 | return find_rsb_nodir(ls, name, len, hash, b, dir_nodeid, | ||
833 | from_nodeid, flags, r_ret); | ||
834 | else | ||
835 | return find_rsb_dir(ls, name, len, hash, b, dir_nodeid, | ||
836 | from_nodeid, flags, r_ret); | ||
837 | } | ||
838 | |||
839 | /* we have received a request and found that res_master_nodeid != our_nodeid, | ||
840 | so we need to return an error or make ourself the master */ | ||
841 | |||
842 | static int validate_master_nodeid(struct dlm_ls *ls, struct dlm_rsb *r, | ||
843 | int from_nodeid) | ||
844 | { | 548 | { |
845 | if (dlm_no_directory(ls)) { | 549 | hold_rsb(r); |
846 | log_error(ls, "find_rsb keep from_nodeid %d master %d dir %d", | ||
847 | from_nodeid, r->res_master_nodeid, | ||
848 | r->res_dir_nodeid); | ||
849 | dlm_print_rsb(r); | ||
850 | return -ENOTBLK; | ||
851 | } | ||
852 | |||
853 | if (from_nodeid != r->res_dir_nodeid) { | ||
854 | /* our rsb is not master, and another node (not the dir node) | ||
855 | has sent us a request. this is much more common when our | ||
856 | master_nodeid is zero, so limit debug to non-zero. */ | ||
857 | |||
858 | if (r->res_master_nodeid) { | ||
859 | log_debug(ls, "validate master from_other %d master %d " | ||
860 | "dir %d first %x %s", from_nodeid, | ||
861 | r->res_master_nodeid, r->res_dir_nodeid, | ||
862 | r->res_first_lkid, r->res_name); | ||
863 | } | ||
864 | return -ENOTBLK; | ||
865 | } else { | ||
866 | /* our rsb is not master, but the dir nodeid has sent us a | ||
867 | request; this could happen with master 0 / res_nodeid -1 */ | ||
868 | |||
869 | if (r->res_master_nodeid) { | ||
870 | log_error(ls, "validate master from_dir %d master %d " | ||
871 | "first %x %s", | ||
872 | from_nodeid, r->res_master_nodeid, | ||
873 | r->res_first_lkid, r->res_name); | ||
874 | } | ||
875 | |||
876 | r->res_master_nodeid = dlm_our_nodeid(); | ||
877 | r->res_nodeid = 0; | ||
878 | return 0; | ||
879 | } | ||
880 | } | 550 | } |
881 | 551 | ||
882 | /* | 552 | static void toss_rsb(struct kref *kref) |
883 | * We're the dir node for this res and another node wants to know the | ||
884 | * master nodeid. During normal operation (non recovery) this is only | ||
885 | * called from receive_lookup(); master lookups when the local node is | ||
886 | * the dir node are done by find_rsb(). | ||
887 | * | ||
888 | * normal operation, we are the dir node for a resource | ||
889 | * . _request_lock | ||
890 | * . set_master | ||
891 | * . send_lookup | ||
892 | * . receive_lookup | ||
893 | * . dlm_master_lookup flags 0 | ||
894 | * | ||
895 | * recover directory, we are rebuilding dir for all resources | ||
896 | * . dlm_recover_directory | ||
897 | * . dlm_rcom_names | ||
898 | * remote node sends back the rsb names it is master of and we are dir of | ||
899 | * . dlm_master_lookup RECOVER_DIR (fix_master 0, from_master 1) | ||
900 | * we either create new rsb setting remote node as master, or find existing | ||
901 | * rsb and set master to be the remote node. | ||
902 | * | ||
903 | * recover masters, we are finding the new master for resources | ||
904 | * . dlm_recover_masters | ||
905 | * . recover_master | ||
906 | * . dlm_send_rcom_lookup | ||
907 | * . receive_rcom_lookup | ||
908 | * . dlm_master_lookup RECOVER_MASTER (fix_master 1, from_master 0) | ||
909 | */ | ||
910 | |||
911 | int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len, | ||
912 | unsigned int flags, int *r_nodeid, int *result) | ||
913 | { | 553 | { |
914 | struct dlm_rsb *r = NULL; | 554 | struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref); |
915 | uint32_t hash, b; | 555 | struct dlm_ls *ls = r->res_ls; |
916 | int from_master = (flags & DLM_LU_RECOVER_DIR); | ||
917 | int fix_master = (flags & DLM_LU_RECOVER_MASTER); | ||
918 | int our_nodeid = dlm_our_nodeid(); | ||
919 | int dir_nodeid, error, toss_list = 0; | ||
920 | |||
921 | if (len > DLM_RESNAME_MAXLEN) | ||
922 | return -EINVAL; | ||
923 | |||
924 | if (from_nodeid == our_nodeid) { | ||
925 | log_error(ls, "dlm_master_lookup from our_nodeid %d flags %x", | ||
926 | our_nodeid, flags); | ||
927 | return -EINVAL; | ||
928 | } | ||
929 | |||
930 | hash = jhash(name, len, 0); | ||
931 | b = hash & (ls->ls_rsbtbl_size - 1); | ||
932 | |||
933 | dir_nodeid = dlm_hash2nodeid(ls, hash); | ||
934 | if (dir_nodeid != our_nodeid) { | ||
935 | log_error(ls, "dlm_master_lookup from %d dir %d our %d h %x %d", | ||
936 | from_nodeid, dir_nodeid, our_nodeid, hash, | ||
937 | ls->ls_num_nodes); | ||
938 | *r_nodeid = -1; | ||
939 | return -EINVAL; | ||
940 | } | ||
941 | |||
942 | retry: | ||
943 | error = pre_rsb_struct(ls); | ||
944 | if (error < 0) | ||
945 | return error; | ||
946 | |||
947 | spin_lock(&ls->ls_rsbtbl[b].lock); | ||
948 | error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); | ||
949 | if (!error) { | ||
950 | /* because the rsb is active, we need to lock_rsb before | ||
951 | checking/changing res_master_nodeid */ | |
952 | |||
953 | hold_rsb(r); | ||
954 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
955 | lock_rsb(r); | ||
956 | goto found; | ||
957 | } | ||
958 | |||
959 | error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); | ||
960 | if (error) | ||
961 | goto not_found; | ||
962 | |||
963 | /* because the rsb is inactive (on toss list), it's not refcounted | ||
964 | and lock_rsb is not used, but is protected by the rsbtbl lock */ | ||
965 | |||
966 | toss_list = 1; | ||
967 | found: | ||
968 | if (r->res_dir_nodeid != our_nodeid) { | ||
969 | /* should not happen, but may as well fix it and carry on */ | ||
970 | log_error(ls, "dlm_master_lookup res_dir %d our %d %s", | ||
971 | r->res_dir_nodeid, our_nodeid, r->res_name); | ||
972 | r->res_dir_nodeid = our_nodeid; | ||
973 | } | ||
974 | |||
975 | if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) { | ||
976 | /* Recovery uses this function to set a new master when | ||
977 | the previous master failed. Setting NEW_MASTER will | ||
978 | force dlm_recover_masters to call recover_master on this | ||
979 | rsb even though the res_nodeid is no longer removed. */ | ||
980 | |||
981 | r->res_master_nodeid = from_nodeid; | ||
982 | r->res_nodeid = from_nodeid; | ||
983 | rsb_set_flag(r, RSB_NEW_MASTER); | ||
984 | |||
985 | if (toss_list) { | ||
986 | /* I don't think we should ever find it on toss list. */ | ||
987 | log_error(ls, "dlm_master_lookup fix_master on toss"); | ||
988 | dlm_dump_rsb(r); | ||
989 | } | ||
990 | } | ||
991 | |||
992 | if (from_master && (r->res_master_nodeid != from_nodeid)) { | ||
993 | /* this will happen if from_nodeid became master during | ||
994 | a previous recovery cycle, and we aborted the previous | ||
995 | cycle before recovering this master value */ | ||
996 | |||
997 | log_limit(ls, "dlm_master_lookup from_master %d " | ||
998 | "master_nodeid %d res_nodeid %d first %x %s", | ||
999 | from_nodeid, r->res_master_nodeid, r->res_nodeid, | ||
1000 | r->res_first_lkid, r->res_name); | ||
1001 | |||
1002 | if (r->res_master_nodeid == our_nodeid) { | ||
1003 | log_error(ls, "from_master %d our_master", from_nodeid); | ||
1004 | dlm_dump_rsb(r); | ||
1005 | dlm_send_rcom_lookup_dump(r, from_nodeid); | ||
1006 | goto out_found; | ||
1007 | } | ||
1008 | |||
1009 | r->res_master_nodeid = from_nodeid; | ||
1010 | r->res_nodeid = from_nodeid; | ||
1011 | rsb_set_flag(r, RSB_NEW_MASTER); | ||
1012 | } | ||
1013 | |||
1014 | if (!r->res_master_nodeid) { | ||
1015 | /* this will happen if recovery happens while we're looking | ||
1016 | up the master for this rsb */ | ||
1017 | |||
1018 | log_debug(ls, "dlm_master_lookup master 0 to %d first %x %s", | ||
1019 | from_nodeid, r->res_first_lkid, r->res_name); | ||
1020 | r->res_master_nodeid = from_nodeid; | ||
1021 | r->res_nodeid = from_nodeid; | ||
1022 | } | ||
1023 | |||
1024 | if (!from_master && !fix_master && | ||
1025 | (r->res_master_nodeid == from_nodeid)) { | ||
1026 | /* this can happen when the master sends remove, the dir node | ||
1027 | finds the rsb on the keep list and ignores the remove, | ||
1028 | and the former master sends a lookup */ | ||
1029 | |||
1030 | log_limit(ls, "dlm_master_lookup from master %d flags %x " | ||
1031 | "first %x %s", from_nodeid, flags, | ||
1032 | r->res_first_lkid, r->res_name); | ||
1033 | } | ||
1034 | |||
1035 | out_found: | ||
1036 | *r_nodeid = r->res_master_nodeid; | ||
1037 | if (result) | ||
1038 | *result = DLM_LU_MATCH; | ||
1039 | |||
1040 | if (toss_list) { | ||
1041 | r->res_toss_time = jiffies; | ||
1042 | /* the rsb was inactive (on toss list) */ | ||
1043 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
1044 | } else { | ||
1045 | /* the rsb was active */ | ||
1046 | unlock_rsb(r); | ||
1047 | put_rsb(r); | ||
1048 | } | ||
1049 | return 0; | ||
1050 | |||
1051 | not_found: | ||
1052 | error = get_rsb_struct(ls, name, len, &r); | ||
1053 | if (error == -EAGAIN) { | ||
1054 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
1055 | goto retry; | ||
1056 | } | ||
1057 | if (error) | ||
1058 | goto out_unlock; | ||
1059 | 556 | ||
1060 | r->res_hash = hash; | 557 | DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); |
1061 | r->res_bucket = b; | ||
1062 | r->res_dir_nodeid = our_nodeid; | ||
1063 | r->res_master_nodeid = from_nodeid; | ||
1064 | r->res_nodeid = from_nodeid; | ||
1065 | kref_init(&r->res_ref); | 558 | kref_init(&r->res_ref); |
559 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); | ||
1066 | r->res_toss_time = jiffies; | 560 | r->res_toss_time = jiffies; |
1067 | 561 | if (r->res_lvbptr) { | |
1068 | error = rsb_insert(r, &ls->ls_rsbtbl[b].toss); | 562 | dlm_free_lvb(r->res_lvbptr); |
1069 | if (error) { | 563 | r->res_lvbptr = NULL; |
1070 | /* should never happen */ | ||
1071 | dlm_free_rsb(r); | ||
1072 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
1073 | goto retry; | ||
1074 | } | 564 | } |
1075 | |||
1076 | if (result) | ||
1077 | *result = DLM_LU_ADD; | ||
1078 | *r_nodeid = from_nodeid; | ||
1079 | error = 0; | ||
1080 | out_unlock: | ||
1081 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
1082 | return error; | ||
1083 | } | 565 | } |
1084 | 566 | ||
1085 | static void dlm_dump_rsb_hash(struct dlm_ls *ls, uint32_t hash) | 567 | /* When all references to the rsb are gone it's transferred to |
1086 | { | 568 | the tossed list for later disposal. */ |
1087 | struct rb_node *n; | ||
1088 | struct dlm_rsb *r; | ||
1089 | int i; | ||
1090 | |||
1091 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | ||
1092 | spin_lock(&ls->ls_rsbtbl[i].lock); | ||
1093 | for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) { | ||
1094 | r = rb_entry(n, struct dlm_rsb, res_hashnode); | ||
1095 | if (r->res_hash == hash) | ||
1096 | dlm_dump_rsb(r); | ||
1097 | } | ||
1098 | spin_unlock(&ls->ls_rsbtbl[i].lock); | ||
1099 | } | ||
1100 | } | ||
1101 | 569 | ||
1102 | void dlm_dump_rsb_name(struct dlm_ls *ls, char *name, int len) | 570 | static void put_rsb(struct dlm_rsb *r) |
1103 | { | 571 | { |
1104 | struct dlm_rsb *r = NULL; | 572 | struct dlm_ls *ls = r->res_ls; |
1105 | uint32_t hash, b; | 573 | uint32_t bucket = r->res_bucket; |
1106 | int error; | ||
1107 | |||
1108 | hash = jhash(name, len, 0); | ||
1109 | b = hash & (ls->ls_rsbtbl_size - 1); | ||
1110 | |||
1111 | spin_lock(&ls->ls_rsbtbl[b].lock); | ||
1112 | error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); | ||
1113 | if (!error) | ||
1114 | goto out_dump; | ||
1115 | 574 | ||
1116 | error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); | 575 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
1117 | if (error) | 576 | kref_put(&r->res_ref, toss_rsb); |
1118 | goto out; | 577 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
1119 | out_dump: | ||
1120 | dlm_dump_rsb(r); | ||
1121 | out: | ||
1122 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
1123 | } | 578 | } |
1124 | 579 | ||
1125 | static void toss_rsb(struct kref *kref) | 580 | void dlm_put_rsb(struct dlm_rsb *r) |
1126 | { | 581 | { |
1127 | struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref); | 582 | put_rsb(r); |
1128 | struct dlm_ls *ls = r->res_ls; | ||
1129 | |||
1130 | DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); | ||
1131 | kref_init(&r->res_ref); | ||
1132 | rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); | ||
1133 | rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); | ||
1134 | r->res_toss_time = jiffies; | ||
1135 | if (r->res_lvbptr) { | ||
1136 | dlm_free_lvb(r->res_lvbptr); | ||
1137 | r->res_lvbptr = NULL; | ||
1138 | } | ||
1139 | } | 583 | } |
1140 | 584 | ||
1141 | /* See comment for unhold_lkb */ | 585 | /* See comment for unhold_lkb */ |
@@ -1569,9 +1013,8 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, | |||
1569 | goto out_del; | 1013 | goto out_del; |
1570 | } | 1014 | } |
1571 | 1015 | ||
1572 | log_error(ls, "remwait error %x remote %d %x msg %d flags %x no wait", | 1016 | log_error(ls, "remwait error %x reply %d flags %x no wait_type", |
1573 | lkb->lkb_id, ms ? ms->m_header.h_nodeid : 0, lkb->lkb_remid, | 1017 | lkb->lkb_id, mstype, lkb->lkb_flags); |
1574 | mstype, lkb->lkb_flags); | ||
1575 | return -1; | 1018 | return -1; |
1576 | 1019 | ||
1577 | out_del: | 1020 | out_del: |
@@ -1624,170 +1067,61 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
1624 | return error; | 1067 | return error; |
1625 | } | 1068 | } |
1626 | 1069 | ||
1627 | /* If there's an rsb for the same resource being removed, ensure | 1070 | static void dir_remove(struct dlm_rsb *r) |
1628 | that the remove message is sent before the new lookup message. | ||
1629 | It should be rare to need a delay here, but if not, then it may | ||
1630 | be worthwhile to add a proper wait mechanism rather than a delay. */ | ||
1631 | |||
1632 | static void wait_pending_remove(struct dlm_rsb *r) | ||
1633 | { | 1071 | { |
1634 | struct dlm_ls *ls = r->res_ls; | 1072 | int to_nodeid; |
1635 | restart: | 1073 | |
1636 | spin_lock(&ls->ls_remove_spin); | 1074 | if (dlm_no_directory(r->res_ls)) |
1637 | if (ls->ls_remove_len && | 1075 | return; |
1638 | !rsb_cmp(r, ls->ls_remove_name, ls->ls_remove_len)) { | 1076 | |
1639 | log_debug(ls, "delay lookup for remove dir %d %s", | 1077 | to_nodeid = dlm_dir_nodeid(r); |
1640 | r->res_dir_nodeid, r->res_name); | 1078 | if (to_nodeid != dlm_our_nodeid()) |
1641 | spin_unlock(&ls->ls_remove_spin); | 1079 | send_remove(r); |
1642 | msleep(1); | 1080 | else |
1643 | goto restart; | 1081 | dlm_dir_remove_entry(r->res_ls, to_nodeid, |
1644 | } | 1082 | r->res_name, r->res_length); |
1645 | spin_unlock(&ls->ls_remove_spin); | ||
1646 | } | 1083 | } |
1647 | 1084 | ||
1648 | /* | 1085 | /* FIXME: shouldn't this be able to exit as soon as one non-due rsb is |
1649 | * ls_remove_spin protects ls_remove_name and ls_remove_len which are | 1086 | found since they are in order of newest to oldest? */ |
1650 | * read by other threads in wait_pending_remove. ls_remove_names | ||
1651 | * and ls_remove_lens are only used by the scan thread, so they do | ||
1652 | * not need protection. | ||
1653 | */ | ||
1654 | 1087 | ||
1655 | static void shrink_bucket(struct dlm_ls *ls, int b) | 1088 | static int shrink_bucket(struct dlm_ls *ls, int b) |
1656 | { | 1089 | { |
1657 | struct rb_node *n, *next; | ||
1658 | struct dlm_rsb *r; | 1090 | struct dlm_rsb *r; |
1659 | char *name; | 1091 | int count = 0, found; |
1660 | int our_nodeid = dlm_our_nodeid(); | ||
1661 | int remote_count = 0; | ||
1662 | int i, len, rv; | ||
1663 | |||
1664 | memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX); | ||
1665 | |||
1666 | spin_lock(&ls->ls_rsbtbl[b].lock); | ||
1667 | for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) { | ||
1668 | next = rb_next(n); | ||
1669 | r = rb_entry(n, struct dlm_rsb, res_hashnode); | ||
1670 | |||
1671 | /* If we're the directory record for this rsb, and | ||
1672 | we're not the master of it, then we need to wait | ||
1673 | for the master node to send us a dir remove | |
1674 | before removing the dir record. */ | ||
1675 | |||
1676 | if (!dlm_no_directory(ls) && | ||
1677 | (r->res_master_nodeid != our_nodeid) && | ||
1678 | (dlm_dir_nodeid(r) == our_nodeid)) { | ||
1679 | continue; | ||
1680 | } | ||
1681 | |||
1682 | if (!time_after_eq(jiffies, r->res_toss_time + | ||
1683 | dlm_config.ci_toss_secs * HZ)) { | ||
1684 | continue; | ||
1685 | } | ||
1686 | |||
1687 | if (!dlm_no_directory(ls) && | ||
1688 | (r->res_master_nodeid == our_nodeid) && | ||
1689 | (dlm_dir_nodeid(r) != our_nodeid)) { | ||
1690 | |||
1691 | /* We're the master of this rsb but we're not | ||
1692 | the directory record, so we need to tell the | ||
1693 | dir node to remove the dir record. */ | ||
1694 | |||
1695 | ls->ls_remove_lens[remote_count] = r->res_length; | ||
1696 | memcpy(ls->ls_remove_names[remote_count], r->res_name, | ||
1697 | DLM_RESNAME_MAXLEN); | ||
1698 | remote_count++; | ||
1699 | |||
1700 | if (remote_count >= DLM_REMOVE_NAMES_MAX) | ||
1701 | break; | ||
1702 | continue; | ||
1703 | } | ||
1704 | |||
1705 | if (!kref_put(&r->res_ref, kill_rsb)) { | ||
1706 | log_error(ls, "tossed rsb in use %s", r->res_name); | ||
1707 | continue; | ||
1708 | } | ||
1709 | |||
1710 | rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); | ||
1711 | dlm_free_rsb(r); | ||
1712 | } | ||
1713 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
1714 | |||
1715 | /* | ||
1716 | * While searching for rsb's to free, we found some that require | ||
1717 | * remote removal. We leave them in place and find them again here | ||
1718 | * so there is a very small gap between removing them from the toss | ||
1719 | * list and sending the removal. Keeping this gap small is | ||
1720 | * important to keep us (the master node) from being out of sync | ||
1721 | * with the remote dir node for very long. | ||
1722 | * | ||
1723 | * From the time the rsb is removed from toss until just after | ||
1724 | * send_remove, the rsb name is saved in ls_remove_name. A new | ||
1725 | * lookup checks this to ensure that a new lookup message for the | ||
1726 | * same resource name is not sent just before the remove message. | ||
1727 | */ | ||
1728 | |||
1729 | for (i = 0; i < remote_count; i++) { | ||
1730 | name = ls->ls_remove_names[i]; | ||
1731 | len = ls->ls_remove_lens[i]; | ||
1732 | 1092 | ||
1093 | for (;;) { | ||
1094 | found = 0; | ||
1733 | spin_lock(&ls->ls_rsbtbl[b].lock); | 1095 | spin_lock(&ls->ls_rsbtbl[b].lock); |
1734 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); | 1096 | list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, |
1735 | if (rv) { | 1097 | res_hashchain) { |
1736 | spin_unlock(&ls->ls_rsbtbl[b].lock); | 1098 | if (!time_after_eq(jiffies, r->res_toss_time + |
1737 | log_debug(ls, "remove_name not toss %s", name); | 1099 | dlm_config.ci_toss_secs * HZ)) |
1738 | continue; | 1100 | continue; |
1739 | } | 1101 | found = 1; |
1740 | 1102 | break; | |
1741 | if (r->res_master_nodeid != our_nodeid) { | ||
1742 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
1743 | log_debug(ls, "remove_name master %d dir %d our %d %s", | ||
1744 | r->res_master_nodeid, r->res_dir_nodeid, | ||
1745 | our_nodeid, name); | ||
1746 | continue; | ||
1747 | } | 1103 | } |
1748 | 1104 | ||
1749 | if (r->res_dir_nodeid == our_nodeid) { | 1105 | if (!found) { |
1750 | /* should never happen */ | ||
1751 | spin_unlock(&ls->ls_rsbtbl[b].lock); | 1106 | spin_unlock(&ls->ls_rsbtbl[b].lock); |
1752 | log_error(ls, "remove_name dir %d master %d our %d %s", | 1107 | break; |
1753 | r->res_dir_nodeid, r->res_master_nodeid, | ||
1754 | our_nodeid, name); | ||
1755 | continue; | ||
1756 | } | 1108 | } |
1757 | 1109 | ||
1758 | if (!time_after_eq(jiffies, r->res_toss_time + | 1110 | if (kref_put(&r->res_ref, kill_rsb)) { |
1759 | dlm_config.ci_toss_secs * HZ)) { | 1111 | list_del(&r->res_hashchain); |
1760 | spin_unlock(&ls->ls_rsbtbl[b].lock); | 1112 | spin_unlock(&ls->ls_rsbtbl[b].lock); |
1761 | log_debug(ls, "remove_name toss_time %lu now %lu %s", | ||
1762 | r->res_toss_time, jiffies, name); | ||
1763 | continue; | ||
1764 | } | ||
1765 | 1113 | ||
1766 | if (!kref_put(&r->res_ref, kill_rsb)) { | 1114 | if (is_master(r)) |
1115 | dir_remove(r); | ||
1116 | dlm_free_rsb(r); | ||
1117 | count++; | ||
1118 | } else { | ||
1767 | spin_unlock(&ls->ls_rsbtbl[b].lock); | 1119 | spin_unlock(&ls->ls_rsbtbl[b].lock); |
1768 | log_error(ls, "remove_name in use %s", name); | 1120 | log_error(ls, "tossed rsb in use %s", r->res_name); |
1769 | continue; | ||
1770 | } | 1121 | } |
1771 | |||
1772 | rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); | ||
1773 | |||
1774 | /* block lookup of same name until we've sent remove */ | ||
1775 | spin_lock(&ls->ls_remove_spin); | ||
1776 | ls->ls_remove_len = len; | ||
1777 | memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN); | ||
1778 | spin_unlock(&ls->ls_remove_spin); | ||
1779 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
1780 | |||
1781 | send_remove(r); | ||
1782 | |||
1783 | /* allow lookup of name again */ | ||
1784 | spin_lock(&ls->ls_remove_spin); | ||
1785 | ls->ls_remove_len = 0; | ||
1786 | memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); | ||
1787 | spin_unlock(&ls->ls_remove_spin); | ||
1788 | |||
1789 | dlm_free_rsb(r); | ||
1790 | } | 1122 | } |
1123 | |||
1124 | return count; | ||
1791 | } | 1125 | } |
1792 | 1126 | ||
1793 | void dlm_scan_rsbs(struct dlm_ls *ls) | 1127 | void dlm_scan_rsbs(struct dlm_ls *ls) |
@@ -2113,13 +1447,13 @@ static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
2113 | } | 1447 | } |
2114 | 1448 | ||
2115 | lkb->lkb_rqmode = DLM_LOCK_IV; | 1449 | lkb->lkb_rqmode = DLM_LOCK_IV; |
2116 | lkb->lkb_highbast = 0; | ||
2117 | } | 1450 | } |
2118 | 1451 | ||
2119 | static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) | 1452 | static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) |
2120 | { | 1453 | { |
2121 | set_lvb_lock(r, lkb); | 1454 | set_lvb_lock(r, lkb); |
2122 | _grant_lock(r, lkb); | 1455 | _grant_lock(r, lkb); |
1456 | lkb->lkb_highbast = 0; | ||
2123 | } | 1457 | } |
2124 | 1458 | ||
2125 | static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, | 1459 | static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, |
@@ -2279,14 +1613,10 @@ static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2) | |||
2279 | * immediate request, it is 0 if called later, after the lock has been | 1613 | * immediate request, it is 0 if called later, after the lock has been |
2280 | * queued. | 1614 | * queued. |
2281 | * | 1615 | * |
2282 | * recover is 1 if dlm_recover_grant() is trying to grant conversions | ||
2283 | * after recovery. | ||
2284 | * | ||
2285 | * References are from chapter 6 of "VAXcluster Principles" by Roy Davis | 1616 | * References are from chapter 6 of "VAXcluster Principles" by Roy Davis |
2286 | */ | 1617 | */ |
2287 | 1618 | ||
2288 | static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, | 1619 | static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) |
2289 | int recover) | ||
2290 | { | 1620 | { |
2291 | int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV); | 1621 | int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV); |
2292 | 1622 | ||
@@ -2318,7 +1648,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, | |||
2318 | */ | 1648 | */ |
2319 | 1649 | ||
2320 | if (queue_conflict(&r->res_grantqueue, lkb)) | 1650 | if (queue_conflict(&r->res_grantqueue, lkb)) |
2321 | return 0; | 1651 | goto out; |
2322 | 1652 | ||
2323 | /* | 1653 | /* |
2324 | * 6-3: By default, a conversion request is immediately granted if the | 1654 | * 6-3: By default, a conversion request is immediately granted if the |
@@ -2327,24 +1657,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, | |||
2327 | */ | 1657 | */ |
2328 | 1658 | ||
2329 | if (queue_conflict(&r->res_convertqueue, lkb)) | 1659 | if (queue_conflict(&r->res_convertqueue, lkb)) |
2330 | return 0; | 1660 | goto out; |
2331 | |||
2332 | /* | ||
2333 | * The RECOVER_GRANT flag means dlm_recover_grant() is granting | ||
2334 | * locks for a recovered rsb, on which lkb's have been rebuilt. | ||
2335 | * The lkb's may have been rebuilt on the queues in a different | ||
2336 | * order than they were in on the previous master. So, granting | ||
2337 | * queued conversions in order after recovery doesn't make sense | ||
2338 | * since the order hasn't been preserved anyway. The new order | ||
2339 | * could also have created a new "in place" conversion deadlock. | ||
2340 | * (e.g. old, failed master held granted EX, with PR->EX, NL->EX. | ||
2341 | * After recovery, there would be no granted locks, and possibly | ||
2342 | * NL->EX, PR->EX, an in-place conversion deadlock.) So, after | ||
2343 | * recovery, grant conversions without considering order. | ||
2344 | */ | ||
2345 | |||
2346 | if (conv && recover) | ||
2347 | return 1; | ||
2348 | 1661 | ||
2349 | /* | 1662 | /* |
2350 | * 6-5: But the default algorithm for deciding whether to grant or | 1663 | * 6-5: But the default algorithm for deciding whether to grant or |
@@ -2373,18 +1686,6 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, | |||
2373 | return 1; | 1686 | return 1; |
2374 | 1687 | ||
2375 | /* | 1688 | /* |
2376 | * Even if the convert is compat with all granted locks, | ||
2377 | * QUECVT forces it behind other locks on the convert queue. | ||
2378 | */ | ||
2379 | |||
2380 | if (now && conv && (lkb->lkb_exflags & DLM_LKF_QUECVT)) { | ||
2381 | if (list_empty(&r->res_convertqueue)) | ||
2382 | return 1; | ||
2383 | else | ||
2384 | return 0; | ||
2385 | } | ||
2386 | |||
2387 | /* | ||
2388 | * The NOORDER flag is set to avoid the standard vms rules on grant | 1689 | * The NOORDER flag is set to avoid the standard vms rules on grant |
2389 | * order. | 1690 | * order. |
2390 | */ | 1691 | */ |
@@ -2427,12 +1728,12 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, | |||
2427 | if (!now && !conv && list_empty(&r->res_convertqueue) && | 1728 | if (!now && !conv && list_empty(&r->res_convertqueue) && |
2428 | first_in_list(lkb, &r->res_waitqueue)) | 1729 | first_in_list(lkb, &r->res_waitqueue)) |
2429 | return 1; | 1730 | return 1; |
2430 | 1731 | out: | |
2431 | return 0; | 1732 | return 0; |
2432 | } | 1733 | } |
2433 | 1734 | ||
2434 | static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, | 1735 | static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, |
2435 | int recover, int *err) | 1736 | int *err) |
2436 | { | 1737 | { |
2437 | int rv; | 1738 | int rv; |
2438 | int8_t alt = 0, rqmode = lkb->lkb_rqmode; | 1739 | int8_t alt = 0, rqmode = lkb->lkb_rqmode; |
@@ -2441,7 +1742,7 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, | |||
2441 | if (err) | 1742 | if (err) |
2442 | *err = 0; | 1743 | *err = 0; |
2443 | 1744 | ||
2444 | rv = _can_be_granted(r, lkb, now, recover); | 1745 | rv = _can_be_granted(r, lkb, now); |
2445 | if (rv) | 1746 | if (rv) |
2446 | goto out; | 1747 | goto out; |
2447 | 1748 | ||
@@ -2482,7 +1783,7 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, | |||
2482 | 1783 | ||
2483 | if (alt) { | 1784 | if (alt) { |
2484 | lkb->lkb_rqmode = alt; | 1785 | lkb->lkb_rqmode = alt; |
2485 | rv = _can_be_granted(r, lkb, now, 0); | 1786 | rv = _can_be_granted(r, lkb, now); |
2486 | if (rv) | 1787 | if (rv) |
2487 | lkb->lkb_sbflags |= DLM_SBF_ALTMODE; | 1788 | lkb->lkb_sbflags |= DLM_SBF_ALTMODE; |
2488 | else | 1789 | else |
@@ -2502,11 +1803,9 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, | |||
2502 | /* Returns the highest requested mode of all blocked conversions; sets | 1803 | /* Returns the highest requested mode of all blocked conversions; sets |
2503 | cw if there's a blocked conversion to DLM_LOCK_CW. */ | 1804 | cw if there's a blocked conversion to DLM_LOCK_CW. */ |
2504 | 1805 | ||
2505 | static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw, | 1806 | static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw) |
2506 | unsigned int *count) | ||
2507 | { | 1807 | { |
2508 | struct dlm_lkb *lkb, *s; | 1808 | struct dlm_lkb *lkb, *s; |
2509 | int recover = rsb_flag(r, RSB_RECOVER_GRANT); | ||
2510 | int hi, demoted, quit, grant_restart, demote_restart; | 1809 | int hi, demoted, quit, grant_restart, demote_restart; |
2511 | int deadlk; | 1810 | int deadlk; |
2512 | 1811 | ||
@@ -2520,11 +1819,9 @@ static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw, | |||
2520 | demoted = is_demoted(lkb); | 1819 | demoted = is_demoted(lkb); |
2521 | deadlk = 0; | 1820 | deadlk = 0; |
2522 | 1821 | ||
2523 | if (can_be_granted(r, lkb, 0, recover, &deadlk)) { | 1822 | if (can_be_granted(r, lkb, 0, &deadlk)) { |
2524 | grant_lock_pending(r, lkb); | 1823 | grant_lock_pending(r, lkb); |
2525 | grant_restart = 1; | 1824 | grant_restart = 1; |
2526 | if (count) | ||
2527 | (*count)++; | ||
2528 | continue; | 1825 | continue; |
2529 | } | 1826 | } |
2530 | 1827 | ||
@@ -2558,17 +1855,14 @@ static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw, | |||
2558 | return max_t(int, high, hi); | 1855 | return max_t(int, high, hi); |
2559 | } | 1856 | } |
2560 | 1857 | ||
2561 | static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw, | 1858 | static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw) |
2562 | unsigned int *count) | ||
2563 | { | 1859 | { |
2564 | struct dlm_lkb *lkb, *s; | 1860 | struct dlm_lkb *lkb, *s; |
2565 | 1861 | ||
2566 | list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { | 1862 | list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { |
2567 | if (can_be_granted(r, lkb, 0, 0, NULL)) { | 1863 | if (can_be_granted(r, lkb, 0, NULL)) |
2568 | grant_lock_pending(r, lkb); | 1864 | grant_lock_pending(r, lkb); |
2569 | if (count) | 1865 | else { |
2570 | (*count)++; | ||
2571 | } else { | ||
2572 | high = max_t(int, lkb->lkb_rqmode, high); | 1866 | high = max_t(int, lkb->lkb_rqmode, high); |
2573 | if (lkb->lkb_rqmode == DLM_LOCK_CW) | 1867 | if (lkb->lkb_rqmode == DLM_LOCK_CW) |
2574 | *cw = 1; | 1868 | *cw = 1; |
@@ -2597,20 +1891,16 @@ static int lock_requires_bast(struct dlm_lkb *gr, int high, int cw) | |||
2597 | return 0; | 1891 | return 0; |
2598 | } | 1892 | } |
2599 | 1893 | ||
2600 | static void grant_pending_locks(struct dlm_rsb *r, unsigned int *count) | 1894 | static void grant_pending_locks(struct dlm_rsb *r) |
2601 | { | 1895 | { |
2602 | struct dlm_lkb *lkb, *s; | 1896 | struct dlm_lkb *lkb, *s; |
2603 | int high = DLM_LOCK_IV; | 1897 | int high = DLM_LOCK_IV; |
2604 | int cw = 0; | 1898 | int cw = 0; |
2605 | 1899 | ||
2606 | if (!is_master(r)) { | 1900 | DLM_ASSERT(is_master(r), dlm_dump_rsb(r);); |
2607 | log_print("grant_pending_locks r nodeid %d", r->res_nodeid); | ||
2608 | dlm_dump_rsb(r); | ||
2609 | return; | ||
2610 | } | ||
2611 | 1901 | ||
2612 | high = grant_pending_convert(r, high, &cw, count); | 1902 | high = grant_pending_convert(r, high, &cw); |
2613 | high = grant_pending_wait(r, high, &cw, count); | 1903 | high = grant_pending_wait(r, high, &cw); |
2614 | 1904 | ||
2615 | if (high == DLM_LOCK_IV) | 1905 | if (high == DLM_LOCK_IV) |
2616 | return; | 1906 | return; |
@@ -2695,7 +1985,8 @@ static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
2695 | 1985 | ||
2696 | static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) | 1986 | static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) |
2697 | { | 1987 | { |
2698 | int our_nodeid = dlm_our_nodeid(); | 1988 | struct dlm_ls *ls = r->res_ls; |
1989 | int i, error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); | ||
2699 | 1990 | ||
2700 | if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { | 1991 | if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { |
2701 | rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); | 1992 | rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); |
@@ -2709,37 +2000,53 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
2709 | return 1; | 2000 | return 1; |
2710 | } | 2001 | } |
2711 | 2002 | ||
2712 | if (r->res_master_nodeid == our_nodeid) { | 2003 | if (r->res_nodeid == 0) { |
2713 | lkb->lkb_nodeid = 0; | 2004 | lkb->lkb_nodeid = 0; |
2714 | return 0; | 2005 | return 0; |
2715 | } | 2006 | } |
2716 | 2007 | ||
2717 | if (r->res_master_nodeid) { | 2008 | if (r->res_nodeid > 0) { |
2718 | lkb->lkb_nodeid = r->res_master_nodeid; | 2009 | lkb->lkb_nodeid = r->res_nodeid; |
2719 | return 0; | 2010 | return 0; |
2720 | } | 2011 | } |
2721 | 2012 | ||
2722 | if (dlm_dir_nodeid(r) == our_nodeid) { | 2013 | DLM_ASSERT(r->res_nodeid == -1, dlm_dump_rsb(r);); |
2723 | /* This is a somewhat unusual case; find_rsb will usually | 2014 | |
2724 | have set res_master_nodeid when dir nodeid is local, but | 2015 | dir_nodeid = dlm_dir_nodeid(r); |
2725 | there are cases where we become the dir node after we've | 2016 | |
2726 | past find_rsb and go through _request_lock again. | 2017 | if (dir_nodeid != our_nodeid) { |
2727 | confirm_master() or process_lookup_list() needs to be | 2018 | r->res_first_lkid = lkb->lkb_id; |
2728 | called after this. */ | 2019 | send_lookup(r, lkb); |
2729 | log_debug(r->res_ls, "set_master %x self master %d dir %d %s", | 2020 | return 1; |
2730 | lkb->lkb_id, r->res_master_nodeid, r->res_dir_nodeid, | ||
2731 | r->res_name); | ||
2732 | r->res_master_nodeid = our_nodeid; | ||
2733 | r->res_nodeid = 0; | ||
2734 | lkb->lkb_nodeid = 0; | ||
2735 | return 0; | ||
2736 | } | 2021 | } |
2737 | 2022 | ||
2738 | wait_pending_remove(r); | 2023 | for (i = 0; i < 2; i++) { |
2024 | /* It's possible for dlm_scand to remove an old rsb for | ||
2025 | this same resource from the toss list, us to create | ||
2026 | a new one, look up the master locally, and find it | ||
2027 | already exists just before dlm_scand does the | ||
2028 | dir_remove() on the previous rsb. */ | ||
2739 | 2029 | ||
2740 | r->res_first_lkid = lkb->lkb_id; | 2030 | error = dlm_dir_lookup(ls, our_nodeid, r->res_name, |
2741 | send_lookup(r, lkb); | 2031 | r->res_length, &ret_nodeid); |
2742 | return 1; | 2032 | if (!error) |
2033 | break; | ||
2034 | log_debug(ls, "dir_lookup error %d %s", error, r->res_name); | ||
2035 | schedule(); | ||
2036 | } | ||
2037 | if (error && error != -EEXIST) | ||
2038 | return error; | ||
2039 | |||
2040 | if (ret_nodeid == our_nodeid) { | ||
2041 | r->res_first_lkid = 0; | ||
2042 | r->res_nodeid = 0; | ||
2043 | lkb->lkb_nodeid = 0; | ||
2044 | } else { | ||
2045 | r->res_first_lkid = lkb->lkb_id; | ||
2046 | r->res_nodeid = ret_nodeid; | ||
2047 | lkb->lkb_nodeid = ret_nodeid; | ||
2048 | } | ||
2049 | return 0; | ||
2743 | } | 2050 | } |
2744 | 2051 | ||
2745 | static void process_lookup_list(struct dlm_rsb *r) | 2052 | static void process_lookup_list(struct dlm_rsb *r) |
@@ -3064,7 +2371,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
3064 | { | 2371 | { |
3065 | int error = 0; | 2372 | int error = 0; |
3066 | 2373 | ||
3067 | if (can_be_granted(r, lkb, 1, 0, NULL)) { | 2374 | if (can_be_granted(r, lkb, 1, NULL)) { |
3068 | grant_lock(r, lkb); | 2375 | grant_lock(r, lkb); |
3069 | queue_cast(r, lkb, 0); | 2376 | queue_cast(r, lkb, 0); |
3070 | goto out; | 2377 | goto out; |
@@ -3104,7 +2411,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
3104 | 2411 | ||
3105 | /* changing an existing lock may allow others to be granted */ | 2412 | /* changing an existing lock may allow others to be granted */ |
3106 | 2413 | ||
3107 | if (can_be_granted(r, lkb, 1, 0, &deadlk)) { | 2414 | if (can_be_granted(r, lkb, 1, &deadlk)) { |
3108 | grant_lock(r, lkb); | 2415 | grant_lock(r, lkb); |
3109 | queue_cast(r, lkb, 0); | 2416 | queue_cast(r, lkb, 0); |
3110 | goto out; | 2417 | goto out; |
@@ -3129,8 +2436,8 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
3129 | before we try again to grant this one. */ | 2436 | before we try again to grant this one. */ |
3130 | 2437 | ||
3131 | if (is_demoted(lkb)) { | 2438 | if (is_demoted(lkb)) { |
3132 | grant_pending_convert(r, DLM_LOCK_IV, NULL, NULL); | 2439 | grant_pending_convert(r, DLM_LOCK_IV, NULL); |
3133 | if (_can_be_granted(r, lkb, 1, 0)) { | 2440 | if (_can_be_granted(r, lkb, 1)) { |
3134 | grant_lock(r, lkb); | 2441 | grant_lock(r, lkb); |
3135 | queue_cast(r, lkb, 0); | 2442 | queue_cast(r, lkb, 0); |
3136 | goto out; | 2443 | goto out; |
@@ -3157,7 +2464,7 @@ static void do_convert_effects(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
3157 | { | 2464 | { |
3158 | switch (error) { | 2465 | switch (error) { |
3159 | case 0: | 2466 | case 0: |
3160 | grant_pending_locks(r, NULL); | 2467 | grant_pending_locks(r); |
3161 | /* grant_pending_locks also sends basts */ | 2468 | /* grant_pending_locks also sends basts */ |
3162 | break; | 2469 | break; |
3163 | case -EAGAIN: | 2470 | case -EAGAIN: |
@@ -3180,11 +2487,11 @@ static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) | |||
3180 | static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb, | 2487 | static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb, |
3181 | int error) | 2488 | int error) |
3182 | { | 2489 | { |
3183 | grant_pending_locks(r, NULL); | 2490 | grant_pending_locks(r); |
3184 | } | 2491 | } |
3185 | 2492 | ||
3186 | /* returns: 0 did nothing, -DLM_ECANCEL canceled lock */ | 2493 | /* returns: 0 did nothing, -DLM_ECANCEL canceled lock */ |
3187 | 2494 | ||
3188 | static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) | 2495 | static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) |
3189 | { | 2496 | { |
3190 | int error; | 2497 | int error; |
@@ -3201,7 +2508,7 @@ static void do_cancel_effects(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
3201 | int error) | 2508 | int error) |
3202 | { | 2509 | { |
3203 | if (error) | 2510 | if (error) |
3204 | grant_pending_locks(r, NULL); | 2511 | grant_pending_locks(r); |
3205 | } | 2512 | } |
3206 | 2513 | ||
3207 | /* | 2514 | /* |
@@ -3308,11 +2615,11 @@ static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name, | |||
3308 | 2615 | ||
3309 | error = validate_lock_args(ls, lkb, args); | 2616 | error = validate_lock_args(ls, lkb, args); |
3310 | if (error) | 2617 | if (error) |
3311 | return error; | 2618 | goto out; |
3312 | 2619 | ||
3313 | error = find_rsb(ls, name, len, 0, R_REQUEST, &r); | 2620 | error = find_rsb(ls, name, len, R_CREATE, &r); |
3314 | if (error) | 2621 | if (error) |
3315 | return error; | 2622 | goto out; |
3316 | 2623 | ||
3317 | lock_rsb(r); | 2624 | lock_rsb(r); |
3318 | 2625 | ||
@@ -3323,6 +2630,8 @@ static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name, | |||
3323 | 2630 | ||
3324 | unlock_rsb(r); | 2631 | unlock_rsb(r); |
3325 | put_rsb(r); | 2632 | put_rsb(r); |
2633 | |||
2634 | out: | ||
3326 | return error; | 2635 | return error; |
3327 | } | 2636 | } |
3328 | 2637 | ||
@@ -4000,72 +3309,11 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
4000 | return error; | 3309 | return error; |
4001 | } | 3310 | } |
4002 | 3311 | ||
4003 | static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len) | 3312 | static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) |
4004 | { | ||
4005 | char name[DLM_RESNAME_MAXLEN + 1]; | ||
4006 | struct dlm_message *ms; | ||
4007 | struct dlm_mhandle *mh; | ||
4008 | struct dlm_rsb *r; | ||
4009 | uint32_t hash, b; | ||
4010 | int rv, dir_nodeid; | ||
4011 | |||
4012 | memset(name, 0, sizeof(name)); | ||
4013 | memcpy(name, ms_name, len); | ||
4014 | |||
4015 | hash = jhash(name, len, 0); | ||
4016 | b = hash & (ls->ls_rsbtbl_size - 1); | ||
4017 | |||
4018 | dir_nodeid = dlm_hash2nodeid(ls, hash); | ||
4019 | |||
4020 | log_error(ls, "send_repeat_remove dir %d %s", dir_nodeid, name); | ||
4021 | |||
4022 | spin_lock(&ls->ls_rsbtbl[b].lock); | ||
4023 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); | ||
4024 | if (!rv) { | ||
4025 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
4026 | log_error(ls, "repeat_remove on keep %s", name); | ||
4027 | return; | ||
4028 | } | ||
4029 | |||
4030 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); | ||
4031 | if (!rv) { | ||
4032 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
4033 | log_error(ls, "repeat_remove on toss %s", name); | ||
4034 | return; | ||
4035 | } | ||
4036 | |||
4037 | /* use ls->remove_name2 to avoid conflict with shrink? */ | ||
4038 | |||
4039 | spin_lock(&ls->ls_remove_spin); | ||
4040 | ls->ls_remove_len = len; | ||
4041 | memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN); | ||
4042 | spin_unlock(&ls->ls_remove_spin); | ||
4043 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
4044 | |||
4045 | rv = _create_message(ls, sizeof(struct dlm_message) + len, | ||
4046 | dir_nodeid, DLM_MSG_REMOVE, &ms, &mh); | ||
4047 | if (rv) | ||
4048 | return; | ||
4049 | |||
4050 | memcpy(ms->m_extra, name, len); | ||
4051 | ms->m_hash = hash; | ||
4052 | |||
4053 | send_message(mh, ms); | ||
4054 | |||
4055 | spin_lock(&ls->ls_remove_spin); | ||
4056 | ls->ls_remove_len = 0; | ||
4057 | memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN); | ||
4058 | spin_unlock(&ls->ls_remove_spin); | ||
4059 | } | ||
4060 | |||
4061 | static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) | ||
4062 | { | 3313 | { |
4063 | struct dlm_lkb *lkb; | 3314 | struct dlm_lkb *lkb; |
4064 | struct dlm_rsb *r; | 3315 | struct dlm_rsb *r; |
4065 | int from_nodeid; | 3316 | int error, namelen; |
4066 | int error, namelen = 0; | ||
4067 | |||
4068 | from_nodeid = ms->m_header.h_nodeid; | ||
4069 | 3317 | ||
4070 | error = create_lkb(ls, &lkb); | 3318 | error = create_lkb(ls, &lkb); |
4071 | if (error) | 3319 | if (error) |
@@ -4079,16 +3327,9 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) | |||
4079 | goto fail; | 3327 | goto fail; |
4080 | } | 3328 | } |
4081 | 3329 | ||
4082 | /* The dir node is the authority on whether we are the master | ||
4083 | for this rsb or not, so if the master sends us a request, we should | ||
4084 | recreate the rsb if we've destroyed it. This race happens when we | ||
4085 | send a remove message to the dir node at the same time that the dir | ||
4086 | node sends us a request for the rsb. */ | ||
4087 | |||
4088 | namelen = receive_extralen(ms); | 3330 | namelen = receive_extralen(ms); |
4089 | 3331 | ||
4090 | error = find_rsb(ls, ms->m_extra, namelen, from_nodeid, | 3332 | error = find_rsb(ls, ms->m_extra, namelen, R_MASTER, &r); |
4091 | R_RECEIVE_REQUEST, &r); | ||
4092 | if (error) { | 3333 | if (error) { |
4093 | __put_lkb(ls, lkb); | 3334 | __put_lkb(ls, lkb); |
4094 | goto fail; | 3335 | goto fail; |
@@ -4096,16 +3337,6 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) | |||
4096 | 3337 | ||
4097 | lock_rsb(r); | 3338 | lock_rsb(r); |
4098 | 3339 | ||
4099 | if (r->res_master_nodeid != dlm_our_nodeid()) { | ||
4100 | error = validate_master_nodeid(ls, r, from_nodeid); | ||
4101 | if (error) { | ||
4102 | unlock_rsb(r); | ||
4103 | put_rsb(r); | ||
4104 | __put_lkb(ls, lkb); | ||
4105 | goto fail; | ||
4106 | } | ||
4107 | } | ||
4108 | |||
4109 | attach_lkb(r, lkb); | 3340 | attach_lkb(r, lkb); |
4110 | error = do_request(r, lkb); | 3341 | error = do_request(r, lkb); |
4111 | send_request_reply(r, lkb, error); | 3342 | send_request_reply(r, lkb, error); |
@@ -4118,40 +3349,14 @@ static int receive_request(struct dlm_ls *ls, struct dlm_message *ms) | |||
4118 | error = 0; | 3349 | error = 0; |
4119 | if (error) | 3350 | if (error) |
4120 | dlm_put_lkb(lkb); | 3351 | dlm_put_lkb(lkb); |
4121 | return 0; | 3352 | return; |
4122 | 3353 | ||
4123 | fail: | 3354 | fail: |
4124 | /* TODO: instead of returning ENOTBLK, add the lkb to res_lookup | ||
4125 | and do this receive_request again from process_lookup_list once | ||
4126 | we get the lookup reply. This would avoid a many repeated | ||
4127 | ENOTBLK request failures when the lookup reply designating us | ||
4128 | as master is delayed. */ | ||
4129 | |||
4130 | /* We could repeatedly return -EBADR here if our send_remove() is | ||
4131 | delayed in being sent/arriving/being processed on the dir node. | ||
4132 | Another node would repeatedly lookup up the master, and the dir | ||
4133 | node would continue returning our nodeid until our send_remove | ||
4134 | took effect. | ||
4135 | |||
4136 | We send another remove message in case our previous send_remove | ||
4137 | was lost/ignored/missed somehow. */ | ||
4138 | |||
4139 | if (error != -ENOTBLK) { | ||
4140 | log_limit(ls, "receive_request %x from %d %d", | ||
4141 | ms->m_lkid, from_nodeid, error); | ||
4142 | } | ||
4143 | |||
4144 | if (namelen && error == -EBADR) { | ||
4145 | send_repeat_remove(ls, ms->m_extra, namelen); | ||
4146 | msleep(1000); | ||
4147 | } | ||
4148 | |||
4149 | setup_stub_lkb(ls, ms); | 3355 | setup_stub_lkb(ls, ms); |
4150 | send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); | 3356 | send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); |
4151 | return error; | ||
4152 | } | 3357 | } |
4153 | 3358 | ||
4154 | static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms) | 3359 | static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms) |
4155 | { | 3360 | { |
4156 | struct dlm_lkb *lkb; | 3361 | struct dlm_lkb *lkb; |
4157 | struct dlm_rsb *r; | 3362 | struct dlm_rsb *r; |
@@ -4161,15 +3366,6 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms) | |||
4161 | if (error) | 3366 | if (error) |
4162 | goto fail; | 3367 | goto fail; |
4163 | 3368 | ||
4164 | if (lkb->lkb_remid != ms->m_lkid) { | ||
4165 | log_error(ls, "receive_convert %x remid %x recover_seq %llu " | ||
4166 | "remote %d %x", lkb->lkb_id, lkb->lkb_remid, | ||
4167 | (unsigned long long)lkb->lkb_recover_seq, | ||
4168 | ms->m_header.h_nodeid, ms->m_lkid); | ||
4169 | error = -ENOENT; | ||
4170 | goto fail; | ||
4171 | } | ||
4172 | |||
4173 | r = lkb->lkb_resource; | 3369 | r = lkb->lkb_resource; |
4174 | 3370 | ||
4175 | hold_rsb(r); | 3371 | hold_rsb(r); |
@@ -4197,15 +3393,14 @@ static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms) | |||
4197 | unlock_rsb(r); | 3393 | unlock_rsb(r); |
4198 | put_rsb(r); | 3394 | put_rsb(r); |
4199 | dlm_put_lkb(lkb); | 3395 | dlm_put_lkb(lkb); |
4200 | return 0; | 3396 | return; |
4201 | 3397 | ||
4202 | fail: | 3398 | fail: |
4203 | setup_stub_lkb(ls, ms); | 3399 | setup_stub_lkb(ls, ms); |
4204 | send_convert_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); | 3400 | send_convert_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); |
4205 | return error; | ||
4206 | } | 3401 | } |
4207 | 3402 | ||
4208 | static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) | 3403 | static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) |
4209 | { | 3404 | { |
4210 | struct dlm_lkb *lkb; | 3405 | struct dlm_lkb *lkb; |
4211 | struct dlm_rsb *r; | 3406 | struct dlm_rsb *r; |
@@ -4215,14 +3410,6 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) | |||
4215 | if (error) | 3410 | if (error) |
4216 | goto fail; | 3411 | goto fail; |
4217 | 3412 | ||
4218 | if (lkb->lkb_remid != ms->m_lkid) { | ||
4219 | log_error(ls, "receive_unlock %x remid %x remote %d %x", | ||
4220 | lkb->lkb_id, lkb->lkb_remid, | ||
4221 | ms->m_header.h_nodeid, ms->m_lkid); | ||
4222 | error = -ENOENT; | ||
4223 | goto fail; | ||
4224 | } | ||
4225 | |||
4226 | r = lkb->lkb_resource; | 3413 | r = lkb->lkb_resource; |
4227 | 3414 | ||
4228 | hold_rsb(r); | 3415 | hold_rsb(r); |
@@ -4247,15 +3434,14 @@ static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) | |||
4247 | unlock_rsb(r); | 3434 | unlock_rsb(r); |
4248 | put_rsb(r); | 3435 | put_rsb(r); |
4249 | dlm_put_lkb(lkb); | 3436 | dlm_put_lkb(lkb); |
4250 | return 0; | 3437 | return; |
4251 | 3438 | ||
4252 | fail: | 3439 | fail: |
4253 | setup_stub_lkb(ls, ms); | 3440 | setup_stub_lkb(ls, ms); |
4254 | send_unlock_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); | 3441 | send_unlock_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); |
4255 | return error; | ||
4256 | } | 3442 | } |
4257 | 3443 | ||
4258 | static int receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) | 3444 | static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) |
4259 | { | 3445 | { |
4260 | struct dlm_lkb *lkb; | 3446 | struct dlm_lkb *lkb; |
4261 | struct dlm_rsb *r; | 3447 | struct dlm_rsb *r; |
@@ -4283,23 +3469,25 @@ static int receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) | |||
4283 | unlock_rsb(r); | 3469 | unlock_rsb(r); |
4284 | put_rsb(r); | 3470 | put_rsb(r); |
4285 | dlm_put_lkb(lkb); | 3471 | dlm_put_lkb(lkb); |
4286 | return 0; | 3472 | return; |
4287 | 3473 | ||
4288 | fail: | 3474 | fail: |
4289 | setup_stub_lkb(ls, ms); | 3475 | setup_stub_lkb(ls, ms); |
4290 | send_cancel_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); | 3476 | send_cancel_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); |
4291 | return error; | ||
4292 | } | 3477 | } |
4293 | 3478 | ||
4294 | static int receive_grant(struct dlm_ls *ls, struct dlm_message *ms) | 3479 | static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms) |
4295 | { | 3480 | { |
4296 | struct dlm_lkb *lkb; | 3481 | struct dlm_lkb *lkb; |
4297 | struct dlm_rsb *r; | 3482 | struct dlm_rsb *r; |
4298 | int error; | 3483 | int error; |
4299 | 3484 | ||
4300 | error = find_lkb(ls, ms->m_remid, &lkb); | 3485 | error = find_lkb(ls, ms->m_remid, &lkb); |
4301 | if (error) | 3486 | if (error) { |
4302 | return error; | 3487 | log_debug(ls, "receive_grant from %d no lkb %x", |
3488 | ms->m_header.h_nodeid, ms->m_remid); | ||
3489 | return; | ||
3490 | } | ||
4303 | 3491 | ||
4304 | r = lkb->lkb_resource; | 3492 | r = lkb->lkb_resource; |
4305 | 3493 | ||
@@ -4319,18 +3507,20 @@ static int receive_grant(struct dlm_ls *ls, struct dlm_message *ms) | |||
4319 | unlock_rsb(r); | 3507 | unlock_rsb(r); |
4320 | put_rsb(r); | 3508 | put_rsb(r); |
4321 | dlm_put_lkb(lkb); | 3509 | dlm_put_lkb(lkb); |
4322 | return 0; | ||
4323 | } | 3510 | } |
4324 | 3511 | ||
4325 | static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms) | 3512 | static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms) |
4326 | { | 3513 | { |
4327 | struct dlm_lkb *lkb; | 3514 | struct dlm_lkb *lkb; |
4328 | struct dlm_rsb *r; | 3515 | struct dlm_rsb *r; |
4329 | int error; | 3516 | int error; |
4330 | 3517 | ||
4331 | error = find_lkb(ls, ms->m_remid, &lkb); | 3518 | error = find_lkb(ls, ms->m_remid, &lkb); |
4332 | if (error) | 3519 | if (error) { |
4333 | return error; | 3520 | log_debug(ls, "receive_bast from %d no lkb %x", |
3521 | ms->m_header.h_nodeid, ms->m_remid); | ||
3522 | return; | ||
3523 | } | ||
4334 | 3524 | ||
4335 | r = lkb->lkb_resource; | 3525 | r = lkb->lkb_resource; |
4336 | 3526 | ||
@@ -4342,120 +3532,57 @@ static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms) | |||
4342 | goto out; | 3532 | goto out; |
4343 | 3533 | ||
4344 | queue_bast(r, lkb, ms->m_bastmode); | 3534 | queue_bast(r, lkb, ms->m_bastmode); |
4345 | lkb->lkb_highbast = ms->m_bastmode; | ||
4346 | out: | 3535 | out: |
4347 | unlock_rsb(r); | 3536 | unlock_rsb(r); |
4348 | put_rsb(r); | 3537 | put_rsb(r); |
4349 | dlm_put_lkb(lkb); | 3538 | dlm_put_lkb(lkb); |
4350 | return 0; | ||
4351 | } | 3539 | } |
4352 | 3540 | ||
4353 | static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) | 3541 | static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) |
4354 | { | 3542 | { |
4355 | int len, error, ret_nodeid, from_nodeid, our_nodeid; | 3543 | int len, error, ret_nodeid, dir_nodeid, from_nodeid, our_nodeid; |
4356 | 3544 | ||
4357 | from_nodeid = ms->m_header.h_nodeid; | 3545 | from_nodeid = ms->m_header.h_nodeid; |
4358 | our_nodeid = dlm_our_nodeid(); | 3546 | our_nodeid = dlm_our_nodeid(); |
4359 | 3547 | ||
4360 | len = receive_extralen(ms); | 3548 | len = receive_extralen(ms); |
4361 | 3549 | ||
4362 | error = dlm_master_lookup(ls, from_nodeid, ms->m_extra, len, 0, | 3550 | dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); |
4363 | &ret_nodeid, NULL); | 3551 | if (dir_nodeid != our_nodeid) { |
3552 | log_error(ls, "lookup dir_nodeid %d from %d", | ||
3553 | dir_nodeid, from_nodeid); | ||
3554 | error = -EINVAL; | ||
3555 | ret_nodeid = -1; | ||
3556 | goto out; | ||
3557 | } | ||
3558 | |||
3559 | error = dlm_dir_lookup(ls, from_nodeid, ms->m_extra, len, &ret_nodeid); | ||
4364 | 3560 | ||
4365 | /* Optimization: we're master so treat lookup as a request */ | 3561 | /* Optimization: we're master so treat lookup as a request */ |
4366 | if (!error && ret_nodeid == our_nodeid) { | 3562 | if (!error && ret_nodeid == our_nodeid) { |
4367 | receive_request(ls, ms); | 3563 | receive_request(ls, ms); |
4368 | return; | 3564 | return; |
4369 | } | 3565 | } |
3566 | out: | ||
4370 | send_lookup_reply(ls, ms, ret_nodeid, error); | 3567 | send_lookup_reply(ls, ms, ret_nodeid, error); |
4371 | } | 3568 | } |
4372 | 3569 | ||
4373 | static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) | 3570 | static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) |
4374 | { | 3571 | { |
4375 | char name[DLM_RESNAME_MAXLEN+1]; | 3572 | int len, dir_nodeid, from_nodeid; |
4376 | struct dlm_rsb *r; | ||
4377 | uint32_t hash, b; | ||
4378 | int rv, len, dir_nodeid, from_nodeid; | ||
4379 | 3573 | ||
4380 | from_nodeid = ms->m_header.h_nodeid; | 3574 | from_nodeid = ms->m_header.h_nodeid; |
4381 | 3575 | ||
4382 | len = receive_extralen(ms); | 3576 | len = receive_extralen(ms); |
4383 | 3577 | ||
4384 | if (len > DLM_RESNAME_MAXLEN) { | ||
4385 | log_error(ls, "receive_remove from %d bad len %d", | ||
4386 | from_nodeid, len); | ||
4387 | return; | ||
4388 | } | ||
4389 | |||
4390 | dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); | 3578 | dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); |
4391 | if (dir_nodeid != dlm_our_nodeid()) { | 3579 | if (dir_nodeid != dlm_our_nodeid()) { |
4392 | log_error(ls, "receive_remove from %d bad nodeid %d", | 3580 | log_error(ls, "remove dir entry dir_nodeid %d from %d", |
4393 | from_nodeid, dir_nodeid); | 3581 | dir_nodeid, from_nodeid); |
4394 | return; | 3582 | return; |
4395 | } | 3583 | } |
4396 | 3584 | ||
4397 | /* Look for name on rsbtbl.toss, if it's there, kill it. | 3585 | dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len); |
4398 | If it's on rsbtbl.keep, it's being used, and we should ignore this | ||
4399 | message. This is an expected race between the dir node sending a | ||
4400 | request to the master node at the same time as the master node sends | ||
4401 | a remove to the dir node. The resolution to that race is for the | ||
4402 | dir node to ignore the remove message, and the master node to | ||
4403 | recreate the master rsb when it gets a request from the dir node for | ||
4404 | an rsb it doesn't have. */ | ||
4405 | |||
4406 | memset(name, 0, sizeof(name)); | ||
4407 | memcpy(name, ms->m_extra, len); | ||
4408 | |||
4409 | hash = jhash(name, len, 0); | ||
4410 | b = hash & (ls->ls_rsbtbl_size - 1); | ||
4411 | |||
4412 | spin_lock(&ls->ls_rsbtbl[b].lock); | ||
4413 | |||
4414 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r); | ||
4415 | if (rv) { | ||
4416 | /* verify the rsb is on keep list per comment above */ | ||
4417 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r); | ||
4418 | if (rv) { | ||
4419 | /* should not happen */ | ||
4420 | log_error(ls, "receive_remove from %d not found %s", | ||
4421 | from_nodeid, name); | ||
4422 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
4423 | return; | ||
4424 | } | ||
4425 | if (r->res_master_nodeid != from_nodeid) { | ||
4426 | /* should not happen */ | ||
4427 | log_error(ls, "receive_remove keep from %d master %d", | ||
4428 | from_nodeid, r->res_master_nodeid); | ||
4429 | dlm_print_rsb(r); | ||
4430 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
4431 | return; | ||
4432 | } | ||
4433 | |||
4434 | log_debug(ls, "receive_remove from %d master %d first %x %s", | ||
4435 | from_nodeid, r->res_master_nodeid, r->res_first_lkid, | ||
4436 | name); | ||
4437 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
4438 | return; | ||
4439 | } | ||
4440 | |||
4441 | if (r->res_master_nodeid != from_nodeid) { | ||
4442 | log_error(ls, "receive_remove toss from %d master %d", | ||
4443 | from_nodeid, r->res_master_nodeid); | ||
4444 | dlm_print_rsb(r); | ||
4445 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
4446 | return; | ||
4447 | } | ||
4448 | |||
4449 | if (kref_put(&r->res_ref, kill_rsb)) { | ||
4450 | rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); | ||
4451 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
4452 | dlm_free_rsb(r); | ||
4453 | } else { | ||
4454 | log_error(ls, "receive_remove from %d rsb ref error", | ||
4455 | from_nodeid); | ||
4456 | dlm_print_rsb(r); | ||
4457 | spin_unlock(&ls->ls_rsbtbl[b].lock); | ||
4458 | } | ||
4459 | } | 3586 | } |
4460 | 3587 | ||
4461 | static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms) | 3588 | static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms) |
@@ -4463,16 +3590,18 @@ static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms) | |||
4463 | do_purge(ls, ms->m_nodeid, ms->m_pid); | 3590 | do_purge(ls, ms->m_nodeid, ms->m_pid); |
4464 | } | 3591 | } |
4465 | 3592 | ||
4466 | static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | 3593 | static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) |
4467 | { | 3594 | { |
4468 | struct dlm_lkb *lkb; | 3595 | struct dlm_lkb *lkb; |
4469 | struct dlm_rsb *r; | 3596 | struct dlm_rsb *r; |
4470 | int error, mstype, result; | 3597 | int error, mstype, result; |
4471 | int from_nodeid = ms->m_header.h_nodeid; | ||
4472 | 3598 | ||
4473 | error = find_lkb(ls, ms->m_remid, &lkb); | 3599 | error = find_lkb(ls, ms->m_remid, &lkb); |
4474 | if (error) | 3600 | if (error) { |
4475 | return error; | 3601 | log_debug(ls, "receive_request_reply from %d no lkb %x", |
3602 | ms->m_header.h_nodeid, ms->m_remid); | ||
3603 | return; | ||
3604 | } | ||
4476 | 3605 | ||
4477 | r = lkb->lkb_resource; | 3606 | r = lkb->lkb_resource; |
4478 | hold_rsb(r); | 3607 | hold_rsb(r); |
@@ -4484,19 +3613,14 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
4484 | 3613 | ||
4485 | mstype = lkb->lkb_wait_type; | 3614 | mstype = lkb->lkb_wait_type; |
4486 | error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY); | 3615 | error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY); |
4487 | if (error) { | 3616 | if (error) |
4488 | log_error(ls, "receive_request_reply %x remote %d %x result %d", | ||
4489 | lkb->lkb_id, from_nodeid, ms->m_lkid, ms->m_result); | ||
4490 | dlm_dump_rsb(r); | ||
4491 | goto out; | 3617 | goto out; |
4492 | } | ||
4493 | 3618 | ||
4494 | /* Optimization: the dir node was also the master, so it took our | 3619 | /* Optimization: the dir node was also the master, so it took our |
4495 | lookup as a request and sent request reply instead of lookup reply */ | 3620 | lookup as a request and sent request reply instead of lookup reply */ |
4496 | if (mstype == DLM_MSG_LOOKUP) { | 3621 | if (mstype == DLM_MSG_LOOKUP) { |
4497 | r->res_master_nodeid = from_nodeid; | 3622 | r->res_nodeid = ms->m_header.h_nodeid; |
4498 | r->res_nodeid = from_nodeid; | 3623 | lkb->lkb_nodeid = r->res_nodeid; |
4499 | lkb->lkb_nodeid = from_nodeid; | ||
4500 | } | 3624 | } |
4501 | 3625 | ||
4502 | /* this is the value returned from do_request() on the master */ | 3626 | /* this is the value returned from do_request() on the master */ |
@@ -4530,30 +3654,18 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
4530 | case -EBADR: | 3654 | case -EBADR: |
4531 | case -ENOTBLK: | 3655 | case -ENOTBLK: |
4532 | /* find_rsb failed to find rsb or rsb wasn't master */ | 3656 | /* find_rsb failed to find rsb or rsb wasn't master */ |
4533 | log_limit(ls, "receive_request_reply %x from %d %d " | 3657 | log_debug(ls, "receive_request_reply %x %x master diff %d %d", |
4534 | "master %d dir %d first %x %s", lkb->lkb_id, | 3658 | lkb->lkb_id, lkb->lkb_flags, r->res_nodeid, result); |
4535 | from_nodeid, result, r->res_master_nodeid, | 3659 | r->res_nodeid = -1; |
4536 | r->res_dir_nodeid, r->res_first_lkid, r->res_name); | 3660 | lkb->lkb_nodeid = -1; |
4537 | |||
4538 | if (r->res_dir_nodeid != dlm_our_nodeid() && | ||
4539 | r->res_master_nodeid != dlm_our_nodeid()) { | ||
4540 | /* cause _request_lock->set_master->send_lookup */ | ||
4541 | r->res_master_nodeid = 0; | ||
4542 | r->res_nodeid = -1; | ||
4543 | lkb->lkb_nodeid = -1; | ||
4544 | } | ||
4545 | 3661 | ||
4546 | if (is_overlap(lkb)) { | 3662 | if (is_overlap(lkb)) { |
4547 | /* we'll ignore error in cancel/unlock reply */ | 3663 | /* we'll ignore error in cancel/unlock reply */ |
4548 | queue_cast_overlap(r, lkb); | 3664 | queue_cast_overlap(r, lkb); |
4549 | confirm_master(r, result); | 3665 | confirm_master(r, result); |
4550 | unhold_lkb(lkb); /* undoes create_lkb() */ | 3666 | unhold_lkb(lkb); /* undoes create_lkb() */ |
4551 | } else { | 3667 | } else |
4552 | _request_lock(r, lkb); | 3668 | _request_lock(r, lkb); |
4553 | |||
4554 | if (r->res_master_nodeid == dlm_our_nodeid()) | ||
4555 | confirm_master(r, 0); | ||
4556 | } | ||
4557 | break; | 3669 | break; |
4558 | 3670 | ||
4559 | default: | 3671 | default: |
@@ -4580,7 +3692,6 @@ static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
4580 | unlock_rsb(r); | 3692 | unlock_rsb(r); |
4581 | put_rsb(r); | 3693 | put_rsb(r); |
4582 | dlm_put_lkb(lkb); | 3694 | dlm_put_lkb(lkb); |
4583 | return 0; | ||
4584 | } | 3695 | } |
4585 | 3696 | ||
4586 | static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | 3697 | static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, |
@@ -4619,11 +3730,8 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
4619 | break; | 3730 | break; |
4620 | 3731 | ||
4621 | default: | 3732 | default: |
4622 | log_error(r->res_ls, "receive_convert_reply %x remote %d %x %d", | 3733 | log_error(r->res_ls, "receive_convert_reply %x error %d", |
4623 | lkb->lkb_id, ms->m_header.h_nodeid, ms->m_lkid, | 3734 | lkb->lkb_id, ms->m_result); |
4624 | ms->m_result); | ||
4625 | dlm_print_rsb(r); | ||
4626 | dlm_print_lkb(lkb); | ||
4627 | } | 3735 | } |
4628 | } | 3736 | } |
4629 | 3737 | ||
@@ -4650,18 +3758,20 @@ static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
4650 | put_rsb(r); | 3758 | put_rsb(r); |
4651 | } | 3759 | } |
4652 | 3760 | ||
4653 | static int receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) | 3761 | static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) |
4654 | { | 3762 | { |
4655 | struct dlm_lkb *lkb; | 3763 | struct dlm_lkb *lkb; |
4656 | int error; | 3764 | int error; |
4657 | 3765 | ||
4658 | error = find_lkb(ls, ms->m_remid, &lkb); | 3766 | error = find_lkb(ls, ms->m_remid, &lkb); |
4659 | if (error) | 3767 | if (error) { |
4660 | return error; | 3768 | log_debug(ls, "receive_convert_reply from %d no lkb %x", |
3769 | ms->m_header.h_nodeid, ms->m_remid); | ||
3770 | return; | ||
3771 | } | ||
4661 | 3772 | ||
4662 | _receive_convert_reply(lkb, ms); | 3773 | _receive_convert_reply(lkb, ms); |
4663 | dlm_put_lkb(lkb); | 3774 | dlm_put_lkb(lkb); |
4664 | return 0; | ||
4665 | } | 3775 | } |
4666 | 3776 | ||
4667 | static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | 3777 | static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) |
@@ -4700,18 +3810,20 @@ static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
4700 | put_rsb(r); | 3810 | put_rsb(r); |
4701 | } | 3811 | } |
4702 | 3812 | ||
4703 | static int receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) | 3813 | static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) |
4704 | { | 3814 | { |
4705 | struct dlm_lkb *lkb; | 3815 | struct dlm_lkb *lkb; |
4706 | int error; | 3816 | int error; |
4707 | 3817 | ||
4708 | error = find_lkb(ls, ms->m_remid, &lkb); | 3818 | error = find_lkb(ls, ms->m_remid, &lkb); |
4709 | if (error) | 3819 | if (error) { |
4710 | return error; | 3820 | log_debug(ls, "receive_unlock_reply from %d no lkb %x", |
3821 | ms->m_header.h_nodeid, ms->m_remid); | ||
3822 | return; | ||
3823 | } | ||
4711 | 3824 | ||
4712 | _receive_unlock_reply(lkb, ms); | 3825 | _receive_unlock_reply(lkb, ms); |
4713 | dlm_put_lkb(lkb); | 3826 | dlm_put_lkb(lkb); |
4714 | return 0; | ||
4715 | } | 3827 | } |
4716 | 3828 | ||
4717 | static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | 3829 | static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) |
@@ -4750,18 +3862,20 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
4750 | put_rsb(r); | 3862 | put_rsb(r); |
4751 | } | 3863 | } |
4752 | 3864 | ||
4753 | static int receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) | 3865 | static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) |
4754 | { | 3866 | { |
4755 | struct dlm_lkb *lkb; | 3867 | struct dlm_lkb *lkb; |
4756 | int error; | 3868 | int error; |
4757 | 3869 | ||
4758 | error = find_lkb(ls, ms->m_remid, &lkb); | 3870 | error = find_lkb(ls, ms->m_remid, &lkb); |
4759 | if (error) | 3871 | if (error) { |
4760 | return error; | 3872 | log_debug(ls, "receive_cancel_reply from %d no lkb %x", |
3873 | ms->m_header.h_nodeid, ms->m_remid); | ||
3874 | return; | ||
3875 | } | ||
4761 | 3876 | ||
4762 | _receive_cancel_reply(lkb, ms); | 3877 | _receive_cancel_reply(lkb, ms); |
4763 | dlm_put_lkb(lkb); | 3878 | dlm_put_lkb(lkb); |
4764 | return 0; | ||
4765 | } | 3879 | } |
4766 | 3880 | ||
4767 | static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) | 3881 | static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) |
@@ -4769,15 +3883,14 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
4769 | struct dlm_lkb *lkb; | 3883 | struct dlm_lkb *lkb; |
4770 | struct dlm_rsb *r; | 3884 | struct dlm_rsb *r; |
4771 | int error, ret_nodeid; | 3885 | int error, ret_nodeid; |
4772 | int do_lookup_list = 0; | ||
4773 | 3886 | ||
4774 | error = find_lkb(ls, ms->m_lkid, &lkb); | 3887 | error = find_lkb(ls, ms->m_lkid, &lkb); |
4775 | if (error) { | 3888 | if (error) { |
4776 | log_error(ls, "receive_lookup_reply no lkid %x", ms->m_lkid); | 3889 | log_error(ls, "receive_lookup_reply no lkb"); |
4777 | return; | 3890 | return; |
4778 | } | 3891 | } |
4779 | 3892 | ||
4780 | /* ms->m_result is the value returned by dlm_master_lookup on dir node | 3893 | /* ms->m_result is the value returned by dlm_dir_lookup on dir node |
4781 | FIXME: will a non-zero error ever be returned? */ | 3894 | FIXME: will a non-zero error ever be returned? */ |
4782 | 3895 | ||
4783 | r = lkb->lkb_resource; | 3896 | r = lkb->lkb_resource; |
@@ -4789,37 +3902,12 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
4789 | goto out; | 3902 | goto out; |
4790 | 3903 | ||
4791 | ret_nodeid = ms->m_nodeid; | 3904 | ret_nodeid = ms->m_nodeid; |
4792 | |||
4793 | /* We sometimes receive a request from the dir node for this | ||
4794 | rsb before we've received the dir node's loookup_reply for it. | ||
4795 | The request from the dir node implies we're the master, so we set | ||
4796 | ourself as master in receive_request_reply, and verify here that | ||
4797 | we are indeed the master. */ | ||
4798 | |||
4799 | if (r->res_master_nodeid && (r->res_master_nodeid != ret_nodeid)) { | ||
4800 | /* This should never happen */ | ||
4801 | log_error(ls, "receive_lookup_reply %x from %d ret %d " | ||
4802 | "master %d dir %d our %d first %x %s", | ||
4803 | lkb->lkb_id, ms->m_header.h_nodeid, ret_nodeid, | ||
4804 | r->res_master_nodeid, r->res_dir_nodeid, | ||
4805 | dlm_our_nodeid(), r->res_first_lkid, r->res_name); | ||
4806 | } | ||
4807 | |||
4808 | if (ret_nodeid == dlm_our_nodeid()) { | 3905 | if (ret_nodeid == dlm_our_nodeid()) { |
4809 | r->res_master_nodeid = ret_nodeid; | ||
4810 | r->res_nodeid = 0; | 3906 | r->res_nodeid = 0; |
4811 | do_lookup_list = 1; | 3907 | ret_nodeid = 0; |
4812 | r->res_first_lkid = 0; | 3908 | r->res_first_lkid = 0; |
4813 | } else if (ret_nodeid == -1) { | ||
4814 | /* the remote node doesn't believe it's the dir node */ | ||
4815 | log_error(ls, "receive_lookup_reply %x from %d bad ret_nodeid", | ||
4816 | lkb->lkb_id, ms->m_header.h_nodeid); | ||
4817 | r->res_master_nodeid = 0; | ||
4818 | r->res_nodeid = -1; | ||
4819 | lkb->lkb_nodeid = -1; | ||
4820 | } else { | 3909 | } else { |
4821 | /* set_master() will set lkb_nodeid from r */ | 3910 | /* set_master() will copy res_nodeid to lkb_nodeid */ |
4822 | r->res_master_nodeid = ret_nodeid; | ||
4823 | r->res_nodeid = ret_nodeid; | 3911 | r->res_nodeid = ret_nodeid; |
4824 | } | 3912 | } |
4825 | 3913 | ||
@@ -4834,7 +3922,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
4834 | _request_lock(r, lkb); | 3922 | _request_lock(r, lkb); |
4835 | 3923 | ||
4836 | out_list: | 3924 | out_list: |
4837 | if (do_lookup_list) | 3925 | if (!ret_nodeid) |
4838 | process_lookup_list(r); | 3926 | process_lookup_list(r); |
4839 | out: | 3927 | out: |
4840 | unlock_rsb(r); | 3928 | unlock_rsb(r); |
@@ -4842,13 +3930,10 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) | |||
4842 | dlm_put_lkb(lkb); | 3930 | dlm_put_lkb(lkb); |
4843 | } | 3931 | } |
4844 | 3932 | ||
4845 | static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms, | 3933 | static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) |
4846 | uint32_t saved_seq) | ||
4847 | { | 3934 | { |
4848 | int error = 0, noent = 0; | ||
4849 | |||
4850 | if (!dlm_is_member(ls, ms->m_header.h_nodeid)) { | 3935 | if (!dlm_is_member(ls, ms->m_header.h_nodeid)) { |
4851 | log_limit(ls, "receive %d from non-member %d %x %x %d", | 3936 | log_debug(ls, "ignore non-member message %d from %d %x %x %d", |
4852 | ms->m_type, ms->m_header.h_nodeid, ms->m_lkid, | 3937 | ms->m_type, ms->m_header.h_nodeid, ms->m_lkid, |
4853 | ms->m_remid, ms->m_result); | 3938 | ms->m_remid, ms->m_result); |
4854 | return; | 3939 | return; |
@@ -4859,50 +3944,47 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms, | |||
4859 | /* messages sent to a master node */ | 3944 | /* messages sent to a master node */ |
4860 | 3945 | ||
4861 | case DLM_MSG_REQUEST: | 3946 | case DLM_MSG_REQUEST: |
4862 | error = receive_request(ls, ms); | 3947 | receive_request(ls, ms); |
4863 | break; | 3948 | break; |
4864 | 3949 | ||
4865 | case DLM_MSG_CONVERT: | 3950 | case DLM_MSG_CONVERT: |
4866 | error = receive_convert(ls, ms); | 3951 | receive_convert(ls, ms); |
4867 | break; | 3952 | break; |
4868 | 3953 | ||
4869 | case DLM_MSG_UNLOCK: | 3954 | case DLM_MSG_UNLOCK: |
4870 | error = receive_unlock(ls, ms); | 3955 | receive_unlock(ls, ms); |
4871 | break; | 3956 | break; |
4872 | 3957 | ||
4873 | case DLM_MSG_CANCEL: | 3958 | case DLM_MSG_CANCEL: |
4874 | noent = 1; | 3959 | receive_cancel(ls, ms); |
4875 | error = receive_cancel(ls, ms); | ||
4876 | break; | 3960 | break; |
4877 | 3961 | ||
4878 | /* messages sent from a master node (replies to above) */ | 3962 | /* messages sent from a master node (replies to above) */ |
4879 | 3963 | ||
4880 | case DLM_MSG_REQUEST_REPLY: | 3964 | case DLM_MSG_REQUEST_REPLY: |
4881 | error = receive_request_reply(ls, ms); | 3965 | receive_request_reply(ls, ms); |
4882 | break; | 3966 | break; |
4883 | 3967 | ||
4884 | case DLM_MSG_CONVERT_REPLY: | 3968 | case DLM_MSG_CONVERT_REPLY: |
4885 | error = receive_convert_reply(ls, ms); | 3969 | receive_convert_reply(ls, ms); |
4886 | break; | 3970 | break; |
4887 | 3971 | ||
4888 | case DLM_MSG_UNLOCK_REPLY: | 3972 | case DLM_MSG_UNLOCK_REPLY: |
4889 | error = receive_unlock_reply(ls, ms); | 3973 | receive_unlock_reply(ls, ms); |
4890 | break; | 3974 | break; |
4891 | 3975 | ||
4892 | case DLM_MSG_CANCEL_REPLY: | 3976 | case DLM_MSG_CANCEL_REPLY: |
4893 | error = receive_cancel_reply(ls, ms); | 3977 | receive_cancel_reply(ls, ms); |
4894 | break; | 3978 | break; |
4895 | 3979 | ||
4896 | /* messages sent from a master node (only two types of async msg) */ | 3980 | /* messages sent from a master node (only two types of async msg) */ |
4897 | 3981 | ||
4898 | case DLM_MSG_GRANT: | 3982 | case DLM_MSG_GRANT: |
4899 | noent = 1; | 3983 | receive_grant(ls, ms); |
4900 | error = receive_grant(ls, ms); | ||
4901 | break; | 3984 | break; |
4902 | 3985 | ||
4903 | case DLM_MSG_BAST: | 3986 | case DLM_MSG_BAST: |
4904 | noent = 1; | 3987 | receive_bast(ls, ms); |
4905 | error = receive_bast(ls, ms); | ||
4906 | break; | 3988 | break; |
4907 | 3989 | ||
4908 | /* messages sent to a dir node */ | 3990 | /* messages sent to a dir node */ |
@@ -4930,37 +4012,6 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms, | |||
4930 | default: | 4012 | default: |
4931 | log_error(ls, "unknown message type %d", ms->m_type); | 4013 | log_error(ls, "unknown message type %d", ms->m_type); |
4932 | } | 4014 | } |
4933 | |||
4934 | /* | ||
4935 | * When checking for ENOENT, we're checking the result of | ||
4936 | * find_lkb(m_remid): | ||
4937 | * | ||
4938 | * The lock id referenced in the message wasn't found. This may | ||
4939 | * happen in normal usage for the async messages and cancel, so | ||
4940 | * only use log_debug for them. | ||
4941 | * | ||
4942 | * Some errors are expected and normal. | ||
4943 | */ | ||
4944 | |||
4945 | if (error == -ENOENT && noent) { | ||
4946 | log_debug(ls, "receive %d no %x remote %d %x saved_seq %u", | ||
4947 | ms->m_type, ms->m_remid, ms->m_header.h_nodeid, | ||
4948 | ms->m_lkid, saved_seq); | ||
4949 | } else if (error == -ENOENT) { | ||
4950 | log_error(ls, "receive %d no %x remote %d %x saved_seq %u", | ||
4951 | ms->m_type, ms->m_remid, ms->m_header.h_nodeid, | ||
4952 | ms->m_lkid, saved_seq); | ||
4953 | |||
4954 | if (ms->m_type == DLM_MSG_CONVERT) | ||
4955 | dlm_dump_rsb_hash(ls, ms->m_hash); | ||
4956 | } | ||
4957 | |||
4958 | if (error == -EINVAL) { | ||
4959 | log_error(ls, "receive %d inval from %d lkid %x remid %x " | ||
4960 | "saved_seq %u", | ||
4961 | ms->m_type, ms->m_header.h_nodeid, | ||
4962 | ms->m_lkid, ms->m_remid, saved_seq); | ||
4963 | } | ||
4964 | } | 4015 | } |
4965 | 4016 | ||
4966 | /* If the lockspace is in recovery mode (locking stopped), then normal | 4017 | /* If the lockspace is in recovery mode (locking stopped), then normal |
@@ -4975,29 +4026,19 @@ static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms, | |||
4975 | int nodeid) | 4026 | int nodeid) |
4976 | { | 4027 | { |
4977 | if (dlm_locking_stopped(ls)) { | 4028 | if (dlm_locking_stopped(ls)) { |
4978 | /* If we were a member of this lockspace, left, and rejoined, | ||
4979 | other nodes may still be sending us messages from the | ||
4980 | lockspace generation before we left. */ | ||
4981 | if (!ls->ls_generation) { | ||
4982 | log_limit(ls, "receive %d from %d ignore old gen", | ||
4983 | ms->m_type, nodeid); | ||
4984 | return; | ||
4985 | } | ||
4986 | |||
4987 | dlm_add_requestqueue(ls, nodeid, ms); | 4029 | dlm_add_requestqueue(ls, nodeid, ms); |
4988 | } else { | 4030 | } else { |
4989 | dlm_wait_requestqueue(ls); | 4031 | dlm_wait_requestqueue(ls); |
4990 | _receive_message(ls, ms, 0); | 4032 | _receive_message(ls, ms); |
4991 | } | 4033 | } |
4992 | } | 4034 | } |
4993 | 4035 | ||
4994 | /* This is called by dlm_recoverd to process messages that were saved on | 4036 | /* This is called by dlm_recoverd to process messages that were saved on |
4995 | the requestqueue. */ | 4037 | the requestqueue. */ |
4996 | 4038 | ||
4997 | void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms, | 4039 | void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms) |
4998 | uint32_t saved_seq) | ||
4999 | { | 4040 | { |
5000 | _receive_message(ls, ms, saved_seq); | 4041 | _receive_message(ls, ms); |
5001 | } | 4042 | } |
5002 | 4043 | ||
5003 | /* This is called by the midcomms layer when something is received for | 4044 | /* This is called by the midcomms layer when something is received for |
@@ -5033,11 +4074,9 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid) | |||
5033 | 4074 | ||
5034 | ls = dlm_find_lockspace_global(hd->h_lockspace); | 4075 | ls = dlm_find_lockspace_global(hd->h_lockspace); |
5035 | if (!ls) { | 4076 | if (!ls) { |
5036 | if (dlm_config.ci_log_debug) { | 4077 | if (dlm_config.ci_log_debug) |
5037 | printk_ratelimited(KERN_DEBUG "dlm: invalid lockspace " | 4078 | log_print("invalid lockspace %x from %d cmd %d type %d", |
5038 | "%u from %d cmd %d type %d\n", | 4079 | hd->h_lockspace, nodeid, hd->h_cmd, type); |
5039 | hd->h_lockspace, nodeid, hd->h_cmd, type); | ||
5040 | } | ||
5041 | 4080 | ||
5042 | if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) | 4081 | if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) |
5043 | dlm_send_ls_not_ready(nodeid, &p->rcom); | 4082 | dlm_send_ls_not_ready(nodeid, &p->rcom); |
@@ -5085,13 +4124,15 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
5085 | /* A waiting lkb needs recovery if the master node has failed, or | 4124 | /* A waiting lkb needs recovery if the master node has failed, or |
5086 | the master node is changing (only when no directory is used) */ | 4125 | the master node is changing (only when no directory is used) */ |
5087 | 4126 | ||
5088 | static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb, | 4127 | static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb) |
5089 | int dir_nodeid) | ||
5090 | { | 4128 | { |
5091 | if (dlm_no_directory(ls)) | 4129 | if (dlm_is_removed(ls, lkb->lkb_nodeid)) |
5092 | return 1; | 4130 | return 1; |
5093 | 4131 | ||
5094 | if (dlm_is_removed(ls, lkb->lkb_wait_nodeid)) | 4132 | if (!dlm_no_directory(ls)) |
4133 | return 0; | ||
4134 | |||
4135 | if (dlm_dir_nodeid(lkb->lkb_resource) != lkb->lkb_nodeid) | ||
5095 | return 1; | 4136 | return 1; |
5096 | 4137 | ||
5097 | return 0; | 4138 | return 0; |
@@ -5108,7 +4149,6 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
5108 | struct dlm_lkb *lkb, *safe; | 4149 | struct dlm_lkb *lkb, *safe; |
5109 | struct dlm_message *ms_stub; | 4150 | struct dlm_message *ms_stub; |
5110 | int wait_type, stub_unlock_result, stub_cancel_result; | 4151 | int wait_type, stub_unlock_result, stub_cancel_result; |
5111 | int dir_nodeid; | ||
5112 | 4152 | ||
5113 | ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL); | 4153 | ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL); |
5114 | if (!ms_stub) { | 4154 | if (!ms_stub) { |
@@ -5120,21 +4160,13 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
5120 | 4160 | ||
5121 | list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { | 4161 | list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { |
5122 | 4162 | ||
5123 | dir_nodeid = dlm_dir_nodeid(lkb->lkb_resource); | ||
5124 | |||
5125 | /* exclude debug messages about unlocks because there can be so | 4163 | /* exclude debug messages about unlocks because there can be so |
5126 | many and they aren't very interesting */ | 4164 | many and they aren't very interesting */ |
5127 | 4165 | ||
5128 | if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) { | 4166 | if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) { |
5129 | log_debug(ls, "waiter %x remote %x msg %d r_nodeid %d " | 4167 | log_debug(ls, "recover_waiter %x nodeid %d " |
5130 | "lkb_nodeid %d wait_nodeid %d dir_nodeid %d", | 4168 | "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid, |
5131 | lkb->lkb_id, | 4169 | lkb->lkb_wait_type, lkb->lkb_wait_nodeid); |
5132 | lkb->lkb_remid, | ||
5133 | lkb->lkb_wait_type, | ||
5134 | lkb->lkb_resource->res_nodeid, | ||
5135 | lkb->lkb_nodeid, | ||
5136 | lkb->lkb_wait_nodeid, | ||
5137 | dir_nodeid); | ||
5138 | } | 4170 | } |
5139 | 4171 | ||
5140 | /* all outstanding lookups, regardless of destination will be | 4172 | /* all outstanding lookups, regardless of destination will be |
@@ -5145,7 +4177,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) | |||
5145 | continue; | 4177 | continue; |
5146 | } | 4178 | } |
5147 | 4179 | ||
5148 | if (!waiter_needs_recovery(ls, lkb, dir_nodeid)) | 4180 | if (!waiter_needs_recovery(ls, lkb)) |
5149 | continue; | 4181 | continue; |
5150 | 4182 | ||
5151 | wait_type = lkb->lkb_wait_type; | 4183 | wait_type = lkb->lkb_wait_type; |
@@ -5278,11 +4310,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) | |||
5278 | ou = is_overlap_unlock(lkb); | 4310 | ou = is_overlap_unlock(lkb); |
5279 | err = 0; | 4311 | err = 0; |
5280 | 4312 | ||
5281 | log_debug(ls, "waiter %x remote %x msg %d r_nodeid %d " | 4313 | log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d", |
5282 | "lkb_nodeid %d wait_nodeid %d dir_nodeid %d " | 4314 | lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid); |
5283 | "overlap %d %d", lkb->lkb_id, lkb->lkb_remid, mstype, | ||
5284 | r->res_nodeid, lkb->lkb_nodeid, lkb->lkb_wait_nodeid, | ||
5285 | dlm_dir_nodeid(r), oc, ou); | ||
5286 | 4315 | ||
5287 | /* At this point we assume that we won't get a reply to any | 4316 | /* At this point we assume that we won't get a reply to any |
5288 | previous op or overlap op on this lock. First, do a big | 4317 | previous op or overlap op on this lock. First, do a big |
@@ -5334,12 +4363,9 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) | |||
5334 | } | 4363 | } |
5335 | } | 4364 | } |
5336 | 4365 | ||
5337 | if (err) { | 4366 | if (err) |
5338 | log_error(ls, "waiter %x msg %d r_nodeid %d " | 4367 | log_error(ls, "recover_waiters_post %x %d %x %d %d", |
5339 | "dir_nodeid %d overlap %d %d", | 4368 | lkb->lkb_id, mstype, lkb->lkb_flags, oc, ou); |
5340 | lkb->lkb_id, mstype, r->res_nodeid, | ||
5341 | dlm_dir_nodeid(r), oc, ou); | ||
5342 | } | ||
5343 | unlock_rsb(r); | 4369 | unlock_rsb(r); |
5344 | put_rsb(r); | 4370 | put_rsb(r); |
5345 | dlm_put_lkb(lkb); | 4371 | dlm_put_lkb(lkb); |
@@ -5348,187 +4374,110 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) | |||
5348 | return error; | 4374 | return error; |
5349 | } | 4375 | } |
5350 | 4376 | ||
5351 | static void purge_mstcpy_list(struct dlm_ls *ls, struct dlm_rsb *r, | 4377 | static void purge_queue(struct dlm_rsb *r, struct list_head *queue, |
5352 | struct list_head *list) | 4378 | int (*test)(struct dlm_ls *ls, struct dlm_lkb *lkb)) |
5353 | { | 4379 | { |
4380 | struct dlm_ls *ls = r->res_ls; | ||
5354 | struct dlm_lkb *lkb, *safe; | 4381 | struct dlm_lkb *lkb, *safe; |
5355 | 4382 | ||
5356 | list_for_each_entry_safe(lkb, safe, list, lkb_statequeue) { | 4383 | list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) { |
5357 | if (!is_master_copy(lkb)) | 4384 | if (test(ls, lkb)) { |
5358 | continue; | 4385 | rsb_set_flag(r, RSB_LOCKS_PURGED); |
5359 | 4386 | del_lkb(r, lkb); | |
5360 | /* don't purge lkbs we've added in recover_master_copy for | 4387 | /* this put should free the lkb */ |
5361 | the current recovery seq */ | 4388 | if (!dlm_put_lkb(lkb)) |
5362 | 4389 | log_error(ls, "purged lkb not released"); | |
5363 | if (lkb->lkb_recover_seq == ls->ls_recover_seq) | 4390 | } |
5364 | continue; | ||
5365 | |||
5366 | del_lkb(r, lkb); | ||
5367 | |||
5368 | /* this put should free the lkb */ | ||
5369 | if (!dlm_put_lkb(lkb)) | ||
5370 | log_error(ls, "purged mstcpy lkb not released"); | ||
5371 | } | 4391 | } |
5372 | } | 4392 | } |
5373 | 4393 | ||
5374 | void dlm_purge_mstcpy_locks(struct dlm_rsb *r) | 4394 | static int purge_dead_test(struct dlm_ls *ls, struct dlm_lkb *lkb) |
5375 | { | 4395 | { |
5376 | struct dlm_ls *ls = r->res_ls; | 4396 | return (is_master_copy(lkb) && dlm_is_removed(ls, lkb->lkb_nodeid)); |
5377 | |||
5378 | purge_mstcpy_list(ls, r, &r->res_grantqueue); | ||
5379 | purge_mstcpy_list(ls, r, &r->res_convertqueue); | ||
5380 | purge_mstcpy_list(ls, r, &r->res_waitqueue); | ||
5381 | } | 4397 | } |
5382 | 4398 | ||
5383 | static void purge_dead_list(struct dlm_ls *ls, struct dlm_rsb *r, | 4399 | static int purge_mstcpy_test(struct dlm_ls *ls, struct dlm_lkb *lkb) |
5384 | struct list_head *list, | ||
5385 | int nodeid_gone, unsigned int *count) | ||
5386 | { | 4400 | { |
5387 | struct dlm_lkb *lkb, *safe; | 4401 | return is_master_copy(lkb); |
5388 | 4402 | } | |
5389 | list_for_each_entry_safe(lkb, safe, list, lkb_statequeue) { | ||
5390 | if (!is_master_copy(lkb)) | ||
5391 | continue; | ||
5392 | |||
5393 | if ((lkb->lkb_nodeid == nodeid_gone) || | ||
5394 | dlm_is_removed(ls, lkb->lkb_nodeid)) { | ||
5395 | |||
5396 | /* tell recover_lvb to invalidate the lvb | ||
5397 | because a node holding EX/PW failed */ | ||
5398 | if ((lkb->lkb_exflags & DLM_LKF_VALBLK) && | ||
5399 | (lkb->lkb_grmode >= DLM_LOCK_PW)) { | ||
5400 | rsb_set_flag(r, RSB_RECOVER_LVB_INVAL); | ||
5401 | } | ||
5402 | |||
5403 | del_lkb(r, lkb); | ||
5404 | |||
5405 | /* this put should free the lkb */ | ||
5406 | if (!dlm_put_lkb(lkb)) | ||
5407 | log_error(ls, "purged dead lkb not released"); | ||
5408 | 4403 | ||
5409 | rsb_set_flag(r, RSB_RECOVER_GRANT); | 4404 | static void purge_dead_locks(struct dlm_rsb *r) |
4405 | { | ||
4406 | purge_queue(r, &r->res_grantqueue, &purge_dead_test); | ||
4407 | purge_queue(r, &r->res_convertqueue, &purge_dead_test); | ||
4408 | purge_queue(r, &r->res_waitqueue, &purge_dead_test); | ||
4409 | } | ||
5410 | 4410 | ||
5411 | (*count)++; | 4411 | void dlm_purge_mstcpy_locks(struct dlm_rsb *r) |
5412 | } | 4412 | { |
5413 | } | 4413 | purge_queue(r, &r->res_grantqueue, &purge_mstcpy_test); |
4414 | purge_queue(r, &r->res_convertqueue, &purge_mstcpy_test); | ||
4415 | purge_queue(r, &r->res_waitqueue, &purge_mstcpy_test); | ||
5414 | } | 4416 | } |
5415 | 4417 | ||
5416 | /* Get rid of locks held by nodes that are gone. */ | 4418 | /* Get rid of locks held by nodes that are gone. */ |
5417 | 4419 | ||
5418 | void dlm_recover_purge(struct dlm_ls *ls) | 4420 | int dlm_purge_locks(struct dlm_ls *ls) |
5419 | { | 4421 | { |
5420 | struct dlm_rsb *r; | 4422 | struct dlm_rsb *r; |
5421 | struct dlm_member *memb; | ||
5422 | int nodes_count = 0; | ||
5423 | int nodeid_gone = 0; | ||
5424 | unsigned int lkb_count = 0; | ||
5425 | |||
5426 | /* cache one removed nodeid to optimize the common | ||
5427 | case of a single node removed */ | ||
5428 | 4423 | ||
5429 | list_for_each_entry(memb, &ls->ls_nodes_gone, list) { | 4424 | log_debug(ls, "dlm_purge_locks"); |
5430 | nodes_count++; | ||
5431 | nodeid_gone = memb->nodeid; | ||
5432 | } | ||
5433 | |||
5434 | if (!nodes_count) | ||
5435 | return; | ||
5436 | 4425 | ||
5437 | down_write(&ls->ls_root_sem); | 4426 | down_write(&ls->ls_root_sem); |
5438 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 4427 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
5439 | hold_rsb(r); | 4428 | hold_rsb(r); |
5440 | lock_rsb(r); | 4429 | lock_rsb(r); |
5441 | if (is_master(r)) { | 4430 | if (is_master(r)) |
5442 | purge_dead_list(ls, r, &r->res_grantqueue, | 4431 | purge_dead_locks(r); |
5443 | nodeid_gone, &lkb_count); | ||
5444 | purge_dead_list(ls, r, &r->res_convertqueue, | ||
5445 | nodeid_gone, &lkb_count); | ||
5446 | purge_dead_list(ls, r, &r->res_waitqueue, | ||
5447 | nodeid_gone, &lkb_count); | ||
5448 | } | ||
5449 | unlock_rsb(r); | 4432 | unlock_rsb(r); |
5450 | unhold_rsb(r); | 4433 | unhold_rsb(r); |
5451 | cond_resched(); | 4434 | |
4435 | schedule(); | ||
5452 | } | 4436 | } |
5453 | up_write(&ls->ls_root_sem); | 4437 | up_write(&ls->ls_root_sem); |
5454 | 4438 | ||
5455 | if (lkb_count) | 4439 | return 0; |
5456 | log_debug(ls, "dlm_recover_purge %u locks for %u nodes", | ||
5457 | lkb_count, nodes_count); | ||
5458 | } | 4440 | } |
5459 | 4441 | ||
5460 | static struct dlm_rsb *find_grant_rsb(struct dlm_ls *ls, int bucket) | 4442 | static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) |
5461 | { | 4443 | { |
5462 | struct rb_node *n; | 4444 | struct dlm_rsb *r, *r_ret = NULL; |
5463 | struct dlm_rsb *r; | ||
5464 | 4445 | ||
5465 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 4446 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
5466 | for (n = rb_first(&ls->ls_rsbtbl[bucket].keep); n; n = rb_next(n)) { | 4447 | list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) { |
5467 | r = rb_entry(n, struct dlm_rsb, res_hashnode); | 4448 | if (!rsb_flag(r, RSB_LOCKS_PURGED)) |
5468 | |||
5469 | if (!rsb_flag(r, RSB_RECOVER_GRANT)) | ||
5470 | continue; | 4449 | continue; |
5471 | if (!is_master(r)) { | ||
5472 | rsb_clear_flag(r, RSB_RECOVER_GRANT); | ||
5473 | continue; | ||
5474 | } | ||
5475 | hold_rsb(r); | 4450 | hold_rsb(r); |
5476 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); | 4451 | rsb_clear_flag(r, RSB_LOCKS_PURGED); |
5477 | return r; | 4452 | r_ret = r; |
4453 | break; | ||
5478 | } | 4454 | } |
5479 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); | 4455 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
5480 | return NULL; | 4456 | return r_ret; |
5481 | } | 4457 | } |
5482 | 4458 | ||
5483 | /* | 4459 | void dlm_grant_after_purge(struct dlm_ls *ls) |
5484 | * Attempt to grant locks on resources that we are the master of. | ||
5485 | * Locks may have become grantable during recovery because locks | ||
5486 | * from departed nodes have been purged (or not rebuilt), allowing | ||
5487 | * previously blocked locks to now be granted. The subset of rsb's | ||
5488 | * we are interested in are those with lkb's on either the convert or | ||
5489 | * waiting queues. | ||
5490 | * | ||
5491 | * Simplest would be to go through each master rsb and check for non-empty | ||
5492 | * convert or waiting queues, and attempt to grant on those rsbs. | ||
5493 | * Checking the queues requires lock_rsb, though, for which we'd need | ||
5494 | * to release the rsbtbl lock. This would make iterating through all | ||
5495 | * rsb's very inefficient. So, we rely on earlier recovery routines | ||
5496 | * to set RECOVER_GRANT on any rsb's that we should attempt to grant | ||
5497 | * locks for. | ||
5498 | */ | ||
5499 | |||
5500 | void dlm_recover_grant(struct dlm_ls *ls) | ||
5501 | { | 4460 | { |
5502 | struct dlm_rsb *r; | 4461 | struct dlm_rsb *r; |
5503 | int bucket = 0; | 4462 | int bucket = 0; |
5504 | unsigned int count = 0; | ||
5505 | unsigned int rsb_count = 0; | ||
5506 | unsigned int lkb_count = 0; | ||
5507 | 4463 | ||
5508 | while (1) { | 4464 | while (1) { |
5509 | r = find_grant_rsb(ls, bucket); | 4465 | r = find_purged_rsb(ls, bucket); |
5510 | if (!r) { | 4466 | if (!r) { |
5511 | if (bucket == ls->ls_rsbtbl_size - 1) | 4467 | if (bucket == ls->ls_rsbtbl_size - 1) |
5512 | break; | 4468 | break; |
5513 | bucket++; | 4469 | bucket++; |
5514 | continue; | 4470 | continue; |
5515 | } | 4471 | } |
5516 | rsb_count++; | ||
5517 | count = 0; | ||
5518 | lock_rsb(r); | 4472 | lock_rsb(r); |
5519 | /* the RECOVER_GRANT flag is checked in the grant path */ | 4473 | if (is_master(r)) { |
5520 | grant_pending_locks(r, &count); | 4474 | grant_pending_locks(r); |
5521 | rsb_clear_flag(r, RSB_RECOVER_GRANT); | 4475 | confirm_master(r, 0); |
5522 | lkb_count += count; | 4476 | } |
5523 | confirm_master(r, 0); | ||
5524 | unlock_rsb(r); | 4477 | unlock_rsb(r); |
5525 | put_rsb(r); | 4478 | put_rsb(r); |
5526 | cond_resched(); | 4479 | schedule(); |
5527 | } | 4480 | } |
5528 | |||
5529 | if (lkb_count) | ||
5530 | log_debug(ls, "dlm_recover_grant %u locks on %u resources", | ||
5531 | lkb_count, rsb_count); | ||
5532 | } | 4481 | } |
5533 | 4482 | ||
5534 | static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid, | 4483 | static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid, |
@@ -5617,8 +4566,6 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
5617 | struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; | 4566 | struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; |
5618 | struct dlm_rsb *r; | 4567 | struct dlm_rsb *r; |
5619 | struct dlm_lkb *lkb; | 4568 | struct dlm_lkb *lkb; |
5620 | uint32_t remid = 0; | ||
5621 | int from_nodeid = rc->rc_header.h_nodeid; | ||
5622 | int error; | 4569 | int error; |
5623 | 4570 | ||
5624 | if (rl->rl_parent_lkid) { | 4571 | if (rl->rl_parent_lkid) { |
@@ -5626,31 +4573,14 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
5626 | goto out; | 4573 | goto out; |
5627 | } | 4574 | } |
5628 | 4575 | ||
5629 | remid = le32_to_cpu(rl->rl_lkid); | ||
5630 | |||
5631 | /* In general we expect the rsb returned to be R_MASTER, but we don't | ||
5632 | have to require it. Recovery of masters on one node can overlap | ||
5633 | recovery of locks on another node, so one node can send us MSTCPY | ||
5634 | locks before we've made ourselves master of this rsb. We can still | ||
5635 | add new MSTCPY locks that we receive here without any harm; when | ||
5636 | we make ourselves master, dlm_recover_masters() won't touch the | ||
5637 | MSTCPY locks we've received early. */ | ||
5638 | |||
5639 | error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen), | 4576 | error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen), |
5640 | from_nodeid, R_RECEIVE_RECOVER, &r); | 4577 | R_MASTER, &r); |
5641 | if (error) | 4578 | if (error) |
5642 | goto out; | 4579 | goto out; |
5643 | 4580 | ||
5644 | lock_rsb(r); | 4581 | lock_rsb(r); |
5645 | 4582 | ||
5646 | if (dlm_no_directory(ls) && (dlm_dir_nodeid(r) != dlm_our_nodeid())) { | 4583 | lkb = search_remid(r, rc->rc_header.h_nodeid, le32_to_cpu(rl->rl_lkid)); |
5647 | log_error(ls, "dlm_recover_master_copy remote %d %x not dir", | ||
5648 | from_nodeid, remid); | ||
5649 | error = -EBADR; | ||
5650 | goto out_unlock; | ||
5651 | } | ||
5652 | |||
5653 | lkb = search_remid(r, from_nodeid, remid); | ||
5654 | if (lkb) { | 4584 | if (lkb) { |
5655 | error = -EEXIST; | 4585 | error = -EEXIST; |
5656 | goto out_remid; | 4586 | goto out_remid; |
@@ -5669,25 +4599,19 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
5669 | attach_lkb(r, lkb); | 4599 | attach_lkb(r, lkb); |
5670 | add_lkb(r, lkb, rl->rl_status); | 4600 | add_lkb(r, lkb, rl->rl_status); |
5671 | error = 0; | 4601 | error = 0; |
5672 | ls->ls_recover_locks_in++; | ||
5673 | |||
5674 | if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue)) | ||
5675 | rsb_set_flag(r, RSB_RECOVER_GRANT); | ||
5676 | 4602 | ||
5677 | out_remid: | 4603 | out_remid: |
5678 | /* this is the new value returned to the lock holder for | 4604 | /* this is the new value returned to the lock holder for |
5679 | saving in its process-copy lkb */ | 4605 | saving in its process-copy lkb */ |
5680 | rl->rl_remid = cpu_to_le32(lkb->lkb_id); | 4606 | rl->rl_remid = cpu_to_le32(lkb->lkb_id); |
5681 | 4607 | ||
5682 | lkb->lkb_recover_seq = ls->ls_recover_seq; | ||
5683 | |||
5684 | out_unlock: | 4608 | out_unlock: |
5685 | unlock_rsb(r); | 4609 | unlock_rsb(r); |
5686 | put_rsb(r); | 4610 | put_rsb(r); |
5687 | out: | 4611 | out: |
5688 | if (error && error != -EEXIST) | 4612 | if (error) |
5689 | log_debug(ls, "dlm_recover_master_copy remote %d %x error %d", | 4613 | log_debug(ls, "recover_master_copy %d %x", error, |
5690 | from_nodeid, remid, error); | 4614 | le32_to_cpu(rl->rl_lkid)); |
5691 | rl->rl_result = cpu_to_le32(error); | 4615 | rl->rl_result = cpu_to_le32(error); |
5692 | return error; | 4616 | return error; |
5693 | } | 4617 | } |
@@ -5698,52 +4622,41 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
5698 | struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; | 4622 | struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; |
5699 | struct dlm_rsb *r; | 4623 | struct dlm_rsb *r; |
5700 | struct dlm_lkb *lkb; | 4624 | struct dlm_lkb *lkb; |
5701 | uint32_t lkid, remid; | 4625 | int error; |
5702 | int error, result; | ||
5703 | |||
5704 | lkid = le32_to_cpu(rl->rl_lkid); | ||
5705 | remid = le32_to_cpu(rl->rl_remid); | ||
5706 | result = le32_to_cpu(rl->rl_result); | ||
5707 | 4626 | ||
5708 | error = find_lkb(ls, lkid, &lkb); | 4627 | error = find_lkb(ls, le32_to_cpu(rl->rl_lkid), &lkb); |
5709 | if (error) { | 4628 | if (error) { |
5710 | log_error(ls, "dlm_recover_process_copy no %x remote %d %x %d", | 4629 | log_error(ls, "recover_process_copy no lkid %x", |
5711 | lkid, rc->rc_header.h_nodeid, remid, result); | 4630 | le32_to_cpu(rl->rl_lkid)); |
5712 | return error; | 4631 | return error; |
5713 | } | 4632 | } |
5714 | 4633 | ||
4634 | DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); | ||
4635 | |||
4636 | error = le32_to_cpu(rl->rl_result); | ||
4637 | |||
5715 | r = lkb->lkb_resource; | 4638 | r = lkb->lkb_resource; |
5716 | hold_rsb(r); | 4639 | hold_rsb(r); |
5717 | lock_rsb(r); | 4640 | lock_rsb(r); |
5718 | 4641 | ||
5719 | if (!is_process_copy(lkb)) { | 4642 | switch (error) { |
5720 | log_error(ls, "dlm_recover_process_copy bad %x remote %d %x %d", | ||
5721 | lkid, rc->rc_header.h_nodeid, remid, result); | ||
5722 | dlm_dump_rsb(r); | ||
5723 | unlock_rsb(r); | ||
5724 | put_rsb(r); | ||
5725 | dlm_put_lkb(lkb); | ||
5726 | return -EINVAL; | ||
5727 | } | ||
5728 | |||
5729 | switch (result) { | ||
5730 | case -EBADR: | 4643 | case -EBADR: |
5731 | /* There's a chance the new master received our lock before | 4644 | /* There's a chance the new master received our lock before |
5732 | dlm_recover_master_reply(), this wouldn't happen if we did | 4645 | dlm_recover_master_reply(), this wouldn't happen if we did |
5733 | a barrier between recover_masters and recover_locks. */ | 4646 | a barrier between recover_masters and recover_locks. */ |
5734 | 4647 | log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id, | |
5735 | log_debug(ls, "dlm_recover_process_copy %x remote %d %x %d", | 4648 | (unsigned long)r, r->res_name); |
5736 | lkid, rc->rc_header.h_nodeid, remid, result); | ||
5737 | |||
5738 | dlm_send_rcom_lock(r, lkb); | 4649 | dlm_send_rcom_lock(r, lkb); |
5739 | goto out; | 4650 | goto out; |
5740 | case -EEXIST: | 4651 | case -EEXIST: |
4652 | log_debug(ls, "master copy exists %x", lkb->lkb_id); | ||
4653 | /* fall through */ | ||
5741 | case 0: | 4654 | case 0: |
5742 | lkb->lkb_remid = remid; | 4655 | lkb->lkb_remid = le32_to_cpu(rl->rl_remid); |
5743 | break; | 4656 | break; |
5744 | default: | 4657 | default: |
5745 | log_error(ls, "dlm_recover_process_copy %x remote %d %x %d unk", | 4658 | log_error(ls, "dlm_recover_process_copy unknown error %d %x", |
5746 | lkid, rc->rc_header.h_nodeid, remid, result); | 4659 | error, lkb->lkb_id); |
5747 | } | 4660 | } |
5748 | 4661 | ||
5749 | /* an ack for dlm_recover_locks() which waits for replies from | 4662 | /* an ack for dlm_recover_locks() which waits for replies from |
@@ -6032,18 +4945,15 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
6032 | return error; | 4945 | return error; |
6033 | } | 4946 | } |
6034 | 4947 | ||
6035 | /* The FORCEUNLOCK flag allows the unlock to go ahead even if the lkb isn't | 4948 | /* The force flag allows the unlock to go ahead even if the lkb isn't granted. |
6036 | granted. Regardless of what rsb queue the lock is on, it's removed and | 4949 | Regardless of what rsb queue the lock is on, it's removed and freed. */ |
6037 | freed. The IVVALBLK flag causes the lvb on the resource to be invalidated | ||
6038 | if our lock is PW/EX (it's ignored if our granted mode is smaller.) */ | ||
6039 | 4950 | ||
6040 | static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | 4951 | static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) |
6041 | { | 4952 | { |
6042 | struct dlm_args args; | 4953 | struct dlm_args args; |
6043 | int error; | 4954 | int error; |
6044 | 4955 | ||
6045 | set_unlock_args(DLM_LKF_FORCEUNLOCK | DLM_LKF_IVVALBLK, | 4956 | set_unlock_args(DLM_LKF_FORCEUNLOCK, lkb->lkb_ua, &args); |
6046 | lkb->lkb_ua, &args); | ||
6047 | 4957 | ||
6048 | error = unlock_lock(ls, lkb, &args); | 4958 | error = unlock_lock(ls, lkb, &args); |
6049 | if (error == -DLM_EUNLOCK) | 4959 | if (error == -DLM_EUNLOCK) |
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 5e0c72e36a9..265017a7c3e 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h | |||
@@ -14,10 +14,8 @@ | |||
14 | #define __LOCK_DOT_H__ | 14 | #define __LOCK_DOT_H__ |
15 | 15 | ||
16 | void dlm_dump_rsb(struct dlm_rsb *r); | 16 | void dlm_dump_rsb(struct dlm_rsb *r); |
17 | void dlm_dump_rsb_name(struct dlm_ls *ls, char *name, int len); | ||
18 | void dlm_print_lkb(struct dlm_lkb *lkb); | 17 | void dlm_print_lkb(struct dlm_lkb *lkb); |
19 | void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms, | 18 | void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); |
20 | uint32_t saved_seq); | ||
21 | void dlm_receive_buffer(union dlm_packet *p, int nodeid); | 19 | void dlm_receive_buffer(union dlm_packet *p, int nodeid); |
22 | int dlm_modes_compat(int mode1, int mode2); | 20 | int dlm_modes_compat(int mode1, int mode2); |
23 | void dlm_put_rsb(struct dlm_rsb *r); | 21 | void dlm_put_rsb(struct dlm_rsb *r); |
@@ -29,15 +27,10 @@ void dlm_unlock_recovery(struct dlm_ls *ls); | |||
29 | void dlm_scan_waiters(struct dlm_ls *ls); | 27 | void dlm_scan_waiters(struct dlm_ls *ls); |
30 | void dlm_scan_timeout(struct dlm_ls *ls); | 28 | void dlm_scan_timeout(struct dlm_ls *ls); |
31 | void dlm_adjust_timeouts(struct dlm_ls *ls); | 29 | void dlm_adjust_timeouts(struct dlm_ls *ls); |
32 | int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len, | ||
33 | unsigned int flags, int *r_nodeid, int *result); | ||
34 | 30 | ||
35 | int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len, | 31 | int dlm_purge_locks(struct dlm_ls *ls); |
36 | struct dlm_rsb **r_ret); | ||
37 | |||
38 | void dlm_recover_purge(struct dlm_ls *ls); | ||
39 | void dlm_purge_mstcpy_locks(struct dlm_rsb *r); | 32 | void dlm_purge_mstcpy_locks(struct dlm_rsb *r); |
40 | void dlm_recover_grant(struct dlm_ls *ls); | 33 | void dlm_grant_after_purge(struct dlm_ls *ls); |
41 | int dlm_recover_waiters_post(struct dlm_ls *ls); | 34 | int dlm_recover_waiters_post(struct dlm_ls *ls); |
42 | void dlm_recover_waiters_pre(struct dlm_ls *ls); | 35 | void dlm_recover_waiters_pre(struct dlm_ls *ls); |
43 | int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc); | 36 | int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc); |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 2e99fb0c973..a1d8f1af144 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -74,19 +74,6 @@ static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len) | |||
74 | return len; | 74 | return len; |
75 | } | 75 | } |
76 | 76 | ||
77 | static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf) | ||
78 | { | ||
79 | return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls)); | ||
80 | } | ||
81 | |||
82 | static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len) | ||
83 | { | ||
84 | int val = simple_strtoul(buf, NULL, 0); | ||
85 | if (val == 1) | ||
86 | set_bit(LSFL_NODIR, &ls->ls_flags); | ||
87 | return len; | ||
88 | } | ||
89 | |||
90 | static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf) | 77 | static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf) |
91 | { | 78 | { |
92 | uint32_t status = dlm_recover_status(ls); | 79 | uint32_t status = dlm_recover_status(ls); |
@@ -120,12 +107,6 @@ static struct dlm_attr dlm_attr_id = { | |||
120 | .store = dlm_id_store | 107 | .store = dlm_id_store |
121 | }; | 108 | }; |
122 | 109 | ||
123 | static struct dlm_attr dlm_attr_nodir = { | ||
124 | .attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR}, | ||
125 | .show = dlm_nodir_show, | ||
126 | .store = dlm_nodir_store | ||
127 | }; | ||
128 | |||
129 | static struct dlm_attr dlm_attr_recover_status = { | 110 | static struct dlm_attr dlm_attr_recover_status = { |
130 | .attr = {.name = "recover_status", .mode = S_IRUGO}, | 111 | .attr = {.name = "recover_status", .mode = S_IRUGO}, |
131 | .show = dlm_recover_status_show | 112 | .show = dlm_recover_status_show |
@@ -140,7 +121,6 @@ static struct attribute *dlm_attrs[] = { | |||
140 | &dlm_attr_control.attr, | 121 | &dlm_attr_control.attr, |
141 | &dlm_attr_event.attr, | 122 | &dlm_attr_event.attr, |
142 | &dlm_attr_id.attr, | 123 | &dlm_attr_id.attr, |
143 | &dlm_attr_nodir.attr, | ||
144 | &dlm_attr_recover_status.attr, | 124 | &dlm_attr_recover_status.attr, |
145 | &dlm_attr_recover_nodeid.attr, | 125 | &dlm_attr_recover_nodeid.attr, |
146 | NULL, | 126 | NULL, |
@@ -406,15 +386,12 @@ static void threads_stop(void) | |||
406 | dlm_lowcomms_stop(); | 386 | dlm_lowcomms_stop(); |
407 | } | 387 | } |
408 | 388 | ||
409 | static int new_lockspace(const char *name, const char *cluster, | 389 | static int new_lockspace(const char *name, int namelen, void **lockspace, |
410 | uint32_t flags, int lvblen, | 390 | uint32_t flags, int lvblen) |
411 | const struct dlm_lockspace_ops *ops, void *ops_arg, | ||
412 | int *ops_result, dlm_lockspace_t **lockspace) | ||
413 | { | 391 | { |
414 | struct dlm_ls *ls; | 392 | struct dlm_ls *ls; |
415 | int i, size, error; | 393 | int i, size, error; |
416 | int do_unreg = 0; | 394 | int do_unreg = 0; |
417 | int namelen = strlen(name); | ||
418 | 395 | ||
419 | if (namelen > DLM_LOCKSPACE_LEN) | 396 | if (namelen > DLM_LOCKSPACE_LEN) |
420 | return -EINVAL; | 397 | return -EINVAL; |
@@ -426,24 +403,8 @@ static int new_lockspace(const char *name, const char *cluster, | |||
426 | return -EINVAL; | 403 | return -EINVAL; |
427 | 404 | ||
428 | if (!dlm_user_daemon_available()) { | 405 | if (!dlm_user_daemon_available()) { |
429 | log_print("dlm user daemon not available"); | 406 | module_put(THIS_MODULE); |
430 | error = -EUNATCH; | 407 | return -EUNATCH; |
431 | goto out; | ||
432 | } | ||
433 | |||
434 | if (ops && ops_result) { | ||
435 | if (!dlm_config.ci_recover_callbacks) | ||
436 | *ops_result = -EOPNOTSUPP; | ||
437 | else | ||
438 | *ops_result = 0; | ||
439 | } | ||
440 | |||
441 | if (dlm_config.ci_recover_callbacks && cluster && | ||
442 | strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) { | ||
443 | log_print("dlm cluster name %s mismatch %s", | ||
444 | dlm_config.ci_cluster_name, cluster); | ||
445 | error = -EBADR; | ||
446 | goto out; | ||
447 | } | 408 | } |
448 | 409 | ||
449 | error = 0; | 410 | error = 0; |
@@ -481,11 +442,6 @@ static int new_lockspace(const char *name, const char *cluster, | |||
481 | ls->ls_flags = 0; | 442 | ls->ls_flags = 0; |
482 | ls->ls_scan_time = jiffies; | 443 | ls->ls_scan_time = jiffies; |
483 | 444 | ||
484 | if (ops && dlm_config.ci_recover_callbacks) { | ||
485 | ls->ls_ops = ops; | ||
486 | ls->ls_ops_arg = ops_arg; | ||
487 | } | ||
488 | |||
489 | if (flags & DLM_LSFL_TIMEWARN) | 445 | if (flags & DLM_LSFL_TIMEWARN) |
490 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); | 446 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); |
491 | 447 | ||
@@ -501,23 +457,25 @@ static int new_lockspace(const char *name, const char *cluster, | |||
501 | if (!ls->ls_rsbtbl) | 457 | if (!ls->ls_rsbtbl) |
502 | goto out_lsfree; | 458 | goto out_lsfree; |
503 | for (i = 0; i < size; i++) { | 459 | for (i = 0; i < size; i++) { |
504 | ls->ls_rsbtbl[i].keep.rb_node = NULL; | 460 | INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list); |
505 | ls->ls_rsbtbl[i].toss.rb_node = NULL; | 461 | INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss); |
506 | spin_lock_init(&ls->ls_rsbtbl[i].lock); | 462 | spin_lock_init(&ls->ls_rsbtbl[i].lock); |
507 | } | 463 | } |
508 | 464 | ||
509 | spin_lock_init(&ls->ls_remove_spin); | ||
510 | |||
511 | for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) { | ||
512 | ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1, | ||
513 | GFP_KERNEL); | ||
514 | if (!ls->ls_remove_names[i]) | ||
515 | goto out_rsbtbl; | ||
516 | } | ||
517 | |||
518 | idr_init(&ls->ls_lkbidr); | 465 | idr_init(&ls->ls_lkbidr); |
519 | spin_lock_init(&ls->ls_lkbidr_spin); | 466 | spin_lock_init(&ls->ls_lkbidr_spin); |
520 | 467 | ||
468 | size = dlm_config.ci_dirtbl_size; | ||
469 | ls->ls_dirtbl_size = size; | ||
470 | |||
471 | ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size); | ||
472 | if (!ls->ls_dirtbl) | ||
473 | goto out_lkbfree; | ||
474 | for (i = 0; i < size; i++) { | ||
475 | INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); | ||
476 | spin_lock_init(&ls->ls_dirtbl[i].lock); | ||
477 | } | ||
478 | |||
521 | INIT_LIST_HEAD(&ls->ls_waiters); | 479 | INIT_LIST_HEAD(&ls->ls_waiters); |
522 | mutex_init(&ls->ls_waiters_mutex); | 480 | mutex_init(&ls->ls_waiters_mutex); |
523 | INIT_LIST_HEAD(&ls->ls_orphans); | 481 | INIT_LIST_HEAD(&ls->ls_orphans); |
@@ -565,23 +523,18 @@ static int new_lockspace(const char *name, const char *cluster, | |||
565 | 523 | ||
566 | ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS); | 524 | ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS); |
567 | if (!ls->ls_recover_buf) | 525 | if (!ls->ls_recover_buf) |
568 | goto out_lkbidr; | 526 | goto out_dirfree; |
569 | |||
570 | ls->ls_slot = 0; | ||
571 | ls->ls_num_slots = 0; | ||
572 | ls->ls_slots_size = 0; | ||
573 | ls->ls_slots = NULL; | ||
574 | 527 | ||
575 | INIT_LIST_HEAD(&ls->ls_recover_list); | 528 | INIT_LIST_HEAD(&ls->ls_recover_list); |
576 | spin_lock_init(&ls->ls_recover_list_lock); | 529 | spin_lock_init(&ls->ls_recover_list_lock); |
577 | idr_init(&ls->ls_recover_idr); | ||
578 | spin_lock_init(&ls->ls_recover_idr_lock); | ||
579 | ls->ls_recover_list_count = 0; | 530 | ls->ls_recover_list_count = 0; |
580 | ls->ls_local_handle = ls; | 531 | ls->ls_local_handle = ls; |
581 | init_waitqueue_head(&ls->ls_wait_general); | 532 | init_waitqueue_head(&ls->ls_wait_general); |
582 | INIT_LIST_HEAD(&ls->ls_root_list); | 533 | INIT_LIST_HEAD(&ls->ls_root_list); |
583 | init_rwsem(&ls->ls_root_sem); | 534 | init_rwsem(&ls->ls_root_sem); |
584 | 535 | ||
536 | down_write(&ls->ls_in_recovery); | ||
537 | |||
585 | spin_lock(&lslist_lock); | 538 | spin_lock(&lslist_lock); |
586 | ls->ls_create_count = 1; | 539 | ls->ls_create_count = 1; |
587 | list_add(&ls->ls_list, &lslist); | 540 | list_add(&ls->ls_list, &lslist); |
@@ -595,24 +548,13 @@ static int new_lockspace(const char *name, const char *cluster, | |||
595 | } | 548 | } |
596 | } | 549 | } |
597 | 550 | ||
598 | init_waitqueue_head(&ls->ls_recover_lock_wait); | 551 | /* needs to find ls in lslist */ |
599 | |||
600 | /* | ||
601 | * Once started, dlm_recoverd first looks for ls in lslist, then | ||
602 | * initializes ls_in_recovery as locked in "down" mode. We need | ||
603 | * to wait for the wakeup from dlm_recoverd because in_recovery | ||
604 | * has to start out in down mode. | ||
605 | */ | ||
606 | |||
607 | error = dlm_recoverd_start(ls); | 552 | error = dlm_recoverd_start(ls); |
608 | if (error) { | 553 | if (error) { |
609 | log_error(ls, "can't start dlm_recoverd %d", error); | 554 | log_error(ls, "can't start dlm_recoverd %d", error); |
610 | goto out_callback; | 555 | goto out_callback; |
611 | } | 556 | } |
612 | 557 | ||
613 | wait_event(ls->ls_recover_lock_wait, | ||
614 | test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); | ||
615 | |||
616 | ls->ls_kobj.kset = dlm_kset; | 558 | ls->ls_kobj.kset = dlm_kset; |
617 | error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, | 559 | error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, |
618 | "%s", ls->ls_name); | 560 | "%s", ls->ls_name); |
@@ -656,15 +598,11 @@ static int new_lockspace(const char *name, const char *cluster, | |||
656 | spin_lock(&lslist_lock); | 598 | spin_lock(&lslist_lock); |
657 | list_del(&ls->ls_list); | 599 | list_del(&ls->ls_list); |
658 | spin_unlock(&lslist_lock); | 600 | spin_unlock(&lslist_lock); |
659 | idr_destroy(&ls->ls_recover_idr); | ||
660 | kfree(ls->ls_recover_buf); | 601 | kfree(ls->ls_recover_buf); |
661 | out_lkbidr: | 602 | out_dirfree: |
603 | vfree(ls->ls_dirtbl); | ||
604 | out_lkbfree: | ||
662 | idr_destroy(&ls->ls_lkbidr); | 605 | idr_destroy(&ls->ls_lkbidr); |
663 | for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) { | ||
664 | if (ls->ls_remove_names[i]) | ||
665 | kfree(ls->ls_remove_names[i]); | ||
666 | } | ||
667 | out_rsbtbl: | ||
668 | vfree(ls->ls_rsbtbl); | 606 | vfree(ls->ls_rsbtbl); |
669 | out_lsfree: | 607 | out_lsfree: |
670 | if (do_unreg) | 608 | if (do_unreg) |
@@ -676,10 +614,8 @@ static int new_lockspace(const char *name, const char *cluster, | |||
676 | return error; | 614 | return error; |
677 | } | 615 | } |
678 | 616 | ||
679 | int dlm_new_lockspace(const char *name, const char *cluster, | 617 | int dlm_new_lockspace(const char *name, int namelen, void **lockspace, |
680 | uint32_t flags, int lvblen, | 618 | uint32_t flags, int lvblen) |
681 | const struct dlm_lockspace_ops *ops, void *ops_arg, | ||
682 | int *ops_result, dlm_lockspace_t **lockspace) | ||
683 | { | 619 | { |
684 | int error = 0; | 620 | int error = 0; |
685 | 621 | ||
@@ -689,8 +625,7 @@ int dlm_new_lockspace(const char *name, const char *cluster, | |||
689 | if (error) | 625 | if (error) |
690 | goto out; | 626 | goto out; |
691 | 627 | ||
692 | error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg, | 628 | error = new_lockspace(name, namelen, lockspace, flags, lvblen); |
693 | ops_result, lockspace); | ||
694 | if (!error) | 629 | if (!error) |
695 | ls_count++; | 630 | ls_count++; |
696 | if (error > 0) | 631 | if (error > 0) |
@@ -750,7 +685,7 @@ static int lockspace_busy(struct dlm_ls *ls, int force) | |||
750 | static int release_lockspace(struct dlm_ls *ls, int force) | 685 | static int release_lockspace(struct dlm_ls *ls, int force) |
751 | { | 686 | { |
752 | struct dlm_rsb *rsb; | 687 | struct dlm_rsb *rsb; |
753 | struct rb_node *n; | 688 | struct list_head *head; |
754 | int i, busy, rv; | 689 | int i, busy, rv; |
755 | 690 | ||
756 | busy = lockspace_busy(ls, force); | 691 | busy = lockspace_busy(ls, force); |
@@ -792,6 +727,13 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
792 | kfree(ls->ls_recover_buf); | 727 | kfree(ls->ls_recover_buf); |
793 | 728 | ||
794 | /* | 729 | /* |
730 | * Free direntry structs. | ||
731 | */ | ||
732 | |||
733 | dlm_dir_clear(ls); | ||
734 | vfree(ls->ls_dirtbl); | ||
735 | |||
736 | /* | ||
795 | * Free all lkb's in idr | 737 | * Free all lkb's in idr |
796 | */ | 738 | */ |
797 | 739 | ||
@@ -804,24 +746,26 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
804 | */ | 746 | */ |
805 | 747 | ||
806 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | 748 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { |
807 | while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) { | 749 | head = &ls->ls_rsbtbl[i].list; |
808 | rsb = rb_entry(n, struct dlm_rsb, res_hashnode); | 750 | while (!list_empty(head)) { |
809 | rb_erase(n, &ls->ls_rsbtbl[i].keep); | 751 | rsb = list_entry(head->next, struct dlm_rsb, |
752 | res_hashchain); | ||
753 | |||
754 | list_del(&rsb->res_hashchain); | ||
810 | dlm_free_rsb(rsb); | 755 | dlm_free_rsb(rsb); |
811 | } | 756 | } |
812 | 757 | ||
813 | while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) { | 758 | head = &ls->ls_rsbtbl[i].toss; |
814 | rsb = rb_entry(n, struct dlm_rsb, res_hashnode); | 759 | while (!list_empty(head)) { |
815 | rb_erase(n, &ls->ls_rsbtbl[i].toss); | 760 | rsb = list_entry(head->next, struct dlm_rsb, |
761 | res_hashchain); | ||
762 | list_del(&rsb->res_hashchain); | ||
816 | dlm_free_rsb(rsb); | 763 | dlm_free_rsb(rsb); |
817 | } | 764 | } |
818 | } | 765 | } |
819 | 766 | ||
820 | vfree(ls->ls_rsbtbl); | 767 | vfree(ls->ls_rsbtbl); |
821 | 768 | ||
822 | for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) | ||
823 | kfree(ls->ls_remove_names[i]); | ||
824 | |||
825 | while (!list_empty(&ls->ls_new_rsb)) { | 769 | while (!list_empty(&ls->ls_new_rsb)) { |
826 | rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, | 770 | rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, |
827 | res_hashchain); | 771 | res_hashchain); |
@@ -835,6 +779,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
835 | 779 | ||
836 | dlm_purge_requestqueue(ls); | 780 | dlm_purge_requestqueue(ls); |
837 | kfree(ls->ls_recover_args); | 781 | kfree(ls->ls_recover_args); |
782 | dlm_clear_free_entries(ls); | ||
838 | dlm_clear_members(ls); | 783 | dlm_clear_members(ls); |
839 | dlm_clear_members_gone(ls); | 784 | dlm_clear_members_gone(ls); |
840 | kfree(ls->ls_node_array); | 785 | kfree(ls->ls_node_array); |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index dd87a31bcc2..990626e7da8 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -52,7 +52,6 @@ | |||
52 | #include <linux/mutex.h> | 52 | #include <linux/mutex.h> |
53 | #include <linux/sctp.h> | 53 | #include <linux/sctp.h> |
54 | #include <linux/slab.h> | 54 | #include <linux/slab.h> |
55 | #include <net/sctp/sctp.h> | ||
56 | #include <net/sctp/user.h> | 55 | #include <net/sctp/user.h> |
57 | #include <net/ipv6.h> | 56 | #include <net/ipv6.h> |
58 | 57 | ||
@@ -140,19 +139,8 @@ struct writequeue_entry { | |||
140 | struct connection *con; | 139 | struct connection *con; |
141 | }; | 140 | }; |
142 | 141 | ||
143 | struct dlm_node_addr { | ||
144 | struct list_head list; | ||
145 | int nodeid; | ||
146 | int addr_count; | ||
147 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; | ||
148 | }; | ||
149 | |||
150 | static LIST_HEAD(dlm_node_addrs); | ||
151 | static DEFINE_SPINLOCK(dlm_node_addrs_spin); | ||
152 | |||
153 | static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; | 142 | static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; |
154 | static int dlm_local_count; | 143 | static int dlm_local_count; |
155 | static int dlm_allow_conn; | ||
156 | 144 | ||
157 | /* Work queues */ | 145 | /* Work queues */ |
158 | static struct workqueue_struct *recv_workqueue; | 146 | static struct workqueue_struct *recv_workqueue; |
@@ -274,146 +262,31 @@ static struct connection *assoc2con(int assoc_id) | |||
274 | return NULL; | 262 | return NULL; |
275 | } | 263 | } |
276 | 264 | ||
277 | static struct dlm_node_addr *find_node_addr(int nodeid) | 265 | static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) |
278 | { | ||
279 | struct dlm_node_addr *na; | ||
280 | |||
281 | list_for_each_entry(na, &dlm_node_addrs, list) { | ||
282 | if (na->nodeid == nodeid) | ||
283 | return na; | ||
284 | } | ||
285 | return NULL; | ||
286 | } | ||
287 | |||
288 | static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) | ||
289 | { | ||
290 | switch (x->ss_family) { | ||
291 | case AF_INET: { | ||
292 | struct sockaddr_in *sinx = (struct sockaddr_in *)x; | ||
293 | struct sockaddr_in *siny = (struct sockaddr_in *)y; | ||
294 | if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) | ||
295 | return 0; | ||
296 | if (sinx->sin_port != siny->sin_port) | ||
297 | return 0; | ||
298 | break; | ||
299 | } | ||
300 | case AF_INET6: { | ||
301 | struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; | ||
302 | struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; | ||
303 | if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) | ||
304 | return 0; | ||
305 | if (sinx->sin6_port != siny->sin6_port) | ||
306 | return 0; | ||
307 | break; | ||
308 | } | ||
309 | default: | ||
310 | return 0; | ||
311 | } | ||
312 | return 1; | ||
313 | } | ||
314 | |||
315 | static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, | ||
316 | struct sockaddr *sa_out) | ||
317 | { | 266 | { |
318 | struct sockaddr_storage sas; | 267 | struct sockaddr_storage addr; |
319 | struct dlm_node_addr *na; | 268 | int error; |
320 | 269 | ||
321 | if (!dlm_local_count) | 270 | if (!dlm_local_count) |
322 | return -1; | 271 | return -1; |
323 | 272 | ||
324 | spin_lock(&dlm_node_addrs_spin); | 273 | error = dlm_nodeid_to_addr(nodeid, &addr); |
325 | na = find_node_addr(nodeid); | 274 | if (error) |
326 | if (na && na->addr_count) | 275 | return error; |
327 | memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); | ||
328 | spin_unlock(&dlm_node_addrs_spin); | ||
329 | |||
330 | if (!na) | ||
331 | return -EEXIST; | ||
332 | |||
333 | if (!na->addr_count) | ||
334 | return -ENOENT; | ||
335 | |||
336 | if (sas_out) | ||
337 | memcpy(sas_out, &sas, sizeof(struct sockaddr_storage)); | ||
338 | |||
339 | if (!sa_out) | ||
340 | return 0; | ||
341 | 276 | ||
342 | if (dlm_local_addr[0]->ss_family == AF_INET) { | 277 | if (dlm_local_addr[0]->ss_family == AF_INET) { |
343 | struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; | 278 | struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; |
344 | struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; | 279 | struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; |
345 | ret4->sin_addr.s_addr = in4->sin_addr.s_addr; | 280 | ret4->sin_addr.s_addr = in4->sin_addr.s_addr; |
346 | } else { | 281 | } else { |
347 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas; | 282 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; |
348 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out; | 283 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; |
349 | ret6->sin6_addr = in6->sin6_addr; | 284 | ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr); |
350 | } | 285 | } |
351 | 286 | ||
352 | return 0; | 287 | return 0; |
353 | } | 288 | } |
354 | 289 | ||
355 | static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | ||
356 | { | ||
357 | struct dlm_node_addr *na; | ||
358 | int rv = -EEXIST; | ||
359 | |||
360 | spin_lock(&dlm_node_addrs_spin); | ||
361 | list_for_each_entry(na, &dlm_node_addrs, list) { | ||
362 | if (!na->addr_count) | ||
363 | continue; | ||
364 | |||
365 | if (!addr_compare(na->addr[0], addr)) | ||
366 | continue; | ||
367 | |||
368 | *nodeid = na->nodeid; | ||
369 | rv = 0; | ||
370 | break; | ||
371 | } | ||
372 | spin_unlock(&dlm_node_addrs_spin); | ||
373 | return rv; | ||
374 | } | ||
375 | |||
376 | int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) | ||
377 | { | ||
378 | struct sockaddr_storage *new_addr; | ||
379 | struct dlm_node_addr *new_node, *na; | ||
380 | |||
381 | new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS); | ||
382 | if (!new_node) | ||
383 | return -ENOMEM; | ||
384 | |||
385 | new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS); | ||
386 | if (!new_addr) { | ||
387 | kfree(new_node); | ||
388 | return -ENOMEM; | ||
389 | } | ||
390 | |||
391 | memcpy(new_addr, addr, len); | ||
392 | |||
393 | spin_lock(&dlm_node_addrs_spin); | ||
394 | na = find_node_addr(nodeid); | ||
395 | if (!na) { | ||
396 | new_node->nodeid = nodeid; | ||
397 | new_node->addr[0] = new_addr; | ||
398 | new_node->addr_count = 1; | ||
399 | list_add(&new_node->list, &dlm_node_addrs); | ||
400 | spin_unlock(&dlm_node_addrs_spin); | ||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | if (na->addr_count >= DLM_MAX_ADDR_COUNT) { | ||
405 | spin_unlock(&dlm_node_addrs_spin); | ||
406 | kfree(new_addr); | ||
407 | kfree(new_node); | ||
408 | return -ENOSPC; | ||
409 | } | ||
410 | |||
411 | na->addr[na->addr_count++] = new_addr; | ||
412 | spin_unlock(&dlm_node_addrs_spin); | ||
413 | kfree(new_node); | ||
414 | return 0; | ||
415 | } | ||
416 | |||
417 | /* Data available on socket or listen socket received a connect */ | 290 | /* Data available on socket or listen socket received a connect */ |
418 | static void lowcomms_data_ready(struct sock *sk, int count_unused) | 291 | static void lowcomms_data_ready(struct sock *sk, int count_unused) |
419 | { | 292 | { |
@@ -473,7 +346,7 @@ int dlm_lowcomms_connect_node(int nodeid) | |||
473 | } | 346 | } |
474 | 347 | ||
475 | /* Make a socket active */ | 348 | /* Make a socket active */ |
476 | static void add_sock(struct socket *sock, struct connection *con) | 349 | static int add_sock(struct socket *sock, struct connection *con) |
477 | { | 350 | { |
478 | con->sock = sock; | 351 | con->sock = sock; |
479 | 352 | ||
@@ -483,6 +356,7 @@ static void add_sock(struct socket *sock, struct connection *con) | |||
483 | con->sock->sk->sk_state_change = lowcomms_state_change; | 356 | con->sock->sk->sk_state_change = lowcomms_state_change; |
484 | con->sock->sk->sk_user_data = con; | 357 | con->sock->sk->sk_user_data = con; |
485 | con->sock->sk->sk_allocation = GFP_NOFS; | 358 | con->sock->sk->sk_allocation = GFP_NOFS; |
359 | return 0; | ||
486 | } | 360 | } |
487 | 361 | ||
488 | /* Add the port number to an IPv6 or 4 sockaddr and return the address | 362 | /* Add the port number to an IPv6 or 4 sockaddr and return the address |
@@ -600,6 +474,9 @@ static void process_sctp_notification(struct connection *con, | |||
600 | int prim_len, ret; | 474 | int prim_len, ret; |
601 | int addr_len; | 475 | int addr_len; |
602 | struct connection *new_con; | 476 | struct connection *new_con; |
477 | sctp_peeloff_arg_t parg; | ||
478 | int parglen = sizeof(parg); | ||
479 | int err; | ||
603 | 480 | ||
604 | /* | 481 | /* |
605 | * We get this before any data for an association. | 482 | * We get this before any data for an association. |
@@ -634,7 +511,7 @@ static void process_sctp_notification(struct connection *con, | |||
634 | return; | 511 | return; |
635 | } | 512 | } |
636 | make_sockaddr(&prim.ssp_addr, 0, &addr_len); | 513 | make_sockaddr(&prim.ssp_addr, 0, &addr_len); |
637 | if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) { | 514 | if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { |
638 | unsigned char *b=(unsigned char *)&prim.ssp_addr; | 515 | unsigned char *b=(unsigned char *)&prim.ssp_addr; |
639 | log_print("reject connect from unknown addr"); | 516 | log_print("reject connect from unknown addr"); |
640 | print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, | 517 | print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, |
@@ -648,19 +525,23 @@ static void process_sctp_notification(struct connection *con, | |||
648 | return; | 525 | return; |
649 | 526 | ||
650 | /* Peel off a new sock */ | 527 | /* Peel off a new sock */ |
651 | sctp_lock_sock(con->sock->sk); | 528 | parg.associd = sn->sn_assoc_change.sac_assoc_id; |
652 | ret = sctp_do_peeloff(con->sock->sk, | 529 | ret = kernel_getsockopt(con->sock, IPPROTO_SCTP, |
653 | sn->sn_assoc_change.sac_assoc_id, | 530 | SCTP_SOCKOPT_PEELOFF, |
654 | &new_con->sock); | 531 | (void *)&parg, &parglen); |
655 | sctp_release_sock(con->sock->sk); | ||
656 | if (ret < 0) { | 532 | if (ret < 0) { |
657 | log_print("Can't peel off a socket for " | 533 | log_print("Can't peel off a socket for " |
658 | "connection %d to node %d: err=%d", | 534 | "connection %d to node %d: err=%d", |
659 | (int)sn->sn_assoc_change.sac_assoc_id, | 535 | parg.associd, nodeid, ret); |
660 | nodeid, ret); | 536 | return; |
537 | } | ||
538 | new_con->sock = sockfd_lookup(parg.sd, &err); | ||
539 | if (!new_con->sock) { | ||
540 | log_print("sockfd_lookup error %d", err); | ||
661 | return; | 541 | return; |
662 | } | 542 | } |
663 | add_sock(new_con->sock, new_con); | 543 | add_sock(new_con->sock, new_con); |
544 | sockfd_put(new_con->sock); | ||
664 | 545 | ||
665 | log_print("connecting to %d sctp association %d", | 546 | log_print("connecting to %d sctp association %d", |
666 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); | 547 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); |
@@ -835,13 +716,6 @@ static int tcp_accept_from_sock(struct connection *con) | |||
835 | struct connection *newcon; | 716 | struct connection *newcon; |
836 | struct connection *addcon; | 717 | struct connection *addcon; |
837 | 718 | ||
838 | mutex_lock(&connections_lock); | ||
839 | if (!dlm_allow_conn) { | ||
840 | mutex_unlock(&connections_lock); | ||
841 | return -1; | ||
842 | } | ||
843 | mutex_unlock(&connections_lock); | ||
844 | |||
845 | memset(&peeraddr, 0, sizeof(peeraddr)); | 719 | memset(&peeraddr, 0, sizeof(peeraddr)); |
846 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, | 720 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, |
847 | IPPROTO_TCP, &newsock); | 721 | IPPROTO_TCP, &newsock); |
@@ -871,7 +745,7 @@ static int tcp_accept_from_sock(struct connection *con) | |||
871 | 745 | ||
872 | /* Get the new node's NODEID */ | 746 | /* Get the new node's NODEID */ |
873 | make_sockaddr(&peeraddr, 0, &len); | 747 | make_sockaddr(&peeraddr, 0, &len); |
874 | if (addr_to_nodeid(&peeraddr, &nodeid)) { | 748 | if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { |
875 | unsigned char *b=(unsigned char *)&peeraddr; | 749 | unsigned char *b=(unsigned char *)&peeraddr; |
876 | log_print("connect from non cluster node"); | 750 | log_print("connect from non cluster node"); |
877 | print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, | 751 | print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, |
@@ -986,7 +860,7 @@ static void sctp_init_assoc(struct connection *con) | |||
986 | if (con->retries++ > MAX_CONNECT_RETRIES) | 860 | if (con->retries++ > MAX_CONNECT_RETRIES) |
987 | return; | 861 | return; |
988 | 862 | ||
989 | if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { | 863 | if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { |
990 | log_print("no address for nodeid %d", con->nodeid); | 864 | log_print("no address for nodeid %d", con->nodeid); |
991 | return; | 865 | return; |
992 | } | 866 | } |
@@ -1052,11 +926,11 @@ static void sctp_init_assoc(struct connection *con) | |||
1052 | /* Connect a new socket to its peer */ | 926 | /* Connect a new socket to its peer */ |
1053 | static void tcp_connect_to_sock(struct connection *con) | 927 | static void tcp_connect_to_sock(struct connection *con) |
1054 | { | 928 | { |
929 | int result = -EHOSTUNREACH; | ||
1055 | struct sockaddr_storage saddr, src_addr; | 930 | struct sockaddr_storage saddr, src_addr; |
1056 | int addr_len; | 931 | int addr_len; |
1057 | struct socket *sock = NULL; | 932 | struct socket *sock = NULL; |
1058 | int one = 1; | 933 | int one = 1; |
1059 | int result; | ||
1060 | 934 | ||
1061 | if (con->nodeid == 0) { | 935 | if (con->nodeid == 0) { |
1062 | log_print("attempt to connect sock 0 foiled"); | 936 | log_print("attempt to connect sock 0 foiled"); |
@@ -1068,8 +942,10 @@ static void tcp_connect_to_sock(struct connection *con) | |||
1068 | goto out; | 942 | goto out; |
1069 | 943 | ||
1070 | /* Some odd races can cause double-connects, ignore them */ | 944 | /* Some odd races can cause double-connects, ignore them */ |
1071 | if (con->sock) | 945 | if (con->sock) { |
946 | result = 0; | ||
1072 | goto out; | 947 | goto out; |
948 | } | ||
1073 | 949 | ||
1074 | /* Create a socket to communicate with */ | 950 | /* Create a socket to communicate with */ |
1075 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, | 951 | result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, |
@@ -1078,11 +954,8 @@ static void tcp_connect_to_sock(struct connection *con) | |||
1078 | goto out_err; | 954 | goto out_err; |
1079 | 955 | ||
1080 | memset(&saddr, 0, sizeof(saddr)); | 956 | memset(&saddr, 0, sizeof(saddr)); |
1081 | result = nodeid_to_addr(con->nodeid, &saddr, NULL); | 957 | if (dlm_nodeid_to_addr(con->nodeid, &saddr)) |
1082 | if (result < 0) { | ||
1083 | log_print("no address for nodeid %d", con->nodeid); | ||
1084 | goto out_err; | 958 | goto out_err; |
1085 | } | ||
1086 | 959 | ||
1087 | sock->sk->sk_user_data = con; | 960 | sock->sk->sk_user_data = con; |
1088 | con->rx_action = receive_from_sock; | 961 | con->rx_action = receive_from_sock; |
@@ -1108,7 +981,8 @@ static void tcp_connect_to_sock(struct connection *con) | |||
1108 | kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, | 981 | kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, |
1109 | sizeof(one)); | 982 | sizeof(one)); |
1110 | 983 | ||
1111 | result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, | 984 | result = |
985 | sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, | ||
1112 | O_NONBLOCK); | 986 | O_NONBLOCK); |
1113 | if (result == -EINPROGRESS) | 987 | if (result == -EINPROGRESS) |
1114 | result = 0; | 988 | result = 0; |
@@ -1126,17 +1000,11 @@ out_err: | |||
1126 | * Some errors are fatal and this list might need adjusting. For other | 1000 | * Some errors are fatal and this list might need adjusting. For other |
1127 | * errors we try again until the max number of retries is reached. | 1001 | * errors we try again until the max number of retries is reached. |
1128 | */ | 1002 | */ |
1129 | if (result != -EHOSTUNREACH && | 1003 | if (result != -EHOSTUNREACH && result != -ENETUNREACH && |
1130 | result != -ENETUNREACH && | 1004 | result != -ENETDOWN && result != -EINVAL |
1131 | result != -ENETDOWN && | 1005 | && result != -EPROTONOSUPPORT) { |
1132 | result != -EINVAL && | ||
1133 | result != -EPROTONOSUPPORT) { | ||
1134 | log_print("connect %d try %d error %d", con->nodeid, | ||
1135 | con->retries, result); | ||
1136 | mutex_unlock(&con->sock_mutex); | ||
1137 | msleep(1000); | ||
1138 | lowcomms_connect_sock(con); | 1006 | lowcomms_connect_sock(con); |
1139 | return; | 1007 | result = 0; |
1140 | } | 1008 | } |
1141 | out: | 1009 | out: |
1142 | mutex_unlock(&con->sock_mutex); | 1010 | mutex_unlock(&con->sock_mutex); |
@@ -1174,8 +1042,10 @@ static struct socket *tcp_create_listen_sock(struct connection *con, | |||
1174 | if (result < 0) { | 1042 | if (result < 0) { |
1175 | log_print("Failed to set SO_REUSEADDR on socket: %d", result); | 1043 | log_print("Failed to set SO_REUSEADDR on socket: %d", result); |
1176 | } | 1044 | } |
1045 | sock->sk->sk_user_data = con; | ||
1177 | con->rx_action = tcp_accept_from_sock; | 1046 | con->rx_action = tcp_accept_from_sock; |
1178 | con->connect_action = tcp_connect_to_sock; | 1047 | con->connect_action = tcp_connect_to_sock; |
1048 | con->sock = sock; | ||
1179 | 1049 | ||
1180 | /* Bind to our port */ | 1050 | /* Bind to our port */ |
1181 | make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); | 1051 | make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); |
@@ -1212,7 +1082,7 @@ static void init_local(void) | |||
1212 | int i; | 1082 | int i; |
1213 | 1083 | ||
1214 | dlm_local_count = 0; | 1084 | dlm_local_count = 0; |
1215 | for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) { | 1085 | for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) { |
1216 | if (dlm_our_addr(&sas, i)) | 1086 | if (dlm_our_addr(&sas, i)) |
1217 | break; | 1087 | break; |
1218 | 1088 | ||
@@ -1385,6 +1255,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) | |||
1385 | struct connection *con; | 1255 | struct connection *con; |
1386 | struct writequeue_entry *e; | 1256 | struct writequeue_entry *e; |
1387 | int offset = 0; | 1257 | int offset = 0; |
1258 | int users = 0; | ||
1388 | 1259 | ||
1389 | con = nodeid2con(nodeid, allocation); | 1260 | con = nodeid2con(nodeid, allocation); |
1390 | if (!con) | 1261 | if (!con) |
@@ -1398,7 +1269,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) | |||
1398 | } else { | 1269 | } else { |
1399 | offset = e->end; | 1270 | offset = e->end; |
1400 | e->end += len; | 1271 | e->end += len; |
1401 | e->users++; | 1272 | users = e->users++; |
1402 | } | 1273 | } |
1403 | spin_unlock(&con->writequeue_lock); | 1274 | spin_unlock(&con->writequeue_lock); |
1404 | 1275 | ||
@@ -1413,7 +1284,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) | |||
1413 | spin_lock(&con->writequeue_lock); | 1284 | spin_lock(&con->writequeue_lock); |
1414 | offset = e->end; | 1285 | offset = e->end; |
1415 | e->end += len; | 1286 | e->end += len; |
1416 | e->users++; | 1287 | users = e->users++; |
1417 | list_add_tail(&e->list, &con->writequeue); | 1288 | list_add_tail(&e->list, &con->writequeue); |
1418 | spin_unlock(&con->writequeue_lock); | 1289 | spin_unlock(&con->writequeue_lock); |
1419 | goto got_one; | 1290 | goto got_one; |
@@ -1485,7 +1356,8 @@ static void send_to_sock(struct connection *con) | |||
1485 | } | 1356 | } |
1486 | cond_resched(); | 1357 | cond_resched(); |
1487 | goto out; | 1358 | goto out; |
1488 | } else if (ret < 0) | 1359 | } |
1360 | if (ret <= 0) | ||
1489 | goto send_error; | 1361 | goto send_error; |
1490 | } | 1362 | } |
1491 | 1363 | ||
@@ -1502,6 +1374,7 @@ static void send_to_sock(struct connection *con) | |||
1502 | if (e->len == 0 && e->users == 0) { | 1374 | if (e->len == 0 && e->users == 0) { |
1503 | list_del(&e->list); | 1375 | list_del(&e->list); |
1504 | free_entry(e); | 1376 | free_entry(e); |
1377 | continue; | ||
1505 | } | 1378 | } |
1506 | } | 1379 | } |
1507 | spin_unlock(&con->writequeue_lock); | 1380 | spin_unlock(&con->writequeue_lock); |
@@ -1519,6 +1392,7 @@ out_connect: | |||
1519 | mutex_unlock(&con->sock_mutex); | 1392 | mutex_unlock(&con->sock_mutex); |
1520 | if (!test_bit(CF_INIT_PENDING, &con->flags)) | 1393 | if (!test_bit(CF_INIT_PENDING, &con->flags)) |
1521 | lowcomms_connect_sock(con); | 1394 | lowcomms_connect_sock(con); |
1395 | return; | ||
1522 | } | 1396 | } |
1523 | 1397 | ||
1524 | static void clean_one_writequeue(struct connection *con) | 1398 | static void clean_one_writequeue(struct connection *con) |
@@ -1538,7 +1412,6 @@ static void clean_one_writequeue(struct connection *con) | |||
1538 | int dlm_lowcomms_close(int nodeid) | 1412 | int dlm_lowcomms_close(int nodeid) |
1539 | { | 1413 | { |
1540 | struct connection *con; | 1414 | struct connection *con; |
1541 | struct dlm_node_addr *na; | ||
1542 | 1415 | ||
1543 | log_print("closing connection to node %d", nodeid); | 1416 | log_print("closing connection to node %d", nodeid); |
1544 | con = nodeid2con(nodeid, 0); | 1417 | con = nodeid2con(nodeid, 0); |
@@ -1553,17 +1426,6 @@ int dlm_lowcomms_close(int nodeid) | |||
1553 | clean_one_writequeue(con); | 1426 | clean_one_writequeue(con); |
1554 | close_connection(con, true); | 1427 | close_connection(con, true); |
1555 | } | 1428 | } |
1556 | |||
1557 | spin_lock(&dlm_node_addrs_spin); | ||
1558 | na = find_node_addr(nodeid); | ||
1559 | if (na) { | ||
1560 | list_del(&na->list); | ||
1561 | while (na->addr_count--) | ||
1562 | kfree(na->addr[na->addr_count]); | ||
1563 | kfree(na); | ||
1564 | } | ||
1565 | spin_unlock(&dlm_node_addrs_spin); | ||
1566 | |||
1567 | return 0; | 1429 | return 0; |
1568 | } | 1430 | } |
1569 | 1431 | ||
@@ -1647,7 +1509,6 @@ void dlm_lowcomms_stop(void) | |||
1647 | socket activity. | 1509 | socket activity. |
1648 | */ | 1510 | */ |
1649 | mutex_lock(&connections_lock); | 1511 | mutex_lock(&connections_lock); |
1650 | dlm_allow_conn = 0; | ||
1651 | foreach_conn(stop_conn); | 1512 | foreach_conn(stop_conn); |
1652 | mutex_unlock(&connections_lock); | 1513 | mutex_unlock(&connections_lock); |
1653 | 1514 | ||
@@ -1675,7 +1536,7 @@ int dlm_lowcomms_start(void) | |||
1675 | if (!dlm_local_count) { | 1536 | if (!dlm_local_count) { |
1676 | error = -ENOTCONN; | 1537 | error = -ENOTCONN; |
1677 | log_print("no local IP address has been set"); | 1538 | log_print("no local IP address has been set"); |
1678 | goto fail; | 1539 | goto out; |
1679 | } | 1540 | } |
1680 | 1541 | ||
1681 | error = -ENOMEM; | 1542 | error = -ENOMEM; |
@@ -1683,13 +1544,7 @@ int dlm_lowcomms_start(void) | |||
1683 | __alignof__(struct connection), 0, | 1544 | __alignof__(struct connection), 0, |
1684 | NULL); | 1545 | NULL); |
1685 | if (!con_cache) | 1546 | if (!con_cache) |
1686 | goto fail; | 1547 | goto out; |
1687 | |||
1688 | error = work_start(); | ||
1689 | if (error) | ||
1690 | goto fail_destroy; | ||
1691 | |||
1692 | dlm_allow_conn = 1; | ||
1693 | 1548 | ||
1694 | /* Start listening */ | 1549 | /* Start listening */ |
1695 | if (dlm_config.ci_protocol == 0) | 1550 | if (dlm_config.ci_protocol == 0) |
@@ -1699,31 +1554,20 @@ int dlm_lowcomms_start(void) | |||
1699 | if (error) | 1554 | if (error) |
1700 | goto fail_unlisten; | 1555 | goto fail_unlisten; |
1701 | 1556 | ||
1557 | error = work_start(); | ||
1558 | if (error) | ||
1559 | goto fail_unlisten; | ||
1560 | |||
1702 | return 0; | 1561 | return 0; |
1703 | 1562 | ||
1704 | fail_unlisten: | 1563 | fail_unlisten: |
1705 | dlm_allow_conn = 0; | ||
1706 | con = nodeid2con(0,0); | 1564 | con = nodeid2con(0,0); |
1707 | if (con) { | 1565 | if (con) { |
1708 | close_connection(con, false); | 1566 | close_connection(con, false); |
1709 | kmem_cache_free(con_cache, con); | 1567 | kmem_cache_free(con_cache, con); |
1710 | } | 1568 | } |
1711 | fail_destroy: | ||
1712 | kmem_cache_destroy(con_cache); | 1569 | kmem_cache_destroy(con_cache); |
1713 | fail: | ||
1714 | return error; | ||
1715 | } | ||
1716 | 1570 | ||
1717 | void dlm_lowcomms_exit(void) | 1571 | out: |
1718 | { | 1572 | return error; |
1719 | struct dlm_node_addr *na, *safe; | ||
1720 | |||
1721 | spin_lock(&dlm_node_addrs_spin); | ||
1722 | list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) { | ||
1723 | list_del(&na->list); | ||
1724 | while (na->addr_count--) | ||
1725 | kfree(na->addr[na->addr_count]); | ||
1726 | kfree(na); | ||
1727 | } | ||
1728 | spin_unlock(&dlm_node_addrs_spin); | ||
1729 | } | 1573 | } |
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index 67462e54fc2..1311e642628 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h | |||
@@ -16,12 +16,10 @@ | |||
16 | 16 | ||
17 | int dlm_lowcomms_start(void); | 17 | int dlm_lowcomms_start(void); |
18 | void dlm_lowcomms_stop(void); | 18 | void dlm_lowcomms_stop(void); |
19 | void dlm_lowcomms_exit(void); | ||
20 | int dlm_lowcomms_close(int nodeid); | 19 | int dlm_lowcomms_close(int nodeid); |
21 | void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); | 20 | void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); |
22 | void dlm_lowcomms_commit_buffer(void *mh); | 21 | void dlm_lowcomms_commit_buffer(void *mh); |
23 | int dlm_lowcomms_connect_node(int nodeid); | 22 | int dlm_lowcomms_connect_node(int nodeid); |
24 | int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); | ||
25 | 23 | ||
26 | #endif /* __LOWCOMMS_DOT_H__ */ | 24 | #endif /* __LOWCOMMS_DOT_H__ */ |
27 | 25 | ||
diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 079c0bd71ab..5a59efa0bb4 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include "user.h" | 17 | #include "user.h" |
18 | #include "memory.h" | 18 | #include "memory.h" |
19 | #include "config.h" | 19 | #include "config.h" |
20 | #include "lowcomms.h" | ||
21 | 20 | ||
22 | static int __init init_dlm(void) | 21 | static int __init init_dlm(void) |
23 | { | 22 | { |
@@ -79,7 +78,6 @@ static void __exit exit_dlm(void) | |||
79 | dlm_config_exit(); | 78 | dlm_config_exit(); |
80 | dlm_memory_exit(); | 79 | dlm_memory_exit(); |
81 | dlm_lockspace_exit(); | 80 | dlm_lockspace_exit(); |
82 | dlm_lowcomms_exit(); | ||
83 | dlm_unregister_debugfs(); | 81 | dlm_unregister_debugfs(); |
84 | } | 82 | } |
85 | 83 | ||
diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 476557b5492..b12532e553f 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. |
5 | ** | 5 | ** |
6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, |
7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -19,280 +19,6 @@ | |||
19 | #include "config.h" | 19 | #include "config.h" |
20 | #include "lowcomms.h" | 20 | #include "lowcomms.h" |
21 | 21 | ||
22 | int dlm_slots_version(struct dlm_header *h) | ||
23 | { | ||
24 | if ((h->h_version & 0x0000FFFF) < DLM_HEADER_SLOTS) | ||
25 | return 0; | ||
26 | return 1; | ||
27 | } | ||
28 | |||
29 | void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, | ||
30 | struct dlm_member *memb) | ||
31 | { | ||
32 | struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; | ||
33 | |||
34 | if (!dlm_slots_version(&rc->rc_header)) | ||
35 | return; | ||
36 | |||
37 | memb->slot = le16_to_cpu(rf->rf_our_slot); | ||
38 | memb->generation = le32_to_cpu(rf->rf_generation); | ||
39 | } | ||
40 | |||
41 | void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc) | ||
42 | { | ||
43 | struct dlm_slot *slot; | ||
44 | struct rcom_slot *ro; | ||
45 | int i; | ||
46 | |||
47 | ro = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config)); | ||
48 | |||
49 | /* ls_slots array is sparse, but not rcom_slots */ | ||
50 | |||
51 | for (i = 0; i < ls->ls_slots_size; i++) { | ||
52 | slot = &ls->ls_slots[i]; | ||
53 | if (!slot->nodeid) | ||
54 | continue; | ||
55 | ro->ro_nodeid = cpu_to_le32(slot->nodeid); | ||
56 | ro->ro_slot = cpu_to_le16(slot->slot); | ||
57 | ro++; | ||
58 | } | ||
59 | } | ||
60 | |||
61 | #define SLOT_DEBUG_LINE 128 | ||
62 | |||
63 | static void log_debug_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, | ||
64 | struct rcom_slot *ro0, struct dlm_slot *array, | ||
65 | int array_size) | ||
66 | { | ||
67 | char line[SLOT_DEBUG_LINE]; | ||
68 | int len = SLOT_DEBUG_LINE - 1; | ||
69 | int pos = 0; | ||
70 | int ret, i; | ||
71 | |||
72 | if (!dlm_config.ci_log_debug) | ||
73 | return; | ||
74 | |||
75 | memset(line, 0, sizeof(line)); | ||
76 | |||
77 | if (array) { | ||
78 | for (i = 0; i < array_size; i++) { | ||
79 | if (!array[i].nodeid) | ||
80 | continue; | ||
81 | |||
82 | ret = snprintf(line + pos, len - pos, " %d:%d", | ||
83 | array[i].slot, array[i].nodeid); | ||
84 | if (ret >= len - pos) | ||
85 | break; | ||
86 | pos += ret; | ||
87 | } | ||
88 | } else if (ro0) { | ||
89 | for (i = 0; i < num_slots; i++) { | ||
90 | ret = snprintf(line + pos, len - pos, " %d:%d", | ||
91 | ro0[i].ro_slot, ro0[i].ro_nodeid); | ||
92 | if (ret >= len - pos) | ||
93 | break; | ||
94 | pos += ret; | ||
95 | } | ||
96 | } | ||
97 | |||
98 | log_debug(ls, "generation %u slots %d%s", gen, num_slots, line); | ||
99 | } | ||
100 | |||
101 | int dlm_slots_copy_in(struct dlm_ls *ls) | ||
102 | { | ||
103 | struct dlm_member *memb; | ||
104 | struct dlm_rcom *rc = ls->ls_recover_buf; | ||
105 | struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; | ||
106 | struct rcom_slot *ro0, *ro; | ||
107 | int our_nodeid = dlm_our_nodeid(); | ||
108 | int i, num_slots; | ||
109 | uint32_t gen; | ||
110 | |||
111 | if (!dlm_slots_version(&rc->rc_header)) | ||
112 | return -1; | ||
113 | |||
114 | gen = le32_to_cpu(rf->rf_generation); | ||
115 | if (gen <= ls->ls_generation) { | ||
116 | log_error(ls, "dlm_slots_copy_in gen %u old %u", | ||
117 | gen, ls->ls_generation); | ||
118 | } | ||
119 | ls->ls_generation = gen; | ||
120 | |||
121 | num_slots = le16_to_cpu(rf->rf_num_slots); | ||
122 | if (!num_slots) | ||
123 | return -1; | ||
124 | |||
125 | ro0 = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config)); | ||
126 | |||
127 | for (i = 0, ro = ro0; i < num_slots; i++, ro++) { | ||
128 | ro->ro_nodeid = le32_to_cpu(ro->ro_nodeid); | ||
129 | ro->ro_slot = le16_to_cpu(ro->ro_slot); | ||
130 | } | ||
131 | |||
132 | log_debug_slots(ls, gen, num_slots, ro0, NULL, 0); | ||
133 | |||
134 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
135 | for (i = 0, ro = ro0; i < num_slots; i++, ro++) { | ||
136 | if (ro->ro_nodeid != memb->nodeid) | ||
137 | continue; | ||
138 | memb->slot = ro->ro_slot; | ||
139 | memb->slot_prev = memb->slot; | ||
140 | break; | ||
141 | } | ||
142 | |||
143 | if (memb->nodeid == our_nodeid) { | ||
144 | if (ls->ls_slot && ls->ls_slot != memb->slot) { | ||
145 | log_error(ls, "dlm_slots_copy_in our slot " | ||
146 | "changed %d %d", ls->ls_slot, | ||
147 | memb->slot); | ||
148 | return -1; | ||
149 | } | ||
150 | |||
151 | if (!ls->ls_slot) | ||
152 | ls->ls_slot = memb->slot; | ||
153 | } | ||
154 | |||
155 | if (!memb->slot) { | ||
156 | log_error(ls, "dlm_slots_copy_in nodeid %d no slot", | ||
157 | memb->nodeid); | ||
158 | return -1; | ||
159 | } | ||
160 | } | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | /* for any nodes that do not support slots, we will not have set memb->slot | ||
166 | in wait_status_all(), so memb->slot will remain -1, and we will not | ||
167 | assign slots or set ls_num_slots here */ | ||
168 | |||
169 | int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, | ||
170 | struct dlm_slot **slots_out, uint32_t *gen_out) | ||
171 | { | ||
172 | struct dlm_member *memb; | ||
173 | struct dlm_slot *array; | ||
174 | int our_nodeid = dlm_our_nodeid(); | ||
175 | int array_size, max_slots, i; | ||
176 | int need = 0; | ||
177 | int max = 0; | ||
178 | int num = 0; | ||
179 | uint32_t gen = 0; | ||
180 | |||
181 | /* our own memb struct will have slot -1 gen 0 */ | ||
182 | |||
183 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
184 | if (memb->nodeid == our_nodeid) { | ||
185 | memb->slot = ls->ls_slot; | ||
186 | memb->generation = ls->ls_generation; | ||
187 | break; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
192 | if (memb->generation > gen) | ||
193 | gen = memb->generation; | ||
194 | |||
195 | /* node doesn't support slots */ | ||
196 | |||
197 | if (memb->slot == -1) | ||
198 | return -1; | ||
199 | |||
200 | /* node needs a slot assigned */ | ||
201 | |||
202 | if (!memb->slot) | ||
203 | need++; | ||
204 | |||
205 | /* node has a slot assigned */ | ||
206 | |||
207 | num++; | ||
208 | |||
209 | if (!max || max < memb->slot) | ||
210 | max = memb->slot; | ||
211 | |||
212 | /* sanity check, once slot is assigned it shouldn't change */ | ||
213 | |||
214 | if (memb->slot_prev && memb->slot && memb->slot_prev != memb->slot) { | ||
215 | log_error(ls, "nodeid %d slot changed %d %d", | ||
216 | memb->nodeid, memb->slot_prev, memb->slot); | ||
217 | return -1; | ||
218 | } | ||
219 | memb->slot_prev = memb->slot; | ||
220 | } | ||
221 | |||
222 | array_size = max + need; | ||
223 | |||
224 | array = kzalloc(array_size * sizeof(struct dlm_slot), GFP_NOFS); | ||
225 | if (!array) | ||
226 | return -ENOMEM; | ||
227 | |||
228 | num = 0; | ||
229 | |||
230 | /* fill in slots (offsets) that are used */ | ||
231 | |||
232 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
233 | if (!memb->slot) | ||
234 | continue; | ||
235 | |||
236 | if (memb->slot > array_size) { | ||
237 | log_error(ls, "invalid slot number %d", memb->slot); | ||
238 | kfree(array); | ||
239 | return -1; | ||
240 | } | ||
241 | |||
242 | array[memb->slot - 1].nodeid = memb->nodeid; | ||
243 | array[memb->slot - 1].slot = memb->slot; | ||
244 | num++; | ||
245 | } | ||
246 | |||
247 | /* assign new slots from unused offsets */ | ||
248 | |||
249 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
250 | if (memb->slot) | ||
251 | continue; | ||
252 | |||
253 | for (i = 0; i < array_size; i++) { | ||
254 | if (array[i].nodeid) | ||
255 | continue; | ||
256 | |||
257 | memb->slot = i + 1; | ||
258 | memb->slot_prev = memb->slot; | ||
259 | array[i].nodeid = memb->nodeid; | ||
260 | array[i].slot = memb->slot; | ||
261 | num++; | ||
262 | |||
263 | if (!ls->ls_slot && memb->nodeid == our_nodeid) | ||
264 | ls->ls_slot = memb->slot; | ||
265 | break; | ||
266 | } | ||
267 | |||
268 | if (!memb->slot) { | ||
269 | log_error(ls, "no free slot found"); | ||
270 | kfree(array); | ||
271 | return -1; | ||
272 | } | ||
273 | } | ||
274 | |||
275 | gen++; | ||
276 | |||
277 | log_debug_slots(ls, gen, num, NULL, array, array_size); | ||
278 | |||
279 | max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) - | ||
280 | sizeof(struct rcom_config)) / sizeof(struct rcom_slot); | ||
281 | |||
282 | if (num > max_slots) { | ||
283 | log_error(ls, "num_slots %d exceeds max_slots %d", | ||
284 | num, max_slots); | ||
285 | kfree(array); | ||
286 | return -1; | ||
287 | } | ||
288 | |||
289 | *gen_out = gen; | ||
290 | *slots_out = array; | ||
291 | *slots_size = array_size; | ||
292 | *num_slots = num; | ||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) | 22 | static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) |
297 | { | 23 | { |
298 | struct dlm_member *memb = NULL; | 24 | struct dlm_member *memb = NULL; |
@@ -317,51 +43,59 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) | |||
317 | } | 43 | } |
318 | } | 44 | } |
319 | 45 | ||
320 | static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node) | 46 | static int dlm_add_member(struct dlm_ls *ls, int nodeid) |
321 | { | 47 | { |
322 | struct dlm_member *memb; | 48 | struct dlm_member *memb; |
323 | int error; | 49 | int w, error; |
324 | 50 | ||
325 | memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); | 51 | memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); |
326 | if (!memb) | 52 | if (!memb) |
327 | return -ENOMEM; | 53 | return -ENOMEM; |
328 | 54 | ||
329 | error = dlm_lowcomms_connect_node(node->nodeid); | 55 | w = dlm_node_weight(ls->ls_name, nodeid); |
56 | if (w < 0) { | ||
57 | kfree(memb); | ||
58 | return w; | ||
59 | } | ||
60 | |||
61 | error = dlm_lowcomms_connect_node(nodeid); | ||
330 | if (error < 0) { | 62 | if (error < 0) { |
331 | kfree(memb); | 63 | kfree(memb); |
332 | return error; | 64 | return error; |
333 | } | 65 | } |
334 | 66 | ||
335 | memb->nodeid = node->nodeid; | 67 | memb->nodeid = nodeid; |
336 | memb->weight = node->weight; | 68 | memb->weight = w; |
337 | memb->comm_seq = node->comm_seq; | ||
338 | add_ordered_member(ls, memb); | 69 | add_ordered_member(ls, memb); |
339 | ls->ls_num_nodes++; | 70 | ls->ls_num_nodes++; |
340 | return 0; | 71 | return 0; |
341 | } | 72 | } |
342 | 73 | ||
343 | static struct dlm_member *find_memb(struct list_head *head, int nodeid) | 74 | static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) |
344 | { | 75 | { |
345 | struct dlm_member *memb; | 76 | list_move(&memb->list, &ls->ls_nodes_gone); |
346 | 77 | ls->ls_num_nodes--; | |
347 | list_for_each_entry(memb, head, list) { | ||
348 | if (memb->nodeid == nodeid) | ||
349 | return memb; | ||
350 | } | ||
351 | return NULL; | ||
352 | } | 78 | } |
353 | 79 | ||
354 | int dlm_is_member(struct dlm_ls *ls, int nodeid) | 80 | int dlm_is_member(struct dlm_ls *ls, int nodeid) |
355 | { | 81 | { |
356 | if (find_memb(&ls->ls_nodes, nodeid)) | 82 | struct dlm_member *memb; |
357 | return 1; | 83 | |
84 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
85 | if (memb->nodeid == nodeid) | ||
86 | return 1; | ||
87 | } | ||
358 | return 0; | 88 | return 0; |
359 | } | 89 | } |
360 | 90 | ||
361 | int dlm_is_removed(struct dlm_ls *ls, int nodeid) | 91 | int dlm_is_removed(struct dlm_ls *ls, int nodeid) |
362 | { | 92 | { |
363 | if (find_memb(&ls->ls_nodes_gone, nodeid)) | 93 | struct dlm_member *memb; |
364 | return 1; | 94 | |
95 | list_for_each_entry(memb, &ls->ls_nodes_gone, list) { | ||
96 | if (memb->nodeid == nodeid) | ||
97 | return 1; | ||
98 | } | ||
365 | return 0; | 99 | return 0; |
366 | } | 100 | } |
367 | 101 | ||
@@ -442,7 +176,7 @@ static int ping_members(struct dlm_ls *ls) | |||
442 | error = dlm_recovery_stopped(ls); | 176 | error = dlm_recovery_stopped(ls); |
443 | if (error) | 177 | if (error) |
444 | break; | 178 | break; |
445 | error = dlm_rcom_status(ls, memb->nodeid, 0); | 179 | error = dlm_rcom_status(ls, memb->nodeid); |
446 | if (error) | 180 | if (error) |
447 | break; | 181 | break; |
448 | } | 182 | } |
@@ -452,88 +186,10 @@ static int ping_members(struct dlm_ls *ls) | |||
452 | return error; | 186 | return error; |
453 | } | 187 | } |
454 | 188 | ||
455 | static void dlm_lsop_recover_prep(struct dlm_ls *ls) | ||
456 | { | ||
457 | if (!ls->ls_ops || !ls->ls_ops->recover_prep) | ||
458 | return; | ||
459 | ls->ls_ops->recover_prep(ls->ls_ops_arg); | ||
460 | } | ||
461 | |||
462 | static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) | ||
463 | { | ||
464 | struct dlm_slot slot; | ||
465 | uint32_t seq; | ||
466 | int error; | ||
467 | |||
468 | if (!ls->ls_ops || !ls->ls_ops->recover_slot) | ||
469 | return; | ||
470 | |||
471 | /* if there is no comms connection with this node | ||
472 | or the present comms connection is newer | ||
473 | than the one when this member was added, then | ||
474 | we consider the node to have failed (versus | ||
475 | being removed due to dlm_release_lockspace) */ | ||
476 | |||
477 | error = dlm_comm_seq(memb->nodeid, &seq); | ||
478 | |||
479 | if (!error && seq == memb->comm_seq) | ||
480 | return; | ||
481 | |||
482 | slot.nodeid = memb->nodeid; | ||
483 | slot.slot = memb->slot; | ||
484 | |||
485 | ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot); | ||
486 | } | ||
487 | |||
488 | void dlm_lsop_recover_done(struct dlm_ls *ls) | ||
489 | { | ||
490 | struct dlm_member *memb; | ||
491 | struct dlm_slot *slots; | ||
492 | int i, num; | ||
493 | |||
494 | if (!ls->ls_ops || !ls->ls_ops->recover_done) | ||
495 | return; | ||
496 | |||
497 | num = ls->ls_num_nodes; | ||
498 | |||
499 | slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL); | ||
500 | if (!slots) | ||
501 | return; | ||
502 | |||
503 | i = 0; | ||
504 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
505 | if (i == num) { | ||
506 | log_error(ls, "dlm_lsop_recover_done bad num %d", num); | ||
507 | goto out; | ||
508 | } | ||
509 | slots[i].nodeid = memb->nodeid; | ||
510 | slots[i].slot = memb->slot; | ||
511 | i++; | ||
512 | } | ||
513 | |||
514 | ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num, | ||
515 | ls->ls_slot, ls->ls_generation); | ||
516 | out: | ||
517 | kfree(slots); | ||
518 | } | ||
519 | |||
520 | static struct dlm_config_node *find_config_node(struct dlm_recover *rv, | ||
521 | int nodeid) | ||
522 | { | ||
523 | int i; | ||
524 | |||
525 | for (i = 0; i < rv->nodes_count; i++) { | ||
526 | if (rv->nodes[i].nodeid == nodeid) | ||
527 | return &rv->nodes[i]; | ||
528 | } | ||
529 | return NULL; | ||
530 | } | ||
531 | |||
532 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | 189 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) |
533 | { | 190 | { |
534 | struct dlm_member *memb, *safe; | 191 | struct dlm_member *memb, *safe; |
535 | struct dlm_config_node *node; | 192 | int i, error, found, pos = 0, neg = 0, low = -1; |
536 | int i, error, neg = 0, low = -1; | ||
537 | 193 | ||
538 | /* previously removed members that we've not finished removing need to | 194 | /* previously removed members that we've not finished removing need to |
539 | count as a negative change so the "neg" recovery steps will happen */ | 195 | count as a negative change so the "neg" recovery steps will happen */ |
@@ -546,32 +202,46 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
546 | /* move departed members from ls_nodes to ls_nodes_gone */ | 202 | /* move departed members from ls_nodes to ls_nodes_gone */ |
547 | 203 | ||
548 | list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { | 204 | list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { |
549 | node = find_config_node(rv, memb->nodeid); | 205 | found = 0; |
550 | if (node && !node->new) | 206 | for (i = 0; i < rv->node_count; i++) { |
551 | continue; | 207 | if (memb->nodeid == rv->nodeids[i]) { |
208 | found = 1; | ||
209 | break; | ||
210 | } | ||
211 | } | ||
552 | 212 | ||
553 | if (!node) { | 213 | if (!found) { |
214 | neg++; | ||
215 | dlm_remove_member(ls, memb); | ||
554 | log_debug(ls, "remove member %d", memb->nodeid); | 216 | log_debug(ls, "remove member %d", memb->nodeid); |
555 | } else { | ||
556 | /* removed and re-added */ | ||
557 | log_debug(ls, "remove member %d comm_seq %u %u", | ||
558 | memb->nodeid, memb->comm_seq, node->comm_seq); | ||
559 | } | 217 | } |
218 | } | ||
560 | 219 | ||
220 | /* Add an entry to ls_nodes_gone for members that were removed and | ||
221 | then added again, so that previous state for these nodes will be | ||
222 | cleared during recovery. */ | ||
223 | |||
224 | for (i = 0; i < rv->new_count; i++) { | ||
225 | if (!dlm_is_member(ls, rv->new[i])) | ||
226 | continue; | ||
227 | log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); | ||
228 | |||
229 | memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); | ||
230 | if (!memb) | ||
231 | return -ENOMEM; | ||
232 | memb->nodeid = rv->new[i]; | ||
233 | list_add_tail(&memb->list, &ls->ls_nodes_gone); | ||
561 | neg++; | 234 | neg++; |
562 | list_move(&memb->list, &ls->ls_nodes_gone); | ||
563 | ls->ls_num_nodes--; | ||
564 | dlm_lsop_recover_slot(ls, memb); | ||
565 | } | 235 | } |
566 | 236 | ||
567 | /* add new members to ls_nodes */ | 237 | /* add new members to ls_nodes */ |
568 | 238 | ||
569 | for (i = 0; i < rv->nodes_count; i++) { | 239 | for (i = 0; i < rv->node_count; i++) { |
570 | node = &rv->nodes[i]; | 240 | if (dlm_is_member(ls, rv->nodeids[i])) |
571 | if (dlm_is_member(ls, node->nodeid)) | ||
572 | continue; | 241 | continue; |
573 | dlm_add_member(ls, node); | 242 | dlm_add_member(ls, rv->nodeids[i]); |
574 | log_debug(ls, "add member %d", node->nodeid); | 243 | pos++; |
244 | log_debug(ls, "add member %d", rv->nodeids[i]); | ||
575 | } | 245 | } |
576 | 246 | ||
577 | list_for_each_entry(memb, &ls->ls_nodes, list) { | 247 | list_for_each_entry(memb, &ls->ls_nodes, list) { |
@@ -581,6 +251,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
581 | ls->ls_low_nodeid = low; | 251 | ls->ls_low_nodeid = low; |
582 | 252 | ||
583 | make_member_array(ls); | 253 | make_member_array(ls); |
254 | dlm_set_recover_status(ls, DLM_RS_NODES); | ||
584 | *neg_out = neg; | 255 | *neg_out = neg; |
585 | 256 | ||
586 | error = ping_members(ls); | 257 | error = ping_members(ls); |
@@ -590,8 +261,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
590 | ls->ls_members_result = error; | 261 | ls->ls_members_result = error; |
591 | complete(&ls->ls_members_done); | 262 | complete(&ls->ls_members_done); |
592 | } | 263 | } |
264 | if (error) | ||
265 | goto out; | ||
593 | 266 | ||
594 | log_debug(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); | 267 | error = dlm_recover_members_wait(ls); |
268 | out: | ||
269 | log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error); | ||
595 | return error; | 270 | return error; |
596 | } | 271 | } |
597 | 272 | ||
@@ -616,13 +291,13 @@ int dlm_ls_stop(struct dlm_ls *ls) | |||
616 | down_write(&ls->ls_recv_active); | 291 | down_write(&ls->ls_recv_active); |
617 | 292 | ||
618 | /* | 293 | /* |
619 | * Abort any recovery that's in progress (see RECOVER_STOP, | 294 | * Abort any recovery that's in progress (see RECOVERY_STOP, |
620 | * dlm_recovery_stopped()) and tell any other threads running in the | 295 | * dlm_recovery_stopped()) and tell any other threads running in the |
621 | * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). | 296 | * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). |
622 | */ | 297 | */ |
623 | 298 | ||
624 | spin_lock(&ls->ls_recover_lock); | 299 | spin_lock(&ls->ls_recover_lock); |
625 | set_bit(LSFL_RECOVER_STOP, &ls->ls_flags); | 300 | set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); |
626 | new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); | 301 | new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); |
627 | ls->ls_recover_seq++; | 302 | ls->ls_recover_seq++; |
628 | spin_unlock(&ls->ls_recover_lock); | 303 | spin_unlock(&ls->ls_recover_lock); |
@@ -642,49 +317,36 @@ int dlm_ls_stop(struct dlm_ls *ls) | |||
642 | * when recovery is complete. | 317 | * when recovery is complete. |
643 | */ | 318 | */ |
644 | 319 | ||
645 | if (new) { | 320 | if (new) |
646 | set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags); | 321 | down_write(&ls->ls_in_recovery); |
647 | wake_up_process(ls->ls_recoverd_task); | ||
648 | wait_event(ls->ls_recover_lock_wait, | ||
649 | test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); | ||
650 | } | ||
651 | 322 | ||
652 | /* | 323 | /* |
653 | * The recoverd suspend/resume makes sure that dlm_recoverd (if | 324 | * The recoverd suspend/resume makes sure that dlm_recoverd (if |
654 | * running) has noticed RECOVER_STOP above and quit processing the | 325 | * running) has noticed RECOVERY_STOP above and quit processing the |
655 | * previous recovery. | 326 | * previous recovery. |
656 | */ | 327 | */ |
657 | 328 | ||
658 | dlm_recoverd_suspend(ls); | 329 | dlm_recoverd_suspend(ls); |
659 | |||
660 | spin_lock(&ls->ls_recover_lock); | ||
661 | kfree(ls->ls_slots); | ||
662 | ls->ls_slots = NULL; | ||
663 | ls->ls_num_slots = 0; | ||
664 | ls->ls_slots_size = 0; | ||
665 | ls->ls_recover_status = 0; | 330 | ls->ls_recover_status = 0; |
666 | spin_unlock(&ls->ls_recover_lock); | ||
667 | |||
668 | dlm_recoverd_resume(ls); | 331 | dlm_recoverd_resume(ls); |
669 | 332 | ||
670 | if (!ls->ls_recover_begin) | 333 | if (!ls->ls_recover_begin) |
671 | ls->ls_recover_begin = jiffies; | 334 | ls->ls_recover_begin = jiffies; |
672 | |||
673 | dlm_lsop_recover_prep(ls); | ||
674 | return 0; | 335 | return 0; |
675 | } | 336 | } |
676 | 337 | ||
677 | int dlm_ls_start(struct dlm_ls *ls) | 338 | int dlm_ls_start(struct dlm_ls *ls) |
678 | { | 339 | { |
679 | struct dlm_recover *rv = NULL, *rv_old; | 340 | struct dlm_recover *rv = NULL, *rv_old; |
680 | struct dlm_config_node *nodes; | 341 | int *ids = NULL, *new = NULL; |
681 | int error, count; | 342 | int error, ids_count = 0, new_count = 0; |
682 | 343 | ||
683 | rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS); | 344 | rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS); |
684 | if (!rv) | 345 | if (!rv) |
685 | return -ENOMEM; | 346 | return -ENOMEM; |
686 | 347 | ||
687 | error = dlm_config_nodes(ls->ls_name, &nodes, &count); | 348 | error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, |
349 | &new, &new_count); | ||
688 | if (error < 0) | 350 | if (error < 0) |
689 | goto fail; | 351 | goto fail; |
690 | 352 | ||
@@ -699,8 +361,10 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
699 | goto fail; | 361 | goto fail; |
700 | } | 362 | } |
701 | 363 | ||
702 | rv->nodes = nodes; | 364 | rv->nodeids = ids; |
703 | rv->nodes_count = count; | 365 | rv->node_count = ids_count; |
366 | rv->new = new; | ||
367 | rv->new_count = new_count; | ||
704 | rv->seq = ++ls->ls_recover_seq; | 368 | rv->seq = ++ls->ls_recover_seq; |
705 | rv_old = ls->ls_recover_args; | 369 | rv_old = ls->ls_recover_args; |
706 | ls->ls_recover_args = rv; | 370 | ls->ls_recover_args = rv; |
@@ -708,18 +372,19 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
708 | 372 | ||
709 | if (rv_old) { | 373 | if (rv_old) { |
710 | log_error(ls, "unused recovery %llx %d", | 374 | log_error(ls, "unused recovery %llx %d", |
711 | (unsigned long long)rv_old->seq, rv_old->nodes_count); | 375 | (unsigned long long)rv_old->seq, rv_old->node_count); |
712 | kfree(rv_old->nodes); | 376 | kfree(rv_old->nodeids); |
377 | kfree(rv_old->new); | ||
713 | kfree(rv_old); | 378 | kfree(rv_old); |
714 | } | 379 | } |
715 | 380 | ||
716 | set_bit(LSFL_RECOVER_WORK, &ls->ls_flags); | 381 | dlm_recoverd_kick(ls); |
717 | wake_up_process(ls->ls_recoverd_task); | ||
718 | return 0; | 382 | return 0; |
719 | 383 | ||
720 | fail: | 384 | fail: |
721 | kfree(rv); | 385 | kfree(rv); |
722 | kfree(nodes); | 386 | kfree(ids); |
387 | kfree(new); | ||
723 | return error; | 388 | return error; |
724 | } | 389 | } |
725 | 390 | ||
diff --git a/fs/dlm/member.h b/fs/dlm/member.h index 3deb70661c6..7a26fca1e0b 100644 --- a/fs/dlm/member.h +++ b/fs/dlm/member.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
5 | ** | 5 | ** |
6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, |
7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -20,14 +20,6 @@ void dlm_clear_members_gone(struct dlm_ls *ls); | |||
20 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); | 20 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); |
21 | int dlm_is_removed(struct dlm_ls *ls, int nodeid); | 21 | int dlm_is_removed(struct dlm_ls *ls, int nodeid); |
22 | int dlm_is_member(struct dlm_ls *ls, int nodeid); | 22 | int dlm_is_member(struct dlm_ls *ls, int nodeid); |
23 | int dlm_slots_version(struct dlm_header *h); | ||
24 | void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, | ||
25 | struct dlm_member *memb); | ||
26 | void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc); | ||
27 | int dlm_slots_copy_in(struct dlm_ls *ls); | ||
28 | int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, | ||
29 | struct dlm_slot **slots_out, uint32_t *gen_out); | ||
30 | void dlm_lsop_recover_done(struct dlm_ls *ls); | ||
31 | 23 | ||
32 | #endif /* __MEMBER_DOT_H__ */ | 24 | #endif /* __MEMBER_DOT_H__ */ |
33 | 25 | ||
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index 7cd24bccd4f..da64df7576e 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c | |||
@@ -21,19 +21,21 @@ static struct kmem_cache *rsb_cache; | |||
21 | 21 | ||
22 | int __init dlm_memory_init(void) | 22 | int __init dlm_memory_init(void) |
23 | { | 23 | { |
24 | int ret = 0; | ||
25 | |||
24 | lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb), | 26 | lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb), |
25 | __alignof__(struct dlm_lkb), 0, NULL); | 27 | __alignof__(struct dlm_lkb), 0, NULL); |
26 | if (!lkb_cache) | 28 | if (!lkb_cache) |
27 | return -ENOMEM; | 29 | ret = -ENOMEM; |
28 | 30 | ||
29 | rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb), | 31 | rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb), |
30 | __alignof__(struct dlm_rsb), 0, NULL); | 32 | __alignof__(struct dlm_rsb), 0, NULL); |
31 | if (!rsb_cache) { | 33 | if (!rsb_cache) { |
32 | kmem_cache_destroy(lkb_cache); | 34 | kmem_cache_destroy(lkb_cache); |
33 | return -ENOMEM; | 35 | ret = -ENOMEM; |
34 | } | 36 | } |
35 | 37 | ||
36 | return 0; | 38 | return ret; |
37 | } | 39 | } |
38 | 40 | ||
39 | void dlm_memory_exit(void) | 41 | void dlm_memory_exit(void) |
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 60a327863b1..ef17e0169da 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c | |||
@@ -14,7 +14,7 @@ | |||
14 | #include "dlm_internal.h" | 14 | #include "dlm_internal.h" |
15 | 15 | ||
16 | static uint32_t dlm_nl_seqnum; | 16 | static uint32_t dlm_nl_seqnum; |
17 | static uint32_t listener_nlportid; | 17 | static uint32_t listener_nlpid; |
18 | 18 | ||
19 | static struct genl_family family = { | 19 | static struct genl_family family = { |
20 | .id = GENL_ID_GENERATE, | 20 | .id = GENL_ID_GENERATE, |
@@ -64,13 +64,13 @@ static int send_data(struct sk_buff *skb) | |||
64 | return rv; | 64 | return rv; |
65 | } | 65 | } |
66 | 66 | ||
67 | return genlmsg_unicast(&init_net, skb, listener_nlportid); | 67 | return genlmsg_unicast(&init_net, skb, listener_nlpid); |
68 | } | 68 | } |
69 | 69 | ||
70 | static int user_cmd(struct sk_buff *skb, struct genl_info *info) | 70 | static int user_cmd(struct sk_buff *skb, struct genl_info *info) |
71 | { | 71 | { |
72 | listener_nlportid = info->snd_portid; | 72 | listener_nlpid = info->snd_pid; |
73 | printk("user_cmd nlpid %u\n", listener_nlportid); | 73 | printk("user_cmd nlpid %u\n", listener_nlpid); |
74 | return 0; | 74 | return 0; |
75 | } | 75 | } |
76 | 76 | ||
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 9d61947d473..f10a50f24e8 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "lock.h" | 24 | #include "lock.h" |
25 | #include "util.h" | 25 | #include "util.h" |
26 | 26 | ||
27 | |||
27 | static int rcom_response(struct dlm_ls *ls) | 28 | static int rcom_response(struct dlm_ls *ls) |
28 | { | 29 | { |
29 | return test_bit(LSFL_RCOM_READY, &ls->ls_flags); | 30 | return test_bit(LSFL_RCOM_READY, &ls->ls_flags); |
@@ -71,30 +72,20 @@ static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh, | |||
71 | dlm_lowcomms_commit_buffer(mh); | 72 | dlm_lowcomms_commit_buffer(mh); |
72 | } | 73 | } |
73 | 74 | ||
74 | static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs, | ||
75 | uint32_t flags) | ||
76 | { | ||
77 | rs->rs_flags = cpu_to_le32(flags); | ||
78 | } | ||
79 | |||
80 | /* When replying to a status request, a node also sends back its | 75 | /* When replying to a status request, a node also sends back its |
81 | configuration values. The requesting node then checks that the remote | 76 | configuration values. The requesting node then checks that the remote |
82 | node is configured the same way as itself. */ | 77 | node is configured the same way as itself. */ |
83 | 78 | ||
84 | static void set_rcom_config(struct dlm_ls *ls, struct rcom_config *rf, | 79 | static void make_config(struct dlm_ls *ls, struct rcom_config *rf) |
85 | uint32_t num_slots) | ||
86 | { | 80 | { |
87 | rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen); | 81 | rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen); |
88 | rf->rf_lsflags = cpu_to_le32(ls->ls_exflags); | 82 | rf->rf_lsflags = cpu_to_le32(ls->ls_exflags); |
89 | |||
90 | rf->rf_our_slot = cpu_to_le16(ls->ls_slot); | ||
91 | rf->rf_num_slots = cpu_to_le16(num_slots); | ||
92 | rf->rf_generation = cpu_to_le32(ls->ls_generation); | ||
93 | } | 83 | } |
94 | 84 | ||
95 | static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | 85 | static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) |
96 | { | 86 | { |
97 | struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; | 87 | struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; |
88 | size_t conf_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); | ||
98 | 89 | ||
99 | if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { | 90 | if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { |
100 | log_error(ls, "version mismatch: %x nodeid %d: %x", | 91 | log_error(ls, "version mismatch: %x nodeid %d: %x", |
@@ -103,6 +94,12 @@ static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | |||
103 | return -EPROTO; | 94 | return -EPROTO; |
104 | } | 95 | } |
105 | 96 | ||
97 | if (rc->rc_header.h_length < conf_size) { | ||
98 | log_error(ls, "config too short: %d nodeid %d", | ||
99 | rc->rc_header.h_length, nodeid); | ||
100 | return -EPROTO; | ||
101 | } | ||
102 | |||
106 | if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen || | 103 | if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen || |
107 | le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) { | 104 | le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) { |
108 | log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", | 105 | log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", |
@@ -130,18 +127,7 @@ static void disallow_sync_reply(struct dlm_ls *ls) | |||
130 | spin_unlock(&ls->ls_rcom_spin); | 127 | spin_unlock(&ls->ls_rcom_spin); |
131 | } | 128 | } |
132 | 129 | ||
133 | /* | 130 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid) |
134 | * low nodeid gathers one slot value at a time from each node. | ||
135 | * it sets need_slots=0, and saves rf_our_slot returned from each | ||
136 | * rcom_config. | ||
137 | * | ||
138 | * other nodes gather all slot values at once from the low nodeid. | ||
139 | * they set need_slots=1, and ignore the rf_our_slot returned from each | ||
140 | * rcom_config. they use the rf_num_slots returned from the low | ||
141 | * node's rcom_config. | ||
142 | */ | ||
143 | |||
144 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) | ||
145 | { | 131 | { |
146 | struct dlm_rcom *rc; | 132 | struct dlm_rcom *rc; |
147 | struct dlm_mhandle *mh; | 133 | struct dlm_mhandle *mh; |
@@ -155,13 +141,10 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) | |||
155 | goto out; | 141 | goto out; |
156 | } | 142 | } |
157 | 143 | ||
158 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, | 144 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh); |
159 | sizeof(struct rcom_status), &rc, &mh); | ||
160 | if (error) | 145 | if (error) |
161 | goto out; | 146 | goto out; |
162 | 147 | ||
163 | set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags); | ||
164 | |||
165 | allow_sync_reply(ls, &rc->rc_id); | 148 | allow_sync_reply(ls, &rc->rc_id); |
166 | memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); | 149 | memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); |
167 | 150 | ||
@@ -178,11 +161,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) | |||
178 | /* we pretend the remote lockspace exists with 0 status */ | 161 | /* we pretend the remote lockspace exists with 0 status */ |
179 | log_debug(ls, "remote node %d not ready", nodeid); | 162 | log_debug(ls, "remote node %d not ready", nodeid); |
180 | rc->rc_result = 0; | 163 | rc->rc_result = 0; |
181 | error = 0; | 164 | } else |
182 | } else { | 165 | error = check_config(ls, rc, nodeid); |
183 | error = check_rcom_config(ls, rc, nodeid); | ||
184 | } | ||
185 | |||
186 | /* the caller looks at rc_result for the remote recovery status */ | 166 | /* the caller looks at rc_result for the remote recovery status */ |
187 | out: | 167 | out: |
188 | return error; | 168 | return error; |
@@ -192,60 +172,17 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
192 | { | 172 | { |
193 | struct dlm_rcom *rc; | 173 | struct dlm_rcom *rc; |
194 | struct dlm_mhandle *mh; | 174 | struct dlm_mhandle *mh; |
195 | struct rcom_status *rs; | 175 | int error, nodeid = rc_in->rc_header.h_nodeid; |
196 | uint32_t status; | ||
197 | int nodeid = rc_in->rc_header.h_nodeid; | ||
198 | int len = sizeof(struct rcom_config); | ||
199 | int num_slots = 0; | ||
200 | int error; | ||
201 | |||
202 | if (!dlm_slots_version(&rc_in->rc_header)) { | ||
203 | status = dlm_recover_status(ls); | ||
204 | goto do_create; | ||
205 | } | ||
206 | |||
207 | rs = (struct rcom_status *)rc_in->rc_buf; | ||
208 | |||
209 | if (!(rs->rs_flags & DLM_RSF_NEED_SLOTS)) { | ||
210 | status = dlm_recover_status(ls); | ||
211 | goto do_create; | ||
212 | } | ||
213 | |||
214 | spin_lock(&ls->ls_recover_lock); | ||
215 | status = ls->ls_recover_status; | ||
216 | num_slots = ls->ls_num_slots; | ||
217 | spin_unlock(&ls->ls_recover_lock); | ||
218 | len += num_slots * sizeof(struct rcom_slot); | ||
219 | 176 | ||
220 | do_create: | ||
221 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY, | 177 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY, |
222 | len, &rc, &mh); | 178 | sizeof(struct rcom_config), &rc, &mh); |
223 | if (error) | 179 | if (error) |
224 | return; | 180 | return; |
225 | |||
226 | rc->rc_id = rc_in->rc_id; | 181 | rc->rc_id = rc_in->rc_id; |
227 | rc->rc_seq_reply = rc_in->rc_seq; | 182 | rc->rc_seq_reply = rc_in->rc_seq; |
228 | rc->rc_result = status; | 183 | rc->rc_result = dlm_recover_status(ls); |
184 | make_config(ls, (struct rcom_config *) rc->rc_buf); | ||
229 | 185 | ||
230 | set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, num_slots); | ||
231 | |||
232 | if (!num_slots) | ||
233 | goto do_send; | ||
234 | |||
235 | spin_lock(&ls->ls_recover_lock); | ||
236 | if (ls->ls_num_slots != num_slots) { | ||
237 | spin_unlock(&ls->ls_recover_lock); | ||
238 | log_debug(ls, "receive_rcom_status num_slots %d to %d", | ||
239 | num_slots, ls->ls_num_slots); | ||
240 | rc->rc_result = 0; | ||
241 | set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, 0); | ||
242 | goto do_send; | ||
243 | } | ||
244 | |||
245 | dlm_slots_copy_out(ls, rc); | ||
246 | spin_unlock(&ls->ls_recover_lock); | ||
247 | |||
248 | do_send: | ||
249 | send_rcom(ls, mh, rc); | 186 | send_rcom(ls, mh, rc); |
250 | } | 187 | } |
251 | 188 | ||
@@ -273,9 +210,19 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) | |||
273 | struct dlm_rcom *rc; | 210 | struct dlm_rcom *rc; |
274 | struct dlm_mhandle *mh; | 211 | struct dlm_mhandle *mh; |
275 | int error = 0; | 212 | int error = 0; |
213 | int max_size = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom); | ||
276 | 214 | ||
277 | ls->ls_recover_nodeid = nodeid; | 215 | ls->ls_recover_nodeid = nodeid; |
278 | 216 | ||
217 | if (nodeid == dlm_our_nodeid()) { | ||
218 | ls->ls_recover_buf->rc_header.h_length = | ||
219 | dlm_config.ci_buffer_size; | ||
220 | dlm_copy_master_names(ls, last_name, last_len, | ||
221 | ls->ls_recover_buf->rc_buf, | ||
222 | max_size, nodeid); | ||
223 | goto out; | ||
224 | } | ||
225 | |||
279 | error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh); | 226 | error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh); |
280 | if (error) | 227 | if (error) |
281 | goto out; | 228 | goto out; |
@@ -325,26 +272,7 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) | |||
325 | if (error) | 272 | if (error) |
326 | goto out; | 273 | goto out; |
327 | memcpy(rc->rc_buf, r->res_name, r->res_length); | 274 | memcpy(rc->rc_buf, r->res_name, r->res_length); |
328 | rc->rc_id = (unsigned long) r->res_id; | 275 | rc->rc_id = (unsigned long) r; |
329 | |||
330 | send_rcom(ls, mh, rc); | ||
331 | out: | ||
332 | return error; | ||
333 | } | ||
334 | |||
335 | int dlm_send_rcom_lookup_dump(struct dlm_rsb *r, int to_nodeid) | ||
336 | { | ||
337 | struct dlm_rcom *rc; | ||
338 | struct dlm_mhandle *mh; | ||
339 | struct dlm_ls *ls = r->res_ls; | ||
340 | int error; | ||
341 | |||
342 | error = create_rcom(ls, to_nodeid, DLM_RCOM_LOOKUP, r->res_length, | ||
343 | &rc, &mh); | ||
344 | if (error) | ||
345 | goto out; | ||
346 | memcpy(rc->rc_buf, r->res_name, r->res_length); | ||
347 | rc->rc_id = 0xFFFFFFFF; | ||
348 | 276 | ||
349 | send_rcom(ls, mh, rc); | 277 | send_rcom(ls, mh, rc); |
350 | out: | 278 | out: |
@@ -362,14 +290,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
362 | if (error) | 290 | if (error) |
363 | return; | 291 | return; |
364 | 292 | ||
365 | if (rc_in->rc_id == 0xFFFFFFFF) { | 293 | error = dlm_dir_lookup(ls, nodeid, rc_in->rc_buf, len, &ret_nodeid); |
366 | log_error(ls, "receive_rcom_lookup dump from %d", nodeid); | ||
367 | dlm_dump_rsb_name(ls, rc_in->rc_buf, len); | ||
368 | return; | ||
369 | } | ||
370 | |||
371 | error = dlm_master_lookup(ls, nodeid, rc_in->rc_buf, len, | ||
372 | DLM_LU_RECOVER_MASTER, &ret_nodeid, NULL); | ||
373 | if (error) | 294 | if (error) |
374 | ret_nodeid = error; | 295 | ret_nodeid = error; |
375 | rc->rc_result = ret_nodeid; | 296 | rc->rc_result = ret_nodeid; |
@@ -500,102 +421,46 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) | |||
500 | return 0; | 421 | return 0; |
501 | } | 422 | } |
502 | 423 | ||
503 | /* | 424 | static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc) |
504 | * Ignore messages for stage Y before we set | ||
505 | * recover_status bit for stage X: | ||
506 | * | ||
507 | * recover_status = 0 | ||
508 | * | ||
509 | * dlm_recover_members() | ||
510 | * - send nothing | ||
511 | * - recv nothing | ||
512 | * - ignore NAMES, NAMES_REPLY | ||
513 | * - ignore LOOKUP, LOOKUP_REPLY | ||
514 | * - ignore LOCK, LOCK_REPLY | ||
515 | * | ||
516 | * recover_status |= NODES | ||
517 | * | ||
518 | * dlm_recover_members_wait() | ||
519 | * | ||
520 | * dlm_recover_directory() | ||
521 | * - send NAMES | ||
522 | * - recv NAMES_REPLY | ||
523 | * - ignore LOOKUP, LOOKUP_REPLY | ||
524 | * - ignore LOCK, LOCK_REPLY | ||
525 | * | ||
526 | * recover_status |= DIR | ||
527 | * | ||
528 | * dlm_recover_directory_wait() | ||
529 | * | ||
530 | * dlm_recover_masters() | ||
531 | * - send LOOKUP | ||
532 | * - recv LOOKUP_REPLY | ||
533 | * | ||
534 | * dlm_recover_locks() | ||
535 | * - send LOCKS | ||
536 | * - recv LOCKS_REPLY | ||
537 | * | ||
538 | * recover_status |= LOCKS | ||
539 | * | ||
540 | * dlm_recover_locks_wait() | ||
541 | * | ||
542 | * recover_status |= DONE | ||
543 | */ | ||
544 | |||
545 | /* Called by dlm_recv; corresponds to dlm_receive_message() but special | ||
546 | recovery-only comms are sent through here. */ | ||
547 | |||
548 | void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | ||
549 | { | 425 | { |
550 | int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock); | ||
551 | int stop, reply = 0, names = 0, lookup = 0, lock = 0; | ||
552 | uint32_t status; | ||
553 | uint64_t seq; | 426 | uint64_t seq; |
427 | int rv = 0; | ||
554 | 428 | ||
555 | switch (rc->rc_type) { | 429 | switch (rc->rc_type) { |
556 | case DLM_RCOM_STATUS_REPLY: | 430 | case DLM_RCOM_STATUS_REPLY: |
557 | reply = 1; | ||
558 | break; | ||
559 | case DLM_RCOM_NAMES: | ||
560 | names = 1; | ||
561 | break; | ||
562 | case DLM_RCOM_NAMES_REPLY: | 431 | case DLM_RCOM_NAMES_REPLY: |
563 | names = 1; | ||
564 | reply = 1; | ||
565 | break; | ||
566 | case DLM_RCOM_LOOKUP: | ||
567 | lookup = 1; | ||
568 | break; | ||
569 | case DLM_RCOM_LOOKUP_REPLY: | 432 | case DLM_RCOM_LOOKUP_REPLY: |
570 | lookup = 1; | ||
571 | reply = 1; | ||
572 | break; | ||
573 | case DLM_RCOM_LOCK: | ||
574 | lock = 1; | ||
575 | break; | ||
576 | case DLM_RCOM_LOCK_REPLY: | 433 | case DLM_RCOM_LOCK_REPLY: |
577 | lock = 1; | 434 | spin_lock(&ls->ls_recover_lock); |
578 | reply = 1; | 435 | seq = ls->ls_recover_seq; |
579 | break; | 436 | spin_unlock(&ls->ls_recover_lock); |
580 | }; | 437 | if (rc->rc_seq_reply != seq) { |
581 | 438 | log_debug(ls, "ignoring old reply %x from %d " | |
582 | spin_lock(&ls->ls_recover_lock); | 439 | "seq_reply %llx expect %llx", |
583 | status = ls->ls_recover_status; | 440 | rc->rc_type, rc->rc_header.h_nodeid, |
584 | stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); | 441 | (unsigned long long)rc->rc_seq_reply, |
585 | seq = ls->ls_recover_seq; | 442 | (unsigned long long)seq); |
586 | spin_unlock(&ls->ls_recover_lock); | 443 | rv = 1; |
444 | } | ||
445 | } | ||
446 | return rv; | ||
447 | } | ||
587 | 448 | ||
588 | if (stop && (rc->rc_type != DLM_RCOM_STATUS)) | 449 | /* Called by dlm_recv; corresponds to dlm_receive_message() but special |
589 | goto ignore; | 450 | recovery-only comms are sent through here. */ |
590 | 451 | ||
591 | if (reply && (rc->rc_seq_reply != seq)) | 452 | void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) |
592 | goto ignore; | 453 | { |
454 | int lock_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock); | ||
593 | 455 | ||
594 | if (!(status & DLM_RS_NODES) && (names || lookup || lock)) | 456 | if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { |
595 | goto ignore; | 457 | log_debug(ls, "ignoring recovery message %x from %d", |
458 | rc->rc_type, nodeid); | ||
459 | goto out; | ||
460 | } | ||
596 | 461 | ||
597 | if (!(status & DLM_RS_DIR) && (lookup || lock)) | 462 | if (is_old_reply(ls, rc)) |
598 | goto ignore; | 463 | goto out; |
599 | 464 | ||
600 | switch (rc->rc_type) { | 465 | switch (rc->rc_type) { |
601 | case DLM_RCOM_STATUS: | 466 | case DLM_RCOM_STATUS: |
@@ -637,20 +502,10 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | |||
637 | default: | 502 | default: |
638 | log_error(ls, "receive_rcom bad type %d", rc->rc_type); | 503 | log_error(ls, "receive_rcom bad type %d", rc->rc_type); |
639 | } | 504 | } |
640 | return; | 505 | out: |
641 | |||
642 | ignore: | ||
643 | log_limit(ls, "dlm_receive_rcom ignore msg %d " | ||
644 | "from %d %llu %llu recover seq %llu sts %x gen %u", | ||
645 | rc->rc_type, | ||
646 | nodeid, | ||
647 | (unsigned long long)rc->rc_seq, | ||
648 | (unsigned long long)rc->rc_seq_reply, | ||
649 | (unsigned long long)seq, | ||
650 | status, ls->ls_generation); | ||
651 | return; | 506 | return; |
652 | Eshort: | 507 | Eshort: |
653 | log_error(ls, "recovery message %d from %d is too short", | 508 | log_error(ls, "recovery message %x from %d is too short", |
654 | rc->rc_type, nodeid); | 509 | rc->rc_type, nodeid); |
655 | } | 510 | } |
656 | 511 | ||
diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h index f8e243463c1..b09abd29ba3 100644 --- a/fs/dlm/rcom.h +++ b/fs/dlm/rcom.h | |||
@@ -14,10 +14,9 @@ | |||
14 | #ifndef __RCOM_DOT_H__ | 14 | #ifndef __RCOM_DOT_H__ |
15 | #define __RCOM_DOT_H__ | 15 | #define __RCOM_DOT_H__ |
16 | 16 | ||
17 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags); | 17 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid); |
18 | int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); | 18 | int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); |
19 | int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); | 19 | int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); |
20 | int dlm_send_rcom_lookup_dump(struct dlm_rsb *r, int to_nodeid); | ||
21 | int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); | 20 | int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); |
22 | void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid); | 21 | void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid); |
23 | int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in); | 22 | int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in); |
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index aedea28a86a..14638235f7b 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c | |||
@@ -36,23 +36,30 @@ | |||
36 | * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another | 36 | * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another |
37 | * function thinks it could have completed the waited-on task, they should wake | 37 | * function thinks it could have completed the waited-on task, they should wake |
38 | * up ls_wait_general to get an immediate response rather than waiting for the | 38 | * up ls_wait_general to get an immediate response rather than waiting for the |
39 | * timeout. This uses a timeout so it can check periodically if the wait | 39 | * timer to detect the result. A timer wakes us up periodically while waiting |
40 | * should abort due to node failure (which doesn't cause a wake_up). | 40 | * to see if we should abort due to a node failure. This should only be called |
41 | * This should only be called by the dlm_recoverd thread. | 41 | * by the dlm_recoverd thread. |
42 | */ | 42 | */ |
43 | 43 | ||
44 | static void dlm_wait_timer_fn(unsigned long data) | ||
45 | { | ||
46 | struct dlm_ls *ls = (struct dlm_ls *) data; | ||
47 | mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ)); | ||
48 | wake_up(&ls->ls_wait_general); | ||
49 | } | ||
50 | |||
44 | int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) | 51 | int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) |
45 | { | 52 | { |
46 | int error = 0; | 53 | int error = 0; |
47 | int rv; | ||
48 | 54 | ||
49 | while (1) { | 55 | init_timer(&ls->ls_timer); |
50 | rv = wait_event_timeout(ls->ls_wait_general, | 56 | ls->ls_timer.function = dlm_wait_timer_fn; |
51 | testfn(ls) || dlm_recovery_stopped(ls), | 57 | ls->ls_timer.data = (long) ls; |
52 | dlm_config.ci_recover_timer * HZ); | 58 | ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ); |
53 | if (rv) | 59 | add_timer(&ls->ls_timer); |
54 | break; | 60 | |
55 | } | 61 | wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls)); |
62 | del_timer_sync(&ls->ls_timer); | ||
56 | 63 | ||
57 | if (dlm_recovery_stopped(ls)) { | 64 | if (dlm_recovery_stopped(ls)) { |
58 | log_debug(ls, "dlm_wait_function aborted"); | 65 | log_debug(ls, "dlm_wait_function aborted"); |
@@ -78,20 +85,14 @@ uint32_t dlm_recover_status(struct dlm_ls *ls) | |||
78 | return status; | 85 | return status; |
79 | } | 86 | } |
80 | 87 | ||
81 | static void _set_recover_status(struct dlm_ls *ls, uint32_t status) | ||
82 | { | ||
83 | ls->ls_recover_status |= status; | ||
84 | } | ||
85 | |||
86 | void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) | 88 | void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) |
87 | { | 89 | { |
88 | spin_lock(&ls->ls_recover_lock); | 90 | spin_lock(&ls->ls_recover_lock); |
89 | _set_recover_status(ls, status); | 91 | ls->ls_recover_status |= status; |
90 | spin_unlock(&ls->ls_recover_lock); | 92 | spin_unlock(&ls->ls_recover_lock); |
91 | } | 93 | } |
92 | 94 | ||
93 | static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, | 95 | static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) |
94 | int save_slots) | ||
95 | { | 96 | { |
96 | struct dlm_rcom *rc = ls->ls_recover_buf; | 97 | struct dlm_rcom *rc = ls->ls_recover_buf; |
97 | struct dlm_member *memb; | 98 | struct dlm_member *memb; |
@@ -105,13 +106,10 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, | |||
105 | goto out; | 106 | goto out; |
106 | } | 107 | } |
107 | 108 | ||
108 | error = dlm_rcom_status(ls, memb->nodeid, 0); | 109 | error = dlm_rcom_status(ls, memb->nodeid); |
109 | if (error) | 110 | if (error) |
110 | goto out; | 111 | goto out; |
111 | 112 | ||
112 | if (save_slots) | ||
113 | dlm_slot_save(ls, rc, memb); | ||
114 | |||
115 | if (rc->rc_result & wait_status) | 113 | if (rc->rc_result & wait_status) |
116 | break; | 114 | break; |
117 | if (delay < 1000) | 115 | if (delay < 1000) |
@@ -123,8 +121,7 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, | |||
123 | return error; | 121 | return error; |
124 | } | 122 | } |
125 | 123 | ||
126 | static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, | 124 | static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status) |
127 | uint32_t status_flags) | ||
128 | { | 125 | { |
129 | struct dlm_rcom *rc = ls->ls_recover_buf; | 126 | struct dlm_rcom *rc = ls->ls_recover_buf; |
130 | int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; | 127 | int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; |
@@ -135,7 +132,7 @@ static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, | |||
135 | goto out; | 132 | goto out; |
136 | } | 133 | } |
137 | 134 | ||
138 | error = dlm_rcom_status(ls, nodeid, status_flags); | 135 | error = dlm_rcom_status(ls, nodeid); |
139 | if (error) | 136 | if (error) |
140 | break; | 137 | break; |
141 | 138 | ||
@@ -155,56 +152,18 @@ static int wait_status(struct dlm_ls *ls, uint32_t status) | |||
155 | int error; | 152 | int error; |
156 | 153 | ||
157 | if (ls->ls_low_nodeid == dlm_our_nodeid()) { | 154 | if (ls->ls_low_nodeid == dlm_our_nodeid()) { |
158 | error = wait_status_all(ls, status, 0); | 155 | error = wait_status_all(ls, status); |
159 | if (!error) | 156 | if (!error) |
160 | dlm_set_recover_status(ls, status_all); | 157 | dlm_set_recover_status(ls, status_all); |
161 | } else | 158 | } else |
162 | error = wait_status_low(ls, status_all, 0); | 159 | error = wait_status_low(ls, status_all); |
163 | 160 | ||
164 | return error; | 161 | return error; |
165 | } | 162 | } |
166 | 163 | ||
167 | int dlm_recover_members_wait(struct dlm_ls *ls) | 164 | int dlm_recover_members_wait(struct dlm_ls *ls) |
168 | { | 165 | { |
169 | struct dlm_member *memb; | 166 | return wait_status(ls, DLM_RS_NODES); |
170 | struct dlm_slot *slots; | ||
171 | int num_slots, slots_size; | ||
172 | int error, rv; | ||
173 | uint32_t gen; | ||
174 | |||
175 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
176 | memb->slot = -1; | ||
177 | memb->generation = 0; | ||
178 | } | ||
179 | |||
180 | if (ls->ls_low_nodeid == dlm_our_nodeid()) { | ||
181 | error = wait_status_all(ls, DLM_RS_NODES, 1); | ||
182 | if (error) | ||
183 | goto out; | ||
184 | |||
185 | /* slots array is sparse, slots_size may be > num_slots */ | ||
186 | |||
187 | rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen); | ||
188 | if (!rv) { | ||
189 | spin_lock(&ls->ls_recover_lock); | ||
190 | _set_recover_status(ls, DLM_RS_NODES_ALL); | ||
191 | ls->ls_num_slots = num_slots; | ||
192 | ls->ls_slots_size = slots_size; | ||
193 | ls->ls_slots = slots; | ||
194 | ls->ls_generation = gen; | ||
195 | spin_unlock(&ls->ls_recover_lock); | ||
196 | } else { | ||
197 | dlm_set_recover_status(ls, DLM_RS_NODES_ALL); | ||
198 | } | ||
199 | } else { | ||
200 | error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS); | ||
201 | if (error) | ||
202 | goto out; | ||
203 | |||
204 | dlm_slots_copy_in(ls); | ||
205 | } | ||
206 | out: | ||
207 | return error; | ||
208 | } | 167 | } |
209 | 168 | ||
210 | int dlm_recover_directory_wait(struct dlm_ls *ls) | 169 | int dlm_recover_directory_wait(struct dlm_ls *ls) |
@@ -270,6 +229,22 @@ static void recover_list_del(struct dlm_rsb *r) | |||
270 | dlm_put_rsb(r); | 229 | dlm_put_rsb(r); |
271 | } | 230 | } |
272 | 231 | ||
232 | static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id) | ||
233 | { | ||
234 | struct dlm_rsb *r = NULL; | ||
235 | |||
236 | spin_lock(&ls->ls_recover_list_lock); | ||
237 | |||
238 | list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) { | ||
239 | if (id == (unsigned long) r) | ||
240 | goto out; | ||
241 | } | ||
242 | r = NULL; | ||
243 | out: | ||
244 | spin_unlock(&ls->ls_recover_list_lock); | ||
245 | return r; | ||
246 | } | ||
247 | |||
273 | static void recover_list_clear(struct dlm_ls *ls) | 248 | static void recover_list_clear(struct dlm_ls *ls) |
274 | { | 249 | { |
275 | struct dlm_rsb *r, *s; | 250 | struct dlm_rsb *r, *s; |
@@ -290,94 +265,6 @@ static void recover_list_clear(struct dlm_ls *ls) | |||
290 | spin_unlock(&ls->ls_recover_list_lock); | 265 | spin_unlock(&ls->ls_recover_list_lock); |
291 | } | 266 | } |
292 | 267 | ||
293 | static int recover_idr_empty(struct dlm_ls *ls) | ||
294 | { | ||
295 | int empty = 1; | ||
296 | |||
297 | spin_lock(&ls->ls_recover_idr_lock); | ||
298 | if (ls->ls_recover_list_count) | ||
299 | empty = 0; | ||
300 | spin_unlock(&ls->ls_recover_idr_lock); | ||
301 | |||
302 | return empty; | ||
303 | } | ||
304 | |||
305 | static int recover_idr_add(struct dlm_rsb *r) | ||
306 | { | ||
307 | struct dlm_ls *ls = r->res_ls; | ||
308 | int rv, id; | ||
309 | |||
310 | rv = idr_pre_get(&ls->ls_recover_idr, GFP_NOFS); | ||
311 | if (!rv) | ||
312 | return -ENOMEM; | ||
313 | |||
314 | spin_lock(&ls->ls_recover_idr_lock); | ||
315 | if (r->res_id) { | ||
316 | spin_unlock(&ls->ls_recover_idr_lock); | ||
317 | return -1; | ||
318 | } | ||
319 | rv = idr_get_new_above(&ls->ls_recover_idr, r, 1, &id); | ||
320 | if (rv) { | ||
321 | spin_unlock(&ls->ls_recover_idr_lock); | ||
322 | return rv; | ||
323 | } | ||
324 | r->res_id = id; | ||
325 | ls->ls_recover_list_count++; | ||
326 | dlm_hold_rsb(r); | ||
327 | spin_unlock(&ls->ls_recover_idr_lock); | ||
328 | return 0; | ||
329 | } | ||
330 | |||
331 | static void recover_idr_del(struct dlm_rsb *r) | ||
332 | { | ||
333 | struct dlm_ls *ls = r->res_ls; | ||
334 | |||
335 | spin_lock(&ls->ls_recover_idr_lock); | ||
336 | idr_remove(&ls->ls_recover_idr, r->res_id); | ||
337 | r->res_id = 0; | ||
338 | ls->ls_recover_list_count--; | ||
339 | spin_unlock(&ls->ls_recover_idr_lock); | ||
340 | |||
341 | dlm_put_rsb(r); | ||
342 | } | ||
343 | |||
344 | static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id) | ||
345 | { | ||
346 | struct dlm_rsb *r; | ||
347 | |||
348 | spin_lock(&ls->ls_recover_idr_lock); | ||
349 | r = idr_find(&ls->ls_recover_idr, (int)id); | ||
350 | spin_unlock(&ls->ls_recover_idr_lock); | ||
351 | return r; | ||
352 | } | ||
353 | |||
354 | static int recover_idr_clear_rsb(int id, void *p, void *data) | ||
355 | { | ||
356 | struct dlm_ls *ls = data; | ||
357 | struct dlm_rsb *r = p; | ||
358 | |||
359 | r->res_id = 0; | ||
360 | r->res_recover_locks_count = 0; | ||
361 | ls->ls_recover_list_count--; | ||
362 | |||
363 | dlm_put_rsb(r); | ||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | static void recover_idr_clear(struct dlm_ls *ls) | ||
368 | { | ||
369 | spin_lock(&ls->ls_recover_idr_lock); | ||
370 | idr_for_each(&ls->ls_recover_idr, recover_idr_clear_rsb, ls); | ||
371 | idr_remove_all(&ls->ls_recover_idr); | ||
372 | |||
373 | if (ls->ls_recover_list_count != 0) { | ||
374 | log_error(ls, "warning: recover_list_count %d", | ||
375 | ls->ls_recover_list_count); | ||
376 | ls->ls_recover_list_count = 0; | ||
377 | } | ||
378 | spin_unlock(&ls->ls_recover_idr_lock); | ||
379 | } | ||
380 | |||
381 | 268 | ||
382 | /* Master recovery: find new master node for rsb's that were | 269 | /* Master recovery: find new master node for rsb's that were |
383 | mastered on nodes that have been removed. | 270 | mastered on nodes that have been removed. |
@@ -404,12 +291,9 @@ static void set_lock_master(struct list_head *queue, int nodeid) | |||
404 | { | 291 | { |
405 | struct dlm_lkb *lkb; | 292 | struct dlm_lkb *lkb; |
406 | 293 | ||
407 | list_for_each_entry(lkb, queue, lkb_statequeue) { | 294 | list_for_each_entry(lkb, queue, lkb_statequeue) |
408 | if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) { | 295 | if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) |
409 | lkb->lkb_nodeid = nodeid; | 296 | lkb->lkb_nodeid = nodeid; |
410 | lkb->lkb_remid = 0; | ||
411 | } | ||
412 | } | ||
413 | } | 297 | } |
414 | 298 | ||
415 | static void set_master_lkbs(struct dlm_rsb *r) | 299 | static void set_master_lkbs(struct dlm_rsb *r) |
@@ -422,93 +306,67 @@ static void set_master_lkbs(struct dlm_rsb *r) | |||
422 | /* | 306 | /* |
423 | * Propagate the new master nodeid to locks | 307 | * Propagate the new master nodeid to locks |
424 | * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. | 308 | * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. |
425 | * The NEW_MASTER2 flag tells recover_lvb() and recover_grant() which | 309 | * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which |
426 | * rsb's to consider. | 310 | * rsb's to consider. |
427 | */ | 311 | */ |
428 | 312 | ||
429 | static void set_new_master(struct dlm_rsb *r) | 313 | static void set_new_master(struct dlm_rsb *r, int nodeid) |
430 | { | 314 | { |
315 | lock_rsb(r); | ||
316 | r->res_nodeid = nodeid; | ||
431 | set_master_lkbs(r); | 317 | set_master_lkbs(r); |
432 | rsb_set_flag(r, RSB_NEW_MASTER); | 318 | rsb_set_flag(r, RSB_NEW_MASTER); |
433 | rsb_set_flag(r, RSB_NEW_MASTER2); | 319 | rsb_set_flag(r, RSB_NEW_MASTER2); |
320 | unlock_rsb(r); | ||
434 | } | 321 | } |
435 | 322 | ||
436 | /* | 323 | /* |
437 | * We do async lookups on rsb's that need new masters. The rsb's | 324 | * We do async lookups on rsb's that need new masters. The rsb's |
438 | * waiting for a lookup reply are kept on the recover_list. | 325 | * waiting for a lookup reply are kept on the recover_list. |
439 | * | ||
440 | * Another node recovering the master may have sent us a rcom lookup, | ||
441 | * and our dlm_master_lookup() set it as the new master, along with | ||
442 | * NEW_MASTER so that we'll recover it here (this implies dir_nodeid | ||
443 | * equals our_nodeid below). | ||
444 | */ | 326 | */ |
445 | 327 | ||
446 | static int recover_master(struct dlm_rsb *r, unsigned int *count) | 328 | static int recover_master(struct dlm_rsb *r) |
447 | { | 329 | { |
448 | struct dlm_ls *ls = r->res_ls; | 330 | struct dlm_ls *ls = r->res_ls; |
449 | int our_nodeid, dir_nodeid; | 331 | int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); |
450 | int is_removed = 0; | ||
451 | int error; | ||
452 | |||
453 | if (is_master(r)) | ||
454 | return 0; | ||
455 | |||
456 | is_removed = dlm_is_removed(ls, r->res_nodeid); | ||
457 | 332 | ||
458 | if (!is_removed && !rsb_flag(r, RSB_NEW_MASTER)) | ||
459 | return 0; | ||
460 | |||
461 | our_nodeid = dlm_our_nodeid(); | ||
462 | dir_nodeid = dlm_dir_nodeid(r); | 333 | dir_nodeid = dlm_dir_nodeid(r); |
463 | 334 | ||
464 | if (dir_nodeid == our_nodeid) { | 335 | if (dir_nodeid == our_nodeid) { |
465 | if (is_removed) { | 336 | error = dlm_dir_lookup(ls, our_nodeid, r->res_name, |
466 | r->res_master_nodeid = our_nodeid; | 337 | r->res_length, &ret_nodeid); |
467 | r->res_nodeid = 0; | 338 | if (error) |
468 | } | 339 | log_error(ls, "recover dir lookup error %d", error); |
469 | 340 | ||
470 | /* set master of lkbs to ourself when is_removed, or to | 341 | if (ret_nodeid == our_nodeid) |
471 | another new master which we set along with NEW_MASTER | 342 | ret_nodeid = 0; |
472 | in dlm_master_lookup */ | 343 | set_new_master(r, ret_nodeid); |
473 | set_new_master(r); | ||
474 | error = 0; | ||
475 | } else { | 344 | } else { |
476 | recover_idr_add(r); | 345 | recover_list_add(r); |
477 | error = dlm_send_rcom_lookup(r, dir_nodeid); | 346 | error = dlm_send_rcom_lookup(r, dir_nodeid); |
478 | } | 347 | } |
479 | 348 | ||
480 | (*count)++; | ||
481 | return error; | 349 | return error; |
482 | } | 350 | } |
483 | 351 | ||
484 | /* | 352 | /* |
485 | * All MSTCPY locks are purged and rebuilt, even if the master stayed the same. | 353 | * When not using a directory, most resource names will hash to a new static |
486 | * This is necessary because recovery can be started, aborted and restarted, | 354 | * master nodeid and the resource will need to be remastered. |
487 | * causing the master nodeid to briefly change during the aborted recovery, and | ||
488 | * change back to the original value in the second recovery. The MSTCPY locks | ||
489 | * may or may not have been purged during the aborted recovery. Another node | ||
490 | * with an outstanding request in waiters list and a request reply saved in the | ||
491 | * requestqueue, cannot know whether it should ignore the reply and resend the | ||
492 | * request, or accept the reply and complete the request. It must do the | ||
493 | * former if the remote node purged MSTCPY locks, and it must do the later if | ||
494 | * the remote node did not. This is solved by always purging MSTCPY locks, in | ||
495 | * which case, the request reply would always be ignored and the request | ||
496 | * resent. | ||
497 | */ | 355 | */ |
498 | 356 | ||
499 | static int recover_master_static(struct dlm_rsb *r, unsigned int *count) | 357 | static int recover_master_static(struct dlm_rsb *r) |
500 | { | 358 | { |
501 | int dir_nodeid = dlm_dir_nodeid(r); | 359 | int master = dlm_dir_nodeid(r); |
502 | int new_master = dir_nodeid; | ||
503 | 360 | ||
504 | if (dir_nodeid == dlm_our_nodeid()) | 361 | if (master == dlm_our_nodeid()) |
505 | new_master = 0; | 362 | master = 0; |
506 | 363 | ||
507 | dlm_purge_mstcpy_locks(r); | 364 | if (r->res_nodeid != master) { |
508 | r->res_master_nodeid = dir_nodeid; | 365 | if (is_master(r)) |
509 | r->res_nodeid = new_master; | 366 | dlm_purge_mstcpy_locks(r); |
510 | set_new_master(r); | 367 | set_new_master(r, master); |
511 | (*count)++; | 368 | return 1; |
369 | } | ||
512 | return 0; | 370 | return 0; |
513 | } | 371 | } |
514 | 372 | ||
@@ -525,10 +383,7 @@ static int recover_master_static(struct dlm_rsb *r, unsigned int *count) | |||
525 | int dlm_recover_masters(struct dlm_ls *ls) | 383 | int dlm_recover_masters(struct dlm_ls *ls) |
526 | { | 384 | { |
527 | struct dlm_rsb *r; | 385 | struct dlm_rsb *r; |
528 | unsigned int total = 0; | 386 | int error = 0, count = 0; |
529 | unsigned int count = 0; | ||
530 | int nodir = dlm_no_directory(ls); | ||
531 | int error; | ||
532 | 387 | ||
533 | log_debug(ls, "dlm_recover_masters"); | 388 | log_debug(ls, "dlm_recover_masters"); |
534 | 389 | ||
@@ -540,58 +395,48 @@ int dlm_recover_masters(struct dlm_ls *ls) | |||
540 | goto out; | 395 | goto out; |
541 | } | 396 | } |
542 | 397 | ||
543 | lock_rsb(r); | 398 | if (dlm_no_directory(ls)) |
544 | if (nodir) | 399 | count += recover_master_static(r); |
545 | error = recover_master_static(r, &count); | 400 | else if (!is_master(r) && |
546 | else | 401 | (dlm_is_removed(ls, r->res_nodeid) || |
547 | error = recover_master(r, &count); | 402 | rsb_flag(r, RSB_NEW_MASTER))) { |
548 | unlock_rsb(r); | 403 | recover_master(r); |
549 | cond_resched(); | 404 | count++; |
550 | total++; | ||
551 | |||
552 | if (error) { | ||
553 | up_read(&ls->ls_root_sem); | ||
554 | goto out; | ||
555 | } | 405 | } |
406 | |||
407 | schedule(); | ||
556 | } | 408 | } |
557 | up_read(&ls->ls_root_sem); | 409 | up_read(&ls->ls_root_sem); |
558 | 410 | ||
559 | log_debug(ls, "dlm_recover_masters %u of %u", count, total); | 411 | log_debug(ls, "dlm_recover_masters %d resources", count); |
560 | 412 | ||
561 | error = dlm_wait_function(ls, &recover_idr_empty); | 413 | error = dlm_wait_function(ls, &recover_list_empty); |
562 | out: | 414 | out: |
563 | if (error) | 415 | if (error) |
564 | recover_idr_clear(ls); | 416 | recover_list_clear(ls); |
565 | return error; | 417 | return error; |
566 | } | 418 | } |
567 | 419 | ||
568 | int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) | 420 | int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) |
569 | { | 421 | { |
570 | struct dlm_rsb *r; | 422 | struct dlm_rsb *r; |
571 | int ret_nodeid, new_master; | 423 | int nodeid; |
572 | 424 | ||
573 | r = recover_idr_find(ls, rc->rc_id); | 425 | r = recover_list_find(ls, rc->rc_id); |
574 | if (!r) { | 426 | if (!r) { |
575 | log_error(ls, "dlm_recover_master_reply no id %llx", | 427 | log_error(ls, "dlm_recover_master_reply no id %llx", |
576 | (unsigned long long)rc->rc_id); | 428 | (unsigned long long)rc->rc_id); |
577 | goto out; | 429 | goto out; |
578 | } | 430 | } |
579 | 431 | ||
580 | ret_nodeid = rc->rc_result; | 432 | nodeid = rc->rc_result; |
581 | 433 | if (nodeid == dlm_our_nodeid()) | |
582 | if (ret_nodeid == dlm_our_nodeid()) | 434 | nodeid = 0; |
583 | new_master = 0; | ||
584 | else | ||
585 | new_master = ret_nodeid; | ||
586 | 435 | ||
587 | lock_rsb(r); | 436 | set_new_master(r, nodeid); |
588 | r->res_master_nodeid = ret_nodeid; | 437 | recover_list_del(r); |
589 | r->res_nodeid = new_master; | ||
590 | set_new_master(r); | ||
591 | unlock_rsb(r); | ||
592 | recover_idr_del(r); | ||
593 | 438 | ||
594 | if (recover_idr_empty(ls)) | 439 | if (recover_list_empty(ls)) |
595 | wake_up(&ls->ls_wait_general); | 440 | wake_up(&ls->ls_wait_general); |
596 | out: | 441 | out: |
597 | return 0; | 442 | return 0; |
@@ -663,6 +508,8 @@ int dlm_recover_locks(struct dlm_ls *ls) | |||
663 | struct dlm_rsb *r; | 508 | struct dlm_rsb *r; |
664 | int error, count = 0; | 509 | int error, count = 0; |
665 | 510 | ||
511 | log_debug(ls, "dlm_recover_locks"); | ||
512 | |||
666 | down_read(&ls->ls_root_sem); | 513 | down_read(&ls->ls_root_sem); |
667 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 514 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
668 | if (is_master(r)) { | 515 | if (is_master(r)) { |
@@ -689,12 +536,14 @@ int dlm_recover_locks(struct dlm_ls *ls) | |||
689 | } | 536 | } |
690 | up_read(&ls->ls_root_sem); | 537 | up_read(&ls->ls_root_sem); |
691 | 538 | ||
692 | log_debug(ls, "dlm_recover_locks %d out", count); | 539 | log_debug(ls, "dlm_recover_locks %d locks", count); |
693 | 540 | ||
694 | error = dlm_wait_function(ls, &recover_list_empty); | 541 | error = dlm_wait_function(ls, &recover_list_empty); |
695 | out: | 542 | out: |
696 | if (error) | 543 | if (error) |
697 | recover_list_clear(ls); | 544 | recover_list_clear(ls); |
545 | else | ||
546 | dlm_set_recover_status(ls, DLM_RS_LOCKS); | ||
698 | return error; | 547 | return error; |
699 | } | 548 | } |
700 | 549 | ||
@@ -717,14 +566,8 @@ void dlm_recovered_lock(struct dlm_rsb *r) | |||
717 | * the VALNOTVALID flag if necessary, and determining the correct lvb contents | 566 | * the VALNOTVALID flag if necessary, and determining the correct lvb contents |
718 | * based on the lvb's of the locks held on the rsb. | 567 | * based on the lvb's of the locks held on the rsb. |
719 | * | 568 | * |
720 | * RSB_VALNOTVALID is set in two cases: | 569 | * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb. If it |
721 | * | 570 | * was already set prior to recovery, it's not cleared, regardless of locks. |
722 | * 1. we are master, but not new, and we purged an EX/PW lock held by a | ||
723 | * failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL) | ||
724 | * | ||
725 | * 2. we are a new master, and there are only NL/CR locks left. | ||
726 | * (We could probably improve this by only invaliding in this way when | ||
727 | * the previous master left uncleanly. VMS docs mention that.) | ||
728 | * | 571 | * |
729 | * The LVB contents are only considered for changing when this is a new master | 572 | * The LVB contents are only considered for changing when this is a new master |
730 | * of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with | 573 | * of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with |
@@ -740,19 +583,6 @@ static void recover_lvb(struct dlm_rsb *r) | |||
740 | int big_lock_exists = 0; | 583 | int big_lock_exists = 0; |
741 | int lvblen = r->res_ls->ls_lvblen; | 584 | int lvblen = r->res_ls->ls_lvblen; |
742 | 585 | ||
743 | if (!rsb_flag(r, RSB_NEW_MASTER2) && | ||
744 | rsb_flag(r, RSB_RECOVER_LVB_INVAL)) { | ||
745 | /* case 1 above */ | ||
746 | rsb_set_flag(r, RSB_VALNOTVALID); | ||
747 | return; | ||
748 | } | ||
749 | |||
750 | if (!rsb_flag(r, RSB_NEW_MASTER2)) | ||
751 | return; | ||
752 | |||
753 | /* we are the new master, so figure out if VALNOTVALID should | ||
754 | be set, and set the rsb lvb from the best lkb available. */ | ||
755 | |||
756 | list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { | 586 | list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { |
757 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) | 587 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) |
758 | continue; | 588 | continue; |
@@ -791,10 +621,13 @@ static void recover_lvb(struct dlm_rsb *r) | |||
791 | if (!lock_lvb_exists) | 621 | if (!lock_lvb_exists) |
792 | goto out; | 622 | goto out; |
793 | 623 | ||
794 | /* lvb is invalidated if only NL/CR locks remain */ | ||
795 | if (!big_lock_exists) | 624 | if (!big_lock_exists) |
796 | rsb_set_flag(r, RSB_VALNOTVALID); | 625 | rsb_set_flag(r, RSB_VALNOTVALID); |
797 | 626 | ||
627 | /* don't mess with the lvb unless we're the new master */ | ||
628 | if (!rsb_flag(r, RSB_NEW_MASTER2)) | ||
629 | goto out; | ||
630 | |||
798 | if (!r->res_lvbptr) { | 631 | if (!r->res_lvbptr) { |
799 | r->res_lvbptr = dlm_allocate_lvb(r->res_ls); | 632 | r->res_lvbptr = dlm_allocate_lvb(r->res_ls); |
800 | if (!r->res_lvbptr) | 633 | if (!r->res_lvbptr) |
@@ -820,7 +653,6 @@ static void recover_lvb(struct dlm_rsb *r) | |||
820 | 653 | ||
821 | static void recover_conversion(struct dlm_rsb *r) | 654 | static void recover_conversion(struct dlm_rsb *r) |
822 | { | 655 | { |
823 | struct dlm_ls *ls = r->res_ls; | ||
824 | struct dlm_lkb *lkb; | 656 | struct dlm_lkb *lkb; |
825 | int grmode = -1; | 657 | int grmode = -1; |
826 | 658 | ||
@@ -835,32 +667,29 @@ static void recover_conversion(struct dlm_rsb *r) | |||
835 | list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { | 667 | list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { |
836 | if (lkb->lkb_grmode != DLM_LOCK_IV) | 668 | if (lkb->lkb_grmode != DLM_LOCK_IV) |
837 | continue; | 669 | continue; |
838 | if (grmode == -1) { | 670 | if (grmode == -1) |
839 | log_debug(ls, "recover_conversion %x set gr to rq %d", | ||
840 | lkb->lkb_id, lkb->lkb_rqmode); | ||
841 | lkb->lkb_grmode = lkb->lkb_rqmode; | 671 | lkb->lkb_grmode = lkb->lkb_rqmode; |
842 | } else { | 672 | else |
843 | log_debug(ls, "recover_conversion %x set gr %d", | ||
844 | lkb->lkb_id, grmode); | ||
845 | lkb->lkb_grmode = grmode; | 673 | lkb->lkb_grmode = grmode; |
846 | } | ||
847 | } | 674 | } |
848 | } | 675 | } |
849 | 676 | ||
850 | /* We've become the new master for this rsb and waiting/converting locks may | 677 | /* We've become the new master for this rsb and waiting/converting locks may |
851 | need to be granted in dlm_recover_grant() due to locks that may have | 678 | need to be granted in dlm_grant_after_purge() due to locks that may have |
852 | existed from a removed node. */ | 679 | existed from a removed node. */ |
853 | 680 | ||
854 | static void recover_grant(struct dlm_rsb *r) | 681 | static void set_locks_purged(struct dlm_rsb *r) |
855 | { | 682 | { |
856 | if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue)) | 683 | if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue)) |
857 | rsb_set_flag(r, RSB_RECOVER_GRANT); | 684 | rsb_set_flag(r, RSB_LOCKS_PURGED); |
858 | } | 685 | } |
859 | 686 | ||
860 | void dlm_recover_rsbs(struct dlm_ls *ls) | 687 | void dlm_recover_rsbs(struct dlm_ls *ls) |
861 | { | 688 | { |
862 | struct dlm_rsb *r; | 689 | struct dlm_rsb *r; |
863 | unsigned int count = 0; | 690 | int count = 0; |
691 | |||
692 | log_debug(ls, "dlm_recover_rsbs"); | ||
864 | 693 | ||
865 | down_read(&ls->ls_root_sem); | 694 | down_read(&ls->ls_root_sem); |
866 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 695 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
@@ -868,33 +697,24 @@ void dlm_recover_rsbs(struct dlm_ls *ls) | |||
868 | if (is_master(r)) { | 697 | if (is_master(r)) { |
869 | if (rsb_flag(r, RSB_RECOVER_CONVERT)) | 698 | if (rsb_flag(r, RSB_RECOVER_CONVERT)) |
870 | recover_conversion(r); | 699 | recover_conversion(r); |
871 | |||
872 | /* recover lvb before granting locks so the updated | ||
873 | lvb/VALNOTVALID is presented in the completion */ | ||
874 | recover_lvb(r); | ||
875 | |||
876 | if (rsb_flag(r, RSB_NEW_MASTER2)) | 700 | if (rsb_flag(r, RSB_NEW_MASTER2)) |
877 | recover_grant(r); | 701 | set_locks_purged(r); |
702 | recover_lvb(r); | ||
878 | count++; | 703 | count++; |
879 | } else { | ||
880 | rsb_clear_flag(r, RSB_VALNOTVALID); | ||
881 | } | 704 | } |
882 | rsb_clear_flag(r, RSB_RECOVER_CONVERT); | 705 | rsb_clear_flag(r, RSB_RECOVER_CONVERT); |
883 | rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL); | ||
884 | rsb_clear_flag(r, RSB_NEW_MASTER2); | 706 | rsb_clear_flag(r, RSB_NEW_MASTER2); |
885 | unlock_rsb(r); | 707 | unlock_rsb(r); |
886 | } | 708 | } |
887 | up_read(&ls->ls_root_sem); | 709 | up_read(&ls->ls_root_sem); |
888 | 710 | ||
889 | if (count) | 711 | log_debug(ls, "dlm_recover_rsbs %d rsbs", count); |
890 | log_debug(ls, "dlm_recover_rsbs %d done", count); | ||
891 | } | 712 | } |
892 | 713 | ||
893 | /* Create a single list of all root rsb's to be used during recovery */ | 714 | /* Create a single list of all root rsb's to be used during recovery */ |
894 | 715 | ||
895 | int dlm_create_root_list(struct dlm_ls *ls) | 716 | int dlm_create_root_list(struct dlm_ls *ls) |
896 | { | 717 | { |
897 | struct rb_node *n; | ||
898 | struct dlm_rsb *r; | 718 | struct dlm_rsb *r; |
899 | int i, error = 0; | 719 | int i, error = 0; |
900 | 720 | ||
@@ -907,14 +727,24 @@ int dlm_create_root_list(struct dlm_ls *ls) | |||
907 | 727 | ||
908 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | 728 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { |
909 | spin_lock(&ls->ls_rsbtbl[i].lock); | 729 | spin_lock(&ls->ls_rsbtbl[i].lock); |
910 | for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) { | 730 | list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { |
911 | r = rb_entry(n, struct dlm_rsb, res_hashnode); | ||
912 | list_add(&r->res_root_list, &ls->ls_root_list); | 731 | list_add(&r->res_root_list, &ls->ls_root_list); |
913 | dlm_hold_rsb(r); | 732 | dlm_hold_rsb(r); |
914 | } | 733 | } |
915 | 734 | ||
916 | if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[i].toss)) | 735 | /* If we're using a directory, add tossed rsbs to the root |
917 | log_error(ls, "dlm_create_root_list toss not empty"); | 736 | list; they'll have entries created in the new directory, |
737 | but no other recovery steps should do anything with them. */ | ||
738 | |||
739 | if (dlm_no_directory(ls)) { | ||
740 | spin_unlock(&ls->ls_rsbtbl[i].lock); | ||
741 | continue; | ||
742 | } | ||
743 | |||
744 | list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) { | ||
745 | list_add(&r->res_root_list, &ls->ls_root_list); | ||
746 | dlm_hold_rsb(r); | ||
747 | } | ||
918 | spin_unlock(&ls->ls_rsbtbl[i].lock); | 748 | spin_unlock(&ls->ls_rsbtbl[i].lock); |
919 | } | 749 | } |
920 | out: | 750 | out: |
@@ -934,26 +764,26 @@ void dlm_release_root_list(struct dlm_ls *ls) | |||
934 | up_write(&ls->ls_root_sem); | 764 | up_write(&ls->ls_root_sem); |
935 | } | 765 | } |
936 | 766 | ||
937 | void dlm_clear_toss(struct dlm_ls *ls) | 767 | /* If not using a directory, clear the entire toss list, there's no benefit to |
768 | caching the master value since it's fixed. If we are using a dir, keep the | ||
769 | rsb's we're the master of. Recovery will add them to the root list and from | ||
770 | there they'll be entered in the rebuilt directory. */ | ||
771 | |||
772 | void dlm_clear_toss_list(struct dlm_ls *ls) | ||
938 | { | 773 | { |
939 | struct rb_node *n, *next; | 774 | struct dlm_rsb *r, *safe; |
940 | struct dlm_rsb *r; | ||
941 | unsigned int count = 0; | ||
942 | int i; | 775 | int i; |
943 | 776 | ||
944 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | 777 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { |
945 | spin_lock(&ls->ls_rsbtbl[i].lock); | 778 | spin_lock(&ls->ls_rsbtbl[i].lock); |
946 | for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) { | 779 | list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, |
947 | next = rb_next(n); | 780 | res_hashchain) { |
948 | r = rb_entry(n, struct dlm_rsb, res_hashnode); | 781 | if (dlm_no_directory(ls) || !is_master(r)) { |
949 | rb_erase(n, &ls->ls_rsbtbl[i].toss); | 782 | list_del(&r->res_hashchain); |
950 | dlm_free_rsb(r); | 783 | dlm_free_rsb(r); |
951 | count++; | 784 | } |
952 | } | 785 | } |
953 | spin_unlock(&ls->ls_rsbtbl[i].lock); | 786 | spin_unlock(&ls->ls_rsbtbl[i].lock); |
954 | } | 787 | } |
955 | |||
956 | if (count) | ||
957 | log_debug(ls, "dlm_clear_toss %u done", count); | ||
958 | } | 788 | } |
959 | 789 | ||
diff --git a/fs/dlm/recover.h b/fs/dlm/recover.h index d8c8738c70e..ebd0363f1e0 100644 --- a/fs/dlm/recover.h +++ b/fs/dlm/recover.h | |||
@@ -27,7 +27,7 @@ int dlm_recover_locks(struct dlm_ls *ls); | |||
27 | void dlm_recovered_lock(struct dlm_rsb *r); | 27 | void dlm_recovered_lock(struct dlm_rsb *r); |
28 | int dlm_create_root_list(struct dlm_ls *ls); | 28 | int dlm_create_root_list(struct dlm_ls *ls); |
29 | void dlm_release_root_list(struct dlm_ls *ls); | 29 | void dlm_release_root_list(struct dlm_ls *ls); |
30 | void dlm_clear_toss(struct dlm_ls *ls); | 30 | void dlm_clear_toss_list(struct dlm_ls *ls); |
31 | void dlm_recover_rsbs(struct dlm_ls *ls); | 31 | void dlm_recover_rsbs(struct dlm_ls *ls); |
32 | 32 | ||
33 | #endif /* __RECOVER_DOT_H__ */ | 33 | #endif /* __RECOVER_DOT_H__ */ |
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 32f9f8926ec..774da3cf92c 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -41,7 +41,6 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq) | |||
41 | set_bit(LSFL_RUNNING, &ls->ls_flags); | 41 | set_bit(LSFL_RUNNING, &ls->ls_flags); |
42 | /* unblocks processes waiting to enter the dlm */ | 42 | /* unblocks processes waiting to enter the dlm */ |
43 | up_write(&ls->ls_in_recovery); | 43 | up_write(&ls->ls_in_recovery); |
44 | clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); | ||
45 | error = 0; | 44 | error = 0; |
46 | } | 45 | } |
47 | spin_unlock(&ls->ls_recover_lock); | 46 | spin_unlock(&ls->ls_recover_lock); |
@@ -55,13 +54,18 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
55 | unsigned long start; | 54 | unsigned long start; |
56 | int error, neg = 0; | 55 | int error, neg = 0; |
57 | 56 | ||
58 | log_debug(ls, "dlm_recover %llu", (unsigned long long)rv->seq); | 57 | log_debug(ls, "recover %llx", (unsigned long long)rv->seq); |
59 | 58 | ||
60 | mutex_lock(&ls->ls_recoverd_active); | 59 | mutex_lock(&ls->ls_recoverd_active); |
61 | 60 | ||
62 | dlm_callback_suspend(ls); | 61 | dlm_callback_suspend(ls); |
63 | 62 | ||
64 | dlm_clear_toss(ls); | 63 | /* |
64 | * Free non-master tossed rsb's. Master rsb's are kept on toss | ||
65 | * list and put on root list to be included in resdir recovery. | ||
66 | */ | ||
67 | |||
68 | dlm_clear_toss_list(ls); | ||
65 | 69 | ||
66 | /* | 70 | /* |
67 | * This list of root rsb's will be the basis of most of the recovery | 71 | * This list of root rsb's will be the basis of most of the recovery |
@@ -72,28 +76,14 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
72 | 76 | ||
73 | /* | 77 | /* |
74 | * Add or remove nodes from the lockspace's ls_nodes list. | 78 | * Add or remove nodes from the lockspace's ls_nodes list. |
79 | * Also waits for all nodes to complete dlm_recover_members. | ||
75 | */ | 80 | */ |
76 | 81 | ||
77 | error = dlm_recover_members(ls, rv, &neg); | 82 | error = dlm_recover_members(ls, rv, &neg); |
78 | if (error) { | 83 | if (error) { |
79 | log_debug(ls, "dlm_recover_members error %d", error); | 84 | log_debug(ls, "recover_members failed %d", error); |
80 | goto fail; | 85 | goto fail; |
81 | } | 86 | } |
82 | |||
83 | dlm_recover_dir_nodeid(ls); | ||
84 | |||
85 | ls->ls_recover_dir_sent_res = 0; | ||
86 | ls->ls_recover_dir_sent_msg = 0; | ||
87 | ls->ls_recover_locks_in = 0; | ||
88 | |||
89 | dlm_set_recover_status(ls, DLM_RS_NODES); | ||
90 | |||
91 | error = dlm_recover_members_wait(ls); | ||
92 | if (error) { | ||
93 | log_debug(ls, "dlm_recover_members_wait error %d", error); | ||
94 | goto fail; | ||
95 | } | ||
96 | |||
97 | start = jiffies; | 87 | start = jiffies; |
98 | 88 | ||
99 | /* | 89 | /* |
@@ -103,21 +93,20 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
103 | 93 | ||
104 | error = dlm_recover_directory(ls); | 94 | error = dlm_recover_directory(ls); |
105 | if (error) { | 95 | if (error) { |
106 | log_debug(ls, "dlm_recover_directory error %d", error); | 96 | log_debug(ls, "recover_directory failed %d", error); |
107 | goto fail; | 97 | goto fail; |
108 | } | 98 | } |
109 | 99 | ||
110 | dlm_set_recover_status(ls, DLM_RS_DIR); | 100 | /* |
101 | * Wait for all nodes to complete directory rebuild. | ||
102 | */ | ||
111 | 103 | ||
112 | error = dlm_recover_directory_wait(ls); | 104 | error = dlm_recover_directory_wait(ls); |
113 | if (error) { | 105 | if (error) { |
114 | log_debug(ls, "dlm_recover_directory_wait error %d", error); | 106 | log_debug(ls, "recover_directory_wait failed %d", error); |
115 | goto fail; | 107 | goto fail; |
116 | } | 108 | } |
117 | 109 | ||
118 | log_debug(ls, "dlm_recover_directory %u out %u messages", | ||
119 | ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg); | ||
120 | |||
121 | /* | 110 | /* |
122 | * We may have outstanding operations that are waiting for a reply from | 111 | * We may have outstanding operations that are waiting for a reply from |
123 | * a failed node. Mark these to be resent after recovery. Unlock and | 112 | * a failed node. Mark these to be resent after recovery. Unlock and |
@@ -135,7 +124,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
135 | * Clear lkb's for departed nodes. | 124 | * Clear lkb's for departed nodes. |
136 | */ | 125 | */ |
137 | 126 | ||
138 | dlm_recover_purge(ls); | 127 | dlm_purge_locks(ls); |
139 | 128 | ||
140 | /* | 129 | /* |
141 | * Get new master nodeid's for rsb's that were mastered on | 130 | * Get new master nodeid's for rsb's that were mastered on |
@@ -144,7 +133,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
144 | 133 | ||
145 | error = dlm_recover_masters(ls); | 134 | error = dlm_recover_masters(ls); |
146 | if (error) { | 135 | if (error) { |
147 | log_debug(ls, "dlm_recover_masters error %d", error); | 136 | log_debug(ls, "recover_masters failed %d", error); |
148 | goto fail; | 137 | goto fail; |
149 | } | 138 | } |
150 | 139 | ||
@@ -154,21 +143,16 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
154 | 143 | ||
155 | error = dlm_recover_locks(ls); | 144 | error = dlm_recover_locks(ls); |
156 | if (error) { | 145 | if (error) { |
157 | log_debug(ls, "dlm_recover_locks error %d", error); | 146 | log_debug(ls, "recover_locks failed %d", error); |
158 | goto fail; | 147 | goto fail; |
159 | } | 148 | } |
160 | 149 | ||
161 | dlm_set_recover_status(ls, DLM_RS_LOCKS); | ||
162 | |||
163 | error = dlm_recover_locks_wait(ls); | 150 | error = dlm_recover_locks_wait(ls); |
164 | if (error) { | 151 | if (error) { |
165 | log_debug(ls, "dlm_recover_locks_wait error %d", error); | 152 | log_debug(ls, "recover_locks_wait failed %d", error); |
166 | goto fail; | 153 | goto fail; |
167 | } | 154 | } |
168 | 155 | ||
169 | log_debug(ls, "dlm_recover_locks %u in", | ||
170 | ls->ls_recover_locks_in); | ||
171 | |||
172 | /* | 156 | /* |
173 | * Finalize state in master rsb's now that all locks can be | 157 | * Finalize state in master rsb's now that all locks can be |
174 | * checked. This includes conversion resolution and lvb | 158 | * checked. This includes conversion resolution and lvb |
@@ -186,7 +170,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
186 | 170 | ||
187 | error = dlm_recover_locks_wait(ls); | 171 | error = dlm_recover_locks_wait(ls); |
188 | if (error) { | 172 | if (error) { |
189 | log_debug(ls, "dlm_recover_locks_wait error %d", error); | 173 | log_debug(ls, "recover_locks_wait failed %d", error); |
190 | goto fail; | 174 | goto fail; |
191 | } | 175 | } |
192 | } | 176 | } |
@@ -202,10 +186,9 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
202 | dlm_purge_requestqueue(ls); | 186 | dlm_purge_requestqueue(ls); |
203 | 187 | ||
204 | dlm_set_recover_status(ls, DLM_RS_DONE); | 188 | dlm_set_recover_status(ls, DLM_RS_DONE); |
205 | |||
206 | error = dlm_recover_done_wait(ls); | 189 | error = dlm_recover_done_wait(ls); |
207 | if (error) { | 190 | if (error) { |
208 | log_debug(ls, "dlm_recover_done_wait error %d", error); | 191 | log_debug(ls, "recover_done_wait failed %d", error); |
209 | goto fail; | 192 | goto fail; |
210 | } | 193 | } |
211 | 194 | ||
@@ -217,35 +200,34 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
217 | 200 | ||
218 | error = enable_locking(ls, rv->seq); | 201 | error = enable_locking(ls, rv->seq); |
219 | if (error) { | 202 | if (error) { |
220 | log_debug(ls, "enable_locking error %d", error); | 203 | log_debug(ls, "enable_locking failed %d", error); |
221 | goto fail; | 204 | goto fail; |
222 | } | 205 | } |
223 | 206 | ||
224 | error = dlm_process_requestqueue(ls); | 207 | error = dlm_process_requestqueue(ls); |
225 | if (error) { | 208 | if (error) { |
226 | log_debug(ls, "dlm_process_requestqueue error %d", error); | 209 | log_debug(ls, "process_requestqueue failed %d", error); |
227 | goto fail; | 210 | goto fail; |
228 | } | 211 | } |
229 | 212 | ||
230 | error = dlm_recover_waiters_post(ls); | 213 | error = dlm_recover_waiters_post(ls); |
231 | if (error) { | 214 | if (error) { |
232 | log_debug(ls, "dlm_recover_waiters_post error %d", error); | 215 | log_debug(ls, "recover_waiters_post failed %d", error); |
233 | goto fail; | 216 | goto fail; |
234 | } | 217 | } |
235 | 218 | ||
236 | dlm_recover_grant(ls); | 219 | dlm_grant_after_purge(ls); |
237 | 220 | ||
238 | log_debug(ls, "dlm_recover %llu generation %u done: %u ms", | 221 | log_debug(ls, "recover %llx done: %u ms", |
239 | (unsigned long long)rv->seq, ls->ls_generation, | 222 | (unsigned long long)rv->seq, |
240 | jiffies_to_msecs(jiffies - start)); | 223 | jiffies_to_msecs(jiffies - start)); |
241 | mutex_unlock(&ls->ls_recoverd_active); | 224 | mutex_unlock(&ls->ls_recoverd_active); |
242 | 225 | ||
243 | dlm_lsop_recover_done(ls); | ||
244 | return 0; | 226 | return 0; |
245 | 227 | ||
246 | fail: | 228 | fail: |
247 | dlm_release_root_list(ls); | 229 | dlm_release_root_list(ls); |
248 | log_debug(ls, "dlm_recover %llu error %d", | 230 | log_debug(ls, "recover %llx error %d", |
249 | (unsigned long long)rv->seq, error); | 231 | (unsigned long long)rv->seq, error); |
250 | mutex_unlock(&ls->ls_recoverd_active); | 232 | mutex_unlock(&ls->ls_recoverd_active); |
251 | return error; | 233 | return error; |
@@ -263,12 +245,13 @@ static void do_ls_recovery(struct dlm_ls *ls) | |||
263 | rv = ls->ls_recover_args; | 245 | rv = ls->ls_recover_args; |
264 | ls->ls_recover_args = NULL; | 246 | ls->ls_recover_args = NULL; |
265 | if (rv && ls->ls_recover_seq == rv->seq) | 247 | if (rv && ls->ls_recover_seq == rv->seq) |
266 | clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags); | 248 | clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); |
267 | spin_unlock(&ls->ls_recover_lock); | 249 | spin_unlock(&ls->ls_recover_lock); |
268 | 250 | ||
269 | if (rv) { | 251 | if (rv) { |
270 | ls_recover(ls, rv); | 252 | ls_recover(ls, rv); |
271 | kfree(rv->nodes); | 253 | kfree(rv->nodeids); |
254 | kfree(rv->new); | ||
272 | kfree(rv); | 255 | kfree(rv); |
273 | } | 256 | } |
274 | } | 257 | } |
@@ -283,34 +266,26 @@ static int dlm_recoverd(void *arg) | |||
283 | return -1; | 266 | return -1; |
284 | } | 267 | } |
285 | 268 | ||
286 | down_write(&ls->ls_in_recovery); | ||
287 | set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); | ||
288 | wake_up(&ls->ls_recover_lock_wait); | ||
289 | |||
290 | while (!kthread_should_stop()) { | 269 | while (!kthread_should_stop()) { |
291 | set_current_state(TASK_INTERRUPTIBLE); | 270 | set_current_state(TASK_INTERRUPTIBLE); |
292 | if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) && | 271 | if (!test_bit(LSFL_WORK, &ls->ls_flags)) |
293 | !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) | ||
294 | schedule(); | 272 | schedule(); |
295 | set_current_state(TASK_RUNNING); | 273 | set_current_state(TASK_RUNNING); |
296 | 274 | ||
297 | if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) { | 275 | if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) |
298 | down_write(&ls->ls_in_recovery); | ||
299 | set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); | ||
300 | wake_up(&ls->ls_recover_lock_wait); | ||
301 | } | ||
302 | |||
303 | if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags)) | ||
304 | do_ls_recovery(ls); | 276 | do_ls_recovery(ls); |
305 | } | 277 | } |
306 | 278 | ||
307 | if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)) | ||
308 | up_write(&ls->ls_in_recovery); | ||
309 | |||
310 | dlm_put_lockspace(ls); | 279 | dlm_put_lockspace(ls); |
311 | return 0; | 280 | return 0; |
312 | } | 281 | } |
313 | 282 | ||
283 | void dlm_recoverd_kick(struct dlm_ls *ls) | ||
284 | { | ||
285 | set_bit(LSFL_WORK, &ls->ls_flags); | ||
286 | wake_up_process(ls->ls_recoverd_task); | ||
287 | } | ||
288 | |||
314 | int dlm_recoverd_start(struct dlm_ls *ls) | 289 | int dlm_recoverd_start(struct dlm_ls *ls) |
315 | { | 290 | { |
316 | struct task_struct *p; | 291 | struct task_struct *p; |
diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h index 8856079733f..866657c5d69 100644 --- a/fs/dlm/recoverd.h +++ b/fs/dlm/recoverd.h | |||
@@ -14,6 +14,7 @@ | |||
14 | #ifndef __RECOVERD_DOT_H__ | 14 | #ifndef __RECOVERD_DOT_H__ |
15 | #define __RECOVERD_DOT_H__ | 15 | #define __RECOVERD_DOT_H__ |
16 | 16 | ||
17 | void dlm_recoverd_kick(struct dlm_ls *ls); | ||
17 | void dlm_recoverd_stop(struct dlm_ls *ls); | 18 | void dlm_recoverd_stop(struct dlm_ls *ls); |
18 | int dlm_recoverd_start(struct dlm_ls *ls); | 19 | int dlm_recoverd_start(struct dlm_ls *ls); |
19 | void dlm_recoverd_suspend(struct dlm_ls *ls); | 20 | void dlm_recoverd_suspend(struct dlm_ls *ls); |
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index 1695f1b0dd4..a44fa22890e 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c | |||
@@ -19,7 +19,6 @@ | |||
19 | 19 | ||
20 | struct rq_entry { | 20 | struct rq_entry { |
21 | struct list_head list; | 21 | struct list_head list; |
22 | uint32_t recover_seq; | ||
23 | int nodeid; | 22 | int nodeid; |
24 | struct dlm_message request; | 23 | struct dlm_message request; |
25 | }; | 24 | }; |
@@ -42,7 +41,6 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) | |||
42 | return; | 41 | return; |
43 | } | 42 | } |
44 | 43 | ||
45 | e->recover_seq = ls->ls_recover_seq & 0xFFFFFFFF; | ||
46 | e->nodeid = nodeid; | 44 | e->nodeid = nodeid; |
47 | memcpy(&e->request, ms, ms->m_header.h_length); | 45 | memcpy(&e->request, ms, ms->m_header.h_length); |
48 | 46 | ||
@@ -65,7 +63,6 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) | |||
65 | int dlm_process_requestqueue(struct dlm_ls *ls) | 63 | int dlm_process_requestqueue(struct dlm_ls *ls) |
66 | { | 64 | { |
67 | struct rq_entry *e; | 65 | struct rq_entry *e; |
68 | struct dlm_message *ms; | ||
69 | int error = 0; | 66 | int error = 0; |
70 | 67 | ||
71 | mutex_lock(&ls->ls_requestqueue_mutex); | 68 | mutex_lock(&ls->ls_requestqueue_mutex); |
@@ -79,15 +76,7 @@ int dlm_process_requestqueue(struct dlm_ls *ls) | |||
79 | e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); | 76 | e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); |
80 | mutex_unlock(&ls->ls_requestqueue_mutex); | 77 | mutex_unlock(&ls->ls_requestqueue_mutex); |
81 | 78 | ||
82 | ms = &e->request; | 79 | dlm_receive_message_saved(ls, &e->request); |
83 | |||
84 | log_limit(ls, "dlm_process_requestqueue msg %d from %d " | ||
85 | "lkid %x remid %x result %d seq %u", | ||
86 | ms->m_type, ms->m_header.h_nodeid, | ||
87 | ms->m_lkid, ms->m_remid, ms->m_result, | ||
88 | e->recover_seq); | ||
89 | |||
90 | dlm_receive_message_saved(ls, &e->request, e->recover_seq); | ||
91 | 80 | ||
92 | mutex_lock(&ls->ls_requestqueue_mutex); | 81 | mutex_lock(&ls->ls_requestqueue_mutex); |
93 | list_del(&e->list); | 82 | list_del(&e->list); |
@@ -149,7 +138,35 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) | |||
149 | if (!dlm_no_directory(ls)) | 138 | if (!dlm_no_directory(ls)) |
150 | return 0; | 139 | return 0; |
151 | 140 | ||
152 | return 1; | 141 | /* with no directory, the master is likely to change as a part of |
142 | recovery; requests to/from the defunct master need to be purged */ | ||
143 | |||
144 | switch (type) { | ||
145 | case DLM_MSG_REQUEST: | ||
146 | case DLM_MSG_CONVERT: | ||
147 | case DLM_MSG_UNLOCK: | ||
148 | case DLM_MSG_CANCEL: | ||
149 | /* we're no longer the master of this resource, the sender | ||
150 | will resend to the new master (see waiter_needs_recovery) */ | ||
151 | |||
152 | if (dlm_hash2nodeid(ls, ms->m_hash) != dlm_our_nodeid()) | ||
153 | return 1; | ||
154 | break; | ||
155 | |||
156 | case DLM_MSG_REQUEST_REPLY: | ||
157 | case DLM_MSG_CONVERT_REPLY: | ||
158 | case DLM_MSG_UNLOCK_REPLY: | ||
159 | case DLM_MSG_CANCEL_REPLY: | ||
160 | case DLM_MSG_GRANT: | ||
161 | /* this reply is from the former master of the resource, | ||
162 | we'll resend to the new master if needed */ | ||
163 | |||
164 | if (dlm_hash2nodeid(ls, ms->m_hash) != nodeid) | ||
165 | return 1; | ||
166 | break; | ||
167 | } | ||
168 | |||
169 | return 0; | ||
153 | } | 170 | } |
154 | 171 | ||
155 | void dlm_purge_requestqueue(struct dlm_ls *ls) | 172 | void dlm_purge_requestqueue(struct dlm_ls *ls) |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 7ff49852b0c..d8ea6075640 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -392,9 +392,8 @@ static int device_create_lockspace(struct dlm_lspace_params *params) | |||
392 | if (!capable(CAP_SYS_ADMIN)) | 392 | if (!capable(CAP_SYS_ADMIN)) |
393 | return -EPERM; | 393 | return -EPERM; |
394 | 394 | ||
395 | error = dlm_new_lockspace(params->name, NULL, params->flags, | 395 | error = dlm_new_lockspace(params->name, strlen(params->name), |
396 | DLM_USER_LVB_LEN, NULL, NULL, NULL, | 396 | &lockspace, params->flags, DLM_USER_LVB_LEN); |
397 | &lockspace); | ||
398 | if (error) | 397 | if (error) |
399 | return error; | 398 | return error; |
400 | 399 | ||
@@ -503,13 +502,6 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
503 | #endif | 502 | #endif |
504 | return -EINVAL; | 503 | return -EINVAL; |
505 | 504 | ||
506 | #ifdef CONFIG_COMPAT | ||
507 | if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN) | ||
508 | #else | ||
509 | if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN) | ||
510 | #endif | ||
511 | return -EINVAL; | ||
512 | |||
513 | kbuf = kzalloc(count + 1, GFP_NOFS); | 505 | kbuf = kzalloc(count + 1, GFP_NOFS); |
514 | if (!kbuf) | 506 | if (!kbuf) |
515 | return -ENOMEM; | 507 | return -ENOMEM; |