diff options
Diffstat (limited to 'fs/ocfs2/dlm/dlmdomain.c')
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 401 |
1 files changed, 400 insertions, 1 deletions
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 153abb5abef0..58a93b953735 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
128 | * will have a negotiated version with the same major number and a minor | 128 | * will have a negotiated version with the same major number and a minor |
129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should | 129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should |
130 | * be used to determine what a running domain is actually using. | 130 | * be used to determine what a running domain is actually using. |
131 | * | ||
132 | * New in version 1.1: | ||
133 | * - Message DLM_QUERY_REGION added to support global heartbeat | ||
134 | * - Message DLM_QUERY_NODEINFO added to allow online node removes | ||
131 | */ | 135 | */ |
132 | static const struct dlm_protocol_version dlm_protocol = { | 136 | static const struct dlm_protocol_version dlm_protocol = { |
133 | .pv_major = 1, | 137 | .pv_major = 1, |
134 | .pv_minor = 0, | 138 | .pv_minor = 1, |
135 | }; | 139 | }; |
136 | 140 | ||
137 | #define DLM_DOMAIN_BACKOFF_MS 200 | 141 | #define DLM_DOMAIN_BACKOFF_MS 200 |
@@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
142 | void **ret_data); | 146 | void **ret_data); |
143 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 147 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
144 | void **ret_data); | 148 | void **ret_data); |
149 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
150 | void *data, void **ret_data); | ||
145 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | 151 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, |
146 | void **ret_data); | 152 | void **ret_data); |
147 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, | 153 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, |
@@ -693,6 +699,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
693 | 699 | ||
694 | dlm_mark_domain_leaving(dlm); | 700 | dlm_mark_domain_leaving(dlm); |
695 | dlm_leave_domain(dlm); | 701 | dlm_leave_domain(dlm); |
702 | dlm_force_free_mles(dlm); | ||
696 | dlm_complete_dlm_shutdown(dlm); | 703 | dlm_complete_dlm_shutdown(dlm); |
697 | } | 704 | } |
698 | dlm_put(dlm); | 705 | dlm_put(dlm); |
@@ -920,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
920 | return 0; | 927 | return 0; |
921 | } | 928 | } |
922 | 929 | ||
930 | static int dlm_match_regions(struct dlm_ctxt *dlm, | ||
931 | struct dlm_query_region *qr) | ||
932 | { | ||
933 | char *local = NULL, *remote = qr->qr_regions; | ||
934 | char *l, *r; | ||
935 | int localnr, i, j, foundit; | ||
936 | int status = 0; | ||
937 | |||
938 | if (!o2hb_global_heartbeat_active()) { | ||
939 | if (qr->qr_numregions) { | ||
940 | mlog(ML_ERROR, "Domain %s: Joining node %d has global " | ||
941 | "heartbeat enabled but local node %d does not\n", | ||
942 | qr->qr_domain, qr->qr_node, dlm->node_num); | ||
943 | status = -EINVAL; | ||
944 | } | ||
945 | goto bail; | ||
946 | } | ||
947 | |||
948 | if (o2hb_global_heartbeat_active() && !qr->qr_numregions) { | ||
949 | mlog(ML_ERROR, "Domain %s: Local node %d has global " | ||
950 | "heartbeat enabled but joining node %d does not\n", | ||
951 | qr->qr_domain, dlm->node_num, qr->qr_node); | ||
952 | status = -EINVAL; | ||
953 | goto bail; | ||
954 | } | ||
955 | |||
956 | r = remote; | ||
957 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
958 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); | ||
959 | r += O2HB_MAX_REGION_NAME_LEN; | ||
960 | } | ||
961 | |||
962 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); | ||
963 | if (!local) { | ||
964 | status = -ENOMEM; | ||
965 | goto bail; | ||
966 | } | ||
967 | |||
968 | localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS); | ||
969 | |||
970 | /* compare local regions with remote */ | ||
971 | l = local; | ||
972 | for (i = 0; i < localnr; ++i) { | ||
973 | foundit = 0; | ||
974 | r = remote; | ||
975 | for (j = 0; j <= qr->qr_numregions; ++j) { | ||
976 | if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) { | ||
977 | foundit = 1; | ||
978 | break; | ||
979 | } | ||
980 | r += O2HB_MAX_REGION_NAME_LEN; | ||
981 | } | ||
982 | if (!foundit) { | ||
983 | status = -EINVAL; | ||
984 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
985 | "in local node %d but not in joining node %d\n", | ||
986 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l, | ||
987 | dlm->node_num, qr->qr_node); | ||
988 | goto bail; | ||
989 | } | ||
990 | l += O2HB_MAX_REGION_NAME_LEN; | ||
991 | } | ||
992 | |||
993 | /* compare remote with local regions */ | ||
994 | r = remote; | ||
995 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
996 | foundit = 0; | ||
997 | l = local; | ||
998 | for (j = 0; j < localnr; ++j) { | ||
999 | if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) { | ||
1000 | foundit = 1; | ||
1001 | break; | ||
1002 | } | ||
1003 | l += O2HB_MAX_REGION_NAME_LEN; | ||
1004 | } | ||
1005 | if (!foundit) { | ||
1006 | status = -EINVAL; | ||
1007 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
1008 | "in joining node %d but not in local node %d\n", | ||
1009 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r, | ||
1010 | qr->qr_node, dlm->node_num); | ||
1011 | goto bail; | ||
1012 | } | ||
1013 | r += O2HB_MAX_REGION_NAME_LEN; | ||
1014 | } | ||
1015 | |||
1016 | bail: | ||
1017 | kfree(local); | ||
1018 | |||
1019 | return status; | ||
1020 | } | ||
1021 | |||
1022 | static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
1023 | { | ||
1024 | struct dlm_query_region *qr = NULL; | ||
1025 | int status, ret = 0, i; | ||
1026 | char *p; | ||
1027 | |||
1028 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
1029 | goto bail; | ||
1030 | |||
1031 | qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); | ||
1032 | if (!qr) { | ||
1033 | ret = -ENOMEM; | ||
1034 | mlog_errno(ret); | ||
1035 | goto bail; | ||
1036 | } | ||
1037 | |||
1038 | qr->qr_node = dlm->node_num; | ||
1039 | qr->qr_namelen = strlen(dlm->name); | ||
1040 | memcpy(qr->qr_domain, dlm->name, qr->qr_namelen); | ||
1041 | /* if local hb, the numregions will be zero */ | ||
1042 | if (o2hb_global_heartbeat_active()) | ||
1043 | qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions, | ||
1044 | O2NM_MAX_REGIONS); | ||
1045 | |||
1046 | p = qr->qr_regions; | ||
1047 | for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) | ||
1048 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p); | ||
1049 | |||
1050 | i = -1; | ||
1051 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
1052 | i + 1)) < O2NM_MAX_NODES) { | ||
1053 | if (i == dlm->node_num) | ||
1054 | continue; | ||
1055 | |||
1056 | mlog(0, "Sending regions to node %d\n", i); | ||
1057 | |||
1058 | ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr, | ||
1059 | sizeof(struct dlm_query_region), | ||
1060 | i, &status); | ||
1061 | if (ret >= 0) | ||
1062 | ret = status; | ||
1063 | if (ret) { | ||
1064 | mlog(ML_ERROR, "Region mismatch %d, node %d\n", | ||
1065 | ret, i); | ||
1066 | break; | ||
1067 | } | ||
1068 | } | ||
1069 | |||
1070 | bail: | ||
1071 | kfree(qr); | ||
1072 | return ret; | ||
1073 | } | ||
1074 | |||
1075 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
1076 | void *data, void **ret_data) | ||
1077 | { | ||
1078 | struct dlm_query_region *qr; | ||
1079 | struct dlm_ctxt *dlm = NULL; | ||
1080 | int status = 0; | ||
1081 | int locked = 0; | ||
1082 | |||
1083 | qr = (struct dlm_query_region *) msg->buf; | ||
1084 | |||
1085 | mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, | ||
1086 | qr->qr_domain); | ||
1087 | |||
1088 | status = -EINVAL; | ||
1089 | |||
1090 | spin_lock(&dlm_domain_lock); | ||
1091 | dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen); | ||
1092 | if (!dlm) { | ||
1093 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1094 | "before join domain\n", qr->qr_node, qr->qr_domain); | ||
1095 | goto bail; | ||
1096 | } | ||
1097 | |||
1098 | spin_lock(&dlm->spinlock); | ||
1099 | locked = 1; | ||
1100 | if (dlm->joining_node != qr->qr_node) { | ||
1101 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1102 | "but joining node is %d\n", qr->qr_node, qr->qr_domain, | ||
1103 | dlm->joining_node); | ||
1104 | goto bail; | ||
1105 | } | ||
1106 | |||
1107 | /* Support for global heartbeat was added in 1.1 */ | ||
1108 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
1109 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
1110 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1111 | "but active dlm protocol is %d.%d\n", qr->qr_node, | ||
1112 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, | ||
1113 | dlm->dlm_locking_proto.pv_minor); | ||
1114 | goto bail; | ||
1115 | } | ||
1116 | |||
1117 | status = dlm_match_regions(dlm, qr); | ||
1118 | |||
1119 | bail: | ||
1120 | if (locked) | ||
1121 | spin_unlock(&dlm->spinlock); | ||
1122 | spin_unlock(&dlm_domain_lock); | ||
1123 | |||
1124 | return status; | ||
1125 | } | ||
1126 | |||
1127 | static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) | ||
1128 | { | ||
1129 | struct o2nm_node *local; | ||
1130 | struct dlm_node_info *remote; | ||
1131 | int i, j; | ||
1132 | int status = 0; | ||
1133 | |||
1134 | for (j = 0; j < qn->qn_numnodes; ++j) | ||
1135 | mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum, | ||
1136 | &(qn->qn_nodes[j].ni_ipv4_address), | ||
1137 | ntohs(qn->qn_nodes[j].ni_ipv4_port)); | ||
1138 | |||
1139 | for (i = 0; i < O2NM_MAX_NODES && !status; ++i) { | ||
1140 | local = o2nm_get_node_by_num(i); | ||
1141 | remote = NULL; | ||
1142 | for (j = 0; j < qn->qn_numnodes; ++j) { | ||
1143 | if (qn->qn_nodes[j].ni_nodenum == i) { | ||
1144 | remote = &(qn->qn_nodes[j]); | ||
1145 | break; | ||
1146 | } | ||
1147 | } | ||
1148 | |||
1149 | if (!local && !remote) | ||
1150 | continue; | ||
1151 | |||
1152 | if ((local && !remote) || (!local && remote)) | ||
1153 | status = -EINVAL; | ||
1154 | |||
1155 | if (!status && | ||
1156 | ((remote->ni_nodenum != local->nd_num) || | ||
1157 | (remote->ni_ipv4_port != local->nd_ipv4_port) || | ||
1158 | (remote->ni_ipv4_address != local->nd_ipv4_address))) | ||
1159 | status = -EINVAL; | ||
1160 | |||
1161 | if (status) { | ||
1162 | if (remote && !local) | ||
1163 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
1164 | "registered in joining node %d but not in " | ||
1165 | "local node %d\n", qn->qn_domain, | ||
1166 | remote->ni_nodenum, | ||
1167 | &(remote->ni_ipv4_address), | ||
1168 | ntohs(remote->ni_ipv4_port), | ||
1169 | qn->qn_nodenum, dlm->node_num); | ||
1170 | if (local && !remote) | ||
1171 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
1172 | "registered in local node %d but not in " | ||
1173 | "joining node %d\n", qn->qn_domain, | ||
1174 | local->nd_num, &(local->nd_ipv4_address), | ||
1175 | ntohs(local->nd_ipv4_port), | ||
1176 | dlm->node_num, qn->qn_nodenum); | ||
1177 | BUG_ON((!local && !remote)); | ||
1178 | } | ||
1179 | |||
1180 | if (local) | ||
1181 | o2nm_node_put(local); | ||
1182 | } | ||
1183 | |||
1184 | return status; | ||
1185 | } | ||
1186 | |||
1187 | static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
1188 | { | ||
1189 | struct dlm_query_nodeinfo *qn = NULL; | ||
1190 | struct o2nm_node *node; | ||
1191 | int ret = 0, status, count, i; | ||
1192 | |||
1193 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
1194 | goto bail; | ||
1195 | |||
1196 | qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); | ||
1197 | if (!qn) { | ||
1198 | ret = -ENOMEM; | ||
1199 | mlog_errno(ret); | ||
1200 | goto bail; | ||
1201 | } | ||
1202 | |||
1203 | for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { | ||
1204 | node = o2nm_get_node_by_num(i); | ||
1205 | if (!node) | ||
1206 | continue; | ||
1207 | qn->qn_nodes[count].ni_nodenum = node->nd_num; | ||
1208 | qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; | ||
1209 | qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; | ||
1210 | mlog(0, "Node %3d, %pI4:%u\n", node->nd_num, | ||
1211 | &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); | ||
1212 | ++count; | ||
1213 | o2nm_node_put(node); | ||
1214 | } | ||
1215 | |||
1216 | qn->qn_nodenum = dlm->node_num; | ||
1217 | qn->qn_numnodes = count; | ||
1218 | qn->qn_namelen = strlen(dlm->name); | ||
1219 | memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); | ||
1220 | |||
1221 | i = -1; | ||
1222 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
1223 | i + 1)) < O2NM_MAX_NODES) { | ||
1224 | if (i == dlm->node_num) | ||
1225 | continue; | ||
1226 | |||
1227 | mlog(0, "Sending nodeinfo to node %d\n", i); | ||
1228 | |||
1229 | ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
1230 | qn, sizeof(struct dlm_query_nodeinfo), | ||
1231 | i, &status); | ||
1232 | if (ret >= 0) | ||
1233 | ret = status; | ||
1234 | if (ret) { | ||
1235 | mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i); | ||
1236 | break; | ||
1237 | } | ||
1238 | } | ||
1239 | |||
1240 | bail: | ||
1241 | kfree(qn); | ||
1242 | return ret; | ||
1243 | } | ||
1244 | |||
1245 | static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, | ||
1246 | void *data, void **ret_data) | ||
1247 | { | ||
1248 | struct dlm_query_nodeinfo *qn; | ||
1249 | struct dlm_ctxt *dlm = NULL; | ||
1250 | int locked = 0, status = -EINVAL; | ||
1251 | |||
1252 | qn = (struct dlm_query_nodeinfo *) msg->buf; | ||
1253 | |||
1254 | mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum, | ||
1255 | qn->qn_domain); | ||
1256 | |||
1257 | spin_lock(&dlm_domain_lock); | ||
1258 | dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen); | ||
1259 | if (!dlm) { | ||
1260 | mlog(ML_ERROR, "Node %d queried nodes on domain %s before " | ||
1261 | "join domain\n", qn->qn_nodenum, qn->qn_domain); | ||
1262 | goto bail; | ||
1263 | } | ||
1264 | |||
1265 | spin_lock(&dlm->spinlock); | ||
1266 | locked = 1; | ||
1267 | if (dlm->joining_node != qn->qn_nodenum) { | ||
1268 | mlog(ML_ERROR, "Node %d queried nodes on domain %s but " | ||
1269 | "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, | ||
1270 | dlm->joining_node); | ||
1271 | goto bail; | ||
1272 | } | ||
1273 | |||
1274 | /* Support for node query was added in 1.1 */ | ||
1275 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
1276 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
1277 | mlog(ML_ERROR, "Node %d queried nodes on domain %s " | ||
1278 | "but active dlm protocol is %d.%d\n", qn->qn_nodenum, | ||
1279 | qn->qn_domain, dlm->dlm_locking_proto.pv_major, | ||
1280 | dlm->dlm_locking_proto.pv_minor); | ||
1281 | goto bail; | ||
1282 | } | ||
1283 | |||
1284 | status = dlm_match_nodes(dlm, qn); | ||
1285 | |||
1286 | bail: | ||
1287 | if (locked) | ||
1288 | spin_unlock(&dlm->spinlock); | ||
1289 | spin_unlock(&dlm_domain_lock); | ||
1290 | |||
1291 | return status; | ||
1292 | } | ||
1293 | |||
923 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 1294 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
924 | void **ret_data) | 1295 | void **ret_data) |
925 | { | 1296 | { |
@@ -1240,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) | |||
1240 | set_bit(dlm->node_num, dlm->domain_map); | 1611 | set_bit(dlm->node_num, dlm->domain_map); |
1241 | spin_unlock(&dlm->spinlock); | 1612 | spin_unlock(&dlm->spinlock); |
1242 | 1613 | ||
1614 | /* Support for global heartbeat and node info was added in 1.1 */ | ||
1615 | if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { | ||
1616 | status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); | ||
1617 | if (status) { | ||
1618 | mlog_errno(status); | ||
1619 | goto bail; | ||
1620 | } | ||
1621 | status = dlm_send_regions(dlm, ctxt->yes_resp_map); | ||
1622 | if (status) { | ||
1623 | mlog_errno(status); | ||
1624 | goto bail; | ||
1625 | } | ||
1626 | } | ||
1627 | |||
1243 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); | 1628 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); |
1244 | 1629 | ||
1245 | /* Joined state *must* be set before the joining node | 1630 | /* Joined state *must* be set before the joining node |
@@ -1806,7 +2191,21 @@ static int dlm_register_net_handlers(void) | |||
1806 | sizeof(struct dlm_cancel_join), | 2191 | sizeof(struct dlm_cancel_join), |
1807 | dlm_cancel_join_handler, | 2192 | dlm_cancel_join_handler, |
1808 | NULL, NULL, &dlm_join_handlers); | 2193 | NULL, NULL, &dlm_join_handlers); |
2194 | if (status) | ||
2195 | goto bail; | ||
2196 | |||
2197 | status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY, | ||
2198 | sizeof(struct dlm_query_region), | ||
2199 | dlm_query_region_handler, | ||
2200 | NULL, NULL, &dlm_join_handlers); | ||
1809 | 2201 | ||
2202 | if (status) | ||
2203 | goto bail; | ||
2204 | |||
2205 | status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
2206 | sizeof(struct dlm_query_nodeinfo), | ||
2207 | dlm_query_nodeinfo_handler, | ||
2208 | NULL, NULL, &dlm_join_handlers); | ||
1810 | bail: | 2209 | bail: |
1811 | if (status < 0) | 2210 | if (status < 0) |
1812 | dlm_unregister_net_handlers(); | 2211 | dlm_unregister_net_handlers(); |