aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/stack_o2cb.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/stack_o2cb.c')
-rw-r--r--fs/ocfs2/stack_o2cb.c71
1 files changed, 63 insertions, 8 deletions
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 19965b00c43..94368017edb 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -28,6 +28,7 @@
28#include "cluster/masklog.h" 28#include "cluster/masklog.h"
29#include "cluster/nodemanager.h" 29#include "cluster/nodemanager.h"
30#include "cluster/heartbeat.h" 30#include "cluster/heartbeat.h"
31#include "cluster/tcp.h"
31 32
32#include "stackglue.h" 33#include "stackglue.h"
33 34
@@ -256,6 +257,61 @@ static void o2cb_dump_lksb(struct ocfs2_dlm_lksb *lksb)
256} 257}
257 258
258/* 259/*
260 * Check if this node is heartbeating and is connected to all other
261 * heartbeating nodes.
262 */
263static int o2cb_cluster_check(void)
264{
265 u8 node_num;
266 int i;
267 unsigned long hbmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
268 unsigned long netmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
269
270 node_num = o2nm_this_node();
271 if (node_num == O2NM_MAX_NODES) {
272 printk(KERN_ERR "o2cb: This node has not been configured.\n");
273 return -EINVAL;
274 }
275
276 /*
277 * o2dlm expects o2net sockets to be created. If not, then
278 * dlm_join_domain() fails with a stack of errors which are both cryptic
279 * and incomplete. The idea here is to detect upfront whether we have
280 * managed to connect to all nodes or not. If not, then list the nodes
281 * to allow the user to check the configuration (incorrect IP, firewall,
282 * etc.) Yes, this is racy. But its not the end of the world.
283 */
284#define O2CB_MAP_STABILIZE_COUNT 60
285 for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) {
286 o2hb_fill_node_map(hbmap, sizeof(hbmap));
287 if (!test_bit(node_num, hbmap)) {
288 printk(KERN_ERR "o2cb: %s heartbeat has not been "
289 "started.\n", (o2hb_global_heartbeat_active() ?
290 "Global" : "Local"));
291 return -EINVAL;
292 }
293 o2net_fill_node_map(netmap, sizeof(netmap));
294 /* Force set the current node to allow easy compare */
295 set_bit(node_num, netmap);
296 if (!memcmp(hbmap, netmap, sizeof(hbmap)))
297 return 0;
298 if (i < O2CB_MAP_STABILIZE_COUNT)
299 msleep(1000);
300 }
301
302 printk(KERN_ERR "o2cb: This node could not connect to nodes:");
303 i = -1;
304 while ((i = find_next_bit(hbmap, O2NM_MAX_NODES,
305 i + 1)) < O2NM_MAX_NODES) {
306 if (!test_bit(i, netmap))
307 printk(" %u", i);
308 }
309 printk(".\n");
310
311 return -ENOTCONN;
312}
313
314/*
259 * Called from the dlm when it's about to evict a node. This is how the 315 * Called from the dlm when it's about to evict a node. This is how the
260 * classic stack signals node death. 316 * classic stack signals node death.
261 */ 317 */
@@ -263,8 +319,8 @@ static void o2dlm_eviction_cb(int node_num, void *data)
263{ 319{
264 struct ocfs2_cluster_connection *conn = data; 320 struct ocfs2_cluster_connection *conn = data;
265 321
266 mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n", 322 printk(KERN_NOTICE "o2cb: o2dlm has evicted node %d from domain %.*s\n",
267 node_num, conn->cc_namelen, conn->cc_name); 323 node_num, conn->cc_namelen, conn->cc_name);
268 324
269 conn->cc_recovery_handler(node_num, conn->cc_recovery_data); 325 conn->cc_recovery_handler(node_num, conn->cc_recovery_data);
270} 326}
@@ -280,12 +336,11 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
280 BUG_ON(conn == NULL); 336 BUG_ON(conn == NULL);
281 BUG_ON(conn->cc_proto == NULL); 337 BUG_ON(conn->cc_proto == NULL);
282 338
283 /* for now we only have one cluster/node, make sure we see it 339 /* Ensure cluster stack is up and all nodes are connected */
284 * in the heartbeat universe */ 340 rc = o2cb_cluster_check();
285 if (!o2hb_check_local_node_heartbeating()) { 341 if (rc) {
286 if (o2hb_global_heartbeat_active()) 342 printk(KERN_ERR "o2cb: Cluster check failed. Fix errors "
287 mlog(ML_ERROR, "Global heartbeat not started\n"); 343 "before retrying.\n");
288 rc = -EINVAL;
289 goto out; 344 goto out;
290 } 345 }
291 346