aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSrinivas Eeda <srinivas.eeda@oracle.com>2007-01-29 18:31:35 -0500
committerMark Fasheh <mark.fasheh@oracle.com>2007-02-07 15:09:14 -0500
commit1faf289454b9eeb6e463da3eee47f7009668370d (patch)
treefe77643771a985a20c3782fb2a952cf9c05937a5
parentf3f854648de64c4b6f13f6f13113bc9525c621e5 (diff)
ocfs2_dlm: disallow a domain join if node maps mismatch
There is a small window where a joining node may not see the node(s) that just died but are still part of the domain. To fix this, we must disallow join requests if the joining node has a different node map. A new field, node_map, is added to dlm_query_join_request to send the current node's node map along with the join request. On the receiving end, the nodes that are part of the cluster verify that the new node sees all the nodes that are still part of the cluster. They disallow the join if the maps mismatch. Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h5
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c54
3 files changed, 62 insertions, 1 deletions
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index d74040fac343..177927a8f007 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,9 @@
38 * locking semantics of the file system using the protocol. It should 38 * locking semantics of the file system using the protocol. It should
39 * be somewhere else, I'm sure, but right now it isn't. 39 * be somewhere else, I'm sure, but right now it isn't.
40 * 40 *
41 * New in version 7:
42 * - DLM join domain includes the live nodemap
43 *
41 * New in version 6: 44 * New in version 6:
42 * - DLM lockres remote refcount fixes. 45 * - DLM lockres remote refcount fixes.
43 * 46 *
@@ -54,7 +57,7 @@
54 * - full 64 bit i_size in the metadata lock lvbs 57 * - full 64 bit i_size in the metadata lock lvbs
55 * - introduction of "rw" lock and pushing meta/data locking down 58 * - introduction of "rw" lock and pushing meta/data locking down
56 */ 59 */
57#define O2NET_PROTOCOL_VERSION 6ULL 60#define O2NET_PROTOCOL_VERSION 7ULL
58struct o2net_handshake { 61struct o2net_handshake {
59 __be64 protocol_version; 62 __be64 protocol_version;
60 __be64 connector_id; 63 __be64 connector_id;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 2f4f5d4edb07..e90b92f9ece1 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -625,12 +625,16 @@ struct dlm_begin_reco
625}; 625};
626 626
627 627
628#define BITS_PER_BYTE 8
629#define BITS_TO_BYTES(bits) (((bits)+BITS_PER_BYTE-1)/BITS_PER_BYTE)
630
628struct dlm_query_join_request 631struct dlm_query_join_request
629{ 632{
630 u8 node_idx; 633 u8 node_idx;
631 u8 pad1[2]; 634 u8 pad1[2];
632 u8 name_len; 635 u8 name_len;
633 u8 domain[O2NM_MAX_NAME_LEN]; 636 u8 domain[O2NM_MAX_NAME_LEN];
637 u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)];
634}; 638};
635 639
636struct dlm_assert_joined 640struct dlm_assert_joined
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6590e1bca23c..19b57a6bcb1a 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -48,6 +48,36 @@
48#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) 48#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
49#include "cluster/masklog.h" 49#include "cluster/masklog.h"
50 50
51/*
52 * ocfs2 node maps are arrays of long int, which cannot be sent freely
53 * across the wire due to endianness issues. To work around this, we convert
54 * the long ints to byte arrays. The following 3 routines are helper functions
55 * to set/test/copy bits within those arrays of bytes.
56 */
57static inline void byte_set_bit(u8 nr, u8 map[])
58{
59 map[nr >> 3] |= (1UL << (nr & 7));
60}
61
62static inline int byte_test_bit(u8 nr, u8 map[])
63{
64 return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0;
65}
66
67static inline void byte_copymap(u8 dmap[], unsigned long smap[],
68 unsigned int sz)
69{
70 unsigned int nn;
71
72 if (!sz)
73 return;
74
75 memset(dmap, 0, ((sz + 7) >> 3));
76 for (nn = 0 ; nn < sz; nn++)
77 if (test_bit(nn, smap))
78 byte_set_bit(nn, dmap);
79}
80
51static void dlm_free_pagevec(void **vec, int pages) 81static void dlm_free_pagevec(void **vec, int pages)
52{ 82{
53 while (pages--) 83 while (pages--)
@@ -641,6 +671,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
641 struct dlm_query_join_request *query; 671 struct dlm_query_join_request *query;
642 enum dlm_query_join_response response; 672 enum dlm_query_join_response response;
643 struct dlm_ctxt *dlm = NULL; 673 struct dlm_ctxt *dlm = NULL;
674 u8 nodenum;
644 675
645 query = (struct dlm_query_join_request *) msg->buf; 676 query = (struct dlm_query_join_request *) msg->buf;
646 677
@@ -664,6 +695,25 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
664 695
665 spin_lock(&dlm_domain_lock); 696 spin_lock(&dlm_domain_lock);
666 dlm = __dlm_lookup_domain_full(query->domain, query->name_len); 697 dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
698 if (!dlm)
699 goto unlock_respond;
700
701 /*
702 * There is a small window where the joining node may not see the
703 * node(s) that just left but are still part of the cluster. DISALLOW
704 * the join request if the joining node has a different node map.
705 */
706 nodenum=0;
707 while (nodenum < O2NM_MAX_NODES) {
708 if (test_bit(nodenum, dlm->domain_map)) {
709 if (!byte_test_bit(nodenum, query->node_map)) {
710 response = JOIN_DISALLOW;
711 goto unlock_respond;
712 }
713 }
714 nodenum++;
715 }
716
667 /* Once the dlm ctxt is marked as leaving then we don't want 717 /* Once the dlm ctxt is marked as leaving then we don't want
668 * to be put in someone's domain map. 718 * to be put in someone's domain map.
669 * Also, explicitly disallow joining at certain troublesome 719 * Also, explicitly disallow joining at certain troublesome
@@ -705,6 +755,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
705 755
706 spin_unlock(&dlm->spinlock); 756 spin_unlock(&dlm->spinlock);
707 } 757 }
758unlock_respond:
708 spin_unlock(&dlm_domain_lock); 759 spin_unlock(&dlm_domain_lock);
709 760
710respond: 761respond:
@@ -854,6 +905,9 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
854 join_msg.name_len = strlen(dlm->name); 905 join_msg.name_len = strlen(dlm->name);
855 memcpy(join_msg.domain, dlm->name, join_msg.name_len); 906 memcpy(join_msg.domain, dlm->name, join_msg.name_len);
856 907
908 /* copy live node map to join message */
909 byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
910
857 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, 911 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
858 sizeof(join_msg), node, &retval); 912 sizeof(join_msg), node, &retval);
859 if (status < 0 && status != -ENOPROTOOPT) { 913 if (status < 0 && status != -ENOPROTOOPT) {