diff options
author | Srinivas Eeda <srinivas.eeda@oracle.com> | 2007-01-29 18:31:35 -0500 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-02-07 15:09:14 -0500 |
commit | 1faf289454b9eeb6e463da3eee47f7009668370d (patch) | |
tree | fe77643771a985a20c3782fb2a952cf9c05937a5 /fs/ocfs2/dlm/dlmdomain.c | |
parent | f3f854648de64c4b6f13f6f13113bc9525c621e5 (diff) |
ocfs2_dlm: disallow a domain join if node maps mismatch
There is a small window where a joining node may not see the node(s) that
just died but are still part of the domain. To fix this, we must disallow
join requests if the joining node has a different node map.
A new field, node_map, is added to dlm_query_join_request to send the current
node's node map along with the join request. On the receiving end, the nodes that
are part of the cluster verify that the new node sees all the nodes that
are still part of the cluster. They disallow the join if the maps mismatch.
Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/dlm/dlmdomain.c')
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6590e1bca23c..19b57a6bcb1a 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -48,6 +48,36 @@ | |||
48 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) | 48 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) |
49 | #include "cluster/masklog.h" | 49 | #include "cluster/masklog.h" |
50 | 50 | ||
51 | /* | ||
52 | * ocfs2 node maps are array of long int, which limits to send them freely | ||
53 | * across the wire due to endianness issues. To workaround this, we convert | ||
54 | * long ints to byte arrays. Following 3 routines are helper functions to | ||
55 | * set/test/copy bits within those array of bytes | ||
56 | */ | ||
57 | static inline void byte_set_bit(u8 nr, u8 map[]) | ||
58 | { | ||
59 | map[nr >> 3] |= (1UL << (nr & 7)); | ||
60 | } | ||
61 | |||
62 | static inline int byte_test_bit(u8 nr, u8 map[]) | ||
63 | { | ||
64 | return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0; | ||
65 | } | ||
66 | |||
67 | static inline void byte_copymap(u8 dmap[], unsigned long smap[], | ||
68 | unsigned int sz) | ||
69 | { | ||
70 | unsigned int nn; | ||
71 | |||
72 | if (!sz) | ||
73 | return; | ||
74 | |||
75 | memset(dmap, 0, ((sz + 7) >> 3)); | ||
76 | for (nn = 0 ; nn < sz; nn++) | ||
77 | if (test_bit(nn, smap)) | ||
78 | byte_set_bit(nn, dmap); | ||
79 | } | ||
80 | |||
51 | static void dlm_free_pagevec(void **vec, int pages) | 81 | static void dlm_free_pagevec(void **vec, int pages) |
52 | { | 82 | { |
53 | while (pages--) | 83 | while (pages--) |
@@ -641,6 +671,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, | |||
641 | struct dlm_query_join_request *query; | 671 | struct dlm_query_join_request *query; |
642 | enum dlm_query_join_response response; | 672 | enum dlm_query_join_response response; |
643 | struct dlm_ctxt *dlm = NULL; | 673 | struct dlm_ctxt *dlm = NULL; |
674 | u8 nodenum; | ||
644 | 675 | ||
645 | query = (struct dlm_query_join_request *) msg->buf; | 676 | query = (struct dlm_query_join_request *) msg->buf; |
646 | 677 | ||
@@ -664,6 +695,25 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, | |||
664 | 695 | ||
665 | spin_lock(&dlm_domain_lock); | 696 | spin_lock(&dlm_domain_lock); |
666 | dlm = __dlm_lookup_domain_full(query->domain, query->name_len); | 697 | dlm = __dlm_lookup_domain_full(query->domain, query->name_len); |
698 | if (!dlm) | ||
699 | goto unlock_respond; | ||
700 | |||
701 | /* | ||
702 | * There is a small window where the joining node may not see the | ||
703 | * node(s) that just left but still part of the cluster. DISALLOW | ||
704 | * join request if joining node has different node map. | ||
705 | */ | ||
706 | nodenum=0; | ||
707 | while (nodenum < O2NM_MAX_NODES) { | ||
708 | if (test_bit(nodenum, dlm->domain_map)) { | ||
709 | if (!byte_test_bit(nodenum, query->node_map)) { | ||
710 | response = JOIN_DISALLOW; | ||
711 | goto unlock_respond; | ||
712 | } | ||
713 | } | ||
714 | nodenum++; | ||
715 | } | ||
716 | |||
667 | /* Once the dlm ctxt is marked as leaving then we don't want | 717 | /* Once the dlm ctxt is marked as leaving then we don't want |
668 | * to be put in someone's domain map. | 718 | * to be put in someone's domain map. |
669 | * Also, explicitly disallow joining at certain troublesome | 719 | * Also, explicitly disallow joining at certain troublesome |
@@ -705,6 +755,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, | |||
705 | 755 | ||
706 | spin_unlock(&dlm->spinlock); | 756 | spin_unlock(&dlm->spinlock); |
707 | } | 757 | } |
758 | unlock_respond: | ||
708 | spin_unlock(&dlm_domain_lock); | 759 | spin_unlock(&dlm_domain_lock); |
709 | 760 | ||
710 | respond: | 761 | respond: |
@@ -854,6 +905,9 @@ static int dlm_request_join(struct dlm_ctxt *dlm, | |||
854 | join_msg.name_len = strlen(dlm->name); | 905 | join_msg.name_len = strlen(dlm->name); |
855 | memcpy(join_msg.domain, dlm->name, join_msg.name_len); | 906 | memcpy(join_msg.domain, dlm->name, join_msg.name_len); |
856 | 907 | ||
908 | /* copy live node map to join message */ | ||
909 | byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); | ||
910 | |||
857 | status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, | 911 | status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, |
858 | sizeof(join_msg), node, &retval); | 912 | sizeof(join_msg), node, &retval); |
859 | if (status < 0 && status != -ENOPROTOOPT) { | 913 | if (status < 0 && status != -ENOPROTOOPT) { |