aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h5
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c54
3 files changed, 62 insertions, 1 deletions
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index d74040fac343..177927a8f007 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,9 @@
38 * locking semantics of the file system using the protocol. It should 38 * locking semantics of the file system using the protocol. It should
39 * be somewhere else, I'm sure, but right now it isn't. 39 * be somewhere else, I'm sure, but right now it isn't.
40 * 40 *
41 * New in version 7:
42 * - DLM join domain includes the live nodemap
43 *
41 * New in version 6: 44 * New in version 6:
42 * - DLM lockres remote refcount fixes. 45 * - DLM lockres remote refcount fixes.
43 * 46 *
@@ -54,7 +57,7 @@
54 * - full 64 bit i_size in the metadata lock lvbs 57 * - full 64 bit i_size in the metadata lock lvbs
55 * - introduction of "rw" lock and pushing meta/data locking down 58 * - introduction of "rw" lock and pushing meta/data locking down
56 */ 59 */
57#define O2NET_PROTOCOL_VERSION 6ULL 60#define O2NET_PROTOCOL_VERSION 7ULL
58struct o2net_handshake { 61struct o2net_handshake {
59 __be64 protocol_version; 62 __be64 protocol_version;
60 __be64 connector_id; 63 __be64 connector_id;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 2f4f5d4edb07..e90b92f9ece1 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -625,12 +625,16 @@ struct dlm_begin_reco
625}; 625};
626 626
627 627
628#define BITS_PER_BYTE 8
629#define BITS_TO_BYTES(bits) (((bits)+BITS_PER_BYTE-1)/BITS_PER_BYTE)
630
628struct dlm_query_join_request 631struct dlm_query_join_request
629{ 632{
630 u8 node_idx; 633 u8 node_idx;
631 u8 pad1[2]; 634 u8 pad1[2];
632 u8 name_len; 635 u8 name_len;
633 u8 domain[O2NM_MAX_NAME_LEN]; 636 u8 domain[O2NM_MAX_NAME_LEN];
637 u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)];
634}; 638};
635 639
636struct dlm_assert_joined 640struct dlm_assert_joined
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6590e1bca23c..19b57a6bcb1a 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -48,6 +48,36 @@
48#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) 48#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
49#include "cluster/masklog.h" 49#include "cluster/masklog.h"
50 50
51/*
52 * ocfs2 node maps are arrays of long int, which cannot be sent freely
53 * across the wire due to endianness issues. To work around this, we convert
54 * the long ints to byte arrays. The following 3 routines are helper functions
55 * to set/test/copy bits within those arrays of bytes.
56 */
57static inline void byte_set_bit(u8 nr, u8 map[])
58{
59 map[nr >> 3] |= (1UL << (nr & 7));
60}
61
62static inline int byte_test_bit(u8 nr, u8 map[])
63{
64 return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0;
65}
66
67static inline void byte_copymap(u8 dmap[], unsigned long smap[],
68 unsigned int sz)
69{
70 unsigned int nn;
71
72 if (!sz)
73 return;
74
75 memset(dmap, 0, ((sz + 7) >> 3));
76 for (nn = 0 ; nn < sz; nn++)
77 if (test_bit(nn, smap))
78 byte_set_bit(nn, dmap);
79}
80
51static void dlm_free_pagevec(void **vec, int pages) 81static void dlm_free_pagevec(void **vec, int pages)
52{ 82{
53 while (pages--) 83 while (pages--)
@@ -641,6 +671,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
641 struct dlm_query_join_request *query; 671 struct dlm_query_join_request *query;
642 enum dlm_query_join_response response; 672 enum dlm_query_join_response response;
643 struct dlm_ctxt *dlm = NULL; 673 struct dlm_ctxt *dlm = NULL;
674 u8 nodenum;
644 675
645 query = (struct dlm_query_join_request *) msg->buf; 676 query = (struct dlm_query_join_request *) msg->buf;
646 677
@@ -664,6 +695,25 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
664 695
665 spin_lock(&dlm_domain_lock); 696 spin_lock(&dlm_domain_lock);
666 dlm = __dlm_lookup_domain_full(query->domain, query->name_len); 697 dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
698 if (!dlm)
699 goto unlock_respond;
700
701 /*
702 * There is a small window where the joining node may not see the
703 * node(s) that just left but are still part of the cluster. DISALLOW
704 * the join request if the joining node has a different node map.
705 */
706 nodenum=0;
707 while (nodenum < O2NM_MAX_NODES) {
708 if (test_bit(nodenum, dlm->domain_map)) {
709 if (!byte_test_bit(nodenum, query->node_map)) {
710 response = JOIN_DISALLOW;
711 goto unlock_respond;
712 }
713 }
714 nodenum++;
715 }
716
667 /* Once the dlm ctxt is marked as leaving then we don't want 717 /* Once the dlm ctxt is marked as leaving then we don't want
668 * to be put in someone's domain map. 718 * to be put in someone's domain map.
669 * Also, explicitly disallow joining at certain troublesome 719 * Also, explicitly disallow joining at certain troublesome
@@ -705,6 +755,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
705 755
706 spin_unlock(&dlm->spinlock); 756 spin_unlock(&dlm->spinlock);
707 } 757 }
758unlock_respond:
708 spin_unlock(&dlm_domain_lock); 759 spin_unlock(&dlm_domain_lock);
709 760
710respond: 761respond:
@@ -854,6 +905,9 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
854 join_msg.name_len = strlen(dlm->name); 905 join_msg.name_len = strlen(dlm->name);
855 memcpy(join_msg.domain, dlm->name, join_msg.name_len); 906 memcpy(join_msg.domain, dlm->name, join_msg.name_len);
856 907
908 /* copy live node map to join message */
909 byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
910
857 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, 911 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
858 sizeof(join_msg), node, &retval); 912 sizeof(join_msg), node, &retval);
859 if (status < 0 && status != -ENOPROTOOPT) { 913 if (status < 0 && status != -ENOPROTOOPT) {