diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-27 17:48:07 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-27 17:48:07 -0400 |
commit | 805de022b100bcf796860fe88d7db4164066d1c3 (patch) | |
tree | 79002a4947a0df8d82ea5f75fac8c6d958848877 | |
parent | 7c757eb9f804782fb39d0ae2c1a88ffb9309138e (diff) | |
parent | 1fecb1c4b62881e3689ba2dcf93072ae301b597c (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/dlm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/dlm:
dlm: fix length calculation in compat code
dlm: ignore cancel on granted lock
dlm: clear defunct cancel state
dlm: replace idr with hash table for connections
dlm: comment typo fixes
dlm: use ipv6_addr_copy
dlm: Change rwlock which is only used in write mode to a spinlock
-rw-r--r-- | fs/dlm/dir.c | 18 | ||||
-rw-r--r-- | fs/dlm/dlm_internal.h | 2 | ||||
-rw-r--r-- | fs/dlm/lock.c | 60 | ||||
-rw-r--r-- | fs/dlm/lockspace.c | 2 | ||||
-rw-r--r-- | fs/dlm/lowcomms.c | 181 | ||||
-rw-r--r-- | fs/dlm/user.c | 24 |
6 files changed, 175 insertions, 112 deletions
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 92969f879a17..858fba14aaa6 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c | |||
@@ -156,7 +156,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen | |||
156 | 156 | ||
157 | bucket = dir_hash(ls, name, namelen); | 157 | bucket = dir_hash(ls, name, namelen); |
158 | 158 | ||
159 | write_lock(&ls->ls_dirtbl[bucket].lock); | 159 | spin_lock(&ls->ls_dirtbl[bucket].lock); |
160 | 160 | ||
161 | de = search_bucket(ls, name, namelen, bucket); | 161 | de = search_bucket(ls, name, namelen, bucket); |
162 | 162 | ||
@@ -173,7 +173,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen | |||
173 | list_del(&de->list); | 173 | list_del(&de->list); |
174 | kfree(de); | 174 | kfree(de); |
175 | out: | 175 | out: |
176 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 176 | spin_unlock(&ls->ls_dirtbl[bucket].lock); |
177 | } | 177 | } |
178 | 178 | ||
179 | void dlm_dir_clear(struct dlm_ls *ls) | 179 | void dlm_dir_clear(struct dlm_ls *ls) |
@@ -185,14 +185,14 @@ void dlm_dir_clear(struct dlm_ls *ls) | |||
185 | DLM_ASSERT(list_empty(&ls->ls_recover_list), ); | 185 | DLM_ASSERT(list_empty(&ls->ls_recover_list), ); |
186 | 186 | ||
187 | for (i = 0; i < ls->ls_dirtbl_size; i++) { | 187 | for (i = 0; i < ls->ls_dirtbl_size; i++) { |
188 | write_lock(&ls->ls_dirtbl[i].lock); | 188 | spin_lock(&ls->ls_dirtbl[i].lock); |
189 | head = &ls->ls_dirtbl[i].list; | 189 | head = &ls->ls_dirtbl[i].list; |
190 | while (!list_empty(head)) { | 190 | while (!list_empty(head)) { |
191 | de = list_entry(head->next, struct dlm_direntry, list); | 191 | de = list_entry(head->next, struct dlm_direntry, list); |
192 | list_del(&de->list); | 192 | list_del(&de->list); |
193 | put_free_de(ls, de); | 193 | put_free_de(ls, de); |
194 | } | 194 | } |
195 | write_unlock(&ls->ls_dirtbl[i].lock); | 195 | spin_unlock(&ls->ls_dirtbl[i].lock); |
196 | } | 196 | } |
197 | } | 197 | } |
198 | 198 | ||
@@ -307,17 +307,17 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | |||
307 | 307 | ||
308 | bucket = dir_hash(ls, name, namelen); | 308 | bucket = dir_hash(ls, name, namelen); |
309 | 309 | ||
310 | write_lock(&ls->ls_dirtbl[bucket].lock); | 310 | spin_lock(&ls->ls_dirtbl[bucket].lock); |
311 | de = search_bucket(ls, name, namelen, bucket); | 311 | de = search_bucket(ls, name, namelen, bucket); |
312 | if (de) { | 312 | if (de) { |
313 | *r_nodeid = de->master_nodeid; | 313 | *r_nodeid = de->master_nodeid; |
314 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 314 | spin_unlock(&ls->ls_dirtbl[bucket].lock); |
315 | if (*r_nodeid == nodeid) | 315 | if (*r_nodeid == nodeid) |
316 | return -EEXIST; | 316 | return -EEXIST; |
317 | return 0; | 317 | return 0; |
318 | } | 318 | } |
319 | 319 | ||
320 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 320 | spin_unlock(&ls->ls_dirtbl[bucket].lock); |
321 | 321 | ||
322 | if (namelen > DLM_RESNAME_MAXLEN) | 322 | if (namelen > DLM_RESNAME_MAXLEN) |
323 | return -EINVAL; | 323 | return -EINVAL; |
@@ -330,7 +330,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | |||
330 | de->length = namelen; | 330 | de->length = namelen; |
331 | memcpy(de->name, name, namelen); | 331 | memcpy(de->name, name, namelen); |
332 | 332 | ||
333 | write_lock(&ls->ls_dirtbl[bucket].lock); | 333 | spin_lock(&ls->ls_dirtbl[bucket].lock); |
334 | tmp = search_bucket(ls, name, namelen, bucket); | 334 | tmp = search_bucket(ls, name, namelen, bucket); |
335 | if (tmp) { | 335 | if (tmp) { |
336 | kfree(de); | 336 | kfree(de); |
@@ -339,7 +339,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name, | |||
339 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | 339 | list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); |
340 | } | 340 | } |
341 | *r_nodeid = de->master_nodeid; | 341 | *r_nodeid = de->master_nodeid; |
342 | write_unlock(&ls->ls_dirtbl[bucket].lock); | 342 | spin_unlock(&ls->ls_dirtbl[bucket].lock); |
343 | return 0; | 343 | return 0; |
344 | } | 344 | } |
345 | 345 | ||
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 076e86f38bc8..d01ca0a711db 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -99,7 +99,7 @@ struct dlm_direntry { | |||
99 | 99 | ||
100 | struct dlm_dirtable { | 100 | struct dlm_dirtable { |
101 | struct list_head list; | 101 | struct list_head list; |
102 | rwlock_t lock; | 102 | spinlock_t lock; |
103 | }; | 103 | }; |
104 | 104 | ||
105 | struct dlm_rsbtable { | 105 | struct dlm_rsbtable { |
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 01e7d39c5fba..205ec95b347e 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -835,7 +835,7 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) | |||
835 | lkb->lkb_wait_count++; | 835 | lkb->lkb_wait_count++; |
836 | hold_lkb(lkb); | 836 | hold_lkb(lkb); |
837 | 837 | ||
838 | log_debug(ls, "add overlap %x cur %d new %d count %d flags %x", | 838 | log_debug(ls, "addwait %x cur %d overlap %d count %d f %x", |
839 | lkb->lkb_id, lkb->lkb_wait_type, mstype, | 839 | lkb->lkb_id, lkb->lkb_wait_type, mstype, |
840 | lkb->lkb_wait_count, lkb->lkb_flags); | 840 | lkb->lkb_wait_count, lkb->lkb_flags); |
841 | goto out; | 841 | goto out; |
@@ -851,7 +851,7 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) | |||
851 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); | 851 | list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); |
852 | out: | 852 | out: |
853 | if (error) | 853 | if (error) |
854 | log_error(ls, "add_to_waiters %x error %d flags %x %d %d %s", | 854 | log_error(ls, "addwait error %x %d flags %x %d %d %s", |
855 | lkb->lkb_id, error, lkb->lkb_flags, mstype, | 855 | lkb->lkb_id, error, lkb->lkb_flags, mstype, |
856 | lkb->lkb_wait_type, lkb->lkb_resource->res_name); | 856 | lkb->lkb_wait_type, lkb->lkb_resource->res_name); |
857 | mutex_unlock(&ls->ls_waiters_mutex); | 857 | mutex_unlock(&ls->ls_waiters_mutex); |
@@ -863,23 +863,55 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) | |||
863 | request reply on the requestqueue) between dlm_recover_waiters_pre() which | 863 | request reply on the requestqueue) between dlm_recover_waiters_pre() which |
864 | set RESEND and dlm_recover_waiters_post() */ | 864 | set RESEND and dlm_recover_waiters_post() */ |
865 | 865 | ||
866 | static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) | 866 | static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, |
867 | struct dlm_message *ms) | ||
867 | { | 868 | { |
868 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 869 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; |
869 | int overlap_done = 0; | 870 | int overlap_done = 0; |
870 | 871 | ||
871 | if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) { | 872 | if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) { |
873 | log_debug(ls, "remwait %x unlock_reply overlap", lkb->lkb_id); | ||
872 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; | 874 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; |
873 | overlap_done = 1; | 875 | overlap_done = 1; |
874 | goto out_del; | 876 | goto out_del; |
875 | } | 877 | } |
876 | 878 | ||
877 | if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) { | 879 | if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) { |
880 | log_debug(ls, "remwait %x cancel_reply overlap", lkb->lkb_id); | ||
878 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; | 881 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; |
879 | overlap_done = 1; | 882 | overlap_done = 1; |
880 | goto out_del; | 883 | goto out_del; |
881 | } | 884 | } |
882 | 885 | ||
886 | /* Cancel state was preemptively cleared by a successful convert, | ||
887 | see next comment, nothing to do. */ | ||
888 | |||
889 | if ((mstype == DLM_MSG_CANCEL_REPLY) && | ||
890 | (lkb->lkb_wait_type != DLM_MSG_CANCEL)) { | ||
891 | log_debug(ls, "remwait %x cancel_reply wait_type %d", | ||
892 | lkb->lkb_id, lkb->lkb_wait_type); | ||
893 | return -1; | ||
894 | } | ||
895 | |||
896 | /* Remove for the convert reply, and preemptively remove for the | ||
897 | cancel reply. A convert has been granted while there's still | ||
898 | an outstanding cancel on it (the cancel is moot and the result | ||
899 | in the cancel reply should be 0). We preempt the cancel reply | ||
900 | because the app gets the convert result and then can follow up | ||
901 | with another op, like convert. This subsequent op would see the | ||
902 | lingering state of the cancel and fail with -EBUSY. */ | ||
903 | |||
904 | if ((mstype == DLM_MSG_CONVERT_REPLY) && | ||
905 | (lkb->lkb_wait_type == DLM_MSG_CONVERT) && | ||
906 | is_overlap_cancel(lkb) && ms && !ms->m_result) { | ||
907 | log_debug(ls, "remwait %x convert_reply zap overlap_cancel", | ||
908 | lkb->lkb_id); | ||
909 | lkb->lkb_wait_type = 0; | ||
910 | lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; | ||
911 | lkb->lkb_wait_count--; | ||
912 | goto out_del; | ||
913 | } | ||
914 | |||
883 | /* N.B. type of reply may not always correspond to type of original | 915 | /* N.B. type of reply may not always correspond to type of original |
884 | msg due to lookup->request optimization, verify others? */ | 916 | msg due to lookup->request optimization, verify others? */ |
885 | 917 | ||
@@ -888,8 +920,8 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) | |||
888 | goto out_del; | 920 | goto out_del; |
889 | } | 921 | } |
890 | 922 | ||
891 | log_error(ls, "remove_from_waiters lkid %x flags %x types %d %d", | 923 | log_error(ls, "remwait error %x reply %d flags %x no wait_type", |
892 | lkb->lkb_id, lkb->lkb_flags, mstype, lkb->lkb_wait_type); | 924 | lkb->lkb_id, mstype, lkb->lkb_flags); |
893 | return -1; | 925 | return -1; |
894 | 926 | ||
895 | out_del: | 927 | out_del: |
@@ -899,7 +931,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) | |||
899 | this would happen */ | 931 | this would happen */ |
900 | 932 | ||
901 | if (overlap_done && lkb->lkb_wait_type) { | 933 | if (overlap_done && lkb->lkb_wait_type) { |
902 | log_error(ls, "remove_from_waiters %x reply %d give up on %d", | 934 | log_error(ls, "remwait error %x reply %d wait_type %d overlap", |
903 | lkb->lkb_id, mstype, lkb->lkb_wait_type); | 935 | lkb->lkb_id, mstype, lkb->lkb_wait_type); |
904 | lkb->lkb_wait_count--; | 936 | lkb->lkb_wait_count--; |
905 | lkb->lkb_wait_type = 0; | 937 | lkb->lkb_wait_type = 0; |
@@ -921,7 +953,7 @@ static int remove_from_waiters(struct dlm_lkb *lkb, int mstype) | |||
921 | int error; | 953 | int error; |
922 | 954 | ||
923 | mutex_lock(&ls->ls_waiters_mutex); | 955 | mutex_lock(&ls->ls_waiters_mutex); |
924 | error = _remove_from_waiters(lkb, mstype); | 956 | error = _remove_from_waiters(lkb, mstype, NULL); |
925 | mutex_unlock(&ls->ls_waiters_mutex); | 957 | mutex_unlock(&ls->ls_waiters_mutex); |
926 | return error; | 958 | return error; |
927 | } | 959 | } |
@@ -936,7 +968,7 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
936 | 968 | ||
937 | if (ms != &ls->ls_stub_ms) | 969 | if (ms != &ls->ls_stub_ms) |
938 | mutex_lock(&ls->ls_waiters_mutex); | 970 | mutex_lock(&ls->ls_waiters_mutex); |
939 | error = _remove_from_waiters(lkb, ms->m_type); | 971 | error = _remove_from_waiters(lkb, ms->m_type, ms); |
940 | if (ms != &ls->ls_stub_ms) | 972 | if (ms != &ls->ls_stub_ms) |
941 | mutex_unlock(&ls->ls_waiters_mutex); | 973 | mutex_unlock(&ls->ls_waiters_mutex); |
942 | return error; | 974 | return error; |
@@ -2083,6 +2115,11 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
2083 | lkb->lkb_timeout_cs = args->timeout; | 2115 | lkb->lkb_timeout_cs = args->timeout; |
2084 | rv = 0; | 2116 | rv = 0; |
2085 | out: | 2117 | out: |
2118 | if (rv) | ||
2119 | log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s", | ||
2120 | rv, lkb->lkb_id, lkb->lkb_flags, args->flags, | ||
2121 | lkb->lkb_status, lkb->lkb_wait_type, | ||
2122 | lkb->lkb_resource->res_name); | ||
2086 | return rv; | 2123 | return rv; |
2087 | } | 2124 | } |
2088 | 2125 | ||
@@ -2149,6 +2186,13 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) | |||
2149 | goto out; | 2186 | goto out; |
2150 | } | 2187 | } |
2151 | 2188 | ||
2189 | /* there's nothing to cancel */ | ||
2190 | if (lkb->lkb_status == DLM_LKSTS_GRANTED && | ||
2191 | !lkb->lkb_wait_type) { | ||
2192 | rv = -EBUSY; | ||
2193 | goto out; | ||
2194 | } | ||
2195 | |||
2152 | switch (lkb->lkb_wait_type) { | 2196 | switch (lkb->lkb_wait_type) { |
2153 | case DLM_MSG_LOOKUP: | 2197 | case DLM_MSG_LOOKUP: |
2154 | case DLM_MSG_REQUEST: | 2198 | case DLM_MSG_REQUEST: |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index aa32e5f02493..cd8e2df3c295 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -487,7 +487,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
487 | goto out_lkbfree; | 487 | goto out_lkbfree; |
488 | for (i = 0; i < size; i++) { | 488 | for (i = 0; i < size; i++) { |
489 | INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); | 489 | INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); |
490 | rwlock_init(&ls->ls_dirtbl[i].lock); | 490 | spin_lock_init(&ls->ls_dirtbl[i].lock); |
491 | } | 491 | } |
492 | 492 | ||
493 | INIT_LIST_HEAD(&ls->ls_waiters); | 493 | INIT_LIST_HEAD(&ls->ls_waiters); |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 103a5ebd1371..609108a83267 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -21,7 +21,7 @@ | |||
21 | * | 21 | * |
22 | * Cluster nodes are referred to by their nodeids. nodeids are | 22 | * Cluster nodes are referred to by their nodeids. nodeids are |
23 | * simply 32 bit numbers to the locking module - if they need to | 23 | * simply 32 bit numbers to the locking module - if they need to |
24 | * be expanded for the cluster infrastructure then that is it's | 24 | * be expanded for the cluster infrastructure then that is its |
25 | * responsibility. It is this layer's | 25 | * responsibility. It is this layer's |
26 | * responsibility to resolve these into IP address or | 26 | * responsibility to resolve these into IP address or |
27 | * whatever it needs for inter-node communication. | 27 | * whatever it needs for inter-node communication. |
@@ -36,9 +36,9 @@ | |||
36 | * of high load. Also, this way, the sending thread can collect together | 36 | * of high load. Also, this way, the sending thread can collect together |
37 | * messages bound for one node and send them in one block. | 37 | * messages bound for one node and send them in one block. |
38 | * | 38 | * |
39 | * lowcomms will choose to use wither TCP or SCTP as its transport layer | 39 | * lowcomms will choose to use either TCP or SCTP as its transport layer |
40 | * depending on the configuration variable 'protocol'. This should be set | 40 | * depending on the configuration variable 'protocol'. This should be set |
41 | * to 0 (default) for TCP or 1 for SCTP. It shouldbe configured using a | 41 | * to 0 (default) for TCP or 1 for SCTP. It should be configured using a |
42 | * cluster-wide mechanism as it must be the same on all nodes of the cluster | 42 | * cluster-wide mechanism as it must be the same on all nodes of the cluster |
43 | * for the DLM to function. | 43 | * for the DLM to function. |
44 | * | 44 | * |
@@ -48,11 +48,11 @@ | |||
48 | #include <net/sock.h> | 48 | #include <net/sock.h> |
49 | #include <net/tcp.h> | 49 | #include <net/tcp.h> |
50 | #include <linux/pagemap.h> | 50 | #include <linux/pagemap.h> |
51 | #include <linux/idr.h> | ||
52 | #include <linux/file.h> | 51 | #include <linux/file.h> |
53 | #include <linux/mutex.h> | 52 | #include <linux/mutex.h> |
54 | #include <linux/sctp.h> | 53 | #include <linux/sctp.h> |
55 | #include <net/sctp/user.h> | 54 | #include <net/sctp/user.h> |
55 | #include <net/ipv6.h> | ||
56 | 56 | ||
57 | #include "dlm_internal.h" | 57 | #include "dlm_internal.h" |
58 | #include "lowcomms.h" | 58 | #include "lowcomms.h" |
@@ -60,6 +60,7 @@ | |||
60 | #include "config.h" | 60 | #include "config.h" |
61 | 61 | ||
62 | #define NEEDED_RMEM (4*1024*1024) | 62 | #define NEEDED_RMEM (4*1024*1024) |
63 | #define CONN_HASH_SIZE 32 | ||
63 | 64 | ||
64 | struct cbuf { | 65 | struct cbuf { |
65 | unsigned int base; | 66 | unsigned int base; |
@@ -114,6 +115,7 @@ struct connection { | |||
114 | int retries; | 115 | int retries; |
115 | #define MAX_CONNECT_RETRIES 3 | 116 | #define MAX_CONNECT_RETRIES 3 |
116 | int sctp_assoc; | 117 | int sctp_assoc; |
118 | struct hlist_node list; | ||
117 | struct connection *othercon; | 119 | struct connection *othercon; |
118 | struct work_struct rwork; /* Receive workqueue */ | 120 | struct work_struct rwork; /* Receive workqueue */ |
119 | struct work_struct swork; /* Send workqueue */ | 121 | struct work_struct swork; /* Send workqueue */ |
@@ -138,14 +140,37 @@ static int dlm_local_count; | |||
138 | static struct workqueue_struct *recv_workqueue; | 140 | static struct workqueue_struct *recv_workqueue; |
139 | static struct workqueue_struct *send_workqueue; | 141 | static struct workqueue_struct *send_workqueue; |
140 | 142 | ||
141 | static DEFINE_IDR(connections_idr); | 143 | static struct hlist_head connection_hash[CONN_HASH_SIZE]; |
142 | static DEFINE_MUTEX(connections_lock); | 144 | static DEFINE_MUTEX(connections_lock); |
143 | static int max_nodeid; | ||
144 | static struct kmem_cache *con_cache; | 145 | static struct kmem_cache *con_cache; |
145 | 146 | ||
146 | static void process_recv_sockets(struct work_struct *work); | 147 | static void process_recv_sockets(struct work_struct *work); |
147 | static void process_send_sockets(struct work_struct *work); | 148 | static void process_send_sockets(struct work_struct *work); |
148 | 149 | ||
150 | |||
151 | /* This is deliberately very simple because most clusters have simple | ||
152 | sequential nodeids, so we should be able to go straight to a connection | ||
153 | struct in the array */ | ||
154 | static inline int nodeid_hash(int nodeid) | ||
155 | { | ||
156 | return nodeid & (CONN_HASH_SIZE-1); | ||
157 | } | ||
158 | |||
159 | static struct connection *__find_con(int nodeid) | ||
160 | { | ||
161 | int r; | ||
162 | struct hlist_node *h; | ||
163 | struct connection *con; | ||
164 | |||
165 | r = nodeid_hash(nodeid); | ||
166 | |||
167 | hlist_for_each_entry(con, h, &connection_hash[r], list) { | ||
168 | if (con->nodeid == nodeid) | ||
169 | return con; | ||
170 | } | ||
171 | return NULL; | ||
172 | } | ||
173 | |||
149 | /* | 174 | /* |
150 | * If 'allocation' is zero then we don't attempt to create a new | 175 | * If 'allocation' is zero then we don't attempt to create a new |
151 | * connection structure for this node. | 176 | * connection structure for this node. |
@@ -154,31 +179,17 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) | |||
154 | { | 179 | { |
155 | struct connection *con = NULL; | 180 | struct connection *con = NULL; |
156 | int r; | 181 | int r; |
157 | int n; | ||
158 | 182 | ||
159 | con = idr_find(&connections_idr, nodeid); | 183 | con = __find_con(nodeid); |
160 | if (con || !alloc) | 184 | if (con || !alloc) |
161 | return con; | 185 | return con; |
162 | 186 | ||
163 | r = idr_pre_get(&connections_idr, alloc); | ||
164 | if (!r) | ||
165 | return NULL; | ||
166 | |||
167 | con = kmem_cache_zalloc(con_cache, alloc); | 187 | con = kmem_cache_zalloc(con_cache, alloc); |
168 | if (!con) | 188 | if (!con) |
169 | return NULL; | 189 | return NULL; |
170 | 190 | ||
171 | r = idr_get_new_above(&connections_idr, con, nodeid, &n); | 191 | r = nodeid_hash(nodeid); |
172 | if (r) { | 192 | hlist_add_head(&con->list, &connection_hash[r]); |
173 | kmem_cache_free(con_cache, con); | ||
174 | return NULL; | ||
175 | } | ||
176 | |||
177 | if (n != nodeid) { | ||
178 | idr_remove(&connections_idr, n); | ||
179 | kmem_cache_free(con_cache, con); | ||
180 | return NULL; | ||
181 | } | ||
182 | 193 | ||
183 | con->nodeid = nodeid; | 194 | con->nodeid = nodeid; |
184 | mutex_init(&con->sock_mutex); | 195 | mutex_init(&con->sock_mutex); |
@@ -189,19 +200,30 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) | |||
189 | 200 | ||
190 | /* Setup action pointers for child sockets */ | 201 | /* Setup action pointers for child sockets */ |
191 | if (con->nodeid) { | 202 | if (con->nodeid) { |
192 | struct connection *zerocon = idr_find(&connections_idr, 0); | 203 | struct connection *zerocon = __find_con(0); |
193 | 204 | ||
194 | con->connect_action = zerocon->connect_action; | 205 | con->connect_action = zerocon->connect_action; |
195 | if (!con->rx_action) | 206 | if (!con->rx_action) |
196 | con->rx_action = zerocon->rx_action; | 207 | con->rx_action = zerocon->rx_action; |
197 | } | 208 | } |
198 | 209 | ||
199 | if (nodeid > max_nodeid) | ||
200 | max_nodeid = nodeid; | ||
201 | |||
202 | return con; | 210 | return con; |
203 | } | 211 | } |
204 | 212 | ||
213 | /* Loop round all connections */ | ||
214 | static void foreach_conn(void (*conn_func)(struct connection *c)) | ||
215 | { | ||
216 | int i; | ||
217 | struct hlist_node *h, *n; | ||
218 | struct connection *con; | ||
219 | |||
220 | for (i = 0; i < CONN_HASH_SIZE; i++) { | ||
221 | hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){ | ||
222 | conn_func(con); | ||
223 | } | ||
224 | } | ||
225 | } | ||
226 | |||
205 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) | 227 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) |
206 | { | 228 | { |
207 | struct connection *con; | 229 | struct connection *con; |
@@ -217,14 +239,17 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) | |||
217 | static struct connection *assoc2con(int assoc_id) | 239 | static struct connection *assoc2con(int assoc_id) |
218 | { | 240 | { |
219 | int i; | 241 | int i; |
242 | struct hlist_node *h; | ||
220 | struct connection *con; | 243 | struct connection *con; |
221 | 244 | ||
222 | mutex_lock(&connections_lock); | 245 | mutex_lock(&connections_lock); |
223 | for (i=0; i<=max_nodeid; i++) { | 246 | |
224 | con = __nodeid2con(i, 0); | 247 | for (i = 0 ; i < CONN_HASH_SIZE; i++) { |
225 | if (con && con->sctp_assoc == assoc_id) { | 248 | hlist_for_each_entry(con, h, &connection_hash[i], list) { |
226 | mutex_unlock(&connections_lock); | 249 | if (con && con->sctp_assoc == assoc_id) { |
227 | return con; | 250 | mutex_unlock(&connections_lock); |
251 | return con; | ||
252 | } | ||
228 | } | 253 | } |
229 | } | 254 | } |
230 | mutex_unlock(&connections_lock); | 255 | mutex_unlock(&connections_lock); |
@@ -250,8 +275,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) | |||
250 | } else { | 275 | } else { |
251 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; | 276 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; |
252 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; | 277 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; |
253 | memcpy(&ret6->sin6_addr, &in6->sin6_addr, | 278 | ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr); |
254 | sizeof(in6->sin6_addr)); | ||
255 | } | 279 | } |
256 | 280 | ||
257 | return 0; | 281 | return 0; |
@@ -376,25 +400,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd) | |||
376 | log_print("send EOF to node failed: %d", ret); | 400 | log_print("send EOF to node failed: %d", ret); |
377 | } | 401 | } |
378 | 402 | ||
403 | static void sctp_init_failed_foreach(struct connection *con) | ||
404 | { | ||
405 | con->sctp_assoc = 0; | ||
406 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
407 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | ||
408 | queue_work(send_workqueue, &con->swork); | ||
409 | } | ||
410 | } | ||
411 | |||
379 | /* INIT failed but we don't know which node... | 412 | /* INIT failed but we don't know which node... |
380 | restart INIT on all pending nodes */ | 413 | restart INIT on all pending nodes */ |
381 | static void sctp_init_failed(void) | 414 | static void sctp_init_failed(void) |
382 | { | 415 | { |
383 | int i; | ||
384 | struct connection *con; | ||
385 | |||
386 | mutex_lock(&connections_lock); | 416 | mutex_lock(&connections_lock); |
387 | for (i=1; i<=max_nodeid; i++) { | 417 | |
388 | con = __nodeid2con(i, 0); | 418 | foreach_conn(sctp_init_failed_foreach); |
389 | if (!con) | 419 | |
390 | continue; | ||
391 | con->sctp_assoc = 0; | ||
392 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
393 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) { | ||
394 | queue_work(send_workqueue, &con->swork); | ||
395 | } | ||
396 | } | ||
397 | } | ||
398 | mutex_unlock(&connections_lock); | 420 | mutex_unlock(&connections_lock); |
399 | } | 421 | } |
400 | 422 | ||
@@ -1313,13 +1335,10 @@ out_connect: | |||
1313 | 1335 | ||
1314 | static void clean_one_writequeue(struct connection *con) | 1336 | static void clean_one_writequeue(struct connection *con) |
1315 | { | 1337 | { |
1316 | struct list_head *list; | 1338 | struct writequeue_entry *e, *safe; |
1317 | struct list_head *temp; | ||
1318 | 1339 | ||
1319 | spin_lock(&con->writequeue_lock); | 1340 | spin_lock(&con->writequeue_lock); |
1320 | list_for_each_safe(list, temp, &con->writequeue) { | 1341 | list_for_each_entry_safe(e, safe, &con->writequeue, list) { |
1321 | struct writequeue_entry *e = | ||
1322 | list_entry(list, struct writequeue_entry, list); | ||
1323 | list_del(&e->list); | 1342 | list_del(&e->list); |
1324 | free_entry(e); | 1343 | free_entry(e); |
1325 | } | 1344 | } |
@@ -1369,14 +1388,7 @@ static void process_send_sockets(struct work_struct *work) | |||
1369 | /* Discard all entries on the write queues */ | 1388 | /* Discard all entries on the write queues */ |
1370 | static void clean_writequeues(void) | 1389 | static void clean_writequeues(void) |
1371 | { | 1390 | { |
1372 | int nodeid; | 1391 | foreach_conn(clean_one_writequeue); |
1373 | |||
1374 | for (nodeid = 1; nodeid <= max_nodeid; nodeid++) { | ||
1375 | struct connection *con = __nodeid2con(nodeid, 0); | ||
1376 | |||
1377 | if (con) | ||
1378 | clean_one_writequeue(con); | ||
1379 | } | ||
1380 | } | 1392 | } |
1381 | 1393 | ||
1382 | static void work_stop(void) | 1394 | static void work_stop(void) |
@@ -1406,23 +1418,29 @@ static int work_start(void) | |||
1406 | return 0; | 1418 | return 0; |
1407 | } | 1419 | } |
1408 | 1420 | ||
1409 | void dlm_lowcomms_stop(void) | 1421 | static void stop_conn(struct connection *con) |
1410 | { | 1422 | { |
1411 | int i; | 1423 | con->flags |= 0x0F; |
1412 | struct connection *con; | 1424 | if (con->sock) |
1425 | con->sock->sk->sk_user_data = NULL; | ||
1426 | } | ||
1413 | 1427 | ||
1428 | static void free_conn(struct connection *con) | ||
1429 | { | ||
1430 | close_connection(con, true); | ||
1431 | if (con->othercon) | ||
1432 | kmem_cache_free(con_cache, con->othercon); | ||
1433 | hlist_del(&con->list); | ||
1434 | kmem_cache_free(con_cache, con); | ||
1435 | } | ||
1436 | |||
1437 | void dlm_lowcomms_stop(void) | ||
1438 | { | ||
1414 | /* Set all the flags to prevent any | 1439 | /* Set all the flags to prevent any |
1415 | socket activity. | 1440 | socket activity. |
1416 | */ | 1441 | */ |
1417 | mutex_lock(&connections_lock); | 1442 | mutex_lock(&connections_lock); |
1418 | for (i = 0; i <= max_nodeid; i++) { | 1443 | foreach_conn(stop_conn); |
1419 | con = __nodeid2con(i, 0); | ||
1420 | if (con) { | ||
1421 | con->flags |= 0x0F; | ||
1422 | if (con->sock) | ||
1423 | con->sock->sk->sk_user_data = NULL; | ||
1424 | } | ||
1425 | } | ||
1426 | mutex_unlock(&connections_lock); | 1444 | mutex_unlock(&connections_lock); |
1427 | 1445 | ||
1428 | work_stop(); | 1446 | work_stop(); |
@@ -1430,25 +1448,20 @@ void dlm_lowcomms_stop(void) | |||
1430 | mutex_lock(&connections_lock); | 1448 | mutex_lock(&connections_lock); |
1431 | clean_writequeues(); | 1449 | clean_writequeues(); |
1432 | 1450 | ||
1433 | for (i = 0; i <= max_nodeid; i++) { | 1451 | foreach_conn(free_conn); |
1434 | con = __nodeid2con(i, 0); | 1452 | |
1435 | if (con) { | ||
1436 | close_connection(con, true); | ||
1437 | if (con->othercon) | ||
1438 | kmem_cache_free(con_cache, con->othercon); | ||
1439 | kmem_cache_free(con_cache, con); | ||
1440 | } | ||
1441 | } | ||
1442 | max_nodeid = 0; | ||
1443 | mutex_unlock(&connections_lock); | 1453 | mutex_unlock(&connections_lock); |
1444 | kmem_cache_destroy(con_cache); | 1454 | kmem_cache_destroy(con_cache); |
1445 | idr_init(&connections_idr); | ||
1446 | } | 1455 | } |
1447 | 1456 | ||
1448 | int dlm_lowcomms_start(void) | 1457 | int dlm_lowcomms_start(void) |
1449 | { | 1458 | { |
1450 | int error = -EINVAL; | 1459 | int error = -EINVAL; |
1451 | struct connection *con; | 1460 | struct connection *con; |
1461 | int i; | ||
1462 | |||
1463 | for (i = 0; i < CONN_HASH_SIZE; i++) | ||
1464 | INIT_HLIST_HEAD(&connection_hash[i]); | ||
1452 | 1465 | ||
1453 | init_local(); | 1466 | init_local(); |
1454 | if (!dlm_local_count) { | 1467 | if (!dlm_local_count) { |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 065149e84f42..ebce994ab0b7 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. |
3 | * | 3 | * |
4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -84,7 +84,7 @@ struct dlm_lock_result32 { | |||
84 | 84 | ||
85 | static void compat_input(struct dlm_write_request *kb, | 85 | static void compat_input(struct dlm_write_request *kb, |
86 | struct dlm_write_request32 *kb32, | 86 | struct dlm_write_request32 *kb32, |
87 | size_t count) | 87 | int namelen) |
88 | { | 88 | { |
89 | kb->version[0] = kb32->version[0]; | 89 | kb->version[0] = kb32->version[0]; |
90 | kb->version[1] = kb32->version[1]; | 90 | kb->version[1] = kb32->version[1]; |
@@ -96,8 +96,7 @@ static void compat_input(struct dlm_write_request *kb, | |||
96 | kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { | 96 | kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { |
97 | kb->i.lspace.flags = kb32->i.lspace.flags; | 97 | kb->i.lspace.flags = kb32->i.lspace.flags; |
98 | kb->i.lspace.minor = kb32->i.lspace.minor; | 98 | kb->i.lspace.minor = kb32->i.lspace.minor; |
99 | memcpy(kb->i.lspace.name, kb32->i.lspace.name, count - | 99 | memcpy(kb->i.lspace.name, kb32->i.lspace.name, namelen); |
100 | offsetof(struct dlm_write_request32, i.lspace.name)); | ||
101 | } else if (kb->cmd == DLM_USER_PURGE) { | 100 | } else if (kb->cmd == DLM_USER_PURGE) { |
102 | kb->i.purge.nodeid = kb32->i.purge.nodeid; | 101 | kb->i.purge.nodeid = kb32->i.purge.nodeid; |
103 | kb->i.purge.pid = kb32->i.purge.pid; | 102 | kb->i.purge.pid = kb32->i.purge.pid; |
@@ -115,8 +114,7 @@ static void compat_input(struct dlm_write_request *kb, | |||
115 | kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; | 114 | kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; |
116 | kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; | 115 | kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; |
117 | memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); | 116 | memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); |
118 | memcpy(kb->i.lock.name, kb32->i.lock.name, count - | 117 | memcpy(kb->i.lock.name, kb32->i.lock.name, namelen); |
119 | offsetof(struct dlm_write_request32, i.lock.name)); | ||
120 | } | 118 | } |
121 | } | 119 | } |
122 | 120 | ||
@@ -539,9 +537,16 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
539 | #ifdef CONFIG_COMPAT | 537 | #ifdef CONFIG_COMPAT |
540 | if (!kbuf->is64bit) { | 538 | if (!kbuf->is64bit) { |
541 | struct dlm_write_request32 *k32buf; | 539 | struct dlm_write_request32 *k32buf; |
540 | int namelen = 0; | ||
541 | |||
542 | if (count > sizeof(struct dlm_write_request32)) | ||
543 | namelen = count - sizeof(struct dlm_write_request32); | ||
544 | |||
542 | k32buf = (struct dlm_write_request32 *)kbuf; | 545 | k32buf = (struct dlm_write_request32 *)kbuf; |
543 | kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - | 546 | |
544 | sizeof(struct dlm_write_request32)), GFP_KERNEL); | 547 | /* add 1 after namelen so that the name string is terminated */ |
548 | kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1, | ||
549 | GFP_KERNEL); | ||
545 | if (!kbuf) { | 550 | if (!kbuf) { |
546 | kfree(k32buf); | 551 | kfree(k32buf); |
547 | return -ENOMEM; | 552 | return -ENOMEM; |
@@ -549,7 +554,8 @@ static ssize_t device_write(struct file *file, const char __user *buf, | |||
549 | 554 | ||
550 | if (proc) | 555 | if (proc) |
551 | set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); | 556 | set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); |
552 | compat_input(kbuf, k32buf, count + 1); | 557 | |
558 | compat_input(kbuf, k32buf, namelen); | ||
553 | kfree(k32buf); | 559 | kfree(k32buf); |
554 | } | 560 | } |
555 | #endif | 561 | #endif |