diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-27 17:48:07 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-27 17:48:07 -0400 |
commit | 805de022b100bcf796860fe88d7db4164066d1c3 (patch) | |
tree | 79002a4947a0df8d82ea5f75fac8c6d958848877 /fs/dlm/lowcomms.c | |
parent | 7c757eb9f804782fb39d0ae2c1a88ffb9309138e (diff) | |
parent | 1fecb1c4b62881e3689ba2dcf93072ae301b597c (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/dlm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/dlm:
  dlm: fix length calculation in compat code
  dlm: ignore cancel on granted lock
  dlm: clear defunct cancel state
  dlm: replace idr with hash table for connections
  dlm: comment typo fixes
  dlm: use ipv6_addr_copy
  dlm: Change rwlock which is only used in write mode to a spinlock
Diffstat (limited to 'fs/dlm/lowcomms.c')
-rw-r--r-- | fs/dlm/lowcomms.c | 181 |
1 files changed, 97 insertions, 84 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 103a5ebd1371..609108a83267 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -21,7 +21,7 @@ | |||
21 | * | 21 | * |
22 | * Cluster nodes are referred to by their nodeids. nodeids are | 22 | * Cluster nodes are referred to by their nodeids. nodeids are |
23 | * simply 32 bit numbers to the locking module - if they need to | 23 | * simply 32 bit numbers to the locking module - if they need to |
24 | * be expanded for the cluster infrastructure then that is it's | 24 | * be expanded for the cluster infrastructure then that is its |
25 | * responsibility. It is this layer's | 25 | * responsibility. It is this layer's |
26 | * responsibility to resolve these into IP address or | 26 | * responsibility to resolve these into IP address or |
27 | * whatever it needs for inter-node communication. | 27 | * whatever it needs for inter-node communication. |
@@ -36,9 +36,9 @@ | |||
36 | * of high load. Also, this way, the sending thread can collect together | 36 | * of high load. Also, this way, the sending thread can collect together |
37 | * messages bound for one node and send them in one block. | 37 | * messages bound for one node and send them in one block. |
38 | * | 38 | * |
39 | * lowcomms will choose to use wither TCP or SCTP as its transport layer | 39 | * lowcomms will choose to use either TCP or SCTP as its transport layer |
40 | * depending on the configuration variable 'protocol'. This should be set | 40 | * depending on the configuration variable 'protocol'. This should be set |
41 | * to 0 (default) for TCP or 1 for SCTP. It shouldbe configured using a | 41 | * to 0 (default) for TCP or 1 for SCTP. It should be configured using a |
42 | * cluster-wide mechanism as it must be the same on all nodes of the cluster | 42 | * cluster-wide mechanism as it must be the same on all nodes of the cluster |
43 | * for the DLM to function. | 43 | * for the DLM to function. |
44 | * | 44 | * |
@@ -48,11 +48,11 @@ | |||
48 | #include <net/sock.h> | 48 | #include <net/sock.h> |
49 | #include <net/tcp.h> | 49 | #include <net/tcp.h> |
50 | #include <linux/pagemap.h> | 50 | #include <linux/pagemap.h> |
51 | #include <linux/idr.h> | ||
52 | #include <linux/file.h> | 51 | #include <linux/file.h> |
53 | #include <linux/mutex.h> | 52 | #include <linux/mutex.h> |
54 | #include <linux/sctp.h> | 53 | #include <linux/sctp.h> |
55 | #include <net/sctp/user.h> | 54 | #include <net/sctp/user.h> |
55 | #include <net/ipv6.h> | ||
56 | 56 | ||
57 | #include "dlm_internal.h" | 57 | #include "dlm_internal.h" |
58 | #include "lowcomms.h" | 58 | #include "lowcomms.h" |
@@ -60,6 +60,7 @@ | |||
60 | #include "config.h" | 60 | #include "config.h" |
61 | 61 | ||
62 | #define NEEDED_RMEM (4*1024*1024) | 62 | #define NEEDED_RMEM (4*1024*1024) |
63 | #define CONN_HASH_SIZE 32 | ||
63 | 64 | ||
64 | struct cbuf { | 65 | struct cbuf { |
65 | unsigned int base; | 66 | unsigned int base; |
@@ -114,6 +115,7 @@ struct connection { | |||
114 | int retries; | 115 | int retries; |
115 | #define MAX_CONNECT_RETRIES 3 | 116 | #define MAX_CONNECT_RETRIES 3 |
116 | int sctp_assoc; | 117 | int sctp_assoc; |
118 | struct hlist_node list; | ||
117 | struct connection *othercon; | 119 | struct connection *othercon; |
118 | struct work_struct rwork; /* Receive workqueue */ | 120 | struct work_struct rwork; /* Receive workqueue */ |
119 | struct work_struct swork; /* Send workqueue */ | 121 | struct work_struct swork; /* Send workqueue */ |
@@ -138,14 +140,37 @@ static int dlm_local_count; | |||
138 | static struct workqueue_struct *recv_workqueue; | 140 | static struct workqueue_struct *recv_workqueue; |
139 | static struct workqueue_struct *send_workqueue; | 141 | static struct workqueue_struct *send_workqueue; |
140 | 142 | ||
141 | static DEFINE_IDR(connections_idr); | 143 | static struct hlist_head connection_hash[CONN_HASH_SIZE]; |
142 | static DEFINE_MUTEX(connections_lock); | 144 | static DEFINE_MUTEX(connections_lock); |
143 | static int max_nodeid; | ||
144 | static struct kmem_cache *con_cache; | 145 | static struct kmem_cache *con_cache; |
145 | 146 | ||
146 | static void process_recv_sockets(struct work_struct *work); | 147 | static void process_recv_sockets(struct work_struct *work); |
147 | static void process_send_sockets(struct work_struct *work); | 148 | static void process_send_sockets(struct work_struct *work); |
148 | 149 | ||
150 | |||
151 | /* This is deliberately very simple because most clusters have simple | ||
152 | sequential nodeids, so we should be able to go straight to a connection | ||
153 | struct in the array */ | ||
154 | static inline int nodeid_hash(int nodeid) | ||
155 | { | ||
156 | return nodeid & (CONN_HASH_SIZE-1); | ||
157 | } | ||
158 | |||
159 | static struct connection *__find_con(int nodeid) | ||
160 | { | ||
161 | int r; | ||
162 | struct hlist_node *h; | ||
163 | struct connection *con; | ||
164 | |||
165 | r = nodeid_hash(nodeid); | ||
166 | |||
167 | hlist_for_each_entry(con, h, &connection_hash[r], list) { | ||
168 | if (con->nodeid == nodeid) | ||
169 | return con; | ||
170 | } | ||
171 | return NULL; | ||
172 | } | ||
173 | |||
149 | /* | 174 | /* |
150 | * If 'allocation' is zero then we don't attempt to create a new | 175 | * If 'allocation' is zero then we don't attempt to create a new |
151 | * connection structure for this node. | 176 | * connection structure for this node. |
@@ -154,31 +179,17 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) | |||
154 | { | 179 | { |
155 | struct connection *con = NULL; | 180 | struct connection *con = NULL; |
156 | int r; | 181 | int r; |
157 | int n; | ||
158 | 182 | ||
159 | con = idr_find(&connections_idr, nodeid); | 183 | con = __find_con(nodeid); |
160 | if (con || !alloc) | 184 | if (con || !alloc) |
161 | return con; | 185 | return con; |
162 | 186 | ||
163 | r = idr_pre_get(&connections_idr, alloc); | ||
164 | if (!r) | ||
165 | return NULL; | ||
166 | |||
167 | con = kmem_cache_zalloc(con_cache, alloc); | 187 | con = kmem_cache_zalloc(con_cache, alloc); |
168 | if (!con) | 188 | if (!con) |
169 | return NULL; | 189 | return NULL; |
170 | 190 | ||
171 | r = idr_get_new_above(&connections_idr, con, nodeid, &n); | 191 | r = nodeid_hash(nodeid); |
172 | if (r) { | 192 | hlist_add_head(&con->list, &connection_hash[r]); |
173 | kmem_cache_free(con_cache, con); | ||
174 | return NULL; | ||
175 | } | ||
176 | |||
177 | if (n != nodeid) { | ||
178 | idr_remove(&connections_idr, n); | ||
179 | kmem_cache_free(con_cache, con); | ||
180 | return NULL; | ||
181 | } | ||
182 | 193 | ||
183 | con->nodeid = nodeid; | 194 | con->nodeid = nodeid; |
184 | mutex_init(&con->sock_mutex); | 195 | mutex_init(&con->sock_mutex); |
@@ -189,19 +200,30 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) | |||
189 | 200 | ||
190 | /* Setup action pointers for child sockets */ | 201 | /* Setup action pointers for child sockets */ |
191 | if (con->nodeid) { | 202 | if (con->nodeid) { |
192 | struct connection *zerocon = idr_find(&connections_idr, 0); | 203 | struct connection *zerocon = __find_con(0); |
193 | 204 | ||
194 | con->connect_action = zerocon->connect_action; | 205 | con->connect_action = zerocon->connect_action; |
195 | if (!con->rx_action) | 206 | if (!con->rx_action) |
196 | con->rx_action = zerocon->rx_action; | 207 | con->rx_action = zerocon->rx_action; |
197 | } | 208 | } |
198 | 209 | ||
199 | if (nodeid > max_nodeid) | ||
200 | max_nodeid = nodeid; | ||
201 | |||
202 | return con; | 210 | return con; |
203 | } | 211 | } |
204 | 212 | ||
213 | /* Loop round all connections */ | ||
214 | static void foreach_conn(void (*conn_func)(struct connection *c)) | ||
215 | { | ||
216 | int i; | ||
217 | struct hlist_node *h, *n; | ||
218 | struct connection *con; | ||
219 | |||
220 | for (i = 0; i < CONN_HASH_SIZE; i++) { | ||
221 | hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){ | ||
222 | conn_func(con); | ||
223 | } | ||
224 | } | ||
225 | } | ||
226 | |||
205 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) | 227 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) |
206 | { | 228 | { |
207 | struct connection *con; | 229 | struct connection *con; |
@@ -217,14 +239,17 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) | |||
217 | static struct connection *assoc2con(int assoc_id) | 239 | static struct connection *assoc2con(int assoc_id) |
218 | { | 240 | { |
219 | int i; | 241 | int i; |
242 | struct hlist_node *h; | ||
220 | struct connection *con; | 243 | struct connection *con; |
221 | 244 | ||
222 | mutex_lock(&connections_lock); | 245 | mutex_lock(&connections_lock); |
223 | for (i=0; i<=max_nodeid; i++) { | 246 | |
224 | con = __nodeid2con(i, 0); | 247 | for (i = 0 ; i < CONN_HASH_SIZE; i++) { |
225 | if (con && con->sctp_assoc == assoc_id) { | 248 | hlist_for_each_entry(con, h, &connection_hash[i], list) { |
226 | mutex_unlock(&connections_lock); | 249 | if (con && con->sctp_assoc == assoc_id) { |
227 | return con; | 250 | mutex_unlock(&connections_lock); |
251 | return con; | ||
252 | } | ||
228 | } | 253 | } |
229 | } | 254 | } |
230 | mutex_unlock(&connections_lock); | 255 | mutex_unlock(&connections_lock); |
@@ -250,8 +275,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) | |||
250 | } else { | 275 | } else { |
251 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; | 276 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; |
252 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; | 277 | struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; |
253 | memcpy(&ret6->sin6_addr, &in6->sin6_addr, | 278 | ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr); |
254 | sizeof(in6->sin6_addr)); | ||
255 | } | 279 | } |
256 | 280 | ||
257 | return 0; | 281 | return 0; |
@@ -376,25 +400,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd) | |||
376 | log_print("send EOF to node failed: %d", ret); | 400 | log_print("send EOF to node failed: %d", ret); |
377 | } | 401 | } |
378 | 402 | ||
403 | static void sctp_init_failed_foreach(struct connection *con) | ||
404 | { | ||
405 | con->sctp_assoc = 0; | ||
406 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
407 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | ||
408 | queue_work(send_workqueue, &con->swork); | ||
409 | } | ||
410 | } | ||
411 | |||
379 | /* INIT failed but we don't know which node... | 412 | /* INIT failed but we don't know which node... |
380 | restart INIT on all pending nodes */ | 413 | restart INIT on all pending nodes */ |
381 | static void sctp_init_failed(void) | 414 | static void sctp_init_failed(void) |
382 | { | 415 | { |
383 | int i; | ||
384 | struct connection *con; | ||
385 | |||
386 | mutex_lock(&connections_lock); | 416 | mutex_lock(&connections_lock); |
387 | for (i=1; i<=max_nodeid; i++) { | 417 | |
388 | con = __nodeid2con(i, 0); | 418 | foreach_conn(sctp_init_failed_foreach); |
389 | if (!con) | 419 | |
390 | continue; | ||
391 | con->sctp_assoc = 0; | ||
392 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
393 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) { | ||
394 | queue_work(send_workqueue, &con->swork); | ||
395 | } | ||
396 | } | ||
397 | } | ||
398 | mutex_unlock(&connections_lock); | 420 | mutex_unlock(&connections_lock); |
399 | } | 421 | } |
400 | 422 | ||
@@ -1313,13 +1335,10 @@ out_connect: | |||
1313 | 1335 | ||
1314 | static void clean_one_writequeue(struct connection *con) | 1336 | static void clean_one_writequeue(struct connection *con) |
1315 | { | 1337 | { |
1316 | struct list_head *list; | 1338 | struct writequeue_entry *e, *safe; |
1317 | struct list_head *temp; | ||
1318 | 1339 | ||
1319 | spin_lock(&con->writequeue_lock); | 1340 | spin_lock(&con->writequeue_lock); |
1320 | list_for_each_safe(list, temp, &con->writequeue) { | 1341 | list_for_each_entry_safe(e, safe, &con->writequeue, list) { |
1321 | struct writequeue_entry *e = | ||
1322 | list_entry(list, struct writequeue_entry, list); | ||
1323 | list_del(&e->list); | 1342 | list_del(&e->list); |
1324 | free_entry(e); | 1343 | free_entry(e); |
1325 | } | 1344 | } |
@@ -1369,14 +1388,7 @@ static void process_send_sockets(struct work_struct *work) | |||
1369 | /* Discard all entries on the write queues */ | 1388 | /* Discard all entries on the write queues */ |
1370 | static void clean_writequeues(void) | 1389 | static void clean_writequeues(void) |
1371 | { | 1390 | { |
1372 | int nodeid; | 1391 | foreach_conn(clean_one_writequeue); |
1373 | |||
1374 | for (nodeid = 1; nodeid <= max_nodeid; nodeid++) { | ||
1375 | struct connection *con = __nodeid2con(nodeid, 0); | ||
1376 | |||
1377 | if (con) | ||
1378 | clean_one_writequeue(con); | ||
1379 | } | ||
1380 | } | 1392 | } |
1381 | 1393 | ||
1382 | static void work_stop(void) | 1394 | static void work_stop(void) |
@@ -1406,23 +1418,29 @@ static int work_start(void) | |||
1406 | return 0; | 1418 | return 0; |
1407 | } | 1419 | } |
1408 | 1420 | ||
1409 | void dlm_lowcomms_stop(void) | 1421 | static void stop_conn(struct connection *con) |
1410 | { | 1422 | { |
1411 | int i; | 1423 | con->flags |= 0x0F; |
1412 | struct connection *con; | 1424 | if (con->sock) |
1425 | con->sock->sk->sk_user_data = NULL; | ||
1426 | } | ||
1413 | 1427 | ||
1428 | static void free_conn(struct connection *con) | ||
1429 | { | ||
1430 | close_connection(con, true); | ||
1431 | if (con->othercon) | ||
1432 | kmem_cache_free(con_cache, con->othercon); | ||
1433 | hlist_del(&con->list); | ||
1434 | kmem_cache_free(con_cache, con); | ||
1435 | } | ||
1436 | |||
1437 | void dlm_lowcomms_stop(void) | ||
1438 | { | ||
1414 | /* Set all the flags to prevent any | 1439 | /* Set all the flags to prevent any |
1415 | socket activity. | 1440 | socket activity. |
1416 | */ | 1441 | */ |
1417 | mutex_lock(&connections_lock); | 1442 | mutex_lock(&connections_lock); |
1418 | for (i = 0; i <= max_nodeid; i++) { | 1443 | foreach_conn(stop_conn); |
1419 | con = __nodeid2con(i, 0); | ||
1420 | if (con) { | ||
1421 | con->flags |= 0x0F; | ||
1422 | if (con->sock) | ||
1423 | con->sock->sk->sk_user_data = NULL; | ||
1424 | } | ||
1425 | } | ||
1426 | mutex_unlock(&connections_lock); | 1444 | mutex_unlock(&connections_lock); |
1427 | 1445 | ||
1428 | work_stop(); | 1446 | work_stop(); |
@@ -1430,25 +1448,20 @@ void dlm_lowcomms_stop(void) | |||
1430 | mutex_lock(&connections_lock); | 1448 | mutex_lock(&connections_lock); |
1431 | clean_writequeues(); | 1449 | clean_writequeues(); |
1432 | 1450 | ||
1433 | for (i = 0; i <= max_nodeid; i++) { | 1451 | foreach_conn(free_conn); |
1434 | con = __nodeid2con(i, 0); | 1452 | |
1435 | if (con) { | ||
1436 | close_connection(con, true); | ||
1437 | if (con->othercon) | ||
1438 | kmem_cache_free(con_cache, con->othercon); | ||
1439 | kmem_cache_free(con_cache, con); | ||
1440 | } | ||
1441 | } | ||
1442 | max_nodeid = 0; | ||
1443 | mutex_unlock(&connections_lock); | 1453 | mutex_unlock(&connections_lock); |
1444 | kmem_cache_destroy(con_cache); | 1454 | kmem_cache_destroy(con_cache); |
1445 | idr_init(&connections_idr); | ||
1446 | } | 1455 | } |
1447 | 1456 | ||
1448 | int dlm_lowcomms_start(void) | 1457 | int dlm_lowcomms_start(void) |
1449 | { | 1458 | { |
1450 | int error = -EINVAL; | 1459 | int error = -EINVAL; |
1451 | struct connection *con; | 1460 | struct connection *con; |
1461 | int i; | ||
1462 | |||
1463 | for (i = 0; i < CONN_HASH_SIZE; i++) | ||
1464 | INIT_HLIST_HEAD(&connection_hash[i]); | ||
1452 | 1465 | ||
1453 | init_local(); | 1466 | init_local(); |
1454 | if (!dlm_local_count) { | 1467 | if (!dlm_local_count) { |