diff options
| -rw-r--r-- | fs/dlm/lowcomms.c | 171 |
1 files changed, 92 insertions, 79 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 982314c472c1..609108a83267 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | ******************************************************************************* | 2 | ******************************************************************************* |
| 3 | ** | 3 | ** |
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. |
| 6 | ** | 6 | ** |
| 7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
| @@ -48,7 +48,6 @@ | |||
| 48 | #include <net/sock.h> | 48 | #include <net/sock.h> |
| 49 | #include <net/tcp.h> | 49 | #include <net/tcp.h> |
| 50 | #include <linux/pagemap.h> | 50 | #include <linux/pagemap.h> |
| 51 | #include <linux/idr.h> | ||
| 52 | #include <linux/file.h> | 51 | #include <linux/file.h> |
| 53 | #include <linux/mutex.h> | 52 | #include <linux/mutex.h> |
| 54 | #include <linux/sctp.h> | 53 | #include <linux/sctp.h> |
| @@ -61,6 +60,7 @@ | |||
| 61 | #include "config.h" | 60 | #include "config.h" |
| 62 | 61 | ||
| 63 | #define NEEDED_RMEM (4*1024*1024) | 62 | #define NEEDED_RMEM (4*1024*1024) |
| 63 | #define CONN_HASH_SIZE 32 | ||
| 64 | 64 | ||
| 65 | struct cbuf { | 65 | struct cbuf { |
| 66 | unsigned int base; | 66 | unsigned int base; |
| @@ -115,6 +115,7 @@ struct connection { | |||
| 115 | int retries; | 115 | int retries; |
| 116 | #define MAX_CONNECT_RETRIES 3 | 116 | #define MAX_CONNECT_RETRIES 3 |
| 117 | int sctp_assoc; | 117 | int sctp_assoc; |
| 118 | struct hlist_node list; | ||
| 118 | struct connection *othercon; | 119 | struct connection *othercon; |
| 119 | struct work_struct rwork; /* Receive workqueue */ | 120 | struct work_struct rwork; /* Receive workqueue */ |
| 120 | struct work_struct swork; /* Send workqueue */ | 121 | struct work_struct swork; /* Send workqueue */ |
| @@ -139,14 +140,37 @@ static int dlm_local_count; | |||
| 139 | static struct workqueue_struct *recv_workqueue; | 140 | static struct workqueue_struct *recv_workqueue; |
| 140 | static struct workqueue_struct *send_workqueue; | 141 | static struct workqueue_struct *send_workqueue; |
| 141 | 142 | ||
| 142 | static DEFINE_IDR(connections_idr); | 143 | static struct hlist_head connection_hash[CONN_HASH_SIZE]; |
| 143 | static DEFINE_MUTEX(connections_lock); | 144 | static DEFINE_MUTEX(connections_lock); |
| 144 | static int max_nodeid; | ||
| 145 | static struct kmem_cache *con_cache; | 145 | static struct kmem_cache *con_cache; |
| 146 | 146 | ||
| 147 | static void process_recv_sockets(struct work_struct *work); | 147 | static void process_recv_sockets(struct work_struct *work); |
| 148 | static void process_send_sockets(struct work_struct *work); | 148 | static void process_send_sockets(struct work_struct *work); |
| 149 | 149 | ||
| 150 | |||
| 151 | /* This is deliberately very simple because most clusters have simple | ||
| 152 | sequential nodeids, so we should be able to go straight to a connection | ||
| 153 | struct in the array */ | ||
| 154 | static inline int nodeid_hash(int nodeid) | ||
| 155 | { | ||
| 156 | return nodeid & (CONN_HASH_SIZE-1); | ||
| 157 | } | ||
| 158 | |||
| 159 | static struct connection *__find_con(int nodeid) | ||
| 160 | { | ||
| 161 | int r; | ||
| 162 | struct hlist_node *h; | ||
| 163 | struct connection *con; | ||
| 164 | |||
| 165 | r = nodeid_hash(nodeid); | ||
| 166 | |||
| 167 | hlist_for_each_entry(con, h, &connection_hash[r], list) { | ||
| 168 | if (con->nodeid == nodeid) | ||
| 169 | return con; | ||
| 170 | } | ||
| 171 | return NULL; | ||
| 172 | } | ||
| 173 | |||
| 150 | /* | 174 | /* |
| 151 | * If 'allocation' is zero then we don't attempt to create a new | 175 | * If 'allocation' is zero then we don't attempt to create a new |
| 152 | * connection structure for this node. | 176 | * connection structure for this node. |
| @@ -155,31 +179,17 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) | |||
| 155 | { | 179 | { |
| 156 | struct connection *con = NULL; | 180 | struct connection *con = NULL; |
| 157 | int r; | 181 | int r; |
| 158 | int n; | ||
| 159 | 182 | ||
| 160 | con = idr_find(&connections_idr, nodeid); | 183 | con = __find_con(nodeid); |
| 161 | if (con || !alloc) | 184 | if (con || !alloc) |
| 162 | return con; | 185 | return con; |
| 163 | 186 | ||
| 164 | r = idr_pre_get(&connections_idr, alloc); | ||
| 165 | if (!r) | ||
| 166 | return NULL; | ||
| 167 | |||
| 168 | con = kmem_cache_zalloc(con_cache, alloc); | 187 | con = kmem_cache_zalloc(con_cache, alloc); |
| 169 | if (!con) | 188 | if (!con) |
| 170 | return NULL; | 189 | return NULL; |
| 171 | 190 | ||
| 172 | r = idr_get_new_above(&connections_idr, con, nodeid, &n); | 191 | r = nodeid_hash(nodeid); |
| 173 | if (r) { | 192 | hlist_add_head(&con->list, &connection_hash[r]); |
| 174 | kmem_cache_free(con_cache, con); | ||
| 175 | return NULL; | ||
| 176 | } | ||
| 177 | |||
| 178 | if (n != nodeid) { | ||
| 179 | idr_remove(&connections_idr, n); | ||
| 180 | kmem_cache_free(con_cache, con); | ||
| 181 | return NULL; | ||
| 182 | } | ||
| 183 | 193 | ||
| 184 | con->nodeid = nodeid; | 194 | con->nodeid = nodeid; |
| 185 | mutex_init(&con->sock_mutex); | 195 | mutex_init(&con->sock_mutex); |
| @@ -190,19 +200,30 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) | |||
| 190 | 200 | ||
| 191 | /* Setup action pointers for child sockets */ | 201 | /* Setup action pointers for child sockets */ |
| 192 | if (con->nodeid) { | 202 | if (con->nodeid) { |
| 193 | struct connection *zerocon = idr_find(&connections_idr, 0); | 203 | struct connection *zerocon = __find_con(0); |
| 194 | 204 | ||
| 195 | con->connect_action = zerocon->connect_action; | 205 | con->connect_action = zerocon->connect_action; |
| 196 | if (!con->rx_action) | 206 | if (!con->rx_action) |
| 197 | con->rx_action = zerocon->rx_action; | 207 | con->rx_action = zerocon->rx_action; |
| 198 | } | 208 | } |
| 199 | 209 | ||
| 200 | if (nodeid > max_nodeid) | ||
| 201 | max_nodeid = nodeid; | ||
| 202 | |||
| 203 | return con; | 210 | return con; |
| 204 | } | 211 | } |
| 205 | 212 | ||
| 213 | /* Loop round all connections */ | ||
| 214 | static void foreach_conn(void (*conn_func)(struct connection *c)) | ||
| 215 | { | ||
| 216 | int i; | ||
| 217 | struct hlist_node *h, *n; | ||
| 218 | struct connection *con; | ||
| 219 | |||
| 220 | for (i = 0; i < CONN_HASH_SIZE; i++) { | ||
| 221 | hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){ | ||
| 222 | conn_func(con); | ||
| 223 | } | ||
| 224 | } | ||
| 225 | } | ||
| 226 | |||
| 206 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) | 227 | static struct connection *nodeid2con(int nodeid, gfp_t allocation) |
| 207 | { | 228 | { |
| 208 | struct connection *con; | 229 | struct connection *con; |
| @@ -218,14 +239,17 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) | |||
| 218 | static struct connection *assoc2con(int assoc_id) | 239 | static struct connection *assoc2con(int assoc_id) |
| 219 | { | 240 | { |
| 220 | int i; | 241 | int i; |
| 242 | struct hlist_node *h; | ||
| 221 | struct connection *con; | 243 | struct connection *con; |
| 222 | 244 | ||
| 223 | mutex_lock(&connections_lock); | 245 | mutex_lock(&connections_lock); |
| 224 | for (i=0; i<=max_nodeid; i++) { | 246 | |
| 225 | con = __nodeid2con(i, 0); | 247 | for (i = 0 ; i < CONN_HASH_SIZE; i++) { |
| 226 | if (con && con->sctp_assoc == assoc_id) { | 248 | hlist_for_each_entry(con, h, &connection_hash[i], list) { |
| 227 | mutex_unlock(&connections_lock); | 249 | if (con && con->sctp_assoc == assoc_id) { |
| 228 | return con; | 250 | mutex_unlock(&connections_lock); |
| 251 | return con; | ||
| 252 | } | ||
| 229 | } | 253 | } |
| 230 | } | 254 | } |
| 231 | mutex_unlock(&connections_lock); | 255 | mutex_unlock(&connections_lock); |
| @@ -376,25 +400,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd) | |||
| 376 | log_print("send EOF to node failed: %d", ret); | 400 | log_print("send EOF to node failed: %d", ret); |
| 377 | } | 401 | } |
| 378 | 402 | ||
| 403 | static void sctp_init_failed_foreach(struct connection *con) | ||
| 404 | { | ||
| 405 | con->sctp_assoc = 0; | ||
| 406 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
| 407 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | ||
| 408 | queue_work(send_workqueue, &con->swork); | ||
| 409 | } | ||
| 410 | } | ||
| 411 | |||
| 379 | /* INIT failed but we don't know which node... | 412 | /* INIT failed but we don't know which node... |
| 380 | restart INIT on all pending nodes */ | 413 | restart INIT on all pending nodes */ |
| 381 | static void sctp_init_failed(void) | 414 | static void sctp_init_failed(void) |
| 382 | { | 415 | { |
| 383 | int i; | ||
| 384 | struct connection *con; | ||
| 385 | |||
| 386 | mutex_lock(&connections_lock); | 416 | mutex_lock(&connections_lock); |
| 387 | for (i=1; i<=max_nodeid; i++) { | 417 | |
| 388 | con = __nodeid2con(i, 0); | 418 | foreach_conn(sctp_init_failed_foreach); |
| 389 | if (!con) | 419 | |
| 390 | continue; | ||
| 391 | con->sctp_assoc = 0; | ||
| 392 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | ||
| 393 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) { | ||
| 394 | queue_work(send_workqueue, &con->swork); | ||
| 395 | } | ||
| 396 | } | ||
| 397 | } | ||
| 398 | mutex_unlock(&connections_lock); | 420 | mutex_unlock(&connections_lock); |
| 399 | } | 421 | } |
| 400 | 422 | ||
| @@ -1313,13 +1335,10 @@ out_connect: | |||
| 1313 | 1335 | ||
| 1314 | static void clean_one_writequeue(struct connection *con) | 1336 | static void clean_one_writequeue(struct connection *con) |
| 1315 | { | 1337 | { |
| 1316 | struct list_head *list; | 1338 | struct writequeue_entry *e, *safe; |
| 1317 | struct list_head *temp; | ||
| 1318 | 1339 | ||
| 1319 | spin_lock(&con->writequeue_lock); | 1340 | spin_lock(&con->writequeue_lock); |
| 1320 | list_for_each_safe(list, temp, &con->writequeue) { | 1341 | list_for_each_entry_safe(e, safe, &con->writequeue, list) { |
| 1321 | struct writequeue_entry *e = | ||
| 1322 | list_entry(list, struct writequeue_entry, list); | ||
| 1323 | list_del(&e->list); | 1342 | list_del(&e->list); |
| 1324 | free_entry(e); | 1343 | free_entry(e); |
| 1325 | } | 1344 | } |
| @@ -1369,14 +1388,7 @@ static void process_send_sockets(struct work_struct *work) | |||
| 1369 | /* Discard all entries on the write queues */ | 1388 | /* Discard all entries on the write queues */ |
| 1370 | static void clean_writequeues(void) | 1389 | static void clean_writequeues(void) |
| 1371 | { | 1390 | { |
| 1372 | int nodeid; | 1391 | foreach_conn(clean_one_writequeue); |
| 1373 | |||
| 1374 | for (nodeid = 1; nodeid <= max_nodeid; nodeid++) { | ||
| 1375 | struct connection *con = __nodeid2con(nodeid, 0); | ||
| 1376 | |||
| 1377 | if (con) | ||
| 1378 | clean_one_writequeue(con); | ||
| 1379 | } | ||
| 1380 | } | 1392 | } |
| 1381 | 1393 | ||
| 1382 | static void work_stop(void) | 1394 | static void work_stop(void) |
| @@ -1406,23 +1418,29 @@ static int work_start(void) | |||
| 1406 | return 0; | 1418 | return 0; |
| 1407 | } | 1419 | } |
| 1408 | 1420 | ||
| 1409 | void dlm_lowcomms_stop(void) | 1421 | static void stop_conn(struct connection *con) |
| 1410 | { | 1422 | { |
| 1411 | int i; | 1423 | con->flags |= 0x0F; |
| 1412 | struct connection *con; | 1424 | if (con->sock) |
| 1425 | con->sock->sk->sk_user_data = NULL; | ||
| 1426 | } | ||
| 1413 | 1427 | ||
| 1428 | static void free_conn(struct connection *con) | ||
| 1429 | { | ||
| 1430 | close_connection(con, true); | ||
| 1431 | if (con->othercon) | ||
| 1432 | kmem_cache_free(con_cache, con->othercon); | ||
| 1433 | hlist_del(&con->list); | ||
| 1434 | kmem_cache_free(con_cache, con); | ||
| 1435 | } | ||
| 1436 | |||
| 1437 | void dlm_lowcomms_stop(void) | ||
| 1438 | { | ||
| 1414 | /* Set all the flags to prevent any | 1439 | /* Set all the flags to prevent any |
| 1415 | socket activity. | 1440 | socket activity. |
| 1416 | */ | 1441 | */ |
| 1417 | mutex_lock(&connections_lock); | 1442 | mutex_lock(&connections_lock); |
| 1418 | for (i = 0; i <= max_nodeid; i++) { | 1443 | foreach_conn(stop_conn); |
| 1419 | con = __nodeid2con(i, 0); | ||
| 1420 | if (con) { | ||
| 1421 | con->flags |= 0x0F; | ||
| 1422 | if (con->sock) | ||
| 1423 | con->sock->sk->sk_user_data = NULL; | ||
| 1424 | } | ||
| 1425 | } | ||
| 1426 | mutex_unlock(&connections_lock); | 1444 | mutex_unlock(&connections_lock); |
| 1427 | 1445 | ||
| 1428 | work_stop(); | 1446 | work_stop(); |
| @@ -1430,25 +1448,20 @@ void dlm_lowcomms_stop(void) | |||
| 1430 | mutex_lock(&connections_lock); | 1448 | mutex_lock(&connections_lock); |
| 1431 | clean_writequeues(); | 1449 | clean_writequeues(); |
| 1432 | 1450 | ||
| 1433 | for (i = 0; i <= max_nodeid; i++) { | 1451 | foreach_conn(free_conn); |
| 1434 | con = __nodeid2con(i, 0); | 1452 | |
| 1435 | if (con) { | ||
| 1436 | close_connection(con, true); | ||
| 1437 | if (con->othercon) | ||
| 1438 | kmem_cache_free(con_cache, con->othercon); | ||
| 1439 | kmem_cache_free(con_cache, con); | ||
| 1440 | } | ||
| 1441 | } | ||
| 1442 | max_nodeid = 0; | ||
| 1443 | mutex_unlock(&connections_lock); | 1453 | mutex_unlock(&connections_lock); |
| 1444 | kmem_cache_destroy(con_cache); | 1454 | kmem_cache_destroy(con_cache); |
| 1445 | idr_init(&connections_idr); | ||
| 1446 | } | 1455 | } |
| 1447 | 1456 | ||
| 1448 | int dlm_lowcomms_start(void) | 1457 | int dlm_lowcomms_start(void) |
| 1449 | { | 1458 | { |
| 1450 | int error = -EINVAL; | 1459 | int error = -EINVAL; |
| 1451 | struct connection *con; | 1460 | struct connection *con; |
| 1461 | int i; | ||
| 1462 | |||
| 1463 | for (i = 0; i < CONN_HASH_SIZE; i++) | ||
| 1464 | INIT_HLIST_HEAD(&connection_hash[i]); | ||
| 1452 | 1465 | ||
| 1453 | init_local(); | 1466 | init_local(); |
| 1454 | if (!dlm_local_count) { | 1467 | if (!dlm_local_count) { |
