aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dlm/lowcomms.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm/lowcomms.c')
-rw-r--r--fs/dlm/lowcomms.c177
1 files changed, 134 insertions, 43 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index d0ccd2fd79eb..d90909ec6aa6 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -52,7 +52,6 @@
52#include <linux/mutex.h> 52#include <linux/mutex.h>
53#include <linux/sctp.h> 53#include <linux/sctp.h>
54#include <linux/slab.h> 54#include <linux/slab.h>
55#include <linux/sctp.h>
56#include <net/sctp/sctp.h> 55#include <net/sctp/sctp.h>
57#include <net/ipv6.h> 56#include <net/ipv6.h>
58 57
@@ -126,6 +125,7 @@ struct connection {
126 struct connection *othercon; 125 struct connection *othercon;
127 struct work_struct rwork; /* Receive workqueue */ 126 struct work_struct rwork; /* Receive workqueue */
128 struct work_struct swork; /* Send workqueue */ 127 struct work_struct swork; /* Send workqueue */
128 bool try_new_addr;
129}; 129};
130#define sock2con(x) ((struct connection *)(x)->sk_user_data) 130#define sock2con(x) ((struct connection *)(x)->sk_user_data)
131 131
@@ -144,6 +144,7 @@ struct dlm_node_addr {
144 struct list_head list; 144 struct list_head list;
145 int nodeid; 145 int nodeid;
146 int addr_count; 146 int addr_count;
147 int curr_addr_index;
147 struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; 148 struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
148}; 149};
149 150
@@ -310,7 +311,7 @@ static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y)
310} 311}
311 312
312static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, 313static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
313 struct sockaddr *sa_out) 314 struct sockaddr *sa_out, bool try_new_addr)
314{ 315{
315 struct sockaddr_storage sas; 316 struct sockaddr_storage sas;
316 struct dlm_node_addr *na; 317 struct dlm_node_addr *na;
@@ -320,8 +321,16 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
320 321
321 spin_lock(&dlm_node_addrs_spin); 322 spin_lock(&dlm_node_addrs_spin);
322 na = find_node_addr(nodeid); 323 na = find_node_addr(nodeid);
323 if (na && na->addr_count) 324 if (na && na->addr_count) {
324 memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); 325 if (try_new_addr) {
326 na->curr_addr_index++;
327 if (na->curr_addr_index == na->addr_count)
328 na->curr_addr_index = 0;
329 }
330
331 memcpy(&sas, na->addr[na->curr_addr_index ],
332 sizeof(struct sockaddr_storage));
333 }
325 spin_unlock(&dlm_node_addrs_spin); 334 spin_unlock(&dlm_node_addrs_spin);
326 335
327 if (!na) 336 if (!na)
@@ -353,19 +362,22 @@ static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
353{ 362{
354 struct dlm_node_addr *na; 363 struct dlm_node_addr *na;
355 int rv = -EEXIST; 364 int rv = -EEXIST;
365 int addr_i;
356 366
357 spin_lock(&dlm_node_addrs_spin); 367 spin_lock(&dlm_node_addrs_spin);
358 list_for_each_entry(na, &dlm_node_addrs, list) { 368 list_for_each_entry(na, &dlm_node_addrs, list) {
359 if (!na->addr_count) 369 if (!na->addr_count)
360 continue; 370 continue;
361 371
362 if (!addr_compare(na->addr[0], addr)) 372 for (addr_i = 0; addr_i < na->addr_count; addr_i++) {
363 continue; 373 if (addr_compare(na->addr[addr_i], addr)) {
364 374 *nodeid = na->nodeid;
365 *nodeid = na->nodeid; 375 rv = 0;
366 rv = 0; 376 goto unlock;
367 break; 377 }
378 }
368 } 379 }
380unlock:
369 spin_unlock(&dlm_node_addrs_spin); 381 spin_unlock(&dlm_node_addrs_spin);
370 return rv; 382 return rv;
371} 383}
@@ -561,8 +573,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd)
561 573
562static void sctp_init_failed_foreach(struct connection *con) 574static void sctp_init_failed_foreach(struct connection *con)
563{ 575{
576
577 /*
578 * Don't try to recover base con and handle race where the
579 * other node's assoc init creates an assoc and we get that
580 * notification, then we get a notification that our attempt
581 * failed. This happens when we are still trying the primary
582 * address, but the other node has already tried secondary addrs
583 * and found one that worked.
584 */
585 if (!con->nodeid || con->sctp_assoc)
586 return;
587
588 log_print("Retrying SCTP association init for node %d\n", con->nodeid);
589
590 con->try_new_addr = true;
564 con->sctp_assoc = 0; 591 con->sctp_assoc = 0;
565 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { 592 if (test_and_clear_bit(CF_INIT_PENDING, &con->flags)) {
566 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 593 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
567 queue_work(send_workqueue, &con->swork); 594 queue_work(send_workqueue, &con->swork);
568 } 595 }
@@ -579,15 +606,56 @@ static void sctp_init_failed(void)
579 mutex_unlock(&connections_lock); 606 mutex_unlock(&connections_lock);
580} 607}
581 608
609static void retry_failed_sctp_send(struct connection *recv_con,
610 struct sctp_send_failed *sn_send_failed,
611 char *buf)
612{
613 int len = sn_send_failed->ssf_length - sizeof(struct sctp_send_failed);
614 struct dlm_mhandle *mh;
615 struct connection *con;
616 char *retry_buf;
617 int nodeid = sn_send_failed->ssf_info.sinfo_ppid;
618
619 log_print("Retry sending %d bytes to node id %d", len, nodeid);
620
621 con = nodeid2con(nodeid, 0);
622 if (!con) {
623 log_print("Could not look up con for nodeid %d\n",
624 nodeid);
625 return;
626 }
627
628 mh = dlm_lowcomms_get_buffer(nodeid, len, GFP_NOFS, &retry_buf);
629 if (!mh) {
630 log_print("Could not allocate buf for retry.");
631 return;
632 }
633 memcpy(retry_buf, buf + sizeof(struct sctp_send_failed), len);
634 dlm_lowcomms_commit_buffer(mh);
635
636 /*
637 * If we got a assoc changed event before the send failed event then
638 * we only need to retry the send.
639 */
640 if (con->sctp_assoc) {
641 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
642 queue_work(send_workqueue, &con->swork);
643 } else
644 sctp_init_failed_foreach(con);
645}
646
582/* Something happened to an association */ 647/* Something happened to an association */
583static void process_sctp_notification(struct connection *con, 648static void process_sctp_notification(struct connection *con,
584 struct msghdr *msg, char *buf) 649 struct msghdr *msg, char *buf)
585{ 650{
586 union sctp_notification *sn = (union sctp_notification *)buf; 651 union sctp_notification *sn = (union sctp_notification *)buf;
587 652
588 if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) { 653 switch (sn->sn_header.sn_type) {
654 case SCTP_SEND_FAILED:
655 retry_failed_sctp_send(con, &sn->sn_send_failed, buf);
656 break;
657 case SCTP_ASSOC_CHANGE:
589 switch (sn->sn_assoc_change.sac_state) { 658 switch (sn->sn_assoc_change.sac_state) {
590
591 case SCTP_COMM_UP: 659 case SCTP_COMM_UP:
592 case SCTP_RESTART: 660 case SCTP_RESTART:
593 { 661 {
@@ -662,9 +730,11 @@ static void process_sctp_notification(struct connection *con,
662 log_print("connecting to %d sctp association %d", 730 log_print("connecting to %d sctp association %d",
663 nodeid, (int)sn->sn_assoc_change.sac_assoc_id); 731 nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
664 732
733 new_con->sctp_assoc = sn->sn_assoc_change.sac_assoc_id;
734 new_con->try_new_addr = false;
665 /* Send any pending writes */ 735 /* Send any pending writes */
666 clear_bit(CF_CONNECT_PENDING, &new_con->flags); 736 clear_bit(CF_CONNECT_PENDING, &new_con->flags);
667 clear_bit(CF_INIT_PENDING, &con->flags); 737 clear_bit(CF_INIT_PENDING, &new_con->flags);
668 if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { 738 if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) {
669 queue_work(send_workqueue, &new_con->swork); 739 queue_work(send_workqueue, &new_con->swork);
670 } 740 }
@@ -683,14 +753,10 @@ static void process_sctp_notification(struct connection *con,
683 } 753 }
684 break; 754 break;
685 755
686 /* We don't know which INIT failed, so clear the PENDING flags
687 * on them all. if assoc_id is zero then it will then try
688 * again */
689
690 case SCTP_CANT_STR_ASSOC: 756 case SCTP_CANT_STR_ASSOC:
691 { 757 {
758 /* Will retry init when we get the send failed notification */
692 log_print("Can't start SCTP association - retrying"); 759 log_print("Can't start SCTP association - retrying");
693 sctp_init_failed();
694 } 760 }
695 break; 761 break;
696 762
@@ -699,6 +765,8 @@ static void process_sctp_notification(struct connection *con,
699 (int)sn->sn_assoc_change.sac_assoc_id, 765 (int)sn->sn_assoc_change.sac_assoc_id,
700 sn->sn_assoc_change.sac_state); 766 sn->sn_assoc_change.sac_state);
701 } 767 }
768 default:
769 ; /* fall through */
702 } 770 }
703} 771}
704 772
@@ -958,6 +1026,24 @@ static void free_entry(struct writequeue_entry *e)
958 kfree(e); 1026 kfree(e);
959} 1027}
960 1028
1029/*
1030 * writequeue_entry_complete - try to delete and free write queue entry
1031 * @e: write queue entry to try to delete
1032 * @completed: bytes completed
1033 *
1034 * writequeue_lock must be held.
1035 */
1036static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
1037{
1038 e->offset += completed;
1039 e->len -= completed;
1040
1041 if (e->len == 0 && e->users == 0) {
1042 list_del(&e->list);
1043 free_entry(e);
1044 }
1045}
1046
961/* Initiate an SCTP association. 1047/* Initiate an SCTP association.
962 This is a special case of send_to_sock() in that we don't yet have a 1048 This is a special case of send_to_sock() in that we don't yet have a
963 peeled-off socket for this association, so we use the listening socket 1049 peeled-off socket for this association, so we use the listening socket
@@ -977,15 +1063,14 @@ static void sctp_init_assoc(struct connection *con)
977 int addrlen; 1063 int addrlen;
978 struct kvec iov[1]; 1064 struct kvec iov[1];
979 1065
1066 mutex_lock(&con->sock_mutex);
980 if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) 1067 if (test_and_set_bit(CF_INIT_PENDING, &con->flags))
981 return; 1068 goto unlock;
982
983 if (con->retries++ > MAX_CONNECT_RETRIES)
984 return;
985 1069
986 if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { 1070 if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr,
1071 con->try_new_addr)) {
987 log_print("no address for nodeid %d", con->nodeid); 1072 log_print("no address for nodeid %d", con->nodeid);
988 return; 1073 goto unlock;
989 } 1074 }
990 base_con = nodeid2con(0, 0); 1075 base_con = nodeid2con(0, 0);
991 BUG_ON(base_con == NULL); 1076 BUG_ON(base_con == NULL);
@@ -1003,17 +1088,25 @@ static void sctp_init_assoc(struct connection *con)
1003 if (list_empty(&con->writequeue)) { 1088 if (list_empty(&con->writequeue)) {
1004 spin_unlock(&con->writequeue_lock); 1089 spin_unlock(&con->writequeue_lock);
1005 log_print("writequeue empty for nodeid %d", con->nodeid); 1090 log_print("writequeue empty for nodeid %d", con->nodeid);
1006 return; 1091 goto unlock;
1007 } 1092 }
1008 1093
1009 e = list_first_entry(&con->writequeue, struct writequeue_entry, list); 1094 e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
1010 len = e->len; 1095 len = e->len;
1011 offset = e->offset; 1096 offset = e->offset;
1012 spin_unlock(&con->writequeue_lock);
1013 1097
1014 /* Send the first block off the write queue */ 1098 /* Send the first block off the write queue */
1015 iov[0].iov_base = page_address(e->page)+offset; 1099 iov[0].iov_base = page_address(e->page)+offset;
1016 iov[0].iov_len = len; 1100 iov[0].iov_len = len;
1101 spin_unlock(&con->writequeue_lock);
1102
1103 if (rem_addr.ss_family == AF_INET) {
1104 struct sockaddr_in *sin = (struct sockaddr_in *)&rem_addr;
1105 log_print("Trying to connect to %pI4", &sin->sin_addr.s_addr);
1106 } else {
1107 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&rem_addr;
1108 log_print("Trying to connect to %pI6", &sin6->sin6_addr);
1109 }
1017 1110
1018 cmsg = CMSG_FIRSTHDR(&outmessage); 1111 cmsg = CMSG_FIRSTHDR(&outmessage);
1019 cmsg->cmsg_level = IPPROTO_SCTP; 1112 cmsg->cmsg_level = IPPROTO_SCTP;
@@ -1021,8 +1114,9 @@ static void sctp_init_assoc(struct connection *con)
1021 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); 1114 cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
1022 sinfo = CMSG_DATA(cmsg); 1115 sinfo = CMSG_DATA(cmsg);
1023 memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); 1116 memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
1024 sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid()); 1117 sinfo->sinfo_ppid = cpu_to_le32(con->nodeid);
1025 outmessage.msg_controllen = cmsg->cmsg_len; 1118 outmessage.msg_controllen = cmsg->cmsg_len;
1119 sinfo->sinfo_flags |= SCTP_ADDR_OVER;
1026 1120
1027 ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); 1121 ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len);
1028 if (ret < 0) { 1122 if (ret < 0) {
@@ -1035,15 +1129,12 @@ static void sctp_init_assoc(struct connection *con)
1035 } 1129 }
1036 else { 1130 else {
1037 spin_lock(&con->writequeue_lock); 1131 spin_lock(&con->writequeue_lock);
1038 e->offset += ret; 1132 writequeue_entry_complete(e, ret);
1039 e->len -= ret;
1040
1041 if (e->len == 0 && e->users == 0) {
1042 list_del(&e->list);
1043 free_entry(e);
1044 }
1045 spin_unlock(&con->writequeue_lock); 1133 spin_unlock(&con->writequeue_lock);
1046 } 1134 }
1135
1136unlock:
1137 mutex_unlock(&con->sock_mutex);
1047} 1138}
1048 1139
1049/* Connect a new socket to its peer */ 1140/* Connect a new socket to its peer */
@@ -1075,7 +1166,7 @@ static void tcp_connect_to_sock(struct connection *con)
1075 goto out_err; 1166 goto out_err;
1076 1167
1077 memset(&saddr, 0, sizeof(saddr)); 1168 memset(&saddr, 0, sizeof(saddr));
1078 result = nodeid_to_addr(con->nodeid, &saddr, NULL); 1169 result = nodeid_to_addr(con->nodeid, &saddr, NULL, false);
1079 if (result < 0) { 1170 if (result < 0) {
1080 log_print("no address for nodeid %d", con->nodeid); 1171 log_print("no address for nodeid %d", con->nodeid);
1081 goto out_err; 1172 goto out_err;
@@ -1254,6 +1345,7 @@ static int sctp_listen_for_all(void)
1254 int result = -EINVAL, num = 1, i, addr_len; 1345 int result = -EINVAL, num = 1, i, addr_len;
1255 struct connection *con = nodeid2con(0, GFP_NOFS); 1346 struct connection *con = nodeid2con(0, GFP_NOFS);
1256 int bufsize = NEEDED_RMEM; 1347 int bufsize = NEEDED_RMEM;
1348 int one = 1;
1257 1349
1258 if (!con) 1350 if (!con)
1259 return -ENOMEM; 1351 return -ENOMEM;
@@ -1288,6 +1380,11 @@ static int sctp_listen_for_all(void)
1288 goto create_delsock; 1380 goto create_delsock;
1289 } 1381 }
1290 1382
1383 result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
1384 sizeof(one));
1385 if (result < 0)
1386 log_print("Could not set SCTP NODELAY error %d\n", result);
1387
1291 /* Init con struct */ 1388 /* Init con struct */
1292 sock->sk->sk_user_data = con; 1389 sock->sk->sk_user_data = con;
1293 con->sock = sock; 1390 con->sock = sock;
@@ -1493,13 +1590,7 @@ static void send_to_sock(struct connection *con)
1493 } 1590 }
1494 1591
1495 spin_lock(&con->writequeue_lock); 1592 spin_lock(&con->writequeue_lock);
1496 e->offset += ret; 1593 writequeue_entry_complete(e, ret);
1497 e->len -= ret;
1498
1499 if (e->len == 0 && e->users == 0) {
1500 list_del(&e->list);
1501 free_entry(e);
1502 }
1503 } 1594 }
1504 spin_unlock(&con->writequeue_lock); 1595 spin_unlock(&con->writequeue_lock);
1505out: 1596out: