diff options
Diffstat (limited to 'fs/dlm/lowcomms.c')
-rw-r--r-- | fs/dlm/lowcomms.c | 177 |
1 files changed, 134 insertions, 43 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index d0ccd2fd79eb..d90909ec6aa6 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -52,7 +52,6 @@ | |||
52 | #include <linux/mutex.h> | 52 | #include <linux/mutex.h> |
53 | #include <linux/sctp.h> | 53 | #include <linux/sctp.h> |
54 | #include <linux/slab.h> | 54 | #include <linux/slab.h> |
55 | #include <linux/sctp.h> | ||
56 | #include <net/sctp/sctp.h> | 55 | #include <net/sctp/sctp.h> |
57 | #include <net/ipv6.h> | 56 | #include <net/ipv6.h> |
58 | 57 | ||
@@ -126,6 +125,7 @@ struct connection { | |||
126 | struct connection *othercon; | 125 | struct connection *othercon; |
127 | struct work_struct rwork; /* Receive workqueue */ | 126 | struct work_struct rwork; /* Receive workqueue */ |
128 | struct work_struct swork; /* Send workqueue */ | 127 | struct work_struct swork; /* Send workqueue */ |
128 | bool try_new_addr; | ||
129 | }; | 129 | }; |
130 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) | 130 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) |
131 | 131 | ||
@@ -144,6 +144,7 @@ struct dlm_node_addr { | |||
144 | struct list_head list; | 144 | struct list_head list; |
145 | int nodeid; | 145 | int nodeid; |
146 | int addr_count; | 146 | int addr_count; |
147 | int curr_addr_index; | ||
147 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; | 148 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; |
148 | }; | 149 | }; |
149 | 150 | ||
@@ -310,7 +311,7 @@ static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) | |||
310 | } | 311 | } |
311 | 312 | ||
312 | static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, | 313 | static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, |
313 | struct sockaddr *sa_out) | 314 | struct sockaddr *sa_out, bool try_new_addr) |
314 | { | 315 | { |
315 | struct sockaddr_storage sas; | 316 | struct sockaddr_storage sas; |
316 | struct dlm_node_addr *na; | 317 | struct dlm_node_addr *na; |
@@ -320,8 +321,16 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, | |||
320 | 321 | ||
321 | spin_lock(&dlm_node_addrs_spin); | 322 | spin_lock(&dlm_node_addrs_spin); |
322 | na = find_node_addr(nodeid); | 323 | na = find_node_addr(nodeid); |
323 | if (na && na->addr_count) | 324 | if (na && na->addr_count) { |
324 | memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); | 325 | if (try_new_addr) { |
326 | na->curr_addr_index++; | ||
327 | if (na->curr_addr_index == na->addr_count) | ||
328 | na->curr_addr_index = 0; | ||
329 | } | ||
330 | |||
331 | memcpy(&sas, na->addr[na->curr_addr_index ], | ||
332 | sizeof(struct sockaddr_storage)); | ||
333 | } | ||
325 | spin_unlock(&dlm_node_addrs_spin); | 334 | spin_unlock(&dlm_node_addrs_spin); |
326 | 335 | ||
327 | if (!na) | 336 | if (!na) |
@@ -353,19 +362,22 @@ static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | |||
353 | { | 362 | { |
354 | struct dlm_node_addr *na; | 363 | struct dlm_node_addr *na; |
355 | int rv = -EEXIST; | 364 | int rv = -EEXIST; |
365 | int addr_i; | ||
356 | 366 | ||
357 | spin_lock(&dlm_node_addrs_spin); | 367 | spin_lock(&dlm_node_addrs_spin); |
358 | list_for_each_entry(na, &dlm_node_addrs, list) { | 368 | list_for_each_entry(na, &dlm_node_addrs, list) { |
359 | if (!na->addr_count) | 369 | if (!na->addr_count) |
360 | continue; | 370 | continue; |
361 | 371 | ||
362 | if (!addr_compare(na->addr[0], addr)) | 372 | for (addr_i = 0; addr_i < na->addr_count; addr_i++) { |
363 | continue; | 373 | if (addr_compare(na->addr[addr_i], addr)) { |
364 | 374 | *nodeid = na->nodeid; | |
365 | *nodeid = na->nodeid; | 375 | rv = 0; |
366 | rv = 0; | 376 | goto unlock; |
367 | break; | 377 | } |
378 | } | ||
368 | } | 379 | } |
380 | unlock: | ||
369 | spin_unlock(&dlm_node_addrs_spin); | 381 | spin_unlock(&dlm_node_addrs_spin); |
370 | return rv; | 382 | return rv; |
371 | } | 383 | } |
@@ -561,8 +573,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd) | |||
561 | 573 | ||
562 | static void sctp_init_failed_foreach(struct connection *con) | 574 | static void sctp_init_failed_foreach(struct connection *con) |
563 | { | 575 | { |
576 | |||
577 | /* | ||
578 | * Don't try to recover base con and handle race where the | ||
579 | * other node's assoc init creates an assoc and we get that | ||
580 | * notification, then we get a notification that our attempt | ||
581 | * failed. This happens when we are still trying the primary | ||
582 | * address, but the other node has already tried secondary addrs | ||
583 | * and found one that worked. | ||
584 | */ | ||
585 | if (!con->nodeid || con->sctp_assoc) | ||
586 | return; | ||
587 | |||
588 | log_print("Retrying SCTP association init for node %d\n", con->nodeid); | ||
589 | |||
590 | con->try_new_addr = true; | ||
564 | con->sctp_assoc = 0; | 591 | con->sctp_assoc = 0; |
565 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | 592 | if (test_and_clear_bit(CF_INIT_PENDING, &con->flags)) { |
566 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | 593 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) |
567 | queue_work(send_workqueue, &con->swork); | 594 | queue_work(send_workqueue, &con->swork); |
568 | } | 595 | } |
@@ -579,15 +606,56 @@ static void sctp_init_failed(void) | |||
579 | mutex_unlock(&connections_lock); | 606 | mutex_unlock(&connections_lock); |
580 | } | 607 | } |
581 | 608 | ||
609 | static void retry_failed_sctp_send(struct connection *recv_con, | ||
610 | struct sctp_send_failed *sn_send_failed, | ||
611 | char *buf) | ||
612 | { | ||
613 | int len = sn_send_failed->ssf_length - sizeof(struct sctp_send_failed); | ||
614 | struct dlm_mhandle *mh; | ||
615 | struct connection *con; | ||
616 | char *retry_buf; | ||
617 | int nodeid = sn_send_failed->ssf_info.sinfo_ppid; | ||
618 | |||
619 | log_print("Retry sending %d bytes to node id %d", len, nodeid); | ||
620 | |||
621 | con = nodeid2con(nodeid, 0); | ||
622 | if (!con) { | ||
623 | log_print("Could not look up con for nodeid %d\n", | ||
624 | nodeid); | ||
625 | return; | ||
626 | } | ||
627 | |||
628 | mh = dlm_lowcomms_get_buffer(nodeid, len, GFP_NOFS, &retry_buf); | ||
629 | if (!mh) { | ||
630 | log_print("Could not allocate buf for retry."); | ||
631 | return; | ||
632 | } | ||
633 | memcpy(retry_buf, buf + sizeof(struct sctp_send_failed), len); | ||
634 | dlm_lowcomms_commit_buffer(mh); | ||
635 | |||
636 | /* | ||
637 | * If we got a assoc changed event before the send failed event then | ||
638 | * we only need to retry the send. | ||
639 | */ | ||
640 | if (con->sctp_assoc) { | ||
641 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | ||
642 | queue_work(send_workqueue, &con->swork); | ||
643 | } else | ||
644 | sctp_init_failed_foreach(con); | ||
645 | } | ||
646 | |||
582 | /* Something happened to an association */ | 647 | /* Something happened to an association */ |
583 | static void process_sctp_notification(struct connection *con, | 648 | static void process_sctp_notification(struct connection *con, |
584 | struct msghdr *msg, char *buf) | 649 | struct msghdr *msg, char *buf) |
585 | { | 650 | { |
586 | union sctp_notification *sn = (union sctp_notification *)buf; | 651 | union sctp_notification *sn = (union sctp_notification *)buf; |
587 | 652 | ||
588 | if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) { | 653 | switch (sn->sn_header.sn_type) { |
654 | case SCTP_SEND_FAILED: | ||
655 | retry_failed_sctp_send(con, &sn->sn_send_failed, buf); | ||
656 | break; | ||
657 | case SCTP_ASSOC_CHANGE: | ||
589 | switch (sn->sn_assoc_change.sac_state) { | 658 | switch (sn->sn_assoc_change.sac_state) { |
590 | |||
591 | case SCTP_COMM_UP: | 659 | case SCTP_COMM_UP: |
592 | case SCTP_RESTART: | 660 | case SCTP_RESTART: |
593 | { | 661 | { |
@@ -662,9 +730,11 @@ static void process_sctp_notification(struct connection *con, | |||
662 | log_print("connecting to %d sctp association %d", | 730 | log_print("connecting to %d sctp association %d", |
663 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); | 731 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); |
664 | 732 | ||
733 | new_con->sctp_assoc = sn->sn_assoc_change.sac_assoc_id; | ||
734 | new_con->try_new_addr = false; | ||
665 | /* Send any pending writes */ | 735 | /* Send any pending writes */ |
666 | clear_bit(CF_CONNECT_PENDING, &new_con->flags); | 736 | clear_bit(CF_CONNECT_PENDING, &new_con->flags); |
667 | clear_bit(CF_INIT_PENDING, &con->flags); | 737 | clear_bit(CF_INIT_PENDING, &new_con->flags); |
668 | if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { | 738 | if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { |
669 | queue_work(send_workqueue, &new_con->swork); | 739 | queue_work(send_workqueue, &new_con->swork); |
670 | } | 740 | } |
@@ -683,14 +753,10 @@ static void process_sctp_notification(struct connection *con, | |||
683 | } | 753 | } |
684 | break; | 754 | break; |
685 | 755 | ||
686 | /* We don't know which INIT failed, so clear the PENDING flags | ||
687 | * on them all. if assoc_id is zero then it will then try | ||
688 | * again */ | ||
689 | |||
690 | case SCTP_CANT_STR_ASSOC: | 756 | case SCTP_CANT_STR_ASSOC: |
691 | { | 757 | { |
758 | /* Will retry init when we get the send failed notification */ | ||
692 | log_print("Can't start SCTP association - retrying"); | 759 | log_print("Can't start SCTP association - retrying"); |
693 | sctp_init_failed(); | ||
694 | } | 760 | } |
695 | break; | 761 | break; |
696 | 762 | ||
@@ -699,6 +765,8 @@ static void process_sctp_notification(struct connection *con, | |||
699 | (int)sn->sn_assoc_change.sac_assoc_id, | 765 | (int)sn->sn_assoc_change.sac_assoc_id, |
700 | sn->sn_assoc_change.sac_state); | 766 | sn->sn_assoc_change.sac_state); |
701 | } | 767 | } |
768 | default: | ||
769 | ; /* fall through */ | ||
702 | } | 770 | } |
703 | } | 771 | } |
704 | 772 | ||
@@ -958,6 +1026,24 @@ static void free_entry(struct writequeue_entry *e) | |||
958 | kfree(e); | 1026 | kfree(e); |
959 | } | 1027 | } |
960 | 1028 | ||
1029 | /* | ||
1030 | * writequeue_entry_complete - try to delete and free write queue entry | ||
1031 | * @e: write queue entry to try to delete | ||
1032 | * @completed: bytes completed | ||
1033 | * | ||
1034 | * writequeue_lock must be held. | ||
1035 | */ | ||
1036 | static void writequeue_entry_complete(struct writequeue_entry *e, int completed) | ||
1037 | { | ||
1038 | e->offset += completed; | ||
1039 | e->len -= completed; | ||
1040 | |||
1041 | if (e->len == 0 && e->users == 0) { | ||
1042 | list_del(&e->list); | ||
1043 | free_entry(e); | ||
1044 | } | ||
1045 | } | ||
1046 | |||
961 | /* Initiate an SCTP association. | 1047 | /* Initiate an SCTP association. |
962 | This is a special case of send_to_sock() in that we don't yet have a | 1048 | This is a special case of send_to_sock() in that we don't yet have a |
963 | peeled-off socket for this association, so we use the listening socket | 1049 | peeled-off socket for this association, so we use the listening socket |
@@ -977,15 +1063,14 @@ static void sctp_init_assoc(struct connection *con) | |||
977 | int addrlen; | 1063 | int addrlen; |
978 | struct kvec iov[1]; | 1064 | struct kvec iov[1]; |
979 | 1065 | ||
1066 | mutex_lock(&con->sock_mutex); | ||
980 | if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) | 1067 | if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) |
981 | return; | 1068 | goto unlock; |
982 | |||
983 | if (con->retries++ > MAX_CONNECT_RETRIES) | ||
984 | return; | ||
985 | 1069 | ||
986 | if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { | 1070 | if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr, |
1071 | con->try_new_addr)) { | ||
987 | log_print("no address for nodeid %d", con->nodeid); | 1072 | log_print("no address for nodeid %d", con->nodeid); |
988 | return; | 1073 | goto unlock; |
989 | } | 1074 | } |
990 | base_con = nodeid2con(0, 0); | 1075 | base_con = nodeid2con(0, 0); |
991 | BUG_ON(base_con == NULL); | 1076 | BUG_ON(base_con == NULL); |
@@ -1003,17 +1088,25 @@ static void sctp_init_assoc(struct connection *con) | |||
1003 | if (list_empty(&con->writequeue)) { | 1088 | if (list_empty(&con->writequeue)) { |
1004 | spin_unlock(&con->writequeue_lock); | 1089 | spin_unlock(&con->writequeue_lock); |
1005 | log_print("writequeue empty for nodeid %d", con->nodeid); | 1090 | log_print("writequeue empty for nodeid %d", con->nodeid); |
1006 | return; | 1091 | goto unlock; |
1007 | } | 1092 | } |
1008 | 1093 | ||
1009 | e = list_first_entry(&con->writequeue, struct writequeue_entry, list); | 1094 | e = list_first_entry(&con->writequeue, struct writequeue_entry, list); |
1010 | len = e->len; | 1095 | len = e->len; |
1011 | offset = e->offset; | 1096 | offset = e->offset; |
1012 | spin_unlock(&con->writequeue_lock); | ||
1013 | 1097 | ||
1014 | /* Send the first block off the write queue */ | 1098 | /* Send the first block off the write queue */ |
1015 | iov[0].iov_base = page_address(e->page)+offset; | 1099 | iov[0].iov_base = page_address(e->page)+offset; |
1016 | iov[0].iov_len = len; | 1100 | iov[0].iov_len = len; |
1101 | spin_unlock(&con->writequeue_lock); | ||
1102 | |||
1103 | if (rem_addr.ss_family == AF_INET) { | ||
1104 | struct sockaddr_in *sin = (struct sockaddr_in *)&rem_addr; | ||
1105 | log_print("Trying to connect to %pI4", &sin->sin_addr.s_addr); | ||
1106 | } else { | ||
1107 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&rem_addr; | ||
1108 | log_print("Trying to connect to %pI6", &sin6->sin6_addr); | ||
1109 | } | ||
1017 | 1110 | ||
1018 | cmsg = CMSG_FIRSTHDR(&outmessage); | 1111 | cmsg = CMSG_FIRSTHDR(&outmessage); |
1019 | cmsg->cmsg_level = IPPROTO_SCTP; | 1112 | cmsg->cmsg_level = IPPROTO_SCTP; |
@@ -1021,8 +1114,9 @@ static void sctp_init_assoc(struct connection *con) | |||
1021 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | 1114 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); |
1022 | sinfo = CMSG_DATA(cmsg); | 1115 | sinfo = CMSG_DATA(cmsg); |
1023 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | 1116 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); |
1024 | sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid()); | 1117 | sinfo->sinfo_ppid = cpu_to_le32(con->nodeid); |
1025 | outmessage.msg_controllen = cmsg->cmsg_len; | 1118 | outmessage.msg_controllen = cmsg->cmsg_len; |
1119 | sinfo->sinfo_flags |= SCTP_ADDR_OVER; | ||
1026 | 1120 | ||
1027 | ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); | 1121 | ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); |
1028 | if (ret < 0) { | 1122 | if (ret < 0) { |
@@ -1035,15 +1129,12 @@ static void sctp_init_assoc(struct connection *con) | |||
1035 | } | 1129 | } |
1036 | else { | 1130 | else { |
1037 | spin_lock(&con->writequeue_lock); | 1131 | spin_lock(&con->writequeue_lock); |
1038 | e->offset += ret; | 1132 | writequeue_entry_complete(e, ret); |
1039 | e->len -= ret; | ||
1040 | |||
1041 | if (e->len == 0 && e->users == 0) { | ||
1042 | list_del(&e->list); | ||
1043 | free_entry(e); | ||
1044 | } | ||
1045 | spin_unlock(&con->writequeue_lock); | 1133 | spin_unlock(&con->writequeue_lock); |
1046 | } | 1134 | } |
1135 | |||
1136 | unlock: | ||
1137 | mutex_unlock(&con->sock_mutex); | ||
1047 | } | 1138 | } |
1048 | 1139 | ||
1049 | /* Connect a new socket to its peer */ | 1140 | /* Connect a new socket to its peer */ |
@@ -1075,7 +1166,7 @@ static void tcp_connect_to_sock(struct connection *con) | |||
1075 | goto out_err; | 1166 | goto out_err; |
1076 | 1167 | ||
1077 | memset(&saddr, 0, sizeof(saddr)); | 1168 | memset(&saddr, 0, sizeof(saddr)); |
1078 | result = nodeid_to_addr(con->nodeid, &saddr, NULL); | 1169 | result = nodeid_to_addr(con->nodeid, &saddr, NULL, false); |
1079 | if (result < 0) { | 1170 | if (result < 0) { |
1080 | log_print("no address for nodeid %d", con->nodeid); | 1171 | log_print("no address for nodeid %d", con->nodeid); |
1081 | goto out_err; | 1172 | goto out_err; |
@@ -1254,6 +1345,7 @@ static int sctp_listen_for_all(void) | |||
1254 | int result = -EINVAL, num = 1, i, addr_len; | 1345 | int result = -EINVAL, num = 1, i, addr_len; |
1255 | struct connection *con = nodeid2con(0, GFP_NOFS); | 1346 | struct connection *con = nodeid2con(0, GFP_NOFS); |
1256 | int bufsize = NEEDED_RMEM; | 1347 | int bufsize = NEEDED_RMEM; |
1348 | int one = 1; | ||
1257 | 1349 | ||
1258 | if (!con) | 1350 | if (!con) |
1259 | return -ENOMEM; | 1351 | return -ENOMEM; |
@@ -1288,6 +1380,11 @@ static int sctp_listen_for_all(void) | |||
1288 | goto create_delsock; | 1380 | goto create_delsock; |
1289 | } | 1381 | } |
1290 | 1382 | ||
1383 | result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, | ||
1384 | sizeof(one)); | ||
1385 | if (result < 0) | ||
1386 | log_print("Could not set SCTP NODELAY error %d\n", result); | ||
1387 | |||
1291 | /* Init con struct */ | 1388 | /* Init con struct */ |
1292 | sock->sk->sk_user_data = con; | 1389 | sock->sk->sk_user_data = con; |
1293 | con->sock = sock; | 1390 | con->sock = sock; |
@@ -1493,13 +1590,7 @@ static void send_to_sock(struct connection *con) | |||
1493 | } | 1590 | } |
1494 | 1591 | ||
1495 | spin_lock(&con->writequeue_lock); | 1592 | spin_lock(&con->writequeue_lock); |
1496 | e->offset += ret; | 1593 | writequeue_entry_complete(e, ret); |
1497 | e->len -= ret; | ||
1498 | |||
1499 | if (e->len == 0 && e->users == 0) { | ||
1500 | list_del(&e->list); | ||
1501 | free_entry(e); | ||
1502 | } | ||
1503 | } | 1594 | } |
1504 | spin_unlock(&con->writequeue_lock); | 1595 | spin_unlock(&con->writequeue_lock); |
1505 | out: | 1596 | out: |