author     Linus Torvalds <torvalds@linux-foundation.org>   2015-09-03 15:57:48 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-09-03 15:57:48 -0400
commit     9cbf22b37ae0592dea809cb8d424990774c21786
tree       a995760bb01015b95b0ef1b9e1c39a425e9a68fd
parent     ea814ab9aab23505f6828a0cc01c985e59847f4e
parent     b3a5bbfd780d9e9291f5f257be06e9ad6db11657
Merge tag 'dlm-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm
Pull dlm updates from David Teigland:
"This set mainly includes a change to the way the dlm uses the SCTP API
in the kernel, removing the direct dependency on the sctp module.
Other odd SCTP-related fixes are also included.
The other notable fix is for a long standing regression in the
behavior of lock value blocks for user space locks"
* tag 'dlm-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
dlm: print error from kernel_sendpage
dlm: fix lvb copy for user locks
dlm: sctp_accept_from_sock() can be static
dlm: fix reconnecting but not sending data
dlm: replace BUG_ON with a less severe handling
dlm: use sctp 1-to-1 API
dlm: fix not reconnecting on connecting error handling
dlm: fix race while closing connections
dlm: fix connection stealing if using SCTP
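The main change in the diff below replaces the DLM's use of the SCTP one-to-many API (a single SOCK_SEQPACKET socket, association IDs, peel-offs and SCTP event notifications) with the one-to-one API (SOCK_STREAM), which is used much like TCP. As a rough illustration only (this sketch is not part of the commit, uses a userspace socket and an example port), the one-to-one style looks like the program below; in the patch, lowcomms.c does the equivalent in-kernel with sock_create_kern(), kernel_bind()/SCTP_SOCKOPT_BINDX_ADD and kernel_accept().

/* Minimal userspace sketch of the SCTP one-to-one socket style.
 * Assumes SCTP support is available; socket() typically fails with
 * EPROTONOSUPPORT otherwise.  Error handling is abbreviated.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in addr;
        int lsock, csock;

        /* One-to-one style: one socket per association, like TCP. */
        lsock = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
        if (lsock < 0) {
                perror("socket");
                return 1;
        }

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_port = htons(21064);   /* example port only */
        addr.sin_addr.s_addr = htonl(INADDR_ANY);

        if (bind(lsock, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
            listen(lsock, 5) < 0) {
                perror("bind/listen");
                return 1;
        }

        /* Each peer is accepted on its own socket; no peel-off needed. */
        csock = accept(lsock, NULL, NULL);
        if (csock >= 0)
                close(csock);
        close(lsock);
        return 0;
}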
-rw-r--r--   fs/dlm/lowcomms.c                 743
-rw-r--r--   fs/dlm/user.c                       7
-rw-r--r--   include/uapi/linux/dlm_device.h     2
3 files changed, 305 insertions, 447 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 754fd6c0b747..87e9d796cf7d 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -120,12 +120,11 @@ struct connection {
         struct cbuf cb;
         int retries;
 #define MAX_CONNECT_RETRIES 3
-        int sctp_assoc;
         struct hlist_node list;
         struct connection *othercon;
         struct work_struct rwork; /* Receive workqueue */
         struct work_struct swork; /* Send workqueue */
-        bool try_new_addr;
+        void (*orig_error_report)(struct sock *sk);
 };
 #define sock2con(x) ((struct connection *)(x)->sk_user_data)
 
@@ -252,26 +251,6 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
         return con;
 }
 
-/* This is a bit drastic, but only called when things go wrong */
-static struct connection *assoc2con(int assoc_id)
-{
-        int i;
-        struct connection *con;
-
-        mutex_lock(&connections_lock);
-
-        for (i = 0 ; i < CONN_HASH_SIZE; i++) {
-                hlist_for_each_entry(con, &connection_hash[i], list) {
-                        if (con->sctp_assoc == assoc_id) {
-                                mutex_unlock(&connections_lock);
-                                return con;
-                        }
-                }
-        }
-        mutex_unlock(&connections_lock);
-        return NULL;
-}
-
 static struct dlm_node_addr *find_node_addr(int nodeid)
 {
         struct dlm_node_addr *na;
@@ -322,14 +301,14 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
         spin_lock(&dlm_node_addrs_spin);
         na = find_node_addr(nodeid);
         if (na && na->addr_count) {
+                memcpy(&sas, na->addr[na->curr_addr_index],
+                       sizeof(struct sockaddr_storage));
+
                 if (try_new_addr) {
                         na->curr_addr_index++;
                         if (na->curr_addr_index == na->addr_count)
                                 na->curr_addr_index = 0;
                 }
-
-                memcpy(&sas, na->addr[na->curr_addr_index ],
-                       sizeof(struct sockaddr_storage));
         }
         spin_unlock(&dlm_node_addrs_spin);
 
@@ -459,18 +438,23 @@ static inline void lowcomms_connect_sock(struct connection *con)
 
 static void lowcomms_state_change(struct sock *sk)
 {
-        if (sk->sk_state == TCP_ESTABLISHED)
+        /* SCTP layer is not calling sk_data_ready when the connection
+         * is done, so we catch the signal through here. Also, it
+         * doesn't switch socket state when entering shutdown, so we
+         * skip the write in that case.
+         */
+        if (sk->sk_shutdown) {
+                if (sk->sk_shutdown == RCV_SHUTDOWN)
+                        lowcomms_data_ready(sk);
+        } else if (sk->sk_state == TCP_ESTABLISHED) {
                 lowcomms_write_space(sk);
+        }
 }
 
 int dlm_lowcomms_connect_node(int nodeid)
 {
         struct connection *con;
 
-        /* with sctp there's no connecting without sending */
-        if (dlm_config.ci_protocol != 0)
-                return 0;
-
         if (nodeid == dlm_our_nodeid())
                 return 0;
 
@@ -481,6 +465,43 @@ int dlm_lowcomms_connect_node(int nodeid)
         return 0;
 }
 
+static void lowcomms_error_report(struct sock *sk)
+{
+        struct connection *con = sock2con(sk);
+        struct sockaddr_storage saddr;
+
+        if (nodeid_to_addr(con->nodeid, &saddr, NULL, false)) {
+                printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
+                                   "sending to node %d, port %d, "
+                                   "sk_err=%d/%d\n", dlm_our_nodeid(),
+                                   con->nodeid, dlm_config.ci_tcp_port,
+                                   sk->sk_err, sk->sk_err_soft);
+                return;
+        } else if (saddr.ss_family == AF_INET) {
+                struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
+
+                printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
+                                   "sending to node %d at %pI4, port %d, "
+                                   "sk_err=%d/%d\n", dlm_our_nodeid(),
+                                   con->nodeid, &sin4->sin_addr.s_addr,
+                                   dlm_config.ci_tcp_port, sk->sk_err,
+                                   sk->sk_err_soft);
+        } else {
+                struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;
+
+                printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
+                                   "sending to node %d at %u.%u.%u.%u, "
+                                   "port %d, sk_err=%d/%d\n", dlm_our_nodeid(),
+                                   con->nodeid, sin6->sin6_addr.s6_addr32[0],
+                                   sin6->sin6_addr.s6_addr32[1],
+                                   sin6->sin6_addr.s6_addr32[2],
+                                   sin6->sin6_addr.s6_addr32[3],
+                                   dlm_config.ci_tcp_port, sk->sk_err,
+                                   sk->sk_err_soft);
+        }
+        con->orig_error_report(sk);
+}
+
 /* Make a socket active */
 static void add_sock(struct socket *sock, struct connection *con)
 {
@@ -492,6 +513,8 @@ static void add_sock(struct socket *sock, struct connection *con)
         con->sock->sk->sk_state_change = lowcomms_state_change;
         con->sock->sk->sk_user_data = con;
         con->sock->sk->sk_allocation = GFP_NOFS;
+        con->orig_error_report = con->sock->sk->sk_error_report;
+        con->sock->sk->sk_error_report = lowcomms_error_report;
 }
 
 /* Add the port number to an IPv6 or 4 sockaddr and return the address
@@ -514,17 +537,24 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 }
 
 /* Close a remote connection and tidy up */
-static void close_connection(struct connection *con, bool and_other)
+static void close_connection(struct connection *con, bool and_other,
+                             bool tx, bool rx)
 {
-        mutex_lock(&con->sock_mutex);
+        clear_bit(CF_CONNECT_PENDING, &con->flags);
+        clear_bit(CF_WRITE_PENDING, &con->flags);
+        if (tx && cancel_work_sync(&con->swork))
+                log_print("canceled swork for node %d", con->nodeid);
+        if (rx && cancel_work_sync(&con->rwork))
+                log_print("canceled rwork for node %d", con->nodeid);
 
+        mutex_lock(&con->sock_mutex);
         if (con->sock) {
                 sock_release(con->sock);
                 con->sock = NULL;
         }
         if (con->othercon && and_other) {
                 /* Will only re-enter once. */
-                close_connection(con->othercon, false);
+                close_connection(con->othercon, false, true, true);
         }
         if (con->rx_page) {
                 __free_page(con->rx_page);
@@ -535,254 +565,6 @@ static void close_connection(struct connection *con, bool and_other)
         mutex_unlock(&con->sock_mutex);
 }
 
-/* We only send shutdown messages to nodes that are not part of the cluster */
-static void sctp_send_shutdown(sctp_assoc_t associd)
-{
-        static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-        struct msghdr outmessage;
-        struct cmsghdr *cmsg;
-        struct sctp_sndrcvinfo *sinfo;
-        int ret;
-        struct connection *con;
-
-        con = nodeid2con(0,0);
-        BUG_ON(con == NULL);
-
-        outmessage.msg_name = NULL;
-        outmessage.msg_namelen = 0;
-        outmessage.msg_control = outcmsg;
-        outmessage.msg_controllen = sizeof(outcmsg);
-        outmessage.msg_flags = MSG_EOR;
-
-        cmsg = CMSG_FIRSTHDR(&outmessage);
-        cmsg->cmsg_level = IPPROTO_SCTP;
-        cmsg->cmsg_type = SCTP_SNDRCV;
-        cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-        outmessage.msg_controllen = cmsg->cmsg_len;
-        sinfo = CMSG_DATA(cmsg);
-        memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
-
-        sinfo->sinfo_flags |= MSG_EOF;
-        sinfo->sinfo_assoc_id = associd;
-
-        ret = kernel_sendmsg(con->sock, &outmessage, NULL, 0, 0);
-
-        if (ret != 0)
-                log_print("send EOF to node failed: %d", ret);
-}
-
-static void sctp_init_failed_foreach(struct connection *con)
-{
-
-        /*
-         * Don't try to recover base con and handle race where the
-         * other node's assoc init creates a assoc and we get that
-         * notification, then we get a notification that our attempt
-         * failed due. This happens when we are still trying the primary
-         * address, but the other node has already tried secondary addrs
-         * and found one that worked.
-         */
-        if (!con->nodeid || con->sctp_assoc)
-                return;
-
-        log_print("Retrying SCTP association init for node %d\n", con->nodeid);
-
-        con->try_new_addr = true;
-        con->sctp_assoc = 0;
-        if (test_and_clear_bit(CF_INIT_PENDING, &con->flags)) {
-                if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
-                        queue_work(send_workqueue, &con->swork);
-        }
-}
-
-/* INIT failed but we don't know which node...
-   restart INIT on all pending nodes */
-static void sctp_init_failed(void)
-{
-        mutex_lock(&connections_lock);
-
-        foreach_conn(sctp_init_failed_foreach);
-
-        mutex_unlock(&connections_lock);
-}
-
-static void retry_failed_sctp_send(struct connection *recv_con,
-                                   struct sctp_send_failed *sn_send_failed,
-                                   char *buf)
-{
-        int len = sn_send_failed->ssf_length - sizeof(struct sctp_send_failed);
-        struct dlm_mhandle *mh;
-        struct connection *con;
-        char *retry_buf;
-        int nodeid = sn_send_failed->ssf_info.sinfo_ppid;
-
-        log_print("Retry sending %d bytes to node id %d", len, nodeid);
-
-        if (!nodeid) {
-                log_print("Shouldn't resend data via listening connection.");
-                return;
-        }
-
-        con = nodeid2con(nodeid, 0);
-        if (!con) {
-                log_print("Could not look up con for nodeid %d\n",
-                          nodeid);
-                return;
-        }
-
-        mh = dlm_lowcomms_get_buffer(nodeid, len, GFP_NOFS, &retry_buf);
-        if (!mh) {
-                log_print("Could not allocate buf for retry.");
-                return;
-        }
-        memcpy(retry_buf, buf + sizeof(struct sctp_send_failed), len);
-        dlm_lowcomms_commit_buffer(mh);
-
-        /*
-         * If we got a assoc changed event before the send failed event then
-         * we only need to retry the send.
-         */
-        if (con->sctp_assoc) {
-                if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
-                        queue_work(send_workqueue, &con->swork);
-        } else
-                sctp_init_failed_foreach(con);
-}
-
-/* Something happened to an association */
-static void process_sctp_notification(struct connection *con,
-                                      struct msghdr *msg, char *buf)
-{
-        union sctp_notification *sn = (union sctp_notification *)buf;
-        struct linger linger;
-
-        switch (sn->sn_header.sn_type) {
-        case SCTP_SEND_FAILED:
-                retry_failed_sctp_send(con, &sn->sn_send_failed, buf);
-                break;
-        case SCTP_ASSOC_CHANGE:
-                switch (sn->sn_assoc_change.sac_state) {
-                case SCTP_COMM_UP:
-                case SCTP_RESTART:
-                {
-                        /* Check that the new node is in the lockspace */
-                        struct sctp_prim prim;
-                        int nodeid;
-                        int prim_len, ret;
-                        int addr_len;
-                        struct connection *new_con;
-
-                        /*
-                         * We get this before any data for an association.
-                         * We verify that the node is in the cluster and
-                         * then peel off a socket for it.
-                         */
-                        if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
-                                log_print("COMM_UP for invalid assoc ID %d",
-                                          (int)sn->sn_assoc_change.sac_assoc_id);
-                                sctp_init_failed();
-                                return;
-                        }
-                        memset(&prim, 0, sizeof(struct sctp_prim));
-                        prim_len = sizeof(struct sctp_prim);
-                        prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id;
-
-                        ret = kernel_getsockopt(con->sock,
-                                                IPPROTO_SCTP,
-                                                SCTP_PRIMARY_ADDR,
-                                                (char*)&prim,
-                                                &prim_len);
-                        if (ret < 0) {
-                                log_print("getsockopt/sctp_primary_addr on "
-                                          "new assoc %d failed : %d",
-                                          (int)sn->sn_assoc_change.sac_assoc_id,
-                                          ret);
-
-                                /* Retry INIT later */
-                                new_con = assoc2con(sn->sn_assoc_change.sac_assoc_id);
-                                if (new_con)
-                                        clear_bit(CF_CONNECT_PENDING, &con->flags);
-                                return;
-                        }
-                        make_sockaddr(&prim.ssp_addr, 0, &addr_len);
-                        if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
-                                unsigned char *b=(unsigned char *)&prim.ssp_addr;
-                                log_print("reject connect from unknown addr");
-                                print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
-                                                     b, sizeof(struct sockaddr_storage));
-                                sctp_send_shutdown(prim.ssp_assoc_id);
-                                return;
-                        }
-
-                        new_con = nodeid2con(nodeid, GFP_NOFS);
-                        if (!new_con)
-                                return;
-
-                        /* Peel off a new sock */
-                        lock_sock(con->sock->sk);
-                        ret = sctp_do_peeloff(con->sock->sk,
-                                              sn->sn_assoc_change.sac_assoc_id,
-                                              &new_con->sock);
-                        release_sock(con->sock->sk);
-                        if (ret < 0) {
-                                log_print("Can't peel off a socket for "
-                                          "connection %d to node %d: err=%d",
-                                          (int)sn->sn_assoc_change.sac_assoc_id,
-                                          nodeid, ret);
-                                return;
-                        }
-                        add_sock(new_con->sock, new_con);
-
-                        linger.l_onoff = 1;
-                        linger.l_linger = 0;
-                        ret = kernel_setsockopt(new_con->sock, SOL_SOCKET, SO_LINGER,
-                                                (char *)&linger, sizeof(linger));
-                        if (ret < 0)
-                                log_print("set socket option SO_LINGER failed");
-
-                        log_print("connecting to %d sctp association %d",
-                                  nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
-
-                        new_con->sctp_assoc = sn->sn_assoc_change.sac_assoc_id;
-                        new_con->try_new_addr = false;
-                        /* Send any pending writes */
-                        clear_bit(CF_CONNECT_PENDING, &new_con->flags);
-                        clear_bit(CF_INIT_PENDING, &new_con->flags);
-                        if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) {
-                                queue_work(send_workqueue, &new_con->swork);
-                        }
-                        if (!test_and_set_bit(CF_READ_PENDING, &new_con->flags))
-                                queue_work(recv_workqueue, &new_con->rwork);
-                }
-                break;
-
-                case SCTP_COMM_LOST:
-                case SCTP_SHUTDOWN_COMP:
-                {
-                        con = assoc2con(sn->sn_assoc_change.sac_assoc_id);
-                        if (con) {
-                                con->sctp_assoc = 0;
-                        }
-                }
-                break;
-
-                case SCTP_CANT_STR_ASSOC:
-                {
-                        /* Will retry init when we get the send failed notification */
-                        log_print("Can't start SCTP association - retrying");
-                }
-                break;
-
-                default:
-                        log_print("unexpected SCTP assoc change id=%d state=%d",
-                                  (int)sn->sn_assoc_change.sac_assoc_id,
-                                  sn->sn_assoc_change.sac_state);
-                }
-        default:
-                ; /* fall through */
-        }
-}
-
 /* Data received from remote end */
 static int receive_from_sock(struct connection *con)
 {
@@ -793,7 +575,6 @@ static int receive_from_sock(struct connection *con)
         int r;
         int call_again_soon = 0;
         int nvec;
-        char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
 
         mutex_lock(&con->sock_mutex);
 
@@ -801,6 +582,10 @@ static int receive_from_sock(struct connection *con)
                 ret = -EAGAIN;
                 goto out_close;
         }
+        if (con->nodeid == 0) {
+                ret = -EINVAL;
+                goto out_close;
+        }
 
         if (con->rx_page == NULL) {
                 /*
@@ -813,11 +598,6 @@ static int receive_from_sock(struct connection *con)
                 cbuf_init(&con->cb, PAGE_CACHE_SIZE);
         }
 
-        /* Only SCTP needs these really */
-        memset(&incmsg, 0, sizeof(incmsg));
-        msg.msg_control = incmsg;
-        msg.msg_controllen = sizeof(incmsg);
-
         /*
          * iov[0] is the bit of the circular buffer between the current end
          * point (cb.base + cb.len) and the end of the buffer.
@@ -843,31 +623,18 @@ static int receive_from_sock(struct connection *con)
                              MSG_DONTWAIT | MSG_NOSIGNAL);
         if (ret <= 0)
                 goto out_close;
-
-        /* Process SCTP notifications */
-        if (msg.msg_flags & MSG_NOTIFICATION) {
-                msg.msg_control = incmsg;
-                msg.msg_controllen = sizeof(incmsg);
-
-                process_sctp_notification(con, &msg,
-                                page_address(con->rx_page) + con->cb.base);
-                mutex_unlock(&con->sock_mutex);
-                return 0;
-        }
-        BUG_ON(con->nodeid == 0);
-
-        if (ret == len)
+        else if (ret == len)
                 call_again_soon = 1;
+
         cbuf_add(&con->cb, ret);
         ret = dlm_process_incoming_buffer(con->nodeid,
                                           page_address(con->rx_page),
                                           con->cb.base, con->cb.len,
                                           PAGE_CACHE_SIZE);
         if (ret == -EBADMSG) {
-                log_print("lowcomms: addr=%p, base=%u, len=%u, "
-                          "iov_len=%u, iov_base[0]=%p, read=%d",
-                          page_address(con->rx_page), con->cb.base, con->cb.len,
-                          len, iov[0].iov_base, r);
+                log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d",
+                          page_address(con->rx_page), con->cb.base,
+                          con->cb.len, r);
         }
         if (ret < 0)
                 goto out_close;
@@ -892,7 +659,7 @@ out_resched:
 out_close:
         mutex_unlock(&con->sock_mutex);
         if (ret != -EAGAIN) {
-                close_connection(con, false);
+                close_connection(con, false, true, false);
                 /* Reconnect when there is something to send */
         }
         /* Don't return success if we really got EOF */
@@ -1033,6 +800,120 @@ accept_err:
         return result;
 }
 
+static int sctp_accept_from_sock(struct connection *con)
+{
+        /* Check that the new node is in the lockspace */
+        struct sctp_prim prim;
+        int nodeid;
+        int prim_len, ret;
+        int addr_len;
+        struct connection *newcon;
+        struct connection *addcon;
+        struct socket *newsock;
+
+        mutex_lock(&connections_lock);
+        if (!dlm_allow_conn) {
+                mutex_unlock(&connections_lock);
+                return -1;
+        }
+        mutex_unlock(&connections_lock);
+
+        mutex_lock_nested(&con->sock_mutex, 0);
+
+        ret = kernel_accept(con->sock, &newsock, O_NONBLOCK);
+        if (ret < 0)
+                goto accept_err;
+
+        memset(&prim, 0, sizeof(struct sctp_prim));
+        prim_len = sizeof(struct sctp_prim);
+
+        ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
+                                (char *)&prim, &prim_len);
+        if (ret < 0) {
+                log_print("getsockopt/sctp_primary_addr failed: %d", ret);
+                goto accept_err;
+        }
+
+        make_sockaddr(&prim.ssp_addr, 0, &addr_len);
+        if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
+                unsigned char *b = (unsigned char *)&prim.ssp_addr;
+
+                log_print("reject connect from unknown addr");
+                print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
+                                     b, sizeof(struct sockaddr_storage));
+                goto accept_err;
+        }
+
+        newcon = nodeid2con(nodeid, GFP_NOFS);
+        if (!newcon) {
+                ret = -ENOMEM;
+                goto accept_err;
+        }
+
+        mutex_lock_nested(&newcon->sock_mutex, 1);
+
+        if (newcon->sock) {
+                struct connection *othercon = newcon->othercon;
+
+                if (!othercon) {
+                        othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
+                        if (!othercon) {
+                                log_print("failed to allocate incoming socket");
+                                mutex_unlock(&newcon->sock_mutex);
+                                ret = -ENOMEM;
+                                goto accept_err;
+                        }
+                        othercon->nodeid = nodeid;
+                        othercon->rx_action = receive_from_sock;
+                        mutex_init(&othercon->sock_mutex);
+                        INIT_WORK(&othercon->swork, process_send_sockets);
+                        INIT_WORK(&othercon->rwork, process_recv_sockets);
+                        set_bit(CF_IS_OTHERCON, &othercon->flags);
+                }
+                if (!othercon->sock) {
+                        newcon->othercon = othercon;
+                        othercon->sock = newsock;
+                        newsock->sk->sk_user_data = othercon;
+                        add_sock(newsock, othercon);
+                        addcon = othercon;
+                } else {
+                        printk("Extra connection from node %d attempted\n", nodeid);
+                        ret = -EAGAIN;
+                        mutex_unlock(&newcon->sock_mutex);
+                        goto accept_err;
+                }
+        } else {
+                newsock->sk->sk_user_data = newcon;
+                newcon->rx_action = receive_from_sock;
+                add_sock(newsock, newcon);
+                addcon = newcon;
+        }
+
+        log_print("connected to %d", nodeid);
+
+        mutex_unlock(&newcon->sock_mutex);
+
+        /*
+         * Add it to the active queue in case we got data
+         * between processing the accept adding the socket
+         * to the read_sockets list
+         */
+        if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
+                queue_work(recv_workqueue, &addcon->rwork);
+        mutex_unlock(&con->sock_mutex);
+
+        return 0;
+
+accept_err:
+        mutex_unlock(&con->sock_mutex);
+        if (newsock)
+                sock_release(newsock);
+        if (ret != -EAGAIN)
+                log_print("error accepting connection from node: %d", ret);
+
+        return ret;
+}
+
 static void free_entry(struct writequeue_entry *e)
 {
         __free_page(e->page);
@@ -1057,97 +938,129 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
         }
 }
 
+/*
+ * sctp_bind_addrs - bind a SCTP socket to all our addresses
+ */
+static int sctp_bind_addrs(struct connection *con, uint16_t port)
+{
+        struct sockaddr_storage localaddr;
+        int i, addr_len, result = 0;
+
+        for (i = 0; i < dlm_local_count; i++) {
+                memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
+                make_sockaddr(&localaddr, port, &addr_len);
+
+                if (!i)
+                        result = kernel_bind(con->sock,
+                                             (struct sockaddr *)&localaddr,
+                                             addr_len);
+                else
+                        result = kernel_setsockopt(con->sock, SOL_SCTP,
+                                                   SCTP_SOCKOPT_BINDX_ADD,
+                                                   (char *)&localaddr, addr_len);
+
+                if (result < 0) {
+                        log_print("Can't bind to %d addr number %d, %d.\n",
+                                  port, i + 1, result);
+                        break;
+                }
+        }
+        return result;
+}
+
 /* Initiate an SCTP association.
    This is a special case of send_to_sock() in that we don't yet have a
    peeled-off socket for this association, so we use the listening socket
    and add the primary IP address of the remote node.
  */
-static void sctp_init_assoc(struct connection *con)
+static void sctp_connect_to_sock(struct connection *con)
 {
-        struct sockaddr_storage rem_addr;
-        char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-        struct msghdr outmessage;
-        struct cmsghdr *cmsg;
-        struct sctp_sndrcvinfo *sinfo;
-        struct connection *base_con;
-        struct writequeue_entry *e;
-        int len, offset;
-        int ret;
-        int addrlen;
-        struct kvec iov[1];
+        struct sockaddr_storage daddr;
+        int one = 1;
+        int result;
+        int addr_len;
+        struct socket *sock;
+
+        if (con->nodeid == 0) {
+                log_print("attempt to connect sock 0 foiled");
+                return;
+        }
 
         mutex_lock(&con->sock_mutex);
-        if (test_and_set_bit(CF_INIT_PENDING, &con->flags))
-                goto unlock;
 
-        if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr,
-                           con->try_new_addr)) {
+        /* Some odd races can cause double-connects, ignore them */
+        if (con->retries++ > MAX_CONNECT_RETRIES)
+                goto out;
+
+        if (con->sock) {
+                log_print("node %d already connected.", con->nodeid);
+                goto out;
+        }
+
+        memset(&daddr, 0, sizeof(daddr));
+        result = nodeid_to_addr(con->nodeid, &daddr, NULL, true);
+        if (result < 0) {
                 log_print("no address for nodeid %d", con->nodeid);
-                goto unlock;
+                goto out;
         }
-        base_con = nodeid2con(0, 0);
-        BUG_ON(base_con == NULL);
 
-        make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
+        /* Create a socket to communicate with */
+        result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
+                                  SOCK_STREAM, IPPROTO_SCTP, &sock);
+        if (result < 0)
+                goto socket_err;
 
-        outmessage.msg_name = &rem_addr;
-        outmessage.msg_namelen = addrlen;
-        outmessage.msg_control = outcmsg;
-        outmessage.msg_controllen = sizeof(outcmsg);
-        outmessage.msg_flags = MSG_EOR;
+        sock->sk->sk_user_data = con;
+        con->rx_action = receive_from_sock;
+        con->connect_action = sctp_connect_to_sock;
+        add_sock(sock, con);
 
-        spin_lock(&con->writequeue_lock);
+        /* Bind to all addresses. */
+        if (sctp_bind_addrs(con, 0))
+                goto bind_err;
 
-        if (list_empty(&con->writequeue)) {
-                spin_unlock(&con->writequeue_lock);
-                log_print("writequeue empty for nodeid %d", con->nodeid);
-                goto unlock;
-        }
+        make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len);
 
-        e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
-        len = e->len;
-        offset = e->offset;
+        log_print("connecting to %d", con->nodeid);
 
-        /* Send the first block off the write queue */
-        iov[0].iov_base = page_address(e->page)+offset;
-        iov[0].iov_len = len;
-        spin_unlock(&con->writequeue_lock);
+        /* Turn off Nagle's algorithm */
+        kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
+                          sizeof(one));
 
-        if (rem_addr.ss_family == AF_INET) {
-                struct sockaddr_in *sin = (struct sockaddr_in *)&rem_addr;
-                log_print("Trying to connect to %pI4", &sin->sin_addr.s_addr);
-        } else {
-                struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&rem_addr;
-                log_print("Trying to connect to %pI6", &sin6->sin6_addr);
-        }
+        result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
+                                    O_NONBLOCK);
+        if (result == -EINPROGRESS)
+                result = 0;
+        if (result == 0)
+                goto out;
 
-        cmsg = CMSG_FIRSTHDR(&outmessage);
-        cmsg->cmsg_level = IPPROTO_SCTP;
-        cmsg->cmsg_type = SCTP_SNDRCV;
-        cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-        sinfo = CMSG_DATA(cmsg);
-        memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
-        sinfo->sinfo_ppid = cpu_to_le32(con->nodeid);
-        outmessage.msg_controllen = cmsg->cmsg_len;
-        sinfo->sinfo_flags |= SCTP_ADDR_OVER;
+bind_err:
+        con->sock = NULL;
+        sock_release(sock);
 
-        ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len);
-        if (ret < 0) {
-                log_print("Send first packet to node %d failed: %d",
-                          con->nodeid, ret);
-
-                /* Try again later */
+socket_err:
+        /*
+         * Some errors are fatal and this list might need adjusting. For other
+         * errors we try again until the max number of retries is reached.
+         */
+        if (result != -EHOSTUNREACH &&
+            result != -ENETUNREACH &&
+            result != -ENETDOWN &&
+            result != -EINVAL &&
+            result != -EPROTONOSUPPORT) {
+                log_print("connect %d try %d error %d", con->nodeid,
+                          con->retries, result);
+                mutex_unlock(&con->sock_mutex);
+                msleep(1000);
                 clear_bit(CF_CONNECT_PENDING, &con->flags);
-                clear_bit(CF_INIT_PENDING, &con->flags);
-        }
-        else {
-                spin_lock(&con->writequeue_lock);
-                writequeue_entry_complete(e, ret);
-                spin_unlock(&con->writequeue_lock);
+                lowcomms_connect_sock(con);
+                return;
         }
 
-unlock:
+out:
         mutex_unlock(&con->sock_mutex);
+        set_bit(CF_WRITE_PENDING, &con->flags);
 }
 
 /* Connect a new socket to its peer */
@@ -1236,11 +1149,13 @@ out_err:
                           con->retries, result);
                 mutex_unlock(&con->sock_mutex);
                 msleep(1000);
+                clear_bit(CF_CONNECT_PENDING, &con->flags);
                 lowcomms_connect_sock(con);
                 return;
         }
 out:
         mutex_unlock(&con->sock_mutex);
+        set_bit(CF_WRITE_PENDING, &con->flags);
         return;
 }
 
@@ -1325,37 +1240,11 @@ static void init_local(void)
         }
 }
 
-/* Bind to an IP address. SCTP allows multiple address so it can do
-   multi-homing */
-static int add_sctp_bind_addr(struct connection *sctp_con,
-                              struct sockaddr_storage *addr,
-                              int addr_len, int num)
-{
-        int result = 0;
-
-        if (num == 1)
-                result = kernel_bind(sctp_con->sock,
-                                     (struct sockaddr *) addr,
-                                     addr_len);
-        else
-                result = kernel_setsockopt(sctp_con->sock, SOL_SCTP,
-                                           SCTP_SOCKOPT_BINDX_ADD,
-                                           (char *)addr, addr_len);
-
-        if (result < 0)
-                log_print("Can't bind to port %d addr number %d",
-                          dlm_config.ci_tcp_port, num);
-
-        return result;
-}
-
 /* Initialise SCTP socket and bind to all interfaces */
 static int sctp_listen_for_all(void)
 {
         struct socket *sock = NULL;
-        struct sockaddr_storage localaddr;
-        struct sctp_event_subscribe subscribe;
-        int result = -EINVAL, num = 1, i, addr_len;
+        int result = -EINVAL;
         struct connection *con = nodeid2con(0, GFP_NOFS);
         int bufsize = NEEDED_RMEM;
         int one = 1;
@@ -1366,33 +1255,17 @@ static int sctp_listen_for_all(void)
         log_print("Using SCTP for communications");
 
         result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
-                                  SOCK_SEQPACKET, IPPROTO_SCTP, &sock);
+                                  SOCK_STREAM, IPPROTO_SCTP, &sock);
         if (result < 0) {
                 log_print("Can't create comms socket, check SCTP is loaded");
                 goto out;
         }
 
-        /* Listen for events */
-        memset(&subscribe, 0, sizeof(subscribe));
-        subscribe.sctp_data_io_event = 1;
-        subscribe.sctp_association_event = 1;
-        subscribe.sctp_send_failure_event = 1;
-        subscribe.sctp_shutdown_event = 1;
-        subscribe.sctp_partial_delivery_event = 1;
-
         result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
                                    (char *)&bufsize, sizeof(bufsize));
         if (result)
                 log_print("Error increasing buffer space on socket %d", result);
 
-        result = kernel_setsockopt(sock, SOL_SCTP, SCTP_EVENTS,
-                                   (char *)&subscribe, sizeof(subscribe));
-        if (result < 0) {
-                log_print("Failed to set SCTP_EVENTS on socket: result=%d",
-                          result);
-                goto create_delsock;
-        }
-
         result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
                                    sizeof(one));
         if (result < 0)
@@ -1402,19 +1275,12 @@ static int sctp_listen_for_all(void)
         sock->sk->sk_user_data = con;
         con->sock = sock;
         con->sock->sk->sk_data_ready = lowcomms_data_ready;
-        con->rx_action = receive_from_sock;
-        con->connect_action = sctp_init_assoc;
+        con->rx_action = sctp_accept_from_sock;
+        con->connect_action = sctp_connect_to_sock;
 
-        /* Bind to all interfaces. */
-        for (i = 0; i < dlm_local_count; i++) {
-                memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
-                make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
-
-                result = add_sctp_bind_addr(con, &localaddr, addr_len, num);
-                if (result)
-                        goto create_delsock;
-                ++num;
-        }
+        /* Bind to all addresses. */
+        if (sctp_bind_addrs(con, dlm_config.ci_tcp_port))
+                goto create_delsock;
 
         result = sock->ops->listen(sock, 5);
         if (result < 0) {
@@ -1612,14 +1478,13 @@ out:
 
 send_error:
         mutex_unlock(&con->sock_mutex);
-        close_connection(con, false);
+        close_connection(con, false, false, true);
         lowcomms_connect_sock(con);
         return;
 
 out_connect:
         mutex_unlock(&con->sock_mutex);
-        if (!test_bit(CF_INIT_PENDING, &con->flags))
-                lowcomms_connect_sock(con);
+        lowcomms_connect_sock(con);
 }
 
 static void clean_one_writequeue(struct connection *con)
@@ -1644,15 +1509,9 @@ int dlm_lowcomms_close(int nodeid)
         log_print("closing connection to node %d", nodeid);
         con = nodeid2con(nodeid, 0);
         if (con) {
-                clear_bit(CF_CONNECT_PENDING, &con->flags);
-                clear_bit(CF_WRITE_PENDING, &con->flags);
                 set_bit(CF_CLOSE, &con->flags);
-                if (cancel_work_sync(&con->swork))
-                        log_print("canceled swork for node %d", nodeid);
-                if (cancel_work_sync(&con->rwork))
-                        log_print("canceled rwork for node %d", nodeid);
+                close_connection(con, true, true, true);
                 clean_one_writequeue(con);
-                close_connection(con, true);
         }
 
         spin_lock(&dlm_node_addrs_spin);
@@ -1685,10 +1544,8 @@ static void process_send_sockets(struct work_struct *work)
 {
         struct connection *con = container_of(work, struct connection, swork);
 
-        if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
+        if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags))
                 con->connect_action(con);
-                set_bit(CF_WRITE_PENDING, &con->flags);
-        }
         if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags))
                 send_to_sock(con);
 }
@@ -1735,7 +1592,7 @@ static void stop_conn(struct connection *con)
 
 static void free_conn(struct connection *con)
 {
-        close_connection(con, true);
+        close_connection(con, true, true, true);
         if (con->othercon)
                 kmem_cache_free(con_cache, con->othercon);
         hlist_del(&con->list);
@@ -1806,7 +1663,7 @@ fail_unlisten:
         dlm_allow_conn = 0;
         con = nodeid2con(0,0);
         if (con) {
-                close_connection(con, false);
+                close_connection(con, false, true, true);
                 kmem_cache_free(con_cache, con);
         }
 fail_destroy:
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 75ecc0d3bc85..173b3873a4f4 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -782,6 +782,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
         DECLARE_WAITQUEUE(wait, current);
         struct dlm_callback cb;
         int rv, resid, copy_lvb = 0;
+        int old_mode, new_mode;
 
         if (count == sizeof(struct dlm_device_version)) {
                 rv = copy_version_to_user(buf, count);
@@ -838,6 +839,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
 
         lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list);
 
+        /* rem_lkb_callback sets a new lkb_last_cast */
+        old_mode = lkb->lkb_last_cast.mode;
+
         rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid);
         if (rv < 0) {
                 /* this shouldn't happen; lkb should have been removed from
@@ -861,9 +865,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
         }
 
         if (cb.flags & DLM_CB_CAST) {
-                int old_mode, new_mode;
-
-                old_mode = lkb->lkb_last_cast.mode;
                 new_mode = cb.mode;
 
                 if (!cb.sb_status && lkb->lkb_lksb->sb_lvbptr &&
diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h
index 3060783c4191..df56c8ff0769 100644
--- a/include/uapi/linux/dlm_device.h
+++ b/include/uapi/linux/dlm_device.h
@@ -26,7 +26,7 @@
 /* Version of the device interface */
 #define DLM_DEVICE_VERSION_MAJOR 6
 #define DLM_DEVICE_VERSION_MINOR 0
-#define DLM_DEVICE_VERSION_PATCH 1
+#define DLM_DEVICE_VERSION_PATCH 2
 
 /* struct passed to the lock write */
 struct dlm_lock_params {