diff options
author | Andy Grover <andy.grover@oracle.com> | 2010-02-19 21:01:41 -0500 |
---|---|---|
committer | Andy Grover <andy.grover@oracle.com> | 2010-09-08 21:07:32 -0400 |
commit | 7c82eaf00ec7d460932be9314b29997006b799b6 (patch) | |
tree | dd1e46ceb37ab4d5b6688d04805240eb9fcb7fbe | |
parent | 35b52c70534cb7193b218ec12efe6bc595312097 (diff) |
RDS: Rewrite rds_send_drop_to() for clarity
This function has been the source of numerous bugs; it's just
too complicated. Simplified to nest spinlocks cleanly within
the second loop body, and kick out early if there are no
rms to drop.
This will be a little slower because conn lock is grabbed for
each entry instead of "caching" the lock across rms, but this
should be entirely irrelevant to fastpath performance.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
-rw-r--r-- | net/rds/send.c | 64 |
1 files changed, 29 insertions, 35 deletions
diff --git a/net/rds/send.c b/net/rds/send.c index 9c1c6bcaa6c9..aee58f931b69 100644 --- a/net/rds/send.c +++ b/net/rds/send.c | |||
@@ -619,9 +619,8 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) | |||
619 | { | 619 | { |
620 | struct rds_message *rm, *tmp; | 620 | struct rds_message *rm, *tmp; |
621 | struct rds_connection *conn; | 621 | struct rds_connection *conn; |
622 | unsigned long flags, flags2; | 622 | unsigned long flags; |
623 | LIST_HEAD(list); | 623 | LIST_HEAD(list); |
624 | int wake = 0; | ||
625 | 624 | ||
626 | /* get all the messages we're dropping under the rs lock */ | 625 | /* get all the messages we're dropping under the rs lock */ |
627 | spin_lock_irqsave(&rs->rs_lock, flags); | 626 | spin_lock_irqsave(&rs->rs_lock, flags); |
@@ -631,59 +630,54 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) | |||
631 | dest->sin_port != rm->m_inc.i_hdr.h_dport)) | 630 | dest->sin_port != rm->m_inc.i_hdr.h_dport)) |
632 | continue; | 631 | continue; |
633 | 632 | ||
634 | wake = 1; | ||
635 | list_move(&rm->m_sock_item, &list); | 633 | list_move(&rm->m_sock_item, &list); |
636 | rds_send_sndbuf_remove(rs, rm); | 634 | rds_send_sndbuf_remove(rs, rm); |
637 | clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); | 635 | clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); |
638 | } | 636 | } |
639 | 637 | ||
640 | /* order flag updates with the rs lock */ | 638 | /* order flag updates with the rs lock */ |
641 | if (wake) | 639 | smp_mb__after_clear_bit(); |
642 | smp_mb__after_clear_bit(); | ||
643 | 640 | ||
644 | spin_unlock_irqrestore(&rs->rs_lock, flags); | 641 | spin_unlock_irqrestore(&rs->rs_lock, flags); |
645 | 642 | ||
646 | conn = NULL; | 643 | if (list_empty(&list)) |
644 | return; | ||
647 | 645 | ||
648 | /* now remove the messages from the conn list as needed */ | 646 | /* Remove the messages from the conn */ |
649 | list_for_each_entry(rm, &list, m_sock_item) { | 647 | list_for_each_entry(rm, &list, m_sock_item) { |
650 | /* We do this here rather than in the loop above, so that | 648 | |
651 | * we don't have to nest m_rs_lock under rs->rs_lock */ | 649 | conn = rm->m_inc.i_conn; |
652 | spin_lock_irqsave(&rm->m_rs_lock, flags2); | 650 | spin_lock_irqsave(&conn->c_lock, flags); |
653 | /* If this is a RDMA operation, notify the app. */ | ||
654 | spin_lock(&rs->rs_lock); | ||
655 | __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); | ||
656 | spin_unlock(&rs->rs_lock); | ||
657 | rm->m_rs = NULL; | ||
658 | spin_unlock_irqrestore(&rm->m_rs_lock, flags2); | ||
659 | 651 | ||
660 | /* | 652 | /* |
661 | * If we see this flag cleared then we're *sure* that someone | 653 | * Maybe someone else beat us to removing rm from the conn. |
662 | * else beat us to removing it from the conn. If we race | 654 | * If we race with their flag update we'll get the lock and |
663 | * with their flag update we'll get the lock and then really | 655 | * then really see that the flag has been cleared. |
664 | * see that the flag has been cleared. | ||
665 | */ | 656 | */ |
666 | if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags)) | 657 | if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { |
658 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
667 | continue; | 659 | continue; |
668 | |||
669 | if (conn != rm->m_inc.i_conn) { | ||
670 | if (conn) | ||
671 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
672 | conn = rm->m_inc.i_conn; | ||
673 | spin_lock_irqsave(&conn->c_lock, flags); | ||
674 | } | 660 | } |
675 | 661 | ||
676 | if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { | 662 | /* |
677 | list_del_init(&rm->m_conn_item); | 663 | * Couldn't grab m_rs_lock in top loop (lock ordering), |
678 | rds_message_put(rm); | 664 | * but we can now. |
679 | } | 665 | */ |
680 | } | 666 | spin_lock(&rm->m_rs_lock); |
681 | 667 | ||
682 | if (conn) | 668 | spin_lock(&rs->rs_lock); |
669 | __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); | ||
670 | spin_unlock(&rs->rs_lock); | ||
671 | |||
672 | rm->m_rs = NULL; | ||
673 | spin_unlock(&rm->m_rs_lock); | ||
674 | |||
675 | list_del_init(&rm->m_conn_item); | ||
676 | rds_message_put(rm); | ||
683 | spin_unlock_irqrestore(&conn->c_lock, flags); | 677 | spin_unlock_irqrestore(&conn->c_lock, flags); |
678 | } | ||
684 | 679 | ||
685 | if (wake) | 680 | rds_wake_sk_sleep(rs); |
686 | rds_wake_sk_sleep(rs); | ||
687 | 681 | ||
688 | while (!list_empty(&list)) { | 682 | while (!list_empty(&list)) { |
689 | rm = list_entry(list.next, struct rds_message, m_sock_item); | 683 | rm = list_entry(list.next, struct rds_message, m_sock_item); |