aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Grover <andy.grover@oracle.com>2010-02-19 21:01:41 -0500
committerAndy Grover <andy.grover@oracle.com>2010-09-08 21:07:32 -0400
commit7c82eaf00ec7d460932be9314b29997006b799b6 (patch)
treedd1e46ceb37ab4d5b6688d04805240eb9fcb7fbe
parent35b52c70534cb7193b218ec12efe6bc595312097 (diff)
RDS: Rewrite rds_send_drop_to() for clarity
This function has been the source of numerous bugs; it's just too complicated. Simplified to nest spinlocks cleanly within the second loop body, and kick out early if there are no rms to drop. This will be a little slower because conn lock is grabbed for each entry instead of "caching" the lock across rms, but this should be entirely irrelevant to fastpath performance. Signed-off-by: Andy Grover <andy.grover@oracle.com>
-rw-r--r--net/rds/send.c64
1 files changed, 29 insertions, 35 deletions
diff --git a/net/rds/send.c b/net/rds/send.c
index 9c1c6bcaa6c9..aee58f931b69 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -619,9 +619,8 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
619{ 619{
620 struct rds_message *rm, *tmp; 620 struct rds_message *rm, *tmp;
621 struct rds_connection *conn; 621 struct rds_connection *conn;
622 unsigned long flags, flags2; 622 unsigned long flags;
623 LIST_HEAD(list); 623 LIST_HEAD(list);
624 int wake = 0;
625 624
626 /* get all the messages we're dropping under the rs lock */ 625 /* get all the messages we're dropping under the rs lock */
627 spin_lock_irqsave(&rs->rs_lock, flags); 626 spin_lock_irqsave(&rs->rs_lock, flags);
@@ -631,59 +630,54 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
631 dest->sin_port != rm->m_inc.i_hdr.h_dport)) 630 dest->sin_port != rm->m_inc.i_hdr.h_dport))
632 continue; 631 continue;
633 632
634 wake = 1;
635 list_move(&rm->m_sock_item, &list); 633 list_move(&rm->m_sock_item, &list);
636 rds_send_sndbuf_remove(rs, rm); 634 rds_send_sndbuf_remove(rs, rm);
637 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); 635 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
638 } 636 }
639 637
640 /* order flag updates with the rs lock */ 638 /* order flag updates with the rs lock */
641 if (wake) 639 smp_mb__after_clear_bit();
642 smp_mb__after_clear_bit();
643 640
644 spin_unlock_irqrestore(&rs->rs_lock, flags); 641 spin_unlock_irqrestore(&rs->rs_lock, flags);
645 642
646 conn = NULL; 643 if (list_empty(&list))
644 return;
647 645
648 /* now remove the messages from the conn list as needed */ 646 /* Remove the messages from the conn */
649 list_for_each_entry(rm, &list, m_sock_item) { 647 list_for_each_entry(rm, &list, m_sock_item) {
650 /* We do this here rather than in the loop above, so that 648
651 * we don't have to nest m_rs_lock under rs->rs_lock */ 649 conn = rm->m_inc.i_conn;
652 spin_lock_irqsave(&rm->m_rs_lock, flags2); 650 spin_lock_irqsave(&conn->c_lock, flags);
653 /* If this is a RDMA operation, notify the app. */
654 spin_lock(&rs->rs_lock);
655 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
656 spin_unlock(&rs->rs_lock);
657 rm->m_rs = NULL;
658 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
659 651
660 /* 652 /*
661 * If we see this flag cleared then we're *sure* that someone 653 * Maybe someone else beat us to removing rm from the conn.
662 * else beat us to removing it from the conn. If we race 654 * If we race with their flag update we'll get the lock and
663 * with their flag update we'll get the lock and then really 655 * then really see that the flag has been cleared.
664 * see that the flag has been cleared.
665 */ 656 */
666 if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 657 if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
658 spin_unlock_irqrestore(&conn->c_lock, flags);
667 continue; 659 continue;
668
669 if (conn != rm->m_inc.i_conn) {
670 if (conn)
671 spin_unlock_irqrestore(&conn->c_lock, flags);
672 conn = rm->m_inc.i_conn;
673 spin_lock_irqsave(&conn->c_lock, flags);
674 } 660 }
675 661
676 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { 662 /*
677 list_del_init(&rm->m_conn_item); 663 * Couldn't grab m_rs_lock in top loop (lock ordering),
678 rds_message_put(rm); 664 * but we can now.
679 } 665 */
680 } 666 spin_lock(&rm->m_rs_lock);
681 667
682 if (conn) 668 spin_lock(&rs->rs_lock);
669 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
670 spin_unlock(&rs->rs_lock);
671
672 rm->m_rs = NULL;
673 spin_unlock(&rm->m_rs_lock);
674
675 list_del_init(&rm->m_conn_item);
676 rds_message_put(rm);
683 spin_unlock_irqrestore(&conn->c_lock, flags); 677 spin_unlock_irqrestore(&conn->c_lock, flags);
678 }
684 679
685 if (wake) 680 rds_wake_sk_sleep(rs);
686 rds_wake_sk_sleep(rs);
687 681
688 while (!list_empty(&list)) { 682 while (!list_empty(&list)) {
689 rm = list_entry(list.next, struct rds_message, m_sock_item); 683 rm = list_entry(list.next, struct rds_message, m_sock_item);