about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2010-05-11 18:11:11 -0400
committerAndy Grover <andy.grover@oracle.com>2010-09-08 21:15:09 -0400
commit7e3f2952eeb1a0fe2aa9882fd1705a88f9d89b35 (patch)
tree903d89d0d1184bd1f33df2b5036615d1c40cfb06
parent38a4e5e61344490f18241333d7b1b368a3a38748 (diff)
rds: don't let RDS shutdown a connection while senders are present
This is the first in a long line of patches that tries to fix races between RDS connection shutdown and RDS traffic. Here we are maintaining a count of active senders to make sure the connection doesn't go away while they are using it.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--net/rds/connection.c7
-rw-r--r--net/rds/ib_recv.c12
-rw-r--r--net/rds/message.c5
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/send.c17
5 files changed, 26 insertions, 16 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7e4e9dfdbc0b..9c249f394f29 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -148,6 +148,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
148 148
149 spin_lock_init(&conn->c_send_lock); 149 spin_lock_init(&conn->c_send_lock);
150 atomic_set(&conn->c_send_generation, 1); 150 atomic_set(&conn->c_send_generation, 1);
151 atomic_set(&conn->c_senders, 0);
151 INIT_LIST_HEAD(&conn->c_send_queue); 152 INIT_LIST_HEAD(&conn->c_send_queue);
152 INIT_LIST_HEAD(&conn->c_retrans); 153 INIT_LIST_HEAD(&conn->c_retrans);
153 154
@@ -276,6 +277,12 @@ void rds_conn_shutdown(struct rds_connection *conn)
276 spin_lock_irq(&conn->c_send_lock); 277 spin_lock_irq(&conn->c_send_lock);
277 spin_unlock_irq(&conn->c_send_lock); 278 spin_unlock_irq(&conn->c_send_lock);
278 279
280 while(atomic_read(&conn->c_senders)) {
281 schedule_timeout(1);
282 spin_lock_irq(&conn->c_send_lock);
283 spin_unlock_irq(&conn->c_send_lock);
284 }
285
279 conn->c_trans->conn_shutdown(conn); 286 conn->c_trans->conn_shutdown(conn);
280 rds_conn_reset(conn); 287 rds_conn_reset(conn);
281 288
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 8f041f7954a2..24d14615f41a 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -863,18 +863,6 @@ int rds_ib_recv(struct rds_connection *conn)
863 int ret = 0; 863 int ret = 0;
864 864
865 rdsdebug("conn %p\n", conn); 865 rdsdebug("conn %p\n", conn);
866
867 /*
868 * If we get a temporary posting failure in this context then
869 * we're really low and we want the caller to back off for a bit.
870 */
871 mutex_lock(&ic->i_recv_mutex);
872 if (rds_ib_recv_refill(conn, 0))
873 ret = -ENOMEM;
874 else
875 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
876 mutex_unlock(&ic->i_recv_mutex);
877
878 if (rds_conn_up(conn)) 866 if (rds_conn_up(conn))
879 rds_ib_attempt_ack(ic); 867 rds_ib_attempt_ack(ic);
880 868
diff --git a/net/rds/message.c b/net/rds/message.c
index 96e2bf7dc77e..84f937f11d47 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -81,7 +81,10 @@ static void rds_message_purge(struct rds_message *rm)
81void rds_message_put(struct rds_message *rm) 81void rds_message_put(struct rds_message *rm)
82{ 82{
83 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); 83 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
84 84 if (atomic_read(&rm->m_refcount) == 0) {
85printk(KERN_CRIT "danger refcount zero on %p\n", rm);
86WARN_ON(1);
87 }
85 if (atomic_dec_and_test(&rm->m_refcount)) { 88 if (atomic_dec_and_test(&rm->m_refcount)) {
86 BUG_ON(!list_empty(&rm->m_sock_item)); 89 BUG_ON(!list_empty(&rm->m_sock_item));
87 BUG_ON(!list_empty(&rm->m_conn_item)); 90 BUG_ON(!list_empty(&rm->m_conn_item));
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 241a0859d16e..4ab3d1aa0237 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -93,6 +93,7 @@ struct rds_connection {
93 93
94 spinlock_t c_send_lock; /* protect send ring */ 94 spinlock_t c_send_lock; /* protect send ring */
95 atomic_t c_send_generation; 95 atomic_t c_send_generation;
96 atomic_t c_senders;
96 struct rds_message *c_xmit_rm; 97 struct rds_message *c_xmit_rm;
97 unsigned long c_xmit_sg; 98 unsigned long c_xmit_sg;
98 unsigned int c_xmit_hdr_off; 99 unsigned int c_xmit_hdr_off;
diff --git a/net/rds/send.c b/net/rds/send.c
index 8e3fd9981c2e..d35c43ff792e 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -60,15 +60,23 @@ void rds_send_reset(struct rds_connection *conn)
60 struct rds_message *rm, *tmp; 60 struct rds_message *rm, *tmp;
61 unsigned long flags; 61 unsigned long flags;
62 62
63 spin_lock_irqsave(&conn->c_send_lock, flags);
63 if (conn->c_xmit_rm) { 64 if (conn->c_xmit_rm) {
65 rm = conn->c_xmit_rm;
66 conn->c_xmit_rm = NULL;
64 /* Tell the user the RDMA op is no longer mapped by the 67 /* Tell the user the RDMA op is no longer mapped by the
65 * transport. This isn't entirely true (it's flushed out 68 * transport. This isn't entirely true (it's flushed out
66 * independently) but as the connection is down, there's 69 * independently) but as the connection is down, there's
67 * no ongoing RDMA to/from that memory */ 70 * no ongoing RDMA to/from that memory */
68 rds_message_unmapped(conn->c_xmit_rm); 71printk(KERN_CRIT "send reset unmapping %p\n", rm);
69 rds_message_put(conn->c_xmit_rm); 72 rds_message_unmapped(rm);
70 conn->c_xmit_rm = NULL; 73 spin_unlock_irqrestore(&conn->c_send_lock, flags);
74
75 rds_message_put(rm);
76 } else {
77 spin_unlock_irqrestore(&conn->c_send_lock, flags);
71 } 78 }
79
72 conn->c_xmit_sg = 0; 80 conn->c_xmit_sg = 0;
73 conn->c_xmit_hdr_off = 0; 81 conn->c_xmit_hdr_off = 0;
74 conn->c_xmit_data_off = 0; 82 conn->c_xmit_data_off = 0;
@@ -131,6 +139,7 @@ restart:
131 ret = -ENOMEM; 139 ret = -ENOMEM;
132 goto out; 140 goto out;
133 } 141 }
142 atomic_inc(&conn->c_senders);
134 143
135 if (conn->c_trans->xmit_prepare) 144 if (conn->c_trans->xmit_prepare)
136 conn->c_trans->xmit_prepare(conn); 145 conn->c_trans->xmit_prepare(conn);
@@ -350,6 +359,8 @@ restart:
350 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED); 359 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
351 } 360 }
352 361
362 atomic_dec(&conn->c_senders);
363
353 /* 364 /*
354 * Other senders will see we have c_send_lock and exit. We 365 * Other senders will see we have c_send_lock and exit. We
355 * need to recheck the send queue and race again for c_send_lock 366 * need to recheck the send queue and race again for c_send_lock