aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds/ib_cm.c
diff options
context:
space:
mode:
author Zach Brown <zach.brown@oracle.com> 2010-05-18 18:48:51 -0400
committer Andy Grover <andy.grover@oracle.com> 2010-09-08 21:15:17 -0400
commit 3e0249f9c05cb77b66f7f09644ca9ca208d991a9 (patch)
tree 2f8aaddb51590d36a54b8f7044224073107d77d9 /net/rds/ib_cm.c
parent 89bf9d4158b5a1b6bd00960eb2e47601ec8cc138 (diff)
RDS/IB: add refcount tracking to struct rds_ib_device
The RDS IB client .remove callback used to free the rds_ibdev for the given device unconditionally. This could race with other users of the struct. This patch adds refcounting so that we only free the rds_ibdev once all of its users are done. Many rds_ibdev users are tied to connections. We give the connection a reference and change these users to reference the device in the connection instead of looking it up in the IB client data. The only user of the IB client data remaining is the first lookup of the device as connections are built up. Incrementing the reference count of a device found in the IB client data could race with final freeing, so we use an RCU grace period to make sure that freeing won't happen until those lookups are done. MRs need the rds_ibdev to get at the pool that they're freed into. They exist outside a connection and many MRs can reference different devices from one socket, so it was natural to have each MR hold a reference. MR refs can be dropped from interrupt handlers and final device teardown can block, so we push it off to a work struct. Pool teardown had to be fixed to cancel its pending work instead of deadlocking while waiting for all queued work, including itself, to finish. MRs get their reference from the global device list, which gets a reference. It is left unprotected by locks and remains racy. A simple global lock would be a significant bottleneck. More scalable (complicated) locking should be done carefully in a later patch. Signed-off-by: Zach Brown <zach.brown@oracle.com>
Diffstat (limited to 'net/rds/ib_cm.c')
-rw-r--r-- net/rds/ib_cm.c 33
1 files changed, 14 insertions, 19 deletions
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 73253f7c1fa3..a9fb917c00bb 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -95,7 +95,6 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
95{ 95{
96 const struct rds_ib_connect_private *dp = NULL; 96 const struct rds_ib_connect_private *dp = NULL;
97 struct rds_ib_connection *ic = conn->c_transport_data; 97 struct rds_ib_connection *ic = conn->c_transport_data;
98 struct rds_ib_device *rds_ibdev;
99 struct ib_qp_attr qp_attr; 98 struct ib_qp_attr qp_attr;
100 int err; 99 int err;
101 100
@@ -145,12 +144,11 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
145 if (err) 144 if (err)
146 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); 145 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
147 146
148 /* update ib_device with this local ipaddr & conn */ 147 /* update ib_device with this local ipaddr */
149 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 148 err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
150 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
151 if (err) 149 if (err)
152 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 150 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
153 rds_ib_add_conn(rds_ibdev, conn); 151 err);
154 152
155 /* If the peer gave us the last packet it saw, process this as if 153 /* If the peer gave us the last packet it saw, process this as if
156 * we had received a regular ACK. */ 154 * we had received a regular ACK. */
@@ -168,12 +166,10 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
168 u32 max_initiator_depth) 166 u32 max_initiator_depth)
169{ 167{
170 struct rds_ib_connection *ic = conn->c_transport_data; 168 struct rds_ib_connection *ic = conn->c_transport_data;
171 struct rds_ib_device *rds_ibdev; 169 struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
172 170
173 memset(conn_param, 0, sizeof(struct rdma_conn_param)); 171 memset(conn_param, 0, sizeof(struct rdma_conn_param));
174 172
175 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
176
177 conn_param->responder_resources = 173 conn_param->responder_resources =
178 min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources); 174 min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
179 conn_param->initiator_depth = 175 conn_param->initiator_depth =
@@ -241,18 +237,16 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
241 struct rds_ib_device *rds_ibdev; 237 struct rds_ib_device *rds_ibdev;
242 int ret; 238 int ret;
243 239
244 /* rds_ib_add_one creates a rds_ib_device object per IB device, 240 /*
245 * and allocates a protection domain, memory range and FMR pool 241 * It's normal to see a null device if an incoming connection races
246 * for each. If that fails for any reason, it will not register 242 * with device removal, so we don't print a warning.
247 * the rds_ibdev at all.
248 */ 243 */
249 rds_ibdev = ib_get_client_data(dev, &rds_ib_client); 244 rds_ibdev = rds_ib_get_client_data(dev);
250 if (!rds_ibdev) { 245 if (!rds_ibdev)
251 if (printk_ratelimit())
252 printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
253 dev->name);
254 return -EOPNOTSUPP; 246 return -EOPNOTSUPP;
255 } 247
248 /* add the conn now so that connection establishment has the dev */
249 rds_ib_add_conn(rds_ibdev, conn);
256 250
257 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) 251 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
258 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); 252 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
@@ -371,6 +365,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
371 ic->i_send_cq, ic->i_recv_cq); 365 ic->i_send_cq, ic->i_recv_cq);
372 366
373out: 367out:
368 rds_ib_dev_put(rds_ibdev);
374 return ret; 369 return ret;
375} 370}
376 371