author		Gerd Rausch <gerd.rausch@oracle.com>	2019-07-16 18:29:17 -0400
committer	David S. Miller <davem@davemloft.net>	2019-07-17 15:06:52 -0400
commit		3a2886cca703fde5ee21baea9fedf8b1389c59d7
tree		ebeffb5e2ef043aa46be9929db3a466466df50ce
parent		9547dff1085d5935d6070377023096821033e30c
net/rds: Keep track of and wait for FRWR segments in use upon shutdown
Since "rds_ib_free_frmr" and "rds_ib_free_frmr_list" simply put
the FRMR memory segments on the "drop_list" or "free_list",
and it is the job of "rds_ib_flush_mr_pool" to reap those entries
by ultimately issuing a "IB_WR_LOCAL_INV" work-request,
we need to trigger and then wait for all those memory segments
attached to a particular connection to be fully released before
we can move on to release the QP, CQ, etc.
So we make "rds_ib_conn_path_shutdown" wait for one more
atomic_t called "i_fastreg_inuse_count" that keeps track of how
many FRWR memory segments are out there marked "FRMR_IS_INUSE"
(and also wake_up rds_ib_ring_empty_wait, as they go away).
Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
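
The race being closed: FRMR segments parked on the "drop_list"/"free_list" are only truly released once their "IB_WR_LOCAL_INV" work-requests complete on the connection's QP, so the QP must outlive them. Condensed from the hunks below, the lifecycle the new counter enforces looks like this (a sketch stitched together from the patch, not the literal RDS code):

	/* registration (rds_ib_post_reg_frmr): claim the segment, then count it */
	if (cmpxchg(&frmr->fr_state, FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
		return -EBUSY;
	atomic_inc(&ibmr->ic->i_fastreg_inuse_count);

	/* retirement (rds_transition_frwr_state): leave FRMR_IS_INUSE exactly once */
	if (cmpxchg(&frmr->fr_state, FRMR_IS_INUSE, FRMR_IS_FREE) == FRMR_IS_INUSE) {
		smp_mb__before_atomic();	/* fr_state store visible before the dec */
		atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
		wake_up(&rds_ib_ring_empty_wait);	/* unblock the shutdown waiter */
	}

	/* shutdown (rds_ib_conn_path_shutdown): trigger the reap, then wait */
	rds_ib_flush_mrs();
	wait_event(rds_ib_ring_empty_wait,
		   atomic_read(&ic->i_fastreg_inuse_count) == 0 /* && ... */);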
net/rds/ib.h      |  1
net/rds/ib_cm.c   |  7
net/rds/ib_frmr.c | 43
3 files changed, 45 insertions, 6 deletions
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 66c03c7665b2..303c6ee8bdb7 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -156,6 +156,7 @@ struct rds_ib_connection {
 
 	/* To control the number of wrs from fastreg */
 	atomic_t i_fastreg_wrs;
+	atomic_t i_fastreg_inuse_count;
 
 	/* interrupt handling */
 	struct tasklet_struct i_send_tasklet;
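
This patch adds no explicit initializer for the new field; "struct rds_ib_connection" is, as far as I can tell, zero-allocated (kzalloc) in "rds_ib_conn_alloc", so the counter starts at 0. Spelled out, the equivalent init would be a one-liner there (hypothetical placement, not part of this commit):

	atomic_set(&ic->i_fastreg_inuse_count, 0);	/* implicit via kzalloc today */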
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 8891822eba4f..1b6fd6c8b12b 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -40,6 +40,7 @@
 #include "rds_single_path.h"
 #include "rds.h"
 #include "ib.h"
+#include "ib_mr.h"
 
 /*
  * Set the selected protocol version
@@ -993,6 +994,11 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
 				 ic->i_cm_id, err);
 	}
 
+	/* kick off "flush_worker" for all pools in order to reap
+	 * all FRMR registrations that are still marked "FRMR_IS_INUSE"
+	 */
+	rds_ib_flush_mrs();
+
 	/*
 	 * We want to wait for tx and rx completion to finish
 	 * before we tear down the connection, but we have to be
@@ -1005,6 +1011,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
 		wait_event(rds_ib_ring_empty_wait,
 			   rds_ib_ring_empty(&ic->i_recv_ring) &&
 			   (atomic_read(&ic->i_signaled_sends) == 0) &&
+			   (atomic_read(&ic->i_fastreg_inuse_count) == 0) &&
 			   (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
 	tasklet_kill(&ic->i_send_tasklet);
 	tasklet_kill(&ic->i_recv_tasklet);
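
The new #include "ib_mr.h" presumably supplies the "rds_ib_flush_mrs" declaration. With the extra condition, shutdown now blocks until four things quiesce; restated as a standalone predicate (the helper wrapper is purely illustrative, the names come from the hunk above):

	static bool rds_ib_conn_quiesced(struct rds_ib_connection *ic)
	{
		return rds_ib_ring_empty(&ic->i_recv_ring) &&		/* recv ring drained */
		       atomic_read(&ic->i_signaled_sends) == 0 &&	/* no signaled sends */
		       atomic_read(&ic->i_fastreg_inuse_count) == 0 &&	/* nothing FRMR_IS_INUSE */
		       atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR; /* WR credits returned */
	}

Note that "rds_ib_flush_mrs" must run before the wait: without it, segments sitting on the drop/free lists would never get their "IB_WR_LOCAL_INV" posted, and the wait_event() above could block forever.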
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index adaa8e99e5a9..06ecf9d2d4bf 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -32,6 +32,24 @@
 
 #include "ib_mr.h"
 
+static inline void
+rds_transition_frwr_state(struct rds_ib_mr *ibmr,
+			  enum rds_ib_fr_state old_state,
+			  enum rds_ib_fr_state new_state)
+{
+	if (cmpxchg(&ibmr->u.frmr.fr_state,
+		    old_state, new_state) == old_state &&
+	    old_state == FRMR_IS_INUSE) {
+		/* enforce order of ibmr->u.frmr.fr_state update
+		 * before decrementing i_fastreg_inuse_count
+		 */
+		smp_mb__before_atomic();
+		atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
+		if (waitqueue_active(&rds_ib_ring_empty_wait))
+			wake_up(&rds_ib_ring_empty_wait);
+	}
+}
+
 static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
 					   int npages)
 {
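
The cmpxchg() gate in "rds_transition_frwr_state" matters because fr_state is written from several contexts after this patch: the registration error path in "rds_ib_post_reg_frmr", the invalidation error path in "rds_ib_post_inv", and the CQE handler. Only the caller whose cmpxchg() actually observes old_state performs the atomic_dec(), so "i_fastreg_inuse_count" is decremented at most once per registration and cannot underflow. A hypothetical call sequence, purely to illustrate the single-shot behavior:

	/* two paths racing to retire the same segment */
	rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);	/* winner: decrements + wakes */
	rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);	/* loser: cmpxchg mismatch, no-op */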
@@ -118,13 +136,18 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
 	if (unlikely(ret != ibmr->sg_len))
 		return ret < 0 ? ret : -EINVAL;
 
+	if (cmpxchg(&frmr->fr_state,
+		    FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
+		return -EBUSY;
+
+	atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
+
 	/* Perform a WR for the fast_reg_mr. Each individual page
 	 * in the sg list is added to the fast reg page list and placed
 	 * inside the fast_reg_mr WR. The key used is a rolling 8bit
 	 * counter, which should guarantee uniqueness.
 	 */
 	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
-	frmr->fr_state = FRMR_IS_INUSE;
 	frmr->fr_reg = true;
 
 	memset(&reg_wr, 0, sizeof(reg_wr));
@@ -141,7 +164,8 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
 	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
 	if (unlikely(ret)) {
 		/* Failure here can be because of -ENOMEM as well */
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
+
 		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		if (printk_ratelimit())
 			pr_warn("RDS/IB: %s returned error(%d)\n",
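
Note the ordering in the two hunks above: the segment is claimed and counted before ib_post_send(), and the error path immediately rebalances the accounting through the new helper. Condensed (names from the hunks above; the elided middle builds reg_wr):

	if (cmpxchg(&frmr->fr_state, FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
		return -EBUSY;				/* segment already claimed */
	atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
	/* ... build and post the fastreg WR ... */
	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
	if (unlikely(ret))
		/* INUSE -> STALE also decrements the counter: net zero */
		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);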
@@ -268,8 +292,12 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
 
 	ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
 	if (unlikely(ret)) {
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
 		frmr->fr_inv = false;
+		/* enforce order of frmr->fr_inv update
+		 * before incrementing i_fastreg_wrs
+		 */
+		smp_mb__before_atomic();
 		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
 		goto out;
@@ -297,7 +325,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
 
 	if (wc->status != IB_WC_SUCCESS) {
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
 		if (rds_conn_up(ic->conn))
 			rds_ib_conn_error(ic->conn,
 					  "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
@@ -309,8 +337,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 	}
 
 	if (frmr->fr_inv) {
-		if (frmr->fr_state == FRMR_IS_INUSE)
-			frmr->fr_state = FRMR_IS_FREE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
 		frmr->fr_inv = false;
 		wake_up(&frmr->fr_inv_done);
 	}
@@ -320,6 +347,10 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 		wake_up(&frmr->fr_reg_done);
 	}
 
+	/* enforce order of frmr->{fr_reg,fr_inv} update
+	 * before incrementing i_fastreg_wrs
+	 */
+	smp_mb__before_atomic();
 	atomic_inc(&ic->i_fastreg_wrs);
 }
 
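
Finally, the smp_mb__before_atomic() calls all follow the same kernel idiom: a plain store (to fr_state, fr_inv, or fr_reg) must be ordered before the atomic counter update whose value another CPU's wait_event() condition reads. A generic, self-contained restatement of that idiom (illustrative only; these names are made up, not RDS symbols):

	#include <linux/atomic.h>
	#include <linux/wait.h>

	static atomic_t inuse = ATOMIC_INIT(0);
	static DECLARE_WAIT_QUEUE_HEAD(empty_wait);
	static int seg_state;			/* updated by plain stores */

	static void retire_segment(void)
	{
		seg_state = 0;			/* plain store: mark segment free */
		smp_mb__before_atomic();	/* order that store before the dec */
		atomic_dec(&inuse);
		if (waitqueue_active(&empty_wait))	/* cheap check before waking */
			wake_up(&empty_wait);
	}

	static void shutdown_wait(void)
	{
		/* wait_event() re-checks the condition on every wakeup */
		wait_event(empty_wait, atomic_read(&inuse) == 0);
	}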