diff options
Diffstat (limited to 'net/rds/ib_frmr.c')
-rw-r--r-- | net/rds/ib_frmr.c | 376 |
1 files changed, 376 insertions, 0 deletions
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c new file mode 100644 index 000000000000..93ff038ea9d1 --- /dev/null +++ b/net/rds/ib_frmr.c | |||
@@ -0,0 +1,376 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include "ib_mr.h" | ||
34 | |||
35 | static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, | ||
36 | int npages) | ||
37 | { | ||
38 | struct rds_ib_mr_pool *pool; | ||
39 | struct rds_ib_mr *ibmr = NULL; | ||
40 | struct rds_ib_frmr *frmr; | ||
41 | int err = 0; | ||
42 | |||
43 | if (npages <= RDS_MR_8K_MSG_SIZE) | ||
44 | pool = rds_ibdev->mr_8k_pool; | ||
45 | else | ||
46 | pool = rds_ibdev->mr_1m_pool; | ||
47 | |||
48 | ibmr = rds_ib_try_reuse_ibmr(pool); | ||
49 | if (ibmr) | ||
50 | return ibmr; | ||
51 | |||
52 | ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, | ||
53 | rdsibdev_to_node(rds_ibdev)); | ||
54 | if (!ibmr) { | ||
55 | err = -ENOMEM; | ||
56 | goto out_no_cigar; | ||
57 | } | ||
58 | |||
59 | frmr = &ibmr->u.frmr; | ||
60 | frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG, | ||
61 | pool->fmr_attr.max_pages); | ||
62 | if (IS_ERR(frmr->mr)) { | ||
63 | pr_warn("RDS/IB: %s failed to allocate MR", __func__); | ||
64 | goto out_no_cigar; | ||
65 | } | ||
66 | |||
67 | ibmr->pool = pool; | ||
68 | if (pool->pool_type == RDS_IB_MR_8K_POOL) | ||
69 | rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc); | ||
70 | else | ||
71 | rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc); | ||
72 | |||
73 | if (atomic_read(&pool->item_count) > pool->max_items_soft) | ||
74 | pool->max_items_soft = pool->max_items; | ||
75 | |||
76 | frmr->fr_state = FRMR_IS_FREE; | ||
77 | return ibmr; | ||
78 | |||
79 | out_no_cigar: | ||
80 | kfree(ibmr); | ||
81 | atomic_dec(&pool->item_count); | ||
82 | return ERR_PTR(err); | ||
83 | } | ||
84 | |||
85 | static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop) | ||
86 | { | ||
87 | struct rds_ib_mr_pool *pool = ibmr->pool; | ||
88 | |||
89 | if (drop) | ||
90 | llist_add(&ibmr->llnode, &pool->drop_list); | ||
91 | else | ||
92 | llist_add(&ibmr->llnode, &pool->free_list); | ||
93 | atomic_add(ibmr->sg_len, &pool->free_pinned); | ||
94 | atomic_inc(&pool->dirty_count); | ||
95 | |||
96 | /* If we've pinned too many pages, request a flush */ | ||
97 | if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || | ||
98 | atomic_read(&pool->dirty_count) >= pool->max_items / 5) | ||
99 | queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); | ||
100 | } | ||
101 | |||
102 | static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) | ||
103 | { | ||
104 | struct rds_ib_frmr *frmr = &ibmr->u.frmr; | ||
105 | struct ib_send_wr *failed_wr; | ||
106 | struct ib_reg_wr reg_wr; | ||
107 | int ret; | ||
108 | |||
109 | while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) { | ||
110 | atomic_inc(&ibmr->ic->i_fastreg_wrs); | ||
111 | cpu_relax(); | ||
112 | } | ||
113 | |||
114 | ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, PAGE_SIZE); | ||
115 | if (unlikely(ret != ibmr->sg_len)) | ||
116 | return ret < 0 ? ret : -EINVAL; | ||
117 | |||
118 | /* Perform a WR for the fast_reg_mr. Each individual page | ||
119 | * in the sg list is added to the fast reg page list and placed | ||
120 | * inside the fast_reg_mr WR. The key used is a rolling 8bit | ||
121 | * counter, which should guarantee uniqueness. | ||
122 | */ | ||
123 | ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++); | ||
124 | frmr->fr_state = FRMR_IS_INUSE; | ||
125 | |||
126 | memset(®_wr, 0, sizeof(reg_wr)); | ||
127 | reg_wr.wr.wr_id = (unsigned long)(void *)ibmr; | ||
128 | reg_wr.wr.opcode = IB_WR_REG_MR; | ||
129 | reg_wr.wr.num_sge = 0; | ||
130 | reg_wr.mr = frmr->mr; | ||
131 | reg_wr.key = frmr->mr->rkey; | ||
132 | reg_wr.access = IB_ACCESS_LOCAL_WRITE | | ||
133 | IB_ACCESS_REMOTE_READ | | ||
134 | IB_ACCESS_REMOTE_WRITE; | ||
135 | reg_wr.wr.send_flags = IB_SEND_SIGNALED; | ||
136 | |||
137 | failed_wr = ®_wr.wr; | ||
138 | ret = ib_post_send(ibmr->ic->i_cm_id->qp, ®_wr.wr, &failed_wr); | ||
139 | WARN_ON(failed_wr != ®_wr.wr); | ||
140 | if (unlikely(ret)) { | ||
141 | /* Failure here can be because of -ENOMEM as well */ | ||
142 | frmr->fr_state = FRMR_IS_STALE; | ||
143 | atomic_inc(&ibmr->ic->i_fastreg_wrs); | ||
144 | if (printk_ratelimit()) | ||
145 | pr_warn("RDS/IB: %s returned error(%d)\n", | ||
146 | __func__, ret); | ||
147 | } | ||
148 | return ret; | ||
149 | } | ||
150 | |||
151 | static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev, | ||
152 | struct rds_ib_mr_pool *pool, | ||
153 | struct rds_ib_mr *ibmr, | ||
154 | struct scatterlist *sg, unsigned int sg_len) | ||
155 | { | ||
156 | struct ib_device *dev = rds_ibdev->dev; | ||
157 | struct rds_ib_frmr *frmr = &ibmr->u.frmr; | ||
158 | int i; | ||
159 | u32 len; | ||
160 | int ret = 0; | ||
161 | |||
162 | /* We want to teardown old ibmr values here and fill it up with | ||
163 | * new sg values | ||
164 | */ | ||
165 | rds_ib_teardown_mr(ibmr); | ||
166 | |||
167 | ibmr->sg = sg; | ||
168 | ibmr->sg_len = sg_len; | ||
169 | ibmr->sg_dma_len = 0; | ||
170 | frmr->sg_byte_len = 0; | ||
171 | WARN_ON(ibmr->sg_dma_len); | ||
172 | ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len, | ||
173 | DMA_BIDIRECTIONAL); | ||
174 | if (unlikely(!ibmr->sg_dma_len)) { | ||
175 | pr_warn("RDS/IB: %s failed!\n", __func__); | ||
176 | return -EBUSY; | ||
177 | } | ||
178 | |||
179 | frmr->sg_byte_len = 0; | ||
180 | frmr->dma_npages = 0; | ||
181 | len = 0; | ||
182 | |||
183 | ret = -EINVAL; | ||
184 | for (i = 0; i < ibmr->sg_dma_len; ++i) { | ||
185 | unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]); | ||
186 | u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]); | ||
187 | |||
188 | frmr->sg_byte_len += dma_len; | ||
189 | if (dma_addr & ~PAGE_MASK) { | ||
190 | if (i > 0) | ||
191 | goto out_unmap; | ||
192 | else | ||
193 | ++frmr->dma_npages; | ||
194 | } | ||
195 | |||
196 | if ((dma_addr + dma_len) & ~PAGE_MASK) { | ||
197 | if (i < ibmr->sg_dma_len - 1) | ||
198 | goto out_unmap; | ||
199 | else | ||
200 | ++frmr->dma_npages; | ||
201 | } | ||
202 | |||
203 | len += dma_len; | ||
204 | } | ||
205 | frmr->dma_npages += len >> PAGE_SHIFT; | ||
206 | |||
207 | if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) { | ||
208 | ret = -EMSGSIZE; | ||
209 | goto out_unmap; | ||
210 | } | ||
211 | |||
212 | ret = rds_ib_post_reg_frmr(ibmr); | ||
213 | if (ret) | ||
214 | goto out_unmap; | ||
215 | |||
216 | if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) | ||
217 | rds_ib_stats_inc(s_ib_rdma_mr_8k_used); | ||
218 | else | ||
219 | rds_ib_stats_inc(s_ib_rdma_mr_1m_used); | ||
220 | |||
221 | return ret; | ||
222 | |||
223 | out_unmap: | ||
224 | ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len, | ||
225 | DMA_BIDIRECTIONAL); | ||
226 | ibmr->sg_dma_len = 0; | ||
227 | return ret; | ||
228 | } | ||
229 | |||
230 | static int rds_ib_post_inv(struct rds_ib_mr *ibmr) | ||
231 | { | ||
232 | struct ib_send_wr *s_wr, *failed_wr; | ||
233 | struct rds_ib_frmr *frmr = &ibmr->u.frmr; | ||
234 | struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id; | ||
235 | int ret = -EINVAL; | ||
236 | |||
237 | if (!i_cm_id || !i_cm_id->qp || !frmr->mr) | ||
238 | goto out; | ||
239 | |||
240 | if (frmr->fr_state != FRMR_IS_INUSE) | ||
241 | goto out; | ||
242 | |||
243 | while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) { | ||
244 | atomic_inc(&ibmr->ic->i_fastreg_wrs); | ||
245 | cpu_relax(); | ||
246 | } | ||
247 | |||
248 | frmr->fr_inv = true; | ||
249 | s_wr = &frmr->fr_wr; | ||
250 | |||
251 | memset(s_wr, 0, sizeof(*s_wr)); | ||
252 | s_wr->wr_id = (unsigned long)(void *)ibmr; | ||
253 | s_wr->opcode = IB_WR_LOCAL_INV; | ||
254 | s_wr->ex.invalidate_rkey = frmr->mr->rkey; | ||
255 | s_wr->send_flags = IB_SEND_SIGNALED; | ||
256 | |||
257 | failed_wr = s_wr; | ||
258 | ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr); | ||
259 | WARN_ON(failed_wr != s_wr); | ||
260 | if (unlikely(ret)) { | ||
261 | frmr->fr_state = FRMR_IS_STALE; | ||
262 | frmr->fr_inv = false; | ||
263 | atomic_inc(&ibmr->ic->i_fastreg_wrs); | ||
264 | pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret); | ||
265 | goto out; | ||
266 | } | ||
267 | out: | ||
268 | return ret; | ||
269 | } | ||
270 | |||
271 | void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) | ||
272 | { | ||
273 | struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id; | ||
274 | struct rds_ib_frmr *frmr = &ibmr->u.frmr; | ||
275 | |||
276 | if (wc->status != IB_WC_SUCCESS) { | ||
277 | frmr->fr_state = FRMR_IS_STALE; | ||
278 | if (rds_conn_up(ic->conn)) | ||
279 | rds_ib_conn_error(ic->conn, | ||
280 | "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n", | ||
281 | &ic->conn->c_laddr, | ||
282 | &ic->conn->c_faddr, | ||
283 | wc->status, | ||
284 | ib_wc_status_msg(wc->status), | ||
285 | wc->vendor_err); | ||
286 | } | ||
287 | |||
288 | if (frmr->fr_inv) { | ||
289 | frmr->fr_state = FRMR_IS_FREE; | ||
290 | frmr->fr_inv = false; | ||
291 | } | ||
292 | |||
293 | atomic_inc(&ic->i_fastreg_wrs); | ||
294 | } | ||
295 | |||
296 | void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed, | ||
297 | unsigned long *unpinned, unsigned int goal) | ||
298 | { | ||
299 | struct rds_ib_mr *ibmr, *next; | ||
300 | struct rds_ib_frmr *frmr; | ||
301 | int ret = 0; | ||
302 | unsigned int freed = *nfreed; | ||
303 | |||
304 | /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ | ||
305 | list_for_each_entry(ibmr, list, unmap_list) { | ||
306 | if (ibmr->sg_dma_len) | ||
307 | ret |= rds_ib_post_inv(ibmr); | ||
308 | } | ||
309 | if (ret) | ||
310 | pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret); | ||
311 | |||
312 | /* Now we can destroy the DMA mapping and unpin any pages */ | ||
313 | list_for_each_entry_safe(ibmr, next, list, unmap_list) { | ||
314 | *unpinned += ibmr->sg_len; | ||
315 | frmr = &ibmr->u.frmr; | ||
316 | __rds_ib_teardown_mr(ibmr); | ||
317 | if (freed < goal || frmr->fr_state == FRMR_IS_STALE) { | ||
318 | /* Don't de-allocate if the MR is not free yet */ | ||
319 | if (frmr->fr_state == FRMR_IS_INUSE) | ||
320 | continue; | ||
321 | |||
322 | if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) | ||
323 | rds_ib_stats_inc(s_ib_rdma_mr_8k_free); | ||
324 | else | ||
325 | rds_ib_stats_inc(s_ib_rdma_mr_1m_free); | ||
326 | list_del(&ibmr->unmap_list); | ||
327 | if (frmr->mr) | ||
328 | ib_dereg_mr(frmr->mr); | ||
329 | kfree(ibmr); | ||
330 | freed++; | ||
331 | } | ||
332 | } | ||
333 | *nfreed = freed; | ||
334 | } | ||
335 | |||
336 | struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev, | ||
337 | struct rds_ib_connection *ic, | ||
338 | struct scatterlist *sg, | ||
339 | unsigned long nents, u32 *key) | ||
340 | { | ||
341 | struct rds_ib_mr *ibmr = NULL; | ||
342 | struct rds_ib_frmr *frmr; | ||
343 | int ret; | ||
344 | |||
345 | do { | ||
346 | if (ibmr) | ||
347 | rds_ib_free_frmr(ibmr, true); | ||
348 | ibmr = rds_ib_alloc_frmr(rds_ibdev, nents); | ||
349 | if (IS_ERR(ibmr)) | ||
350 | return ibmr; | ||
351 | frmr = &ibmr->u.frmr; | ||
352 | } while (frmr->fr_state != FRMR_IS_FREE); | ||
353 | |||
354 | ibmr->ic = ic; | ||
355 | ibmr->device = rds_ibdev; | ||
356 | ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents); | ||
357 | if (ret == 0) { | ||
358 | *key = frmr->mr->rkey; | ||
359 | } else { | ||
360 | rds_ib_free_frmr(ibmr, false); | ||
361 | ibmr = ERR_PTR(ret); | ||
362 | } | ||
363 | |||
364 | return ibmr; | ||
365 | } | ||
366 | |||
367 | void rds_ib_free_frmr_list(struct rds_ib_mr *ibmr) | ||
368 | { | ||
369 | struct rds_ib_mr_pool *pool = ibmr->pool; | ||
370 | struct rds_ib_frmr *frmr = &ibmr->u.frmr; | ||
371 | |||
372 | if (frmr->fr_state == FRMR_IS_STALE) | ||
373 | llist_add(&ibmr->llnode, &pool->drop_list); | ||
374 | else | ||
375 | llist_add(&ibmr->llnode, &pool->free_list); | ||
376 | } | ||