diff options
author | Sebastian Sanchez <sebastian.sanchez@intel.com> | 2017-02-08 08:26:31 -0500 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2017-02-19 09:18:35 -0500 |
commit | 338adfdddf6abe89726e1146ad3102ce9663a634 (patch) | |
tree | e509ec29d80f6cd81097ffea338a8a7222ec7389 | |
parent | f3e862cb6894389a35d0beb10f73d62eb3317beb (diff) |
IB/rdmavt: Use per-CPU reference count for MRs
Having a per-CPU reference count for each MR prevents
cache-line bouncing across the system. Thus, it
prevents bottlenecks. Use per-CPU reference counts
per MR.
The per-CPU reference count for FMRs is used in
atomic mode to allow accurate testing of the busy
state. Other MR types run in per-CPU mode until
they're freed.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r-- | drivers/infiniband/sw/rdmavt/mr.c | 59 | ||||
-rw-r--r-- | include/rdma/rdmavt_mr.h | 10 |
2 files changed, 43 insertions, 26 deletions
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 52fd15276ee6..c80a69b1ffcb 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c | |||
@@ -120,10 +120,19 @@ static void rvt_deinit_mregion(struct rvt_mregion *mr) | |||
120 | mr->mapsz = 0; | 120 | mr->mapsz = 0; |
121 | while (i) | 121 | while (i) |
122 | kfree(mr->map[--i]); | 122 | kfree(mr->map[--i]); |
123 | percpu_ref_exit(&mr->refcount); | ||
124 | } | ||
125 | |||
126 | static void __rvt_mregion_complete(struct percpu_ref *ref) | ||
127 | { | ||
128 | struct rvt_mregion *mr = container_of(ref, struct rvt_mregion, | ||
129 | refcount); | ||
130 | |||
131 | complete(&mr->comp); | ||
123 | } | 132 | } |
124 | 133 | ||
125 | static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, | 134 | static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, |
126 | int count) | 135 | int count, unsigned int percpu_flags) |
127 | { | 136 | { |
128 | int m, i = 0; | 137 | int m, i = 0; |
129 | struct rvt_dev_info *dev = ib_to_rvt(pd->device); | 138 | struct rvt_dev_info *dev = ib_to_rvt(pd->device); |
@@ -133,19 +142,23 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, | |||
133 | for (; i < m; i++) { | 142 | for (; i < m; i++) { |
134 | mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL, | 143 | mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL, |
135 | dev->dparms.node); | 144 | dev->dparms.node); |
136 | if (!mr->map[i]) { | 145 | if (!mr->map[i]) |
137 | rvt_deinit_mregion(mr); | 146 | goto bail; |
138 | return -ENOMEM; | ||
139 | } | ||
140 | mr->mapsz++; | 147 | mr->mapsz++; |
141 | } | 148 | } |
142 | init_completion(&mr->comp); | 149 | init_completion(&mr->comp); |
143 | /* count returning the ptr to user */ | 150 | /* count returning the ptr to user */ |
144 | atomic_set(&mr->refcount, 1); | 151 | if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete, |
152 | percpu_flags, GFP_KERNEL)) | ||
153 | goto bail; | ||
154 | |||
145 | atomic_set(&mr->lkey_invalid, 0); | 155 | atomic_set(&mr->lkey_invalid, 0); |
146 | mr->pd = pd; | 156 | mr->pd = pd; |
147 | mr->max_segs = count; | 157 | mr->max_segs = count; |
148 | return 0; | 158 | return 0; |
159 | bail: | ||
160 | rvt_deinit_mregion(mr); | ||
161 | return -ENOMEM; | ||
149 | } | 162 | } |
150 | 163 | ||
151 | /** | 164 | /** |
@@ -180,8 +193,7 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region) | |||
180 | if (!tmr) { | 193 | if (!tmr) { |
181 | rcu_assign_pointer(dev->dma_mr, mr); | 194 | rcu_assign_pointer(dev->dma_mr, mr); |
182 | mr->lkey_published = 1; | 195 | mr->lkey_published = 1; |
183 | } else { | 196 | rvt_get_mr(mr); |
184 | rvt_put_mr(mr); | ||
185 | } | 197 | } |
186 | goto success; | 198 | goto success; |
187 | } | 199 | } |
@@ -239,11 +251,14 @@ static void rvt_free_lkey(struct rvt_mregion *mr) | |||
239 | int freed = 0; | 251 | int freed = 0; |
240 | 252 | ||
241 | spin_lock_irqsave(&rkt->lock, flags); | 253 | spin_lock_irqsave(&rkt->lock, flags); |
242 | if (!mr->lkey_published) | 254 | if (!lkey) { |
243 | goto out; | 255 | if (mr->lkey_published) { |
244 | if (lkey == 0) { | 256 | RCU_INIT_POINTER(dev->dma_mr, NULL); |
245 | RCU_INIT_POINTER(dev->dma_mr, NULL); | 257 | rvt_put_mr(mr); |
258 | } | ||
246 | } else { | 259 | } else { |
260 | if (!mr->lkey_published) | ||
261 | goto out; | ||
247 | r = lkey >> (32 - dev->dparms.lkey_table_size); | 262 | r = lkey >> (32 - dev->dparms.lkey_table_size); |
248 | RCU_INIT_POINTER(rkt->table[r], NULL); | 263 | RCU_INIT_POINTER(rkt->table[r], NULL); |
249 | } | 264 | } |
@@ -253,7 +268,7 @@ out: | |||
253 | spin_unlock_irqrestore(&rkt->lock, flags); | 268 | spin_unlock_irqrestore(&rkt->lock, flags); |
254 | if (freed) { | 269 | if (freed) { |
255 | synchronize_rcu(); | 270 | synchronize_rcu(); |
256 | rvt_put_mr(mr); | 271 | percpu_ref_kill(&mr->refcount); |
257 | } | 272 | } |
258 | } | 273 | } |
259 | 274 | ||
@@ -269,7 +284,7 @@ static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd) | |||
269 | if (!mr) | 284 | if (!mr) |
270 | goto bail; | 285 | goto bail; |
271 | 286 | ||
272 | rval = rvt_init_mregion(&mr->mr, pd, count); | 287 | rval = rvt_init_mregion(&mr->mr, pd, count, 0); |
273 | if (rval) | 288 | if (rval) |
274 | goto bail; | 289 | goto bail; |
275 | /* | 290 | /* |
@@ -294,8 +309,8 @@ bail: | |||
294 | 309 | ||
295 | static void __rvt_free_mr(struct rvt_mr *mr) | 310 | static void __rvt_free_mr(struct rvt_mr *mr) |
296 | { | 311 | { |
297 | rvt_deinit_mregion(&mr->mr); | ||
298 | rvt_free_lkey(&mr->mr); | 312 | rvt_free_lkey(&mr->mr); |
313 | rvt_deinit_mregion(&mr->mr); | ||
299 | kfree(mr); | 314 | kfree(mr); |
300 | } | 315 | } |
301 | 316 | ||
@@ -323,7 +338,7 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) | |||
323 | goto bail; | 338 | goto bail; |
324 | } | 339 | } |
325 | 340 | ||
326 | rval = rvt_init_mregion(&mr->mr, pd, 0); | 341 | rval = rvt_init_mregion(&mr->mr, pd, 0, 0); |
327 | if (rval) { | 342 | if (rval) { |
328 | ret = ERR_PTR(rval); | 343 | ret = ERR_PTR(rval); |
329 | goto bail; | 344 | goto bail; |
@@ -445,8 +460,8 @@ int rvt_dereg_mr(struct ib_mr *ibmr) | |||
445 | timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ); | 460 | timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ); |
446 | if (!timeout) { | 461 | if (!timeout) { |
447 | rvt_pr_err(rdi, | 462 | rvt_pr_err(rdi, |
448 | "rvt_dereg_mr timeout mr %p pd %p refcount %u\n", | 463 | "rvt_dereg_mr timeout mr %p pd %p\n", |
449 | mr, mr->mr.pd, atomic_read(&mr->mr.refcount)); | 464 | mr, mr->mr.pd); |
450 | rvt_get_mr(&mr->mr); | 465 | rvt_get_mr(&mr->mr); |
451 | ret = -EBUSY; | 466 | ret = -EBUSY; |
452 | goto out; | 467 | goto out; |
@@ -623,7 +638,8 @@ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, | |||
623 | if (!fmr) | 638 | if (!fmr) |
624 | goto bail; | 639 | goto bail; |
625 | 640 | ||
626 | rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages); | 641 | rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages, |
642 | PERCPU_REF_INIT_ATOMIC); | ||
627 | if (rval) | 643 | if (rval) |
628 | goto bail; | 644 | goto bail; |
629 | 645 | ||
@@ -674,11 +690,12 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, | |||
674 | struct rvt_fmr *fmr = to_ifmr(ibfmr); | 690 | struct rvt_fmr *fmr = to_ifmr(ibfmr); |
675 | struct rvt_lkey_table *rkt; | 691 | struct rvt_lkey_table *rkt; |
676 | unsigned long flags; | 692 | unsigned long flags; |
677 | int m, n, i; | 693 | int m, n; |
694 | unsigned long i; | ||
678 | u32 ps; | 695 | u32 ps; |
679 | struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device); | 696 | struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device); |
680 | 697 | ||
681 | i = atomic_read(&fmr->mr.refcount); | 698 | i = atomic_long_read(&fmr->mr.refcount.count); |
682 | if (i > 2) | 699 | if (i > 2) |
683 | return -EBUSY; | 700 | return -EBUSY; |
684 | 701 | ||
diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index de59de28b6a2..05698d8d9c6f 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h | |||
@@ -52,6 +52,7 @@ | |||
52 | * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once | 52 | * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once |
53 | * drivers no longer need access to the MR directly. | 53 | * drivers no longer need access to the MR directly. |
54 | */ | 54 | */ |
55 | #include <linux/percpu-refcount.h> | ||
55 | 56 | ||
56 | /* | 57 | /* |
57 | * A segment is a linear region of low physical memory. | 58 | * A segment is a linear region of low physical memory. |
@@ -79,11 +80,11 @@ struct rvt_mregion { | |||
79 | int access_flags; | 80 | int access_flags; |
80 | u32 max_segs; /* number of rvt_segs in all the arrays */ | 81 | u32 max_segs; /* number of rvt_segs in all the arrays */ |
81 | u32 mapsz; /* size of the map array */ | 82 | u32 mapsz; /* size of the map array */ |
83 | atomic_t lkey_invalid; /* true if current lkey is invalid */ | ||
82 | u8 page_shift; /* 0 - non unform/non powerof2 sizes */ | 84 | u8 page_shift; /* 0 - non unform/non powerof2 sizes */ |
83 | u8 lkey_published; /* in global table */ | 85 | u8 lkey_published; /* in global table */ |
84 | atomic_t lkey_invalid; /* true if current lkey is invalid */ | 86 | struct percpu_ref refcount; |
85 | struct completion comp; /* complete when refcount goes to zero */ | 87 | struct completion comp; /* complete when refcount goes to zero */ |
86 | atomic_t refcount; | ||
87 | struct rvt_segarray *map[0]; /* the segments */ | 88 | struct rvt_segarray *map[0]; /* the segments */ |
88 | }; | 89 | }; |
89 | 90 | ||
@@ -123,13 +124,12 @@ struct rvt_sge_state { | |||
123 | 124 | ||
124 | static inline void rvt_put_mr(struct rvt_mregion *mr) | 125 | static inline void rvt_put_mr(struct rvt_mregion *mr) |
125 | { | 126 | { |
126 | if (unlikely(atomic_dec_and_test(&mr->refcount))) | 127 | percpu_ref_put(&mr->refcount); |
127 | complete(&mr->comp); | ||
128 | } | 128 | } |
129 | 129 | ||
130 | static inline void rvt_get_mr(struct rvt_mregion *mr) | 130 | static inline void rvt_get_mr(struct rvt_mregion *mr) |
131 | { | 131 | { |
132 | atomic_inc(&mr->refcount); | 132 | percpu_ref_get(&mr->refcount); |
133 | } | 133 | } |
134 | 134 | ||
135 | static inline void rvt_put_ss(struct rvt_sge_state *ss) | 135 | static inline void rvt_put_ss(struct rvt_sge_state *ss) |