Diffstat (limited to 'net/rds')
-rw-r--r--	net/rds/ib.h	19
-rw-r--r--	net/rds/ib_cm.c	9
-rw-r--r--	net/rds/ib_recv.c	321
3 files changed, 297 insertions, 52 deletions
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 9bb7a7412a44..2efd9d11e7d4 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -21,6 +21,8 @@
 
 #define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
 
+#define RDS_IB_RECYCLE_BATCH_COUNT 32
+
 extern struct list_head rds_ib_devices;
 
 /*
@@ -30,14 +32,27 @@ extern struct list_head rds_ib_devices;
  */
 struct rds_page_frag {
 	struct list_head	f_item;
+	struct list_head	f_cache_entry;
 	struct scatterlist	f_sg;
 };
 
 struct rds_ib_incoming {
 	struct list_head	ii_frags;
+	struct list_head	ii_cache_entry;
 	struct rds_incoming	ii_inc;
 };
 
+struct rds_ib_cache_head {
+	struct list_head *first;
+	unsigned long count;
+};
+
+struct rds_ib_refill_cache {
+	struct rds_ib_cache_head *percpu;
+	struct list_head	 *xfer;
+	struct list_head	 *ready;
+};
+
 struct rds_ib_connect_private {
 	/* Add new fields at the end, and don't permute existing fields. */
 	__be32			dp_saddr;
@@ -104,6 +119,8 @@ struct rds_ib_connection {
 	u64			i_recv_hdrs_dma;
 	struct rds_ib_recv_work *i_recvs;
 	u64			i_ack_recv;	/* last ACK received */
+	struct rds_ib_refill_cache i_cache_incs;
+	struct rds_ib_refill_cache i_cache_frags;
 
 	/* sending acks */
 	unsigned long		i_ack_flags;
@@ -304,6 +321,8 @@ void rds_ib_flush_mrs(void);
 int __init rds_ib_recv_init(void);
 void rds_ib_recv_exit(void);
 int rds_ib_recv(struct rds_connection *conn);
+int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
+void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
 int rds_ib_recv_refill(struct rds_connection *conn, int prefill);
 void rds_ib_inc_free(struct rds_incoming *inc);
 int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index a9fb917c00bb..10f6a8815cd0 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -709,12 +709,19 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 {
 	struct rds_ib_connection *ic;
 	unsigned long flags;
+	int ret;
 
 	/* XXX too lazy? */
 	ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
 	if (!ic)
 		return -ENOMEM;
 
+	ret = rds_ib_recv_alloc_caches(ic);
+	if (ret) {
+		kfree(ic);
+		return ret;
+	}
+
 	INIT_LIST_HEAD(&ic->ib_node);
 	tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn,
 		     (unsigned long) ic);
@@ -763,6 +770,8 @@ void rds_ib_conn_free(void *arg)
 	list_del(&ic->ib_node);
 	spin_unlock_irq(lock_ptr);
 
+	rds_ib_recv_free_caches(ic);
+
 	kfree(ic);
 }
 
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 48add108ddee..5b429b7fd81c 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -43,14 +43,6 @@ static struct kmem_cache *rds_ib_incoming_slab;
 static struct kmem_cache *rds_ib_frag_slab;
 static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
 
-/* Free frag and attached recv buffer f_sg */
-static void rds_ib_frag_free(struct rds_page_frag *frag)
-{
-	rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
-	__free_page(sg_page(&frag->f_sg));
-	kmem_cache_free(rds_ib_frag_slab, frag);
-}
-
 void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
 {
 	struct rds_ib_recv_work *recv;
@@ -79,6 +71,151 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
 	}
 }
 
+/*
+ * The entire 'from' list, including the from element itself, is put on
+ * to the tail of the 'to' list.
+ */
+static void list_splice_entire_tail(struct list_head *from,
+				    struct list_head *to)
+{
+	struct list_head *from_last = from->prev;
+
+	list_splice_tail(from_last, to);
+	list_add_tail(from_last, to);
+}
+
+static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
+{
+	struct list_head *tmp;
+
+	tmp = xchg(&cache->xfer, NULL);
+	if (tmp) {
+		if (cache->ready)
+			list_splice_entire_tail(tmp, cache->ready);
+		else
+			cache->ready = tmp;
+	}
+}
+
+static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
+{
+	struct rds_ib_cache_head *head;
+	int cpu;
+
+	cache->percpu = alloc_percpu(struct rds_ib_cache_head);
+	if (!cache->percpu)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		head = per_cpu_ptr(cache->percpu, cpu);
+		head->first = NULL;
+		head->count = 0;
+	}
+	cache->xfer = NULL;
+	cache->ready = NULL;
+
+	return 0;
+}
+
+int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
+{
+	int ret;
+
+	ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
+	if (!ret) {
+		ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
+		if (ret)
+			free_percpu(ic->i_cache_incs.percpu);
+	}
+
+	return ret;
+}
+
+static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
+					  struct list_head *caller_list)
+{
+	struct rds_ib_cache_head *head;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		head = per_cpu_ptr(cache->percpu, cpu);
+		if (head->first) {
+			list_splice_entire_tail(head->first, caller_list);
+			head->first = NULL;
+		}
+	}
+
+	if (cache->ready) {
+		list_splice_entire_tail(cache->ready, caller_list);
+		cache->ready = NULL;
+	}
+}
+
+void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
+{
+	struct rds_ib_incoming *inc;
+	struct rds_ib_incoming *inc_tmp;
+	struct rds_page_frag *frag;
+	struct rds_page_frag *frag_tmp;
+	LIST_HEAD(list);
+
+	rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
+	rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
+	free_percpu(ic->i_cache_incs.percpu);
+
+	list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
+		list_del(&inc->ii_cache_entry);
+		WARN_ON(!list_empty(&inc->ii_frags));
+		kmem_cache_free(rds_ib_incoming_slab, inc);
+	}
+
+	rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
+	rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
+	free_percpu(ic->i_cache_frags.percpu);
+
+	list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
+		list_del(&frag->f_cache_entry);
+		WARN_ON(!list_empty(&frag->f_item));
+		kmem_cache_free(rds_ib_frag_slab, frag);
+	}
+}
+
+/* fwd decl */
+static void rds_ib_recv_cache_put(struct list_head *new_item,
+				  struct rds_ib_refill_cache *cache);
+static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);
+
+
+/* Recycle frag and attached recv buffer f_sg */
+static void rds_ib_frag_free(struct rds_ib_connection *ic,
+			     struct rds_page_frag *frag)
+{
+	rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
+
+	rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
+}
+
+/* Recycle inc after freeing attached frags */
+void rds_ib_inc_free(struct rds_incoming *inc)
+{
+	struct rds_ib_incoming *ibinc;
+	struct rds_page_frag *frag;
+	struct rds_page_frag *pos;
+	struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
+
+	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
+
+	/* Free attached frags */
+	list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
+		list_del_init(&frag->f_item);
+		rds_ib_frag_free(ic, frag);
+	}
+	BUG_ON(!list_empty(&ibinc->ii_frags));
+
+	rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
+	rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
+}
+
 static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
 				  struct rds_ib_recv_work *recv)
 {
@@ -88,7 +225,7 @@ static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
 	}
 	if (recv->r_frag) {
 		ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
-		rds_ib_frag_free(recv->r_frag);
+		rds_ib_frag_free(ic, recv->r_frag);
 		recv->r_frag = NULL;
 	}
 }
@@ -101,6 +238,61 @@ void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
 		rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
 }
 
+static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic)
+{
+	struct rds_ib_incoming *ibinc;
+	struct list_head *cache_item;
+	int avail_allocs;
+
+	cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
+	if (cache_item) {
+		ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
+	} else {
+		avail_allocs = atomic_add_unless(&rds_ib_allocation,
+						 1, rds_ib_sysctl_max_recv_allocation);
+		if (!avail_allocs) {
+			rds_ib_stats_inc(s_ib_rx_alloc_limit);
+			return NULL;
+		}
+		ibinc = kmem_cache_alloc(rds_ib_incoming_slab, GFP_NOWAIT);
+		if (!ibinc) {
+			atomic_dec(&rds_ib_allocation);
+			return NULL;
+		}
+	}
+	INIT_LIST_HEAD(&ibinc->ii_frags);
+	rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
+
+	return ibinc;
+}
+
+static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic)
+{
+	struct rds_page_frag *frag;
+	struct list_head *cache_item;
+	int ret;
+
+	cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
+	if (cache_item) {
+		frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
+	} else {
+		frag = kmem_cache_alloc(rds_ib_frag_slab, GFP_NOWAIT);
+		if (!frag)
+			return NULL;
+
+		ret = rds_page_remainder_alloc(&frag->f_sg,
+					       RDS_FRAG_SIZE, GFP_NOWAIT);
+		if (ret) {
+			kmem_cache_free(rds_ib_frag_slab, frag);
+			return NULL;
+		}
+	}
+
+	INIT_LIST_HEAD(&frag->f_item);
+
+	return frag;
+}
+
 static int rds_ib_recv_refill_one(struct rds_connection *conn,
 				  struct rds_ib_recv_work *recv)
 {
@@ -108,37 +300,25 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
 	struct ib_sge *sge;
 	int ret = -ENOMEM;
 
+	if (!ic->i_cache_incs.ready)
+		rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
+	if (!ic->i_cache_frags.ready)
+		rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
+
 	/*
 	 * ibinc was taken from recv if recv contained the start of a message.
 	 * recvs that were continuations will still have this allocated.
 	 */
 	if (!recv->r_ibinc) {
-		if (!atomic_add_unless(&rds_ib_allocation, 1, rds_ib_sysctl_max_recv_allocation)) {
-			rds_ib_stats_inc(s_ib_rx_alloc_limit);
-			goto out;
-		}
-		recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab, GFP_NOWAIT);
-		if (!recv->r_ibinc) {
-			atomic_dec(&rds_ib_allocation);
+		recv->r_ibinc = rds_ib_refill_one_inc(ic);
+		if (!recv->r_ibinc)
 			goto out;
-		}
-		INIT_LIST_HEAD(&recv->r_ibinc->ii_frags);
-		rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
 	}
 
 	WARN_ON(recv->r_frag); /* leak! */
-	recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, GFP_NOWAIT);
+	recv->r_frag = rds_ib_refill_one_frag(ic);
 	if (!recv->r_frag)
 		goto out;
-	INIT_LIST_HEAD(&recv->r_frag->f_item);
-	sg_init_table(&recv->r_frag->f_sg, 1);
-	ret = rds_page_remainder_alloc(&recv->r_frag->f_sg,
-				       RDS_FRAG_SIZE, GFP_NOWAIT);
-	if (ret) {
-		kmem_cache_free(rds_ib_frag_slab, recv->r_frag);
-		recv->r_frag = NULL;
-		goto out;
-	}
 
 	ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
 			    1, DMA_FROM_DEVICE);
@@ -160,8 +340,7 @@ out:
 /*
  * This tries to allocate and post unused work requests after making sure that
  * they have all the allocations they need to queue received fragments into
- * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc
- * pairs don't go unmatched.
+ * sockets.
  *
  * -1 is returned if posting fails due to temporary resource exhaustion.
 */
@@ -216,33 +395,71 @@ int rds_ib_recv_refill(struct rds_connection *conn, int prefill)
 	return ret;
 }
 
-static void rds_ib_inc_purge(struct rds_incoming *inc)
+/*
+ * We want to recycle several types of recv allocations, like incs and frags.
+ * To use this, the *_free() function passes in the ptr to a list_head within
+ * the recyclee, as well as the cache to put it on.
+ *
+ * First, we put the memory on a percpu list. When this reaches a certain size,
+ * We move it to an intermediate non-percpu list in a lockless manner, with some
+ * xchg/compxchg wizardry.
+ *
+ * N.B. Instead of a list_head as the anchor, we use a single pointer, which can
+ * be NULL and xchg'd. The list is actually empty when the pointer is NULL, and
+ * list_empty() will return true with one element is actually present.
+ */
+static void rds_ib_recv_cache_put(struct list_head *new_item,
+				  struct rds_ib_refill_cache *cache)
 {
-	struct rds_ib_incoming *ibinc;
-	struct rds_page_frag *frag;
-	struct rds_page_frag *pos;
+	unsigned long flags;
+	struct rds_ib_cache_head *chp;
+	struct list_head *old;
 
-	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
-	rdsdebug("purging ibinc %p inc %p\n", ibinc, inc);
+	local_irq_save(flags);
 
-	list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
-		list_del_init(&frag->f_item);
-		rds_ib_frag_free(frag);
-	}
+	chp = per_cpu_ptr(cache->percpu, smp_processor_id());
+	if (!chp->first)
+		INIT_LIST_HEAD(new_item);
+	else /* put on front */
+		list_add_tail(new_item, chp->first);
+	chp->first = new_item;
+	chp->count++;
+
+	if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT)
+		goto end;
+
+	/*
+	 * Return our per-cpu first list to the cache's xfer by atomically
+	 * grabbing the current xfer list, appending it to our per-cpu list,
+	 * and then atomically returning that entire list back to the
+	 * cache's xfer list as long as it's still empty.
+	 */
+	do {
+		old = xchg(&cache->xfer, NULL);
+		if (old)
+			list_splice_entire_tail(old, chp->first);
+		old = cmpxchg(&cache->xfer, NULL, chp->first);
+	} while (old);
+
+	chp->first = NULL;
+	chp->count = 0;
+end:
+	local_irq_restore(flags);
 }
 
-void rds_ib_inc_free(struct rds_incoming *inc)
+static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
 {
-	struct rds_ib_incoming *ibinc;
+	struct list_head *head = cache->ready;
 
-	ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
+	if (head) {
+		if (!list_empty(head)) {
+			cache->ready = head->next;
+			list_del_init(head);
+		} else
+			cache->ready = NULL;
+	}
 
-	rds_ib_inc_purge(inc);
-	rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
-	BUG_ON(!list_empty(&ibinc->ii_frags));
-	kmem_cache_free(rds_ib_incoming_slab, ibinc);
-	atomic_dec(&rds_ib_allocation);
-	BUG_ON(atomic_read(&rds_ib_allocation) < 0);
+	return head;
 }
 
 int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
@@ -647,7 +864,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
 		 *
 		 * FIXME: Fold this into the code path below.
 		 */
-		rds_ib_frag_free(recv->r_frag);
+		rds_ib_frag_free(ic, recv->r_frag);
 		recv->r_frag = NULL;
 		return;
 	}