author		Andy Grover <andy.grover@oracle.com>	2010-05-24 23:12:41 -0400
committer	Andy Grover <andy.grover@oracle.com>	2010-09-08 21:15:20 -0400
commit		0b088e003ccf316a76c51be5dec2d70b93be3be8 (patch)
tree		76fb8b9ab49729a97e137b73bbf4e12b65cde89d /net/rds
parent		fc19de38be924728fea76026c0d1a6c4b6156084 (diff)
RDS: Use page_remainder_alloc() for recv bufs
Instead of splitting up a page into RDS_FRAG_SIZE chunks ourselves, ask
rds_page_remainder_alloc() to do it. While it is possible PAGE_SIZE >
FRAG_SIZE, on x86en it isn't, so having duplicate "carve up a page into
buffers" code seems excessive.

The other modification this spawns is the use of a single struct
scatterlist in rds_page_frag instead of a bare page ptr. This causes
verbosity to increase in some places, and decrease in others.

Finally, I decided to unify the lifetimes and alloc/free of
rds_page_frag and its page. This is a nice simplification in itself,
but will be extra-nice once we come to adding cmason's recycling patch.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
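For readers new to scatterlists: a one-entry struct scatterlist bundles the page pointer, offset, and length that the old f_page/f_offset pair tracked by hand. Below is a minimal before/after sketch of the bookkeeping. It is illustrative only, not part of the patch: demo_fill() is hypothetical, and RDS_FRAG_SIZE is assumed to come from net/rds/ib.h.

	#include <linux/types.h>
	#include <linux/scatterlist.h>

	struct old_frag {			/* layout before this patch */
		struct page *f_page;
		unsigned long f_offset;
		dma_addr_t f_mapped;
	};

	struct new_frag {			/* layout after this patch */
		struct scatterlist f_sg;	/* page + offset + length in one */
	};

	/* Hypothetical helper: point a one-entry scatterlist at a fragment. */
	static void demo_fill(struct new_frag *frag, struct page *page,
			      unsigned int offset)
	{
		sg_init_table(&frag->f_sg, 1);		/* single entry */
		sg_set_page(&frag->f_sg, page, RDS_FRAG_SIZE, offset);
		/* read back later via sg_page(&frag->f_sg) and f_sg.offset */
	}

The payoff is that one field now answers every question the three old fields answered, and the DMA API can consume it directly.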
Diffstat (limited to 'net/rds')
-rw-r--r--	net/rds/ib.h		 7
-rw-r--r--	net/rds/ib_recv.c	94
-rw-r--r--	net/rds/page.c		 1
3 files changed, 29 insertions, 73 deletions
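The diff below also trades the ib_dma_map_page()/ib_dma_unmap_page() pair for the scatterlist variants. A rough sketch of the new pairing follows; it is a simplified, hypothetical extract rather than the patch itself (demo_map_frag()/demo_unmap_frag() are made-up names, dev stands in for ic->i_cm_id->device, and the patch itself just WARN_ONs on a short map instead of returning an error).

	#include <rdma/ib_verbs.h>
	#include <linux/scatterlist.h>

	static int demo_map_frag(struct ib_device *dev,
				 struct rds_page_frag *frag,
				 struct ib_sge *sge)
	{
		/* ib_dma_map_sg() returns the number of entries mapped;
		 * with a one-entry list, anything but 1 means failure. */
		if (ib_dma_map_sg(dev, &frag->f_sg, 1, DMA_FROM_DEVICE) != 1)
			return -EIO;

		sge->addr   = sg_dma_address(&frag->f_sg);  /* device-visible */
		sge->length = sg_dma_len(&frag->f_sg);
		return 0;
	}

	static void demo_unmap_frag(struct ib_device *dev,
				    struct rds_page_frag *frag)
	{
		ib_dma_unmap_sg(dev, &frag->f_sg, 1, DMA_FROM_DEVICE);
	}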
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 282ec69fe282..9bb7a7412a44 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -28,13 +28,9 @@ extern struct list_head rds_ib_devices;
  * try and minimize the amount of memory tied up both the device and
  * socket receive queues.
  */
-/* page offset of the final full frag that fits in the page */
-#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
 struct rds_page_frag {
 	struct list_head f_item;
-	struct page *f_page;
-	unsigned long f_offset;
-	dma_addr_t f_mapped;
+	struct scatterlist f_sg;
 };
 
 struct rds_ib_incoming {
@@ -107,7 +103,6 @@ struct rds_ib_connection {
 	struct rds_header *i_recv_hdrs;
 	u64 i_recv_hdrs_dma;
 	struct rds_ib_recv_work *i_recvs;
-	struct rds_page_frag i_frag;
 	u64 i_ack_recv;	/* last ACK received */
 
 	/* sending acks */
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 24d14615f41a..f6dbf16e0741 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -43,17 +43,11 @@ static struct kmem_cache *rds_ib_incoming_slab;
 static struct kmem_cache *rds_ib_frag_slab;
 static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
 
-static void rds_ib_frag_drop_page(struct rds_page_frag *frag)
-{
-	rdsdebug("frag %p page %p\n", frag, frag->f_page);
-	__free_page(frag->f_page);
-	frag->f_page = NULL;
-}
-
+/* Free frag and attached recv buffer f_sg */
 static void rds_ib_frag_free(struct rds_page_frag *frag)
 {
-	rdsdebug("frag %p page %p\n", frag, frag->f_page);
-	BUG_ON(frag->f_page);
+	rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
+	__free_page(sg_page(&frag->f_sg));
 	kmem_cache_free(rds_ib_frag_slab, frag);
 }
 
@@ -71,12 +65,8 @@ static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic,
 {
 	struct rds_page_frag *frag = recv->r_frag;
 
-	rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
-	if (frag->f_mapped)
-		ib_dma_unmap_page(ic->i_cm_id->device,
-			       frag->f_mapped,
-			       RDS_FRAG_SIZE, DMA_FROM_DEVICE);
-	frag->f_mapped = 0;
+	rdsdebug("recv %p frag %p page %p\n", recv, frag, sg_page(&frag->f_sg));
+	ib_dma_unmap_sg(ic->i_cm_id->device, &frag->f_sg, 1, DMA_FROM_DEVICE);
 }
 
 void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
@@ -116,8 +106,6 @@ static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
 	}
 	if (recv->r_frag) {
 		rds_ib_recv_unmap_page(ic, recv);
-		if (recv->r_frag->f_page)
-			rds_ib_frag_drop_page(recv->r_frag);
 		rds_ib_frag_free(recv->r_frag);
 		recv->r_frag = NULL;
 	}
@@ -129,16 +117,12 @@ void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
 
 	for (i = 0; i < ic->i_recv_ring.w_nr; i++)
 		rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
-
-	if (ic->i_frag.f_page)
-		rds_ib_frag_drop_page(&ic->i_frag);
 }
 
 static int rds_ib_recv_refill_one(struct rds_connection *conn,
 				  struct rds_ib_recv_work *recv)
 {
 	struct rds_ib_connection *ic = conn->c_transport_data;
-	dma_addr_t dma_addr;
 	struct ib_sge *sge;
 	int ret = -ENOMEM;
 
@@ -161,50 +145,27 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
 		if (!recv->r_frag)
 			goto out;
 		INIT_LIST_HEAD(&recv->r_frag->f_item);
-		recv->r_frag->f_page = NULL;
-	}
-
-	if (!ic->i_frag.f_page) {
-		ic->i_frag.f_page = alloc_page(GFP_NOWAIT);
-		if (!ic->i_frag.f_page)
-			goto out;
-		ic->i_frag.f_offset = 0;
+		sg_init_table(&recv->r_frag->f_sg, 1);
+		ret = rds_page_remainder_alloc(&recv->r_frag->f_sg,
+					       RDS_FRAG_SIZE, GFP_NOWAIT);
+		if (ret) {
+			kmem_cache_free(rds_ib_frag_slab, recv->r_frag);
+			recv->r_frag = NULL;
+			goto out;
+		}
 	}
 
-	dma_addr = ib_dma_map_page(ic->i_cm_id->device,
-				   ic->i_frag.f_page,
-				   ic->i_frag.f_offset,
-				   RDS_FRAG_SIZE,
-				   DMA_FROM_DEVICE);
-	if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
-		goto out;
-
-	/*
-	 * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap()
-	 * must be called on this recv. This happens as completions hit
-	 * in order or on connection shutdown.
-	 */
-	recv->r_frag->f_page = ic->i_frag.f_page;
-	recv->r_frag->f_offset = ic->i_frag.f_offset;
-	recv->r_frag->f_mapped = dma_addr;
+	ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
+			    1, DMA_FROM_DEVICE);
+	WARN_ON(ret != 1);
 
 	sge = &recv->r_sge[0];
 	sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
 	sge->length = sizeof(struct rds_header);
 
 	sge = &recv->r_sge[1];
-	sge->addr = dma_addr;
-	sge->length = RDS_FRAG_SIZE;
-
-	get_page(recv->r_frag->f_page);
-
-	if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) {
-		ic->i_frag.f_offset += RDS_FRAG_SIZE;
-	} else {
-		put_page(ic->i_frag.f_page);
-		ic->i_frag.f_page = NULL;
-		ic->i_frag.f_offset = 0;
-	}
+	sge->addr = sg_dma_address(&recv->r_frag->f_sg);
+	sge->length = sg_dma_len(&recv->r_frag->f_sg);
 
 	ret = 0;
 out:
@@ -247,8 +208,8 @@ int rds_ib_recv_refill(struct rds_connection *conn, int prefill)
 		/* XXX when can this fail? */
 		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
 		rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
-			 recv->r_ibinc, recv->r_frag->f_page,
-			 (long) recv->r_frag->f_mapped, ret);
+			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
+			 (long) sg_dma_address(&recv->r_frag->f_sg), ret);
 		if (ret) {
 			rds_ib_conn_error(conn, "recv post on "
 					  "%pI4 returned %d, disconnecting and "
@@ -281,7 +242,6 @@ static void rds_ib_inc_purge(struct rds_incoming *inc)
 
 	list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
 		list_del_init(&frag->f_item);
-		rds_ib_frag_drop_page(frag);
 		rds_ib_frag_free(frag);
 	}
 }
@@ -333,13 +293,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
 		to_copy = min_t(unsigned long, to_copy, len - copied);
 
 		rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
-			 "[%p, %lu] + %lu\n",
+			 "[%p, %u] + %lu\n",
 			 to_copy, iov->iov_base, iov->iov_len, iov_off,
-			 frag->f_page, frag->f_offset, frag_off);
+			 sg_page(&frag->f_sg), frag->f_sg.offset, frag_off);
 
 		/* XXX needs + offset for multiple recvs per page */
-		ret = rds_page_copy_to_user(frag->f_page,
-					    frag->f_offset + frag_off,
+		ret = rds_page_copy_to_user(sg_page(&frag->f_sg),
+					    frag->f_sg.offset + frag_off,
 					    iov->iov_base + iov_off,
 					    to_copy);
 		if (ret) {
@@ -595,7 +555,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
 		to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
 		BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
 
-		addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0);
+		addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0);
 
 		src = addr + frag_off;
 		dst = (void *)map->m_page_addrs[map_page] + map_off;
@@ -698,12 +658,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
 		 * the inc is freed.  We don't go that route, so we have to drop the
 		 * page ref ourselves.  We can't just leave the page on the recv
 		 * because that confuses the dma mapping of pages and each recv's use
-		 * of a partial page.  We can leave the frag, though, it will be
-		 * reused.
+		 * of a partial page.
 		 *
 		 * FIXME: Fold this into the code path below.
 		 */
-		rds_ib_frag_drop_page(recv->r_frag);
+		rds_ib_frag_free(recv->r_frag);
+		recv->r_frag = NULL;
 		return;
 	}
 
diff --git a/net/rds/page.c b/net/rds/page.c
index e5b2527ae257..5e44f5ae7898 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -186,6 +186,7 @@ out:
 		 ret ? 0 : scat->length);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
 
 static int rds_page_remainder_cpu_notify(struct notifier_block *self,
 					 unsigned long action, void *hcpu)
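The new EXPORT_SYMBOL_GPL line is needed because rds_page_remainder_alloc() lives in the RDS core while its new caller, the refill path above, is built into the separately loadable IB transport module. A hedged sketch of the calling convention (demo_alloc_frag() is a hypothetical name; RDS_FRAG_SIZE is assumed from net/rds/ib.h):

	#include <linux/gfp.h>
	#include <linux/scatterlist.h>

	/* Hypothetical module-side caller of the newly exported allocator. */
	static int demo_alloc_frag(struct scatterlist *sg)
	{
		sg_init_table(sg, 1);
		/* Carves an RDS_FRAG_SIZE slice out of a shared page;
		 * successive callers keep consuming the same page until its
		 * remainder runs out. Returns 0 on success. */
		return rds_page_remainder_alloc(sg, RDS_FRAG_SIZE, GFP_NOWAIT);
	}

Centralizing the carve-up this way is what lets the patch delete the per-connection i_frag cursor and RDS_PAGE_LAST_OFF arithmetic from ib_recv.c.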