diff options
author | Andy Grover <andy.grover@oracle.com> | 2010-05-24 23:12:41 -0400 |
---|---|---|
committer | Andy Grover <andy.grover@oracle.com> | 2010-09-08 21:15:20 -0400 |
commit | 0b088e003ccf316a76c51be5dec2d70b93be3be8 (patch) | |
tree | 76fb8b9ab49729a97e137b73bbf4e12b65cde89d /net | |
parent | fc19de38be924728fea76026c0d1a6c4b6156084 (diff) |
RDS: Use page_remainder_alloc() for recv bufs
Instead of splitting up a page into RDS_FRAG_SIZE chunks
ourselves, ask rds_page_remainder_alloc() to do it. While it is
possible for PAGE_SIZE to be larger than RDS_FRAG_SIZE, on x86 it isn't,
so having duplicate "carve up a page into buffers" code seems excessive.
The other modification this spawns is the use of a single
struct scatterlist in rds_page_frag instead of a bare page ptr.
This causes verbosity to increase in some places, and decrease
in others.
Finally, I decided to unify the lifetimes and alloc/free of
rds_page_frag and its page. This is a nice simplification in itself,
but will be extra-nice once we come to adding cmason's recycling
patch.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Diffstat (limited to 'net')
-rw-r--r-- | net/rds/ib.h | 7 | ||||
-rw-r--r-- | net/rds/ib_recv.c | 94 | ||||
-rw-r--r-- | net/rds/page.c | 1 |
3 files changed, 29 insertions, 73 deletions
diff --git a/net/rds/ib.h b/net/rds/ib.h index 282ec69fe282..9bb7a7412a44 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h | |||
@@ -28,13 +28,9 @@ extern struct list_head rds_ib_devices; | |||
28 | * try and minimize the amount of memory tied up both the device and | 28 | * try and minimize the amount of memory tied up both the device and |
29 | * socket receive queues. | 29 | * socket receive queues. |
30 | */ | 30 | */ |
31 | /* page offset of the final full frag that fits in the page */ | ||
32 | #define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE) | ||
33 | struct rds_page_frag { | 31 | struct rds_page_frag { |
34 | struct list_head f_item; | 32 | struct list_head f_item; |
35 | struct page *f_page; | 33 | struct scatterlist f_sg; |
36 | unsigned long f_offset; | ||
37 | dma_addr_t f_mapped; | ||
38 | }; | 34 | }; |
39 | 35 | ||
40 | struct rds_ib_incoming { | 36 | struct rds_ib_incoming { |
@@ -107,7 +103,6 @@ struct rds_ib_connection { | |||
107 | struct rds_header *i_recv_hdrs; | 103 | struct rds_header *i_recv_hdrs; |
108 | u64 i_recv_hdrs_dma; | 104 | u64 i_recv_hdrs_dma; |
109 | struct rds_ib_recv_work *i_recvs; | 105 | struct rds_ib_recv_work *i_recvs; |
110 | struct rds_page_frag i_frag; | ||
111 | u64 i_ack_recv; /* last ACK received */ | 106 | u64 i_ack_recv; /* last ACK received */ |
112 | 107 | ||
113 | /* sending acks */ | 108 | /* sending acks */ |
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 24d14615f41a..f6dbf16e0741 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c | |||
@@ -43,17 +43,11 @@ static struct kmem_cache *rds_ib_incoming_slab; | |||
43 | static struct kmem_cache *rds_ib_frag_slab; | 43 | static struct kmem_cache *rds_ib_frag_slab; |
44 | static atomic_t rds_ib_allocation = ATOMIC_INIT(0); | 44 | static atomic_t rds_ib_allocation = ATOMIC_INIT(0); |
45 | 45 | ||
46 | static void rds_ib_frag_drop_page(struct rds_page_frag *frag) | 46 | /* Free frag and attached recv buffer f_sg */ |
47 | { | ||
48 | rdsdebug("frag %p page %p\n", frag, frag->f_page); | ||
49 | __free_page(frag->f_page); | ||
50 | frag->f_page = NULL; | ||
51 | } | ||
52 | |||
53 | static void rds_ib_frag_free(struct rds_page_frag *frag) | 47 | static void rds_ib_frag_free(struct rds_page_frag *frag) |
54 | { | 48 | { |
55 | rdsdebug("frag %p page %p\n", frag, frag->f_page); | 49 | rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg)); |
56 | BUG_ON(frag->f_page); | 50 | __free_page(sg_page(&frag->f_sg)); |
57 | kmem_cache_free(rds_ib_frag_slab, frag); | 51 | kmem_cache_free(rds_ib_frag_slab, frag); |
58 | } | 52 | } |
59 | 53 | ||
@@ -71,12 +65,8 @@ static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic, | |||
71 | { | 65 | { |
72 | struct rds_page_frag *frag = recv->r_frag; | 66 | struct rds_page_frag *frag = recv->r_frag; |
73 | 67 | ||
74 | rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page); | 68 | rdsdebug("recv %p frag %p page %p\n", recv, frag, sg_page(&frag->f_sg)); |
75 | if (frag->f_mapped) | 69 | ib_dma_unmap_sg(ic->i_cm_id->device, &frag->f_sg, 1, DMA_FROM_DEVICE); |
76 | ib_dma_unmap_page(ic->i_cm_id->device, | ||
77 | frag->f_mapped, | ||
78 | RDS_FRAG_SIZE, DMA_FROM_DEVICE); | ||
79 | frag->f_mapped = 0; | ||
80 | } | 70 | } |
81 | 71 | ||
82 | void rds_ib_recv_init_ring(struct rds_ib_connection *ic) | 72 | void rds_ib_recv_init_ring(struct rds_ib_connection *ic) |
@@ -116,8 +106,6 @@ static void rds_ib_recv_clear_one(struct rds_ib_connection *ic, | |||
116 | } | 106 | } |
117 | if (recv->r_frag) { | 107 | if (recv->r_frag) { |
118 | rds_ib_recv_unmap_page(ic, recv); | 108 | rds_ib_recv_unmap_page(ic, recv); |
119 | if (recv->r_frag->f_page) | ||
120 | rds_ib_frag_drop_page(recv->r_frag); | ||
121 | rds_ib_frag_free(recv->r_frag); | 109 | rds_ib_frag_free(recv->r_frag); |
122 | recv->r_frag = NULL; | 110 | recv->r_frag = NULL; |
123 | } | 111 | } |
@@ -129,16 +117,12 @@ void rds_ib_recv_clear_ring(struct rds_ib_connection *ic) | |||
129 | 117 | ||
130 | for (i = 0; i < ic->i_recv_ring.w_nr; i++) | 118 | for (i = 0; i < ic->i_recv_ring.w_nr; i++) |
131 | rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); | 119 | rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); |
132 | |||
133 | if (ic->i_frag.f_page) | ||
134 | rds_ib_frag_drop_page(&ic->i_frag); | ||
135 | } | 120 | } |
136 | 121 | ||
137 | static int rds_ib_recv_refill_one(struct rds_connection *conn, | 122 | static int rds_ib_recv_refill_one(struct rds_connection *conn, |
138 | struct rds_ib_recv_work *recv) | 123 | struct rds_ib_recv_work *recv) |
139 | { | 124 | { |
140 | struct rds_ib_connection *ic = conn->c_transport_data; | 125 | struct rds_ib_connection *ic = conn->c_transport_data; |
141 | dma_addr_t dma_addr; | ||
142 | struct ib_sge *sge; | 126 | struct ib_sge *sge; |
143 | int ret = -ENOMEM; | 127 | int ret = -ENOMEM; |
144 | 128 | ||
@@ -161,50 +145,27 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, | |||
161 | if (!recv->r_frag) | 145 | if (!recv->r_frag) |
162 | goto out; | 146 | goto out; |
163 | INIT_LIST_HEAD(&recv->r_frag->f_item); | 147 | INIT_LIST_HEAD(&recv->r_frag->f_item); |
164 | recv->r_frag->f_page = NULL; | 148 | sg_init_table(&recv->r_frag->f_sg, 1); |
165 | } | 149 | ret = rds_page_remainder_alloc(&recv->r_frag->f_sg, |
166 | 150 | RDS_FRAG_SIZE, GFP_NOWAIT); | |
167 | if (!ic->i_frag.f_page) { | 151 | if (ret) { |
168 | ic->i_frag.f_page = alloc_page(GFP_NOWAIT); | 152 | kmem_cache_free(rds_ib_frag_slab, recv->r_frag); |
169 | if (!ic->i_frag.f_page) | 153 | recv->r_frag = NULL; |
170 | goto out; | 154 | goto out; |
171 | ic->i_frag.f_offset = 0; | 155 | } |
172 | } | 156 | } |
173 | 157 | ||
174 | dma_addr = ib_dma_map_page(ic->i_cm_id->device, | 158 | ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, |
175 | ic->i_frag.f_page, | 159 | 1, DMA_FROM_DEVICE); |
176 | ic->i_frag.f_offset, | 160 | WARN_ON(ret != 1); |
177 | RDS_FRAG_SIZE, | ||
178 | DMA_FROM_DEVICE); | ||
179 | if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr)) | ||
180 | goto out; | ||
181 | |||
182 | /* | ||
183 | * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap() | ||
184 | * must be called on this recv. This happens as completions hit | ||
185 | * in order or on connection shutdown. | ||
186 | */ | ||
187 | recv->r_frag->f_page = ic->i_frag.f_page; | ||
188 | recv->r_frag->f_offset = ic->i_frag.f_offset; | ||
189 | recv->r_frag->f_mapped = dma_addr; | ||
190 | 161 | ||
191 | sge = &recv->r_sge[0]; | 162 | sge = &recv->r_sge[0]; |
192 | sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); | 163 | sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); |
193 | sge->length = sizeof(struct rds_header); | 164 | sge->length = sizeof(struct rds_header); |
194 | 165 | ||
195 | sge = &recv->r_sge[1]; | 166 | sge = &recv->r_sge[1]; |
196 | sge->addr = dma_addr; | 167 | sge->addr = sg_dma_address(&recv->r_frag->f_sg); |
197 | sge->length = RDS_FRAG_SIZE; | 168 | sge->length = sg_dma_len(&recv->r_frag->f_sg); |
198 | |||
199 | get_page(recv->r_frag->f_page); | ||
200 | |||
201 | if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { | ||
202 | ic->i_frag.f_offset += RDS_FRAG_SIZE; | ||
203 | } else { | ||
204 | put_page(ic->i_frag.f_page); | ||
205 | ic->i_frag.f_page = NULL; | ||
206 | ic->i_frag.f_offset = 0; | ||
207 | } | ||
208 | 169 | ||
209 | ret = 0; | 170 | ret = 0; |
210 | out: | 171 | out: |
@@ -247,8 +208,8 @@ int rds_ib_recv_refill(struct rds_connection *conn, int prefill) | |||
247 | /* XXX when can this fail? */ | 208 | /* XXX when can this fail? */ |
248 | ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); | 209 | ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); |
249 | rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, | 210 | rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, |
250 | recv->r_ibinc, recv->r_frag->f_page, | 211 | recv->r_ibinc, sg_page(&recv->r_frag->f_sg), |
251 | (long) recv->r_frag->f_mapped, ret); | 212 | (long) sg_dma_address(&recv->r_frag->f_sg), ret); |
252 | if (ret) { | 213 | if (ret) { |
253 | rds_ib_conn_error(conn, "recv post on " | 214 | rds_ib_conn_error(conn, "recv post on " |
254 | "%pI4 returned %d, disconnecting and " | 215 | "%pI4 returned %d, disconnecting and " |
@@ -281,7 +242,6 @@ static void rds_ib_inc_purge(struct rds_incoming *inc) | |||
281 | 242 | ||
282 | list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { | 243 | list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { |
283 | list_del_init(&frag->f_item); | 244 | list_del_init(&frag->f_item); |
284 | rds_ib_frag_drop_page(frag); | ||
285 | rds_ib_frag_free(frag); | 245 | rds_ib_frag_free(frag); |
286 | } | 246 | } |
287 | } | 247 | } |
@@ -333,13 +293,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, | |||
333 | to_copy = min_t(unsigned long, to_copy, len - copied); | 293 | to_copy = min_t(unsigned long, to_copy, len - copied); |
334 | 294 | ||
335 | rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " | 295 | rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " |
336 | "[%p, %lu] + %lu\n", | 296 | "[%p, %u] + %lu\n", |
337 | to_copy, iov->iov_base, iov->iov_len, iov_off, | 297 | to_copy, iov->iov_base, iov->iov_len, iov_off, |
338 | frag->f_page, frag->f_offset, frag_off); | 298 | sg_page(&frag->f_sg), frag->f_sg.offset, frag_off); |
339 | 299 | ||
340 | /* XXX needs + offset for multiple recvs per page */ | 300 | /* XXX needs + offset for multiple recvs per page */ |
341 | ret = rds_page_copy_to_user(frag->f_page, | 301 | ret = rds_page_copy_to_user(sg_page(&frag->f_sg), |
342 | frag->f_offset + frag_off, | 302 | frag->f_sg.offset + frag_off, |
343 | iov->iov_base + iov_off, | 303 | iov->iov_base + iov_off, |
344 | to_copy); | 304 | to_copy); |
345 | if (ret) { | 305 | if (ret) { |
@@ -595,7 +555,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, | |||
595 | to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); | 555 | to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); |
596 | BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ | 556 | BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ |
597 | 557 | ||
598 | addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); | 558 | addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0); |
599 | 559 | ||
600 | src = addr + frag_off; | 560 | src = addr + frag_off; |
601 | dst = (void *)map->m_page_addrs[map_page] + map_off; | 561 | dst = (void *)map->m_page_addrs[map_page] + map_off; |
@@ -698,12 +658,12 @@ static void rds_ib_process_recv(struct rds_connection *conn, | |||
698 | * the inc is freed. We don't go that route, so we have to drop the | 658 | * the inc is freed. We don't go that route, so we have to drop the |
699 | * page ref ourselves. We can't just leave the page on the recv | 659 | * page ref ourselves. We can't just leave the page on the recv |
700 | * because that confuses the dma mapping of pages and each recv's use | 660 | * because that confuses the dma mapping of pages and each recv's use |
701 | * of a partial page. We can leave the frag, though, it will be | 661 | * of a partial page. |
702 | * reused. | ||
703 | * | 662 | * |
704 | * FIXME: Fold this into the code path below. | 663 | * FIXME: Fold this into the code path below. |
705 | */ | 664 | */ |
706 | rds_ib_frag_drop_page(recv->r_frag); | 665 | rds_ib_frag_free(recv->r_frag); |
666 | recv->r_frag = NULL; | ||
707 | return; | 667 | return; |
708 | } | 668 | } |
709 | 669 | ||
diff --git a/net/rds/page.c b/net/rds/page.c index e5b2527ae257..5e44f5ae7898 100644 --- a/net/rds/page.c +++ b/net/rds/page.c | |||
@@ -186,6 +186,7 @@ out: | |||
186 | ret ? 0 : scat->length); | 186 | ret ? 0 : scat->length); |
187 | return ret; | 187 | return ret; |
188 | } | 188 | } |
189 | EXPORT_SYMBOL_GPL(rds_page_remainder_alloc); | ||
189 | 190 | ||
190 | static int rds_page_remainder_cpu_notify(struct notifier_block *self, | 191 | static int rds_page_remainder_cpu_notify(struct notifier_block *self, |
191 | unsigned long action, void *hcpu) | 192 | unsigned long action, void *hcpu) |