about summary refs log tree commit diff stats
path: root/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'crypto')
-rw-r--r-- crypto/async_tx/async_memcpy.c 37
-rw-r--r-- crypto/async_tx/async_pq.c 174
-rw-r--r-- crypto/async_tx/async_raid6_recov.c 61
-rw-r--r-- crypto/async_tx/async_xor.c 123
-rw-r--r-- crypto/async_tx/raid6test.c 10
5 files changed, 238 insertions, 167 deletions
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 9e62feffb374..f8c0b8dbeb75 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -50,33 +50,36 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
50 &dest, 1, &src, 1, len); 50 &dest, 1, &src, 1, len);
51 struct dma_device *device = chan ? chan->device : NULL; 51 struct dma_device *device = chan ? chan->device : NULL;
52 struct dma_async_tx_descriptor *tx = NULL; 52 struct dma_async_tx_descriptor *tx = NULL;
53 struct dmaengine_unmap_data *unmap = NULL;
53 54
54 if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { 55 if (device)
55 dma_addr_t dma_dest, dma_src; 56 unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
57
58 if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
56 unsigned long dma_prep_flags = 0; 59 unsigned long dma_prep_flags = 0;
57 60
58 if (submit->cb_fn) 61 if (submit->cb_fn)
59 dma_prep_flags |= DMA_PREP_INTERRUPT; 62 dma_prep_flags |= DMA_PREP_INTERRUPT;
60 if (submit->flags & ASYNC_TX_FENCE) 63 if (submit->flags & ASYNC_TX_FENCE)
61 dma_prep_flags |= DMA_PREP_FENCE; 64 dma_prep_flags |= DMA_PREP_FENCE;
62 dma_dest = dma_map_page(device->dev, dest, dest_offset, len, 65
63 DMA_FROM_DEVICE); 66 unmap->to_cnt = 1;
64 67 unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
65 dma_src = dma_map_page(device->dev, src, src_offset, len, 68 DMA_TO_DEVICE);
66 DMA_TO_DEVICE); 69 unmap->from_cnt = 1;
67 70 unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
68 tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src, 71 DMA_FROM_DEVICE);
69 len, dma_prep_flags); 72 unmap->len = len;
70 if (!tx) { 73
71 dma_unmap_page(device->dev, dma_dest, len, 74 tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
72 DMA_FROM_DEVICE); 75 unmap->addr[0], len,
73 dma_unmap_page(device->dev, dma_src, len, 76 dma_prep_flags);
74 DMA_TO_DEVICE);
75 }
76 } 77 }
77 78
78 if (tx) { 79 if (tx) {
79 pr_debug("%s: (async) len: %zu\n", __func__, len); 80 pr_debug("%s: (async) len: %zu\n", __func__, len);
81
82 dma_set_unmap(tx, unmap);
80 async_tx_submit(chan, tx, submit); 83 async_tx_submit(chan, tx, submit);
81 } else { 84 } else {
82 void *dest_buf, *src_buf; 85 void *dest_buf, *src_buf;
@@ -96,6 +99,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
96 async_tx_sync_epilog(submit); 99 async_tx_sync_epilog(submit);
97 } 100 }
98 101
102 dmaengine_unmap_put(unmap);
103
99 return tx; 104 return tx;
100} 105}
101EXPORT_SYMBOL_GPL(async_memcpy); 106EXPORT_SYMBOL_GPL(async_memcpy);
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 91d5d385899e..d05327caf69d 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -46,49 +46,24 @@ static struct page *pq_scribble_page;
46 * do_async_gen_syndrome - asynchronously calculate P and/or Q 46 * do_async_gen_syndrome - asynchronously calculate P and/or Q
47 */ 47 */
48static __async_inline struct dma_async_tx_descriptor * 48static __async_inline struct dma_async_tx_descriptor *
49do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, 49do_async_gen_syndrome(struct dma_chan *chan,
50 const unsigned char *scfs, unsigned int offset, int disks, 50 const unsigned char *scfs, int disks,
51 size_t len, dma_addr_t *dma_src, 51 struct dmaengine_unmap_data *unmap,
52 enum dma_ctrl_flags dma_flags,
52 struct async_submit_ctl *submit) 53 struct async_submit_ctl *submit)
53{ 54{
54 struct dma_async_tx_descriptor *tx = NULL; 55 struct dma_async_tx_descriptor *tx = NULL;
55 struct dma_device *dma = chan->device; 56 struct dma_device *dma = chan->device;
56 enum dma_ctrl_flags dma_flags = 0;
57 enum async_tx_flags flags_orig = submit->flags; 57 enum async_tx_flags flags_orig = submit->flags;
58 dma_async_tx_callback cb_fn_orig = submit->cb_fn; 58 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
59 dma_async_tx_callback cb_param_orig = submit->cb_param; 59 dma_async_tx_callback cb_param_orig = submit->cb_param;
60 int src_cnt = disks - 2; 60 int src_cnt = disks - 2;
61 unsigned char coefs[src_cnt];
62 unsigned short pq_src_cnt; 61 unsigned short pq_src_cnt;
63 dma_addr_t dma_dest[2]; 62 dma_addr_t dma_dest[2];
64 int src_off = 0; 63 int src_off = 0;
65 int idx;
66 int i;
67 64
68 /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ 65 if (submit->flags & ASYNC_TX_FENCE)
69 if (P(blocks, disks)) 66 dma_flags |= DMA_PREP_FENCE;
70 dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
71 len, DMA_BIDIRECTIONAL);
72 else
73 dma_flags |= DMA_PREP_PQ_DISABLE_P;
74 if (Q(blocks, disks))
75 dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
76 len, DMA_BIDIRECTIONAL);
77 else
78 dma_flags |= DMA_PREP_PQ_DISABLE_Q;
79
80 /* convert source addresses being careful to collapse 'empty'
81 * sources and update the coefficients accordingly
82 */
83 for (i = 0, idx = 0; i < src_cnt; i++) {
84 if (blocks[i] == NULL)
85 continue;
86 dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
87 DMA_TO_DEVICE);
88 coefs[idx] = scfs[i];
89 idx++;
90 }
91 src_cnt = idx;
92 67
93 while (src_cnt > 0) { 68 while (src_cnt > 0) {
94 submit->flags = flags_orig; 69 submit->flags = flags_orig;
@@ -100,28 +75,25 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
100 if (src_cnt > pq_src_cnt) { 75 if (src_cnt > pq_src_cnt) {
101 submit->flags &= ~ASYNC_TX_ACK; 76 submit->flags &= ~ASYNC_TX_ACK;
102 submit->flags |= ASYNC_TX_FENCE; 77 submit->flags |= ASYNC_TX_FENCE;
103 dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
104 submit->cb_fn = NULL; 78 submit->cb_fn = NULL;
105 submit->cb_param = NULL; 79 submit->cb_param = NULL;
106 } else { 80 } else {
107 dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
108 submit->cb_fn = cb_fn_orig; 81 submit->cb_fn = cb_fn_orig;
109 submit->cb_param = cb_param_orig; 82 submit->cb_param = cb_param_orig;
110 if (cb_fn_orig) 83 if (cb_fn_orig)
111 dma_flags |= DMA_PREP_INTERRUPT; 84 dma_flags |= DMA_PREP_INTERRUPT;
112 } 85 }
113 if (submit->flags & ASYNC_TX_FENCE)
114 dma_flags |= DMA_PREP_FENCE;
115 86
116 /* Since we have clobbered the src_list we are committed 87 /* Drivers force forward progress in case they can not provide
117 * to doing this asynchronously. Drivers force forward 88 * a descriptor
118 * progress in case they can not provide a descriptor
119 */ 89 */
120 for (;;) { 90 for (;;) {
91 dma_dest[0] = unmap->addr[disks - 2];
92 dma_dest[1] = unmap->addr[disks - 1];
121 tx = dma->device_prep_dma_pq(chan, dma_dest, 93 tx = dma->device_prep_dma_pq(chan, dma_dest,
122 &dma_src[src_off], 94 &unmap->addr[src_off],
123 pq_src_cnt, 95 pq_src_cnt,
124 &coefs[src_off], len, 96 &scfs[src_off], unmap->len,
125 dma_flags); 97 dma_flags);
126 if (likely(tx)) 98 if (likely(tx))
127 break; 99 break;
@@ -129,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
129 dma_async_issue_pending(chan); 101 dma_async_issue_pending(chan);
130 } 102 }
131 103
104 dma_set_unmap(tx, unmap);
132 async_tx_submit(chan, tx, submit); 105 async_tx_submit(chan, tx, submit);
133 submit->depend_tx = tx; 106 submit->depend_tx = tx;
134 107
@@ -188,10 +161,6 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
188 * set to NULL those buffers will be replaced with the raid6_zero_page 161 * set to NULL those buffers will be replaced with the raid6_zero_page
189 * in the synchronous path and omitted in the hardware-asynchronous 162 * in the synchronous path and omitted in the hardware-asynchronous
190 * path. 163 * path.
191 *
192 * 'blocks' note: if submit->scribble is NULL then the contents of
193 * 'blocks' may be overwritten to perform address conversions
194 * (dma_map_page() or page_address()).
195 */ 164 */
196struct dma_async_tx_descriptor * 165struct dma_async_tx_descriptor *
197async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, 166async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
@@ -202,26 +171,69 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
202 &P(blocks, disks), 2, 171 &P(blocks, disks), 2,
203 blocks, src_cnt, len); 172 blocks, src_cnt, len);
204 struct dma_device *device = chan ? chan->device : NULL; 173 struct dma_device *device = chan ? chan->device : NULL;
205 dma_addr_t *dma_src = NULL; 174 struct dmaengine_unmap_data *unmap = NULL;
206 175
207 BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); 176 BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
208 177
209 if (submit->scribble) 178 if (device)
210 dma_src = submit->scribble; 179 unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
211 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
212 dma_src = (dma_addr_t *) blocks;
213 180
214 if (dma_src && device && 181 if (unmap &&
215 (src_cnt <= dma_maxpq(device, 0) || 182 (src_cnt <= dma_maxpq(device, 0) ||
216 dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && 183 dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
217 is_dma_pq_aligned(device, offset, 0, len)) { 184 is_dma_pq_aligned(device, offset, 0, len)) {
185 struct dma_async_tx_descriptor *tx;
186 enum dma_ctrl_flags dma_flags = 0;
187 unsigned char coefs[src_cnt];
188 int i, j;
189
218 /* run the p+q asynchronously */ 190 /* run the p+q asynchronously */
219 pr_debug("%s: (async) disks: %d len: %zu\n", 191 pr_debug("%s: (async) disks: %d len: %zu\n",
220 __func__, disks, len); 192 __func__, disks, len);
221 return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, 193
222 disks, len, dma_src, submit); 194 /* convert source addresses being careful to collapse 'empty'
195 * sources and update the coefficients accordingly
196 */
197 unmap->len = len;
198 for (i = 0, j = 0; i < src_cnt; i++) {
199 if (blocks[i] == NULL)
200 continue;
201 unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset,
202 len, DMA_TO_DEVICE);
203 coefs[j] = raid6_gfexp[i];
204 unmap->to_cnt++;
205 j++;
206 }
207
208 /*
209 * DMAs use destinations as sources,
210 * so use BIDIRECTIONAL mapping
211 */
212 unmap->bidi_cnt++;
213 if (P(blocks, disks))
214 unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
215 offset, len, DMA_BIDIRECTIONAL);
216 else {
217 unmap->addr[j++] = 0;
218 dma_flags |= DMA_PREP_PQ_DISABLE_P;
219 }
220
221 unmap->bidi_cnt++;
222 if (Q(blocks, disks))
223 unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
224 offset, len, DMA_BIDIRECTIONAL);
225 else {
226 unmap->addr[j++] = 0;
227 dma_flags |= DMA_PREP_PQ_DISABLE_Q;
228 }
229
230 tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit);
231 dmaengine_unmap_put(unmap);
232 return tx;
223 } 233 }
224 234
235 dmaengine_unmap_put(unmap);
236
225 /* run the pq synchronously */ 237 /* run the pq synchronously */
226 pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); 238 pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
227 239
@@ -277,50 +289,60 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
277 struct dma_async_tx_descriptor *tx; 289 struct dma_async_tx_descriptor *tx;
278 unsigned char coefs[disks-2]; 290 unsigned char coefs[disks-2];
279 enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; 291 enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
280 dma_addr_t *dma_src = NULL; 292 struct dmaengine_unmap_data *unmap = NULL;
281 int src_cnt = 0;
282 293
283 BUG_ON(disks < 4); 294 BUG_ON(disks < 4);
284 295
285 if (submit->scribble) 296 if (device)
286 dma_src = submit->scribble; 297 unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
287 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
288 dma_src = (dma_addr_t *) blocks;
289 298
290 if (dma_src && device && disks <= dma_maxpq(device, 0) && 299 if (unmap && disks <= dma_maxpq(device, 0) &&
291 is_dma_pq_aligned(device, offset, 0, len)) { 300 is_dma_pq_aligned(device, offset, 0, len)) {
292 struct device *dev = device->dev; 301 struct device *dev = device->dev;
293 dma_addr_t *pq = &dma_src[disks-2]; 302 dma_addr_t pq[2];
294 int i; 303 int i, j = 0, src_cnt = 0;
295 304
296 pr_debug("%s: (async) disks: %d len: %zu\n", 305 pr_debug("%s: (async) disks: %d len: %zu\n",
297 __func__, disks, len); 306 __func__, disks, len);
298 if (!P(blocks, disks)) 307
308 unmap->len = len;
309 for (i = 0; i < disks-2; i++)
310 if (likely(blocks[i])) {
311 unmap->addr[j] = dma_map_page(dev, blocks[i],
312 offset, len,
313 DMA_TO_DEVICE);
314 coefs[j] = raid6_gfexp[i];
315 unmap->to_cnt++;
316 src_cnt++;
317 j++;
318 }
319
320 if (!P(blocks, disks)) {
321 pq[0] = 0;
299 dma_flags |= DMA_PREP_PQ_DISABLE_P; 322 dma_flags |= DMA_PREP_PQ_DISABLE_P;
300 else 323 } else {
301 pq[0] = dma_map_page(dev, P(blocks, disks), 324 pq[0] = dma_map_page(dev, P(blocks, disks),
302 offset, len, 325 offset, len,
303 DMA_TO_DEVICE); 326 DMA_TO_DEVICE);
304 if (!Q(blocks, disks)) 327 unmap->addr[j++] = pq[0];
328 unmap->to_cnt++;
329 }
330 if (!Q(blocks, disks)) {
331 pq[1] = 0;
305 dma_flags |= DMA_PREP_PQ_DISABLE_Q; 332 dma_flags |= DMA_PREP_PQ_DISABLE_Q;
306 else 333 } else {
307 pq[1] = dma_map_page(dev, Q(blocks, disks), 334 pq[1] = dma_map_page(dev, Q(blocks, disks),
308 offset, len, 335 offset, len,
309 DMA_TO_DEVICE); 336 DMA_TO_DEVICE);
337 unmap->addr[j++] = pq[1];
338 unmap->to_cnt++;
339 }
310 340
311 if (submit->flags & ASYNC_TX_FENCE) 341 if (submit->flags & ASYNC_TX_FENCE)
312 dma_flags |= DMA_PREP_FENCE; 342 dma_flags |= DMA_PREP_FENCE;
313 for (i = 0; i < disks-2; i++)
314 if (likely(blocks[i])) {
315 dma_src[src_cnt] = dma_map_page(dev, blocks[i],
316 offset, len,
317 DMA_TO_DEVICE);
318 coefs[src_cnt] = raid6_gfexp[i];
319 src_cnt++;
320 }
321
322 for (;;) { 343 for (;;) {
323 tx = device->device_prep_dma_pq_val(chan, pq, dma_src, 344 tx = device->device_prep_dma_pq_val(chan, pq,
345 unmap->addr,
324 src_cnt, 346 src_cnt,
325 coefs, 347 coefs,
326 len, pqres, 348 len, pqres,
@@ -330,6 +352,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
330 async_tx_quiesce(&submit->depend_tx); 352 async_tx_quiesce(&submit->depend_tx);
331 dma_async_issue_pending(chan); 353 dma_async_issue_pending(chan);
332 } 354 }
355
356 dma_set_unmap(tx, unmap);
333 async_tx_submit(chan, tx, submit); 357 async_tx_submit(chan, tx, submit);
334 358
335 return tx; 359 return tx;
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index a9f08a6a582e..934a84981495 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -26,6 +26,7 @@
26#include <linux/dma-mapping.h> 26#include <linux/dma-mapping.h>
27#include <linux/raid/pq.h> 27#include <linux/raid/pq.h>
28#include <linux/async_tx.h> 28#include <linux/async_tx.h>
29#include <linux/dmaengine.h>
29 30
30static struct dma_async_tx_descriptor * 31static struct dma_async_tx_descriptor *
31async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, 32async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
@@ -34,35 +35,45 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
34 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, 35 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
35 &dest, 1, srcs, 2, len); 36 &dest, 1, srcs, 2, len);
36 struct dma_device *dma = chan ? chan->device : NULL; 37 struct dma_device *dma = chan ? chan->device : NULL;
38 struct dmaengine_unmap_data *unmap = NULL;
37 const u8 *amul, *bmul; 39 const u8 *amul, *bmul;
38 u8 ax, bx; 40 u8 ax, bx;
39 u8 *a, *b, *c; 41 u8 *a, *b, *c;
40 42
41 if (dma) { 43 if (dma)
42 dma_addr_t dma_dest[2]; 44 unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
43 dma_addr_t dma_src[2]; 45
46 if (unmap) {
44 struct device *dev = dma->dev; 47 struct device *dev = dma->dev;
48 dma_addr_t pq[2];
45 struct dma_async_tx_descriptor *tx; 49 struct dma_async_tx_descriptor *tx;
46 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; 50 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
47 51
48 if (submit->flags & ASYNC_TX_FENCE) 52 if (submit->flags & ASYNC_TX_FENCE)
49 dma_flags |= DMA_PREP_FENCE; 53 dma_flags |= DMA_PREP_FENCE;
50 dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); 54 unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
51 dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); 55 unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
52 dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); 56 unmap->to_cnt = 2;
53 tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, 57
58 unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
59 unmap->bidi_cnt = 1;
60 /* engine only looks at Q, but expects it to follow P */
61 pq[1] = unmap->addr[2];
62
63 unmap->len = len;
64 tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,
54 len, dma_flags); 65 len, dma_flags);
55 if (tx) { 66 if (tx) {
67 dma_set_unmap(tx, unmap);
56 async_tx_submit(chan, tx, submit); 68 async_tx_submit(chan, tx, submit);
69 dmaengine_unmap_put(unmap);
57 return tx; 70 return tx;
58 } 71 }
59 72
60 /* could not get a descriptor, unmap and fall through to 73 /* could not get a descriptor, unmap and fall through to
61 * the synchronous path 74 * the synchronous path
62 */ 75 */
63 dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); 76 dmaengine_unmap_put(unmap);
64 dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
65 dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
66 } 77 }
67 78
68 /* run the operation synchronously */ 79 /* run the operation synchronously */
@@ -89,23 +100,38 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
89 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, 100 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
90 &dest, 1, &src, 1, len); 101 &dest, 1, &src, 1, len);
91 struct dma_device *dma = chan ? chan->device : NULL; 102 struct dma_device *dma = chan ? chan->device : NULL;
103 struct dmaengine_unmap_data *unmap = NULL;
92 const u8 *qmul; /* Q multiplier table */ 104 const u8 *qmul; /* Q multiplier table */
93 u8 *d, *s; 105 u8 *d, *s;
94 106
95 if (dma) { 107 if (dma)
108 unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
109
110 if (unmap) {
96 dma_addr_t dma_dest[2]; 111 dma_addr_t dma_dest[2];
97 dma_addr_t dma_src[1];
98 struct device *dev = dma->dev; 112 struct device *dev = dma->dev;
99 struct dma_async_tx_descriptor *tx; 113 struct dma_async_tx_descriptor *tx;
100 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; 114 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
101 115
102 if (submit->flags & ASYNC_TX_FENCE) 116 if (submit->flags & ASYNC_TX_FENCE)
103 dma_flags |= DMA_PREP_FENCE; 117 dma_flags |= DMA_PREP_FENCE;
104 dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); 118 unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
105 dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); 119 unmap->to_cnt++;
106 tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, 120 unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
107 len, dma_flags); 121 dma_dest[1] = unmap->addr[1];
122 unmap->bidi_cnt++;
123 unmap->len = len;
124
125 /* this looks funny, but the engine looks for Q at
126 * dma_dest[1] and ignores dma_dest[0] as a dest
127 * due to DMA_PREP_PQ_DISABLE_P
128 */
129 tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr,
130 1, &coef, len, dma_flags);
131
108 if (tx) { 132 if (tx) {
133 dma_set_unmap(tx, unmap);
134 dmaengine_unmap_put(unmap);
109 async_tx_submit(chan, tx, submit); 135 async_tx_submit(chan, tx, submit);
110 return tx; 136 return tx;
111 } 137 }
@@ -113,8 +139,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
113 /* could not get a descriptor, unmap and fall through to 139 /* could not get a descriptor, unmap and fall through to
114 * the synchronous path 140 * the synchronous path
115 */ 141 */
116 dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); 142 dmaengine_unmap_put(unmap);
117 dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
118 } 143 }
119 144
120 /* no channel available, or failed to allocate a descriptor, so 145 /* no channel available, or failed to allocate a descriptor, so
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 8ade0a0481c6..3c562f5a60bb 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -33,48 +33,31 @@
33 33
34/* do_async_xor - dma map the pages and perform the xor with an engine */ 34/* do_async_xor - dma map the pages and perform the xor with an engine */
35static __async_inline struct dma_async_tx_descriptor * 35static __async_inline struct dma_async_tx_descriptor *
36do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, 36do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
37 unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
38 struct async_submit_ctl *submit) 37 struct async_submit_ctl *submit)
39{ 38{
40 struct dma_device *dma = chan->device; 39 struct dma_device *dma = chan->device;
41 struct dma_async_tx_descriptor *tx = NULL; 40 struct dma_async_tx_descriptor *tx = NULL;
42 int src_off = 0;
43 int i;
44 dma_async_tx_callback cb_fn_orig = submit->cb_fn; 41 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
45 void *cb_param_orig = submit->cb_param; 42 void *cb_param_orig = submit->cb_param;
46 enum async_tx_flags flags_orig = submit->flags; 43 enum async_tx_flags flags_orig = submit->flags;
47 enum dma_ctrl_flags dma_flags; 44 enum dma_ctrl_flags dma_flags = 0;
48 int xor_src_cnt = 0; 45 int src_cnt = unmap->to_cnt;
49 dma_addr_t dma_dest; 46 int xor_src_cnt;
50 47 dma_addr_t dma_dest = unmap->addr[unmap->to_cnt];
51 /* map the dest bidrectional in case it is re-used as a source */ 48 dma_addr_t *src_list = unmap->addr;
52 dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL);
53 for (i = 0; i < src_cnt; i++) {
54 /* only map the dest once */
55 if (!src_list[i])
56 continue;
57 if (unlikely(src_list[i] == dest)) {
58 dma_src[xor_src_cnt++] = dma_dest;
59 continue;
60 }
61 dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset,
62 len, DMA_TO_DEVICE);
63 }
64 src_cnt = xor_src_cnt;
65 49
66 while (src_cnt) { 50 while (src_cnt) {
51 dma_addr_t tmp;
52
67 submit->flags = flags_orig; 53 submit->flags = flags_orig;
68 dma_flags = 0;
69 xor_src_cnt = min(src_cnt, (int)dma->max_xor); 54 xor_src_cnt = min(src_cnt, (int)dma->max_xor);
70 /* if we are submitting additional xors, leave the chain open, 55 /* if we are submitting additional xors, leave the chain open
71 * clear the callback parameters, and leave the destination 56 * and clear the callback parameters
72 * buffer mapped
73 */ 57 */
74 if (src_cnt > xor_src_cnt) { 58 if (src_cnt > xor_src_cnt) {
75 submit->flags &= ~ASYNC_TX_ACK; 59 submit->flags &= ~ASYNC_TX_ACK;
76 submit->flags |= ASYNC_TX_FENCE; 60 submit->flags |= ASYNC_TX_FENCE;
77 dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
78 submit->cb_fn = NULL; 61 submit->cb_fn = NULL;
79 submit->cb_param = NULL; 62 submit->cb_param = NULL;
80 } else { 63 } else {
@@ -85,12 +68,18 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
85 dma_flags |= DMA_PREP_INTERRUPT; 68 dma_flags |= DMA_PREP_INTERRUPT;
86 if (submit->flags & ASYNC_TX_FENCE) 69 if (submit->flags & ASYNC_TX_FENCE)
87 dma_flags |= DMA_PREP_FENCE; 70 dma_flags |= DMA_PREP_FENCE;
88 /* Since we have clobbered the src_list we are committed 71
89 * to doing this asynchronously. Drivers force forward progress 72 /* Drivers force forward progress in case they can not provide a
90 * in case they can not provide a descriptor 73 * descriptor
91 */ 74 */
92 tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off], 75 tmp = src_list[0];
93 xor_src_cnt, len, dma_flags); 76 if (src_list > unmap->addr)
77 src_list[0] = dma_dest;
78 tx = dma->device_prep_dma_xor(chan, dma_dest, src_list,
79 xor_src_cnt, unmap->len,
80 dma_flags);
81 src_list[0] = tmp;
82
94 83
95 if (unlikely(!tx)) 84 if (unlikely(!tx))
96 async_tx_quiesce(&submit->depend_tx); 85 async_tx_quiesce(&submit->depend_tx);
@@ -99,22 +88,21 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
99 while (unlikely(!tx)) { 88 while (unlikely(!tx)) {
100 dma_async_issue_pending(chan); 89 dma_async_issue_pending(chan);
101 tx = dma->device_prep_dma_xor(chan, dma_dest, 90 tx = dma->device_prep_dma_xor(chan, dma_dest,
102 &dma_src[src_off], 91 src_list,
103 xor_src_cnt, len, 92 xor_src_cnt, unmap->len,
104 dma_flags); 93 dma_flags);
105 } 94 }
106 95
96 dma_set_unmap(tx, unmap);
107 async_tx_submit(chan, tx, submit); 97 async_tx_submit(chan, tx, submit);
108 submit->depend_tx = tx; 98 submit->depend_tx = tx;
109 99
110 if (src_cnt > xor_src_cnt) { 100 if (src_cnt > xor_src_cnt) {
111 /* drop completed sources */ 101 /* drop completed sources */
112 src_cnt -= xor_src_cnt; 102 src_cnt -= xor_src_cnt;
113 src_off += xor_src_cnt;
114
115 /* use the intermediate result as a source */ 103 /* use the intermediate result as a source */
116 dma_src[--src_off] = dma_dest;
117 src_cnt++; 104 src_cnt++;
105 src_list += xor_src_cnt - 1;
118 } else 106 } else
119 break; 107 break;
120 } 108 }
@@ -189,22 +177,40 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
189 struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, 177 struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
190 &dest, 1, src_list, 178 &dest, 1, src_list,
191 src_cnt, len); 179 src_cnt, len);
192 dma_addr_t *dma_src = NULL; 180 struct dma_device *device = chan ? chan->device : NULL;
181 struct dmaengine_unmap_data *unmap = NULL;
193 182
194 BUG_ON(src_cnt <= 1); 183 BUG_ON(src_cnt <= 1);
195 184
196 if (submit->scribble) 185 if (device)
197 dma_src = submit->scribble; 186 unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
198 else if (sizeof(dma_addr_t) <= sizeof(struct page *)) 187
199 dma_src = (dma_addr_t *) src_list; 188 if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
189 struct dma_async_tx_descriptor *tx;
190 int i, j;
200 191
201 if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
202 /* run the xor asynchronously */ 192 /* run the xor asynchronously */
203 pr_debug("%s (async): len: %zu\n", __func__, len); 193 pr_debug("%s (async): len: %zu\n", __func__, len);
204 194
205 return do_async_xor(chan, dest, src_list, offset, src_cnt, len, 195 unmap->len = len;
206 dma_src, submit); 196 for (i = 0, j = 0; i < src_cnt; i++) {
197 if (!src_list[i])
198 continue;
199 unmap->to_cnt++;
200 unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
201 offset, len, DMA_TO_DEVICE);
202 }
203
204 /* map it bidirectional as it may be re-used as a source */
205 unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
206 DMA_BIDIRECTIONAL);
207 unmap->bidi_cnt = 1;
208
209 tx = do_async_xor(chan, unmap, submit);
210 dmaengine_unmap_put(unmap);
211 return tx;
207 } else { 212 } else {
213 dmaengine_unmap_put(unmap);
208 /* run the xor synchronously */ 214 /* run the xor synchronously */
209 pr_debug("%s (sync): len: %zu\n", __func__, len); 215 pr_debug("%s (sync): len: %zu\n", __func__, len);
210 WARN_ONCE(chan, "%s: no space for dma address conversion\n", 216 WARN_ONCE(chan, "%s: no space for dma address conversion\n",
@@ -268,16 +274,14 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
268 struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len); 274 struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len);
269 struct dma_device *device = chan ? chan->device : NULL; 275 struct dma_device *device = chan ? chan->device : NULL;
270 struct dma_async_tx_descriptor *tx = NULL; 276 struct dma_async_tx_descriptor *tx = NULL;
271 dma_addr_t *dma_src = NULL; 277 struct dmaengine_unmap_data *unmap = NULL;
272 278
273 BUG_ON(src_cnt <= 1); 279 BUG_ON(src_cnt <= 1);
274 280
275 if (submit->scribble) 281 if (device)
276 dma_src = submit->scribble; 282 unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
277 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
278 dma_src = (dma_addr_t *) src_list;
279 283
280 if (dma_src && device && src_cnt <= device->max_xor && 284 if (unmap && src_cnt <= device->max_xor &&
281 is_dma_xor_aligned(device, offset, 0, len)) { 285 is_dma_xor_aligned(device, offset, 0, len)) {
282 unsigned long dma_prep_flags = 0; 286 unsigned long dma_prep_flags = 0;
283 int i; 287 int i;
@@ -288,11 +292,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
288 dma_prep_flags |= DMA_PREP_INTERRUPT; 292 dma_prep_flags |= DMA_PREP_INTERRUPT;
289 if (submit->flags & ASYNC_TX_FENCE) 293 if (submit->flags & ASYNC_TX_FENCE)
290 dma_prep_flags |= DMA_PREP_FENCE; 294 dma_prep_flags |= DMA_PREP_FENCE;
291 for (i = 0; i < src_cnt; i++)
292 dma_src[i] = dma_map_page(device->dev, src_list[i],
293 offset, len, DMA_TO_DEVICE);
294 295
295 tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, 296 for (i = 0; i < src_cnt; i++) {
297 unmap->addr[i] = dma_map_page(device->dev, src_list[i],
298 offset, len, DMA_TO_DEVICE);
299 unmap->to_cnt++;
300 }
301 unmap->len = len;
302
303 tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt,
296 len, result, 304 len, result,
297 dma_prep_flags); 305 dma_prep_flags);
298 if (unlikely(!tx)) { 306 if (unlikely(!tx)) {
@@ -301,11 +309,11 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
301 while (!tx) { 309 while (!tx) {
302 dma_async_issue_pending(chan); 310 dma_async_issue_pending(chan);
303 tx = device->device_prep_dma_xor_val(chan, 311 tx = device->device_prep_dma_xor_val(chan,
304 dma_src, src_cnt, len, result, 312 unmap->addr, src_cnt, len, result,
305 dma_prep_flags); 313 dma_prep_flags);
306 } 314 }
307 } 315 }
308 316 dma_set_unmap(tx, unmap);
309 async_tx_submit(chan, tx, submit); 317 async_tx_submit(chan, tx, submit);
310 } else { 318 } else {
311 enum async_tx_flags flags_orig = submit->flags; 319 enum async_tx_flags flags_orig = submit->flags;
@@ -327,6 +335,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
327 async_tx_sync_epilog(submit); 335 async_tx_sync_epilog(submit);
328 submit->flags = flags_orig; 336 submit->flags = flags_orig;
329 } 337 }
338 dmaengine_unmap_put(unmap);
330 339
331 return tx; 340 return tx;
332} 341}
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index 4a92bac744dc..dad95f45b88f 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -28,7 +28,7 @@
28#undef pr 28#undef pr
29#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) 29#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
30 30
31#define NDISKS 16 /* Including P and Q */ 31#define NDISKS 64 /* Including P and Q */
32 32
33static struct page *dataptrs[NDISKS]; 33static struct page *dataptrs[NDISKS];
34static addr_conv_t addr_conv[NDISKS]; 34static addr_conv_t addr_conv[NDISKS];
@@ -219,6 +219,14 @@ static int raid6_test(void)
219 err += test(11, &tests); 219 err += test(11, &tests);
220 err += test(12, &tests); 220 err += test(12, &tests);
221 } 221 }
222
223 /* the 24 disk case is special for ioatdma as it is the boundary point
224 * at which it needs to switch from 8-source ops to 16-source
225 * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
226 */
227 if (NDISKS > 24)
228 err += test(24, &tests);
229
222 err += test(NDISKS, &tests); 230 err += test(NDISKS, &tests);
223 231
224 pr("\n"); 232 pr("\n");