author    Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:42:50 -0400
committer Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:42:50 -0400
commit    0403e3827788d878163f9ef0541b748b0f88ca5d
tree      2dc73744bd92c268a1310f24668167f130877278 /crypto
parent    f9dd2134374c8de6b911e2b8652c6c9622eaa658
dmaengine: add fence support
Some engines optimize operation by reading ahead in the descriptor chain such
that descriptor2 may start execution before descriptor1 completes. If
descriptor2 depends on the result from descriptor1 then a fence is required
(on descriptor2) to disable this optimization. The async_tx api could
implicitly identify dependencies via the 'depend_tx' parameter, but that would
constrain cases where the dependency chain only specifies a completion order
rather than a data dependency. So, provide an ASYNC_TX_FENCE to explicitly
identify data dependencies.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
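[Illustration only, not part of the patch: a minimal caller-side sketch of the
new flag, assuming the existing async_tx helpers init_async_submit(),
async_memcpy() and async_xor(); the wrapper name copy_then_xor and its buffer
setup are hypothetical.]

#include <linux/async_tx.h>

/* Hypothetical helper: op2 (xor) reads the block that op1 (memcpy) just
 * wrote, so the second submission carries ASYNC_TX_FENCE in addition to
 * chaining on 'tx' via the depend_tx parameter.  If op2 only had to
 * complete *after* op1 (ordering, not data), the fence could be omitted.
 */
static struct dma_async_tx_descriptor *
copy_then_xor(struct page *dest, struct page *src, struct page **srcs,
	      size_t len, addr_conv_t *scribble)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;

	/* op1: no dependency yet, no fence needed */
	init_async_submit(&submit, 0, NULL, NULL, NULL, scribble);
	tx = async_memcpy(dest, src, 0, 0, len, &submit);

	/* op2: data dependency on op1's result -> fence the descriptor */
	init_async_submit(&submit, ASYNC_TX_FENCE | ASYNC_TX_ACK, tx,
			  NULL, NULL, scribble);
	return async_xor(dest, srcs, 0, 2, len, &submit);
}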
Diffstat (limited to 'crypto')
-rw-r--r--  crypto/async_tx/async_memcpy.c      |  7
-rw-r--r--  crypto/async_tx/async_memset.c      |  7
-rw-r--r--  crypto/async_tx/async_pq.c          |  5
-rw-r--r--  crypto/async_tx/async_raid6_recov.c | 47
-rw-r--r--  crypto/async_tx/async_xor.c         | 11
5 files changed, 50 insertions(+), 27 deletions(-)
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 98e15bd0dcb5..b38cbb3fd527 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -52,9 +52,12 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 
 	if (device) {
 		dma_addr_t dma_dest, dma_src;
-		unsigned long dma_prep_flags;
+		unsigned long dma_prep_flags = 0;
 
-		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
 					DMA_FROM_DEVICE);
 
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index b896a6e5f673..a374784e3329 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -49,9 +49,12 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len,
 
 	if (device) {
 		dma_addr_t dma_dest;
-		unsigned long dma_prep_flags;
+		unsigned long dma_prep_flags = 0;
 
-		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		dma_dest = dma_map_page(device->dev, dest, offset, len,
 					DMA_FROM_DEVICE);
 
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 108b21efb499..a25e290c39fb 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -101,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
 		 */
 		if (src_cnt > pq_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
+			submit->flags |= ASYNC_TX_FENCE;
 			dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
@@ -111,6 +112,8 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
 			if (cb_fn_orig)
 				dma_flags |= DMA_PREP_INTERRUPT;
 		}
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 
 		/* Since we have clobbered the src_list we are committed
 		 * to doing this asynchronously. Drivers force forward
@@ -282,6 +285,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
 			dma_flags |= DMA_PREP_PQ_DISABLE_P;
 		if (!Q(blocks, disks))
 			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		for (i = 0; i < disks; i++)
 			if (likely(blocks[i])) {
 				BUG_ON(is_raid6_zero_block(blocks[i]));
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 0c14d48c9896..822a42d10061 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -44,6 +44,8 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
 		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
 		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
@@ -89,6 +91,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
 		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
 		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
@@ -138,7 +142,7 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
 	srcs[1] = q;
 	coef[0] = raid6_gfexi[failb-faila];
 	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_sum_product(b, srcs, coef, bytes, submit);
 
 	/* Dy = P+Pxy+Dx */
@@ -188,23 +192,23 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
 	dp = blocks[faila];
 	dq = blocks[failb];
 
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_memcpy(dp, g, 0, 0, bytes, submit);
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
 
 	/* compute P + Pxy */
 	srcs[0] = dp;
 	srcs[1] = p;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
 
 	/* compute Q + Qxy */
 	srcs[0] = dq;
 	srcs[1] = q;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
 
 	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
@@ -212,7 +216,7 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
 	srcs[1] = dq;
 	coef[0] = raid6_gfexi[failb-faila];
 	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_sum_product(dq, srcs, coef, bytes, submit);
 
 	/* Dy = P+Pxy+Dx */
@@ -252,7 +256,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
 	blocks[failb] = (void *)raid6_empty_zero_page;
 	blocks[disks-1] = dq;
 
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
 
 	/* Restore pointer table */
@@ -264,15 +268,15 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
 	/* compute P + Pxy */
 	srcs[0] = dp;
 	srcs[1] = p;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
 
 	/* compute Q + Qxy */
 	srcs[0] = dq;
 	srcs[1] = q;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
 
 	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
@@ -280,7 +284,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
 	srcs[1] = dq;
 	coef[0] = raid6_gfexi[failb-faila];
 	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_sum_product(dq, srcs, coef, bytes, submit);
 
 	/* Dy = P+Pxy+Dx */
@@ -407,13 +411,16 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
 		int good = faila == 0 ? 1 : 0;
 		struct page *g = blocks[good];
 
-		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
 		tx = async_memcpy(p, g, 0, 0, bytes, submit);
 
-		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
 		tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
 	} else {
-		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
 		tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
 	}
 
@@ -426,11 +433,11 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
 
 	srcs[0] = dq;
 	srcs[1] = q;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
 
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_mult(dq, dq, coef, bytes, submit);
 
 	srcs[0] = p;
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 56b5f98da463..db279872ef3d 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -69,6 +69,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 		 */
 		if (src_cnt > xor_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
+			submit->flags |= ASYNC_TX_FENCE;
 			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
@@ -78,7 +79,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 		}
 		if (submit->cb_fn)
 			dma_flags |= DMA_PREP_INTERRUPT;
-
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		/* Since we have clobbered the src_list we are committed
 		 * to doing this asynchronously. Drivers force forward progress
 		 * in case they can not provide a descriptor
@@ -264,12 +266,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 		dma_src = (dma_addr_t *) src_list;
 
 	if (dma_src && device && src_cnt <= device->max_xor) {
-		unsigned long dma_prep_flags;
+		unsigned long dma_prep_flags = 0;
 		int i;
 
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
 
-		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		for (i = 0; i < src_cnt; i++)
 			dma_src[i] = dma_map_page(device->dev, src_list[i],
 						  offset, len, DMA_TO_DEVICE);