author     Dan Williams <dan.j.williams@intel.com>  2009-09-08 20:42:50 -0400
committer  Dan Williams <dan.j.williams@intel.com>  2009-09-08 20:42:50 -0400
commit     0403e3827788d878163f9ef0541b748b0f88ca5d (patch)
tree       2dc73744bd92c268a1310f24668167f130877278
parent     f9dd2134374c8de6b911e2b8652c6c9622eaa658 (diff)
dmaengine: add fence support

Some engines optimize operation by reading ahead in the descriptor chain
such that descriptor2 may start execution before descriptor1 completes.
If descriptor2 depends on the result from descriptor1 then a fence is
required (on descriptor2) to disable this optimization.  The async_tx api
could implicitly identify dependencies via the 'depend_tx' parameter, but
that would constrain cases where the dependency chain only specifies a
completion order rather than a data dependency.  So, provide an
ASYNC_TX_FENCE to explicitly identify data dependencies.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
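For illustration, a minimal caller-side sketch of the new flag (not taken from this patch; the helper name fenced_xor_then_copy and its parameters are hypothetical, the API calls are the existing async_tx ones): when the next operation in the chain consumes this operation's result, the submitter passes ASYNC_TX_FENCE to init_async_submit() and the async_tx core forwards it to the driver as DMA_PREP_FENCE.

/*
 * Minimal sketch, assuming the pages and scribble area are already set up:
 * descriptor2 (the memcpy) reads the xor result, so descriptor1 (the xor)
 * is submitted with ASYNC_TX_FENCE and the core sets DMA_PREP_FENCE on
 * that descriptor.
 */
#include <linux/async_tx.h>

static struct dma_async_tx_descriptor *
fenced_xor_then_copy(struct page *dest, struct page **srcs, int src_cnt,
		     struct page *copy, size_t len, addr_conv_t *scribble)
{
	struct dma_async_tx_descriptor *tx;
	struct async_submit_ctl submit;

	/* descriptor1: its result is an input to the next operation */
	init_async_submit(&submit, ASYNC_TX_FENCE, NULL, NULL, NULL, scribble);
	tx = async_xor(dest, srcs, 0, src_cnt, len, &submit);

	/* descriptor2: a data dependency, not just a completion order */
	init_async_submit(&submit, ASYNC_TX_ACK, tx, NULL, NULL, scribble);
	return async_memcpy(copy, dest, 0, 0, len, &submit);
}

The depend_tx parameter alone would only order descriptor2 after descriptor1; the fence is what tells a read-ahead engine that descriptor2 actually consumes descriptor1's output.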
-rw-r--r--  crypto/async_tx/async_memcpy.c        7
-rw-r--r--  crypto/async_tx/async_memset.c        7
-rw-r--r--  crypto/async_tx/async_pq.c            5
-rw-r--r--  crypto/async_tx/async_raid6_recov.c  47
-rw-r--r--  crypto/async_tx/async_xor.c          11
-rw-r--r--  drivers/md/raid5.c                   37
-rw-r--r--  include/linux/async_tx.h              3
-rw-r--r--  include/linux/dmaengine.h             3
8 files changed, 79 insertions, 41 deletions
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 98e15bd0dcb5..b38cbb3fd527 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -52,9 +52,12 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 
 	if (device) {
 		dma_addr_t dma_dest, dma_src;
-		unsigned long dma_prep_flags;
+		unsigned long dma_prep_flags = 0;
 
-		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
 					DMA_FROM_DEVICE);
 
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index b896a6e5f673..a374784e3329 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -49,9 +49,12 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len,
 
 	if (device) {
 		dma_addr_t dma_dest;
-		unsigned long dma_prep_flags;
+		unsigned long dma_prep_flags = 0;
 
-		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		dma_dest = dma_map_page(device->dev, dest, offset, len,
 					DMA_FROM_DEVICE);
 
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 108b21efb499..a25e290c39fb 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -101,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
 		 */
 		if (src_cnt > pq_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
+			submit->flags |= ASYNC_TX_FENCE;
 			dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
@@ -111,6 +112,8 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
 			if (cb_fn_orig)
 				dma_flags |= DMA_PREP_INTERRUPT;
 		}
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 
 		/* Since we have clobbered the src_list we are committed
 		 * to doing this asynchronously.  Drivers force forward
@@ -282,6 +285,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
 			dma_flags |= DMA_PREP_PQ_DISABLE_P;
 		if (!Q(blocks, disks))
 			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		for (i = 0; i < disks; i++)
 			if (likely(blocks[i])) {
 				BUG_ON(is_raid6_zero_block(blocks[i]));
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 0c14d48c9896..822a42d10061 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -44,6 +44,8 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
 		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
 		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
@@ -89,6 +91,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
 		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
 		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
@@ -138,7 +142,7 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
 	srcs[1] = q;
 	coef[0] = raid6_gfexi[failb-faila];
 	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_sum_product(b, srcs, coef, bytes, submit);
 
 	/* Dy = P+Pxy+Dx */
@@ -188,23 +192,23 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
 	dp = blocks[faila];
 	dq = blocks[failb];
 
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_memcpy(dp, g, 0, 0, bytes, submit);
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
 
 	/* compute P + Pxy */
 	srcs[0] = dp;
 	srcs[1] = p;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
 
 	/* compute Q + Qxy */
 	srcs[0] = dq;
 	srcs[1] = q;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
 
 	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
@@ -212,7 +216,7 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
 	srcs[1] = dq;
 	coef[0] = raid6_gfexi[failb-faila];
 	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_sum_product(dq, srcs, coef, bytes, submit);
 
 	/* Dy = P+Pxy+Dx */
@@ -252,7 +256,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
 	blocks[failb] = (void *)raid6_empty_zero_page;
 	blocks[disks-1] = dq;
 
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
 
 	/* Restore pointer table */
@@ -264,15 +268,15 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
 	/* compute P + Pxy */
 	srcs[0] = dp;
 	srcs[1] = p;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dp, srcs, 0, 2, bytes, submit);
 
 	/* compute Q + Qxy */
 	srcs[0] = dq;
 	srcs[1] = q;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
 
 	/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
@@ -280,7 +284,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
 	srcs[1] = dq;
 	coef[0] = raid6_gfexi[failb-faila];
 	coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_sum_product(dq, srcs, coef, bytes, submit);
 
 	/* Dy = P+Pxy+Dx */
@@ -407,13 +411,16 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
 		int good = faila == 0 ? 1 : 0;
 		struct page *g = blocks[good];
 
-		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
 		tx = async_memcpy(p, g, 0, 0, bytes, submit);
 
-		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
 		tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
 	} else {
-		init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+				  scribble);
 		tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
 	}
 
@@ -426,11 +433,11 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
 
 	srcs[0] = dq;
 	srcs[1] = q;
-	init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
-			  scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+			  NULL, NULL, scribble);
 	tx = async_xor(dq, srcs, 0, 2, bytes, submit);
 
-	init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
 	tx = async_mult(dq, dq, coef, bytes, submit);
 
 	srcs[0] = p;
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 56b5f98da463..db279872ef3d 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -69,6 +69,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 		 */
 		if (src_cnt > xor_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
+			submit->flags |= ASYNC_TX_FENCE;
 			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
@@ -78,7 +79,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 		}
 		if (submit->cb_fn)
 			dma_flags |= DMA_PREP_INTERRUPT;
-
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_flags |= DMA_PREP_FENCE;
 		/* Since we have clobbered the src_list we are committed
 		 * to doing this asynchronously.  Drivers force forward progress
 		 * in case they can not provide a descriptor
@@ -264,12 +266,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
 	dma_src = (dma_addr_t *) src_list;
 
 	if (dma_src && device && src_cnt <= device->max_xor) {
-		unsigned long dma_prep_flags;
+		unsigned long dma_prep_flags = 0;
 		int i;
 
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
 
-		dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		for (i = 0; i < src_cnt; i++)
 			dma_src[i] = dma_map_page(device->dev, src_list[i],
 						  offset, len, DMA_TO_DEVICE);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0a5cf2171214..54ef8d75541d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -502,13 +502,17 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
 	int i;
 	int page_offset;
 	struct async_submit_ctl submit;
+	enum async_tx_flags flags = 0;
 
 	if (bio->bi_sector >= sector)
 		page_offset = (signed)(bio->bi_sector - sector) * 512;
 	else
 		page_offset = (signed)(sector - bio->bi_sector) * -512;
 
-	init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
+	if (frombio)
+		flags |= ASYNC_TX_FENCE;
+	init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
+
 	bio_for_each_segment(bvl, bio, i) {
 		int len = bio_iovec_idx(bio, i)->bv_len;
 		int clen;
@@ -685,7 +689,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
 
 	atomic_inc(&sh->count);
 
-	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
 			  ops_complete_compute, sh, to_addr_conv(sh, percpu));
 	if (unlikely(count == 1))
 		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
@@ -763,7 +767,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
 		count = set_syndrome_sources(blocks, sh);
 		blocks[count] = NULL; /* regenerating p is not necessary */
 		BUG_ON(blocks[count+1] != dest); /* q should already be set */
-		init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
+		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+				  ops_complete_compute, sh,
 				  to_addr_conv(sh, percpu));
 		tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
 	} else {
@@ -775,8 +780,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
 			blocks[count++] = sh->dev[i].page;
 		}
 
-		init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
-				  ops_complete_compute, sh,
+		init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+				  NULL, ops_complete_compute, sh,
 				  to_addr_conv(sh, percpu));
 		tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
 	}
@@ -837,8 +842,9 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
 		/* Q disk is one of the missing disks */
 		if (faila == syndrome_disks) {
 			/* Missing P+Q, just recompute */
-			init_async_submit(&submit, 0, NULL, ops_complete_compute,
-					  sh, to_addr_conv(sh, percpu));
+			init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+					  ops_complete_compute, sh,
+					  to_addr_conv(sh, percpu));
 			return async_gen_syndrome(blocks, 0, count+2,
 						  STRIPE_SIZE, &submit);
 		} else {
@@ -859,21 +865,24 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
 				blocks[count++] = sh->dev[i].page;
 			}
 			dest = sh->dev[data_target].page;
-			init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
-					  NULL, NULL, to_addr_conv(sh, percpu));
+			init_async_submit(&submit,
+					  ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+					  NULL, NULL, NULL,
+					  to_addr_conv(sh, percpu));
 			tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
 				       &submit);
 
 			count = set_syndrome_sources(blocks, sh);
-			init_async_submit(&submit, 0, tx, ops_complete_compute,
-					  sh, to_addr_conv(sh, percpu));
+			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+					  ops_complete_compute, sh,
+					  to_addr_conv(sh, percpu));
 			return async_gen_syndrome(blocks, 0, count+2,
 						  STRIPE_SIZE, &submit);
 		}
 	}
 
-	init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
-			  to_addr_conv(sh, percpu));
+	init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute,
+			  sh, to_addr_conv(sh, percpu));
 	if (failb == syndrome_disks) {
 		/* We're missing D+P. */
 		return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE,
@@ -916,7 +925,7 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
 			xor_srcs[count++] = dev->page;
 	}
 
-	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx,
+	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
 			  ops_complete_prexor, sh, to_addr_conv(sh, percpu));
 	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 866e61c4e2e0..a1c486a88e88 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -58,11 +58,14 @@ struct dma_chan_ref {
  * array.
  * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
  * dependency chain
+ * @ASYNC_TX_FENCE: specify that the next operation in the dependency
+ * chain uses this operation's result as an input
  */
 enum async_tx_flags {
 	ASYNC_TX_XOR_ZERO_DST	 = (1 << 0),
 	ASYNC_TX_XOR_DROP_DST	 = (1 << 1),
 	ASYNC_TX_ACK		 = (1 << 2),
+	ASYNC_TX_FENCE		 = (1 << 3),
 };
 
 /**
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 1012f1abcb54..4d6c1c925fd4 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -87,6 +87,8 @@ enum dma_transaction_type {
  * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
  *  sources that were the result of a previous operation, in the case of a PQ
  *  operation it continues the calculation with new sources
+ * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
+ *  on the result of this operation
  */
 enum dma_ctrl_flags {
 	DMA_PREP_INTERRUPT = (1 << 0),
@@ -98,6 +100,7 @@ enum dma_ctrl_flags {
 	DMA_PREP_PQ_DISABLE_P = (1 << 6),
 	DMA_PREP_PQ_DISABLE_Q = (1 << 7),
 	DMA_PREP_CONTINUE = (1 << 8),
+	DMA_PREP_FENCE = (1 << 9),
 };
 
 /**