-rw-r--r--  Documentation/crypto/async-tx-api.txt        |   75
-rw-r--r--  arch/arm/include/asm/hardware/iop3xx-adma.h  |   81
-rw-r--r--  arch/arm/include/asm/hardware/iop_adma.h     |    1
-rw-r--r--  arch/arm/mach-iop13xx/include/mach/adma.h    |  119
-rw-r--r--  arch/arm/mach-iop13xx/setup.c                |   17
-rw-r--r--  arch/arm/plat-iop/adma.c                     |    4
-rw-r--r--  crypto/async_tx/Kconfig                      |    9
-rw-r--r--  crypto/async_tx/Makefile                     |    3
-rw-r--r--  crypto/async_tx/async_memcpy.c               |   44
-rw-r--r--  crypto/async_tx/async_memset.c               |   43
-rw-r--r--  crypto/async_tx/async_pq.c                   |  395
-rw-r--r--  crypto/async_tx/async_raid6_recov.c          |  455
-rw-r--r--  crypto/async_tx/async_tx.c                   |   87
-rw-r--r--  crypto/async_tx/async_xor.c                  |  207
-rw-r--r--  crypto/async_tx/raid6test.c                  |  241
-rw-r--r--  drivers/dma/Kconfig                          |    4
-rw-r--r--  drivers/dma/dmaengine.c                      |   93
-rw-r--r--  drivers/dma/dmatest.c                        |   40
-rw-r--r--  drivers/dma/ioat/Makefile                    |    2
-rw-r--r--  drivers/dma/ioat/dma.c                       |  136
-rw-r--r--  drivers/dma/ioat/dma.h                       |   34
-rw-r--r--  drivers/dma/ioat/dma_v2.c                    |  129
-rw-r--r--  drivers/dma/ioat/dma_v2.h                    |   45
-rw-r--r--  drivers/dma/ioat/dma_v3.c                    | 1220
-rw-r--r--  drivers/dma/ioat/hw.h                        |  142
-rw-r--r--  drivers/dma/ioat/pci.c                       |   45
-rw-r--r--  drivers/dma/ioat/registers.h                 |   20
-rw-r--r--  drivers/dma/iop-adma.c                       |  482
-rw-r--r--  drivers/md/Kconfig                           |   26
-rw-r--r--  drivers/md/raid5.c                           | 1494
-rw-r--r--  drivers/md/raid5.h                           |   28
-rw-r--r--  include/linux/async_tx.h                     |  129
-rw-r--r--  include/linux/dmaengine.h                    |  176
-rw-r--r--  include/linux/pci_ids.h                      |   10
34 files changed, 4939 insertions(+), 1097 deletions(-)
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 9f59fcbf5d82..ba046b8fa92f 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -54,20 +54,23 @@ features surfaced as a result:
 
 3.1 General format of the API:
 struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>,
-		  enum async_tx_flags flags,
-		  struct dma_async_tx_descriptor *dependency,
-		  dma_async_tx_callback callback_routine,
-		  void *callback_parameter);
+async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
 
 3.2 Supported operations:
 memcpy  - memory copy between a source and a destination buffer
 memset  - fill a destination buffer with a byte value
 xor     - xor a series of source buffers and write the result to a
           destination buffer
-xor_zero_sum - xor a series of source buffers and set a flag if the
-               result is zero.  The implementation attempts to prevent
-               writes to memory
+xor_val - xor a series of source buffers and set a flag if the
+          result is zero.  The implementation attempts to prevent
+          writes to memory
+pq      - generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val  - validate that a p and/or q buffer is in sync with a given series of
+          sources
+datap   - (raid6_datap_recov) recover a raid6 data block and the p block
+          from the given sources
+2data   - (raid6_2data_recov) recover 2 raid6 data blocks from the given
+          sources
 
 3.3 Descriptor management:
 The return value is non-NULL and points to a 'descriptor' when the operation
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
 recycle (or free) the descriptor.  A descriptor can be acked by one of the
 following methods:
 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
-   descriptor of a new operation.
+2/ submitting an unacknowledged descriptor as a dependency to another
+   async_tx call will implicitly set the acknowledged state.
 3/ calling async_tx_ack() on the descriptor.
 
 3.4 When does the operation execute?
@@ -119,30 +122,41 @@ of an operation.
 Perform a xor->copy->xor operation where each operation depends on the
 result from the previous operation:
 
-void complete_xor_copy_xor(void *param)
+void callback(void *param)
 {
-	printk("complete\n");
+	struct completion *cmp = param;
+
+	complete(cmp);
 }
 
-int run_xor_copy_xor(struct page **xor_srcs,
-		     int xor_src_cnt,
-		     struct page *xor_dest,
-		     size_t xor_len,
-		     struct page *copy_src,
-		     struct page *copy_dest,
-		     size_t copy_len)
+void run_xor_copy_xor(struct page **xor_srcs,
+		      int xor_src_cnt,
+		      struct page *xor_dest,
+		      size_t xor_len,
+		      struct page *copy_src,
+		      struct page *copy_dest,
+		      size_t copy_len)
 {
 	struct dma_async_tx_descriptor *tx;
+	addr_conv_t addr_conv[xor_src_cnt];
+	struct async_submit_ctl submit;
+	struct completion cmp;
+
+	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+			  addr_conv);
+	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
 
-	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-		       ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
-	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
-			  ASYNC_TX_DEP_ACK, tx, NULL, NULL);
-	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
-		       tx, complete_xor_copy_xor, NULL);
+	submit.depend_tx = tx;
+	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+	init_completion(&cmp);
+	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+			  callback, &cmp, addr_conv);
+	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
 
 	async_tx_issue_pending_all();
+
+	wait_for_completion(&cmp);
 }
 
 See include/linux/async_tx.h for more information on the flags.  See the
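
The documentation example above still only exercises xor and memcpy. A minimal sketch of driving the newly documented pq operation through the same async_submit_ctl machinery follows; it is illustrative only (not part of this patch), the function and variable names are invented, and error handling is omitted.

/* illustrative sketch, not part of this patch */
#include <linux/async_tx.h>

static struct dma_async_tx_descriptor *
example_gen_syndrome(struct page **blocks, int disks, size_t len,
		     addr_conv_t *addr_conv)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;

	/* blocks[0..disks-3] hold data, blocks[disks-2] receives P and
	 * blocks[disks-1] receives Q, per the async_gen_syndrome convention
	 */
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, addr_conv);
	tx = async_gen_syndrome(blocks, 0, disks, len, &submit);
	async_tx_issue_pending_all();

	return tx;
}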
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 83e6ba338e2c..1a8c7279a28b 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -187,11 +187,74 @@ union iop3xx_desc {
187 void *ptr; 187 void *ptr;
188}; 188};
189 189
190/* No support for p+q operations */
191static inline int
192iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
193{
194 BUG();
195 return 0;
196}
197
198static inline void
199iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
200 unsigned long flags)
201{
202 BUG();
203}
204
205static inline void
206iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
207{
208 BUG();
209}
210
211static inline void
212iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
213 dma_addr_t addr, unsigned char coef)
214{
215 BUG();
216}
217
218static inline int
219iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
220{
221 BUG();
222 return 0;
223}
224
225static inline void
226iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
227 unsigned long flags)
228{
229 BUG();
230}
231
232static inline void
233iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
234{
235 BUG();
236}
237
238#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
239
240static inline void
241iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
242 dma_addr_t *src)
243{
244 BUG();
245}
246
190static inline int iop_adma_get_max_xor(void) 247static inline int iop_adma_get_max_xor(void)
191{ 248{
192 return 32; 249 return 32;
193} 250}
194 251
252static inline int iop_adma_get_max_pq(void)
253{
254 BUG();
255 return 0;
256}
257
195static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) 258static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
196{ 259{
197 int id = chan->device->id; 260 int id = chan->device->id;
@@ -332,6 +395,11 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
332 return slot_cnt; 395 return slot_cnt;
333} 396}
334 397
398static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
399{
400 return 0;
401}
402
335static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, 403static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
336 struct iop_adma_chan *chan) 404 struct iop_adma_chan *chan)
337{ 405{
@@ -349,6 +417,14 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
349 return 0; 417 return 0;
350} 418}
351 419
420
421static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
422 struct iop_adma_chan *chan)
423{
424 BUG();
425 return 0;
426}
427
352static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, 428static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
353 struct iop_adma_chan *chan) 429 struct iop_adma_chan *chan)
354{ 430{
@@ -756,13 +832,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
756 hw_desc->src[0] = val; 832 hw_desc->src[0] = val;
757} 833}
758 834
759static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) 835static inline enum sum_check_flags
836iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
760{ 837{
761 struct iop3xx_desc_aau *hw_desc = desc->hw_desc; 838 struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
762 struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; 839 struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
763 840
764 iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); 841 iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
765 return desc_ctrl.zero_result_err; 842 return desc_ctrl.zero_result_err << SUM_CHECK_P;
766} 843}
767 844
768static inline void iop_chan_append(struct iop_adma_chan *chan) 845static inline void iop_chan_append(struct iop_adma_chan *chan)
diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h
index 95dc133d0a7f..59b8c3892f76 100644
--- a/arch/arm/include/asm/hardware/iop_adma.h
+++ b/arch/arm/include/asm/hardware/iop_adma.h
@@ -108,6 +108,7 @@ struct iop_adma_desc_slot {
108 union { 108 union {
109 u32 *xor_check_result; 109 u32 *xor_check_result;
110 u32 *crc32_result; 110 u32 *crc32_result;
111 u32 *pq_check_result;
111 }; 112 };
112}; 113};
113 114
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h
index 5722e86f2174..6d3782d85a9f 100644
--- a/arch/arm/mach-iop13xx/include/mach/adma.h
+++ b/arch/arm/mach-iop13xx/include/mach/adma.h
@@ -150,6 +150,8 @@ static inline int iop_adma_get_max_xor(void)
150 return 16; 150 return 16;
151} 151}
152 152
153#define iop_adma_get_max_pq iop_adma_get_max_xor
154
153static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) 155static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
154{ 156{
155 return __raw_readl(ADMA_ADAR(chan)); 157 return __raw_readl(ADMA_ADAR(chan));
@@ -211,7 +213,10 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
211#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT 213#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
212#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT 214#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
213#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT 215#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
216#define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
214#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o) 217#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
218#define iop_chan_pq_slot_count iop_chan_xor_slot_count
219#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
215 220
216static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, 221static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
217 struct iop_adma_chan *chan) 222 struct iop_adma_chan *chan)
@@ -220,6 +225,13 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
220 return hw_desc->dest_addr; 225 return hw_desc->dest_addr;
221} 226}
222 227
228static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
229 struct iop_adma_chan *chan)
230{
231 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
232 return hw_desc->q_dest_addr;
233}
234
223static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, 235static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
224 struct iop_adma_chan *chan) 236 struct iop_adma_chan *chan)
225{ 237{
@@ -319,6 +331,58 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
319 return 1; 331 return 1;
320} 332}
321 333
334static inline void
335iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
336 unsigned long flags)
337{
338 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
339 union {
340 u32 value;
341 struct iop13xx_adma_desc_ctrl field;
342 } u_desc_ctrl;
343
344 u_desc_ctrl.value = 0;
345 u_desc_ctrl.field.src_select = src_cnt - 1;
346 u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
347 u_desc_ctrl.field.pq_xfer_en = 1;
348 u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
349 u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
350 hw_desc->desc_ctrl = u_desc_ctrl.value;
351}
352
353static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
354{
355 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
356 union {
357 u32 value;
358 struct iop13xx_adma_desc_ctrl field;
359 } u_desc_ctrl;
360
361 u_desc_ctrl.value = hw_desc->desc_ctrl;
362 return u_desc_ctrl.field.pq_xfer_en;
363}
364
365static inline void
366iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
367 unsigned long flags)
368{
369 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
370 union {
371 u32 value;
372 struct iop13xx_adma_desc_ctrl field;
373 } u_desc_ctrl;
374
375 u_desc_ctrl.value = 0;
376 u_desc_ctrl.field.src_select = src_cnt - 1;
377 u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
378 u_desc_ctrl.field.zero_result = 1;
379 u_desc_ctrl.field.status_write_back_en = 1;
380 u_desc_ctrl.field.pq_xfer_en = 1;
381 u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
382 u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
383 hw_desc->desc_ctrl = u_desc_ctrl.value;
384}
385
322static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, 386static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
323 struct iop_adma_chan *chan, 387 struct iop_adma_chan *chan,
324 u32 byte_count) 388 u32 byte_count)
@@ -351,6 +415,7 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
351 } 415 }
352} 416}
353 417
418#define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count
354 419
355static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, 420static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
356 struct iop_adma_chan *chan, 421 struct iop_adma_chan *chan,
@@ -361,6 +426,16 @@ static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
361 hw_desc->upper_dest_addr = 0; 426 hw_desc->upper_dest_addr = 0;
362} 427}
363 428
429static inline void
430iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
431{
432 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
433
434 hw_desc->dest_addr = addr[0];
435 hw_desc->q_dest_addr = addr[1];
436 hw_desc->upper_dest_addr = 0;
437}
438
364static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, 439static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
365 dma_addr_t addr) 440 dma_addr_t addr)
366{ 441{
@@ -389,6 +464,29 @@ static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
389} 464}
390 465
391static inline void 466static inline void
467iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
468 dma_addr_t addr, unsigned char coef)
469{
470 int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
471 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
472 struct iop13xx_adma_src *src;
473 int i = 0;
474
475 do {
476 iter = iop_hw_desc_slot_idx(hw_desc, i);
477 src = &iter->src[src_idx];
478 src->src_addr = addr;
479 src->pq_upper_src_addr = 0;
480 src->pq_dmlt = coef;
481 slot_cnt -= slots_per_op;
482 if (slot_cnt) {
483 i += slots_per_op;
484 addr += IOP_ADMA_PQ_MAX_BYTE_COUNT;
485 }
486 } while (slot_cnt);
487}
488
489static inline void
392iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, 490iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
393 struct iop_adma_chan *chan) 491 struct iop_adma_chan *chan)
394{ 492{
@@ -399,6 +497,15 @@ iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
399} 497}
400 498
401#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr 499#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
500#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
501
502static inline void
503iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
504 dma_addr_t *src)
505{
506 iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]);
507 iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]);
508}
402 509
403static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, 510static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
404 u32 next_desc_addr) 511 u32 next_desc_addr)
@@ -428,18 +535,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
428 hw_desc->block_fill_data = val; 535 hw_desc->block_fill_data = val;
429} 536}
430 537
431static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) 538static inline enum sum_check_flags
539iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
432{ 540{
433 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 541 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
434 struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; 542 struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
435 struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; 543 struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
544 enum sum_check_flags flags;
436 545
437 BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); 546 BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
438 547
439 if (desc_ctrl.pq_xfer_en) 548 flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
440 return byte_count.zero_result_err_q; 549 flags |= byte_count.zero_result_err << SUM_CHECK_P;
441 else 550
442 return byte_count.zero_result_err; 551 return flags;
443} 552}
444 553
445static inline void iop_chan_append(struct iop_adma_chan *chan) 554static inline void iop_chan_append(struct iop_adma_chan *chan)
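
iop_desc_get_zero_result() now returns an enum sum_check_flags with the P result at bit SUM_CHECK_P and the Q result at bit SUM_CHECK_Q. As an illustration only (not from this patch, and assuming the SUM_CHECK_P_RESULT/SUM_CHECK_Q_RESULT helpers that the reworked include/linux/dmaengine.h is expected to provide), a caller could decode the value like this:

/* illustrative sketch, not part of this patch */
#include <linux/dmaengine.h>

static void example_report_zero_result(enum sum_check_flags res)
{
	/* bit SUM_CHECK_P: xor/P parity mismatch */
	if (res & SUM_CHECK_P_RESULT)
		pr_debug("P parity check failed\n");
	/* bit SUM_CHECK_Q: raid6 Q syndrome mismatch */
	if (res & SUM_CHECK_Q_RESULT)
		pr_debug("Q syndrome check failed\n");
}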
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index bee42c609df6..5c147fb66a01 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void)
 			plat_data = &iop13xx_adma_0_data;
 			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+			dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
 			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 			break;
 		case IOP13XX_INIT_ADMA_1:
@@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void)
 			plat_data = &iop13xx_adma_1_data;
 			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+			dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
 			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
 			break;
 		case IOP13XX_INIT_ADMA_2:
@@ -501,14 +497,11 @@ void __init iop13xx_platform_init(void)
 			plat_data = &iop13xx_adma_2_data;
 			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
 			dma_cap_set(DMA_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+			dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
 			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
-			dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
-			dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
-			dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
+			dma_cap_set(DMA_PQ, plat_data->cap_mask);
+			dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
 			break;
 		}
 	}
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index 3c127aabe214..1ff6a37e893c 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void)
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
 	#else
 	dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
-	dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
 	#endif
 
@@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void)
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
 	#else
 	dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
-	dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
 	#endif
 
@@ -198,7 +196,7 @@ static int __init iop3xx_adma_cap_init(void)
 	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
 	#else
 	dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
-	dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
+	dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
 	dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
 	#endif
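
These platform changes advertise DMA_XOR_VAL (and, on the iop13xx ADMA 2 channel, DMA_PQ/DMA_PQ_VAL) in place of the removed DMA_ZERO_SUM and DMA_PQ_* capabilities. A sketch of how a client might probe a channel for the renamed capabilities, illustrative only and not part of this patch:

/* illustrative sketch, not part of this patch */
#include <linux/dmaengine.h>

static bool example_chan_can_validate(struct dma_chan *chan)
{
	struct dma_device *dev = chan->device;

	/* true if the engine can check xor (P) and/or pq (P+Q) results */
	return dma_has_cap(DMA_XOR_VAL, dev->cap_mask) ||
	       dma_has_cap(DMA_PQ_VAL, dev->cap_mask);
}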
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
index d8fb39145986..e5aeb2b79e6f 100644
--- a/crypto/async_tx/Kconfig
+++ b/crypto/async_tx/Kconfig
@@ -14,3 +14,12 @@ config ASYNC_MEMSET
 	tristate
 	select ASYNC_CORE
 
+config ASYNC_PQ
+	tristate
+	select ASYNC_CORE
+
+config ASYNC_RAID6_RECOV
+	tristate
+	select ASYNC_CORE
+	select ASYNC_PQ
+
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
index 27baa7d52fbc..d1e0e6f72bc1 100644
--- a/crypto/async_tx/Makefile
+++ b/crypto/async_tx/Makefile
@@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
 obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
 obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
 obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+obj-$(CONFIG_ASYNC_PQ) += async_pq.o
+obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
+obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index ddccfb01c416..0ec1fb69d4ea 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -33,28 +33,31 @@
  * async_memcpy - attempt to copy memory with a dma engine.
  * @dest: destination page
  * @src: src page
- * @offset: offset in pages to start transaction
+ * @dest_offset: offset into 'dest' to start transaction
+ * @src_offset: offset into 'src' to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
- * @depend_tx: memcpy depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
  */
 struct dma_async_tx_descriptor *
 async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
-	     unsigned int src_offset, size_t len, enum async_tx_flags flags,
-	     struct dma_async_tx_descriptor *depend_tx,
-	     dma_async_tx_callback cb_fn, void *cb_param)
+	     unsigned int src_offset, size_t len,
+	     struct async_submit_ctl *submit)
 {
-	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
+	struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
 						      &dest, 1, &src, 1, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
 
-	if (device) {
+	if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
 		dma_addr_t dma_dest, dma_src;
-		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+		unsigned long dma_prep_flags = 0;
 
+		if (submit->cb_fn)
+			dma_prep_flags |= DMA_PREP_INTERRUPT;
+		if (submit->flags & ASYNC_TX_FENCE)
+			dma_prep_flags |= DMA_PREP_FENCE;
 		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
 					DMA_FROM_DEVICE);
 
@@ -67,13 +70,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 
 	if (tx) {
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
-		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+		async_tx_submit(chan, tx, submit);
 	} else {
 		void *dest_buf, *src_buf;
 		pr_debug("%s: (sync) len: %zu\n", __func__, len);
 
 		/* wait for any prerequisite operations */
-		async_tx_quiesce(&depend_tx);
+		async_tx_quiesce(&submit->depend_tx);
 
 		dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
 		src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@@ -83,26 +86,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 		kunmap_atomic(dest_buf, KM_USER0);
 		kunmap_atomic(src_buf, KM_USER1);
 
-		async_tx_sync_epilog(cb_fn, cb_param);
+		async_tx_sync_epilog(submit);
 	}
 
 	return tx;
 }
 EXPORT_SYMBOL_GPL(async_memcpy);
 
-static int __init async_memcpy_init(void)
-{
-	return 0;
-}
-
-static void __exit async_memcpy_exit(void)
-{
-	do { } while (0);
-}
-
-module_init(async_memcpy_init);
-module_exit(async_memcpy_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("asynchronous memcpy api");
 MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 5b5eb99bb244..58e4a8752aee 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -35,26 +35,26 @@
35 * @val: fill value 35 * @val: fill value
36 * @offset: offset in pages to start transaction 36 * @offset: offset in pages to start transaction
37 * @len: length in bytes 37 * @len: length in bytes
38 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 38 *
39 * @depend_tx: memset depends on the result of this transaction 39 * honored flags: ASYNC_TX_ACK
40 * @cb_fn: function to call when the memcpy completes
41 * @cb_param: parameter to pass to the callback routine
42 */ 40 */
43struct dma_async_tx_descriptor * 41struct dma_async_tx_descriptor *
44async_memset(struct page *dest, int val, unsigned int offset, 42async_memset(struct page *dest, int val, unsigned int offset, size_t len,
45 size_t len, enum async_tx_flags flags, 43 struct async_submit_ctl *submit)
46 struct dma_async_tx_descriptor *depend_tx,
47 dma_async_tx_callback cb_fn, void *cb_param)
48{ 44{
49 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, 45 struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
50 &dest, 1, NULL, 0, len); 46 &dest, 1, NULL, 0, len);
51 struct dma_device *device = chan ? chan->device : NULL; 47 struct dma_device *device = chan ? chan->device : NULL;
52 struct dma_async_tx_descriptor *tx = NULL; 48 struct dma_async_tx_descriptor *tx = NULL;
53 49
54 if (device) { 50 if (device && is_dma_fill_aligned(device, offset, 0, len)) {
55 dma_addr_t dma_dest; 51 dma_addr_t dma_dest;
56 unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; 52 unsigned long dma_prep_flags = 0;
57 53
54 if (submit->cb_fn)
55 dma_prep_flags |= DMA_PREP_INTERRUPT;
56 if (submit->flags & ASYNC_TX_FENCE)
57 dma_prep_flags |= DMA_PREP_FENCE;
58 dma_dest = dma_map_page(device->dev, dest, offset, len, 58 dma_dest = dma_map_page(device->dev, dest, offset, len,
59 DMA_FROM_DEVICE); 59 DMA_FROM_DEVICE);
60 60
@@ -64,38 +64,25 @@ async_memset(struct page *dest, int val, unsigned int offset,
64 64
65 if (tx) { 65 if (tx) {
66 pr_debug("%s: (async) len: %zu\n", __func__, len); 66 pr_debug("%s: (async) len: %zu\n", __func__, len);
67 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 67 async_tx_submit(chan, tx, submit);
68 } else { /* run the memset synchronously */ 68 } else { /* run the memset synchronously */
69 void *dest_buf; 69 void *dest_buf;
70 pr_debug("%s: (sync) len: %zu\n", __func__, len); 70 pr_debug("%s: (sync) len: %zu\n", __func__, len);
71 71
72 dest_buf = (void *) (((char *) page_address(dest)) + offset); 72 dest_buf = page_address(dest) + offset;
73 73
74 /* wait for any prerequisite operations */ 74 /* wait for any prerequisite operations */
75 async_tx_quiesce(&depend_tx); 75 async_tx_quiesce(&submit->depend_tx);
76 76
77 memset(dest_buf, val, len); 77 memset(dest_buf, val, len);
78 78
79 async_tx_sync_epilog(cb_fn, cb_param); 79 async_tx_sync_epilog(submit);
80 } 80 }
81 81
82 return tx; 82 return tx;
83} 83}
84EXPORT_SYMBOL_GPL(async_memset); 84EXPORT_SYMBOL_GPL(async_memset);
85 85
86static int __init async_memset_init(void)
87{
88 return 0;
89}
90
91static void __exit async_memset_exit(void)
92{
93 do { } while (0);
94}
95
96module_init(async_memset_init);
97module_exit(async_memset_exit);
98
99MODULE_AUTHOR("Intel Corporation"); 86MODULE_AUTHOR("Intel Corporation");
100MODULE_DESCRIPTION("asynchronous memset api"); 87MODULE_DESCRIPTION("asynchronous memset api");
101MODULE_LICENSE("GPL"); 88MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
new file mode 100644
index 000000000000..b88db6d1dc65
--- /dev/null
+++ b/crypto/async_tx/async_pq.c
@@ -0,0 +1,395 @@
1/*
2 * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
3 * Copyright(c) 2009 Intel Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59
17 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * The full GNU General Public License is included in this distribution in the
20 * file called COPYING.
21 */
22#include <linux/kernel.h>
23#include <linux/interrupt.h>
24#include <linux/dma-mapping.h>
25#include <linux/raid/pq.h>
26#include <linux/async_tx.h>
27
28/**
29 * scribble - space to hold throwaway P buffer for synchronous gen_syndrome
30 */
31static struct page *scribble;
32
33static bool is_raid6_zero_block(struct page *p)
34{
35 return p == (void *) raid6_empty_zero_page;
36}
37
38/* the struct page *blocks[] parameter passed to async_gen_syndrome()
39 * and async_syndrome_val() contains the 'P' destination address at
40 * blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
41 *
42 * note: these are macros as they are used as lvalues
43 */
44#define P(b, d) (b[d-2])
45#define Q(b, d) (b[d-1])
46
47/**
48 * do_async_gen_syndrome - asynchronously calculate P and/or Q
49 */
50static __async_inline struct dma_async_tx_descriptor *
51do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
52 const unsigned char *scfs, unsigned int offset, int disks,
53 size_t len, dma_addr_t *dma_src,
54 struct async_submit_ctl *submit)
55{
56 struct dma_async_tx_descriptor *tx = NULL;
57 struct dma_device *dma = chan->device;
58 enum dma_ctrl_flags dma_flags = 0;
59 enum async_tx_flags flags_orig = submit->flags;
60 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
61 dma_async_tx_callback cb_param_orig = submit->cb_param;
62 int src_cnt = disks - 2;
63 unsigned char coefs[src_cnt];
64 unsigned short pq_src_cnt;
65 dma_addr_t dma_dest[2];
66 int src_off = 0;
67 int idx;
68 int i;
69
70 /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
71 if (P(blocks, disks))
72 dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
73 len, DMA_BIDIRECTIONAL);
74 else
75 dma_flags |= DMA_PREP_PQ_DISABLE_P;
76 if (Q(blocks, disks))
77 dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
78 len, DMA_BIDIRECTIONAL);
79 else
80 dma_flags |= DMA_PREP_PQ_DISABLE_Q;
81
82 /* convert source addresses being careful to collapse 'empty'
83 * sources and update the coefficients accordingly
84 */
85 for (i = 0, idx = 0; i < src_cnt; i++) {
86 if (is_raid6_zero_block(blocks[i]))
87 continue;
88 dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
89 DMA_TO_DEVICE);
90 coefs[idx] = scfs[i];
91 idx++;
92 }
93 src_cnt = idx;
94
95 while (src_cnt > 0) {
96 submit->flags = flags_orig;
97 pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
98 /* if we are submitting additional pqs, leave the chain open,
99 * clear the callback parameters, and leave the destination
100 * buffers mapped
101 */
102 if (src_cnt > pq_src_cnt) {
103 submit->flags &= ~ASYNC_TX_ACK;
104 submit->flags |= ASYNC_TX_FENCE;
105 dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
106 submit->cb_fn = NULL;
107 submit->cb_param = NULL;
108 } else {
109 dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
110 submit->cb_fn = cb_fn_orig;
111 submit->cb_param = cb_param_orig;
112 if (cb_fn_orig)
113 dma_flags |= DMA_PREP_INTERRUPT;
114 }
115 if (submit->flags & ASYNC_TX_FENCE)
116 dma_flags |= DMA_PREP_FENCE;
117
118 /* Since we have clobbered the src_list we are committed
119 * to doing this asynchronously. Drivers force forward
120 * progress in case they can not provide a descriptor
121 */
122 for (;;) {
123 tx = dma->device_prep_dma_pq(chan, dma_dest,
124 &dma_src[src_off],
125 pq_src_cnt,
126 &coefs[src_off], len,
127 dma_flags);
128 if (likely(tx))
129 break;
130 async_tx_quiesce(&submit->depend_tx);
131 dma_async_issue_pending(chan);
132 }
133
134 async_tx_submit(chan, tx, submit);
135 submit->depend_tx = tx;
136
137 /* drop completed sources */
138 src_cnt -= pq_src_cnt;
139 src_off += pq_src_cnt;
140
141 dma_flags |= DMA_PREP_CONTINUE;
142 }
143
144 return tx;
145}
146
147/**
148 * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
149 */
150static void
151do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
152 size_t len, struct async_submit_ctl *submit)
153{
154 void **srcs;
155 int i;
156
157 if (submit->scribble)
158 srcs = submit->scribble;
159 else
160 srcs = (void **) blocks;
161
162 for (i = 0; i < disks; i++) {
163 if (is_raid6_zero_block(blocks[i])) {
164 BUG_ON(i > disks - 3); /* P or Q can't be zero */
165 srcs[i] = blocks[i];
166 } else
167 srcs[i] = page_address(blocks[i]) + offset;
168 }
169 raid6_call.gen_syndrome(disks, len, srcs);
170 async_tx_sync_epilog(submit);
171}
172
173/**
174 * async_gen_syndrome - asynchronously calculate a raid6 syndrome
175 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
176 * @offset: common offset into each block (src and dest) to start transaction
177 * @disks: number of blocks (including missing P or Q, see below)
178 * @len: length of operation in bytes
179 * @submit: submission/completion modifiers
180 *
181 * General note: This routine assumes a field of GF(2^8) with a
182 * primitive polynomial of 0x11d and a generator of {02}.
183 *
184 * 'disks' note: callers can optionally omit either P or Q (but not
185 * both) from the calculation by setting blocks[disks-2] or
186 * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <=
187 * PAGE_SIZE as a temporary buffer of this size is used in the
188 * synchronous path. 'disks' always accounts for both destination
189 * buffers.
190 *
191 * 'blocks' note: if submit->scribble is NULL then the contents of
192 * 'blocks' may be overridden
193 */
194struct dma_async_tx_descriptor *
195async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
196 size_t len, struct async_submit_ctl *submit)
197{
198 int src_cnt = disks - 2;
199 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
200 &P(blocks, disks), 2,
201 blocks, src_cnt, len);
202 struct dma_device *device = chan ? chan->device : NULL;
203 dma_addr_t *dma_src = NULL;
204
205 BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
206
207 if (submit->scribble)
208 dma_src = submit->scribble;
209 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
210 dma_src = (dma_addr_t *) blocks;
211
212 if (dma_src && device &&
213 (src_cnt <= dma_maxpq(device, 0) ||
214 dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
215 is_dma_pq_aligned(device, offset, 0, len)) {
216 /* run the p+q asynchronously */
217 pr_debug("%s: (async) disks: %d len: %zu\n",
218 __func__, disks, len);
219 return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
220 disks, len, dma_src, submit);
221 }
222
223 /* run the pq synchronously */
224 pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
225
226 /* wait for any prerequisite operations */
227 async_tx_quiesce(&submit->depend_tx);
228
229 if (!P(blocks, disks)) {
230 P(blocks, disks) = scribble;
231 BUG_ON(len + offset > PAGE_SIZE);
232 }
233 if (!Q(blocks, disks)) {
234 Q(blocks, disks) = scribble;
235 BUG_ON(len + offset > PAGE_SIZE);
236 }
237 do_sync_gen_syndrome(blocks, offset, disks, len, submit);
238
239 return NULL;
240}
241EXPORT_SYMBOL_GPL(async_gen_syndrome);
242
243/**
244 * async_syndrome_val - asynchronously validate a raid6 syndrome
245 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
246 * @offset: common offset into each block (src and dest) to start transaction
247 * @disks: number of blocks (including missing P or Q, see below)
248 * @len: length of operation in bytes
249 * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
250 * @spare: temporary result buffer for the synchronous case
251 * @submit: submission / completion modifiers
252 *
253 * The same notes from async_gen_syndrome apply to the 'blocks',
254 * and 'disks' parameters of this routine. The synchronous path
255 * requires a temporary result buffer and submit->scribble to be
256 * specified.
257 */
258struct dma_async_tx_descriptor *
259async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
260 size_t len, enum sum_check_flags *pqres, struct page *spare,
261 struct async_submit_ctl *submit)
262{
263 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
264 NULL, 0, blocks, disks,
265 len);
266 struct dma_device *device = chan ? chan->device : NULL;
267 struct dma_async_tx_descriptor *tx;
268 enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
269 dma_addr_t *dma_src = NULL;
270
271 BUG_ON(disks < 4);
272
273 if (submit->scribble)
274 dma_src = submit->scribble;
275 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
276 dma_src = (dma_addr_t *) blocks;
277
278 if (dma_src && device && disks <= dma_maxpq(device, 0) &&
279 is_dma_pq_aligned(device, offset, 0, len)) {
280 struct device *dev = device->dev;
281 dma_addr_t *pq = &dma_src[disks-2];
282 int i;
283
284 pr_debug("%s: (async) disks: %d len: %zu\n",
285 __func__, disks, len);
286 if (!P(blocks, disks))
287 dma_flags |= DMA_PREP_PQ_DISABLE_P;
288 if (!Q(blocks, disks))
289 dma_flags |= DMA_PREP_PQ_DISABLE_Q;
290 if (submit->flags & ASYNC_TX_FENCE)
291 dma_flags |= DMA_PREP_FENCE;
292 for (i = 0; i < disks; i++)
293 if (likely(blocks[i])) {
294 BUG_ON(is_raid6_zero_block(blocks[i]));
295 dma_src[i] = dma_map_page(dev, blocks[i],
296 offset, len,
297 DMA_TO_DEVICE);
298 }
299
300 for (;;) {
301 tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
302 disks - 2,
303 raid6_gfexp,
304 len, pqres,
305 dma_flags);
306 if (likely(tx))
307 break;
308 async_tx_quiesce(&submit->depend_tx);
309 dma_async_issue_pending(chan);
310 }
311 async_tx_submit(chan, tx, submit);
312
313 return tx;
314 } else {
315 struct page *p_src = P(blocks, disks);
316 struct page *q_src = Q(blocks, disks);
317 enum async_tx_flags flags_orig = submit->flags;
318 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
319 void *scribble = submit->scribble;
320 void *cb_param_orig = submit->cb_param;
321 void *p, *q, *s;
322
323 pr_debug("%s: (sync) disks: %d len: %zu\n",
324 __func__, disks, len);
325
326 /* caller must provide a temporary result buffer and
327 * allow the input parameters to be preserved
328 */
329 BUG_ON(!spare || !scribble);
330
331 /* wait for any prerequisite operations */
332 async_tx_quiesce(&submit->depend_tx);
333
334 /* recompute p and/or q into the temporary buffer and then
335 * check to see the result matches the current value
336 */
337 tx = NULL;
338 *pqres = 0;
339 if (p_src) {
340 init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
341 NULL, NULL, scribble);
342 tx = async_xor(spare, blocks, offset, disks-2, len, submit);
343 async_tx_quiesce(&tx);
344 p = page_address(p_src) + offset;
345 s = page_address(spare) + offset;
346 *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
347 }
348
349 if (q_src) {
350 P(blocks, disks) = NULL;
351 Q(blocks, disks) = spare;
352 init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
353 tx = async_gen_syndrome(blocks, offset, disks, len, submit);
354 async_tx_quiesce(&tx);
355 q = page_address(q_src) + offset;
356 s = page_address(spare) + offset;
357 *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
358 }
359
360 /* restore P, Q and submit */
361 P(blocks, disks) = p_src;
362 Q(blocks, disks) = q_src;
363
364 submit->cb_fn = cb_fn_orig;
365 submit->cb_param = cb_param_orig;
366 submit->flags = flags_orig;
367 async_tx_sync_epilog(submit);
368
369 return NULL;
370 }
371}
372EXPORT_SYMBOL_GPL(async_syndrome_val);
373
374static int __init async_pq_init(void)
375{
376 scribble = alloc_page(GFP_KERNEL);
377
378 if (scribble)
379 return 0;
380
381 pr_err("%s: failed to allocate required spare page\n", __func__);
382
383 return -ENOMEM;
384}
385
386static void __exit async_pq_exit(void)
387{
388 put_page(scribble);
389}
390
391module_init(async_pq_init);
392module_exit(async_pq_exit);
393
394MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
395MODULE_LICENSE("GPL");
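
As a usage illustration for async_syndrome_val() above: a sketch, not part of this patch; the names are invented, and the caller is assumed to supply a throwaway 'spare' page plus a scribble region sized for 'disks' entries, as the synchronous fallback requires.

/* illustrative sketch, not part of this patch */
#include <linux/async_tx.h>

static int example_check_syndrome(struct page **blocks, int disks, size_t len,
				  struct page *spare, addr_conv_t *scribble)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;
	enum sum_check_flags pqres = 0;

	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, scribble);
	tx = async_syndrome_val(blocks, 0, disks, len, &pqres, spare, &submit);

	/* wait for the check to finish so 'pqres' is stable */
	async_tx_quiesce(&tx);

	return pqres ? -EILSEQ : 0;
}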
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
new file mode 100644
index 000000000000..822a42d10061
--- /dev/null
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -0,0 +1,455 @@
1/*
2 * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
3 * Copyright(c) 2009 Intel Corporation
4 *
5 * based on raid6recov.c:
6 * Copyright 2002 H. Peter Anvin
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
11 * any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 51
20 * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 */
23#include <linux/kernel.h>
24#include <linux/interrupt.h>
25#include <linux/dma-mapping.h>
26#include <linux/raid/pq.h>
27#include <linux/async_tx.h>
28
29static struct dma_async_tx_descriptor *
30async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
31 size_t len, struct async_submit_ctl *submit)
32{
33 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
34 &dest, 1, srcs, 2, len);
35 struct dma_device *dma = chan ? chan->device : NULL;
36 const u8 *amul, *bmul;
37 u8 ax, bx;
38 u8 *a, *b, *c;
39
40 if (dma) {
41 dma_addr_t dma_dest[2];
42 dma_addr_t dma_src[2];
43 struct device *dev = dma->dev;
44 struct dma_async_tx_descriptor *tx;
45 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
46
47 if (submit->flags & ASYNC_TX_FENCE)
48 dma_flags |= DMA_PREP_FENCE;
49 dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
50 dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
51 dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
52 tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
53 len, dma_flags);
54 if (tx) {
55 async_tx_submit(chan, tx, submit);
56 return tx;
57 }
58 }
59
60 /* run the operation synchronously */
61 async_tx_quiesce(&submit->depend_tx);
62 amul = raid6_gfmul[coef[0]];
63 bmul = raid6_gfmul[coef[1]];
64 a = page_address(srcs[0]);
65 b = page_address(srcs[1]);
66 c = page_address(dest);
67
68 while (len--) {
69 ax = amul[*a++];
70 bx = bmul[*b++];
71 *c++ = ax ^ bx;
72 }
73
74 return NULL;
75}
76
77static struct dma_async_tx_descriptor *
78async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
79 struct async_submit_ctl *submit)
80{
81 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
82 &dest, 1, &src, 1, len);
83 struct dma_device *dma = chan ? chan->device : NULL;
84 const u8 *qmul; /* Q multiplier table */
85 u8 *d, *s;
86
87 if (dma) {
88 dma_addr_t dma_dest[2];
89 dma_addr_t dma_src[1];
90 struct device *dev = dma->dev;
91 struct dma_async_tx_descriptor *tx;
92 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
93
94 if (submit->flags & ASYNC_TX_FENCE)
95 dma_flags |= DMA_PREP_FENCE;
96 dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
97 dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
98 tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
99 len, dma_flags);
100 if (tx) {
101 async_tx_submit(chan, tx, submit);
102 return tx;
103 }
104 }
105
106 /* no channel available, or failed to allocate a descriptor, so
107 * perform the operation synchronously
108 */
109 async_tx_quiesce(&submit->depend_tx);
110 qmul = raid6_gfmul[coef];
111 d = page_address(dest);
112 s = page_address(src);
113
114 while (len--)
115 *d++ = qmul[*s++];
116
117 return NULL;
118}
119
120static struct dma_async_tx_descriptor *
121__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
122 struct async_submit_ctl *submit)
123{
124 struct dma_async_tx_descriptor *tx = NULL;
125 struct page *p, *q, *a, *b;
126 struct page *srcs[2];
127 unsigned char coef[2];
128 enum async_tx_flags flags = submit->flags;
129 dma_async_tx_callback cb_fn = submit->cb_fn;
130 void *cb_param = submit->cb_param;
131 void *scribble = submit->scribble;
132
133 p = blocks[4-2];
134 q = blocks[4-1];
135
136 a = blocks[faila];
137 b = blocks[failb];
138
139 /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
140 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
141 srcs[0] = p;
142 srcs[1] = q;
143 coef[0] = raid6_gfexi[failb-faila];
144 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
145 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
146 tx = async_sum_product(b, srcs, coef, bytes, submit);
147
148 /* Dy = P+Pxy+Dx */
149 srcs[0] = p;
150 srcs[1] = b;
151 init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
152 cb_param, scribble);
153 tx = async_xor(a, srcs, 0, 2, bytes, submit);
154
155 return tx;
156
157}
158
159static struct dma_async_tx_descriptor *
160__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
161 struct async_submit_ctl *submit)
162{
163 struct dma_async_tx_descriptor *tx = NULL;
164 struct page *p, *q, *g, *dp, *dq;
165 struct page *srcs[2];
166 unsigned char coef[2];
167 enum async_tx_flags flags = submit->flags;
168 dma_async_tx_callback cb_fn = submit->cb_fn;
169 void *cb_param = submit->cb_param;
170 void *scribble = submit->scribble;
171 int uninitialized_var(good);
172 int i;
173
174 for (i = 0; i < 3; i++) {
175 if (i == faila || i == failb)
176 continue;
177 else {
178 good = i;
179 break;
180 }
181 }
182 BUG_ON(i >= 3);
183
184 p = blocks[5-2];
185 q = blocks[5-1];
186 g = blocks[good];
187
188 /* Compute syndrome with zero for the missing data pages
189 * Use the dead data pages as temporary storage for delta p and
190 * delta q
191 */
192 dp = blocks[faila];
193 dq = blocks[failb];
194
195 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
196 tx = async_memcpy(dp, g, 0, 0, bytes, submit);
197 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
198 tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
199
200 /* compute P + Pxy */
201 srcs[0] = dp;
202 srcs[1] = p;
203 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
204 NULL, NULL, scribble);
205 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
206
207 /* compute Q + Qxy */
208 srcs[0] = dq;
209 srcs[1] = q;
210 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
211 NULL, NULL, scribble);
212 tx = async_xor(dq, srcs, 0, 2, bytes, submit);
213
214 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
215 srcs[0] = dp;
216 srcs[1] = dq;
217 coef[0] = raid6_gfexi[failb-faila];
218 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
219 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
220 tx = async_sum_product(dq, srcs, coef, bytes, submit);
221
222 /* Dy = P+Pxy+Dx */
223 srcs[0] = dp;
224 srcs[1] = dq;
225 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
226 cb_param, scribble);
227 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
228
229 return tx;
230}
231
232static struct dma_async_tx_descriptor *
233__2data_recov_n(int disks, size_t bytes, int faila, int failb,
234 struct page **blocks, struct async_submit_ctl *submit)
235{
236 struct dma_async_tx_descriptor *tx = NULL;
237 struct page *p, *q, *dp, *dq;
238 struct page *srcs[2];
239 unsigned char coef[2];
240 enum async_tx_flags flags = submit->flags;
241 dma_async_tx_callback cb_fn = submit->cb_fn;
242 void *cb_param = submit->cb_param;
243 void *scribble = submit->scribble;
244
245 p = blocks[disks-2];
246 q = blocks[disks-1];
247
248 /* Compute syndrome with zero for the missing data pages
249 * Use the dead data pages as temporary storage for
250 * delta p and delta q
251 */
252 dp = blocks[faila];
253 blocks[faila] = (void *)raid6_empty_zero_page;
254 blocks[disks-2] = dp;
255 dq = blocks[failb];
256 blocks[failb] = (void *)raid6_empty_zero_page;
257 blocks[disks-1] = dq;
258
259 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
260 tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
261
262 /* Restore pointer table */
263 blocks[faila] = dp;
264 blocks[failb] = dq;
265 blocks[disks-2] = p;
266 blocks[disks-1] = q;
267
268 /* compute P + Pxy */
269 srcs[0] = dp;
270 srcs[1] = p;
271 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
272 NULL, NULL, scribble);
273 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
274
275 /* compute Q + Qxy */
276 srcs[0] = dq;
277 srcs[1] = q;
278 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
279 NULL, NULL, scribble);
280 tx = async_xor(dq, srcs, 0, 2, bytes, submit);
281
282 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
283 srcs[0] = dp;
284 srcs[1] = dq;
285 coef[0] = raid6_gfexi[failb-faila];
286 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
287 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
288 tx = async_sum_product(dq, srcs, coef, bytes, submit);
289
290 /* Dy = P+Pxy+Dx */
291 srcs[0] = dp;
292 srcs[1] = dq;
293 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
294 cb_param, scribble);
295 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
296
297 return tx;
298}
299
300/**
301 * async_raid6_2data_recov - asynchronously calculate two missing data blocks
302 * @disks: number of disks in the RAID-6 array
303 * @bytes: block size
304 * @faila: first failed drive index
305 * @failb: second failed drive index
306 * @blocks: array of source pointers where the last two entries are p and q
307 * @submit: submission/completion modifiers
308 */
309struct dma_async_tx_descriptor *
310async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
311 struct page **blocks, struct async_submit_ctl *submit)
312{
313 BUG_ON(faila == failb);
314 if (failb < faila)
315 swap(faila, failb);
316
317 pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
318
319 /* we need to preserve the contents of 'blocks' for the async
320 * case, so punt to synchronous if a scribble buffer is not available
321 */
322 if (!submit->scribble) {
323 void **ptrs = (void **) blocks;
324 int i;
325
326 async_tx_quiesce(&submit->depend_tx);
327 for (i = 0; i < disks; i++)
328 ptrs[i] = page_address(blocks[i]);
329
330 raid6_2data_recov(disks, bytes, faila, failb, ptrs);
331
332 async_tx_sync_epilog(submit);
333
334 return NULL;
335 }
336
337 switch (disks) {
338 case 4:
339 /* dma devices do not uniformly understand a zero source pq
340 * operation (in contrast to the synchronous case), so
341 * explicitly handle the 4 disk special case
342 */
343 return __2data_recov_4(bytes, faila, failb, blocks, submit);
344 case 5:
345 /* dma devices do not uniformly understand a single
346 * source pq operation (in contrast to the synchronous
347 * case), so explicitly handle the 5 disk special case
348 */
349 return __2data_recov_5(bytes, faila, failb, blocks, submit);
350 default:
351 return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
352 }
353}
354EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
355
356/**
357 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
358 * @disks: number of disks in the RAID-6 array
359 * @bytes: block size
360 * @faila: failed drive index
361 * @blocks: array of source pointers where the last two entries are p and q
362 * @submit: submission/completion modifiers
363 */
364struct dma_async_tx_descriptor *
365async_raid6_datap_recov(int disks, size_t bytes, int faila,
366 struct page **blocks, struct async_submit_ctl *submit)
367{
368 struct dma_async_tx_descriptor *tx = NULL;
369 struct page *p, *q, *dq;
370 u8 coef;
371 enum async_tx_flags flags = submit->flags;
372 dma_async_tx_callback cb_fn = submit->cb_fn;
373 void *cb_param = submit->cb_param;
374 void *scribble = submit->scribble;
375 struct page *srcs[2];
376
377 pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
378
379 /* we need to preserve the contents of 'blocks' for the async
380 * case, so punt to synchronous if a scribble buffer is not available
381 */
382 if (!scribble) {
383 void **ptrs = (void **) blocks;
384 int i;
385
386 async_tx_quiesce(&submit->depend_tx);
387 for (i = 0; i < disks; i++)
388 ptrs[i] = page_address(blocks[i]);
389
390 raid6_datap_recov(disks, bytes, faila, ptrs);
391
392 async_tx_sync_epilog(submit);
393
394 return NULL;
395 }
396
397 p = blocks[disks-2];
398 q = blocks[disks-1];
399
400 /* Compute syndrome with zero for the missing data page
401 * Use the dead data page as temporary storage for delta q
402 */
403 dq = blocks[faila];
404 blocks[faila] = (void *)raid6_empty_zero_page;
405 blocks[disks-1] = dq;
406
407 /* in the 4 disk case we only need to perform a single source
408 * multiplication
409 */
410 if (disks == 4) {
411 int good = faila == 0 ? 1 : 0;
412 struct page *g = blocks[good];
413
414 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
415 scribble);
416 tx = async_memcpy(p, g, 0, 0, bytes, submit);
417
418 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
419 scribble);
420 tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
421 } else {
422 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
423 scribble);
424 tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
425 }
426
427 /* Restore pointer table */
428 blocks[faila] = dq;
429 blocks[disks-1] = q;
430
431 /* calculate g^{-faila} */
432 coef = raid6_gfinv[raid6_gfexp[faila]];
433
434 srcs[0] = dq;
435 srcs[1] = q;
436 init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
437 NULL, NULL, scribble);
438 tx = async_xor(dq, srcs, 0, 2, bytes, submit);
439
440 init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
441 tx = async_mult(dq, dq, coef, bytes, submit);
442
443 srcs[0] = p;
444 srcs[1] = dq;
445 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
446 cb_param, scribble);
447 tx = async_xor(p, srcs, 0, 2, bytes, submit);
448
449 return tx;
450}
451EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
452
453MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
454MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
455MODULE_LICENSE("GPL");
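
For reference, a minimal synchronous sketch of the byte-level arithmetic that the __2data_recov_n descriptor chain above offloads, written in the style of the software recovery path. The raid6_gfmul multiplication table and the <linux/raid/pq.h> header are assumptions not shown in this patch; dp and dq are taken to hold Pxy and Qxy, the syndrome computed with zeroes substituted for the failed blocks.

#include <linux/raid/pq.h>

static void sketch_2data_recov(size_t bytes, int faila, int failb,
                               u8 *p, u8 *q, u8 *dp, u8 *dq)
{
        /* coefficients A and B, picked exactly as in the code above */
        const u8 *pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
        const u8 *qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila] ^
                                                  raid6_gfexp[failb]]];

        while (bytes--) {
                u8 px = *p++ ^ *dp;             /* P + Pxy */
                u8 qx = qmul[*q++ ^ *dq];       /* B * (Q + Qxy) */
                u8 db = pbmul[px] ^ qx;         /* Dx = A*(P+Pxy) + B*(Q+Qxy) */

                *dq++ = db;                     /* recovered block Dx */
                *dp++ = db ^ px;                /* Dy = Dx + (P + Pxy) */
        }
}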
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 06eb6cc09fef..f9cdf04fe7c0 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -42,16 +42,21 @@ static void __exit async_tx_exit(void)
42 async_dmaengine_put(); 42 async_dmaengine_put();
43} 43}
44 44
45module_init(async_tx_init);
46module_exit(async_tx_exit);
47
45/** 48/**
46 * __async_tx_find_channel - find a channel to carry out the operation or let 49 * __async_tx_find_channel - find a channel to carry out the operation or let
47 * the transaction execute synchronously 50 * the transaction execute synchronously
48 * @depend_tx: transaction dependency 51 * @submit: transaction dependency and submission modifiers
49 * @tx_type: transaction type 52 * @tx_type: transaction type
50 */ 53 */
51struct dma_chan * 54struct dma_chan *
52__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 55__async_tx_find_channel(struct async_submit_ctl *submit,
53 enum dma_transaction_type tx_type) 56 enum dma_transaction_type tx_type)
54{ 57{
58 struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
59
55 /* see if we can keep the chain on one channel */ 60 /* see if we can keep the chain on one channel */
56 if (depend_tx && 61 if (depend_tx &&
57 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) 62 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
@@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
59 return async_dma_find_channel(tx_type); 64 return async_dma_find_channel(tx_type);
60} 65}
61EXPORT_SYMBOL_GPL(__async_tx_find_channel); 66EXPORT_SYMBOL_GPL(__async_tx_find_channel);
62#else
63static int __init async_tx_init(void)
64{
65 printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
66 return 0;
67}
68
69static void __exit async_tx_exit(void)
70{
71 do { } while (0);
72}
73#endif 67#endif
74 68
75 69
@@ -83,10 +77,14 @@ static void
83async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, 77async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
84 struct dma_async_tx_descriptor *tx) 78 struct dma_async_tx_descriptor *tx)
85{ 79{
86 struct dma_chan *chan; 80 struct dma_chan *chan = depend_tx->chan;
87 struct dma_device *device; 81 struct dma_device *device = chan->device;
88 struct dma_async_tx_descriptor *intr_tx = (void *) ~0; 82 struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
89 83
84 #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
85 BUG();
86 #endif
87
90 /* first check to see if we can still append to depend_tx */ 88 /* first check to see if we can still append to depend_tx */
91 spin_lock_bh(&depend_tx->lock); 89 spin_lock_bh(&depend_tx->lock);
92 if (depend_tx->parent && depend_tx->chan == tx->chan) { 90 if (depend_tx->parent && depend_tx->chan == tx->chan) {
@@ -96,11 +94,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
96 } 94 }
97 spin_unlock_bh(&depend_tx->lock); 95 spin_unlock_bh(&depend_tx->lock);
98 96
99 if (!intr_tx) 97 /* attached dependency, flush the parent channel */
98 if (!intr_tx) {
99 device->device_issue_pending(chan);
100 return; 100 return;
101 101 }
102 chan = depend_tx->chan;
103 device = chan->device;
104 102
105 /* see if we can schedule an interrupt 103 /* see if we can schedule an interrupt
106 * otherwise poll for completion 104 * otherwise poll for completion
@@ -134,6 +132,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
134 intr_tx->tx_submit(intr_tx); 132 intr_tx->tx_submit(intr_tx);
135 async_tx_ack(intr_tx); 133 async_tx_ack(intr_tx);
136 } 134 }
135 device->device_issue_pending(chan);
137 } else { 136 } else {
138 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) 137 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
139 panic("%s: DMA_ERROR waiting for depend_tx\n", 138 panic("%s: DMA_ERROR waiting for depend_tx\n",
@@ -144,13 +143,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
144 143
145 144
146/** 145/**
147 * submit_disposition - while holding depend_tx->lock we must avoid submitting 146 * submit_disposition - flags for routing an incoming operation
148 * new operations to prevent a circular locking dependency with
149 * drivers that already hold a channel lock when calling
150 * async_tx_run_dependencies.
151 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock 147 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
152 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch 148 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
153 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly 149 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
150 *
151 * while holding depend_tx->lock we must avoid submitting new operations
152 * to prevent a circular locking dependency with drivers that already
153 * hold a channel lock when calling async_tx_run_dependencies.
154 */ 154 */
155enum submit_disposition { 155enum submit_disposition {
156 ASYNC_TX_SUBMITTED, 156 ASYNC_TX_SUBMITTED,
@@ -160,11 +160,12 @@ enum submit_disposition {
160 160
161void 161void
162async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, 162async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
163 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, 163 struct async_submit_ctl *submit)
164 dma_async_tx_callback cb_fn, void *cb_param)
165{ 164{
166 tx->callback = cb_fn; 165 struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
167 tx->callback_param = cb_param; 166
167 tx->callback = submit->cb_fn;
168 tx->callback_param = submit->cb_param;
168 169
169 if (depend_tx) { 170 if (depend_tx) {
170 enum submit_disposition s; 171 enum submit_disposition s;
@@ -220,30 +221,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
220 tx->tx_submit(tx); 221 tx->tx_submit(tx);
221 } 222 }
222 223
223 if (flags & ASYNC_TX_ACK) 224 if (submit->flags & ASYNC_TX_ACK)
224 async_tx_ack(tx); 225 async_tx_ack(tx);
225 226
226 if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) 227 if (depend_tx)
227 async_tx_ack(depend_tx); 228 async_tx_ack(depend_tx);
228} 229}
229EXPORT_SYMBOL_GPL(async_tx_submit); 230EXPORT_SYMBOL_GPL(async_tx_submit);
230 231
231/** 232/**
232 * async_trigger_callback - schedules the callback function to be run after 233 * async_trigger_callback - schedules the callback function to be run
233 * any dependent operations have been completed. 234 * @submit: submission and completion parameters
234 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 235 *
235 * @depend_tx: 'callback' requires the completion of this transaction 236 * honored flags: ASYNC_TX_ACK
236 * @cb_fn: function to call after depend_tx completes 237 *
237 * @cb_param: parameter to pass to the callback routine 238 * The callback is run after any dependent operations have completed.
238 */ 239 */
239struct dma_async_tx_descriptor * 240struct dma_async_tx_descriptor *
240async_trigger_callback(enum async_tx_flags flags, 241async_trigger_callback(struct async_submit_ctl *submit)
241 struct dma_async_tx_descriptor *depend_tx,
242 dma_async_tx_callback cb_fn, void *cb_param)
243{ 242{
244 struct dma_chan *chan; 243 struct dma_chan *chan;
245 struct dma_device *device; 244 struct dma_device *device;
246 struct dma_async_tx_descriptor *tx; 245 struct dma_async_tx_descriptor *tx;
246 struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
247 247
248 if (depend_tx) { 248 if (depend_tx) {
249 chan = depend_tx->chan; 249 chan = depend_tx->chan;
@@ -262,14 +262,14 @@ async_trigger_callback(enum async_tx_flags flags,
262 if (tx) { 262 if (tx) {
263 pr_debug("%s: (async)\n", __func__); 263 pr_debug("%s: (async)\n", __func__);
264 264
265 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 265 async_tx_submit(chan, tx, submit);
266 } else { 266 } else {
267 pr_debug("%s: (sync)\n", __func__); 267 pr_debug("%s: (sync)\n", __func__);
268 268
269 /* wait for any prerequisite operations */ 269 /* wait for any prerequisite operations */
270 async_tx_quiesce(&depend_tx); 270 async_tx_quiesce(&submit->depend_tx);
271 271
272 async_tx_sync_epilog(cb_fn, cb_param); 272 async_tx_sync_epilog(submit);
273 } 273 }
274 274
275 return tx; 275 return tx;
@@ -295,9 +295,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
295} 295}
296EXPORT_SYMBOL_GPL(async_tx_quiesce); 296EXPORT_SYMBOL_GPL(async_tx_quiesce);
297 297
298module_init(async_tx_init);
299module_exit(async_tx_exit);
300
301MODULE_AUTHOR("Intel Corporation"); 298MODULE_AUTHOR("Intel Corporation");
302MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); 299MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
303MODULE_LICENSE("GPL"); 300MODULE_LICENSE("GPL");
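
To make the converted calling convention concrete, here is a hedged usage sketch (not part of the patch) that chains an xor to a completion callback through the new struct async_submit_ctl. The helper name, pages, callback and scribble region are hypothetical; the signatures follow the calls visible elsewhere in this series.

#include <linux/async_tx.h>

static struct dma_async_tx_descriptor *
sketch_xor_then_notify(struct page *dest, struct page **srcs, int src_cnt,
                       size_t len, dma_async_tx_callback done, void *done_arg,
                       addr_conv_t *addr_conv)
{
        struct async_submit_ctl submit;
        struct dma_async_tx_descriptor *tx;

        /* dest = srcs[0] ^ ... ^ srcs[src_cnt-1]; ASYNC_TX_FENCE orders the
         * result for any dependent consumer of the data
         */
        init_async_submit(&submit, ASYNC_TX_FENCE | ASYNC_TX_XOR_ZERO_DST,
                          NULL, NULL, NULL, addr_conv);
        tx = async_xor(dest, srcs, 0, src_cnt, len, &submit);

        /* run 'done' once the xor (and anything it depended on) completes */
        init_async_submit(&submit, ASYNC_TX_ACK, tx, done, done_arg, addr_conv);
        tx = async_trigger_callback(&submit);

        async_tx_issue_pending(tx);
        return tx;
}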
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 90dd3f8bd283..b459a9034aac 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -33,19 +33,16 @@
33/* do_async_xor - dma map the pages and perform the xor with an engine */ 33/* do_async_xor - dma map the pages and perform the xor with an engine */
34static __async_inline struct dma_async_tx_descriptor * 34static __async_inline struct dma_async_tx_descriptor *
35do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, 35do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
36 unsigned int offset, int src_cnt, size_t len, 36 unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
37 enum async_tx_flags flags, 37 struct async_submit_ctl *submit)
38 struct dma_async_tx_descriptor *depend_tx,
39 dma_async_tx_callback cb_fn, void *cb_param)
40{ 38{
41 struct dma_device *dma = chan->device; 39 struct dma_device *dma = chan->device;
42 dma_addr_t *dma_src = (dma_addr_t *) src_list;
43 struct dma_async_tx_descriptor *tx = NULL; 40 struct dma_async_tx_descriptor *tx = NULL;
44 int src_off = 0; 41 int src_off = 0;
45 int i; 42 int i;
46 dma_async_tx_callback _cb_fn; 43 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
47 void *_cb_param; 44 void *cb_param_orig = submit->cb_param;
48 enum async_tx_flags async_flags; 45 enum async_tx_flags flags_orig = submit->flags;
49 enum dma_ctrl_flags dma_flags; 46 enum dma_ctrl_flags dma_flags;
50 int xor_src_cnt; 47 int xor_src_cnt;
51 dma_addr_t dma_dest; 48 dma_addr_t dma_dest;
@@ -63,25 +60,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
63 } 60 }
64 61
65 while (src_cnt) { 62 while (src_cnt) {
66 async_flags = flags; 63 submit->flags = flags_orig;
67 dma_flags = 0; 64 dma_flags = 0;
68 xor_src_cnt = min(src_cnt, dma->max_xor); 65 xor_src_cnt = min(src_cnt, (int)dma->max_xor);
69 /* if we are submitting additional xors, leave the chain open, 66 /* if we are submitting additional xors, leave the chain open,
70 * clear the callback parameters, and leave the destination 67 * clear the callback parameters, and leave the destination
71 * buffer mapped 68 * buffer mapped
72 */ 69 */
73 if (src_cnt > xor_src_cnt) { 70 if (src_cnt > xor_src_cnt) {
74 async_flags &= ~ASYNC_TX_ACK; 71 submit->flags &= ~ASYNC_TX_ACK;
72 submit->flags |= ASYNC_TX_FENCE;
75 dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; 73 dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
76 _cb_fn = NULL; 74 submit->cb_fn = NULL;
77 _cb_param = NULL; 75 submit->cb_param = NULL;
78 } else { 76 } else {
79 _cb_fn = cb_fn; 77 submit->cb_fn = cb_fn_orig;
80 _cb_param = cb_param; 78 submit->cb_param = cb_param_orig;
81 } 79 }
82 if (_cb_fn) 80 if (submit->cb_fn)
83 dma_flags |= DMA_PREP_INTERRUPT; 81 dma_flags |= DMA_PREP_INTERRUPT;
84 82 if (submit->flags & ASYNC_TX_FENCE)
83 dma_flags |= DMA_PREP_FENCE;
85 /* Since we have clobbered the src_list we are committed 84 /* Since we have clobbered the src_list we are committed
86 * to doing this asynchronously. Drivers force forward progress 85 * to doing this asynchronously. Drivers force forward progress
87 * in case they can not provide a descriptor 86 * in case they can not provide a descriptor
@@ -90,7 +89,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
90 xor_src_cnt, len, dma_flags); 89 xor_src_cnt, len, dma_flags);
91 90
92 if (unlikely(!tx)) 91 if (unlikely(!tx))
93 async_tx_quiesce(&depend_tx); 92 async_tx_quiesce(&submit->depend_tx);
94 93
 95 /* spin wait for the preceding transactions to complete */ 94 /* spin wait for the preceding transactions to complete */
96 while (unlikely(!tx)) { 95 while (unlikely(!tx)) {
@@ -101,11 +100,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
101 dma_flags); 100 dma_flags);
102 } 101 }
103 102
104 async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, 103 async_tx_submit(chan, tx, submit);
105 _cb_param); 104 submit->depend_tx = tx;
106
107 depend_tx = tx;
108 flags |= ASYNC_TX_DEP_ACK;
109 105
110 if (src_cnt > xor_src_cnt) { 106 if (src_cnt > xor_src_cnt) {
111 /* drop completed sources */ 107 /* drop completed sources */
@@ -124,23 +120,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
124 120
125static void 121static void
126do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, 122do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
127 int src_cnt, size_t len, enum async_tx_flags flags, 123 int src_cnt, size_t len, struct async_submit_ctl *submit)
128 dma_async_tx_callback cb_fn, void *cb_param)
129{ 124{
130 int i; 125 int i;
131 int xor_src_cnt; 126 int xor_src_cnt;
132 int src_off = 0; 127 int src_off = 0;
133 void *dest_buf; 128 void *dest_buf;
134 void **srcs = (void **) src_list; 129 void **srcs;
130
131 if (submit->scribble)
132 srcs = submit->scribble;
133 else
134 srcs = (void **) src_list;
135 135
136 /* reuse the 'src_list' array to convert to buffer pointers */ 136 /* convert to buffer pointers */
137 for (i = 0; i < src_cnt; i++) 137 for (i = 0; i < src_cnt; i++)
138 srcs[i] = page_address(src_list[i]) + offset; 138 srcs[i] = page_address(src_list[i]) + offset;
139 139
140 /* set destination address */ 140 /* set destination address */
141 dest_buf = page_address(dest) + offset; 141 dest_buf = page_address(dest) + offset;
142 142
143 if (flags & ASYNC_TX_XOR_ZERO_DST) 143 if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
144 memset(dest_buf, 0, len); 144 memset(dest_buf, 0, len);
145 145
146 while (src_cnt > 0) { 146 while (src_cnt > 0) {
@@ -153,61 +153,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
153 src_off += xor_src_cnt; 153 src_off += xor_src_cnt;
154 } 154 }
155 155
156 async_tx_sync_epilog(cb_fn, cb_param); 156 async_tx_sync_epilog(submit);
157} 157}
158 158
159/** 159/**
160 * async_xor - attempt to xor a set of blocks with a dma engine. 160 * async_xor - attempt to xor a set of blocks with a dma engine.
161 * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
162 * flag must be set to not include dest data in the calculation. The
163 * assumption with dma eninges is that they only use the destination
164 * buffer as a source when it is explicity specified in the source list.
165 * @dest: destination page 161 * @dest: destination page
166 * @src_list: array of source pages (if the dest is also a source it must be 162 * @src_list: array of source pages
167 * at index zero). The contents of this array may be overwritten. 163 * @offset: common src/dst offset to start transaction
168 * @offset: offset in pages to start transaction
169 * @src_cnt: number of source pages 164 * @src_cnt: number of source pages
170 * @len: length in bytes 165 * @len: length in bytes
171 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, 166 * @submit: submission / completion modifiers
172 * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 167 *
173 * @depend_tx: xor depends on the result of this transaction. 168 * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
174 * @cb_fn: function to call when the xor completes 169 *
175 * @cb_param: parameter to pass to the callback routine 170 * xor_blocks always uses the dest as a source so the
171 * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
 172 * the calculation. The assumption with dma engines is that they only
 173 * use the destination buffer as a source when it is explicitly specified
174 * in the source list.
175 *
176 * src_list note: if the dest is also a source it must be at index zero.
177 * The contents of this array will be overwritten if a scribble region
178 * is not specified.
176 */ 179 */
177struct dma_async_tx_descriptor * 180struct dma_async_tx_descriptor *
178async_xor(struct page *dest, struct page **src_list, unsigned int offset, 181async_xor(struct page *dest, struct page **src_list, unsigned int offset,
179 int src_cnt, size_t len, enum async_tx_flags flags, 182 int src_cnt, size_t len, struct async_submit_ctl *submit)
180 struct dma_async_tx_descriptor *depend_tx,
181 dma_async_tx_callback cb_fn, void *cb_param)
182{ 183{
183 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, 184 struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
184 &dest, 1, src_list, 185 &dest, 1, src_list,
185 src_cnt, len); 186 src_cnt, len);
187 dma_addr_t *dma_src = NULL;
188
186 BUG_ON(src_cnt <= 1); 189 BUG_ON(src_cnt <= 1);
187 190
188 if (chan) { 191 if (submit->scribble)
192 dma_src = submit->scribble;
193 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
194 dma_src = (dma_addr_t *) src_list;
195
196 if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
189 /* run the xor asynchronously */ 197 /* run the xor asynchronously */
190 pr_debug("%s (async): len: %zu\n", __func__, len); 198 pr_debug("%s (async): len: %zu\n", __func__, len);
191 199
192 return do_async_xor(chan, dest, src_list, offset, src_cnt, len, 200 return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
193 flags, depend_tx, cb_fn, cb_param); 201 dma_src, submit);
194 } else { 202 } else {
195 /* run the xor synchronously */ 203 /* run the xor synchronously */
196 pr_debug("%s (sync): len: %zu\n", __func__, len); 204 pr_debug("%s (sync): len: %zu\n", __func__, len);
205 WARN_ONCE(chan, "%s: no space for dma address conversion\n",
206 __func__);
197 207
198 /* in the sync case the dest is an implied source 208 /* in the sync case the dest is an implied source
199 * (assumes the dest is the first source) 209 * (assumes the dest is the first source)
200 */ 210 */
201 if (flags & ASYNC_TX_XOR_DROP_DST) { 211 if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
202 src_cnt--; 212 src_cnt--;
203 src_list++; 213 src_list++;
204 } 214 }
205 215
206 /* wait for any prerequisite operations */ 216 /* wait for any prerequisite operations */
207 async_tx_quiesce(&depend_tx); 217 async_tx_quiesce(&submit->depend_tx);
208 218
209 do_sync_xor(dest, src_list, offset, src_cnt, len, 219 do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
210 flags, cb_fn, cb_param);
211 220
212 return NULL; 221 return NULL;
213 } 222 }
@@ -222,104 +231,94 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
222} 231}
223 232
224/** 233/**
225 * async_xor_zero_sum - attempt a xor parity check with a dma engine. 234 * async_xor_val - attempt a xor parity check with a dma engine.
226 * @dest: destination page used if the xor is performed synchronously 235 * @dest: destination page used if the xor is performed synchronously
227 * @src_list: array of source pages. The dest page must be listed as a source 236 * @src_list: array of source pages
228 * at index zero. The contents of this array may be overwritten.
229 * @offset: offset in pages to start transaction 237 * @offset: offset in pages to start transaction
230 * @src_cnt: number of source pages 238 * @src_cnt: number of source pages
231 * @len: length in bytes 239 * @len: length in bytes
232 * @result: 0 if sum == 0 else non-zero 240 * @result: 0 if sum == 0 else non-zero
233 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 241 * @submit: submission / completion modifiers
234 * @depend_tx: xor depends on the result of this transaction. 242 *
235 * @cb_fn: function to call when the xor completes 243 * honored flags: ASYNC_TX_ACK
236 * @cb_param: parameter to pass to the callback routine 244 *
245 * src_list note: if the dest is also a source it must be at index zero.
246 * The contents of this array will be overwritten if a scribble region
247 * is not specified.
237 */ 248 */
238struct dma_async_tx_descriptor * 249struct dma_async_tx_descriptor *
239async_xor_zero_sum(struct page *dest, struct page **src_list, 250async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
240 unsigned int offset, int src_cnt, size_t len, 251 int src_cnt, size_t len, enum sum_check_flags *result,
241 u32 *result, enum async_tx_flags flags, 252 struct async_submit_ctl *submit)
242 struct dma_async_tx_descriptor *depend_tx,
243 dma_async_tx_callback cb_fn, void *cb_param)
244{ 253{
245 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, 254 struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
246 &dest, 1, src_list, 255 &dest, 1, src_list,
247 src_cnt, len); 256 src_cnt, len);
248 struct dma_device *device = chan ? chan->device : NULL; 257 struct dma_device *device = chan ? chan->device : NULL;
249 struct dma_async_tx_descriptor *tx = NULL; 258 struct dma_async_tx_descriptor *tx = NULL;
259 dma_addr_t *dma_src = NULL;
250 260
251 BUG_ON(src_cnt <= 1); 261 BUG_ON(src_cnt <= 1);
252 262
253 if (device && src_cnt <= device->max_xor) { 263 if (submit->scribble)
254 dma_addr_t *dma_src = (dma_addr_t *) src_list; 264 dma_src = submit->scribble;
255 unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; 265 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
266 dma_src = (dma_addr_t *) src_list;
267
268 if (dma_src && device && src_cnt <= device->max_xor &&
269 is_dma_xor_aligned(device, offset, 0, len)) {
270 unsigned long dma_prep_flags = 0;
256 int i; 271 int i;
257 272
258 pr_debug("%s: (async) len: %zu\n", __func__, len); 273 pr_debug("%s: (async) len: %zu\n", __func__, len);
259 274
275 if (submit->cb_fn)
276 dma_prep_flags |= DMA_PREP_INTERRUPT;
277 if (submit->flags & ASYNC_TX_FENCE)
278 dma_prep_flags |= DMA_PREP_FENCE;
260 for (i = 0; i < src_cnt; i++) 279 for (i = 0; i < src_cnt; i++)
261 dma_src[i] = dma_map_page(device->dev, src_list[i], 280 dma_src[i] = dma_map_page(device->dev, src_list[i],
262 offset, len, DMA_TO_DEVICE); 281 offset, len, DMA_TO_DEVICE);
263 282
264 tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, 283 tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
265 len, result, 284 len, result,
266 dma_prep_flags); 285 dma_prep_flags);
267 if (unlikely(!tx)) { 286 if (unlikely(!tx)) {
268 async_tx_quiesce(&depend_tx); 287 async_tx_quiesce(&submit->depend_tx);
269 288
270 while (!tx) { 289 while (!tx) {
271 dma_async_issue_pending(chan); 290 dma_async_issue_pending(chan);
272 tx = device->device_prep_dma_zero_sum(chan, 291 tx = device->device_prep_dma_xor_val(chan,
273 dma_src, src_cnt, len, result, 292 dma_src, src_cnt, len, result,
274 dma_prep_flags); 293 dma_prep_flags);
275 } 294 }
276 } 295 }
277 296
278 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 297 async_tx_submit(chan, tx, submit);
279 } else { 298 } else {
280 unsigned long xor_flags = flags; 299 enum async_tx_flags flags_orig = submit->flags;
281 300
282 pr_debug("%s: (sync) len: %zu\n", __func__, len); 301 pr_debug("%s: (sync) len: %zu\n", __func__, len);
302 WARN_ONCE(device && src_cnt <= device->max_xor,
303 "%s: no space for dma address conversion\n",
304 __func__);
283 305
284 xor_flags |= ASYNC_TX_XOR_DROP_DST; 306 submit->flags |= ASYNC_TX_XOR_DROP_DST;
285 xor_flags &= ~ASYNC_TX_ACK; 307 submit->flags &= ~ASYNC_TX_ACK;
286 308
287 tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, 309 tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
288 depend_tx, NULL, NULL);
289 310
290 async_tx_quiesce(&tx); 311 async_tx_quiesce(&tx);
291 312
292 *result = page_is_zero(dest, offset, len) ? 0 : 1; 313 *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
293 314
294 async_tx_sync_epilog(cb_fn, cb_param); 315 async_tx_sync_epilog(submit);
316 submit->flags = flags_orig;
295 } 317 }
296 318
297 return tx; 319 return tx;
298} 320}
299EXPORT_SYMBOL_GPL(async_xor_zero_sum); 321EXPORT_SYMBOL_GPL(async_xor_val);
300
301static int __init async_xor_init(void)
302{
303 #ifdef CONFIG_ASYNC_TX_DMA
304 /* To conserve stack space the input src_list (array of page pointers)
305 * is reused to hold the array of dma addresses passed to the driver.
306 * This conversion is only possible when dma_addr_t is less than the
307 * the size of a pointer. HIGHMEM64G is known to violate this
308 * assumption.
309 */
310 BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
311 #endif
312
313 return 0;
314}
315
316static void __exit async_xor_exit(void)
317{
318 do { } while (0);
319}
320
321module_init(async_xor_init);
322module_exit(async_xor_exit);
323 322
324MODULE_AUTHOR("Intel Corporation"); 323MODULE_AUTHOR("Intel Corporation");
325MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); 324MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
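
A hedged sketch (not part of the patch) of the renamed validation entry point: xor all members of a stripe, with the parity page also placed at src_list index zero per the comment above, then test the result bit. SUM_CHECK_P_RESULT is assumed to be defined alongside SUM_CHECK_P in this series; the helper and its arguments are illustrative only.

#include <linux/async_tx.h>

/* returns 0 when the xor of srcs[0..src_cnt-1] is zero (parity consistent) */
static int sketch_check_parity(struct page **srcs, int src_cnt, size_t len,
                               addr_conv_t *addr_conv)
{
        struct async_submit_ctl submit;
        struct dma_async_tx_descriptor *tx;
        enum sum_check_flags result = 0;

        /* srcs[0] is the parity page itself; it doubles as the destination
         * for the synchronous fallback
         */
        init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, addr_conv);
        tx = async_xor_val(srcs[0], srcs, 0, src_cnt, len, &result, &submit);

        async_tx_quiesce(&tx);  /* spin until the check has run */

        return result & SUM_CHECK_P_RESULT;
}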
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
new file mode 100644
index 000000000000..98c83ca96c83
--- /dev/null
+++ b/crypto/async_tx/raid6test.c
@@ -0,0 +1,241 @@
1/*
2 * asynchronous raid6 recovery self test
3 * Copyright (c) 2009, Intel Corporation.
4 *
5 * based on drivers/md/raid6test/test.c:
6 * Copyright 2002-2007 H. Peter Anvin
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 */
22#include <linux/async_tx.h>
23#include <linux/random.h>
24
25#undef pr
26#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
27
28#define NDISKS 16 /* Including P and Q */
29
30static struct page *dataptrs[NDISKS];
31static struct page *data[NDISKS+3];
32static struct page *spare;
33static struct page *recovi;
34static struct page *recovj;
35
36static void callback(void *param)
37{
38 struct completion *cmp = param;
39
40 complete(cmp);
41}
42
43static void makedata(int disks)
44{
45 int i, j;
46
47 for (i = 0; i < disks; i++) {
48 for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) {
49 u32 *p = page_address(data[i]) + j;
50
51 *p = random32();
52 }
53
54 dataptrs[i] = data[i];
55 }
56}
57
58static char disk_type(int d, int disks)
59{
60 if (d == disks - 2)
61 return 'P';
62 else if (d == disks - 1)
63 return 'Q';
64 else
65 return 'D';
66}
67
68/* Recover two failed blocks. */
69static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
70{
71 struct async_submit_ctl submit;
72 addr_conv_t addr_conv[disks];
73 struct completion cmp;
74 struct dma_async_tx_descriptor *tx = NULL;
75 enum sum_check_flags result = ~0;
76
77 if (faila > failb)
78 swap(faila, failb);
79
80 if (failb == disks-1) {
81 if (faila == disks-2) {
82 /* P+Q failure. Just rebuild the syndrome. */
83 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
84 tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
85 } else {
86 struct page *blocks[disks];
87 struct page *dest;
88 int count = 0;
89 int i;
90
91 /* data+Q failure. Reconstruct data from P,
92 * then rebuild syndrome
93 */
94 for (i = disks; i-- ; ) {
95 if (i == faila || i == failb)
96 continue;
97 blocks[count++] = ptrs[i];
98 }
99 dest = ptrs[faila];
100 init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
101 NULL, NULL, addr_conv);
102 tx = async_xor(dest, blocks, 0, count, bytes, &submit);
103
104 init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
105 tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
106 }
107 } else {
108 if (failb == disks-2) {
109 /* data+P failure. */
110 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
111 tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
112 } else {
113 /* data+data failure. */
114 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
115 tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
116 }
117 }
118 init_completion(&cmp);
119 init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
120 tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
121 async_tx_issue_pending(tx);
122
123 if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
124 pr("%s: timeout! (faila: %d failb: %d disks: %d)\n",
125 __func__, faila, failb, disks);
126
127 if (result != 0)
128 pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n",
129 __func__, faila, failb, result);
130}
131
132static int test_disks(int i, int j, int disks)
133{
134 int erra, errb;
135
136 memset(page_address(recovi), 0xf0, PAGE_SIZE);
137 memset(page_address(recovj), 0xba, PAGE_SIZE);
138
139 dataptrs[i] = recovi;
140 dataptrs[j] = recovj;
141
142 raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
143
144 erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
145 errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
146
147 pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n",
148 __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks),
149 (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB");
150
151 dataptrs[i] = data[i];
152 dataptrs[j] = data[j];
153
154 return erra || errb;
155}
156
157static int test(int disks, int *tests)
158{
159 addr_conv_t addr_conv[disks];
160 struct dma_async_tx_descriptor *tx;
161 struct async_submit_ctl submit;
162 struct completion cmp;
163 int err = 0;
164 int i, j;
165
166 recovi = data[disks];
167 recovj = data[disks+1];
168 spare = data[disks+2];
169
170 makedata(disks);
171
172 /* Nuke syndromes */
173 memset(page_address(data[disks-2]), 0xee, PAGE_SIZE);
174 memset(page_address(data[disks-1]), 0xee, PAGE_SIZE);
175
176 /* Generate assumed good syndrome */
177 init_completion(&cmp);
178 init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
179 tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
180 async_tx_issue_pending(tx);
181
182 if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
183 pr("error: initial gen_syndrome(%d) timed out\n", disks);
184 return 1;
185 }
186
187 pr("testing the %d-disk case...\n", disks);
188 for (i = 0; i < disks-1; i++)
189 for (j = i+1; j < disks; j++) {
190 (*tests)++;
191 err += test_disks(i, j, disks);
192 }
193
194 return err;
195}
196
197
198static int raid6_test(void)
199{
200 int err = 0;
201 int tests = 0;
202 int i;
203
204 for (i = 0; i < NDISKS+3; i++) {
205 data[i] = alloc_page(GFP_KERNEL);
206 if (!data[i]) {
207 while (i--)
208 put_page(data[i]);
209 return -ENOMEM;
210 }
211 }
212
213 /* the 4-disk and 5-disk cases are special for the recovery code */
214 if (NDISKS > 4)
215 err += test(4, &tests);
216 if (NDISKS > 5)
217 err += test(5, &tests);
218 err += test(NDISKS, &tests);
219
220 pr("\n");
221 pr("complete (%d tests, %d failure%s)\n",
222 tests, err, err == 1 ? "" : "s");
223
224 for (i = 0; i < NDISKS+3; i++)
225 put_page(data[i]);
226
227 return 0;
228}
229
230static void raid6_test_exit(void)
231{
232}
233
234/* when compiled-in, wait for drivers to load first (assumes dma drivers
235 * are also compiled-in)
236 */
237late_initcall(raid6_test);
238module_exit(raid6_test_exit);
239MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
240MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests");
241MODULE_LICENSE("GPL");
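
The data+Q branch of raid6_dual_recov above leans on the RAID-5 identity P = D0 ^ D1 ^ ... ^ Dn-1, so the lost block is simply P xored with the surviving data blocks before the syndrome is regenerated. A hedged reference loop (not part of the patch), with an illustrative helper name:

#include <linux/types.h>

static void sketch_data_from_p(u8 *dest, const u8 *p,
                               u8 * const *surviving_data, int n, size_t bytes)
{
        size_t off;
        int i;

        for (off = 0; off < bytes; off++) {
                u8 v = p[off];  /* start from parity */

                for (i = 0; i < n; i++)
                        v ^= surviving_data[i][off];
                dest[off] = v;  /* Dfaila = P ^ xor(surviving data) */
        }
}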
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 81e1020fb514..fe1f3717b1ff 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -17,11 +17,15 @@ if DMADEVICES
17 17
18comment "DMA Devices" 18comment "DMA Devices"
19 19
20config ASYNC_TX_DISABLE_CHANNEL_SWITCH
21 bool
22
20config INTEL_IOATDMA 23config INTEL_IOATDMA
21 tristate "Intel I/OAT DMA support" 24 tristate "Intel I/OAT DMA support"
22 depends on PCI && X86 25 depends on PCI && X86
23 select DMA_ENGINE 26 select DMA_ENGINE
24 select DCA 27 select DCA
28 select ASYNC_TX_DISABLE_CHANNEL_SWITCH
25 help 29 help
26 Enable support for the Intel(R) I/OAT DMA engine present 30 Enable support for the Intel(R) I/OAT DMA engine present
27 in recent Intel Xeon chipsets. 31 in recent Intel Xeon chipsets.
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 562d182eae66..bd0b248de2cf 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -608,6 +608,40 @@ void dmaengine_put(void)
608} 608}
609EXPORT_SYMBOL(dmaengine_put); 609EXPORT_SYMBOL(dmaengine_put);
610 610
611static bool device_has_all_tx_types(struct dma_device *device)
612{
613 /* A device that satisfies this test has channels that will never cause
614 * an async_tx channel switch event as all possible operation types can
615 * be handled.
616 */
617 #ifdef CONFIG_ASYNC_TX_DMA
618 if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
619 return false;
620 #endif
621
622 #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
623 if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
624 return false;
625 #endif
626
627 #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
628 if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
629 return false;
630 #endif
631
632 #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
633 if (!dma_has_cap(DMA_XOR, device->cap_mask))
634 return false;
635 #endif
636
637 #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
638 if (!dma_has_cap(DMA_PQ, device->cap_mask))
639 return false;
640 #endif
641
642 return true;
643}
644
611static int get_dma_id(struct dma_device *device) 645static int get_dma_id(struct dma_device *device)
612{ 646{
613 int rc; 647 int rc;
@@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device)
644 !device->device_prep_dma_memcpy); 678 !device->device_prep_dma_memcpy);
645 BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && 679 BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
646 !device->device_prep_dma_xor); 680 !device->device_prep_dma_xor);
647 BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && 681 BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
648 !device->device_prep_dma_zero_sum); 682 !device->device_prep_dma_xor_val);
683 BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
684 !device->device_prep_dma_pq);
685 BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
686 !device->device_prep_dma_pq_val);
649 BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && 687 BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
650 !device->device_prep_dma_memset); 688 !device->device_prep_dma_memset);
651 BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && 689 BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
@@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device)
661 BUG_ON(!device->device_issue_pending); 699 BUG_ON(!device->device_issue_pending);
662 BUG_ON(!device->dev); 700 BUG_ON(!device->dev);
663 701
702 /* note: this only matters in the
703 * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
704 */
705 if (device_has_all_tx_types(device))
706 dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
707
664 idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); 708 idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
665 if (!idr_ref) 709 if (!idr_ref)
666 return -ENOMEM; 710 return -ENOMEM;
@@ -938,49 +982,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init);
938 982
939/* dma_wait_for_async_tx - spin wait for a transaction to complete 983/* dma_wait_for_async_tx - spin wait for a transaction to complete
940 * @tx: in-flight transaction to wait on 984 * @tx: in-flight transaction to wait on
941 *
942 * This routine assumes that tx was obtained from a call to async_memcpy,
943 * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
944 * and submitted). Walking the parent chain is only meant to cover for DMA
945 * drivers that do not implement the DMA_INTERRUPT capability and may race with
946 * the driver's descriptor cleanup routine.
947 */ 985 */
948enum dma_status 986enum dma_status
949dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) 987dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
950{ 988{
951 enum dma_status status; 989 unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
952 struct dma_async_tx_descriptor *iter;
953 struct dma_async_tx_descriptor *parent;
954 990
955 if (!tx) 991 if (!tx)
956 return DMA_SUCCESS; 992 return DMA_SUCCESS;
957 993
958 WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" 994 while (tx->cookie == -EBUSY) {
959 " %s\n", __func__, dma_chan_name(tx->chan)); 995 if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
960 996 pr_err("%s timeout waiting for descriptor submission\n",
961 /* poll through the dependency chain, return when tx is complete */ 997 __func__);
962 do { 998 return DMA_ERROR;
963 iter = tx; 999 }
964 1000 cpu_relax();
965 /* find the root of the unsubmitted dependency chain */ 1001 }
966 do { 1002 return dma_sync_wait(tx->chan, tx->cookie);
967 parent = iter->parent;
968 if (!parent)
969 break;
970 else
971 iter = parent;
972 } while (parent);
973
974 /* there is a small window for ->parent == NULL and
975 * ->cookie == -EBUSY
976 */
977 while (iter->cookie == -EBUSY)
978 cpu_relax();
979
980 status = dma_sync_wait(iter->chan, iter->cookie);
981 } while (status == DMA_IN_PROGRESS || (iter != tx));
982
983 return status;
984} 1003}
985EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); 1004EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
986 1005
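
A hedged usage sketch (not part of the patch) of the simplified wait path: callers hand in the tail of an async_tx chain and dma_wait_for_async_tx() now only spins on descriptor submission before deferring to dma_sync_wait(). The helper below is hypothetical; async_tx_quiesce() in crypto/async_tx is the in-tree equivalent.

#include <linux/dmaengine.h>

static void sketch_wait_on_chain(struct dma_async_tx_descriptor *tx)
{
        if (!tx)
                return; /* the operation already completed synchronously */

        if (dma_wait_for_async_tx(tx) == DMA_ERROR)
                pr_err("%s: DMA_ERROR while waiting\n", __func__);
}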
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index d93017fc7872..a32a4cf7b1e0 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO);
48MODULE_PARM_DESC(xor_sources, 48MODULE_PARM_DESC(xor_sources,
49 "Number of xor source buffers (default: 3)"); 49 "Number of xor source buffers (default: 3)");
50 50
51static unsigned int pq_sources = 3;
52module_param(pq_sources, uint, S_IRUGO);
53MODULE_PARM_DESC(pq_sources,
54 "Number of p+q source buffers (default: 3)");
55
51/* 56/*
52 * Initialization patterns. All bytes in the source buffer has bit 7 57 * Initialization patterns. All bytes in the source buffer has bit 7
53 * set, all bytes in the destination buffer has bit 7 cleared. 58 * set, all bytes in the destination buffer has bit 7 cleared.
@@ -232,6 +237,7 @@ static int dmatest_func(void *data)
232 dma_cookie_t cookie; 237 dma_cookie_t cookie;
233 enum dma_status status; 238 enum dma_status status;
234 enum dma_ctrl_flags flags; 239 enum dma_ctrl_flags flags;
240 u8 pq_coefs[pq_sources];
235 int ret; 241 int ret;
236 int src_cnt; 242 int src_cnt;
237 int dst_cnt; 243 int dst_cnt;
@@ -248,6 +254,11 @@ static int dmatest_func(void *data)
248 else if (thread->type == DMA_XOR) { 254 else if (thread->type == DMA_XOR) {
249 src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ 255 src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
250 dst_cnt = 1; 256 dst_cnt = 1;
257 } else if (thread->type == DMA_PQ) {
258 src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
259 dst_cnt = 2;
260 for (i = 0; i < pq_sources; i++)
261 pq_coefs[i] = 1;
251 } else 262 } else
252 goto err_srcs; 263 goto err_srcs;
253 264
@@ -283,6 +294,7 @@ static int dmatest_func(void *data)
283 dma_addr_t dma_dsts[dst_cnt]; 294 dma_addr_t dma_dsts[dst_cnt];
284 struct completion cmp; 295 struct completion cmp;
285 unsigned long tmo = msecs_to_jiffies(3000); 296 unsigned long tmo = msecs_to_jiffies(3000);
297 u8 align = 0;
286 298
287 total_tests++; 299 total_tests++;
288 300
@@ -290,6 +302,18 @@ static int dmatest_func(void *data)
290 src_off = dmatest_random() % (test_buf_size - len + 1); 302 src_off = dmatest_random() % (test_buf_size - len + 1);
291 dst_off = dmatest_random() % (test_buf_size - len + 1); 303 dst_off = dmatest_random() % (test_buf_size - len + 1);
292 304
305 /* honor alignment restrictions */
306 if (thread->type == DMA_MEMCPY)
307 align = dev->copy_align;
308 else if (thread->type == DMA_XOR)
309 align = dev->xor_align;
310 else if (thread->type == DMA_PQ)
311 align = dev->pq_align;
312
313 len = (len >> align) << align;
314 src_off = (src_off >> align) << align;
315 dst_off = (dst_off >> align) << align;
316
293 dmatest_init_srcs(thread->srcs, src_off, len); 317 dmatest_init_srcs(thread->srcs, src_off, len);
294 dmatest_init_dsts(thread->dsts, dst_off, len); 318 dmatest_init_dsts(thread->dsts, dst_off, len);
295 319
@@ -306,6 +330,7 @@ static int dmatest_func(void *data)
306 DMA_BIDIRECTIONAL); 330 DMA_BIDIRECTIONAL);
307 } 331 }
308 332
333
309 if (thread->type == DMA_MEMCPY) 334 if (thread->type == DMA_MEMCPY)
310 tx = dev->device_prep_dma_memcpy(chan, 335 tx = dev->device_prep_dma_memcpy(chan,
311 dma_dsts[0] + dst_off, 336 dma_dsts[0] + dst_off,
@@ -316,6 +341,15 @@ static int dmatest_func(void *data)
316 dma_dsts[0] + dst_off, 341 dma_dsts[0] + dst_off,
317 dma_srcs, xor_sources, 342 dma_srcs, xor_sources,
318 len, flags); 343 len, flags);
344 else if (thread->type == DMA_PQ) {
345 dma_addr_t dma_pq[dst_cnt];
346
347 for (i = 0; i < dst_cnt; i++)
348 dma_pq[i] = dma_dsts[i] + dst_off;
349 tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
350 pq_sources, pq_coefs,
351 len, flags);
352 }
319 353
320 if (!tx) { 354 if (!tx) {
321 for (i = 0; i < src_cnt; i++) 355 for (i = 0; i < src_cnt; i++)
@@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
459 op = "copy"; 493 op = "copy";
460 else if (type == DMA_XOR) 494 else if (type == DMA_XOR)
461 op = "xor"; 495 op = "xor";
496 else if (type == DMA_PQ)
497 op = "pq";
462 else 498 else
463 return -EINVAL; 499 return -EINVAL;
464 500
@@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan)
514 cnt = dmatest_add_threads(dtc, DMA_XOR); 550 cnt = dmatest_add_threads(dtc, DMA_XOR);
515 thread_count += cnt > 0 ? cnt : 0; 551 thread_count += cnt > 0 ? cnt : 0;
516 } 552 }
553 if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
554 cnt = dmatest_add_threads(dtc, DMA_PQ);
555 thread_count += cnt > 0 ?: 0;
556 }
517 557
518 pr_info("dmatest: Started %u threads using %s\n", 558 pr_info("dmatest: Started %u threads using %s\n",
519 thread_count, dma_chan_name(chan)); 559 thread_count, dma_chan_name(chan));
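
The alignment handling added above rounds the random length and offsets down to the device's 2^align byte boundary. A hedged, self-contained restatement (not part of the patch) with a worked value:

/* (x >> align) << align == x & ~((1u << align) - 1)
 * e.g. align = 3 (8-byte boundary): 1001 -> 1000, 1000 -> 1000
 */
static unsigned int sketch_round_down(unsigned int x, unsigned int align)
{
        return (x >> align) << align;
}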
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
index 205a639e84df..8997d3fb9051 100644
--- a/drivers/dma/ioat/Makefile
+++ b/drivers/dma/ioat/Makefile
@@ -1,2 +1,2 @@
1obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o 1obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
2ioatdma-objs := pci.o dma.o dma_v2.o dca.o 2ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 21527b89590c..c524d36d3c2e 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -263,6 +263,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
263 if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) 263 if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
264 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); 264 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
265 265
266 ioat->active += desc->hw->tx_cnt;
266 ioat->pending += desc->hw->tx_cnt; 267 ioat->pending += desc->hw->tx_cnt;
267 if (ioat->pending >= ioat_pending_level) 268 if (ioat->pending >= ioat_pending_level)
268 __ioat1_dma_memcpy_issue_pending(ioat); 269 __ioat1_dma_memcpy_issue_pending(ioat);
@@ -539,17 +540,6 @@ static void ioat1_cleanup_tasklet(unsigned long data)
539 writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET); 540 writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
540} 541}
541 542
542static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
543 int direction, enum dma_ctrl_flags flags, bool dst)
544{
545 if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
546 (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
547 pci_unmap_single(pdev, addr, len, direction);
548 else
549 pci_unmap_page(pdev, addr, len, direction);
550}
551
552
553void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, 543void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
554 size_t len, struct ioat_dma_descriptor *hw) 544 size_t len, struct ioat_dma_descriptor *hw)
555{ 545{
@@ -623,6 +613,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
623 chan->completed_cookie = tx->cookie; 613 chan->completed_cookie = tx->cookie;
624 tx->cookie = 0; 614 tx->cookie = 0;
625 ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); 615 ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
616 ioat->active -= desc->hw->tx_cnt;
626 if (tx->callback) { 617 if (tx->callback) {
627 tx->callback(tx->callback_param); 618 tx->callback(tx->callback_param);
628 tx->callback = NULL; 619 tx->callback = NULL;
@@ -809,7 +800,7 @@ static void __devinit ioat_dma_test_callback(void *dma_async_param)
809 * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. 800 * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
810 * @device: device to be tested 801 * @device: device to be tested
811 */ 802 */
812static int __devinit ioat_dma_self_test(struct ioatdma_device *device) 803int __devinit ioat_dma_self_test(struct ioatdma_device *device)
813{ 804{
814 int i; 805 int i;
815 u8 *src; 806 u8 *src;
@@ -1040,13 +1031,8 @@ int __devinit ioat_probe(struct ioatdma_device *device)
1040 dma_cap_set(DMA_MEMCPY, dma->cap_mask); 1031 dma_cap_set(DMA_MEMCPY, dma->cap_mask);
1041 dma->dev = &pdev->dev; 1032 dma->dev = &pdev->dev;
1042 1033
1043 dev_err(dev, "Intel(R) I/OAT DMA Engine found,"
1044 " %d channels, device version 0x%02x, driver version %s\n",
1045 dma->chancnt, device->version, IOAT_DMA_VERSION);
1046
1047 if (!dma->chancnt) { 1034 if (!dma->chancnt) {
1048 dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: " 1035 dev_err(dev, "zero channels detected\n");
1049 "zero channels detected\n");
1050 goto err_setup_interrupts; 1036 goto err_setup_interrupts;
1051 } 1037 }
1052 1038
@@ -1054,7 +1040,7 @@ int __devinit ioat_probe(struct ioatdma_device *device)
1054 if (err) 1040 if (err)
1055 goto err_setup_interrupts; 1041 goto err_setup_interrupts;
1056 1042
1057 err = ioat_dma_self_test(device); 1043 err = device->self_test(device);
1058 if (err) 1044 if (err)
1059 goto err_self_test; 1045 goto err_self_test;
1060 1046
@@ -1097,6 +1083,113 @@ static void ioat1_intr_quirk(struct ioatdma_device *device)
1097 pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); 1083 pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
1098} 1084}
1099 1085
1086static ssize_t ring_size_show(struct dma_chan *c, char *page)
1087{
1088 struct ioat_dma_chan *ioat = to_ioat_chan(c);
1089
1090 return sprintf(page, "%d\n", ioat->desccount);
1091}
1092static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
1093
1094static ssize_t ring_active_show(struct dma_chan *c, char *page)
1095{
1096 struct ioat_dma_chan *ioat = to_ioat_chan(c);
1097
1098 return sprintf(page, "%d\n", ioat->active);
1099}
1100static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
1101
1102static ssize_t cap_show(struct dma_chan *c, char *page)
1103{
1104 struct dma_device *dma = c->device;
1105
1106 return sprintf(page, "copy%s%s%s%s%s%s\n",
1107 dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
1108 dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
1109 dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
1110 dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
1111 dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "",
1112 dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
1113
1114}
1115struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
1116
1117static ssize_t version_show(struct dma_chan *c, char *page)
1118{
1119 struct dma_device *dma = c->device;
1120 struct ioatdma_device *device = to_ioatdma_device(dma);
1121
1122 return sprintf(page, "%d.%d\n",
1123 device->version >> 4, device->version & 0xf);
1124}
1125struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
1126
1127static struct attribute *ioat1_attrs[] = {
1128 &ring_size_attr.attr,
1129 &ring_active_attr.attr,
1130 &ioat_cap_attr.attr,
1131 &ioat_version_attr.attr,
1132 NULL,
1133};
1134
1135static ssize_t
1136ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
1137{
1138 struct ioat_sysfs_entry *entry;
1139 struct ioat_chan_common *chan;
1140
1141 entry = container_of(attr, struct ioat_sysfs_entry, attr);
1142 chan = container_of(kobj, struct ioat_chan_common, kobj);
1143
1144 if (!entry->show)
1145 return -EIO;
1146 return entry->show(&chan->common, page);
1147}
1148
1149struct sysfs_ops ioat_sysfs_ops = {
1150 .show = ioat_attr_show,
1151};
1152
1153static struct kobj_type ioat1_ktype = {
1154 .sysfs_ops = &ioat_sysfs_ops,
1155 .default_attrs = ioat1_attrs,
1156};
1157
1158void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
1159{
1160 struct dma_device *dma = &device->common;
1161 struct dma_chan *c;
1162
1163 list_for_each_entry(c, &dma->channels, device_node) {
1164 struct ioat_chan_common *chan = to_chan_common(c);
1165 struct kobject *parent = &c->dev->device.kobj;
1166 int err;
1167
1168 err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
1169 if (err) {
1170 dev_warn(to_dev(chan),
1171 "sysfs init error (%d), continuing...\n", err);
1172 kobject_put(&chan->kobj);
1173 set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
1174 }
1175 }
1176}
1177
1178void ioat_kobject_del(struct ioatdma_device *device)
1179{
1180 struct dma_device *dma = &device->common;
1181 struct dma_chan *c;
1182
1183 list_for_each_entry(c, &dma->channels, device_node) {
1184 struct ioat_chan_common *chan = to_chan_common(c);
1185
1186 if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
1187 kobject_del(&chan->kobj);
1188 kobject_put(&chan->kobj);
1189 }
1190 }
1191}
1192
1100int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) 1193int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
1101{ 1194{
1102 struct pci_dev *pdev = device->pdev; 1195 struct pci_dev *pdev = device->pdev;
@@ -1105,6 +1198,7 @@ int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
1105 1198
1106 device->intr_quirk = ioat1_intr_quirk; 1199 device->intr_quirk = ioat1_intr_quirk;
1107 device->enumerate_channels = ioat1_enumerate_channels; 1200 device->enumerate_channels = ioat1_enumerate_channels;
1201 device->self_test = ioat_dma_self_test;
1108 dma = &device->common; 1202 dma = &device->common;
1109 dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; 1203 dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
1110 dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; 1204 dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
@@ -1119,6 +1213,8 @@ int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
1119 err = ioat_register(device); 1213 err = ioat_register(device);
1120 if (err) 1214 if (err)
1121 return err; 1215 return err;
1216 ioat_kobject_add(device, &ioat1_ktype);
1217
1122 if (dca) 1218 if (dca)
1123 device->dca = ioat_dca_init(pdev, device->reg_base); 1219 device->dca = ioat_dca_init(pdev, device->reg_base);
1124 1220
@@ -1131,6 +1227,8 @@ void __devexit ioat_dma_remove(struct ioatdma_device *device)
1131 1227
1132 ioat_disable_interrupts(device); 1228 ioat_disable_interrupts(device);
1133 1229
1230 ioat_kobject_del(device);
1231
1134 dma_async_device_unregister(dma); 1232 dma_async_device_unregister(dma);
1135 1233
1136 pci_pool_destroy(device->dma_pool); 1234 pci_pool_destroy(device->dma_pool);
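
A hedged sketch (not part of the patch) of how another hardware-version probe could hook into the sysfs plumbing added above: define ioat_sysfs_entry attributes, collect them in a kobj_type that points at ioat_sysfs_ops, and hand it to ioat_kobject_add() after ioat_register(). All names below are illustrative.

static ssize_t example_show(struct dma_chan *c, char *page)
{
        return sprintf(page, "%d\n", 42);       /* placeholder value */
}
static struct ioat_sysfs_entry example_attr = __ATTR_RO(example);

static struct attribute *example_attrs[] = {
        &example_attr.attr,
        &ioat_cap_attr.attr,
        &ioat_version_attr.attr,
        NULL,
};

static struct kobj_type example_ktype = {
        .sysfs_ops = &ioat_sysfs_ops,
        .default_attrs = example_attrs,
};

/* in a probe routine:
 *      err = ioat_register(device);
 *      if (!err)
 *              ioat_kobject_add(device, &example_ktype);
 */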
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 8966fa5453a7..6a675a2a2d1c 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -60,8 +60,12 @@
60 * @dca: direct cache access context 60 * @dca: direct cache access context
61 * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) 61 * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
62 * @enumerate_channels: hw version specific channel enumeration 62 * @enumerate_channels: hw version specific channel enumeration
63 * @cleanup_tasklet: select between the v2 and v3 cleanup routines
64 * @timer_fn: select between the v2 and v3 timer watchdog routines
65 * @self_test: hardware version specific self test for each supported op type
66 *
67 * Note: the v3 cleanup routine supports raid operations
63 */ 68 */
64
65struct ioatdma_device { 69struct ioatdma_device {
66 struct pci_dev *pdev; 70 struct pci_dev *pdev;
67 void __iomem *reg_base; 71 void __iomem *reg_base;
@@ -74,6 +78,9 @@ struct ioatdma_device {
74 struct dca_provider *dca; 78 struct dca_provider *dca;
75 void (*intr_quirk)(struct ioatdma_device *device); 79 void (*intr_quirk)(struct ioatdma_device *device);
76 int (*enumerate_channels)(struct ioatdma_device *device); 80 int (*enumerate_channels)(struct ioatdma_device *device);
81 void (*cleanup_tasklet)(unsigned long data);
82 void (*timer_fn)(unsigned long data);
83 int (*self_test)(struct ioatdma_device *device);
77}; 84};
78 85
79struct ioat_chan_common { 86struct ioat_chan_common {
@@ -86,6 +93,7 @@ struct ioat_chan_common {
86 #define IOAT_COMPLETION_PENDING 0 93 #define IOAT_COMPLETION_PENDING 0
87 #define IOAT_COMPLETION_ACK 1 94 #define IOAT_COMPLETION_ACK 1
88 #define IOAT_RESET_PENDING 2 95 #define IOAT_RESET_PENDING 2
96 #define IOAT_KOBJ_INIT_FAIL 3
89 struct timer_list timer; 97 struct timer_list timer;
90 #define COMPLETION_TIMEOUT msecs_to_jiffies(100) 98 #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
91 #define IDLE_TIMEOUT msecs_to_jiffies(2000) 99 #define IDLE_TIMEOUT msecs_to_jiffies(2000)
@@ -94,8 +102,13 @@ struct ioat_chan_common {
94 dma_addr_t completion_dma; 102 dma_addr_t completion_dma;
95 u64 *completion; 103 u64 *completion;
96 struct tasklet_struct cleanup_task; 104 struct tasklet_struct cleanup_task;
105 struct kobject kobj;
97}; 106};
98 107
108struct ioat_sysfs_entry {
109 struct attribute attr;
110 ssize_t (*show)(struct dma_chan *, char *);
111};
99 112
100/** 113/**
101 * struct ioat_dma_chan - internal representation of a DMA channel 114 * struct ioat_dma_chan - internal representation of a DMA channel
@@ -111,6 +124,7 @@ struct ioat_dma_chan {
111 124
112 int pending; 125 int pending;
113 u16 desccount; 126 u16 desccount;
127 u16 active;
114}; 128};
115 129
116static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) 130static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
@@ -155,7 +169,7 @@ ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
155 169
156/** 170/**
157 * struct ioat_desc_sw - wrapper around hardware descriptor 171 * struct ioat_desc_sw - wrapper around hardware descriptor
158 * @hw: hardware DMA descriptor 172 * @hw: hardware DMA descriptor (for memcpy)
159 * @node: this descriptor will either be on the free list, 173 * @node: this descriptor will either be on the free list,
160 * or attached to a transaction list (tx_list) 174 * or attached to a transaction list (tx_list)
161 * @txd: the generic software descriptor for all engines 175 * @txd: the generic software descriptor for all engines
@@ -288,9 +302,20 @@ static inline bool is_ioat_bug(unsigned long err)
288 IOAT_CHANERR_LENGTH_ERR)); 302 IOAT_CHANERR_LENGTH_ERR));
289} 303}
290 304
305static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
306 int direction, enum dma_ctrl_flags flags, bool dst)
307{
308 if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
309 (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
310 pci_unmap_single(pdev, addr, len, direction);
311 else
312 pci_unmap_page(pdev, addr, len, direction);
313}
314
291int __devinit ioat_probe(struct ioatdma_device *device); 315int __devinit ioat_probe(struct ioatdma_device *device);
292int __devinit ioat_register(struct ioatdma_device *device); 316int __devinit ioat_register(struct ioatdma_device *device);
293int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); 317int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
318int __devinit ioat_dma_self_test(struct ioatdma_device *device);
294void __devexit ioat_dma_remove(struct ioatdma_device *device); 319void __devexit ioat_dma_remove(struct ioatdma_device *device);
295struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev, 320struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
296 void __iomem *iobase); 321 void __iomem *iobase);
@@ -304,4 +329,9 @@ void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
304 size_t len, struct ioat_dma_descriptor *hw); 329 size_t len, struct ioat_dma_descriptor *hw);
305bool ioat_cleanup_preamble(struct ioat_chan_common *chan, 330bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
306 unsigned long *phys_complete); 331 unsigned long *phys_complete);
332void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
333void ioat_kobject_del(struct ioatdma_device *device);
334extern struct sysfs_ops ioat_sysfs_ops;
335extern struct ioat_sysfs_entry ioat_version_attr;
336extern struct ioat_sysfs_entry ioat_cap_attr;
307#endif /* IOATDMA_H */ 337#endif /* IOATDMA_H */
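The substantive part of this header change is the per-version hook table (cleanup_tasklet, timer_fn, self_test) added to struct ioatdma_device: each hardware-version probe routine later in the patch fills these in once, and shared code then calls through the pointers instead of branching on the device version. Below is a minimal stand-alone sketch of that pattern; all demo_* names are made up for illustration and are not part of the driver.

#include <stdio.h>

struct demo_device;

struct demo_device {
    void (*cleanup_tasklet)(unsigned long data);
    void (*timer_fn)(unsigned long data);
    int  (*self_test)(struct demo_device *dev);
};

static void v3_cleanup(unsigned long data) { printf("v3 cleanup (raid-aware)\n"); }
static void v3_timer(unsigned long data)   { printf("v3 watchdog\n"); }
static int  v3_self_test(struct demo_device *dev) { printf("memcpy + xor/pq self test\n"); return 0; }

/* a version-specific probe selects behavior once... */
static void demo_v3_probe(struct demo_device *dev)
{
    dev->cleanup_tasklet = v3_cleanup;
    dev->timer_fn = v3_timer;
    dev->self_test = v3_self_test;
}

/* ...and shared code only ever calls through the hooks */
int main(void)
{
    struct demo_device dev;

    demo_v3_probe(&dev);
    dev.cleanup_tasklet(0);
    dev.timer_fn(0);
    return dev.self_test(&dev);
}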
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index fa3d6db6624c..5d6ac49e0d32 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -39,7 +39,7 @@
39#include "registers.h" 39#include "registers.h"
40#include "hw.h" 40#include "hw.h"
41 41
42static int ioat_ring_alloc_order = 8; 42int ioat_ring_alloc_order = 8;
43module_param(ioat_ring_alloc_order, int, 0644); 43module_param(ioat_ring_alloc_order, int, 0644);
44MODULE_PARM_DESC(ioat_ring_alloc_order, 44MODULE_PARM_DESC(ioat_ring_alloc_order,
45 "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); 45 "ioat2+: allocate 2^n descriptors per channel (default: n=8)");
@@ -48,7 +48,7 @@ module_param(ioat_ring_max_alloc_order, int, 0644);
48MODULE_PARM_DESC(ioat_ring_max_alloc_order, 48MODULE_PARM_DESC(ioat_ring_max_alloc_order,
49 "ioat2+: upper limit for dynamic ring resizing (default: n=16)"); 49 "ioat2+: upper limit for dynamic ring resizing (default: n=16)");
50 50
51static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) 51void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
52{ 52{
53 void * __iomem reg_base = ioat->base.reg_base; 53 void * __iomem reg_base = ioat->base.reg_base;
54 54
@@ -63,7 +63,7 @@ static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
63 __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); 63 __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
64} 64}
65 65
66static void ioat2_issue_pending(struct dma_chan *chan) 66void ioat2_issue_pending(struct dma_chan *chan)
67{ 67{
68 struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); 68 struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
69 69
@@ -206,7 +206,7 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
206 spin_unlock_bh(&chan->cleanup_lock); 206 spin_unlock_bh(&chan->cleanup_lock);
207} 207}
208 208
209static void ioat2_cleanup_tasklet(unsigned long data) 209void ioat2_cleanup_tasklet(unsigned long data)
210{ 210{
211 struct ioat2_dma_chan *ioat = (void *) data; 211 struct ioat2_dma_chan *ioat = (void *) data;
212 212
@@ -214,7 +214,7 @@ static void ioat2_cleanup_tasklet(unsigned long data)
214 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); 214 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
215} 215}
216 216
217static void __restart_chan(struct ioat2_dma_chan *ioat) 217void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
218{ 218{
219 struct ioat_chan_common *chan = &ioat->base; 219 struct ioat_chan_common *chan = &ioat->base;
220 220
@@ -255,12 +255,10 @@ static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
255 if (ioat_cleanup_preamble(chan, &phys_complete)) 255 if (ioat_cleanup_preamble(chan, &phys_complete))
256 __cleanup(ioat, phys_complete); 256 __cleanup(ioat, phys_complete);
257 257
258 __restart_chan(ioat); 258 __ioat2_restart_chan(ioat);
259} 259}
260 260
261static bool reshape_ring(struct ioat2_dma_chan *ioat, int order); 261void ioat2_timer_event(unsigned long data)
262
263static void ioat2_timer_event(unsigned long data)
264{ 262{
265 struct ioat2_dma_chan *ioat = (void *) data; 263 struct ioat2_dma_chan *ioat = (void *) data;
266 struct ioat_chan_common *chan = &ioat->base; 264 struct ioat_chan_common *chan = &ioat->base;
@@ -321,7 +319,7 @@ static void ioat2_timer_event(unsigned long data)
321 * ioat2_enumerate_channels - find and initialize the device's channels 319 * ioat2_enumerate_channels - find and initialize the device's channels
322 * @device: the device to be enumerated 320 * @device: the device to be enumerated
323 */ 321 */
324static int ioat2_enumerate_channels(struct ioatdma_device *device) 322int ioat2_enumerate_channels(struct ioatdma_device *device)
325{ 323{
326 struct ioat2_dma_chan *ioat; 324 struct ioat2_dma_chan *ioat;
327 struct device *dev = &device->pdev->dev; 325 struct device *dev = &device->pdev->dev;
@@ -354,8 +352,8 @@ static int ioat2_enumerate_channels(struct ioatdma_device *device)
354 break; 352 break;
355 353
356 ioat_init_channel(device, &ioat->base, i, 354 ioat_init_channel(device, &ioat->base, i,
357 ioat2_timer_event, 355 device->timer_fn,
358 ioat2_cleanup_tasklet, 356 device->cleanup_tasklet,
359 (unsigned long) ioat); 357 (unsigned long) ioat);
360 ioat->xfercap_log = xfercap_log; 358 ioat->xfercap_log = xfercap_log;
361 spin_lock_init(&ioat->ring_lock); 359 spin_lock_init(&ioat->ring_lock);
@@ -461,7 +459,7 @@ static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gf
461/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring 459/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
462 * @chan: channel to be initialized 460 * @chan: channel to be initialized
463 */ 461 */
464static int ioat2_alloc_chan_resources(struct dma_chan *c) 462int ioat2_alloc_chan_resources(struct dma_chan *c)
465{ 463{
466 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 464 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
467 struct ioat_chan_common *chan = &ioat->base; 465 struct ioat_chan_common *chan = &ioat->base;
@@ -515,7 +513,7 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c)
515 return 1 << ioat->alloc_order; 513 return 1 << ioat->alloc_order;
516} 514}
517 515
518static bool reshape_ring(struct ioat2_dma_chan *ioat, int order) 516bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
519{ 517{
520 /* reshape differs from normal ring allocation in that we want 518 /* reshape differs from normal ring allocation in that we want
521 * to allocate a new software ring while only 519 * to allocate a new software ring while only
@@ -628,7 +626,7 @@ static bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
628 * @ioat: ioat2,3 channel (ring) to operate on 626 * @ioat: ioat2,3 channel (ring) to operate on
629 * @num_descs: allocation length 627 * @num_descs: allocation length
630 */ 628 */
631static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) 629int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
632{ 630{
633 struct ioat_chan_common *chan = &ioat->base; 631 struct ioat_chan_common *chan = &ioat->base;
634 632
@@ -656,9 +654,11 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d
656 spin_lock_bh(&chan->cleanup_lock); 654 spin_lock_bh(&chan->cleanup_lock);
657 if (jiffies > chan->timer.expires && 655 if (jiffies > chan->timer.expires &&
658 timer_pending(&chan->timer)) { 656 timer_pending(&chan->timer)) {
657 struct ioatdma_device *device = chan->device;
658
659 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); 659 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
660 spin_unlock_bh(&chan->cleanup_lock); 660 spin_unlock_bh(&chan->cleanup_lock);
661 ioat2_timer_event((unsigned long) ioat); 661 device->timer_fn((unsigned long) ioat);
662 } else 662 } else
663 spin_unlock_bh(&chan->cleanup_lock); 663 spin_unlock_bh(&chan->cleanup_lock);
664 return -ENOMEM; 664 return -ENOMEM;
@@ -671,7 +671,7 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d
671 return 0; /* with ioat->ring_lock held */ 671 return 0; /* with ioat->ring_lock held */
672} 672}
673 673
674static struct dma_async_tx_descriptor * 674struct dma_async_tx_descriptor *
675ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, 675ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
676 dma_addr_t dma_src, size_t len, unsigned long flags) 676 dma_addr_t dma_src, size_t len, unsigned long flags)
677{ 677{
@@ -711,6 +711,7 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
711 desc->txd.flags = flags; 711 desc->txd.flags = flags;
712 desc->len = total_len; 712 desc->len = total_len;
713 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); 713 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
714 hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
714 hw->ctl_f.compl_write = 1; 715 hw->ctl_f.compl_write = 1;
715 dump_desc_dbg(ioat, desc); 716 dump_desc_dbg(ioat, desc);
716 /* we leave the channel locked to ensure in order submission */ 717 /* we leave the channel locked to ensure in order submission */
@@ -722,11 +723,11 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
722 * ioat2_free_chan_resources - release all the descriptors 723 * ioat2_free_chan_resources - release all the descriptors
723 * @chan: the channel to be cleaned 724 * @chan: the channel to be cleaned
724 */ 725 */
725static void ioat2_free_chan_resources(struct dma_chan *c) 726void ioat2_free_chan_resources(struct dma_chan *c)
726{ 727{
727 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 728 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
728 struct ioat_chan_common *chan = &ioat->base; 729 struct ioat_chan_common *chan = &ioat->base;
729 struct ioatdma_device *ioatdma_device = chan->device; 730 struct ioatdma_device *device = chan->device;
730 struct ioat_ring_ent *desc; 731 struct ioat_ring_ent *desc;
731 const u16 total_descs = 1 << ioat->alloc_order; 732 const u16 total_descs = 1 << ioat->alloc_order;
732 int descs; 733 int descs;
@@ -740,7 +741,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c)
740 741
741 tasklet_disable(&chan->cleanup_task); 742 tasklet_disable(&chan->cleanup_task);
742 del_timer_sync(&chan->timer); 743 del_timer_sync(&chan->timer);
743 ioat2_cleanup(ioat); 744 device->cleanup_tasklet((unsigned long) ioat);
744 745
745 /* Delay 100ms after reset to allow internal DMA logic to quiesce 746 /* Delay 100ms after reset to allow internal DMA logic to quiesce
746 * before removing DMA descriptor resources. 747 * before removing DMA descriptor resources.
@@ -770,8 +771,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c)
770 kfree(ioat->ring); 771 kfree(ioat->ring);
771 ioat->ring = NULL; 772 ioat->ring = NULL;
772 ioat->alloc_order = 0; 773 ioat->alloc_order = 0;
773 pci_pool_free(ioatdma_device->completion_pool, 774 pci_pool_free(device->completion_pool, chan->completion,
774 chan->completion,
775 chan->completion_dma); 775 chan->completion_dma);
776 spin_unlock_bh(&ioat->ring_lock); 776 spin_unlock_bh(&ioat->ring_lock);
777 777
@@ -781,66 +781,63 @@ static void ioat2_free_chan_resources(struct dma_chan *c)
781 ioat->dmacount = 0; 781 ioat->dmacount = 0;
782} 782}
783 783
784static enum dma_status 784enum dma_status
785ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, 785ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
786 dma_cookie_t *done, dma_cookie_t *used) 786 dma_cookie_t *done, dma_cookie_t *used)
787{ 787{
788 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 788 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
789 struct ioatdma_device *device = ioat->base.device;
789 790
790 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) 791 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
791 return DMA_SUCCESS; 792 return DMA_SUCCESS;
792 793
793 ioat2_cleanup(ioat); 794 device->cleanup_tasklet((unsigned long) ioat);
794 795
795 return ioat_is_complete(c, cookie, done, used); 796 return ioat_is_complete(c, cookie, done, used);
796} 797}
797 798
798int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) 799static ssize_t ring_size_show(struct dma_chan *c, char *page)
799{ 800{
800 struct pci_dev *pdev = device->pdev; 801 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
801 struct dma_device *dma;
802 struct dma_chan *c;
803 struct ioat_chan_common *chan;
804 int err;
805 802
806 device->enumerate_channels = ioat2_enumerate_channels; 803 return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
807 dma = &device->common; 804}
808 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; 805static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
809 dma->device_issue_pending = ioat2_issue_pending;
810 dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
811 dma->device_free_chan_resources = ioat2_free_chan_resources;
812 dma->device_is_tx_complete = ioat2_is_complete;
813 806
814 err = ioat_probe(device); 807static ssize_t ring_active_show(struct dma_chan *c, char *page)
815 if (err) 808{
816 return err; 809 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
817 ioat_set_tcp_copy_break(2048);
818 810
819 list_for_each_entry(c, &dma->channels, device_node) { 811 /* ...taken outside the lock, no need to be precise */
820 chan = to_chan_common(c); 812 return sprintf(page, "%d\n", ioat2_ring_active(ioat));
821 writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, 813}
822 chan->reg_base + IOAT_DCACTRL_OFFSET); 814static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
823 }
824 815
825 err = ioat_register(device); 816static struct attribute *ioat2_attrs[] = {
826 if (err) 817 &ring_size_attr.attr,
827 return err; 818 &ring_active_attr.attr,
828 if (dca) 819 &ioat_cap_attr.attr,
829 device->dca = ioat2_dca_init(pdev, device->reg_base); 820 &ioat_version_attr.attr,
821 NULL,
822};
830 823
831 return err; 824struct kobj_type ioat2_ktype = {
832} 825 .sysfs_ops = &ioat_sysfs_ops,
826 .default_attrs = ioat2_attrs,
827};
833 828
834int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) 829int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
835{ 830{
836 struct pci_dev *pdev = device->pdev; 831 struct pci_dev *pdev = device->pdev;
837 struct dma_device *dma; 832 struct dma_device *dma;
838 struct dma_chan *c; 833 struct dma_chan *c;
839 struct ioat_chan_common *chan; 834 struct ioat_chan_common *chan;
840 int err; 835 int err;
841 u16 dev_id;
842 836
843 device->enumerate_channels = ioat2_enumerate_channels; 837 device->enumerate_channels = ioat2_enumerate_channels;
838 device->cleanup_tasklet = ioat2_cleanup_tasklet;
839 device->timer_fn = ioat2_timer_event;
840 device->self_test = ioat_dma_self_test;
844 dma = &device->common; 841 dma = &device->common;
845 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; 842 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
846 dma->device_issue_pending = ioat2_issue_pending; 843 dma->device_issue_pending = ioat2_issue_pending;
@@ -848,35 +845,25 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
848 dma->device_free_chan_resources = ioat2_free_chan_resources; 845 dma->device_free_chan_resources = ioat2_free_chan_resources;
849 dma->device_is_tx_complete = ioat2_is_complete; 846 dma->device_is_tx_complete = ioat2_is_complete;
850 847
851 /* -= IOAT ver.3 workarounds =- */
852 /* Write CHANERRMSK_INT with 3E07h to mask out the errors
853 * that can cause stability issues for IOAT ver.3
854 */
855 pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
856
857 /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
858 * (workaround for spurious config parity error after restart)
859 */
860 pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
861 if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
862 pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
863
864 err = ioat_probe(device); 848 err = ioat_probe(device);
865 if (err) 849 if (err)
866 return err; 850 return err;
867 ioat_set_tcp_copy_break(262144); 851 ioat_set_tcp_copy_break(2048);
868 852
869 list_for_each_entry(c, &dma->channels, device_node) { 853 list_for_each_entry(c, &dma->channels, device_node) {
870 chan = to_chan_common(c); 854 chan = to_chan_common(c);
871 writel(IOAT_DMA_DCA_ANY_CPU, 855 writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
872 chan->reg_base + IOAT_DCACTRL_OFFSET); 856 chan->reg_base + IOAT_DCACTRL_OFFSET);
873 } 857 }
874 858
875 err = ioat_register(device); 859 err = ioat_register(device);
876 if (err) 860 if (err)
877 return err; 861 return err;
862
863 ioat_kobject_add(device, &ioat2_ktype);
864
878 if (dca) 865 if (dca)
879 device->dca = ioat3_dca_init(pdev, device->reg_base); 866 device->dca = ioat2_dca_init(pdev, device->reg_base);
880 867
881 return err; 868 return err;
882} 869}
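The dma_v2.c change above also exports per-channel ring statistics through sysfs: each ioat_sysfs_entry pairs an attribute name with a show() callback that takes the dma_chan, and the attributes hang off the "quickdata" kobject added in dma.c. The stand-alone sketch below mirrors that name-to-show() dispatch; it assumes the attributes would surface somewhere like /sys/class/dma/dma0chan0/quickdata/ (an assumption, not something the patch states), and all demo_* names are illustrative only.

#include <stdio.h>
#include <string.h>

struct demo_chan { int alloc_order; int active; };

struct demo_entry {
    const char *name;
    int (*show)(struct demo_chan *c, char *page);
};

static int ring_size_show(struct demo_chan *c, char *page)
{
    return sprintf(page, "%d\n", 1 << c->alloc_order);
}

static int ring_active_show(struct demo_chan *c, char *page)
{
    return sprintf(page, "%d\n", c->active);
}

static const struct demo_entry demo_attrs[] = {
    { "ring_size", ring_size_show },
    { "ring_active", ring_active_show },
};

/* a read of an attribute file ends up in a dispatcher like this */
static int demo_show(struct demo_chan *c, const char *name, char *page)
{
    size_t i;

    for (i = 0; i < sizeof(demo_attrs) / sizeof(demo_attrs[0]); i++)
        if (!strcmp(demo_attrs[i].name, name))
            return demo_attrs[i].show(c, page);
    return -1;
}

int main(void)
{
    struct demo_chan chan = { .alloc_order = 8, .active = 3 };
    char page[64];

    demo_show(&chan, "ring_size", page);
    printf("%s", page);
    return 0;
}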
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index ac00adc81974..1d849ef74d5f 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -27,6 +27,7 @@
27 27
28 28
29extern int ioat_pending_level; 29extern int ioat_pending_level;
30extern int ioat_ring_alloc_order;
30 31
31/* 32/*
32 * workaround for IOAT ver.3.0 null descriptor issue 33 * workaround for IOAT ver.3.0 null descriptor issue
@@ -114,10 +115,36 @@ static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len
114 return num_descs; 115 return num_descs;
115} 116}
116 117
118/**
119 * struct ioat_ring_ent - wrapper around hardware descriptor
120 * @hw: hardware DMA descriptor (for memcpy)
121 * @fill: hardware fill descriptor
122 * @xor: hardware xor descriptor
123 * @xor_ex: hardware xor extension descriptor
124 * @pq: hardware pq descriptor
125 * @pq_ex: hardware pq extension descriptor
126 * @pqu: hardware pq update descriptor
127 * @raw: hardware raw (un-typed) descriptor
128 * @txd: the generic software descriptor for all engines
129 * @len: total transaction length for unmap
130 * @result: asynchronous result of validate operations
131 * @id: identifier for debug
132 */
133
117struct ioat_ring_ent { 134struct ioat_ring_ent {
118 struct ioat_dma_descriptor *hw; 135 union {
136 struct ioat_dma_descriptor *hw;
137 struct ioat_fill_descriptor *fill;
138 struct ioat_xor_descriptor *xor;
139 struct ioat_xor_ext_descriptor *xor_ex;
140 struct ioat_pq_descriptor *pq;
141 struct ioat_pq_ext_descriptor *pq_ex;
142 struct ioat_pq_update_descriptor *pqu;
143 struct ioat_raw_descriptor *raw;
144 };
119 size_t len; 145 size_t len;
120 struct dma_async_tx_descriptor txd; 146 struct dma_async_tx_descriptor txd;
147 enum sum_check_flags *result;
121 #ifdef DEBUG 148 #ifdef DEBUG
122 int id; 149 int id;
123 #endif 150 #endif
@@ -143,5 +170,21 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
143int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); 170int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
144struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); 171struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
145struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); 172struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
173int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
174int ioat2_enumerate_channels(struct ioatdma_device *device);
175struct dma_async_tx_descriptor *
176ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
177 dma_addr_t dma_src, size_t len, unsigned long flags);
178void ioat2_issue_pending(struct dma_chan *chan);
179int ioat2_alloc_chan_resources(struct dma_chan *c);
180void ioat2_free_chan_resources(struct dma_chan *c);
181enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
182 dma_cookie_t *done, dma_cookie_t *used);
183void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
184bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
185void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
186void ioat2_cleanup_tasklet(unsigned long data);
187void ioat2_timer_event(unsigned long data);
188extern struct kobj_type ioat2_ktype;
146extern struct kmem_cache *ioat2_cache; 189extern struct kmem_cache *ioat2_cache;
147#endif /* IOATDMA_V2_H */ 190#endif /* IOATDMA_V2_H */
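The union added to struct ioat_ring_ent above lets one ring slot present several typed views of the same hardware descriptor, chosen by whichever operation was programmed into it. A simplified stand-alone sketch of that idea follows; the demo_* layouts are placeholders, not the real hw.h descriptor formats.

#include <stdint.h>
#include <stdio.h>

struct demo_memcpy_desc { uint32_t size; uint32_t ctl; uint64_t src, dst; };
struct demo_fill_desc   { uint32_t size; uint32_t ctl; uint64_t pattern, dst; };

struct demo_ring_ent {
    union {                 /* same storage, different typed views */
        struct demo_memcpy_desc *hw;
        struct demo_fill_desc *fill;
        void *raw;
    };
};

int main(void)
{
    struct demo_fill_desc slot = { .size = 4096, .pattern = 0x0101010101010101ULL };
    struct demo_ring_ent ent = { .raw = &slot };

    /* the cleanup path inspects the op type, then uses the matching view */
    printf("fill of %u bytes\n", (unsigned) ent.fill->size);
    return 0;
}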
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644
index 000000000000..3686dddf6bff
--- /dev/null
+++ b/drivers/dma/ioat/dma_v3.c
@@ -0,0 +1,1220 @@
1/*
2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
4 *
5 * GPL LICENSE SUMMARY
6 *
7 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * The full GNU General Public License is included in this distribution in
23 * the file called "COPYING".
24 *
25 * BSD LICENSE
26 *
27 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
28 *
29 * Redistribution and use in source and binary forms, with or without
30 * modification, are permitted provided that the following conditions are met:
31 *
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
37 * distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
43 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
46 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
47 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
48 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
49 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
50 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
51 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
52 * POSSIBILITY OF SUCH DAMAGE.
53 */
54
55/*
56 * Support routines for v3+ hardware
57 */
58
59#include <linux/pci.h>
60#include <linux/dmaengine.h>
61#include <linux/dma-mapping.h>
62#include "registers.h"
63#include "hw.h"
64#include "dma.h"
65#include "dma_v2.h"
66
67/* ioat hardware assumes at least two sources for raid operations */
68#define src_cnt_to_sw(x) ((x) + 2)
69#define src_cnt_to_hw(x) ((x) - 2)
70
71/* provide a lookup table for setting the source address in the base or
72 * extended descriptor of an xor or pq descriptor
73 */
74static const u8 xor_idx_to_desc __read_mostly = 0xd0;
75static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
76static const u8 pq_idx_to_desc __read_mostly = 0xf8;
77static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
78
79static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
80{
81 struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
82
83 return raw->field[xor_idx_to_field[idx]];
84}
85
86static void xor_set_src(struct ioat_raw_descriptor *descs[2],
87 dma_addr_t addr, u32 offset, int idx)
88{
89 struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
90
91 raw->field[xor_idx_to_field[idx]] = addr + offset;
92}
93
94static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
95{
96 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
97
98 return raw->field[pq_idx_to_field[idx]];
99}
100
101static void pq_set_src(struct ioat_raw_descriptor *descs[2],
102 dma_addr_t addr, u32 offset, u8 coef, int idx)
103{
104 struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
105 struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
106
107 raw->field[pq_idx_to_field[idx]] = addr + offset;
108 pq->coef[idx] = coef;
109}
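A stand-alone illustration of how the lookup tables above are consumed, using the pq values from this file: bit idx of pq_idx_to_desc selects the base (0) or extended (1) descriptor, and pq_idx_to_field[idx] selects the 64-bit slot inside it, so with 0xf8 sources 0-2 land in the base descriptor and sources 3-7 spill into the extension descriptor.

#include <stdio.h>

static const unsigned char pq_idx_to_desc = 0xf8;
static const unsigned char pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };

int main(void)
{
    int idx;

    for (idx = 0; idx < 8; idx++)
        printf("src[%d] -> %s descriptor, field %d\n", idx,
               (pq_idx_to_desc >> idx & 1) ? "extended" : "base",
               pq_idx_to_field[idx]);
    return 0;
}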
110
111static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
112 struct ioat_ring_ent *desc, int idx)
113{
114 struct ioat_chan_common *chan = &ioat->base;
115 struct pci_dev *pdev = chan->device->pdev;
116 size_t len = desc->len;
117 size_t offset = len - desc->hw->size;
118 struct dma_async_tx_descriptor *tx = &desc->txd;
119 enum dma_ctrl_flags flags = tx->flags;
120
121 switch (desc->hw->ctl_f.op) {
122 case IOAT_OP_COPY:
123 if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
124 ioat_dma_unmap(chan, flags, len, desc->hw);
125 break;
126 case IOAT_OP_FILL: {
127 struct ioat_fill_descriptor *hw = desc->fill;
128
129 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
130 ioat_unmap(pdev, hw->dst_addr - offset, len,
131 PCI_DMA_FROMDEVICE, flags, 1);
132 break;
133 }
134 case IOAT_OP_XOR_VAL:
135 case IOAT_OP_XOR: {
136 struct ioat_xor_descriptor *xor = desc->xor;
137 struct ioat_ring_ent *ext;
138 struct ioat_xor_ext_descriptor *xor_ex = NULL;
139 int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
140 struct ioat_raw_descriptor *descs[2];
141 int i;
142
143 if (src_cnt > 5) {
144 ext = ioat2_get_ring_ent(ioat, idx + 1);
145 xor_ex = ext->xor_ex;
146 }
147
148 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
149 descs[0] = (struct ioat_raw_descriptor *) xor;
150 descs[1] = (struct ioat_raw_descriptor *) xor_ex;
151 for (i = 0; i < src_cnt; i++) {
152 dma_addr_t src = xor_get_src(descs, i);
153
154 ioat_unmap(pdev, src - offset, len,
155 PCI_DMA_TODEVICE, flags, 0);
156 }
157
158 /* dest is a source in xor validate operations */
159 if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
160 ioat_unmap(pdev, xor->dst_addr - offset, len,
161 PCI_DMA_TODEVICE, flags, 1);
162 break;
163 }
164 }
165
166 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
167 ioat_unmap(pdev, xor->dst_addr - offset, len,
168 PCI_DMA_FROMDEVICE, flags, 1);
169 break;
170 }
171 case IOAT_OP_PQ_VAL:
172 case IOAT_OP_PQ: {
173 struct ioat_pq_descriptor *pq = desc->pq;
174 struct ioat_ring_ent *ext;
175 struct ioat_pq_ext_descriptor *pq_ex = NULL;
176 int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
177 struct ioat_raw_descriptor *descs[2];
178 int i;
179
180 if (src_cnt > 3) {
181 ext = ioat2_get_ring_ent(ioat, idx + 1);
182 pq_ex = ext->pq_ex;
183 }
184
185 /* in the 'continue' case don't unmap the dests as sources */
186 if (dmaf_p_disabled_continue(flags))
187 src_cnt--;
188 else if (dmaf_continue(flags))
189 src_cnt -= 3;
190
191 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
192 descs[0] = (struct ioat_raw_descriptor *) pq;
193 descs[1] = (struct ioat_raw_descriptor *) pq_ex;
194 for (i = 0; i < src_cnt; i++) {
195 dma_addr_t src = pq_get_src(descs, i);
196
197 ioat_unmap(pdev, src - offset, len,
198 PCI_DMA_TODEVICE, flags, 0);
199 }
200
201 /* the dests are sources in pq validate operations */
202 if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
203 if (!(flags & DMA_PREP_PQ_DISABLE_P))
204 ioat_unmap(pdev, pq->p_addr - offset,
205 len, PCI_DMA_TODEVICE, flags, 0);
206 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
207 ioat_unmap(pdev, pq->q_addr - offset,
208 len, PCI_DMA_TODEVICE, flags, 0);
209 break;
210 }
211 }
212
213 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
214 if (!(flags & DMA_PREP_PQ_DISABLE_P))
215 ioat_unmap(pdev, pq->p_addr - offset, len,
216 PCI_DMA_BIDIRECTIONAL, flags, 1);
217 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
218 ioat_unmap(pdev, pq->q_addr - offset, len,
219 PCI_DMA_BIDIRECTIONAL, flags, 1);
220 }
221 break;
222 }
223 default:
224 dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
225 __func__, desc->hw->ctl_f.op);
226 }
227}
228
229static bool desc_has_ext(struct ioat_ring_ent *desc)
230{
231 struct ioat_dma_descriptor *hw = desc->hw;
232
233 if (hw->ctl_f.op == IOAT_OP_XOR ||
234 hw->ctl_f.op == IOAT_OP_XOR_VAL) {
235 struct ioat_xor_descriptor *xor = desc->xor;
236
237 if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
238 return true;
239 } else if (hw->ctl_f.op == IOAT_OP_PQ ||
240 hw->ctl_f.op == IOAT_OP_PQ_VAL) {
241 struct ioat_pq_descriptor *pq = desc->pq;
242
243 if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
244 return true;
245 }
246
247 return false;
248}
249
250/**
251 * __cleanup - reclaim used descriptors
252 * @ioat: channel (ring) to clean
253 *
254 * The difference from the dma_v2.c __cleanup() is that this routine
255 * handles extended descriptors and dma-unmapping raid operations.
256 */
257static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
258{
259 struct ioat_chan_common *chan = &ioat->base;
260 struct ioat_ring_ent *desc;
261 bool seen_current = false;
262 u16 active;
263 int i;
264
265 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
266 __func__, ioat->head, ioat->tail, ioat->issued);
267
268 active = ioat2_ring_active(ioat);
269 for (i = 0; i < active && !seen_current; i++) {
270 struct dma_async_tx_descriptor *tx;
271
272 prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
273 desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
274 dump_desc_dbg(ioat, desc);
275 tx = &desc->txd;
276 if (tx->cookie) {
277 chan->completed_cookie = tx->cookie;
278 ioat3_dma_unmap(ioat, desc, ioat->tail + i);
279 tx->cookie = 0;
280 if (tx->callback) {
281 tx->callback(tx->callback_param);
282 tx->callback = NULL;
283 }
284 }
285
286 if (tx->phys == phys_complete)
287 seen_current = true;
288
289 /* skip extended descriptors */
290 if (desc_has_ext(desc)) {
291 BUG_ON(i + 1 >= active);
292 i++;
293 }
294 }
295 ioat->tail += i;
296 BUG_ON(!seen_current); /* no active descs have written a completion? */
297 chan->last_completion = phys_complete;
298 if (ioat->head == ioat->tail) {
299 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
300 __func__);
301 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
302 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
303 }
304}
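One subtlety of the reclaim loop above: an extension descriptor consumes a ring slot but never produces its own completion, so after handling a base descriptor that has one the loop advances an extra position. A trivial stand-alone sketch of that skip (illustration only, not driver code):

#include <stdio.h>

int main(void)
{
    /* 1 marks a slot whose descriptor is followed by an extension slot */
    int has_ext[] = { 0, 1, 0, 0, 1, 0 };
    int active = 6, i;

    for (i = 0; i < active; i++) {
        printf("reclaim slot %d\n", i);
        if (has_ext[i])
            i++;    /* step past the extension slot as well */
    }
    return 0;
}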
305
306static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
307{
308 struct ioat_chan_common *chan = &ioat->base;
309 unsigned long phys_complete;
310
311 prefetch(chan->completion);
312
313 if (!spin_trylock_bh(&chan->cleanup_lock))
314 return;
315
316 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
317 spin_unlock_bh(&chan->cleanup_lock);
318 return;
319 }
320
321 if (!spin_trylock_bh(&ioat->ring_lock)) {
322 spin_unlock_bh(&chan->cleanup_lock);
323 return;
324 }
325
326 __cleanup(ioat, phys_complete);
327
328 spin_unlock_bh(&ioat->ring_lock);
329 spin_unlock_bh(&chan->cleanup_lock);
330}
331
332static void ioat3_cleanup_tasklet(unsigned long data)
333{
334 struct ioat2_dma_chan *ioat = (void *) data;
335
336 ioat3_cleanup(ioat);
337 writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
338 ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
339}
340
341static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
342{
343 struct ioat_chan_common *chan = &ioat->base;
344 unsigned long phys_complete;
345 u32 status;
346
347 status = ioat_chansts(chan);
348 if (is_ioat_active(status) || is_ioat_idle(status))
349 ioat_suspend(chan);
350 while (is_ioat_active(status) || is_ioat_idle(status)) {
351 status = ioat_chansts(chan);
352 cpu_relax();
353 }
354
355 if (ioat_cleanup_preamble(chan, &phys_complete))
356 __cleanup(ioat, phys_complete);
357
358 __ioat2_restart_chan(ioat);
359}
360
361static void ioat3_timer_event(unsigned long data)
362{
363 struct ioat2_dma_chan *ioat = (void *) data;
364 struct ioat_chan_common *chan = &ioat->base;
365
366 spin_lock_bh(&chan->cleanup_lock);
367 if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
368 unsigned long phys_complete;
369 u64 status;
370
371 spin_lock_bh(&ioat->ring_lock);
372 status = ioat_chansts(chan);
373
374 /* when halted due to errors, check for channel
375 * programming errors before advancing the completion state
376 */
377 if (is_ioat_halted(status)) {
378 u32 chanerr;
379
380 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
381 BUG_ON(is_ioat_bug(chanerr));
382 }
383
384 /* if we haven't made progress and we have already
385 * acknowledged a pending completion once, then be more
386 * forceful with a restart
387 */
388 if (ioat_cleanup_preamble(chan, &phys_complete))
389 __cleanup(ioat, phys_complete);
390 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
391 ioat3_restart_channel(ioat);
392 else {
393 set_bit(IOAT_COMPLETION_ACK, &chan->state);
394 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
395 }
396 spin_unlock_bh(&ioat->ring_lock);
397 } else {
398 u16 active;
399
400 /* if the ring is idle, empty, and oversized try to step
401 * down the size
402 */
403 spin_lock_bh(&ioat->ring_lock);
404 active = ioat2_ring_active(ioat);
405 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
406 reshape_ring(ioat, ioat->alloc_order-1);
407 spin_unlock_bh(&ioat->ring_lock);
408
409 /* keep shrinking until we get back to our minimum
410 * default size
411 */
412 if (ioat->alloc_order > ioat_get_alloc_order())
413 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
414 }
415 spin_unlock_bh(&chan->cleanup_lock);
416}
417
418static enum dma_status
419ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
420 dma_cookie_t *done, dma_cookie_t *used)
421{
422 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
423
424 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
425 return DMA_SUCCESS;
426
427 ioat3_cleanup(ioat);
428
429 return ioat_is_complete(c, cookie, done, used);
430}
431
432static struct dma_async_tx_descriptor *
433ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
434 size_t len, unsigned long flags)
435{
436 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
437 struct ioat_ring_ent *desc;
438 size_t total_len = len;
439 struct ioat_fill_descriptor *fill;
440 int num_descs;
441 u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
442 u16 idx;
443 int i;
444
445 num_descs = ioat2_xferlen_to_descs(ioat, len);
446 if (likely(num_descs) &&
447 ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
448 /* pass */;
449 else
450 return NULL;
451 for (i = 0; i < num_descs; i++) {
452 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
453
454 desc = ioat2_get_ring_ent(ioat, idx + i);
455 fill = desc->fill;
456
457 fill->size = xfer_size;
458 fill->src_data = src_data;
459 fill->dst_addr = dest;
460 fill->ctl = 0;
461 fill->ctl_f.op = IOAT_OP_FILL;
462
463 len -= xfer_size;
464 dest += xfer_size;
465 dump_desc_dbg(ioat, desc);
466 }
467
468 desc->txd.flags = flags;
469 desc->len = total_len;
470 fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
471 fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
472 fill->ctl_f.compl_write = 1;
473 dump_desc_dbg(ioat, desc);
474
475 /* we leave the channel locked to ensure in order submission */
476 return &desc->txd;
477}
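The src_data computation above relies on byte replication: multiplying 0x0101010101010101 by the low byte of 'value' spreads that byte across the 64-bit fill pattern carried by the descriptor. A quick stand-alone check:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int value = 0xab;
    uint64_t src_data = 0x0101010101010101ULL * (value & 0xff);

    printf("%#llx\n", (unsigned long long) src_data);  /* prints 0xabababababababab */
    return 0;
}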
478
479static struct dma_async_tx_descriptor *
480__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
481 dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
482 size_t len, unsigned long flags)
483{
484 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
485 struct ioat_ring_ent *compl_desc;
486 struct ioat_ring_ent *desc;
487 struct ioat_ring_ent *ext;
488 size_t total_len = len;
489 struct ioat_xor_descriptor *xor;
490 struct ioat_xor_ext_descriptor *xor_ex = NULL;
491 struct ioat_dma_descriptor *hw;
492 u32 offset = 0;
493 int num_descs;
494 int with_ext;
495 int i;
496 u16 idx;
497 u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
498
499 BUG_ON(src_cnt < 2);
500
501 num_descs = ioat2_xferlen_to_descs(ioat, len);
502 /* we need 2x the number of descriptors to cover greater than 5
503 * sources
504 */
505 if (src_cnt > 5) {
506 with_ext = 1;
507 num_descs *= 2;
508 } else
509 with_ext = 0;
510
511 /* completion writes from the raid engine may pass completion
512 * writes from the legacy engine, so we need one extra null
513 * (legacy) descriptor to ensure all completion writes arrive in
514 * order.
515 */
516 if (likely(num_descs) &&
517 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
518 /* pass */;
519 else
520 return NULL;
521 for (i = 0; i < num_descs; i += 1 + with_ext) {
522 struct ioat_raw_descriptor *descs[2];
523 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
524 int s;
525
526 desc = ioat2_get_ring_ent(ioat, idx + i);
527 xor = desc->xor;
528
529 /* save a branch by unconditionally retrieving the
530 * extended descriptor; xor_set_src() knows not to write
531 * to it in the single descriptor case
532 */
533 ext = ioat2_get_ring_ent(ioat, idx + i + 1);
534 xor_ex = ext->xor_ex;
535
536 descs[0] = (struct ioat_raw_descriptor *) xor;
537 descs[1] = (struct ioat_raw_descriptor *) xor_ex;
538 for (s = 0; s < src_cnt; s++)
539 xor_set_src(descs, src[s], offset, s);
540 xor->size = xfer_size;
541 xor->dst_addr = dest + offset;
542 xor->ctl = 0;
543 xor->ctl_f.op = op;
544 xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
545
546 len -= xfer_size;
547 offset += xfer_size;
548 dump_desc_dbg(ioat, desc);
549 }
550
551 /* last xor descriptor carries the unmap parameters and fence bit */
552 desc->txd.flags = flags;
553 desc->len = total_len;
554 if (result)
555 desc->result = result;
556 xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
557
558 /* completion descriptor carries interrupt bit */
559 compl_desc = ioat2_get_ring_ent(ioat, idx + i);
560 compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
561 hw = compl_desc->hw;
562 hw->ctl = 0;
563 hw->ctl_f.null = 1;
564 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
565 hw->ctl_f.compl_write = 1;
566 hw->size = NULL_DESC_BUFFER_SIZE;
567 dump_desc_dbg(ioat, compl_desc);
568
569 /* we leave the channel locked to ensure in order submission */
570 return &desc->txd;
571}
572
573static struct dma_async_tx_descriptor *
574ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
575 unsigned int src_cnt, size_t len, unsigned long flags)
576{
577 return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
578}
579
580struct dma_async_tx_descriptor *
581ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
582 unsigned int src_cnt, size_t len,
583 enum sum_check_flags *result, unsigned long flags)
584{
585 /* the cleanup routine only sets bits on validate failure, it
586 * does not clear bits on validate success... so clear it here
587 */
588 *result = 0;
589
590 return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
591 src_cnt - 1, len, flags);
592}
593
594static void
595dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
596{
597 struct device *dev = to_dev(&ioat->base);
598 struct ioat_pq_descriptor *pq = desc->pq;
599 struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
600 struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
601 int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
602 int i;
603
604 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
605 " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
606 desc_id(desc), (unsigned long long) desc->txd.phys,
607 (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
608 desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
609 pq->ctl_f.compl_write,
610 pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
611 pq->ctl_f.src_cnt);
612 for (i = 0; i < src_cnt; i++)
613 dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
614 (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
615 dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
616 dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
617}
618
619static struct dma_async_tx_descriptor *
620__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
621 const dma_addr_t *dst, const dma_addr_t *src,
622 unsigned int src_cnt, const unsigned char *scf,
623 size_t len, unsigned long flags)
624{
625 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
626 struct ioat_chan_common *chan = &ioat->base;
627 struct ioat_ring_ent *compl_desc;
628 struct ioat_ring_ent *desc;
629 struct ioat_ring_ent *ext;
630 size_t total_len = len;
631 struct ioat_pq_descriptor *pq;
632 struct ioat_pq_ext_descriptor *pq_ex = NULL;
633 struct ioat_dma_descriptor *hw;
634 u32 offset = 0;
635 int num_descs;
636 int with_ext;
637 int i, s;
638 u16 idx;
639 u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
640
641 dev_dbg(to_dev(chan), "%s\n", __func__);
642 /* the engine requires at least two sources (we provide
643 * at least 1 implied source in the DMA_PREP_CONTINUE case)
644 */
645 BUG_ON(src_cnt + dmaf_continue(flags) < 2);
646
647 num_descs = ioat2_xferlen_to_descs(ioat, len);
648 /* we need 2x the number of descriptors to cover greater than 3
649 * sources
650 */
651 if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
652 with_ext = 1;
653 num_descs *= 2;
654 } else
655 with_ext = 0;
656
657 /* completion writes from the raid engine may pass completion
658 * writes from the legacy engine, so we need one extra null
659 * (legacy) descriptor to ensure all completion writes arrive in
660 * order.
661 */
662 if (likely(num_descs) &&
663 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
664 /* pass */;
665 else
666 return NULL;
667 for (i = 0; i < num_descs; i += 1 + with_ext) {
668 struct ioat_raw_descriptor *descs[2];
669 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
670
671 desc = ioat2_get_ring_ent(ioat, idx + i);
672 pq = desc->pq;
673
674 /* save a branch by unconditionally retrieving the
675 * extended descriptor; pq_set_src() knows not to write
676 * to it in the single descriptor case
677 */
678 ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
679 pq_ex = ext->pq_ex;
680
681 descs[0] = (struct ioat_raw_descriptor *) pq;
682 descs[1] = (struct ioat_raw_descriptor *) pq_ex;
683
684 for (s = 0; s < src_cnt; s++)
685 pq_set_src(descs, src[s], offset, scf[s], s);
686
687 /* see the comment for dma_maxpq in include/linux/dmaengine.h */
688 if (dmaf_p_disabled_continue(flags))
689 pq_set_src(descs, dst[1], offset, 1, s++);
690 else if (dmaf_continue(flags)) {
691 pq_set_src(descs, dst[0], offset, 0, s++);
692 pq_set_src(descs, dst[1], offset, 1, s++);
693 pq_set_src(descs, dst[1], offset, 0, s++);
694 }
695 pq->size = xfer_size;
696 pq->p_addr = dst[0] + offset;
697 pq->q_addr = dst[1] + offset;
698 pq->ctl = 0;
699 pq->ctl_f.op = op;
700 pq->ctl_f.src_cnt = src_cnt_to_hw(s);
701 pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
702 pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
703
704 len -= xfer_size;
705 offset += xfer_size;
706 }
707
708 /* last pq descriptor carries the unmap parameters and fence bit */
709 desc->txd.flags = flags;
710 desc->len = total_len;
711 if (result)
712 desc->result = result;
713 pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
714 dump_pq_desc_dbg(ioat, desc, ext);
715
716 /* completion descriptor carries interrupt bit */
717 compl_desc = ioat2_get_ring_ent(ioat, idx + i);
718 compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
719 hw = compl_desc->hw;
720 hw->ctl = 0;
721 hw->ctl_f.null = 1;
722 hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
723 hw->ctl_f.compl_write = 1;
724 hw->size = NULL_DESC_BUFFER_SIZE;
725 dump_desc_dbg(ioat, compl_desc);
726
727 /* we leave the channel locked to ensure in order submission */
728 return &desc->txd;
729}
730
731static struct dma_async_tx_descriptor *
732ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
733 unsigned int src_cnt, const unsigned char *scf, size_t len,
734 unsigned long flags)
735{
736 /* handle the single source multiply case from the raid6
737 * recovery path
738 */
739 if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
740 dma_addr_t single_source[2];
741 unsigned char single_source_coef[2];
742
743 BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
744 single_source[0] = src[0];
745 single_source[1] = src[0];
746 single_source_coef[0] = scf[0];
747 single_source_coef[1] = 0;
748
749 return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
750 single_source_coef, len, flags);
751 } else
752 return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
753 len, flags);
754}
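The single-source special case above works because q is the GF(256) sum (xor) of coef[i] * src[i], and a source paired with coefficient 0 contributes nothing, so padding the operation out to the engine's two-source minimum leaves the result unchanged. The sketch below demonstrates that with a generic RAID-6 field multiply over polynomial 0x1d; gf256_mul() is written here for illustration and is not a routine from this driver.

#include <stdio.h>

static unsigned char gf256_mul(unsigned char a, unsigned char b)
{
    unsigned char p = 0;
    int i;

    for (i = 0; i < 8; i++) {
        if (b & 1)
            p ^= a;
        b >>= 1;
        a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);
    }
    return p;
}

int main(void)
{
    unsigned char d = 0x57, coef = 0x1c;
    unsigned char q_single = gf256_mul(coef, d);
    unsigned char q_padded = gf256_mul(coef, d) ^ gf256_mul(0, d);

    printf("%#x == %#x\n", (unsigned) q_single, (unsigned) q_padded); /* identical */
    return 0;
}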
755
756struct dma_async_tx_descriptor *
757ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
758 unsigned int src_cnt, const unsigned char *scf, size_t len,
759 enum sum_check_flags *pqres, unsigned long flags)
760{
761 /* the cleanup routine only sets bits on validate failure, it
762 * does not clear bits on validate success... so clear it here
763 */
764 *pqres = 0;
765
766 return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
767 flags);
768}
769
770static struct dma_async_tx_descriptor *
771ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
772 unsigned int src_cnt, size_t len, unsigned long flags)
773{
774 unsigned char scf[src_cnt];
775 dma_addr_t pq[2];
776
777 memset(scf, 0, src_cnt);
778 flags |= DMA_PREP_PQ_DISABLE_Q;
779 pq[0] = dst;
780 pq[1] = ~0;
781
782 return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
783 flags);
784}
785
786struct dma_async_tx_descriptor *
787ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
788 unsigned int src_cnt, size_t len,
789 enum sum_check_flags *result, unsigned long flags)
790{
791 unsigned char scf[src_cnt];
792 dma_addr_t pq[2];
793
794 /* the cleanup routine only sets bits on validate failure, it
795 * does not clear bits on validate success... so clear it here
796 */
797 *result = 0;
798
799 memset(scf, 0, src_cnt);
800 flags |= DMA_PREP_PQ_DISABLE_Q;
801 pq[0] = src[0];
802 pq[1] = ~0;
803
804 return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
805 len, flags);
806}
807
808static struct dma_async_tx_descriptor *
809ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
810{
811 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
812 struct ioat_ring_ent *desc;
813 struct ioat_dma_descriptor *hw;
814 u16 idx;
815
816 if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
817 desc = ioat2_get_ring_ent(ioat, idx);
818 else
819 return NULL;
820
821 hw = desc->hw;
822 hw->ctl = 0;
823 hw->ctl_f.null = 1;
824 hw->ctl_f.int_en = 1;
825 hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
826 hw->ctl_f.compl_write = 1;
827 hw->size = NULL_DESC_BUFFER_SIZE;
828 hw->src_addr = 0;
829 hw->dst_addr = 0;
830
831 desc->txd.flags = flags;
832 desc->len = 1;
833
834 dump_desc_dbg(ioat, desc);
835
836 /* we leave the channel locked to ensure in order submission */
837 return &desc->txd;
838}
839
840static void __devinit ioat3_dma_test_callback(void *dma_async_param)
841{
842 struct completion *cmp = dma_async_param;
843
844 complete(cmp);
845}
846
847#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
848static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
849{
850 int i, src_idx;
851 struct page *dest;
852 struct page *xor_srcs[IOAT_NUM_SRC_TEST];
853 struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
854 dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
855 dma_addr_t dma_addr, dest_dma;
856 struct dma_async_tx_descriptor *tx;
857 struct dma_chan *dma_chan;
858 dma_cookie_t cookie;
859 u8 cmp_byte = 0;
860 u32 cmp_word;
861 u32 xor_val_result;
862 int err = 0;
863 struct completion cmp;
864 unsigned long tmo;
865 struct device *dev = &device->pdev->dev;
866 struct dma_device *dma = &device->common;
867
868 dev_dbg(dev, "%s\n", __func__);
869
870 if (!dma_has_cap(DMA_XOR, dma->cap_mask))
871 return 0;
872
873 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
874 xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
875 if (!xor_srcs[src_idx]) {
876 while (src_idx--)
877 __free_page(xor_srcs[src_idx]);
878 return -ENOMEM;
879 }
880 }
881
882 dest = alloc_page(GFP_KERNEL);
883 if (!dest) {
884 while (src_idx--)
885 __free_page(xor_srcs[src_idx]);
886 return -ENOMEM;
887 }
888
889 /* Fill in src buffers */
890 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
891 u8 *ptr = page_address(xor_srcs[src_idx]);
892 for (i = 0; i < PAGE_SIZE; i++)
893 ptr[i] = (1 << src_idx);
894 }
895
896 for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
897 cmp_byte ^= (u8) (1 << src_idx);
898
899 cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
900 (cmp_byte << 8) | cmp_byte;
901
902 memset(page_address(dest), 0, PAGE_SIZE);
903
904 dma_chan = container_of(dma->channels.next, struct dma_chan,
905 device_node);
906 if (dma->device_alloc_chan_resources(dma_chan) < 1) {
907 err = -ENODEV;
908 goto out;
909 }
910
911 /* test xor */
912 dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
913 for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
914 dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
915 DMA_TO_DEVICE);
916 tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
917 IOAT_NUM_SRC_TEST, PAGE_SIZE,
918 DMA_PREP_INTERRUPT);
919
920 if (!tx) {
921 dev_err(dev, "Self-test xor prep failed\n");
922 err = -ENODEV;
923 goto free_resources;
924 }
925
926 async_tx_ack(tx);
927 init_completion(&cmp);
928 tx->callback = ioat3_dma_test_callback;
929 tx->callback_param = &cmp;
930 cookie = tx->tx_submit(tx);
931 if (cookie < 0) {
932 dev_err(dev, "Self-test xor setup failed\n");
933 err = -ENODEV;
934 goto free_resources;
935 }
936 dma->device_issue_pending(dma_chan);
937
938 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
939
940 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
941 dev_err(dev, "Self-test xor timed out\n");
942 err = -ENODEV;
943 goto free_resources;
944 }
945
946 dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
947 for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
948 u32 *ptr = page_address(dest);
949 if (ptr[i] != cmp_word) {
950 dev_err(dev, "Self-test xor failed compare\n");
951 err = -ENODEV;
952 goto free_resources;
953 }
954 }
955 dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);
956
957 /* skip validate if the capability is not present */
958 if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
959 goto free_resources;
960
961 /* validate the sources with the destination page */
962 for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
963 xor_val_srcs[i] = xor_srcs[i];
964 xor_val_srcs[i] = dest;
965
966 xor_val_result = 1;
967
968 for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
969 dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
970 DMA_TO_DEVICE);
971 tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
972 IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
973 &xor_val_result, DMA_PREP_INTERRUPT);
974 if (!tx) {
975 dev_err(dev, "Self-test zero prep failed\n");
976 err = -ENODEV;
977 goto free_resources;
978 }
979
980 async_tx_ack(tx);
981 init_completion(&cmp);
982 tx->callback = ioat3_dma_test_callback;
983 tx->callback_param = &cmp;
984 cookie = tx->tx_submit(tx);
985 if (cookie < 0) {
986 dev_err(dev, "Self-test zero setup failed\n");
987 err = -ENODEV;
988 goto free_resources;
989 }
990 dma->device_issue_pending(dma_chan);
991
992 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
993
994 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
995 dev_err(dev, "Self-test validate timed out\n");
996 err = -ENODEV;
997 goto free_resources;
998 }
999
1000 if (xor_val_result != 0) {
1001 dev_err(dev, "Self-test validate failed compare\n");
1002 err = -ENODEV;
1003 goto free_resources;
1004 }
1005
1006 /* skip memset if the capability is not present */
1007 if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
1008 goto free_resources;
1009
1010 /* test memset */
1011 dma_addr = dma_map_page(dev, dest, 0,
1012 PAGE_SIZE, DMA_FROM_DEVICE);
1013 tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
1014 DMA_PREP_INTERRUPT);
1015 if (!tx) {
1016 dev_err(dev, "Self-test memset prep failed\n");
1017 err = -ENODEV;
1018 goto free_resources;
1019 }
1020
1021 async_tx_ack(tx);
1022 init_completion(&cmp);
1023 tx->callback = ioat3_dma_test_callback;
1024 tx->callback_param = &cmp;
1025 cookie = tx->tx_submit(tx);
1026 if (cookie < 0) {
1027 dev_err(dev, "Self-test memset setup failed\n");
1028 err = -ENODEV;
1029 goto free_resources;
1030 }
1031 dma->device_issue_pending(dma_chan);
1032
1033 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1034
1035 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1036 dev_err(dev, "Self-test memset timed out\n");
1037 err = -ENODEV;
1038 goto free_resources;
1039 }
1040
1041 for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
1042 u32 *ptr = page_address(dest);
1043 if (ptr[i]) {
1044 dev_err(dev, "Self-test memset failed compare\n");
1045 err = -ENODEV;
1046 goto free_resources;
1047 }
1048 }
1049
1050 /* test for non-zero parity sum */
1051 xor_val_result = 0;
1052 for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1053 dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1054 DMA_TO_DEVICE);
1055 tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1056 IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1057 &xor_val_result, DMA_PREP_INTERRUPT);
1058 if (!tx) {
1059 dev_err(dev, "Self-test 2nd zero prep failed\n");
1060 err = -ENODEV;
1061 goto free_resources;
1062 }
1063
1064 async_tx_ack(tx);
1065 init_completion(&cmp);
1066 tx->callback = ioat3_dma_test_callback;
1067 tx->callback_param = &cmp;
1068 cookie = tx->tx_submit(tx);
1069 if (cookie < 0) {
1070 dev_err(dev, "Self-test 2nd zero setup failed\n");
1071 err = -ENODEV;
1072 goto free_resources;
1073 }
1074 dma->device_issue_pending(dma_chan);
1075
1076 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1077
1078 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1079 dev_err(dev, "Self-test 2nd validate timed out\n");
1080 err = -ENODEV;
1081 goto free_resources;
1082 }
1083
1084 if (xor_val_result != SUM_CHECK_P_RESULT) {
1085 dev_err(dev, "Self-test validate failed compare\n");
1086 err = -ENODEV;
1087 goto free_resources;
1088 }
1089
1090free_resources:
1091 dma->device_free_chan_resources(dma_chan);
1092out:
1093 src_idx = IOAT_NUM_SRC_TEST;
1094 while (src_idx--)
1095 __free_page(xor_srcs[src_idx]);
1096 __free_page(dest);
1097 return err;
1098}
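
The xor and xor_val legs of this self-test rest on one invariant: once the destination holds the XOR of all sources, XOR-ing the destination back in together with those sources yields zero, which is what DMA_XOR_VAL reports through the result pointer (SUM_CHECK_P_RESULT when the sum is non-zero). A minimal user-space sketch of that invariant, with no driver involvement and arbitrary buffer sizes and fill patterns:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NSRC 4
#define LEN  4096

/* xor_val-style check: returns 0 when the buffers XOR to zero */
static int xor_val(uint8_t bufs[][LEN], int cnt)
{
	for (size_t i = 0; i < LEN; i++) {
		uint8_t acc = 0;
		for (int j = 0; j < cnt; j++)
			acc ^= bufs[j][i];
		if (acc)
			return 1;	/* stands in for SUM_CHECK_P_RESULT */
	}
	return 0;
}

int main(void)
{
	static uint8_t buf[NSRC + 1][LEN];	/* sources + destination */
	int i;
	size_t off;

	for (i = 0; i < NSRC; i++)		/* distinct per-source patterns */
		memset(buf[i], 1 << i, LEN);

	/* "xor" step: dest = src0 ^ src1 ^ src2 ^ src3 */
	for (off = 0; off < LEN; off++) {
		uint8_t v = 0;
		for (i = 0; i < NSRC; i++)
			v ^= buf[i][off];
		buf[NSRC][off] = v;
	}

	/* "xor_val" over sources + dest must report zero... */
	printf("clean parity:     %d (expect 0)\n", xor_val(buf, NSRC + 1));

	/* ...and must flag a non-zero sum once any buffer is disturbed */
	buf[NSRC][100] ^= 0xff;
	printf("corrupted parity: %d (expect 1)\n", xor_val(buf, NSRC + 1));
	return 0;
}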
1099
1100static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
1101{
1102 int rc = ioat_dma_self_test(device);
1103
1104 if (rc)
1105 return rc;
1106
1107 rc = ioat_xor_val_self_test(device);
1108 if (rc)
1109 return rc;
1110
1111 return 0;
1112}
1113
1114int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1115{
1116 struct pci_dev *pdev = device->pdev;
1117 struct dma_device *dma;
1118 struct dma_chan *c;
1119 struct ioat_chan_common *chan;
1120 bool is_raid_device = false;
1121 int err;
1122 u16 dev_id;
1123 u32 cap;
1124
1125 device->enumerate_channels = ioat2_enumerate_channels;
1126 device->self_test = ioat3_dma_self_test;
1127 dma = &device->common;
1128 dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
1129 dma->device_issue_pending = ioat2_issue_pending;
1130 dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
1131 dma->device_free_chan_resources = ioat2_free_chan_resources;
1132
1133 dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
1134 dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
1135
1136 cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
1137 if (cap & IOAT_CAP_XOR) {
1138 is_raid_device = true;
1139 dma->max_xor = 8;
1140 dma->xor_align = 2;
1141
1142 dma_cap_set(DMA_XOR, dma->cap_mask);
1143 dma->device_prep_dma_xor = ioat3_prep_xor;
1144
1145 dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1146 dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
1147 }
1148 if (cap & IOAT_CAP_PQ) {
1149 is_raid_device = true;
1150 dma_set_maxpq(dma, 8, 0);
1151 dma->pq_align = 2;
1152
1153 dma_cap_set(DMA_PQ, dma->cap_mask);
1154 dma->device_prep_dma_pq = ioat3_prep_pq;
1155
1156 dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
1157 dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1158
1159 if (!(cap & IOAT_CAP_XOR)) {
1160 dma->max_xor = 8;
1161 dma->xor_align = 2;
1162
1163 dma_cap_set(DMA_XOR, dma->cap_mask);
1164 dma->device_prep_dma_xor = ioat3_prep_pqxor;
1165
1166 dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1167 dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
1168 }
1169 }
1170 if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
1171 dma_cap_set(DMA_MEMSET, dma->cap_mask);
1172 dma->device_prep_dma_memset = ioat3_prep_memset_lock;
1173 }
1174
1175
1176 if (is_raid_device) {
1177 dma->device_is_tx_complete = ioat3_is_complete;
1178 device->cleanup_tasklet = ioat3_cleanup_tasklet;
1179 device->timer_fn = ioat3_timer_event;
1180 } else {
1181 dma->device_is_tx_complete = ioat2_is_complete;
1182 device->cleanup_tasklet = ioat2_cleanup_tasklet;
1183 device->timer_fn = ioat2_timer_event;
1184 }
1185
1186 /* -= IOAT ver.3 workarounds =- */
1187 /* Write CHANERRMSK_INT with 3E07h to mask out the errors
1188 * that can cause stability issues for IOAT ver.3
1189 */
1190 pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
1191
1192 /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
1193 * (workaround for spurious config parity error after restart)
1194 */
1195 pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
1196 if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
1197 pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
1198
1199 err = ioat_probe(device);
1200 if (err)
1201 return err;
1202 ioat_set_tcp_copy_break(262144);
1203
1204 list_for_each_entry(c, &dma->channels, device_node) {
1205 chan = to_chan_common(c);
1206 writel(IOAT_DMA_DCA_ANY_CPU,
1207 chan->reg_base + IOAT_DCACTRL_OFFSET);
1208 }
1209
1210 err = ioat_register(device);
1211 if (err)
1212 return err;
1213
1214 ioat_kobject_add(device, &ioat2_ktype);
1215
1216 if (dca)
1217 device->dca = ioat3_dca_init(pdev, device->reg_base);
1218
1219 return 0;
1220}
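
ioat3_dma_probe() keys everything off the DMACAPABILITY register: XOR and PQ each wire up their own prep routines, PQ-only parts emulate xor/xor_val through the PQ engine, and block fill is only advertised on RAID-capable parts. A rough stand-alone sketch of that decision tree, using the capability bit values from the registers.h hunk further down; the sample register values are invented:

#include <stdint.h>
#include <stdio.h>

/* capability bits as defined in drivers/dma/ioat/registers.h */
#define IOAT_CAP_FILL_BLOCK	0x00000040
#define IOAT_CAP_XOR		0x00000100
#define IOAT_CAP_PQ		0x00000200

static void decode_cap(uint32_t cap)
{
	int is_raid = 0;

	printf("cap=0x%08x:\n", cap);
	if (cap & IOAT_CAP_XOR) {
		is_raid = 1;
		printf("  native xor + xor_val (max 8 sources)\n");
	}
	if (cap & IOAT_CAP_PQ) {
		is_raid = 1;
		printf("  pq + pq_val\n");
		if (!(cap & IOAT_CAP_XOR))
			printf("  xor + xor_val emulated via the pq engine\n");
	}
	if (is_raid && (cap & IOAT_CAP_FILL_BLOCK))
		printf("  memset (block fill)\n");
	if (!is_raid)
		printf("  memcpy/interrupt only\n");
}

int main(void)
{
	/* sample values only -- real hardware reports its own mask */
	decode_cap(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_FILL_BLOCK);
	decode_cap(IOAT_CAP_PQ);
	decode_cap(0);
	return 0;
}
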
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index 7481fb13ce00..99afb12bd409 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -37,6 +37,7 @@
37#define IOAT_VER_1_2 0x12 /* Version 1.2 */ 37#define IOAT_VER_1_2 0x12 /* Version 1.2 */
38#define IOAT_VER_2_0 0x20 /* Version 2.0 */ 38#define IOAT_VER_2_0 0x20 /* Version 2.0 */
39#define IOAT_VER_3_0 0x30 /* Version 3.0 */ 39#define IOAT_VER_3_0 0x30 /* Version 3.0 */
40#define IOAT_VER_3_2 0x32 /* Version 3.2 */
40 41
41struct ioat_dma_descriptor { 42struct ioat_dma_descriptor {
42 uint32_t size; 43 uint32_t size;
@@ -55,6 +56,7 @@ struct ioat_dma_descriptor {
55 unsigned int dest_dca:1; 56 unsigned int dest_dca:1;
56 unsigned int hint:1; 57 unsigned int hint:1;
57 unsigned int rsvd2:13; 58 unsigned int rsvd2:13;
59 #define IOAT_OP_COPY 0x00
58 unsigned int op:8; 60 unsigned int op:8;
59 } ctl_f; 61 } ctl_f;
60 }; 62 };
@@ -70,4 +72,144 @@ struct ioat_dma_descriptor {
70 }; 72 };
71 uint64_t user2; 73 uint64_t user2;
72}; 74};
75
76struct ioat_fill_descriptor {
77 uint32_t size;
78 union {
79 uint32_t ctl;
80 struct {
81 unsigned int int_en:1;
82 unsigned int rsvd:1;
83 unsigned int dest_snoop_dis:1;
84 unsigned int compl_write:1;
85 unsigned int fence:1;
86 unsigned int rsvd2:2;
87 unsigned int dest_brk:1;
88 unsigned int bundle:1;
89 unsigned int rsvd4:15;
90 #define IOAT_OP_FILL 0x01
91 unsigned int op:8;
92 } ctl_f;
93 };
94 uint64_t src_data;
95 uint64_t dst_addr;
96 uint64_t next;
97 uint64_t rsv1;
98 uint64_t next_dst_addr;
99 uint64_t user1;
100 uint64_t user2;
101};
102
103struct ioat_xor_descriptor {
104 uint32_t size;
105 union {
106 uint32_t ctl;
107 struct {
108 unsigned int int_en:1;
109 unsigned int src_snoop_dis:1;
110 unsigned int dest_snoop_dis:1;
111 unsigned int compl_write:1;
112 unsigned int fence:1;
113 unsigned int src_cnt:3;
114 unsigned int bundle:1;
115 unsigned int dest_dca:1;
116 unsigned int hint:1;
117 unsigned int rsvd:13;
118 #define IOAT_OP_XOR 0x87
119 #define IOAT_OP_XOR_VAL 0x88
120 unsigned int op:8;
121 } ctl_f;
122 };
123 uint64_t src_addr;
124 uint64_t dst_addr;
125 uint64_t next;
126 uint64_t src_addr2;
127 uint64_t src_addr3;
128 uint64_t src_addr4;
129 uint64_t src_addr5;
130};
131
132struct ioat_xor_ext_descriptor {
133 uint64_t src_addr6;
134 uint64_t src_addr7;
135 uint64_t src_addr8;
136 uint64_t next;
137 uint64_t rsvd[4];
138};
139
140struct ioat_pq_descriptor {
141 uint32_t size;
142 union {
143 uint32_t ctl;
144 struct {
145 unsigned int int_en:1;
146 unsigned int src_snoop_dis:1;
147 unsigned int dest_snoop_dis:1;
148 unsigned int compl_write:1;
149 unsigned int fence:1;
150 unsigned int src_cnt:3;
151 unsigned int bundle:1;
152 unsigned int dest_dca:1;
153 unsigned int hint:1;
154 unsigned int p_disable:1;
155 unsigned int q_disable:1;
156 unsigned int rsvd:11;
157 #define IOAT_OP_PQ 0x89
158 #define IOAT_OP_PQ_VAL 0x8a
159 unsigned int op:8;
160 } ctl_f;
161 };
162 uint64_t src_addr;
163 uint64_t p_addr;
164 uint64_t next;
165 uint64_t src_addr2;
166 uint64_t src_addr3;
167 uint8_t coef[8];
168 uint64_t q_addr;
169};
170
171struct ioat_pq_ext_descriptor {
172 uint64_t src_addr4;
173 uint64_t src_addr5;
174 uint64_t src_addr6;
175 uint64_t next;
176 uint64_t src_addr7;
177 uint64_t src_addr8;
178 uint64_t rsvd[2];
179};
180
181struct ioat_pq_update_descriptor {
182 uint32_t size;
183 union {
184 uint32_t ctl;
185 struct {
186 unsigned int int_en:1;
187 unsigned int src_snoop_dis:1;
188 unsigned int dest_snoop_dis:1;
189 unsigned int compl_write:1;
190 unsigned int fence:1;
191 unsigned int src_cnt:3;
192 unsigned int bundle:1;
193 unsigned int dest_dca:1;
194 unsigned int hint:1;
195 unsigned int p_disable:1;
196 unsigned int q_disable:1;
197 unsigned int rsvd:3;
198 unsigned int coef:8;
199 #define IOAT_OP_PQ_UP 0x8b
200 unsigned int op:8;
201 } ctl_f;
202 };
203 uint64_t src_addr;
204 uint64_t p_addr;
205 uint64_t next;
206 uint64_t src_addr2;
207 uint64_t p_src;
208 uint64_t q_src;
209 uint64_t q_addr;
210};
211
212struct ioat_raw_descriptor {
213 uint64_t field[8];
214};
73#endif 215#endif
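
Each of the version-3 descriptor and extension formats above fills one 64-byte hardware slot, which is why the catch-all ioat_raw_descriptor of eight 64-bit fields can overlay any of them. A hedged sketch with a simplified mirror of the PQ descriptor; the ctl bitfields are collapsed to a plain 32-bit word here, and the 64-byte total assumes the usual LP64 alignment rules:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* simplified mirror of ioat_pq_descriptor: 32-bit size + 32-bit ctl,
 * five 64-bit address/next fields, an 8-byte coefficient array, q_addr */
struct pq_desc_mirror {
	uint32_t size;
	uint32_t ctl;
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint8_t  coef[8];
	uint64_t q_addr;
};

struct raw_desc_mirror {
	uint64_t field[8];
};

int main(void)
{
	/* every v3 descriptor variant occupies one 64-byte slot */
	assert(sizeof(struct pq_desc_mirror) == 64);
	assert(sizeof(struct raw_desc_mirror) == 64);
	printf("pq descriptor: %zu bytes, raw overlay: %zu bytes\n",
	       sizeof(struct pq_desc_mirror), sizeof(struct raw_desc_mirror));
	return 0;
}
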
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 61086c6bbf42..c788fa266470 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -36,30 +36,44 @@
36#include "hw.h" 36#include "hw.h"
37 37
38MODULE_VERSION(IOAT_DMA_VERSION); 38MODULE_VERSION(IOAT_DMA_VERSION);
39MODULE_LICENSE("GPL"); 39MODULE_LICENSE("Dual BSD/GPL");
40MODULE_AUTHOR("Intel Corporation"); 40MODULE_AUTHOR("Intel Corporation");
41 41
42static struct pci_device_id ioat_pci_tbl[] = { 42static struct pci_device_id ioat_pci_tbl[] = {
43 /* I/OAT v1 platforms */ 43 /* I/OAT v1 platforms */
44 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, 44 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
45 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, 45 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
46 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, 46 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
47 { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, 47 { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
48 48
49 /* I/OAT v2 platforms */ 49 /* I/OAT v2 platforms */
50 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, 50 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
51 51
52 /* I/OAT v3 platforms */ 52 /* I/OAT v3 platforms */
53 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, 53 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
54 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, 54 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
55 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, 55 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
56 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, 56 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
57 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, 57 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
58 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, 58 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
59 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, 59 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
60 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, 60 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
61
62 /* I/OAT v3.2 platforms */
63 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
64 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
65 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
66 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
67 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
68 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
69 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
70 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
71 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
72 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
73
61 { 0, } 74 { 0, }
62}; 75};
76MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
63 77
64static int __devinit ioat_pci_probe(struct pci_dev *pdev, 78static int __devinit ioat_pci_probe(struct pci_dev *pdev,
65 const struct pci_device_id *id); 79 const struct pci_device_id *id);
@@ -172,6 +186,9 @@ static int __init ioat_init_module(void)
172{ 186{
173 int err; 187 int err;
174 188
189 pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
190 DRV_NAME, IOAT_DMA_VERSION);
191
175 ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), 192 ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
176 0, SLAB_HWCACHE_ALIGN, NULL); 193 0, SLAB_HWCACHE_ALIGN, NULL);
177 if (!ioat2_cache) 194 if (!ioat2_cache)
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index e4334a195380..63038e18ab03 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -64,12 +64,27 @@
64 64
65#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ 65#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
66#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 66#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
67#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002
68#define IOAT_DEVICE_MEMORY_BYPASS 0x0004
69#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008
70
71#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */
72#define IOAT_CAP_PAGE_BREAK 0x00000001
73#define IOAT_CAP_CRC 0x00000002
74#define IOAT_CAP_SKIP_MARKER 0x00000004
75#define IOAT_CAP_DCA 0x00000010
76#define IOAT_CAP_CRC_MOVE 0x00000020
77#define IOAT_CAP_FILL_BLOCK 0x00000040
78#define IOAT_CAP_APIC 0x00000080
79#define IOAT_CAP_XOR 0x00000100
80#define IOAT_CAP_PQ 0x00000200
67 81
68#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ 82#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
69 83
70/* DMA Channel Registers */ 84/* DMA Channel Registers */
71#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ 85#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
72#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 86#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
87#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200
73#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 88#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
74#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 89#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
75#define IOAT_CHANCTRL_ERR_INT_EN 0x0010 90#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
@@ -224,6 +239,11 @@
224#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 239#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
225#define IOAT_CHANERR_SOFT_ERR 0x4000 240#define IOAT_CHANERR_SOFT_ERR 0x4000
226#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 241#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
242#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000
243#define IOAT_CHANERR_XOR_Q_ERR 0x20000
244#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000
245
246#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
227 247
228#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ 248#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
229 249
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 9f6c16f8e2be..645ca8d54ec4 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -31,6 +31,7 @@
31#include <linux/platform_device.h> 31#include <linux/platform_device.h>
32#include <linux/memory.h> 32#include <linux/memory.h>
33#include <linux/ioport.h> 33#include <linux/ioport.h>
34#include <linux/raid/pq.h>
34 35
35#include <mach/adma.h> 36#include <mach/adma.h>
36 37
@@ -57,65 +58,110 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
57 } 58 }
58} 59}
59 60
61static void
62iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
63{
64 struct dma_async_tx_descriptor *tx = &desc->async_tx;
65 struct iop_adma_desc_slot *unmap = desc->group_head;
66 struct device *dev = &iop_chan->device->pdev->dev;
67 u32 len = unmap->unmap_len;
68 enum dma_ctrl_flags flags = tx->flags;
69 u32 src_cnt;
70 dma_addr_t addr;
71 dma_addr_t dest;
72
73 src_cnt = unmap->unmap_src_cnt;
74 dest = iop_desc_get_dest_addr(unmap, iop_chan);
75 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
76 enum dma_data_direction dir;
77
78 if (src_cnt > 1) /* is xor? */
79 dir = DMA_BIDIRECTIONAL;
80 else
81 dir = DMA_FROM_DEVICE;
82
83 dma_unmap_page(dev, dest, len, dir);
84 }
85
86 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
87 while (src_cnt--) {
88 addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
89 if (addr == dest)
90 continue;
91 dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
92 }
93 }
94 desc->group_head = NULL;
95}
96
97static void
98iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
99{
100 struct dma_async_tx_descriptor *tx = &desc->async_tx;
101 struct iop_adma_desc_slot *unmap = desc->group_head;
102 struct device *dev = &iop_chan->device->pdev->dev;
103 u32 len = unmap->unmap_len;
104 enum dma_ctrl_flags flags = tx->flags;
105 u32 src_cnt = unmap->unmap_src_cnt;
106 dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
107 dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
108 int i;
109
110 if (tx->flags & DMA_PREP_CONTINUE)
111 src_cnt -= 3;
112
113 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
114 dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
115 dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
116 }
117
118 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
119 dma_addr_t addr;
120
121 for (i = 0; i < src_cnt; i++) {
122 addr = iop_desc_get_src_addr(unmap, iop_chan, i);
123 dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
124 }
125 if (desc->pq_check_result) {
126 dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
127 dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
128 }
129 }
130
131 desc->group_head = NULL;
132}
133
134
60static dma_cookie_t 135static dma_cookie_t
61iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, 136iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
62 struct iop_adma_chan *iop_chan, dma_cookie_t cookie) 137 struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
63{ 138{
64 BUG_ON(desc->async_tx.cookie < 0); 139 struct dma_async_tx_descriptor *tx = &desc->async_tx;
65 if (desc->async_tx.cookie > 0) { 140
66 cookie = desc->async_tx.cookie; 141 BUG_ON(tx->cookie < 0);
67 desc->async_tx.cookie = 0; 142 if (tx->cookie > 0) {
143 cookie = tx->cookie;
144 tx->cookie = 0;
68 145
69 /* call the callback (must not sleep or submit new 146 /* call the callback (must not sleep or submit new
70 * operations to this channel) 147 * operations to this channel)
71 */ 148 */
72 if (desc->async_tx.callback) 149 if (tx->callback)
73 desc->async_tx.callback( 150 tx->callback(tx->callback_param);
74 desc->async_tx.callback_param);
75 151
76 /* unmap dma addresses 152 /* unmap dma addresses
77 * (unmap_single vs unmap_page?) 153 * (unmap_single vs unmap_page?)
78 */ 154 */
79 if (desc->group_head && desc->unmap_len) { 155 if (desc->group_head && desc->unmap_len) {
80 struct iop_adma_desc_slot *unmap = desc->group_head; 156 if (iop_desc_is_pq(desc))
81 struct device *dev = 157 iop_desc_unmap_pq(iop_chan, desc);
82 &iop_chan->device->pdev->dev; 158 else
83 u32 len = unmap->unmap_len; 159 iop_desc_unmap(iop_chan, desc);
84 enum dma_ctrl_flags flags = desc->async_tx.flags;
85 u32 src_cnt;
86 dma_addr_t addr;
87 dma_addr_t dest;
88
89 src_cnt = unmap->unmap_src_cnt;
90 dest = iop_desc_get_dest_addr(unmap, iop_chan);
91 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
92 enum dma_data_direction dir;
93
94 if (src_cnt > 1) /* is xor? */
95 dir = DMA_BIDIRECTIONAL;
96 else
97 dir = DMA_FROM_DEVICE;
98
99 dma_unmap_page(dev, dest, len, dir);
100 }
101
102 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
103 while (src_cnt--) {
104 addr = iop_desc_get_src_addr(unmap,
105 iop_chan,
106 src_cnt);
107 if (addr == dest)
108 continue;
109 dma_unmap_page(dev, addr, len,
110 DMA_TO_DEVICE);
111 }
112 }
113 desc->group_head = NULL;
114 } 160 }
115 } 161 }
116 162
117 /* run dependent operations */ 163 /* run dependent operations */
118 dma_run_dependencies(&desc->async_tx); 164 dma_run_dependencies(tx);
119 165
120 return cookie; 166 return cookie;
121} 167}
@@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data)
287{ 333{
288 struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; 334 struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
289 335
 290	spin_lock(&iop_chan->lock);	 336	/* lockdep will flag dependency submissions as potentially
 337	 * recursive locking; this is not the case, as a dependency
 338	 * submission will never recurse into a channel's submit routine.
339 * There are checks in async_tx.c to prevent this.
340 */
341 spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
291 __iop_adma_slot_cleanup(iop_chan); 342 __iop_adma_slot_cleanup(iop_chan);
292 spin_unlock(&iop_chan->lock); 343 spin_unlock(&iop_chan->lock);
293} 344}
@@ -661,9 +712,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
661} 712}
662 713
663static struct dma_async_tx_descriptor * 714static struct dma_async_tx_descriptor *
664iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, 715iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
665 unsigned int src_cnt, size_t len, u32 *result, 716 unsigned int src_cnt, size_t len, u32 *result,
666 unsigned long flags) 717 unsigned long flags)
667{ 718{
668 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 719 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
669 struct iop_adma_desc_slot *sw_desc, *grp_start; 720 struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -697,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
697 return sw_desc ? &sw_desc->async_tx : NULL; 748 return sw_desc ? &sw_desc->async_tx : NULL;
698} 749}
699 750
751static struct dma_async_tx_descriptor *
752iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
753 unsigned int src_cnt, const unsigned char *scf, size_t len,
754 unsigned long flags)
755{
756 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
757 struct iop_adma_desc_slot *sw_desc, *g;
758 int slot_cnt, slots_per_op;
759 int continue_srcs;
760
761 if (unlikely(!len))
762 return NULL;
763 BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
764
765 dev_dbg(iop_chan->device->common.dev,
766 "%s src_cnt: %d len: %u flags: %lx\n",
767 __func__, src_cnt, len, flags);
768
769 if (dmaf_p_disabled_continue(flags))
770 continue_srcs = 1+src_cnt;
771 else if (dmaf_continue(flags))
772 continue_srcs = 3+src_cnt;
773 else
774 continue_srcs = 0+src_cnt;
775
776 spin_lock_bh(&iop_chan->lock);
777 slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
778 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
779 if (sw_desc) {
780 int i;
781
782 g = sw_desc->group_head;
783 iop_desc_set_byte_count(g, iop_chan, len);
784
 785	/* even if P is disabled, its destination address (bits
 786	 * [3:0]) must match Q's. It is ok if P points to an
 787	 * invalid address; it won't be written.
788 */
789 if (flags & DMA_PREP_PQ_DISABLE_P)
790 dst[0] = dst[1] & 0x7;
791
792 iop_desc_set_pq_addr(g, dst);
793 sw_desc->unmap_src_cnt = src_cnt;
794 sw_desc->unmap_len = len;
795 sw_desc->async_tx.flags = flags;
796 for (i = 0; i < src_cnt; i++)
797 iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
798
799 /* if we are continuing a previous operation factor in
800 * the old p and q values, see the comment for dma_maxpq
801 * in include/linux/dmaengine.h
802 */
803 if (dmaf_p_disabled_continue(flags))
804 iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
805 else if (dmaf_continue(flags)) {
806 iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
807 iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
808 iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
809 }
810 iop_desc_init_pq(g, i, flags);
811 }
812 spin_unlock_bh(&iop_chan->lock);
813
814 return sw_desc ? &sw_desc->async_tx : NULL;
815}
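
The continuation handling above is what drives the slot accounting at the top of the function: when a caller chains onto a previously computed P/Q (see the dma_maxpq comment in dmaengine.h), the old P and Q results are re-fed to the engine as extra sources, costing three extra source slots in the general case and one when P is disabled. A small stand-alone mirror of that accounting; the flag bits here are illustrative, not the real enum dma_ctrl_flags values:

#include <stdio.h>

#define PREP_CONTINUE		(1u << 0)	/* illustrative bit */
#define PREP_PQ_DISABLE_P	(1u << 1)	/* illustrative bit */

/* mirrors the continue_srcs computation in iop_adma_prep_dma_pq() */
static unsigned int pq_hw_sources(unsigned int src_cnt, unsigned int flags)
{
	if ((flags & PREP_CONTINUE) && (flags & PREP_PQ_DISABLE_P))
		return src_cnt + 1;	/* only the old Q is re-fed */
	if (flags & PREP_CONTINUE)
		return src_cnt + 3;	/* old P, plus old Q twice, per the code above */
	return src_cnt;			/* fresh computation */
}

int main(void)
{
	printf("4 srcs, fresh:            %u\n", pq_hw_sources(4, 0));
	printf("4 srcs, continue:         %u\n", pq_hw_sources(4, PREP_CONTINUE));
	printf("4 srcs, continue, P off:  %u\n",
	       pq_hw_sources(4, PREP_CONTINUE | PREP_PQ_DISABLE_P));
	return 0;
}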
816
817static struct dma_async_tx_descriptor *
818iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
819 unsigned int src_cnt, const unsigned char *scf,
820 size_t len, enum sum_check_flags *pqres,
821 unsigned long flags)
822{
823 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
824 struct iop_adma_desc_slot *sw_desc, *g;
825 int slot_cnt, slots_per_op;
826
827 if (unlikely(!len))
828 return NULL;
829 BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
830
831 dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
832 __func__, src_cnt, len);
833
834 spin_lock_bh(&iop_chan->lock);
835 slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
836 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
837 if (sw_desc) {
838 /* for validate operations p and q are tagged onto the
839 * end of the source list
840 */
841 int pq_idx = src_cnt;
842
843 g = sw_desc->group_head;
844 iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
845 iop_desc_set_pq_zero_sum_byte_count(g, len);
846 g->pq_check_result = pqres;
847 pr_debug("\t%s: g->pq_check_result: %p\n",
848 __func__, g->pq_check_result);
849 sw_desc->unmap_src_cnt = src_cnt+2;
850 sw_desc->unmap_len = len;
851 sw_desc->async_tx.flags = flags;
852 while (src_cnt--)
853 iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
854 src[src_cnt],
855 scf[src_cnt]);
856 iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
857 }
858 spin_unlock_bh(&iop_chan->lock);
859
860 return sw_desc ? &sw_desc->async_tx : NULL;
861}
862
700static void iop_adma_free_chan_resources(struct dma_chan *chan) 863static void iop_adma_free_chan_resources(struct dma_chan *chan)
701{ 864{
702 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 865 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
@@ -907,7 +1070,7 @@ out:
907 1070
908#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ 1071#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
909static int __devinit 1072static int __devinit
910iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) 1073iop_adma_xor_val_self_test(struct iop_adma_device *device)
911{ 1074{
912 int i, src_idx; 1075 int i, src_idx;
913 struct page *dest; 1076 struct page *dest;
@@ -1003,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1003 PAGE_SIZE, DMA_TO_DEVICE); 1166 PAGE_SIZE, DMA_TO_DEVICE);
1004 1167
1005 /* skip zero sum if the capability is not present */ 1168 /* skip zero sum if the capability is not present */
1006 if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) 1169 if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
1007 goto free_resources; 1170 goto free_resources;
1008 1171
1009 /* zero sum the sources with the destintation page */ 1172 /* zero sum the sources with the destintation page */
@@ -1017,10 +1180,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1017 dma_srcs[i] = dma_map_page(dma_chan->device->dev, 1180 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
1018 zero_sum_srcs[i], 0, PAGE_SIZE, 1181 zero_sum_srcs[i], 0, PAGE_SIZE,
1019 DMA_TO_DEVICE); 1182 DMA_TO_DEVICE);
1020 tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, 1183 tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
1021 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, 1184 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
1022 &zero_sum_result, 1185 &zero_sum_result,
1023 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1186 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1024 1187
1025 cookie = iop_adma_tx_submit(tx); 1188 cookie = iop_adma_tx_submit(tx);
1026 iop_adma_issue_pending(dma_chan); 1189 iop_adma_issue_pending(dma_chan);
@@ -1073,10 +1236,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1073 dma_srcs[i] = dma_map_page(dma_chan->device->dev, 1236 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
1074 zero_sum_srcs[i], 0, PAGE_SIZE, 1237 zero_sum_srcs[i], 0, PAGE_SIZE,
1075 DMA_TO_DEVICE); 1238 DMA_TO_DEVICE);
1076 tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, 1239 tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
1077 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, 1240 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
1078 &zero_sum_result, 1241 &zero_sum_result,
1079 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1242 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1080 1243
1081 cookie = iop_adma_tx_submit(tx); 1244 cookie = iop_adma_tx_submit(tx);
1082 iop_adma_issue_pending(dma_chan); 1245 iop_adma_issue_pending(dma_chan);
@@ -1106,6 +1269,170 @@ out:
1106 return err; 1269 return err;
1107} 1270}
1108 1271
1272#ifdef CONFIG_MD_RAID6_PQ
1273static int __devinit
1274iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
1275{
1276 /* combined sources, software pq results, and extra hw pq results */
1277 struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
1278 /* ptr to the extra hw pq buffers defined above */
1279 struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
1280 /* address conversion buffers (dma_map / page_address) */
1281 void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
1282 dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
1283 dma_addr_t pq_dest[2];
1284
1285 int i;
1286 struct dma_async_tx_descriptor *tx;
1287 struct dma_chan *dma_chan;
1288 dma_cookie_t cookie;
1289 u32 zero_sum_result;
1290 int err = 0;
1291 struct device *dev;
1292
1293 dev_dbg(device->common.dev, "%s\n", __func__);
1294
1295 for (i = 0; i < ARRAY_SIZE(pq); i++) {
1296 pq[i] = alloc_page(GFP_KERNEL);
1297 if (!pq[i]) {
1298 while (i--)
1299 __free_page(pq[i]);
1300 return -ENOMEM;
1301 }
1302 }
1303
1304 /* Fill in src buffers */
1305 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
1306 pq_sw[i] = page_address(pq[i]);
1307 memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
1308 }
1309 pq_sw[i] = page_address(pq[i]);
1310 pq_sw[i+1] = page_address(pq[i+1]);
1311
1312 dma_chan = container_of(device->common.channels.next,
1313 struct dma_chan,
1314 device_node);
1315 if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
1316 err = -ENODEV;
1317 goto out;
1318 }
1319
1320 dev = dma_chan->device->dev;
1321
1322 /* initialize the dests */
1323 memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
1324 memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
1325
1326 /* test pq */
1327 pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
1328 pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
1329 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
1330 pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
1331 DMA_TO_DEVICE);
1332
1333 tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
1334 IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
1335 PAGE_SIZE,
1336 DMA_PREP_INTERRUPT |
1337 DMA_CTRL_ACK);
1338
1339 cookie = iop_adma_tx_submit(tx);
1340 iop_adma_issue_pending(dma_chan);
1341 msleep(8);
1342
1343 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1344 DMA_SUCCESS) {
1345 dev_err(dev, "Self-test pq timed out, disabling\n");
1346 err = -ENODEV;
1347 goto free_resources;
1348 }
1349
1350 raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
1351
1352 if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
1353 page_address(pq_hw[0]), PAGE_SIZE) != 0) {
1354 dev_err(dev, "Self-test p failed compare, disabling\n");
1355 err = -ENODEV;
1356 goto free_resources;
1357 }
1358 if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
1359 page_address(pq_hw[1]), PAGE_SIZE) != 0) {
1360 dev_err(dev, "Self-test q failed compare, disabling\n");
1361 err = -ENODEV;
1362 goto free_resources;
1363 }
1364
1365 /* test correct zero sum using the software generated pq values */
1366 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
1367 pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
1368 DMA_TO_DEVICE);
1369
1370 zero_sum_result = ~0;
1371 tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
1372 pq_src, IOP_ADMA_NUM_SRC_TEST,
1373 raid6_gfexp, PAGE_SIZE, &zero_sum_result,
1374 DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
1375
1376 cookie = iop_adma_tx_submit(tx);
1377 iop_adma_issue_pending(dma_chan);
1378 msleep(8);
1379
1380 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1381 DMA_SUCCESS) {
1382 dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
1383 err = -ENODEV;
1384 goto free_resources;
1385 }
1386
1387 if (zero_sum_result != 0) {
1388 dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
1389 zero_sum_result);
1390 err = -ENODEV;
1391 goto free_resources;
1392 }
1393
1394 /* test incorrect zero sum */
1395 i = IOP_ADMA_NUM_SRC_TEST;
1396 memset(pq_sw[i] + 100, 0, 100);
1397 memset(pq_sw[i+1] + 200, 0, 200);
1398 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
1399 pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
1400 DMA_TO_DEVICE);
1401
1402 zero_sum_result = 0;
1403 tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
1404 pq_src, IOP_ADMA_NUM_SRC_TEST,
1405 raid6_gfexp, PAGE_SIZE, &zero_sum_result,
1406 DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
1407
1408 cookie = iop_adma_tx_submit(tx);
1409 iop_adma_issue_pending(dma_chan);
1410 msleep(8);
1411
1412 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1413 DMA_SUCCESS) {
1414 dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
1415 err = -ENODEV;
1416 goto free_resources;
1417 }
1418
1419 if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
1420 dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
1421 zero_sum_result);
1422 err = -ENODEV;
1423 goto free_resources;
1424 }
1425
1426free_resources:
1427 iop_adma_free_chan_resources(dma_chan);
1428out:
1429 i = ARRAY_SIZE(pq);
1430 while (i--)
1431 __free_page(pq[i]);
1432 return err;
1433}
1434#endif
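
What this self-test verifies, and what raid6_call.gen_syndrome computes in software, is the standard RAID-6 pair: P is the plain XOR of the data blocks, and Q is the GF(256) weighted sum of g^i times block i with generator g = 2 (raid6_gfexp is the kernel's g^i table). A small self-contained sketch of that arithmetic, byte at a time so it is slow but easy to check against the hardware or software results:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NDISKS	4	/* data disks; P and Q come on top */
#define BLKSZ	64

/* multiply by 2 in GF(2^8) with the RAID-6 polynomial 0x11d */
static uint8_t gf_mul2(uint8_t v)
{
	return (v << 1) ^ (v & 0x80 ? 0x1d : 0);
}

int main(void)
{
	static uint8_t d[NDISKS][BLKSZ], p[BLKSZ], q[BLKSZ];
	int i;
	size_t off;

	for (i = 0; i < NDISKS; i++)
		memset(d[i], 0x11 * (i + 1), BLKSZ);	/* arbitrary test data */

	/* gen_syndrome equivalent: walk disks from highest index to lowest,
	 * accumulating P by XOR and Q in Horner form (multiply by g, then XOR),
	 * so block i ends up weighted by g^i */
	for (off = 0; off < BLKSZ; off++) {
		uint8_t pv = 0, qv = 0;

		for (i = NDISKS - 1; i >= 0; i--) {
			pv ^= d[i][off];
			qv = gf_mul2(qv) ^ d[i][off];
		}
		p[off] = pv;
		q[off] = qv;
	}

	printf("P[0]=0x%02x Q[0]=0x%02x\n", p[0], q[0]);

	/* a pq_val-style check is the same walk run again, comparing the
	 * regenerated P and Q against the stored copies */
	return 0;
}

The in-kernel implementations do the same walk, just wider, with unrolled integer or SIMD inner loops; the byte-wise version above is only meant to make the P/Q definitions concrete.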
1435
1109static int __devexit iop_adma_remove(struct platform_device *dev) 1436static int __devexit iop_adma_remove(struct platform_device *dev)
1110{ 1437{
1111 struct iop_adma_device *device = platform_get_drvdata(dev); 1438 struct iop_adma_device *device = platform_get_drvdata(dev);
@@ -1193,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1193 dma_dev->max_xor = iop_adma_get_max_xor(); 1520 dma_dev->max_xor = iop_adma_get_max_xor();
1194 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; 1521 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
1195 } 1522 }
1196 if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) 1523 if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
1197 dma_dev->device_prep_dma_zero_sum = 1524 dma_dev->device_prep_dma_xor_val =
1198 iop_adma_prep_dma_zero_sum; 1525 iop_adma_prep_dma_xor_val;
1526 if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
1527 dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
1528 dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
1529 }
1530 if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
1531 dma_dev->device_prep_dma_pq_val =
1532 iop_adma_prep_dma_pq_val;
1199 if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) 1533 if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
1200 dma_dev->device_prep_dma_interrupt = 1534 dma_dev->device_prep_dma_interrupt =
1201 iop_adma_prep_dma_interrupt; 1535 iop_adma_prep_dma_interrupt;
@@ -1249,23 +1583,35 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1249 } 1583 }
1250 1584
1251 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || 1585 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
1252 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { 1586 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
1253 ret = iop_adma_xor_zero_sum_self_test(adev); 1587 ret = iop_adma_xor_val_self_test(adev);
1254 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); 1588 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
1255 if (ret) 1589 if (ret)
1256 goto err_free_iop_chan; 1590 goto err_free_iop_chan;
1257 } 1591 }
1258 1592
1593 if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
1594 dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
1595 #ifdef CONFIG_MD_RAID6_PQ
1596 ret = iop_adma_pq_zero_sum_self_test(adev);
1597 dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
1598 #else
1599 /* can not test raid6, so do not publish capability */
1600 dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
1601 dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
1602 ret = 0;
1603 #endif
1604 if (ret)
1605 goto err_free_iop_chan;
1606 }
1607
1259 dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " 1608 dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
1260 "( %s%s%s%s%s%s%s%s%s%s)\n", 1609 "( %s%s%s%s%s%s%s)\n",
1261 dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", 1610 dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
1262 dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", 1611 dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
1263 dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
1264 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", 1612 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
1265 dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", 1613 dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
1266 dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
1267 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", 1614 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
1268 dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
1269 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", 1615 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
1270 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); 1616 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
1271 1617
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 020f9573fd82..2158377a1359 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -124,6 +124,8 @@ config MD_RAID456
124 select MD_RAID6_PQ 124 select MD_RAID6_PQ
125 select ASYNC_MEMCPY 125 select ASYNC_MEMCPY
126 select ASYNC_XOR 126 select ASYNC_XOR
127 select ASYNC_PQ
128 select ASYNC_RAID6_RECOV
127 ---help--- 129 ---help---
128 A RAID-5 set of N drives with a capacity of C MB per drive provides 130 A RAID-5 set of N drives with a capacity of C MB per drive provides
129 the capacity of C * (N - 1) MB, and protects against a failure 131 the capacity of C * (N - 1) MB, and protects against a failure
@@ -152,9 +154,33 @@ config MD_RAID456
152 154
153 If unsure, say Y. 155 If unsure, say Y.
154 156
157config MULTICORE_RAID456
158 bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
159 depends on MD_RAID456
160 depends on SMP
161 depends on EXPERIMENTAL
162 ---help---
163 Enable the raid456 module to dispatch per-stripe raid operations to a
164 thread pool.
165
166 If unsure, say N.
167
155config MD_RAID6_PQ 168config MD_RAID6_PQ
156 tristate 169 tristate
157 170
171config ASYNC_RAID6_TEST
172 tristate "Self test for hardware accelerated raid6 recovery"
173 depends on MD_RAID6_PQ
174 select ASYNC_RAID6_RECOV
175 ---help---
176 This is a one-shot self test that permutes through the
 177	  recovery of all the possible two-disk failure scenarios for an
 178	  N-disk array. Recovery is performed with the asynchronous
179 raid6 recovery routines, and will optionally use an offload
180 engine if one is available.
181
182 If unsure, say N.
183
158config MD_MULTIPATH 184config MD_MULTIPATH
159 tristate "Multipath I/O support" 185 tristate "Multipath I/O support"
160 depends on BLK_DEV_MD 186 depends on BLK_DEV_MD
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f9f991e6e138..cac6f4d3a143 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -47,7 +47,9 @@
47#include <linux/kthread.h> 47#include <linux/kthread.h>
48#include <linux/raid/pq.h> 48#include <linux/raid/pq.h>
49#include <linux/async_tx.h> 49#include <linux/async_tx.h>
50#include <linux/async.h>
50#include <linux/seq_file.h> 51#include <linux/seq_file.h>
52#include <linux/cpu.h>
51#include "md.h" 53#include "md.h"
52#include "raid5.h" 54#include "raid5.h"
53#include "bitmap.h" 55#include "bitmap.h"
@@ -499,11 +501,18 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
499 struct page *bio_page; 501 struct page *bio_page;
500 int i; 502 int i;
501 int page_offset; 503 int page_offset;
504 struct async_submit_ctl submit;
505 enum async_tx_flags flags = 0;
502 506
503 if (bio->bi_sector >= sector) 507 if (bio->bi_sector >= sector)
504 page_offset = (signed)(bio->bi_sector - sector) * 512; 508 page_offset = (signed)(bio->bi_sector - sector) * 512;
505 else 509 else
506 page_offset = (signed)(sector - bio->bi_sector) * -512; 510 page_offset = (signed)(sector - bio->bi_sector) * -512;
511
512 if (frombio)
513 flags |= ASYNC_TX_FENCE;
514 init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
515
507 bio_for_each_segment(bvl, bio, i) { 516 bio_for_each_segment(bvl, bio, i) {
508 int len = bio_iovec_idx(bio, i)->bv_len; 517 int len = bio_iovec_idx(bio, i)->bv_len;
509 int clen; 518 int clen;
@@ -525,15 +534,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
525 bio_page = bio_iovec_idx(bio, i)->bv_page; 534 bio_page = bio_iovec_idx(bio, i)->bv_page;
526 if (frombio) 535 if (frombio)
527 tx = async_memcpy(page, bio_page, page_offset, 536 tx = async_memcpy(page, bio_page, page_offset,
528 b_offset, clen, 537 b_offset, clen, &submit);
529 ASYNC_TX_DEP_ACK,
530 tx, NULL, NULL);
531 else 538 else
532 tx = async_memcpy(bio_page, page, b_offset, 539 tx = async_memcpy(bio_page, page, b_offset,
533 page_offset, clen, 540 page_offset, clen, &submit);
534 ASYNC_TX_DEP_ACK,
535 tx, NULL, NULL);
536 } 541 }
542 /* chain the operations */
543 submit.depend_tx = tx;
544
537 if (clen < len) /* hit end of page */ 545 if (clen < len) /* hit end of page */
538 break; 546 break;
539 page_offset += len; 547 page_offset += len;
@@ -592,6 +600,7 @@ static void ops_run_biofill(struct stripe_head *sh)
592{ 600{
593 struct dma_async_tx_descriptor *tx = NULL; 601 struct dma_async_tx_descriptor *tx = NULL;
594 raid5_conf_t *conf = sh->raid_conf; 602 raid5_conf_t *conf = sh->raid_conf;
603 struct async_submit_ctl submit;
595 int i; 604 int i;
596 605
597 pr_debug("%s: stripe %llu\n", __func__, 606 pr_debug("%s: stripe %llu\n", __func__,
@@ -615,22 +624,34 @@ static void ops_run_biofill(struct stripe_head *sh)
615 } 624 }
616 625
617 atomic_inc(&sh->count); 626 atomic_inc(&sh->count);
618 async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 627 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
619 ops_complete_biofill, sh); 628 async_trigger_callback(&submit);
620} 629}
621 630
622static void ops_complete_compute5(void *stripe_head_ref) 631static void mark_target_uptodate(struct stripe_head *sh, int target)
623{ 632{
624 struct stripe_head *sh = stripe_head_ref; 633 struct r5dev *tgt;
625 int target = sh->ops.target;
626 struct r5dev *tgt = &sh->dev[target];
627 634
628 pr_debug("%s: stripe %llu\n", __func__, 635 if (target < 0)
629 (unsigned long long)sh->sector); 636 return;
630 637
638 tgt = &sh->dev[target];
631 set_bit(R5_UPTODATE, &tgt->flags); 639 set_bit(R5_UPTODATE, &tgt->flags);
632 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 640 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
633 clear_bit(R5_Wantcompute, &tgt->flags); 641 clear_bit(R5_Wantcompute, &tgt->flags);
642}
643
644static void ops_complete_compute(void *stripe_head_ref)
645{
646 struct stripe_head *sh = stripe_head_ref;
647
648 pr_debug("%s: stripe %llu\n", __func__,
649 (unsigned long long)sh->sector);
650
651 /* mark the computed target(s) as uptodate */
652 mark_target_uptodate(sh, sh->ops.target);
653 mark_target_uptodate(sh, sh->ops.target2);
654
634 clear_bit(STRIPE_COMPUTE_RUN, &sh->state); 655 clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
635 if (sh->check_state == check_state_compute_run) 656 if (sh->check_state == check_state_compute_run)
636 sh->check_state = check_state_compute_result; 657 sh->check_state = check_state_compute_result;
@@ -638,16 +659,24 @@ static void ops_complete_compute5(void *stripe_head_ref)
638 release_stripe(sh); 659 release_stripe(sh);
639} 660}
640 661
641static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) 662/* return a pointer to the address conversion region of the scribble buffer */
663static addr_conv_t *to_addr_conv(struct stripe_head *sh,
664 struct raid5_percpu *percpu)
665{
666 return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
667}
668
669static struct dma_async_tx_descriptor *
670ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
642{ 671{
643 /* kernel stack size limits the total number of disks */
644 int disks = sh->disks; 672 int disks = sh->disks;
645 struct page *xor_srcs[disks]; 673 struct page **xor_srcs = percpu->scribble;
646 int target = sh->ops.target; 674 int target = sh->ops.target;
647 struct r5dev *tgt = &sh->dev[target]; 675 struct r5dev *tgt = &sh->dev[target];
648 struct page *xor_dest = tgt->page; 676 struct page *xor_dest = tgt->page;
649 int count = 0; 677 int count = 0;
650 struct dma_async_tx_descriptor *tx; 678 struct dma_async_tx_descriptor *tx;
679 struct async_submit_ctl submit;
651 int i; 680 int i;
652 681
653 pr_debug("%s: stripe %llu block: %d\n", 682 pr_debug("%s: stripe %llu block: %d\n",
@@ -660,17 +689,212 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
660 689
661 atomic_inc(&sh->count); 690 atomic_inc(&sh->count);
662 691
692 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
693 ops_complete_compute, sh, to_addr_conv(sh, percpu));
663 if (unlikely(count == 1)) 694 if (unlikely(count == 1))
664 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 695 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
665 0, NULL, ops_complete_compute5, sh);
666 else 696 else
667 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 697 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
668 ASYNC_TX_XOR_ZERO_DST, NULL,
669 ops_complete_compute5, sh);
670 698
671 return tx; 699 return tx;
672} 700}
673 701
702/* set_syndrome_sources - populate source buffers for gen_syndrome
703 * @srcs - (struct page *) array of size sh->disks
704 * @sh - stripe_head to parse
705 *
706 * Populates srcs in proper layout order for the stripe and returns the
707 * 'count' of sources to be used in a call to async_gen_syndrome. The P
708 * destination buffer is recorded in srcs[count] and the Q destination
 709 * is recorded in srcs[count+1].
710 */
711static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
712{
713 int disks = sh->disks;
714 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
715 int d0_idx = raid6_d0(sh);
716 int count;
717 int i;
718
719 for (i = 0; i < disks; i++)
720 srcs[i] = (void *)raid6_empty_zero_page;
721
722 count = 0;
723 i = d0_idx;
724 do {
725 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
726
727 srcs[slot] = sh->dev[i].page;
728 i = raid6_next_disk(i, disks);
729 } while (i != d0_idx);
730 BUG_ON(count != syndrome_disks);
731
732 return count;
733}
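
The contract of set_syndrome_sources() is purely about ordering: the data blocks land in srcs[0..count-1] in d0-first syndrome order, P lands in srcs[count], Q lands in srcs[count+1], and any unused ddf slots are padded with the zero page so the gen_syndrome coefficients still line up. A toy illustration of that layout with string labels standing in for struct page pointers; the disk geometry is made up and the zero-page padding is omitted:

#include <stdio.h>

#define DATA_DISKS 4

int main(void)
{
	/* stand-ins for sh->dev[i].page, already in d0-first syndrome order */
	const char *data[DATA_DISKS] = { "D0", "D1", "D2", "D3" };
	const char *srcs[DATA_DISKS + 2];
	int count = 0, i;

	for (i = 0; i < DATA_DISKS; i++)
		srcs[count++] = data[i];	/* srcs[0..count-1]: data blocks */

	srcs[count] = "P";			/* srcs[count]:   P destination */
	srcs[count + 1] = "Q";			/* srcs[count+1]: Q destination */

	for (i = 0; i < count + 2; i++)
		printf("srcs[%d] = %s\n", i, srcs[i]);

	/* async_gen_syndrome() is then handed count+2 blocks, which is how the
	 * compute6 and reconstruct6 paths below call it */
	return 0;
}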
734
735static struct dma_async_tx_descriptor *
736ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
737{
738 int disks = sh->disks;
739 struct page **blocks = percpu->scribble;
740 int target;
741 int qd_idx = sh->qd_idx;
742 struct dma_async_tx_descriptor *tx;
743 struct async_submit_ctl submit;
744 struct r5dev *tgt;
745 struct page *dest;
746 int i;
747 int count;
748
749 if (sh->ops.target < 0)
750 target = sh->ops.target2;
751 else if (sh->ops.target2 < 0)
752 target = sh->ops.target;
753 else
754 /* we should only have one valid target */
755 BUG();
756 BUG_ON(target < 0);
757 pr_debug("%s: stripe %llu block: %d\n",
758 __func__, (unsigned long long)sh->sector, target);
759
760 tgt = &sh->dev[target];
761 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
762 dest = tgt->page;
763
764 atomic_inc(&sh->count);
765
766 if (target == qd_idx) {
767 count = set_syndrome_sources(blocks, sh);
768 blocks[count] = NULL; /* regenerating p is not necessary */
769 BUG_ON(blocks[count+1] != dest); /* q should already be set */
770 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
771 ops_complete_compute, sh,
772 to_addr_conv(sh, percpu));
773 tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
774 } else {
775 /* Compute any data- or p-drive using XOR */
776 count = 0;
777 for (i = disks; i-- ; ) {
778 if (i == target || i == qd_idx)
779 continue;
780 blocks[count++] = sh->dev[i].page;
781 }
782
783 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
784 NULL, ops_complete_compute, sh,
785 to_addr_conv(sh, percpu));
786 tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
787 }
788
789 return tx;
790}
791
792static struct dma_async_tx_descriptor *
793ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
794{
795 int i, count, disks = sh->disks;
796 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
797 int d0_idx = raid6_d0(sh);
798 int faila = -1, failb = -1;
799 int target = sh->ops.target;
800 int target2 = sh->ops.target2;
801 struct r5dev *tgt = &sh->dev[target];
802 struct r5dev *tgt2 = &sh->dev[target2];
803 struct dma_async_tx_descriptor *tx;
804 struct page **blocks = percpu->scribble;
805 struct async_submit_ctl submit;
806
807 pr_debug("%s: stripe %llu block1: %d block2: %d\n",
808 __func__, (unsigned long long)sh->sector, target, target2);
809 BUG_ON(target < 0 || target2 < 0);
810 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
811 BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
812
 813	/* we need to open-code set_syndrome_sources to handle the
814 * slot number conversion for 'faila' and 'failb'
815 */
816 for (i = 0; i < disks ; i++)
817 blocks[i] = (void *)raid6_empty_zero_page;
818 count = 0;
819 i = d0_idx;
820 do {
821 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
822
823 blocks[slot] = sh->dev[i].page;
824
825 if (i == target)
826 faila = slot;
827 if (i == target2)
828 failb = slot;
829 i = raid6_next_disk(i, disks);
830 } while (i != d0_idx);
831 BUG_ON(count != syndrome_disks);
832
833 BUG_ON(faila == failb);
834 if (failb < faila)
835 swap(faila, failb);
836 pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
837 __func__, (unsigned long long)sh->sector, faila, failb);
838
839 atomic_inc(&sh->count);
840
841 if (failb == syndrome_disks+1) {
842 /* Q disk is one of the missing disks */
843 if (faila == syndrome_disks) {
844 /* Missing P+Q, just recompute */
845 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
846 ops_complete_compute, sh,
847 to_addr_conv(sh, percpu));
848 return async_gen_syndrome(blocks, 0, count+2,
849 STRIPE_SIZE, &submit);
850 } else {
851 struct page *dest;
852 int data_target;
853 int qd_idx = sh->qd_idx;
854
855 /* Missing D+Q: recompute D from P, then recompute Q */
856 if (target == qd_idx)
857 data_target = target2;
858 else
859 data_target = target;
860
861 count = 0;
862 for (i = disks; i-- ; ) {
863 if (i == data_target || i == qd_idx)
864 continue;
865 blocks[count++] = sh->dev[i].page;
866 }
867 dest = sh->dev[data_target].page;
868 init_async_submit(&submit,
869 ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
870 NULL, NULL, NULL,
871 to_addr_conv(sh, percpu));
872 tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
873 &submit);
874
875 count = set_syndrome_sources(blocks, sh);
876 init_async_submit(&submit, ASYNC_TX_FENCE, tx,
877 ops_complete_compute, sh,
878 to_addr_conv(sh, percpu));
879 return async_gen_syndrome(blocks, 0, count+2,
880 STRIPE_SIZE, &submit);
881 }
882 }
883
884 init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute,
885 sh, to_addr_conv(sh, percpu));
886 if (failb == syndrome_disks) {
887 /* We're missing D+P. */
888 return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE,
889 faila, blocks, &submit);
890 } else {
891 /* We're missing D+D. */
892 return async_raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE,
893 faila, failb, blocks, &submit);
894 }
895}
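
ops_run_compute6_2() boils down to classifying the two failed slots relative to the P slot (syndrome_disks) and the Q slot (syndrome_disks+1): P+Q is simply regenerated, D+Q is rebuilt by an XOR pass followed by a fresh syndrome, and the remaining D+P and D+D cases go to the async RAID-6 recovery helpers. A compact sketch of just that classification; the slot numbers in main() are illustrative:

#include <stdio.h>

/* faila < failb is guaranteed by the swap() in ops_run_compute6_2() */
static const char *recovery_path(int faila, int failb, int syndrome_disks)
{
	if (failb == syndrome_disks + 1) {		/* Q is missing */
		if (faila == syndrome_disks)
			return "P+Q: regenerate both via gen_syndrome";
		return "D+Q: recompute D by XOR over data+P, then regenerate Q";
	}
	if (failb == syndrome_disks)			/* P is missing */
		return "D+P: async_raid6_datap_recov";
	return "D+D: async_raid6_2data_recov";
}

int main(void)
{
	int n = 4;	/* 4 data slots, P at slot 4, Q at slot 5 */

	printf("%s\n", recovery_path(4, 5, n));	/* both parity slots */
	printf("%s\n", recovery_path(1, 5, n));	/* data + Q */
	printf("%s\n", recovery_path(2, 4, n));	/* data + P */
	printf("%s\n", recovery_path(0, 3, n));	/* two data slots */
	return 0;
}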
896
897
674static void ops_complete_prexor(void *stripe_head_ref) 898static void ops_complete_prexor(void *stripe_head_ref)
675{ 899{
676 struct stripe_head *sh = stripe_head_ref; 900 struct stripe_head *sh = stripe_head_ref;
@@ -680,12 +904,13 @@ static void ops_complete_prexor(void *stripe_head_ref)
680} 904}
681 905
682static struct dma_async_tx_descriptor * 906static struct dma_async_tx_descriptor *
683ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 907ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
908 struct dma_async_tx_descriptor *tx)
684{ 909{
685 /* kernel stack size limits the total number of disks */
686 int disks = sh->disks; 910 int disks = sh->disks;
687 struct page *xor_srcs[disks]; 911 struct page **xor_srcs = percpu->scribble;
688 int count = 0, pd_idx = sh->pd_idx, i; 912 int count = 0, pd_idx = sh->pd_idx, i;
913 struct async_submit_ctl submit;
689 914
690 /* existing parity data subtracted */ 915 /* existing parity data subtracted */
691 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 916 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -700,9 +925,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
700 xor_srcs[count++] = dev->page; 925 xor_srcs[count++] = dev->page;
701 } 926 }
702 927
703 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 928 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
704 ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, 929 ops_complete_prexor, sh, to_addr_conv(sh, percpu));
705 ops_complete_prexor, sh); 930 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
706 931
707 return tx; 932 return tx;
708} 933}
@@ -742,17 +967,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
742 return tx; 967 return tx;
743} 968}
744 969
745static void ops_complete_postxor(void *stripe_head_ref) 970static void ops_complete_reconstruct(void *stripe_head_ref)
746{ 971{
747 struct stripe_head *sh = stripe_head_ref; 972 struct stripe_head *sh = stripe_head_ref;
748 int disks = sh->disks, i, pd_idx = sh->pd_idx; 973 int disks = sh->disks;
974 int pd_idx = sh->pd_idx;
975 int qd_idx = sh->qd_idx;
976 int i;
749 977
750 pr_debug("%s: stripe %llu\n", __func__, 978 pr_debug("%s: stripe %llu\n", __func__,
751 (unsigned long long)sh->sector); 979 (unsigned long long)sh->sector);
752 980
753 for (i = disks; i--; ) { 981 for (i = disks; i--; ) {
754 struct r5dev *dev = &sh->dev[i]; 982 struct r5dev *dev = &sh->dev[i];
755 if (dev->written || i == pd_idx) 983
984 if (dev->written || i == pd_idx || i == qd_idx)
756 set_bit(R5_UPTODATE, &dev->flags); 985 set_bit(R5_UPTODATE, &dev->flags);
757 } 986 }
758 987
@@ -770,12 +999,12 @@ static void ops_complete_postxor(void *stripe_head_ref)
770} 999}
771 1000
772static void 1001static void
773ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 1002ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
1003 struct dma_async_tx_descriptor *tx)
774{ 1004{
775 /* kernel stack size limits the total number of disks */
776 int disks = sh->disks; 1005 int disks = sh->disks;
777 struct page *xor_srcs[disks]; 1006 struct page **xor_srcs = percpu->scribble;
778 1007 struct async_submit_ctl submit;
779 int count = 0, pd_idx = sh->pd_idx, i; 1008 int count = 0, pd_idx = sh->pd_idx, i;
780 struct page *xor_dest; 1009 struct page *xor_dest;
781 int prexor = 0; 1010 int prexor = 0;
@@ -809,18 +1038,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
809 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST 1038 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
810 * for the synchronous xor case 1039 * for the synchronous xor case
811 */ 1040 */
812 flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | 1041 flags = ASYNC_TX_ACK |
813 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); 1042 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
814 1043
815 atomic_inc(&sh->count); 1044 atomic_inc(&sh->count);
816 1045
817 if (unlikely(count == 1)) { 1046 init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
818 flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); 1047 to_addr_conv(sh, percpu));
819 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 1048 if (unlikely(count == 1))
820 flags, tx, ops_complete_postxor, sh); 1049 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
821 } else 1050 else
822 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 1051 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
823 flags, tx, ops_complete_postxor, sh); 1052}
1053
1054static void
1055ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
1056 struct dma_async_tx_descriptor *tx)
1057{
1058 struct async_submit_ctl submit;
1059 struct page **blocks = percpu->scribble;
1060 int count;
1061
1062 pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
1063
1064 count = set_syndrome_sources(blocks, sh);
1065
1066 atomic_inc(&sh->count);
1067
1068 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
1069 sh, to_addr_conv(sh, percpu));
1070 async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
824} 1071}
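
Editorial note: ops_run_reconstruct6() hands every data block plus the P and Q slots to async_gen_syndrome(), which is why the call passes count+2 blocks. As a reminder of what that operation computes, here is a minimal synchronous, byte-at-a-time model of RAID-6 P/Q generation over plain buffers; it mirrors the Horner-style loop used by the generic lib/raid6 code but is only an illustration, not the kernel implementation.

#include <stddef.h>

/* multiply a byte by {02} in GF(2^8) with the RAID-6 polynomial 0x11d */
static unsigned char gf_mul2(unsigned char v)
{
	return (unsigned char)((v << 1) ^ ((v & 0x80) ? 0x1d : 0));
}

/*
 * Generate P (plain xor of the data) and Q (weighted xor, Q = sum of
 * g^i * D_i) over 'ndata' source buffers of 'len' bytes each.
 */
static void gen_syndrome(int ndata, size_t len, unsigned char **data,
			 unsigned char *p, unsigned char *q)
{
	for (size_t off = 0; off < len; off++) {
		unsigned char wp = 0, wq = 0;
		int d;

		/* Horner evaluation from the highest-numbered disk down */
		for (d = ndata - 1; d >= 0; d--) {
			wp ^= data[d][off];
			wq = gf_mul2(wq) ^ data[d][off];
		}
		p[off] = wp;
		q[off] = wq;
	}
}

For a stripe this would correspond to ndata = disks - 2 sources of STRIPE_SIZE bytes, with the offload engine (or the synchronous fallback) doing the same arithmetic a page at a time.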
825 1072
826static void ops_complete_check(void *stripe_head_ref) 1073static void ops_complete_check(void *stripe_head_ref)
@@ -835,63 +1082,115 @@ static void ops_complete_check(void *stripe_head_ref)
835 release_stripe(sh); 1082 release_stripe(sh);
836} 1083}
837 1084
838static void ops_run_check(struct stripe_head *sh) 1085static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
839{ 1086{
840 /* kernel stack size limits the total number of disks */
841 int disks = sh->disks; 1087 int disks = sh->disks;
842 struct page *xor_srcs[disks]; 1088 int pd_idx = sh->pd_idx;
1089 int qd_idx = sh->qd_idx;
1090 struct page *xor_dest;
1091 struct page **xor_srcs = percpu->scribble;
843 struct dma_async_tx_descriptor *tx; 1092 struct dma_async_tx_descriptor *tx;
844 1093 struct async_submit_ctl submit;
845 int count = 0, pd_idx = sh->pd_idx, i; 1094 int count;
846 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 1095 int i;
847 1096
848 pr_debug("%s: stripe %llu\n", __func__, 1097 pr_debug("%s: stripe %llu\n", __func__,
849 (unsigned long long)sh->sector); 1098 (unsigned long long)sh->sector);
850 1099
1100 count = 0;
1101 xor_dest = sh->dev[pd_idx].page;
1102 xor_srcs[count++] = xor_dest;
851 for (i = disks; i--; ) { 1103 for (i = disks; i--; ) {
852 struct r5dev *dev = &sh->dev[i]; 1104 if (i == pd_idx || i == qd_idx)
853 if (i != pd_idx) 1105 continue;
854 xor_srcs[count++] = dev->page; 1106 xor_srcs[count++] = sh->dev[i].page;
855 } 1107 }
856 1108
857 tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 1109 init_async_submit(&submit, 0, NULL, NULL, NULL,
858 &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); 1110 to_addr_conv(sh, percpu));
1111 tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
1112 &sh->ops.zero_sum_result, &submit);
1113
1114 atomic_inc(&sh->count);
1115 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
1116 tx = async_trigger_callback(&submit);
1117}
1118
1119static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
1120{
1121 struct page **srcs = percpu->scribble;
1122 struct async_submit_ctl submit;
1123 int count;
1124
1125 pr_debug("%s: stripe %llu checkp: %d\n", __func__,
1126 (unsigned long long)sh->sector, checkp);
1127
1128 count = set_syndrome_sources(srcs, sh);
1129 if (!checkp)
1130 srcs[count] = NULL;
859 1131
860 atomic_inc(&sh->count); 1132 atomic_inc(&sh->count);
861 tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 1133 init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
862 ops_complete_check, sh); 1134 sh, to_addr_conv(sh, percpu));
1135 async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
1136 &sh->ops.zero_sum_result, percpu->spare_page, &submit);
863} 1137}
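
Editorial note: ops_run_check_p() and ops_run_check_pq() no longer compare parity by hand; they let async_xor_val()/async_syndrome_val() fill sh->ops.zero_sum_result, a bitmask in which one bit reports a P mismatch and another a Q mismatch (SUM_CHECK_P_RESULT / SUM_CHECK_Q_RESULT, as used later in this diff). The snippet below is a synchronous userspace stand-in for that validation step; gen_syndrome() is the helper from the previous sketch and the numeric flag values are assumptions of this illustration.

#include <stddef.h>
#include <string.h>

enum {
	SUM_CHECK_P_RESULT = 1 << 0,	/* set when P does not match */
	SUM_CHECK_Q_RESULT = 1 << 1,	/* set when Q does not match */
};

/*
 * Regenerate the syndrome into scratch buffers and compare against the
 * stored parity; returns 0 when both P and Q are in sync.  gen_syndrome()
 * is the illustrative helper shown earlier in this section.
 */
static unsigned long syndrome_val(int ndata, size_t len, unsigned char **data,
				  const unsigned char *p, const unsigned char *q,
				  unsigned char *tmp_p, unsigned char *tmp_q)
{
	unsigned long result = 0;

	gen_syndrome(ndata, len, data, tmp_p, tmp_q);
	if (memcmp(tmp_p, p, len) != 0)
		result |= SUM_CHECK_P_RESULT;
	if (q && memcmp(tmp_q, q, len) != 0)
		result |= SUM_CHECK_Q_RESULT;
	return result;
}

The spare_page passed to async_syndrome_val() above plays the role of the scratch buffers here: the check must not overwrite the on-disk P/Q copies it is validating.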
864 1138
865static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) 1139static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
866{ 1140{
867 int overlap_clear = 0, i, disks = sh->disks; 1141 int overlap_clear = 0, i, disks = sh->disks;
868 struct dma_async_tx_descriptor *tx = NULL; 1142 struct dma_async_tx_descriptor *tx = NULL;
1143 raid5_conf_t *conf = sh->raid_conf;
1144 int level = conf->level;
1145 struct raid5_percpu *percpu;
1146 unsigned long cpu;
869 1147
1148 cpu = get_cpu();
1149 percpu = per_cpu_ptr(conf->percpu, cpu);
870 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { 1150 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
871 ops_run_biofill(sh); 1151 ops_run_biofill(sh);
872 overlap_clear++; 1152 overlap_clear++;
873 } 1153 }
874 1154
875 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { 1155 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
876 tx = ops_run_compute5(sh); 1156 if (level < 6)
877 /* terminate the chain if postxor is not set to be run */ 1157 tx = ops_run_compute5(sh, percpu);
878 if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) 1158 else {
1159 if (sh->ops.target2 < 0 || sh->ops.target < 0)
1160 tx = ops_run_compute6_1(sh, percpu);
1161 else
1162 tx = ops_run_compute6_2(sh, percpu);
1163 }
1164 /* terminate the chain if reconstruct is not set to be run */
1165 if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
879 async_tx_ack(tx); 1166 async_tx_ack(tx);
880 } 1167 }
881 1168
882 if (test_bit(STRIPE_OP_PREXOR, &ops_request)) 1169 if (test_bit(STRIPE_OP_PREXOR, &ops_request))
883 tx = ops_run_prexor(sh, tx); 1170 tx = ops_run_prexor(sh, percpu, tx);
884 1171
885 if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { 1172 if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
886 tx = ops_run_biodrain(sh, tx); 1173 tx = ops_run_biodrain(sh, tx);
887 overlap_clear++; 1174 overlap_clear++;
888 } 1175 }
889 1176
890 if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) 1177 if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
891 ops_run_postxor(sh, tx); 1178 if (level < 6)
1179 ops_run_reconstruct5(sh, percpu, tx);
1180 else
1181 ops_run_reconstruct6(sh, percpu, tx);
1182 }
892 1183
893 if (test_bit(STRIPE_OP_CHECK, &ops_request)) 1184 if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
894 ops_run_check(sh); 1185 if (sh->check_state == check_state_run)
1186 ops_run_check_p(sh, percpu);
1187 else if (sh->check_state == check_state_run_q)
1188 ops_run_check_pq(sh, percpu, 0);
1189 else if (sh->check_state == check_state_run_pq)
1190 ops_run_check_pq(sh, percpu, 1);
1191 else
1192 BUG();
1193 }
895 1194
896 if (overlap_clear) 1195 if (overlap_clear)
897 for (i = disks; i--; ) { 1196 for (i = disks; i--; ) {
@@ -899,6 +1198,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
899 if (test_and_clear_bit(R5_Overlap, &dev->flags)) 1198 if (test_and_clear_bit(R5_Overlap, &dev->flags))
900 wake_up(&sh->raid_conf->wait_for_overlap); 1199 wake_up(&sh->raid_conf->wait_for_overlap);
901 } 1200 }
1201 put_cpu();
902} 1202}
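
Editorial note: raid_run_ops() expresses ordering purely by threading the descriptor returned from one stage into the next (compute, then prexor, then biodrain, then reconstruct), and it only acks a compute descriptor early when no reconstruct will consume it. A toy model of that chaining, with stub stages standing in for the real operations and no claim about the kernel's descriptor layout:

#include <stdio.h>
#include <stddef.h>

/* each stage returns a descriptor that records its dependency, so the
 * execution order is fully described by the chain of 'parent' links
 */
struct tx_desc {
	const char *name;
	struct tx_desc *parent;
};

static struct tx_desc *run_stage(const char *name, struct tx_desc *dep)
{
	static struct tx_desc pool[8];
	static int next;
	struct tx_desc *tx = &pool[next++ % 8];

	tx->name = name;
	tx->parent = dep;
	return tx;
}

int main(void)
{
	struct tx_desc *tx = NULL;

	/* one possible order chained for a read-modify-write style pass */
	tx = run_stage("compute", tx);
	tx = run_stage("prexor", tx);
	tx = run_stage("biodrain", tx);
	tx = run_stage("reconstruct", tx);

	for (; tx; tx = tx->parent)
		printf("%s depends on %s\n", tx->name,
		       tx->parent ? tx->parent->name : "nothing");
	return 0;
}

The get_cpu()/put_cpu() bracket around the whole function exists because the scribble and spare_page resources are per-cpu; the stripe must not migrate while they are in use.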
903 1203
904static int grow_one_stripe(raid5_conf_t *conf) 1204static int grow_one_stripe(raid5_conf_t *conf)
@@ -948,6 +1248,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
948 return 0; 1248 return 0;
949} 1249}
950 1250
1251/**
1252 * scribble_len - return the required size of the scribble region
1253 * @num - total number of disks in the array
1254 *
1255 * The size must be enough to contain:
1256 * 1/ a struct page pointer for each device in the array +2
1257 * 2/ room to convert each entry in (1) to its corresponding dma
1258 * (dma_map_page()) or page (page_address()) address.
1259 *
1260 * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
1261 * calculate over all devices (not just the data blocks), using zeros in place
1262 * of the P and Q blocks.
1263 */
1264static size_t scribble_len(int num)
1265{
1266 size_t len;
1267
1268 len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
1269
1270 return len;
1271}
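
Editorial note: scribble_len() sizes one flat allocation that is later carved in two: a source-pointer array of num+2 entries followed by num+2 address-conversion slots (the +2 covers the P and Q destinations in the ddf/raid6 case, as the comment above says). The sketch below models that layout; the real to_addr_conv() helper and the addr_conv_t type are not shown in this hunk, so the pointer-sized typedef here is an assumption of the illustration.

#include <stdint.h>
#include <stdlib.h>

typedef uintptr_t addr_conv_t;	/* assumed pointer-sized for this sketch */

static size_t scribble_len(int num)
{
	return sizeof(void *) * (num + 2) + sizeof(addr_conv_t) * (num + 2);
}

/* the conversion area starts immediately after the pointer array */
static addr_conv_t *to_addr_conv(void *scribble, int num)
{
	return (addr_conv_t *)((void **)scribble + num + 2);
}

int main(void)
{
	int num = 8;	/* example: an 8-device array */
	void *scribble = malloc(scribble_len(num));
	void **srcs;
	addr_conv_t *conv;

	if (!scribble)
		return 1;
	srcs = scribble;			/* page/source pointer slots  */
	conv = to_addr_conv(scribble, num);	/* dma/page address slots     */
	(void)srcs;
	(void)conv;
	free(scribble);
	return 0;
}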
1272
951static int resize_stripes(raid5_conf_t *conf, int newsize) 1273static int resize_stripes(raid5_conf_t *conf, int newsize)
952{ 1274{
953 /* Make all the stripes able to hold 'newsize' devices. 1275 /* Make all the stripes able to hold 'newsize' devices.
@@ -976,6 +1298,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
976 struct stripe_head *osh, *nsh; 1298 struct stripe_head *osh, *nsh;
977 LIST_HEAD(newstripes); 1299 LIST_HEAD(newstripes);
978 struct disk_info *ndisks; 1300 struct disk_info *ndisks;
1301 unsigned long cpu;
979 int err; 1302 int err;
980 struct kmem_cache *sc; 1303 struct kmem_cache *sc;
981 int i; 1304 int i;
@@ -1041,7 +1364,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1041 /* Step 3. 1364 /* Step 3.
1042 * At this point, we are holding all the stripes so the array 1365 * At this point, we are holding all the stripes so the array
1043 * is completely stalled, so now is a good time to resize 1366 * is completely stalled, so now is a good time to resize
1044 * conf->disks. 1367 * conf->disks and the scribble region
1045 */ 1368 */
1046 ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); 1369 ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
1047 if (ndisks) { 1370 if (ndisks) {
@@ -1052,10 +1375,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1052 } else 1375 } else
1053 err = -ENOMEM; 1376 err = -ENOMEM;
1054 1377
1378 get_online_cpus();
1379 conf->scribble_len = scribble_len(newsize);
1380 for_each_present_cpu(cpu) {
1381 struct raid5_percpu *percpu;
1382 void *scribble;
1383
1384 percpu = per_cpu_ptr(conf->percpu, cpu);
1385 scribble = kmalloc(conf->scribble_len, GFP_NOIO);
1386
1387 if (scribble) {
1388 kfree(percpu->scribble);
1389 percpu->scribble = scribble;
1390 } else {
1391 err = -ENOMEM;
1392 break;
1393 }
1394 }
1395 put_online_cpus();
1396
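
Editorial note: the loop above grows every CPU's scribble buffer under cpu-hotplug protection (get_online_cpus/put_online_cpus) and, when an allocation fails, keeps the old buffer in place so the array can keep running at its previous size. A minimal userspace model of that "replace on success, keep on failure" loop, with an array of worker slots standing in for the per-cpu data:

#include <errno.h>
#include <stdlib.h>

struct worker_scratch {
	void *scribble;
};

/* try to install a larger scratch buffer for every worker; a worker whose
 * allocation fails keeps its old buffer, and the caller sees -ENOMEM
 */
static int resize_scratch(struct worker_scratch *w, int nworkers, size_t newlen)
{
	int err = 0;

	for (int i = 0; i < nworkers; i++) {
		void *scribble = malloc(newlen);

		if (scribble) {
			free(w[i].scribble);
			w[i].scribble = scribble;
		} else {
			err = -ENOMEM;
			break;
		}
	}
	return err;
}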
1055 /* Step 4, return new stripes to service */ 1397 /* Step 4, return new stripes to service */
1056 while(!list_empty(&newstripes)) { 1398 while(!list_empty(&newstripes)) {
1057 nsh = list_entry(newstripes.next, struct stripe_head, lru); 1399 nsh = list_entry(newstripes.next, struct stripe_head, lru);
1058 list_del_init(&nsh->lru); 1400 list_del_init(&nsh->lru);
1401
1059 for (i=conf->raid_disks; i < newsize; i++) 1402 for (i=conf->raid_disks; i < newsize; i++)
1060 if (nsh->dev[i].page == NULL) { 1403 if (nsh->dev[i].page == NULL) {
1061 struct page *p = alloc_page(GFP_NOIO); 1404 struct page *p = alloc_page(GFP_NOIO);
@@ -1594,258 +1937,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
1594} 1937}
1595 1938
1596 1939
1597
1598/*
1599 * Copy data between a page in the stripe cache, and one or more bion
1600 * The page could align with the middle of the bio, or there could be
1601 * several bion, each with several bio_vecs, which cover part of the page
1602 * Multiple bion are linked together on bi_next. There may be extras
1603 * at the end of this list. We ignore them.
1604 */
1605static void copy_data(int frombio, struct bio *bio,
1606 struct page *page,
1607 sector_t sector)
1608{
1609 char *pa = page_address(page);
1610 struct bio_vec *bvl;
1611 int i;
1612 int page_offset;
1613
1614 if (bio->bi_sector >= sector)
1615 page_offset = (signed)(bio->bi_sector - sector) * 512;
1616 else
1617 page_offset = (signed)(sector - bio->bi_sector) * -512;
1618 bio_for_each_segment(bvl, bio, i) {
1619 int len = bio_iovec_idx(bio,i)->bv_len;
1620 int clen;
1621 int b_offset = 0;
1622
1623 if (page_offset < 0) {
1624 b_offset = -page_offset;
1625 page_offset += b_offset;
1626 len -= b_offset;
1627 }
1628
1629 if (len > 0 && page_offset + len > STRIPE_SIZE)
1630 clen = STRIPE_SIZE - page_offset;
1631 else clen = len;
1632
1633 if (clen > 0) {
1634 char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
1635 if (frombio)
1636 memcpy(pa+page_offset, ba+b_offset, clen);
1637 else
1638 memcpy(ba+b_offset, pa+page_offset, clen);
1639 __bio_kunmap_atomic(ba, KM_USER0);
1640 }
1641 if (clen < len) /* hit end of page */
1642 break;
1643 page_offset += len;
1644 }
1645}
1646
1647#define check_xor() do { \
1648 if (count == MAX_XOR_BLOCKS) { \
1649 xor_blocks(count, STRIPE_SIZE, dest, ptr);\
1650 count = 0; \
1651 } \
1652 } while(0)
1653
1654static void compute_parity6(struct stripe_head *sh, int method)
1655{
1656 raid5_conf_t *conf = sh->raid_conf;
1657 int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
1658 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
1659 struct bio *chosen;
1660 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1661 void *ptrs[syndrome_disks+2];
1662
1663 pd_idx = sh->pd_idx;
1664 qd_idx = sh->qd_idx;
1665 d0_idx = raid6_d0(sh);
1666
1667 pr_debug("compute_parity, stripe %llu, method %d\n",
1668 (unsigned long long)sh->sector, method);
1669
1670 switch(method) {
1671 case READ_MODIFY_WRITE:
1672 BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */
1673 case RECONSTRUCT_WRITE:
1674 for (i= disks; i-- ;)
1675 if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
1676 chosen = sh->dev[i].towrite;
1677 sh->dev[i].towrite = NULL;
1678
1679 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1680 wake_up(&conf->wait_for_overlap);
1681
1682 BUG_ON(sh->dev[i].written);
1683 sh->dev[i].written = chosen;
1684 }
1685 break;
1686 case CHECK_PARITY:
1687 BUG(); /* Not implemented yet */
1688 }
1689
1690 for (i = disks; i--;)
1691 if (sh->dev[i].written) {
1692 sector_t sector = sh->dev[i].sector;
1693 struct bio *wbi = sh->dev[i].written;
1694 while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
1695 copy_data(1, wbi, sh->dev[i].page, sector);
1696 wbi = r5_next_bio(wbi, sector);
1697 }
1698
1699 set_bit(R5_LOCKED, &sh->dev[i].flags);
1700 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1701 }
1702
1703 /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
1704
1705 for (i = 0; i < disks; i++)
1706 ptrs[i] = (void *)raid6_empty_zero_page;
1707
1708 count = 0;
1709 i = d0_idx;
1710 do {
1711 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1712
1713 ptrs[slot] = page_address(sh->dev[i].page);
1714 if (slot < syndrome_disks &&
1715 !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
1716 printk(KERN_ERR "block %d/%d not uptodate "
1717 "on parity calc\n", i, count);
1718 BUG();
1719 }
1720
1721 i = raid6_next_disk(i, disks);
1722 } while (i != d0_idx);
1723 BUG_ON(count != syndrome_disks);
1724
1725 raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
1726
1727 switch(method) {
1728 case RECONSTRUCT_WRITE:
1729 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1730 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1731 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1732 set_bit(R5_LOCKED, &sh->dev[qd_idx].flags);
1733 break;
1734 case UPDATE_PARITY:
1735 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1736 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1737 break;
1738 }
1739}
1740
1741
1742/* Compute one missing block */
1743static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1744{
1745 int i, count, disks = sh->disks;
1746 void *ptr[MAX_XOR_BLOCKS], *dest, *p;
1747 int qd_idx = sh->qd_idx;
1748
1749 pr_debug("compute_block_1, stripe %llu, idx %d\n",
1750 (unsigned long long)sh->sector, dd_idx);
1751
1752 if ( dd_idx == qd_idx ) {
1753 /* We're actually computing the Q drive */
1754 compute_parity6(sh, UPDATE_PARITY);
1755 } else {
1756 dest = page_address(sh->dev[dd_idx].page);
1757 if (!nozero) memset(dest, 0, STRIPE_SIZE);
1758 count = 0;
1759 for (i = disks ; i--; ) {
1760 if (i == dd_idx || i == qd_idx)
1761 continue;
1762 p = page_address(sh->dev[i].page);
1763 if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
1764 ptr[count++] = p;
1765 else
1766 printk("compute_block() %d, stripe %llu, %d"
1767 " not present\n", dd_idx,
1768 (unsigned long long)sh->sector, i);
1769
1770 check_xor();
1771 }
1772 if (count)
1773 xor_blocks(count, STRIPE_SIZE, dest, ptr);
1774 if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1775 else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1776 }
1777}
1778
1779/* Compute two missing blocks */
1780static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1781{
1782 int i, count, disks = sh->disks;
1783 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
1784 int d0_idx = raid6_d0(sh);
1785 int faila = -1, failb = -1;
1786 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1787 void *ptrs[syndrome_disks+2];
1788
1789 for (i = 0; i < disks ; i++)
1790 ptrs[i] = (void *)raid6_empty_zero_page;
1791 count = 0;
1792 i = d0_idx;
1793 do {
1794 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1795
1796 ptrs[slot] = page_address(sh->dev[i].page);
1797
1798 if (i == dd_idx1)
1799 faila = slot;
1800 if (i == dd_idx2)
1801 failb = slot;
1802 i = raid6_next_disk(i, disks);
1803 } while (i != d0_idx);
1804 BUG_ON(count != syndrome_disks);
1805
1806 BUG_ON(faila == failb);
1807 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
1808
1809 pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
1810 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
1811 faila, failb);
1812
1813 if (failb == syndrome_disks+1) {
1814 /* Q disk is one of the missing disks */
1815 if (faila == syndrome_disks) {
1816 /* Missing P+Q, just recompute */
1817 compute_parity6(sh, UPDATE_PARITY);
1818 return;
1819 } else {
1820 /* We're missing D+Q; recompute D from P */
1821 compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
1822 dd_idx2 : dd_idx1),
1823 0);
1824 compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
1825 return;
1826 }
1827 }
1828
1829 /* We're missing D+P or D+D; */
1830 if (failb == syndrome_disks) {
1831 /* We're missing D+P. */
1832 raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
1833 } else {
1834 /* We're missing D+D. */
1835 raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
1836 ptrs);
1837 }
1838
1839 /* Both the above update both missing blocks */
1840 set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
1841 set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
1842}
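
Editorial note: the compute_block_2() being removed above embodies the classic RAID-6 double-failure case analysis — after mapping the two failed devices to syndrome slots it either recomputes P+Q, xor-rebuilds the data block from P and then regenerates Q, or calls raid6_datap_recov()/raid6_2data_recov(). The same case split now lives behind the new async_raid6_recov.c helpers listed in the diffstat. The sketch below restates that decision table over slot numbers; the enum of outcomes is invented for the illustration.

/* Slots 0..ndisks-1 are data, slot ndisks is P, slot ndisks+1 is Q.
 * Given the two failed slots with faila < failb, pick the recovery path
 * the way compute_block_2() did.
 */
enum recovery_path {
	RECOMPUTE_P_AND_Q,	/* both parity slots failed: regenerate them     */
	REBUILD_D_THEN_Q,	/* one data block + Q: xor-rebuild D, redo Q      */
	DATAP_RECOV,		/* one data block + P: needs Q and GF arithmetic  */
	TWO_DATA_RECOV,		/* two data blocks: needs both P and Q            */
};

static enum recovery_path pick_recovery(int ndisks, int faila, int failb)
{
	if (failb == ndisks + 1) {		/* Q is one of the failures */
		if (faila == ndisks)		/* ...and so is P           */
			return RECOMPUTE_P_AND_Q;
		return REBUILD_D_THEN_Q;
	}
	if (failb == ndisks)			/* P failed alongside a data block */
		return DATAP_RECOV;
	return TWO_DATA_RECOV;			/* two data blocks failed */
}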
1843
1844static void 1940static void
1845schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, 1941schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
1846 int rcw, int expand) 1942 int rcw, int expand)
1847{ 1943{
1848 int i, pd_idx = sh->pd_idx, disks = sh->disks; 1944 int i, pd_idx = sh->pd_idx, disks = sh->disks;
1945 raid5_conf_t *conf = sh->raid_conf;
1946 int level = conf->level;
1849 1947
1850 if (rcw) { 1948 if (rcw) {
1851 /* if we are not expanding this is a proper write request, and 1949 /* if we are not expanding this is a proper write request, and
@@ -1858,7 +1956,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
1858 } else 1956 } else
1859 sh->reconstruct_state = reconstruct_state_run; 1957 sh->reconstruct_state = reconstruct_state_run;
1860 1958
1861 set_bit(STRIPE_OP_POSTXOR, &s->ops_request); 1959 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
1862 1960
1863 for (i = disks; i--; ) { 1961 for (i = disks; i--; ) {
1864 struct r5dev *dev = &sh->dev[i]; 1962 struct r5dev *dev = &sh->dev[i];
@@ -1871,17 +1969,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
1871 s->locked++; 1969 s->locked++;
1872 } 1970 }
1873 } 1971 }
1874 if (s->locked + 1 == disks) 1972 if (s->locked + conf->max_degraded == disks)
1875 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) 1973 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
1876 atomic_inc(&sh->raid_conf->pending_full_writes); 1974 atomic_inc(&conf->pending_full_writes);
1877 } else { 1975 } else {
1976 BUG_ON(level == 6);
1878 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || 1977 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
1879 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); 1978 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
1880 1979
1881 sh->reconstruct_state = reconstruct_state_prexor_drain_run; 1980 sh->reconstruct_state = reconstruct_state_prexor_drain_run;
1882 set_bit(STRIPE_OP_PREXOR, &s->ops_request); 1981 set_bit(STRIPE_OP_PREXOR, &s->ops_request);
1883 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); 1982 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
1884 set_bit(STRIPE_OP_POSTXOR, &s->ops_request); 1983 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
1885 1984
1886 for (i = disks; i--; ) { 1985 for (i = disks; i--; ) {
1887 struct r5dev *dev = &sh->dev[i]; 1986 struct r5dev *dev = &sh->dev[i];
@@ -1899,13 +1998,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
1899 } 1998 }
1900 } 1999 }
1901 2000
1902 /* keep the parity disk locked while asynchronous operations 2001 /* keep the parity disk(s) locked while asynchronous operations
1903 * are in flight 2002 * are in flight
1904 */ 2003 */
1905 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); 2004 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1906 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 2005 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1907 s->locked++; 2006 s->locked++;
1908 2007
2008 if (level == 6) {
2009 int qd_idx = sh->qd_idx;
2010 struct r5dev *dev = &sh->dev[qd_idx];
2011
2012 set_bit(R5_LOCKED, &dev->flags);
2013 clear_bit(R5_UPTODATE, &dev->flags);
2014 s->locked++;
2015 }
2016
1909 pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", 2017 pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
1910 __func__, (unsigned long long)sh->sector, 2018 __func__, (unsigned long long)sh->sector,
1911 s->locked, s->ops_request); 2019 s->locked, s->ops_request);
@@ -1986,13 +2094,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
1986 2094
1987static void end_reshape(raid5_conf_t *conf); 2095static void end_reshape(raid5_conf_t *conf);
1988 2096
1989static int page_is_zero(struct page *p)
1990{
1991 char *a = page_address(p);
1992 return ((*(u32*)a) == 0 &&
1993 memcmp(a, a+4, STRIPE_SIZE-4)==0);
1994}
1995
1996static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, 2097static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
1997 struct stripe_head *sh) 2098 struct stripe_head *sh)
1998{ 2099{
@@ -2132,9 +2233,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
2132 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); 2233 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2133 set_bit(R5_Wantcompute, &dev->flags); 2234 set_bit(R5_Wantcompute, &dev->flags);
2134 sh->ops.target = disk_idx; 2235 sh->ops.target = disk_idx;
2236 sh->ops.target2 = -1;
2135 s->req_compute = 1; 2237 s->req_compute = 1;
2136 /* Careful: from this point on 'uptodate' is in the eye 2238 /* Careful: from this point on 'uptodate' is in the eye
2137 * of raid5_run_ops which services 'compute' operations 2239 * of raid_run_ops which services 'compute' operations
2138 * before writes. R5_Wantcompute flags a block that will 2240 * before writes. R5_Wantcompute flags a block that will
2139 * be R5_UPTODATE by the time it is needed for a 2241 * be R5_UPTODATE by the time it is needed for a
2140 * subsequent operation. 2242 * subsequent operation.
@@ -2173,61 +2275,104 @@ static void handle_stripe_fill5(struct stripe_head *sh,
2173 set_bit(STRIPE_HANDLE, &sh->state); 2275 set_bit(STRIPE_HANDLE, &sh->state);
2174} 2276}
2175 2277
2176static void handle_stripe_fill6(struct stripe_head *sh, 2278/* fetch_block6 - checks the given member device to see if its data needs
2177 struct stripe_head_state *s, struct r6_state *r6s, 2279 * to be read or computed to satisfy a request.
2178 int disks) 2280 *
2281 * Returns 1 when no more member devices need to be checked, otherwise returns
2282 * 0 to tell the loop in handle_stripe_fill6 to continue
2283 */
2284static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
2285 struct r6_state *r6s, int disk_idx, int disks)
2179{ 2286{
2180 int i; 2287 struct r5dev *dev = &sh->dev[disk_idx];
2181 for (i = disks; i--; ) { 2288 struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
2182 struct r5dev *dev = &sh->dev[i]; 2289 &sh->dev[r6s->failed_num[1]] };
2183 if (!test_bit(R5_LOCKED, &dev->flags) && 2290
2184 !test_bit(R5_UPTODATE, &dev->flags) && 2291 if (!test_bit(R5_LOCKED, &dev->flags) &&
2185 (dev->toread || (dev->towrite && 2292 !test_bit(R5_UPTODATE, &dev->flags) &&
2186 !test_bit(R5_OVERWRITE, &dev->flags)) || 2293 (dev->toread ||
2187 s->syncing || s->expanding || 2294 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
2188 (s->failed >= 1 && 2295 s->syncing || s->expanding ||
2189 (sh->dev[r6s->failed_num[0]].toread || 2296 (s->failed >= 1 &&
2190 s->to_write)) || 2297 (fdev[0]->toread || s->to_write)) ||
2191 (s->failed >= 2 && 2298 (s->failed >= 2 &&
2192 (sh->dev[r6s->failed_num[1]].toread || 2299 (fdev[1]->toread || s->to_write)))) {
2193 s->to_write)))) { 2300 /* we would like to get this block, possibly by computing it,
2194 /* we would like to get this block, possibly 2301 * otherwise read it if the backing disk is insync
2195 * by computing it, but we might not be able to 2302 */
2303 BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
2304 BUG_ON(test_bit(R5_Wantread, &dev->flags));
2305 if ((s->uptodate == disks - 1) &&
2306 (s->failed && (disk_idx == r6s->failed_num[0] ||
2307 disk_idx == r6s->failed_num[1]))) {
2308 /* have disk failed, and we're requested to fetch it;
2309 * do compute it
2196 */ 2310 */
2197 if ((s->uptodate == disks - 1) && 2311 pr_debug("Computing stripe %llu block %d\n",
2198 (s->failed && (i == r6s->failed_num[0] || 2312 (unsigned long long)sh->sector, disk_idx);
2199 i == r6s->failed_num[1]))) { 2313 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
2200 pr_debug("Computing stripe %llu block %d\n", 2314 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2201 (unsigned long long)sh->sector, i); 2315 set_bit(R5_Wantcompute, &dev->flags);
2202 compute_block_1(sh, i, 0); 2316 sh->ops.target = disk_idx;
2203 s->uptodate++; 2317 sh->ops.target2 = -1; /* no 2nd target */
2204 } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { 2318 s->req_compute = 1;
2205 /* Computing 2-failure is *very* expensive; only 2319 s->uptodate++;
2206 * do it if failed >= 2 2320 return 1;
2207 */ 2321 } else if (s->uptodate == disks-2 && s->failed >= 2) {
2208 int other; 2322 /* Computing 2-failure is *very* expensive; only
2209 for (other = disks; other--; ) { 2323 * do it if failed >= 2
2210 if (other == i) 2324 */
2211 continue; 2325 int other;
2212 if (!test_bit(R5_UPTODATE, 2326 for (other = disks; other--; ) {
2213 &sh->dev[other].flags)) 2327 if (other == disk_idx)
2214 break; 2328 continue;
2215 } 2329 if (!test_bit(R5_UPTODATE,
2216 BUG_ON(other < 0); 2330 &sh->dev[other].flags))
2217 pr_debug("Computing stripe %llu blocks %d,%d\n", 2331 break;
2218 (unsigned long long)sh->sector,
2219 i, other);
2220 compute_block_2(sh, i, other);
2221 s->uptodate += 2;
2222 } else if (test_bit(R5_Insync, &dev->flags)) {
2223 set_bit(R5_LOCKED, &dev->flags);
2224 set_bit(R5_Wantread, &dev->flags);
2225 s->locked++;
2226 pr_debug("Reading block %d (sync=%d)\n",
2227 i, s->syncing);
2228 } 2332 }
2333 BUG_ON(other < 0);
2334 pr_debug("Computing stripe %llu blocks %d,%d\n",
2335 (unsigned long long)sh->sector,
2336 disk_idx, other);
2337 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
2338 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2339 set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
2340 set_bit(R5_Wantcompute, &sh->dev[other].flags);
2341 sh->ops.target = disk_idx;
2342 sh->ops.target2 = other;
2343 s->uptodate += 2;
2344 s->req_compute = 1;
2345 return 1;
2346 } else if (test_bit(R5_Insync, &dev->flags)) {
2347 set_bit(R5_LOCKED, &dev->flags);
2348 set_bit(R5_Wantread, &dev->flags);
2349 s->locked++;
2350 pr_debug("Reading block %d (sync=%d)\n",
2351 disk_idx, s->syncing);
2229 } 2352 }
2230 } 2353 }
2354
2355 return 0;
2356}
2357
2358/**
2359 * handle_stripe_fill6 - read or compute data to satisfy pending requests.
2360 */
2361static void handle_stripe_fill6(struct stripe_head *sh,
2362 struct stripe_head_state *s, struct r6_state *r6s,
2363 int disks)
2364{
2365 int i;
2366
2367 /* look for blocks to read/compute, skip this if a compute
2368 * is already in flight, or if the stripe contents are in the
2369 * midst of changing due to a write
2370 */
2371 if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
2372 !sh->reconstruct_state)
2373 for (i = disks; i--; )
2374 if (fetch_block6(sh, s, r6s, i, disks))
2375 break;
2231 set_bit(STRIPE_HANDLE, &sh->state); 2376 set_bit(STRIPE_HANDLE, &sh->state);
2232} 2377}
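
Editorial note: fetch_block6() now queues compute operations instead of running them inline — a single missing block records itself in sh->ops.target with target2 left at -1, while the expensive double-failure path records both indices so raid_run_ops() can choose between ops_run_compute6_1() and ops_run_compute6_2(). A small model of that selection, with the returned strings standing in for the dispatch:

/* -1 in a target slot means "unused"; one valid slot selects the cheap
 * single-block compute, two select the GF-based double recovery
 */
struct compute_request {
	int target;
	int target2;
};

static const char *select_compute(const struct compute_request *r)
{
	int ntargets = (r->target >= 0) + (r->target2 >= 0);

	switch (ntargets) {
	case 1:
		return "compute6_1";	/* rebuild one block            */
	case 2:
		return "compute6_2";	/* rebuild two blocks together  */
	default:
		return "nothing to do";
	}
}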
2233 2378
@@ -2361,114 +2506,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
2361 */ 2506 */
2362 /* since handle_stripe can be called at any time we need to handle the 2507 /* since handle_stripe can be called at any time we need to handle the
2363 * case where a compute block operation has been submitted and then a 2508 * case where a compute block operation has been submitted and then a
2364 * subsequent call wants to start a write request. raid5_run_ops only 2509 * subsequent call wants to start a write request. raid_run_ops only
2365 * handles the case where compute block and postxor are requested 2510 * handles the case where compute block and reconstruct are requested
2366 * simultaneously. If this is not the case then new writes need to be 2511 * simultaneously. If this is not the case then new writes need to be
2367 * held off until the compute completes. 2512 * held off until the compute completes.
2368 */ 2513 */
2369 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && 2514 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
2370 (s->locked == 0 && (rcw == 0 || rmw == 0) && 2515 (s->locked == 0 && (rcw == 0 || rmw == 0) &&
2371 !test_bit(STRIPE_BIT_DELAY, &sh->state))) 2516 !test_bit(STRIPE_BIT_DELAY, &sh->state)))
2372 schedule_reconstruction5(sh, s, rcw == 0, 0); 2517 schedule_reconstruction(sh, s, rcw == 0, 0);
2373} 2518}
2374 2519
2375static void handle_stripe_dirtying6(raid5_conf_t *conf, 2520static void handle_stripe_dirtying6(raid5_conf_t *conf,
2376 struct stripe_head *sh, struct stripe_head_state *s, 2521 struct stripe_head *sh, struct stripe_head_state *s,
2377 struct r6_state *r6s, int disks) 2522 struct r6_state *r6s, int disks)
2378{ 2523{
2379 int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; 2524 int rcw = 0, pd_idx = sh->pd_idx, i;
2380 int qd_idx = sh->qd_idx; 2525 int qd_idx = sh->qd_idx;
2526
2527 set_bit(STRIPE_HANDLE, &sh->state);
2381 for (i = disks; i--; ) { 2528 for (i = disks; i--; ) {
2382 struct r5dev *dev = &sh->dev[i]; 2529 struct r5dev *dev = &sh->dev[i];
2383 /* Would I have to read this buffer for reconstruct_write */ 2530 /* check if we haven't enough data */
2384 if (!test_bit(R5_OVERWRITE, &dev->flags) 2531 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
2385 && i != pd_idx && i != qd_idx 2532 i != pd_idx && i != qd_idx &&
2386 && (!test_bit(R5_LOCKED, &dev->flags) 2533 !test_bit(R5_LOCKED, &dev->flags) &&
2387 ) && 2534 !(test_bit(R5_UPTODATE, &dev->flags) ||
2388 !test_bit(R5_UPTODATE, &dev->flags)) { 2535 test_bit(R5_Wantcompute, &dev->flags))) {
2389 if (test_bit(R5_Insync, &dev->flags)) rcw++; 2536 rcw++;
2390 else { 2537 if (!test_bit(R5_Insync, &dev->flags))
2391 pr_debug("raid6: must_compute: " 2538 continue; /* it's a failed drive */
2392 "disk %d flags=%#lx\n", i, dev->flags); 2539
2393 must_compute++; 2540 if (
2541 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2542 pr_debug("Read_old stripe %llu "
2543 "block %d for Reconstruct\n",
2544 (unsigned long long)sh->sector, i);
2545 set_bit(R5_LOCKED, &dev->flags);
2546 set_bit(R5_Wantread, &dev->flags);
2547 s->locked++;
2548 } else {
2549 pr_debug("Request delayed stripe %llu "
2550 "block %d for Reconstruct\n",
2551 (unsigned long long)sh->sector, i);
2552 set_bit(STRIPE_DELAYED, &sh->state);
2553 set_bit(STRIPE_HANDLE, &sh->state);
2394 } 2554 }
2395 } 2555 }
2396 } 2556 }
2397 pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
2398 (unsigned long long)sh->sector, rcw, must_compute);
2399 set_bit(STRIPE_HANDLE, &sh->state);
2400
2401 if (rcw > 0)
2402 /* want reconstruct write, but need to get some data */
2403 for (i = disks; i--; ) {
2404 struct r5dev *dev = &sh->dev[i];
2405 if (!test_bit(R5_OVERWRITE, &dev->flags)
2406 && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
2407 && !test_bit(R5_LOCKED, &dev->flags) &&
2408 !test_bit(R5_UPTODATE, &dev->flags) &&
2409 test_bit(R5_Insync, &dev->flags)) {
2410 if (
2411 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2412 pr_debug("Read_old stripe %llu "
2413 "block %d for Reconstruct\n",
2414 (unsigned long long)sh->sector, i);
2415 set_bit(R5_LOCKED, &dev->flags);
2416 set_bit(R5_Wantread, &dev->flags);
2417 s->locked++;
2418 } else {
2419 pr_debug("Request delayed stripe %llu "
2420 "block %d for Reconstruct\n",
2421 (unsigned long long)sh->sector, i);
2422 set_bit(STRIPE_DELAYED, &sh->state);
2423 set_bit(STRIPE_HANDLE, &sh->state);
2424 }
2425 }
2426 }
2427 /* now if nothing is locked, and if we have enough data, we can start a 2557 /* now if nothing is locked, and if we have enough data, we can start a
2428 * write request 2558 * write request
2429 */ 2559 */
2430 if (s->locked == 0 && rcw == 0 && 2560 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
2561 s->locked == 0 && rcw == 0 &&
2431 !test_bit(STRIPE_BIT_DELAY, &sh->state)) { 2562 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
2432 if (must_compute > 0) { 2563 schedule_reconstruction(sh, s, 1, 0);
2433 /* We have failed blocks and need to compute them */
2434 switch (s->failed) {
2435 case 0:
2436 BUG();
2437 case 1:
2438 compute_block_1(sh, r6s->failed_num[0], 0);
2439 break;
2440 case 2:
2441 compute_block_2(sh, r6s->failed_num[0],
2442 r6s->failed_num[1]);
2443 break;
2444 default: /* This request should have been failed? */
2445 BUG();
2446 }
2447 }
2448
2449 pr_debug("Computing parity for stripe %llu\n",
2450 (unsigned long long)sh->sector);
2451 compute_parity6(sh, RECONSTRUCT_WRITE);
2452 /* now every locked buffer is ready to be written */
2453 for (i = disks; i--; )
2454 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
2455 pr_debug("Writing stripe %llu block %d\n",
2456 (unsigned long long)sh->sector, i);
2457 s->locked++;
2458 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2459 }
2460 if (s->locked == disks)
2461 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
2462 atomic_inc(&conf->pending_full_writes);
2463 /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
2464 set_bit(STRIPE_INSYNC, &sh->state);
2465
2466 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2467 atomic_dec(&conf->preread_active_stripes);
2468 if (atomic_read(&conf->preread_active_stripes) <
2469 IO_THRESHOLD)
2470 md_wakeup_thread(conf->mddev->thread);
2471 }
2472 } 2564 }
2473} 2565}
2474 2566
@@ -2527,7 +2619,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2527 * we are done. Otherwise update the mismatch count and repair 2619 * we are done. Otherwise update the mismatch count and repair
2528 * parity if !MD_RECOVERY_CHECK 2620 * parity if !MD_RECOVERY_CHECK
2529 */ 2621 */
2530 if (sh->ops.zero_sum_result == 0) 2622 if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
2531 /* parity is correct (on disc, 2623 /* parity is correct (on disc,
2532 * not in buffer any more) 2624 * not in buffer any more)
2533 */ 2625 */
@@ -2544,6 +2636,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2544 set_bit(R5_Wantcompute, 2636 set_bit(R5_Wantcompute,
2545 &sh->dev[sh->pd_idx].flags); 2637 &sh->dev[sh->pd_idx].flags);
2546 sh->ops.target = sh->pd_idx; 2638 sh->ops.target = sh->pd_idx;
2639 sh->ops.target2 = -1;
2547 s->uptodate++; 2640 s->uptodate++;
2548 } 2641 }
2549 } 2642 }
@@ -2560,67 +2653,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2560 2653
2561 2654
2562static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, 2655static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2563 struct stripe_head_state *s, 2656 struct stripe_head_state *s,
2564 struct r6_state *r6s, struct page *tmp_page, 2657 struct r6_state *r6s, int disks)
2565 int disks)
2566{ 2658{
2567 int update_p = 0, update_q = 0;
2568 struct r5dev *dev;
2569 int pd_idx = sh->pd_idx; 2659 int pd_idx = sh->pd_idx;
2570 int qd_idx = sh->qd_idx; 2660 int qd_idx = sh->qd_idx;
2661 struct r5dev *dev;
2571 2662
2572 set_bit(STRIPE_HANDLE, &sh->state); 2663 set_bit(STRIPE_HANDLE, &sh->state);
2573 2664
2574 BUG_ON(s->failed > 2); 2665 BUG_ON(s->failed > 2);
2575 BUG_ON(s->uptodate < disks); 2666
2576 /* Want to check and possibly repair P and Q. 2667 /* Want to check and possibly repair P and Q.
2577 * However there could be one 'failed' device, in which 2668 * However there could be one 'failed' device, in which
2578 * case we can only check one of them, possibly using the 2669 * case we can only check one of them, possibly using the
2579 * other to generate missing data 2670 * other to generate missing data
2580 */ 2671 */
2581 2672
2582 /* If !tmp_page, we cannot do the calculations, 2673 switch (sh->check_state) {
2583 * but as we have set STRIPE_HANDLE, we will soon be called 2674 case check_state_idle:
2584 * by stripe_handle with a tmp_page - just wait until then. 2675 /* start a new check operation if there are < 2 failures */
2585 */
2586 if (tmp_page) {
2587 if (s->failed == r6s->q_failed) { 2676 if (s->failed == r6s->q_failed) {
2588 /* The only possible failed device holds 'Q', so it 2677 /* The only possible failed device holds Q, so it
2589 * makes sense to check P (If anything else were failed, 2678 * makes sense to check P (If anything else were failed,
2590 * we would have used P to recreate it). 2679 * we would have used P to recreate it).
2591 */ 2680 */
2592 compute_block_1(sh, pd_idx, 1); 2681 sh->check_state = check_state_run;
2593 if (!page_is_zero(sh->dev[pd_idx].page)) {
2594 compute_block_1(sh, pd_idx, 0);
2595 update_p = 1;
2596 }
2597 } 2682 }
2598 if (!r6s->q_failed && s->failed < 2) { 2683 if (!r6s->q_failed && s->failed < 2) {
2599 /* q is not failed, and we didn't use it to generate 2684 /* Q is not failed, and we didn't use it to generate
2600 * anything, so it makes sense to check it 2685 * anything, so it makes sense to check it
2601 */ 2686 */
2602 memcpy(page_address(tmp_page), 2687 if (sh->check_state == check_state_run)
2603 page_address(sh->dev[qd_idx].page), 2688 sh->check_state = check_state_run_pq;
2604 STRIPE_SIZE); 2689 else
2605 compute_parity6(sh, UPDATE_PARITY); 2690 sh->check_state = check_state_run_q;
2606 if (memcmp(page_address(tmp_page),
2607 page_address(sh->dev[qd_idx].page),
2608 STRIPE_SIZE) != 0) {
2609 clear_bit(STRIPE_INSYNC, &sh->state);
2610 update_q = 1;
2611 }
2612 } 2691 }
2613 if (update_p || update_q) { 2692
2614 conf->mddev->resync_mismatches += STRIPE_SECTORS; 2693 /* discard potentially stale zero_sum_result */
2615 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) 2694 sh->ops.zero_sum_result = 0;
2616 /* don't try to repair!! */ 2695
2617 update_p = update_q = 0; 2696 if (sh->check_state == check_state_run) {
 2697 /* async_xor_val destroys the contents of P */
2698 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
2699 s->uptodate--;
2700 }
2701 if (sh->check_state >= check_state_run &&
2702 sh->check_state <= check_state_run_pq) {
 2703 /* async_syndrome_val preserves P and Q, so
2704 * no need to mark them !uptodate here
2705 */
2706 set_bit(STRIPE_OP_CHECK, &s->ops_request);
2707 break;
2618 } 2708 }
2619 2709
2710 /* we have 2-disk failure */
2711 BUG_ON(s->failed != 2);
2712 /* fall through */
2713 case check_state_compute_result:
2714 sh->check_state = check_state_idle;
2715
2716 /* check that a write has not made the stripe insync */
2717 if (test_bit(STRIPE_INSYNC, &sh->state))
2718 break;
2719
2620 /* now write out any block on a failed drive, 2720 /* now write out any block on a failed drive,
2621 * or P or Q if they need it 2721 * or P or Q if they were recomputed
2622 */ 2722 */
2623 2723 BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
2624 if (s->failed == 2) { 2724 if (s->failed == 2) {
2625 dev = &sh->dev[r6s->failed_num[1]]; 2725 dev = &sh->dev[r6s->failed_num[1]];
2626 s->locked++; 2726 s->locked++;
@@ -2633,14 +2733,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2633 set_bit(R5_LOCKED, &dev->flags); 2733 set_bit(R5_LOCKED, &dev->flags);
2634 set_bit(R5_Wantwrite, &dev->flags); 2734 set_bit(R5_Wantwrite, &dev->flags);
2635 } 2735 }
2636 2736 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
2637 if (update_p) {
2638 dev = &sh->dev[pd_idx]; 2737 dev = &sh->dev[pd_idx];
2639 s->locked++; 2738 s->locked++;
2640 set_bit(R5_LOCKED, &dev->flags); 2739 set_bit(R5_LOCKED, &dev->flags);
2641 set_bit(R5_Wantwrite, &dev->flags); 2740 set_bit(R5_Wantwrite, &dev->flags);
2642 } 2741 }
2643 if (update_q) { 2742 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
2644 dev = &sh->dev[qd_idx]; 2743 dev = &sh->dev[qd_idx];
2645 s->locked++; 2744 s->locked++;
2646 set_bit(R5_LOCKED, &dev->flags); 2745 set_bit(R5_LOCKED, &dev->flags);
@@ -2649,6 +2748,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2649 clear_bit(STRIPE_DEGRADED, &sh->state); 2748 clear_bit(STRIPE_DEGRADED, &sh->state);
2650 2749
2651 set_bit(STRIPE_INSYNC, &sh->state); 2750 set_bit(STRIPE_INSYNC, &sh->state);
2751 break;
2752 case check_state_run:
2753 case check_state_run_q:
2754 case check_state_run_pq:
2755 break; /* we will be called again upon completion */
2756 case check_state_check_result:
2757 sh->check_state = check_state_idle;
2758
2759 /* handle a successful check operation, if parity is correct
2760 * we are done. Otherwise update the mismatch count and repair
2761 * parity if !MD_RECOVERY_CHECK
2762 */
2763 if (sh->ops.zero_sum_result == 0) {
2764 /* both parities are correct */
2765 if (!s->failed)
2766 set_bit(STRIPE_INSYNC, &sh->state);
2767 else {
2768 /* in contrast to the raid5 case we can validate
2769 * parity, but still have a failure to write
2770 * back
2771 */
2772 sh->check_state = check_state_compute_result;
2773 /* Returning at this point means that we may go
2774 * off and bring p and/or q uptodate again so
2775 * we make sure to check zero_sum_result again
2776 * to verify if p or q need writeback
2777 */
2778 }
2779 } else {
2780 conf->mddev->resync_mismatches += STRIPE_SECTORS;
2781 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
2782 /* don't try to repair!! */
2783 set_bit(STRIPE_INSYNC, &sh->state);
2784 else {
2785 int *target = &sh->ops.target;
2786
2787 sh->ops.target = -1;
2788 sh->ops.target2 = -1;
2789 sh->check_state = check_state_compute_run;
2790 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
2791 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2792 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
2793 set_bit(R5_Wantcompute,
2794 &sh->dev[pd_idx].flags);
2795 *target = pd_idx;
2796 target = &sh->ops.target2;
2797 s->uptodate++;
2798 }
2799 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
2800 set_bit(R5_Wantcompute,
2801 &sh->dev[qd_idx].flags);
2802 *target = qd_idx;
2803 s->uptodate++;
2804 }
2805 }
2806 }
2807 break;
2808 case check_state_compute_run:
2809 break;
2810 default:
2811 printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
2812 __func__, sh->check_state,
2813 (unsigned long long) sh->sector);
2814 BUG();
2652 } 2815 }
2653} 2816}
2654 2817
@@ -2666,6 +2829,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2666 if (i != sh->pd_idx && i != sh->qd_idx) { 2829 if (i != sh->pd_idx && i != sh->qd_idx) {
2667 int dd_idx, j; 2830 int dd_idx, j;
2668 struct stripe_head *sh2; 2831 struct stripe_head *sh2;
2832 struct async_submit_ctl submit;
2669 2833
2670 sector_t bn = compute_blocknr(sh, i, 1); 2834 sector_t bn = compute_blocknr(sh, i, 1);
2671 sector_t s = raid5_compute_sector(conf, bn, 0, 2835 sector_t s = raid5_compute_sector(conf, bn, 0,
@@ -2685,9 +2849,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2685 } 2849 }
2686 2850
2687 /* place all the copies on one channel */ 2851 /* place all the copies on one channel */
2852 init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
2688 tx = async_memcpy(sh2->dev[dd_idx].page, 2853 tx = async_memcpy(sh2->dev[dd_idx].page,
2689 sh->dev[i].page, 0, 0, STRIPE_SIZE, 2854 sh->dev[i].page, 0, 0, STRIPE_SIZE,
2690 ASYNC_TX_DEP_ACK, tx, NULL, NULL); 2855 &submit);
2691 2856
2692 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); 2857 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
2693 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); 2858 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
@@ -2973,7 +3138,7 @@ static bool handle_stripe5(struct stripe_head *sh)
2973 /* Need to write out all blocks after computing parity */ 3138 /* Need to write out all blocks after computing parity */
2974 sh->disks = conf->raid_disks; 3139 sh->disks = conf->raid_disks;
2975 stripe_set_idx(sh->sector, conf, 0, sh); 3140 stripe_set_idx(sh->sector, conf, 0, sh);
2976 schedule_reconstruction5(sh, &s, 1, 1); 3141 schedule_reconstruction(sh, &s, 1, 1);
2977 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { 3142 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
2978 clear_bit(STRIPE_EXPAND_READY, &sh->state); 3143 clear_bit(STRIPE_EXPAND_READY, &sh->state);
2979 atomic_dec(&conf->reshape_stripes); 3144 atomic_dec(&conf->reshape_stripes);
@@ -2993,7 +3158,7 @@ static bool handle_stripe5(struct stripe_head *sh)
2993 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); 3158 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
2994 3159
2995 if (s.ops_request) 3160 if (s.ops_request)
2996 raid5_run_ops(sh, s.ops_request); 3161 raid_run_ops(sh, s.ops_request);
2997 3162
2998 ops_run_io(sh, &s); 3163 ops_run_io(sh, &s);
2999 3164
@@ -3002,7 +3167,7 @@ static bool handle_stripe5(struct stripe_head *sh)
3002 return blocked_rdev == NULL; 3167 return blocked_rdev == NULL;
3003} 3168}
3004 3169
3005static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) 3170static bool handle_stripe6(struct stripe_head *sh)
3006{ 3171{
3007 raid5_conf_t *conf = sh->raid_conf; 3172 raid5_conf_t *conf = sh->raid_conf;
3008 int disks = sh->disks; 3173 int disks = sh->disks;
@@ -3014,9 +3179,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3014 mdk_rdev_t *blocked_rdev = NULL; 3179 mdk_rdev_t *blocked_rdev = NULL;
3015 3180
3016 pr_debug("handling stripe %llu, state=%#lx cnt=%d, " 3181 pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
3017 "pd_idx=%d, qd_idx=%d\n", 3182 "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
3018 (unsigned long long)sh->sector, sh->state, 3183 (unsigned long long)sh->sector, sh->state,
3019 atomic_read(&sh->count), pd_idx, qd_idx); 3184 atomic_read(&sh->count), pd_idx, qd_idx,
3185 sh->check_state, sh->reconstruct_state);
3020 memset(&s, 0, sizeof(s)); 3186 memset(&s, 0, sizeof(s));
3021 3187
3022 spin_lock(&sh->lock); 3188 spin_lock(&sh->lock);
@@ -3036,35 +3202,24 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3036 3202
3037 pr_debug("check %d: state 0x%lx read %p write %p written %p\n", 3203 pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
3038 i, dev->flags, dev->toread, dev->towrite, dev->written); 3204 i, dev->flags, dev->toread, dev->towrite, dev->written);
3039 /* maybe we can reply to a read */ 3205 /* maybe we can reply to a read
3040 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { 3206 *
3041 struct bio *rbi, *rbi2; 3207 * new wantfill requests are only permitted while
3042 pr_debug("Return read for disc %d\n", i); 3208 * ops_complete_biofill is guaranteed to be inactive
3043 spin_lock_irq(&conf->device_lock); 3209 */
3044 rbi = dev->toread; 3210 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
3045 dev->toread = NULL; 3211 !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
3046 if (test_and_clear_bit(R5_Overlap, &dev->flags)) 3212 set_bit(R5_Wantfill, &dev->flags);
3047 wake_up(&conf->wait_for_overlap);
3048 spin_unlock_irq(&conf->device_lock);
3049 while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
3050 copy_data(0, rbi, dev->page, dev->sector);
3051 rbi2 = r5_next_bio(rbi, dev->sector);
3052 spin_lock_irq(&conf->device_lock);
3053 if (!raid5_dec_bi_phys_segments(rbi)) {
3054 rbi->bi_next = return_bi;
3055 return_bi = rbi;
3056 }
3057 spin_unlock_irq(&conf->device_lock);
3058 rbi = rbi2;
3059 }
3060 }
3061 3213
3062 /* now count some things */ 3214 /* now count some things */
3063 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; 3215 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
3064 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; 3216 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
3217 if (test_bit(R5_Wantcompute, &dev->flags))
3218 BUG_ON(++s.compute > 2);
3065 3219
3066 3220 if (test_bit(R5_Wantfill, &dev->flags)) {
3067 if (dev->toread) 3221 s.to_fill++;
3222 } else if (dev->toread)
3068 s.to_read++; 3223 s.to_read++;
3069 if (dev->towrite) { 3224 if (dev->towrite) {
3070 s.to_write++; 3225 s.to_write++;
@@ -3105,6 +3260,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3105 blocked_rdev = NULL; 3260 blocked_rdev = NULL;
3106 } 3261 }
3107 3262
3263 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
3264 set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
3265 set_bit(STRIPE_BIOFILL_RUN, &sh->state);
3266 }
3267
3108 pr_debug("locked=%d uptodate=%d to_read=%d" 3268 pr_debug("locked=%d uptodate=%d to_read=%d"
3109 " to_write=%d failed=%d failed_num=%d,%d\n", 3269 " to_write=%d failed=%d failed_num=%d,%d\n",
3110 s.locked, s.uptodate, s.to_read, s.to_write, s.failed, 3270 s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3145,19 +3305,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3145 * or to load a block that is being partially written. 3305 * or to load a block that is being partially written.
3146 */ 3306 */
3147 if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || 3307 if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
3148 (s.syncing && (s.uptodate < disks)) || s.expanding) 3308 (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
3149 handle_stripe_fill6(sh, &s, &r6s, disks); 3309 handle_stripe_fill6(sh, &s, &r6s, disks);
3150 3310
3151 /* now to consider writing and what else, if anything should be read */ 3311 /* Now we check to see if any write operations have recently
3152 if (s.to_write) 3312 * completed
3313 */
3314 if (sh->reconstruct_state == reconstruct_state_drain_result) {
3315 int qd_idx = sh->qd_idx;
3316
3317 sh->reconstruct_state = reconstruct_state_idle;
3318 /* All the 'written' buffers and the parity blocks are ready to
3319 * be written back to disk
3320 */
3321 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
3322 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
3323 for (i = disks; i--; ) {
3324 dev = &sh->dev[i];
3325 if (test_bit(R5_LOCKED, &dev->flags) &&
3326 (i == sh->pd_idx || i == qd_idx ||
3327 dev->written)) {
3328 pr_debug("Writing block %d\n", i);
3329 BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
3330 set_bit(R5_Wantwrite, &dev->flags);
3331 if (!test_bit(R5_Insync, &dev->flags) ||
3332 ((i == sh->pd_idx || i == qd_idx) &&
3333 s.failed == 0))
3334 set_bit(STRIPE_INSYNC, &sh->state);
3335 }
3336 }
3337 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
3338 atomic_dec(&conf->preread_active_stripes);
3339 if (atomic_read(&conf->preread_active_stripes) <
3340 IO_THRESHOLD)
3341 md_wakeup_thread(conf->mddev->thread);
3342 }
3343 }
3344
3345 /* Now to consider new write requests and what else, if anything
3346 * should be read. We do not handle new writes when:
3347 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
3348 * 2/ A 'check' operation is in flight, as it may clobber the parity
3349 * block.
3350 */
3351 if (s.to_write && !sh->reconstruct_state && !sh->check_state)
3153 handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); 3352 handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
3154 3353
3155 /* maybe we need to check and possibly fix the parity for this stripe 3354 /* maybe we need to check and possibly fix the parity for this stripe
3156 * Any reads will already have been scheduled, so we just see if enough 3355 * Any reads will already have been scheduled, so we just see if enough
3157 * data is available 3356 * data is available. The parity check is held off while parity
3357 * dependent operations are in flight.
3158 */ 3358 */
3159 if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) 3359 if (sh->check_state ||
3160 handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); 3360 (s.syncing && s.locked == 0 &&
3361 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
3362 !test_bit(STRIPE_INSYNC, &sh->state)))
3363 handle_parity_checks6(conf, sh, &s, &r6s, disks);
3161 3364
3162 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 3365 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
3163 md_done_sync(conf->mddev, STRIPE_SECTORS,1); 3366 md_done_sync(conf->mddev, STRIPE_SECTORS,1);
@@ -3178,15 +3381,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3178 set_bit(R5_Wantwrite, &dev->flags); 3381 set_bit(R5_Wantwrite, &dev->flags);
3179 set_bit(R5_ReWrite, &dev->flags); 3382 set_bit(R5_ReWrite, &dev->flags);
3180 set_bit(R5_LOCKED, &dev->flags); 3383 set_bit(R5_LOCKED, &dev->flags);
3384 s.locked++;
3181 } else { 3385 } else {
3182 /* let's read it back */ 3386 /* let's read it back */
3183 set_bit(R5_Wantread, &dev->flags); 3387 set_bit(R5_Wantread, &dev->flags);
3184 set_bit(R5_LOCKED, &dev->flags); 3388 set_bit(R5_LOCKED, &dev->flags);
3389 s.locked++;
3185 } 3390 }
3186 } 3391 }
3187 } 3392 }
3188 3393
3189 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3394 /* Finish reconstruct operations initiated by the expansion process */
3395 if (sh->reconstruct_state == reconstruct_state_result) {
3396 sh->reconstruct_state = reconstruct_state_idle;
3397 clear_bit(STRIPE_EXPANDING, &sh->state);
3398 for (i = conf->raid_disks; i--; ) {
3399 set_bit(R5_Wantwrite, &sh->dev[i].flags);
3400 set_bit(R5_LOCKED, &sh->dev[i].flags);
3401 s.locked++;
3402 }
3403 }
3404
3405 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
3406 !sh->reconstruct_state) {
3190 struct stripe_head *sh2 3407 struct stripe_head *sh2
3191 = get_active_stripe(conf, sh->sector, 1, 1, 1); 3408 = get_active_stripe(conf, sh->sector, 1, 1, 1);
3192 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { 3409 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3207,14 +3424,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3207 /* Need to write out all blocks after computing P&Q */ 3424 /* Need to write out all blocks after computing P&Q */
3208 sh->disks = conf->raid_disks; 3425 sh->disks = conf->raid_disks;
3209 stripe_set_idx(sh->sector, conf, 0, sh); 3426 stripe_set_idx(sh->sector, conf, 0, sh);
3210 compute_parity6(sh, RECONSTRUCT_WRITE); 3427 schedule_reconstruction(sh, &s, 1, 1);
3211 for (i = conf->raid_disks ; i-- ; ) { 3428 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
3212 set_bit(R5_LOCKED, &sh->dev[i].flags);
3213 s.locked++;
3214 set_bit(R5_Wantwrite, &sh->dev[i].flags);
3215 }
3216 clear_bit(STRIPE_EXPANDING, &sh->state);
3217 } else if (s.expanded) {
3218 clear_bit(STRIPE_EXPAND_READY, &sh->state); 3429 clear_bit(STRIPE_EXPAND_READY, &sh->state);
3219 atomic_dec(&conf->reshape_stripes); 3430 atomic_dec(&conf->reshape_stripes);
3220 wake_up(&conf->wait_for_overlap); 3431 wake_up(&conf->wait_for_overlap);
@@ -3232,6 +3443,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3232 if (unlikely(blocked_rdev)) 3443 if (unlikely(blocked_rdev))
3233 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); 3444 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
3234 3445
3446 if (s.ops_request)
3447 raid_run_ops(sh, s.ops_request);
3448
3235 ops_run_io(sh, &s); 3449 ops_run_io(sh, &s);
3236 3450
3237 return_io(return_bi); 3451 return_io(return_bi);
@@ -3240,16 +3454,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3240} 3454}
3241 3455
3242/* returns true if the stripe was handled */ 3456/* returns true if the stripe was handled */
3243static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page) 3457static bool handle_stripe(struct stripe_head *sh)
3244{ 3458{
3245 if (sh->raid_conf->level == 6) 3459 if (sh->raid_conf->level == 6)
3246 return handle_stripe6(sh, tmp_page); 3460 return handle_stripe6(sh);
3247 else 3461 else
3248 return handle_stripe5(sh); 3462 return handle_stripe5(sh);
3249} 3463}
3250 3464
3251
3252
3253static void raid5_activate_delayed(raid5_conf_t *conf) 3465static void raid5_activate_delayed(raid5_conf_t *conf)
3254{ 3466{
3255 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { 3467 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@ -4046,7 +4258,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
4046 spin_unlock(&sh->lock); 4258 spin_unlock(&sh->lock);
4047 4259
4048 /* wait for any blocked device to be handled */ 4260 /* wait for any blocked device to be handled */
4049 while(unlikely(!handle_stripe(sh, NULL))) 4261 while (unlikely(!handle_stripe(sh)))
4050 ; 4262 ;
4051 release_stripe(sh); 4263 release_stripe(sh);
4052 4264
@@ -4103,7 +4315,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
4103 return handled; 4315 return handled;
4104 } 4316 }
4105 4317
4106 handle_stripe(sh, NULL); 4318 handle_stripe(sh);
4107 release_stripe(sh); 4319 release_stripe(sh);
4108 handled++; 4320 handled++;
4109 } 4321 }
@@ -4117,6 +4329,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
4117 return handled; 4329 return handled;
4118} 4330}
4119 4331
4332#ifdef CONFIG_MULTICORE_RAID456
4333static void __process_stripe(void *param, async_cookie_t cookie)
4334{
4335 struct stripe_head *sh = param;
4336
4337 handle_stripe(sh);
4338 release_stripe(sh);
4339}
4340
4341static void process_stripe(struct stripe_head *sh, struct list_head *domain)
4342{
4343 async_schedule_domain(__process_stripe, sh, domain);
4344}
4345
4346static void synchronize_stripe_processing(struct list_head *domain)
4347{
4348 async_synchronize_full_domain(domain);
4349}
4350#else
4351static void process_stripe(struct stripe_head *sh, struct list_head *domain)
4352{
4353 handle_stripe(sh);
4354 release_stripe(sh);
4355 cond_resched();
4356}
4357
4358static void synchronize_stripe_processing(struct list_head *domain)
4359{
4360}
4361#endif
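
The CONFIG_MULTICORE_RAID456 path above hands each stripe to the generic async function-call machinery instead of handling it inline. A minimal sketch of that pattern, assuming only the <linux/async.h> interface the hunk itself uses (the work function and batch runner below are illustrative, not from the patch):

        #include <linux/async.h>
        #include <linux/list.h>

        static void do_one_unit(void *data, async_cookie_t cookie)
        {
                /* per-item work, run from an async pool thread */
        }

        static void run_batch(void **items, int n)
        {
                LIST_HEAD(domain);      /* private domain: wait only for this batch */
                int i;

                for (i = 0; i < n; i++)
                        async_schedule_domain(do_one_unit, items[i], &domain);

                /* returns once every item queued into 'domain' has completed */
                async_synchronize_full_domain(&domain);
        }
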
4120 4362
4121 4363
4122/* 4364/*
@@ -4131,6 +4373,7 @@ static void raid5d(mddev_t *mddev)
4131 struct stripe_head *sh; 4373 struct stripe_head *sh;
4132 raid5_conf_t *conf = mddev->private; 4374 raid5_conf_t *conf = mddev->private;
4133 int handled; 4375 int handled;
4376 LIST_HEAD(raid_domain);
4134 4377
4135 pr_debug("+++ raid5d active\n"); 4378 pr_debug("+++ raid5d active\n");
4136 4379
@@ -4167,8 +4410,7 @@ static void raid5d(mddev_t *mddev)
4167 spin_unlock_irq(&conf->device_lock); 4410 spin_unlock_irq(&conf->device_lock);
4168 4411
4169 handled++; 4412 handled++;
4170 handle_stripe(sh, conf->spare_page); 4413 process_stripe(sh, &raid_domain);
4171 release_stripe(sh);
4172 4414
4173 spin_lock_irq(&conf->device_lock); 4415 spin_lock_irq(&conf->device_lock);
4174 } 4416 }
@@ -4176,6 +4418,7 @@ static void raid5d(mddev_t *mddev)
4176 4418
4177 spin_unlock_irq(&conf->device_lock); 4419 spin_unlock_irq(&conf->device_lock);
4178 4420
4421 synchronize_stripe_processing(&raid_domain);
4179 async_tx_issue_pending_all(); 4422 async_tx_issue_pending_all();
4180 unplug_slaves(mddev); 4423 unplug_slaves(mddev);
4181 4424
@@ -4308,6 +4551,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
4308 return sectors * (raid_disks - conf->max_degraded); 4551 return sectors * (raid_disks - conf->max_degraded);
4309} 4552}
4310 4553
4554static void raid5_free_percpu(raid5_conf_t *conf)
4555{
4556 struct raid5_percpu *percpu;
4557 unsigned long cpu;
4558
4559 if (!conf->percpu)
4560 return;
4561
4562 get_online_cpus();
4563 for_each_possible_cpu(cpu) {
4564 percpu = per_cpu_ptr(conf->percpu, cpu);
4565 safe_put_page(percpu->spare_page);
4566 kfree(percpu->scribble);
4567 }
4568#ifdef CONFIG_HOTPLUG_CPU
4569 unregister_cpu_notifier(&conf->cpu_notify);
4570#endif
4571 put_online_cpus();
4572
4573 free_percpu(conf->percpu);
4574}
4575
4576static void free_conf(raid5_conf_t *conf)
4577{
4578 shrink_stripes(conf);
4579 raid5_free_percpu(conf);
4580 kfree(conf->disks);
4581 kfree(conf->stripe_hashtbl);
4582 kfree(conf);
4583}
4584
4585#ifdef CONFIG_HOTPLUG_CPU
4586static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
4587 void *hcpu)
4588{
4589 raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
4590 long cpu = (long)hcpu;
4591 struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
4592
4593 switch (action) {
4594 case CPU_UP_PREPARE:
4595 case CPU_UP_PREPARE_FROZEN:
4596 if (conf->level == 6 && !percpu->spare_page)
4597 percpu->spare_page = alloc_page(GFP_KERNEL);
4598 if (!percpu->scribble)
4599 percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
4600
4601 if (!percpu->scribble ||
4602 (conf->level == 6 && !percpu->spare_page)) {
4603 safe_put_page(percpu->spare_page);
4604 kfree(percpu->scribble);
4605 pr_err("%s: failed memory allocation for cpu%ld\n",
4606 __func__, cpu);
4607 return NOTIFY_BAD;
4608 }
4609 break;
4610 case CPU_DEAD:
4611 case CPU_DEAD_FROZEN:
4612 safe_put_page(percpu->spare_page);
4613 kfree(percpu->scribble);
4614 percpu->spare_page = NULL;
4615 percpu->scribble = NULL;
4616 break;
4617 default:
4618 break;
4619 }
4620 return NOTIFY_OK;
4621}
4622#endif
4623
4624static int raid5_alloc_percpu(raid5_conf_t *conf)
4625{
4626 unsigned long cpu;
4627 struct page *spare_page;
4628 struct raid5_percpu *allcpus;
4629 void *scribble;
4630 int err;
4631
4632 allcpus = alloc_percpu(struct raid5_percpu);
4633 if (!allcpus)
4634 return -ENOMEM;
4635 conf->percpu = allcpus;
4636
4637 get_online_cpus();
4638 err = 0;
4639 for_each_present_cpu(cpu) {
4640 if (conf->level == 6) {
4641 spare_page = alloc_page(GFP_KERNEL);
4642 if (!spare_page) {
4643 err = -ENOMEM;
4644 break;
4645 }
4646 per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
4647 }
4648 scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
4649 if (!scribble) {
4650 err = -ENOMEM;
4651 break;
4652 }
4653 per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
4654 }
4655#ifdef CONFIG_HOTPLUG_CPU
4656 conf->cpu_notify.notifier_call = raid456_cpu_notify;
4657 conf->cpu_notify.priority = 0;
4658 if (err == 0)
4659 err = register_cpu_notifier(&conf->cpu_notify);
4660#endif
4661 put_online_cpus();
4662
4663 return err;
4664}
4665
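
raid5_alloc_percpu() only provisions the per-cpu resources; a sketch of how the stripe-handling side is expected to consume them, assuming the usual get_cpu()/per_cpu_ptr() pattern (the helper below is illustrative, not quoted from the patch):

        #include <linux/percpu.h>
        #include <linux/smp.h>

        static void use_percpu_resources(raid5_conf_t *conf)
        {
                struct raid5_percpu *percpu;
                int cpu;

                cpu = get_cpu();                /* pin to this CPU */
                percpu = per_cpu_ptr(conf->percpu, cpu);
                /* ... run stripe operations using percpu->scribble and,
                 * for raid6 checks, percpu->spare_page ...
                 */
                put_cpu();
        }
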
4311static raid5_conf_t *setup_conf(mddev_t *mddev) 4666static raid5_conf_t *setup_conf(mddev_t *mddev)
4312{ 4667{
4313 raid5_conf_t *conf; 4668 raid5_conf_t *conf;
@@ -4349,6 +4704,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4349 goto abort; 4704 goto abort;
4350 4705
4351 conf->raid_disks = mddev->raid_disks; 4706 conf->raid_disks = mddev->raid_disks;
4707 conf->scribble_len = scribble_len(conf->raid_disks);
4352 if (mddev->reshape_position == MaxSector) 4708 if (mddev->reshape_position == MaxSector)
4353 conf->previous_raid_disks = mddev->raid_disks; 4709 conf->previous_raid_disks = mddev->raid_disks;
4354 else 4710 else
@@ -4364,11 +4720,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4364 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) 4720 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
4365 goto abort; 4721 goto abort;
4366 4722
4367 if (mddev->new_level == 6) { 4723 conf->level = mddev->new_level;
4368 conf->spare_page = alloc_page(GFP_KERNEL); 4724 if (raid5_alloc_percpu(conf) != 0)
4369 if (!conf->spare_page) 4725 goto abort;
4370 goto abort; 4726
4371 }
4372 spin_lock_init(&conf->device_lock); 4727 spin_lock_init(&conf->device_lock);
4373 init_waitqueue_head(&conf->wait_for_stripe); 4728 init_waitqueue_head(&conf->wait_for_stripe);
4374 init_waitqueue_head(&conf->wait_for_overlap); 4729 init_waitqueue_head(&conf->wait_for_overlap);
@@ -4439,11 +4794,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4439 4794
4440 abort: 4795 abort:
4441 if (conf) { 4796 if (conf) {
4442 shrink_stripes(conf); 4797 free_conf(conf);
4443 safe_put_page(conf->spare_page);
4444 kfree(conf->disks);
4445 kfree(conf->stripe_hashtbl);
4446 kfree(conf);
4447 return ERR_PTR(-EIO); 4798 return ERR_PTR(-EIO);
4448 } else 4799 } else
4449 return ERR_PTR(-ENOMEM); 4800 return ERR_PTR(-ENOMEM);
@@ -4613,12 +4964,8 @@ abort:
4613 md_unregister_thread(mddev->thread); 4964 md_unregister_thread(mddev->thread);
4614 mddev->thread = NULL; 4965 mddev->thread = NULL;
4615 if (conf) { 4966 if (conf) {
4616 shrink_stripes(conf);
4617 print_raid5_conf(conf); 4967 print_raid5_conf(conf);
4618 safe_put_page(conf->spare_page); 4968 free_conf(conf);
4619 kfree(conf->disks);
4620 kfree(conf->stripe_hashtbl);
4621 kfree(conf);
4622 } 4969 }
4623 mddev->private = NULL; 4970 mddev->private = NULL;
4624 printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); 4971 printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev));
@@ -4633,13 +4980,10 @@ static int stop(mddev_t *mddev)
4633 4980
4634 md_unregister_thread(mddev->thread); 4981 md_unregister_thread(mddev->thread);
4635 mddev->thread = NULL; 4982 mddev->thread = NULL;
4636 shrink_stripes(conf);
4637 kfree(conf->stripe_hashtbl);
4638 mddev->queue->backing_dev_info.congested_fn = NULL; 4983 mddev->queue->backing_dev_info.congested_fn = NULL;
4639 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 4984 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
4640 sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); 4985 sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
4641 kfree(conf->disks); 4986 free_conf(conf);
4642 kfree(conf);
4643 mddev->private = NULL; 4987 mddev->private = NULL;
4644 return 0; 4988 return 0;
4645} 4989}
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 9459689c4ea0..2390e0e83daf 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -2,6 +2,7 @@
2#define _RAID5_H 2#define _RAID5_H
3 3
4#include <linux/raid/xor.h> 4#include <linux/raid/xor.h>
5#include <linux/dmaengine.h>
5 6
6/* 7/*
7 * 8 *
@@ -175,7 +176,9 @@
175 */ 176 */
176enum check_states { 177enum check_states {
177 check_state_idle = 0, 178 check_state_idle = 0,
178 check_state_run, /* parity check */ 179 check_state_run, /* xor parity check */
180 check_state_run_q, /* q-parity check */
181 check_state_run_pq, /* pq dual parity check */
179 check_state_check_result, 182 check_state_check_result,
180 check_state_compute_run, /* parity repair */ 183 check_state_compute_run, /* parity repair */
181 check_state_compute_result, 184 check_state_compute_result,
@@ -215,8 +218,8 @@ struct stripe_head {
215 * @target - STRIPE_OP_COMPUTE_BLK target 218 * @target - STRIPE_OP_COMPUTE_BLK target
216 */ 219 */
217 struct stripe_operations { 220 struct stripe_operations {
218 int target; 221 int target, target2;
219 u32 zero_sum_result; 222 enum sum_check_flags zero_sum_result;
220 } ops; 223 } ops;
221 struct r5dev { 224 struct r5dev {
222 struct bio req; 225 struct bio req;
@@ -298,7 +301,7 @@ struct r6_state {
298#define STRIPE_OP_COMPUTE_BLK 1 301#define STRIPE_OP_COMPUTE_BLK 1
299#define STRIPE_OP_PREXOR 2 302#define STRIPE_OP_PREXOR 2
300#define STRIPE_OP_BIODRAIN 3 303#define STRIPE_OP_BIODRAIN 3
301#define STRIPE_OP_POSTXOR 4 304#define STRIPE_OP_RECONSTRUCT 4
302#define STRIPE_OP_CHECK 5 305#define STRIPE_OP_CHECK 5
303 306
304/* 307/*
@@ -385,8 +388,21 @@ struct raid5_private_data {
385 * (fresh device added). 388 * (fresh device added).
386 * Cleared when a sync completes. 389 * Cleared when a sync completes.
387 */ 390 */
388 391 /* per cpu variables */
389 struct page *spare_page; /* Used when checking P/Q in raid6 */ 392 struct raid5_percpu {
393 struct page *spare_page; /* Used when checking P/Q in raid6 */
394 void *scribble; /* space for constructing buffer
395 * lists and performing address
396 * conversions
397 */
398 } *percpu;
399 size_t scribble_len; /* size of scribble region must be
400 * associated with conf to handle
401 * cpu hotplug while reshaping
402 */
403#ifdef CONFIG_HOTPLUG_CPU
404 struct notifier_block cpu_notify;
405#endif
390 406
391 /* 407 /*
392 * Free stripes pool 408 * Free stripes pool
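
The scribble region replaces on-stack page lists in the stripe paths. One plausible sizing rule, assuming each operation needs a page-pointer list plus a matching addr_conv_t list covering every data disk plus P and Q (the helper name and formula are an assumption, not quoted from the patch):

        #include <linux/async_tx.h>     /* addr_conv_t */

        static size_t example_scribble_len(int num_disks)
        {
                /* assumption: one page pointer and one address-conversion
                 * slot per block, plus two extra slots for P and Q */
                return sizeof(struct page *) * (num_disks + 2) +
                       sizeof(addr_conv_t) * (num_disks + 2);
        }
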
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 5fc2ef8d97fa..a1c486a88e88 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -58,25 +58,60 @@ struct dma_chan_ref {
58 * array. 58 * array.
59 * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a 59 * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
60 * dependency chain 60 * dependency chain
61 * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. 61 * @ASYNC_TX_FENCE: specify that the next operation in the dependency
62 * chain uses this operation's result as an input
62 */ 63 */
63enum async_tx_flags { 64enum async_tx_flags {
64 ASYNC_TX_XOR_ZERO_DST = (1 << 0), 65 ASYNC_TX_XOR_ZERO_DST = (1 << 0),
65 ASYNC_TX_XOR_DROP_DST = (1 << 1), 66 ASYNC_TX_XOR_DROP_DST = (1 << 1),
66 ASYNC_TX_ACK = (1 << 3), 67 ASYNC_TX_ACK = (1 << 2),
67 ASYNC_TX_DEP_ACK = (1 << 4), 68 ASYNC_TX_FENCE = (1 << 3),
69};
70
71/**
72 * struct async_submit_ctl - async_tx submission/completion modifiers
73 * @flags: submission modifiers
74 * @depend_tx: parent dependency of the current operation being submitted
75 * @cb_fn: callback routine to run at operation completion
76 * @cb_param: parameter for the callback routine
77 * @scribble: caller provided space for dma/page address conversions
78 */
79struct async_submit_ctl {
80 enum async_tx_flags flags;
81 struct dma_async_tx_descriptor *depend_tx;
82 dma_async_tx_callback cb_fn;
83 void *cb_param;
84 void *scribble;
68}; 85};
69 86
70#ifdef CONFIG_DMA_ENGINE 87#ifdef CONFIG_DMA_ENGINE
71#define async_tx_issue_pending_all dma_issue_pending_all 88#define async_tx_issue_pending_all dma_issue_pending_all
89
90/**
91 * async_tx_issue_pending - send pending descriptor to the hardware channel
92 * @tx: descriptor handle to retrieve hardware context
93 *
94 * Note: any dependent operations will have already been issued by
95 * async_tx_channel_switch, or (in the case of no channel switch) will
96 * be already pending on this channel.
97 */
98static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
99{
100 if (likely(tx)) {
101 struct dma_chan *chan = tx->chan;
102 struct dma_device *dma = chan->device;
103
104 dma->device_issue_pending(chan);
105 }
106}
72#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL 107#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
73#include <asm/async_tx.h> 108#include <asm/async_tx.h>
74#else 109#else
75#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ 110#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
76 __async_tx_find_channel(dep, type) 111 __async_tx_find_channel(dep, type)
77struct dma_chan * 112struct dma_chan *
78__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 113__async_tx_find_channel(struct async_submit_ctl *submit,
79 enum dma_transaction_type tx_type); 114 enum dma_transaction_type tx_type);
80#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ 115#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
81#else 116#else
82static inline void async_tx_issue_pending_all(void) 117static inline void async_tx_issue_pending_all(void)
@@ -84,10 +119,16 @@ static inline void async_tx_issue_pending_all(void)
84 do { } while (0); 119 do { } while (0);
85} 120}
86 121
122static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
123{
124 do { } while (0);
125}
126
87static inline struct dma_chan * 127static inline struct dma_chan *
88async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 128async_tx_find_channel(struct async_submit_ctl *submit,
89 enum dma_transaction_type tx_type, struct page **dst, int dst_count, 129 enum dma_transaction_type tx_type, struct page **dst,
90 struct page **src, int src_count, size_t len) 130 int dst_count, struct page **src, int src_count,
131 size_t len)
91{ 132{
92 return NULL; 133 return NULL;
93} 134}
@@ -99,46 +140,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
99 * @cb_fn_param: parameter to pass to the callback routine 140 * @cb_fn_param: parameter to pass to the callback routine
100 */ 141 */
101static inline void 142static inline void
102async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) 143async_tx_sync_epilog(struct async_submit_ctl *submit)
103{ 144{
104 if (cb_fn) 145 if (submit->cb_fn)
105 cb_fn(cb_fn_param); 146 submit->cb_fn(submit->cb_param);
106} 147}
107 148
108void 149typedef union {
109async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, 150 unsigned long addr;
110 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, 151 struct page *page;
111 dma_async_tx_callback cb_fn, void *cb_fn_param); 152 dma_addr_t dma;
153} addr_conv_t;
154
155static inline void
156init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
157 struct dma_async_tx_descriptor *tx,
158 dma_async_tx_callback cb_fn, void *cb_param,
159 addr_conv_t *scribble)
160{
161 args->flags = flags;
162 args->depend_tx = tx;
163 args->cb_fn = cb_fn;
164 args->cb_param = cb_param;
165 args->scribble = scribble;
166}
167
168void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
169 struct async_submit_ctl *submit);
112 170
113struct dma_async_tx_descriptor * 171struct dma_async_tx_descriptor *
114async_xor(struct page *dest, struct page **src_list, unsigned int offset, 172async_xor(struct page *dest, struct page **src_list, unsigned int offset,
115 int src_cnt, size_t len, enum async_tx_flags flags, 173 int src_cnt, size_t len, struct async_submit_ctl *submit);
116 struct dma_async_tx_descriptor *depend_tx,
117 dma_async_tx_callback cb_fn, void *cb_fn_param);
118 174
119struct dma_async_tx_descriptor * 175struct dma_async_tx_descriptor *
120async_xor_zero_sum(struct page *dest, struct page **src_list, 176async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
121 unsigned int offset, int src_cnt, size_t len, 177 int src_cnt, size_t len, enum sum_check_flags *result,
122 u32 *result, enum async_tx_flags flags, 178 struct async_submit_ctl *submit);
123 struct dma_async_tx_descriptor *depend_tx,
124 dma_async_tx_callback cb_fn, void *cb_fn_param);
125 179
126struct dma_async_tx_descriptor * 180struct dma_async_tx_descriptor *
127async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, 181async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
128 unsigned int src_offset, size_t len, enum async_tx_flags flags, 182 unsigned int src_offset, size_t len,
129 struct dma_async_tx_descriptor *depend_tx, 183 struct async_submit_ctl *submit);
130 dma_async_tx_callback cb_fn, void *cb_fn_param);
131 184
132struct dma_async_tx_descriptor * 185struct dma_async_tx_descriptor *
133async_memset(struct page *dest, int val, unsigned int offset, 186async_memset(struct page *dest, int val, unsigned int offset,
134 size_t len, enum async_tx_flags flags, 187 size_t len, struct async_submit_ctl *submit);
135 struct dma_async_tx_descriptor *depend_tx, 188
136 dma_async_tx_callback cb_fn, void *cb_fn_param); 189struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
190
191struct dma_async_tx_descriptor *
192async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
193 size_t len, struct async_submit_ctl *submit);
194
195struct dma_async_tx_descriptor *
196async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
197 size_t len, enum sum_check_flags *pqres, struct page *spare,
198 struct async_submit_ctl *submit);
199
200struct dma_async_tx_descriptor *
201async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
202 struct page **ptrs, struct async_submit_ctl *submit);
137 203
138struct dma_async_tx_descriptor * 204struct dma_async_tx_descriptor *
139async_trigger_callback(enum async_tx_flags flags, 205async_raid6_datap_recov(int src_num, size_t bytes, int faila,
140 struct dma_async_tx_descriptor *depend_tx, 206 struct page **ptrs, struct async_submit_ctl *submit);
141 dma_async_tx_callback cb_fn, void *cb_fn_param);
142 207
143void async_tx_quiesce(struct dma_async_tx_descriptor **tx); 208void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
144#endif /* _ASYNC_TX_H_ */ 209#endif /* _ASYNC_TX_H_ */
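
A hedged usage sketch of the reworked interface (not part of the patch): compute the raid6 syndrome for a 4-data-disk stripe and then re-validate it, chaining the two operations through struct async_submit_ctl. The buffer layout and helper name are assumptions; 'blocks' holds the data pages followed by P and Q, and 'spare' is scratch for the validate step.

        #include <linux/async_tx.h>

        static int example_pq_check(struct page **blocks,       /* 4 data, P, Q */
                                    struct page *spare, size_t len,
                                    addr_conv_t *scribble)
        {
                struct async_submit_ctl submit;
                struct dma_async_tx_descriptor *tx;
                enum sum_check_flags result = 0;

                /* generate P/Q; fence so the validate pass sees the final result */
                init_async_submit(&submit, ASYNC_TX_FENCE, NULL, NULL, NULL, scribble);
                tx = async_gen_syndrome(blocks, 0, 6, len, &submit);

                /* check P/Q against the data blocks, dependent on the generation */
                init_async_submit(&submit, ASYNC_TX_ACK, tx, NULL, NULL, scribble);
                tx = async_syndrome_val(blocks, 0, 6, len, &result, spare, &submit);

                async_tx_issue_pending(tx);
                async_tx_quiesce(&tx);          /* wait so 'result' is stable */

                return result & (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT);
        }
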
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index f114bc7790bc..2b9f2ac7ed60 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -48,19 +48,20 @@ enum dma_status {
48 48
49/** 49/**
50 * enum dma_transaction_type - DMA transaction types/indexes 50 * enum dma_transaction_type - DMA transaction types/indexes
51 *
52 * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is
53 * automatically set as dma devices are registered.
51 */ 54 */
52enum dma_transaction_type { 55enum dma_transaction_type {
53 DMA_MEMCPY, 56 DMA_MEMCPY,
54 DMA_XOR, 57 DMA_XOR,
55 DMA_PQ_XOR, 58 DMA_PQ,
56 DMA_DUAL_XOR, 59 DMA_XOR_VAL,
57 DMA_PQ_UPDATE, 60 DMA_PQ_VAL,
58 DMA_ZERO_SUM,
59 DMA_PQ_ZERO_SUM,
60 DMA_MEMSET, 61 DMA_MEMSET,
61 DMA_MEMCPY_CRC32C,
62 DMA_INTERRUPT, 62 DMA_INTERRUPT,
63 DMA_PRIVATE, 63 DMA_PRIVATE,
64 DMA_ASYNC_TX,
64 DMA_SLAVE, 65 DMA_SLAVE,
65}; 66};
66 67
@@ -70,18 +71,25 @@ enum dma_transaction_type {
70 71
71/** 72/**
72 * enum dma_ctrl_flags - DMA flags to augment operation preparation, 73 * enum dma_ctrl_flags - DMA flags to augment operation preparation,
73 * control completion, and communicate status. 74 * control completion, and communicate status.
74 * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of 75 * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
75 * this transaction 76 * this transaction
76 * @DMA_CTRL_ACK - the descriptor cannot be reused until the client 77 * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
 77 * acknowledges receipt, i.e. has a chance to establish any 78 * acknowledges receipt, i.e. has a chance to establish any dependency
REPLACEMENT_MARKER
78 * dependency chains 79 * chains
79 * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) 80 * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
80 * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) 81 * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
81 * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single 82 * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
82 * (if not set, do the source dma-unmapping as page) 83 * (if not set, do the source dma-unmapping as page)
83 * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single 84 * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
84 * (if not set, do the destination dma-unmapping as page) 85 * (if not set, do the destination dma-unmapping as page)
86 * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
87 * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
88 * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
89 * sources that were the result of a previous operation, in the case of a PQ
90 * operation it continues the calculation with new sources
91 * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
92 * on the result of this operation
85 */ 93 */
86enum dma_ctrl_flags { 94enum dma_ctrl_flags {
87 DMA_PREP_INTERRUPT = (1 << 0), 95 DMA_PREP_INTERRUPT = (1 << 0),
@@ -90,9 +98,32 @@ enum dma_ctrl_flags {
90 DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), 98 DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
91 DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), 99 DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
92 DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), 100 DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
101 DMA_PREP_PQ_DISABLE_P = (1 << 6),
102 DMA_PREP_PQ_DISABLE_Q = (1 << 7),
103 DMA_PREP_CONTINUE = (1 << 8),
104 DMA_PREP_FENCE = (1 << 9),
93}; 105};
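
A sketch of how the new prep flags might be combined for a Q-only continuation pass, as async_gen_syndrome() could request it from a driver (the wrapper and its arguments are illustrative):

        #include <linux/dmaengine.h>

        static struct dma_async_tx_descriptor *
        prep_q_continue(struct dma_device *dma, struct dma_chan *chan,
                        dma_addr_t *pq, dma_addr_t *srcs, unsigned int src_cnt,
                        const unsigned char *coefs, size_t len)
        {
                /* leave P alone and accumulate Q' on top of a prior result */
                unsigned long flags = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;

                return dma->device_prep_dma_pq(chan, pq, srcs, src_cnt, coefs,
                                               len, flags);
        }
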
94 106
95/** 107/**
 108 * enum sum_check_bits - bit positions used in sum_check_flags
109 */
110enum sum_check_bits {
111 SUM_CHECK_P = 0,
112 SUM_CHECK_Q = 1,
113};
114
115/**
 116 * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations
117 * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
118 * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
119 */
120enum sum_check_flags {
121 SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
122 SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
123};
124
125
126/**
96 * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. 127 * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
97 * See linux/cpumask.h 128 * See linux/cpumask.h
98 */ 129 */
@@ -210,6 +241,11 @@ struct dma_async_tx_descriptor {
210 * @global_node: list_head for global dma_device_list 241 * @global_node: list_head for global dma_device_list
211 * @cap_mask: one or more dma_capability flags 242 * @cap_mask: one or more dma_capability flags
212 * @max_xor: maximum number of xor sources, 0 if no capability 243 * @max_xor: maximum number of xor sources, 0 if no capability
244 * @max_pq: maximum number of PQ sources and PQ-continue capability
245 * @copy_align: alignment shift for memcpy operations
246 * @xor_align: alignment shift for xor operations
247 * @pq_align: alignment shift for pq operations
248 * @fill_align: alignment shift for memset operations
213 * @dev_id: unique device ID 249 * @dev_id: unique device ID
214 * @dev: struct device reference for dma mapping api 250 * @dev: struct device reference for dma mapping api
215 * @device_alloc_chan_resources: allocate resources and return the 251 * @device_alloc_chan_resources: allocate resources and return the
@@ -217,7 +253,9 @@ struct dma_async_tx_descriptor {
217 * @device_free_chan_resources: release DMA channel's resources 253 * @device_free_chan_resources: release DMA channel's resources
218 * @device_prep_dma_memcpy: prepares a memcpy operation 254 * @device_prep_dma_memcpy: prepares a memcpy operation
219 * @device_prep_dma_xor: prepares a xor operation 255 * @device_prep_dma_xor: prepares a xor operation
220 * @device_prep_dma_zero_sum: prepares a zero_sum operation 256 * @device_prep_dma_xor_val: prepares a xor validation operation
257 * @device_prep_dma_pq: prepares a pq operation
 258 * @device_prep_dma_pq_val: prepares a pq validation (zero_sum) operation
221 * @device_prep_dma_memset: prepares a memset operation 259 * @device_prep_dma_memset: prepares a memset operation
222 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation 260 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
223 * @device_prep_slave_sg: prepares a slave dma operation 261 * @device_prep_slave_sg: prepares a slave dma operation
@@ -232,7 +270,13 @@ struct dma_device {
232 struct list_head channels; 270 struct list_head channels;
233 struct list_head global_node; 271 struct list_head global_node;
234 dma_cap_mask_t cap_mask; 272 dma_cap_mask_t cap_mask;
235 int max_xor; 273 unsigned short max_xor;
274 unsigned short max_pq;
275 u8 copy_align;
276 u8 xor_align;
277 u8 pq_align;
278 u8 fill_align;
279 #define DMA_HAS_PQ_CONTINUE (1 << 15)
236 280
237 int dev_id; 281 int dev_id;
238 struct device *dev; 282 struct device *dev;
@@ -246,9 +290,17 @@ struct dma_device {
246 struct dma_async_tx_descriptor *(*device_prep_dma_xor)( 290 struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
247 struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, 291 struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
248 unsigned int src_cnt, size_t len, unsigned long flags); 292 unsigned int src_cnt, size_t len, unsigned long flags);
249 struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( 293 struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
250 struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, 294 struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
251 size_t len, u32 *result, unsigned long flags); 295 size_t len, enum sum_check_flags *result, unsigned long flags);
296 struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
297 struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
298 unsigned int src_cnt, const unsigned char *scf,
299 size_t len, unsigned long flags);
300 struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
301 struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
302 unsigned int src_cnt, const unsigned char *scf, size_t len,
303 enum sum_check_flags *pqres, unsigned long flags);
252 struct dma_async_tx_descriptor *(*device_prep_dma_memset)( 304 struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
253 struct dma_chan *chan, dma_addr_t dest, int value, size_t len, 305 struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
254 unsigned long flags); 306 unsigned long flags);
@@ -267,6 +319,96 @@ struct dma_device {
267 void (*device_issue_pending)(struct dma_chan *chan); 319 void (*device_issue_pending)(struct dma_chan *chan);
268}; 320};
269 321
322static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
323{
324 size_t mask;
325
326 if (!align)
327 return true;
328 mask = (1 << align) - 1;
329 if (mask & (off1 | off2 | len))
330 return false;
331 return true;
332}
333
334static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
335 size_t off2, size_t len)
336{
337 return dmaengine_check_align(dev->copy_align, off1, off2, len);
338}
339
340static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
341 size_t off2, size_t len)
342{
343 return dmaengine_check_align(dev->xor_align, off1, off2, len);
344}
345
346static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
347 size_t off2, size_t len)
348{
349 return dmaengine_check_align(dev->pq_align, off1, off2, len);
350}
351
352static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
353 size_t off2, size_t len)
354{
355 return dmaengine_check_align(dev->fill_align, off1, off2, len);
356}
357
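
A sketch of the decision these helpers enable, roughly the test an async_tx routine applies before offloading rather than falling back to the synchronous path (the wrapper below is illustrative):

        #include <linux/dmaengine.h>

        static bool use_dma_for_xor(struct dma_chan *chan, unsigned int offset,
                                    size_t len, int src_cnt)
        {
                struct dma_device *dev = chan ? chan->device : NULL;

                /* offload only if a channel exists, the source count fits,
                 * and the offsets/length honor the engine's alignment shift */
                return dev && src_cnt <= dev->max_xor &&
                       is_dma_xor_aligned(dev, offset, offset, len);
        }
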
358static inline void
359dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
360{
361 dma->max_pq = maxpq;
362 if (has_pq_continue)
363 dma->max_pq |= DMA_HAS_PQ_CONTINUE;
364}
365
366static inline bool dmaf_continue(enum dma_ctrl_flags flags)
367{
368 return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
369}
370
371static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
372{
373 enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
374
375 return (flags & mask) == mask;
376}
377
378static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
379{
380 return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
381}
382
 383static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma)
384{
385 return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
386}
387
388/* dma_maxpq - reduce maxpq in the face of continued operations
389 * @dma - dma device with PQ capability
390 * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
391 *
392 * When an engine does not support native continuation we need 3 extra
393 * source slots to reuse P and Q with the following coefficients:
394 * 1/ {00} * P : remove P from Q', but use it as a source for P'
395 * 2/ {01} * Q : use Q to continue Q' calculation
396 * 3/ {00} * Q : subtract Q from P' to cancel (2)
397 *
398 * In the case where P is disabled we only need 1 extra source:
399 * 1/ {01} * Q : use Q to continue Q' calculation
400 */
401static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
402{
403 if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
404 return dma_dev_to_maxpq(dma);
405 else if (dmaf_p_disabled_continue(flags))
406 return dma_dev_to_maxpq(dma) - 1;
407 else if (dmaf_continue(flags))
408 return dma_dev_to_maxpq(dma) - 3;
409 BUG();
410}
411
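
A worked example of dma_maxpq() for a hypothetical engine registered with dma_set_maxpq(dma, 8, 0), i.e. 8 PQ sources and no native continuation support:

        /*
         *   dma_maxpq(dma, 0)                              ->  8
         *   dma_maxpq(dma, DMA_PREP_CONTINUE)              ->  5  (8 - 3 slots
         *                                                         to reuse P and Q)
         *   dma_maxpq(dma, DMA_PREP_CONTINUE |
         *                  DMA_PREP_PQ_DISABLE_P)          ->  7  (8 - 1 slot
         *                                                         to reuse Q)
         */
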
270/* --- public DMA engine API --- */ 412/* --- public DMA engine API --- */
271 413
272#ifdef CONFIG_DMA_ENGINE 414#ifdef CONFIG_DMA_ENGINE
@@ -296,7 +438,11 @@ static inline void net_dmaengine_put(void)
296#ifdef CONFIG_ASYNC_TX_DMA 438#ifdef CONFIG_ASYNC_TX_DMA
297#define async_dmaengine_get() dmaengine_get() 439#define async_dmaengine_get() dmaengine_get()
298#define async_dmaengine_put() dmaengine_put() 440#define async_dmaengine_put() dmaengine_put()
441#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
442#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
443#else
299#define async_dma_find_channel(type) dma_find_channel(type) 444#define async_dma_find_channel(type) dma_find_channel(type)
445#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
300#else 446#else
301static inline void async_dmaengine_get(void) 447static inline void async_dmaengine_get(void)
302{ 448{
@@ -309,7 +455,7 @@ async_dma_find_channel(enum dma_transaction_type type)
309{ 455{
310 return NULL; 456 return NULL;
311} 457}
312#endif 458#endif /* CONFIG_ASYNC_TX_DMA */
313 459
314dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, 460dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
315 void *dest, void *src, size_t len); 461 void *dest, void *src, size_t len);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a3b000365795..bbeb13ceb8e8 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2515,6 +2515,16 @@
2515#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e 2515#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e
2516#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b 2516#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b
2517#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c 2517#define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c
2518#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710
2519#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711
2520#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712
2521#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713
2522#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714
2523#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715
2524#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716
2525#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717
2526#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718
2527#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719
2518#define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 2528#define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14
2519#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 2529#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16
2520#define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 2530#define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18