aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/crypto/async-tx-api.txt75
-rw-r--r--arch/arm/include/asm/hardware/iop3xx-adma.h5
-rw-r--r--arch/arm/mach-iop13xx/include/mach/adma.h12
-rw-r--r--arch/arm/mach-iop13xx/setup.c10
-rw-r--r--arch/arm/plat-iop/adma.c2
-rw-r--r--crypto/async_tx/Kconfig9
-rw-r--r--crypto/async_tx/Makefile3
-rw-r--r--crypto/async_tx/async_memcpy.c39
-rw-r--r--crypto/async_tx/async_memset.c38
-rw-r--r--crypto/async_tx/async_pq.c388
-rw-r--r--crypto/async_tx/async_raid6_recov.c448
-rw-r--r--crypto/async_tx/async_tx.c83
-rw-r--r--crypto/async_tx/async_xor.c199
-rw-r--r--crypto/async_tx/raid6test.c241
-rw-r--r--drivers/dma/Kconfig2
-rw-r--r--drivers/dma/dmaengine.c53
-rw-r--r--drivers/dma/dmatest.c26
-rw-r--r--drivers/dma/iop-adma.c40
-rw-r--r--drivers/md/Kconfig26
-rw-r--r--drivers/md/raid5.c1486
-rw-r--r--drivers/md/raid5.h28
-rw-r--r--include/linux/async_tx.h126
-rw-r--r--include/linux/dmaengine.h116
23 files changed, 2528 insertions, 927 deletions
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 9f59fcbf5d8..ba046b8fa92 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -54,20 +54,23 @@ features surfaced as a result:
54 54
553.1 General format of the API: 553.1 General format of the API:
56struct dma_async_tx_descriptor * 56struct dma_async_tx_descriptor *
57async_<operation>(<op specific parameters>, 57async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
58 enum async_tx_flags flags,
59 struct dma_async_tx_descriptor *dependency,
60 dma_async_tx_callback callback_routine,
61 void *callback_parameter);
62 58
633.2 Supported operations: 593.2 Supported operations:
64memcpy - memory copy between a source and a destination buffer 60memcpy - memory copy between a source and a destination buffer
65memset - fill a destination buffer with a byte value 61memset - fill a destination buffer with a byte value
66xor - xor a series of source buffers and write the result to a 62xor - xor a series of source buffers and write the result to a
67 destination buffer 63 destination buffer
68xor_zero_sum - xor a series of source buffers and set a flag if the 64xor_val - xor a series of source buffers and set a flag if the
69 result is zero. The implementation attempts to prevent 65 result is zero. The implementation attempts to prevent
70 writes to memory 66 writes to memory
67pq - generate the p+q (raid6 syndrome) from a series of source buffers
68pq_val - validate that a p and/or q buffer are in sync with a given series of
69 sources
70datap - (raid6_datap_recov) recover a raid6 data block and the p block
71 from the given sources
722data - (raid6_2data_recov) recover 2 raid6 data blocks from the given
73 sources
71 74
723.3 Descriptor management: 753.3 Descriptor management:
73The return value is non-NULL and points to a 'descriptor' when the operation 76The return value is non-NULL and points to a 'descriptor' when the operation
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
80recycle (or free) the descriptor. A descriptor can be acked by one of the 83recycle (or free) the descriptor. A descriptor can be acked by one of the
81following methods: 84following methods:
821/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted 851/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
832/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent 862/ submitting an unacknowledged descriptor as a dependency to another
84 descriptor of a new operation. 87 async_tx call will implicitly set the acknowledged state.
853/ calling async_tx_ack() on the descriptor. 883/ calling async_tx_ack() on the descriptor.
86 89
873.4 When does the operation execute? 903.4 When does the operation execute?
@@ -119,30 +122,42 @@ of an operation.
119Perform a xor->copy->xor operation where each operation depends on the 122Perform a xor->copy->xor operation where each operation depends on the
120result from the previous operation: 123result from the previous operation:
121 124
122void complete_xor_copy_xor(void *param) 125void callback(void *param)
123{ 126{
124 printk("complete\n"); 127 struct completion *cmp = param;
128
129 complete(cmp);
125} 130}
126 131
127int run_xor_copy_xor(struct page **xor_srcs, 132void run_xor_copy_xor(struct page **xor_srcs,
128 int xor_src_cnt, 133 int xor_src_cnt,
129 struct page *xor_dest, 134 struct page *xor_dest,
130 size_t xor_len, 135 size_t xor_len,
131 struct page *copy_src, 136 struct page *copy_src,
132 struct page *copy_dest, 137 struct page *copy_dest,
133 size_t copy_len) 138 size_t copy_len)
134{ 139{
135 struct dma_async_tx_descriptor *tx; 140 struct dma_async_tx_descriptor *tx;
141 addr_conv_t addr_conv[xor_src_cnt];
142 struct async_submit_ctl submit;
143 addr_conv_t addr_conv[NDISKS];
144 struct completion cmp;
145
146 init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
147 addr_conv);
148 tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
136 149
137 tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, 150 submit.depend_tx = tx;
138 ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); 151 tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
139 tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, 152
140 ASYNC_TX_DEP_ACK, tx, NULL, NULL); 153 init_completion(&cmp);
141 tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, 154 init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
142 ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, 155 callback, &cmp, addr_conv);
143 tx, complete_xor_copy_xor, NULL); 156 tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
144 157
145 async_tx_issue_pending_all(); 158 async_tx_issue_pending_all();
159
160 wait_for_completion(&cmp);
146} 161}
147 162
148See include/linux/async_tx.h for more information on the flags. See the 163See include/linux/async_tx.h for more information on the flags. See the
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 83e6ba338e2..26eefea0231 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -756,13 +756,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
756 hw_desc->src[0] = val; 756 hw_desc->src[0] = val;
757} 757}
758 758
759static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) 759static inline enum sum_check_flags
760iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
760{ 761{
761 struct iop3xx_desc_aau *hw_desc = desc->hw_desc; 762 struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
762 struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; 763 struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
763 764
764 iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); 765 iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
765 return desc_ctrl.zero_result_err; 766 return desc_ctrl.zero_result_err << SUM_CHECK_P;
766} 767}
767 768
768static inline void iop_chan_append(struct iop_adma_chan *chan) 769static inline void iop_chan_append(struct iop_adma_chan *chan)
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h
index 5722e86f217..1cd31df8924 100644
--- a/arch/arm/mach-iop13xx/include/mach/adma.h
+++ b/arch/arm/mach-iop13xx/include/mach/adma.h
@@ -428,18 +428,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
428 hw_desc->block_fill_data = val; 428 hw_desc->block_fill_data = val;
429} 429}
430 430
431static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) 431static inline enum sum_check_flags
432iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
432{ 433{
433 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 434 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
434 struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; 435 struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
435 struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; 436 struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
437 enum sum_check_flags flags;
436 438
437 BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); 439 BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
438 440
439 if (desc_ctrl.pq_xfer_en) 441 flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
440 return byte_count.zero_result_err_q; 442 flags |= byte_count.zero_result_err << SUM_CHECK_P;
441 else 443
442 return byte_count.zero_result_err; 444 return flags;
443} 445}
444 446
445static inline void iop_chan_append(struct iop_adma_chan *chan) 447static inline void iop_chan_append(struct iop_adma_chan *chan)
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index bee42c609df..faaef95342b 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -478,7 +478,7 @@ void __init iop13xx_platform_init(void)
478 dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); 478 dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
479 dma_cap_set(DMA_XOR, plat_data->cap_mask); 479 dma_cap_set(DMA_XOR, plat_data->cap_mask);
480 dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); 480 dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
481 dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); 481 dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
482 dma_cap_set(DMA_MEMSET, plat_data->cap_mask); 482 dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
483 dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); 483 dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
484 dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); 484 dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
@@ -490,7 +490,7 @@ void __init iop13xx_platform_init(void)
490 dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); 490 dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
491 dma_cap_set(DMA_XOR, plat_data->cap_mask); 491 dma_cap_set(DMA_XOR, plat_data->cap_mask);
492 dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); 492 dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
493 dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); 493 dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
494 dma_cap_set(DMA_MEMSET, plat_data->cap_mask); 494 dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
495 dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); 495 dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
496 dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); 496 dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
@@ -502,13 +502,13 @@ void __init iop13xx_platform_init(void)
502 dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); 502 dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
503 dma_cap_set(DMA_XOR, plat_data->cap_mask); 503 dma_cap_set(DMA_XOR, plat_data->cap_mask);
504 dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); 504 dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
505 dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); 505 dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
506 dma_cap_set(DMA_MEMSET, plat_data->cap_mask); 506 dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
507 dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); 507 dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
508 dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); 508 dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
509 dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); 509 dma_cap_set(DMA_PQ, plat_data->cap_mask);
510 dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); 510 dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
511 dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask); 511 dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
512 break; 512 break;
513 } 513 }
514 } 514 }
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index 3c127aabe21..da1dd0dab07 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -198,7 +198,7 @@ static int __init iop3xx_adma_cap_init(void)
198 dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); 198 dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
199 #else 199 #else
200 dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); 200 dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
201 dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask); 201 dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
202 dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); 202 dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
203 dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); 203 dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
204 #endif 204 #endif
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
index d8fb3914598..e5aeb2b79e6 100644
--- a/crypto/async_tx/Kconfig
+++ b/crypto/async_tx/Kconfig
@@ -14,3 +14,12 @@ config ASYNC_MEMSET
14 tristate 14 tristate
15 select ASYNC_CORE 15 select ASYNC_CORE
16 16
17config ASYNC_PQ
18 tristate
19 select ASYNC_CORE
20
21config ASYNC_RAID6_RECOV
22 tristate
23 select ASYNC_CORE
24 select ASYNC_PQ
25
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
index 27baa7d52fb..d1e0e6f72bc 100644
--- a/crypto/async_tx/Makefile
+++ b/crypto/async_tx/Makefile
@@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
2obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o 2obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
3obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o 3obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
4obj-$(CONFIG_ASYNC_XOR) += async_xor.o 4obj-$(CONFIG_ASYNC_XOR) += async_xor.o
5obj-$(CONFIG_ASYNC_PQ) += async_pq.o
6obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
7obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index ddccfb01c41..98e15bd0dcb 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -33,28 +33,28 @@
33 * async_memcpy - attempt to copy memory with a dma engine. 33 * async_memcpy - attempt to copy memory with a dma engine.
34 * @dest: destination page 34 * @dest: destination page
35 * @src: src page 35 * @src: src page
36 * @offset: offset in pages to start transaction 36 * @dest_offset: offset into 'dest' to start transaction
37 * @src_offset: offset into 'src' to start transaction
37 * @len: length in bytes 38 * @len: length in bytes
38 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, 39 * @submit: submission / completion modifiers
39 * @depend_tx: memcpy depends on the result of this transaction 40 *
40 * @cb_fn: function to call when the memcpy completes 41 * honored flags: ASYNC_TX_ACK
41 * @cb_param: parameter to pass to the callback routine
42 */ 42 */
43struct dma_async_tx_descriptor * 43struct dma_async_tx_descriptor *
44async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, 44async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
45 unsigned int src_offset, size_t len, enum async_tx_flags flags, 45 unsigned int src_offset, size_t len,
46 struct dma_async_tx_descriptor *depend_tx, 46 struct async_submit_ctl *submit)
47 dma_async_tx_callback cb_fn, void *cb_param)
48{ 47{
49 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, 48 struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
50 &dest, 1, &src, 1, len); 49 &dest, 1, &src, 1, len);
51 struct dma_device *device = chan ? chan->device : NULL; 50 struct dma_device *device = chan ? chan->device : NULL;
52 struct dma_async_tx_descriptor *tx = NULL; 51 struct dma_async_tx_descriptor *tx = NULL;
53 52
54 if (device) { 53 if (device) {
55 dma_addr_t dma_dest, dma_src; 54 dma_addr_t dma_dest, dma_src;
56 unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; 55 unsigned long dma_prep_flags;
57 56
57 dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
58 dma_dest = dma_map_page(device->dev, dest, dest_offset, len, 58 dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
59 DMA_FROM_DEVICE); 59 DMA_FROM_DEVICE);
60 60
@@ -67,13 +67,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
67 67
68 if (tx) { 68 if (tx) {
69 pr_debug("%s: (async) len: %zu\n", __func__, len); 69 pr_debug("%s: (async) len: %zu\n", __func__, len);
70 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 70 async_tx_submit(chan, tx, submit);
71 } else { 71 } else {
72 void *dest_buf, *src_buf; 72 void *dest_buf, *src_buf;
73 pr_debug("%s: (sync) len: %zu\n", __func__, len); 73 pr_debug("%s: (sync) len: %zu\n", __func__, len);
74 74
75 /* wait for any prerequisite operations */ 75 /* wait for any prerequisite operations */
76 async_tx_quiesce(&depend_tx); 76 async_tx_quiesce(&submit->depend_tx);
77 77
78 dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; 78 dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
79 src_buf = kmap_atomic(src, KM_USER1) + src_offset; 79 src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@@ -83,26 +83,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
83 kunmap_atomic(dest_buf, KM_USER0); 83 kunmap_atomic(dest_buf, KM_USER0);
84 kunmap_atomic(src_buf, KM_USER1); 84 kunmap_atomic(src_buf, KM_USER1);
85 85
86 async_tx_sync_epilog(cb_fn, cb_param); 86 async_tx_sync_epilog(submit);
87 } 87 }
88 88
89 return tx; 89 return tx;
90} 90}
91EXPORT_SYMBOL_GPL(async_memcpy); 91EXPORT_SYMBOL_GPL(async_memcpy);
92 92
93static int __init async_memcpy_init(void)
94{
95 return 0;
96}
97
98static void __exit async_memcpy_exit(void)
99{
100 do { } while (0);
101}
102
103module_init(async_memcpy_init);
104module_exit(async_memcpy_exit);
105
106MODULE_AUTHOR("Intel Corporation"); 93MODULE_AUTHOR("Intel Corporation");
107MODULE_DESCRIPTION("asynchronous memcpy api"); 94MODULE_DESCRIPTION("asynchronous memcpy api");
108MODULE_LICENSE("GPL"); 95MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 5b5eb99bb24..b896a6e5f67 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -35,26 +35,23 @@
35 * @val: fill value 35 * @val: fill value
36 * @offset: offset in pages to start transaction 36 * @offset: offset in pages to start transaction
37 * @len: length in bytes 37 * @len: length in bytes
38 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 38 *
39 * @depend_tx: memset depends on the result of this transaction 39 * honored flags: ASYNC_TX_ACK
40 * @cb_fn: function to call when the memcpy completes
41 * @cb_param: parameter to pass to the callback routine
42 */ 40 */
43struct dma_async_tx_descriptor * 41struct dma_async_tx_descriptor *
44async_memset(struct page *dest, int val, unsigned int offset, 42async_memset(struct page *dest, int val, unsigned int offset, size_t len,
45 size_t len, enum async_tx_flags flags, 43 struct async_submit_ctl *submit)
46 struct dma_async_tx_descriptor *depend_tx,
47 dma_async_tx_callback cb_fn, void *cb_param)
48{ 44{
49 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, 45 struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
50 &dest, 1, NULL, 0, len); 46 &dest, 1, NULL, 0, len);
51 struct dma_device *device = chan ? chan->device : NULL; 47 struct dma_device *device = chan ? chan->device : NULL;
52 struct dma_async_tx_descriptor *tx = NULL; 48 struct dma_async_tx_descriptor *tx = NULL;
53 49
54 if (device) { 50 if (device) {
55 dma_addr_t dma_dest; 51 dma_addr_t dma_dest;
56 unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; 52 unsigned long dma_prep_flags;
57 53
54 dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
58 dma_dest = dma_map_page(device->dev, dest, offset, len, 55 dma_dest = dma_map_page(device->dev, dest, offset, len,
59 DMA_FROM_DEVICE); 56 DMA_FROM_DEVICE);
60 57
@@ -64,38 +61,25 @@ async_memset(struct page *dest, int val, unsigned int offset,
64 61
65 if (tx) { 62 if (tx) {
66 pr_debug("%s: (async) len: %zu\n", __func__, len); 63 pr_debug("%s: (async) len: %zu\n", __func__, len);
67 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 64 async_tx_submit(chan, tx, submit);
68 } else { /* run the memset synchronously */ 65 } else { /* run the memset synchronously */
69 void *dest_buf; 66 void *dest_buf;
70 pr_debug("%s: (sync) len: %zu\n", __func__, len); 67 pr_debug("%s: (sync) len: %zu\n", __func__, len);
71 68
72 dest_buf = (void *) (((char *) page_address(dest)) + offset); 69 dest_buf = page_address(dest) + offset;
73 70
74 /* wait for any prerequisite operations */ 71 /* wait for any prerequisite operations */
75 async_tx_quiesce(&depend_tx); 72 async_tx_quiesce(&submit->depend_tx);
76 73
77 memset(dest_buf, val, len); 74 memset(dest_buf, val, len);
78 75
79 async_tx_sync_epilog(cb_fn, cb_param); 76 async_tx_sync_epilog(submit);
80 } 77 }
81 78
82 return tx; 79 return tx;
83} 80}
84EXPORT_SYMBOL_GPL(async_memset); 81EXPORT_SYMBOL_GPL(async_memset);
85 82
86static int __init async_memset_init(void)
87{
88 return 0;
89}
90
91static void __exit async_memset_exit(void)
92{
93 do { } while (0);
94}
95
96module_init(async_memset_init);
97module_exit(async_memset_exit);
98
99MODULE_AUTHOR("Intel Corporation"); 83MODULE_AUTHOR("Intel Corporation");
100MODULE_DESCRIPTION("asynchronous memset api"); 84MODULE_DESCRIPTION("asynchronous memset api");
101MODULE_LICENSE("GPL"); 85MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
new file mode 100644
index 00000000000..108b21efb49
--- /dev/null
+++ b/crypto/async_tx/async_pq.c
@@ -0,0 +1,388 @@
1/*
2 * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
3 * Copyright(c) 2009 Intel Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59
17 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * The full GNU General Public License is included in this distribution in the
20 * file called COPYING.
21 */
22#include <linux/kernel.h>
23#include <linux/interrupt.h>
24#include <linux/dma-mapping.h>
25#include <linux/raid/pq.h>
26#include <linux/async_tx.h>
27
28/**
29 * scribble - space to hold throwaway P or Q buffer for synchronous gen_syndrome
30 */
31static struct page *scribble;
32
33static bool is_raid6_zero_block(struct page *p)
34{
35 return p == (void *) raid6_empty_zero_page;
36}
37
38/* the struct page *blocks[] parameter passed to async_gen_syndrome()
39 * and async_syndrome_val() contains the 'P' destination address at
40 * blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
41 *
42 * note: these are macros as they are used as lvalues
43 */
44#define P(b, d) (b[d-2])
45#define Q(b, d) (b[d-1])
46
47/**
48 * do_async_gen_syndrome - asynchronously calculate P and/or Q
49 */
50static __async_inline struct dma_async_tx_descriptor *
51do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
52 const unsigned char *scfs, unsigned int offset, int disks,
53 size_t len, dma_addr_t *dma_src,
54 struct async_submit_ctl *submit)
55{
56 struct dma_async_tx_descriptor *tx = NULL;
57 struct dma_device *dma = chan->device;
58 enum dma_ctrl_flags dma_flags = 0;
59 enum async_tx_flags flags_orig = submit->flags;
60 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
61 dma_async_tx_callback cb_param_orig = submit->cb_param;
62 int src_cnt = disks - 2;
63 unsigned char coefs[src_cnt];
64 unsigned short pq_src_cnt;
65 dma_addr_t dma_dest[2];
66 int src_off = 0;
67 int idx;
68 int i;
69
70 /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
71 if (P(blocks, disks))
72 dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
73 len, DMA_BIDIRECTIONAL);
74 else
75 dma_flags |= DMA_PREP_PQ_DISABLE_P;
76 if (Q(blocks, disks))
77 dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
78 len, DMA_BIDIRECTIONAL);
79 else
80 dma_flags |= DMA_PREP_PQ_DISABLE_Q;
81
82 /* convert source addresses being careful to collapse 'empty'
83 * sources and update the coefficients accordingly
84 */
85 for (i = 0, idx = 0; i < src_cnt; i++) {
86 if (is_raid6_zero_block(blocks[i]))
87 continue;
88 dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
89 DMA_TO_DEVICE);
90 coefs[idx] = scfs[i];
91 idx++;
92 }
93 src_cnt = idx;
94
95 while (src_cnt > 0) {
96 submit->flags = flags_orig;
97 pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
98 /* if we are submitting additional pqs, leave the chain open,
99 * clear the callback parameters, and leave the destination
100 * buffers mapped
101 */
102 if (src_cnt > pq_src_cnt) {
103 submit->flags &= ~ASYNC_TX_ACK;
104 dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
105 submit->cb_fn = NULL;
106 submit->cb_param = NULL;
107 } else {
108 dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
109 submit->cb_fn = cb_fn_orig;
110 submit->cb_param = cb_param_orig;
111 if (cb_fn_orig)
112 dma_flags |= DMA_PREP_INTERRUPT;
113 }
114
115 /* Since we have clobbered the src_list we are committed
116 * to doing this asynchronously. Drivers force forward
117 * progress in case they can not provide a descriptor
118 */
119 for (;;) {
120 tx = dma->device_prep_dma_pq(chan, dma_dest,
121 &dma_src[src_off],
122 pq_src_cnt,
123 &coefs[src_off], len,
124 dma_flags);
125 if (likely(tx))
126 break;
127 async_tx_quiesce(&submit->depend_tx);
128 dma_async_issue_pending(chan);
129 }
130
131 async_tx_submit(chan, tx, submit);
132 submit->depend_tx = tx;
133
134 /* drop completed sources */
135 src_cnt -= pq_src_cnt;
136 src_off += pq_src_cnt;
137
138 dma_flags |= DMA_PREP_CONTINUE;
139 }
140
141 return tx;
142}
143
144/**
145 * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
146 */
147static void
148do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
149 size_t len, struct async_submit_ctl *submit)
150{
151 void **srcs;
152 int i;
153
154 if (submit->scribble)
155 srcs = submit->scribble;
156 else
157 srcs = (void **) blocks;
158
159 for (i = 0; i < disks; i++) {
160 if (is_raid6_zero_block(blocks[i])) {
161 BUG_ON(i > disks - 3); /* P or Q can't be zero */
162 srcs[i] = blocks[i];
163 } else
164 srcs[i] = page_address(blocks[i]) + offset;
165 }
166 raid6_call.gen_syndrome(disks, len, srcs);
167 async_tx_sync_epilog(submit);
168}
169
170/**
171 * async_gen_syndrome - asynchronously calculate a raid6 syndrome
172 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
173 * @offset: common offset into each block (src and dest) to start transaction
174 * @disks: number of blocks (including missing P or Q, see below)
175 * @len: length of operation in bytes
176 * @submit: submission/completion modifiers
177 *
178 * General note: This routine assumes a field of GF(2^8) with a
179 * primitive polynomial of 0x11d and a generator of {02}.
180 *
181 * 'disks' note: callers can optionally omit either P or Q (but not
182 * both) from the calculation by setting blocks[disks-2] or
183 * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <=
184 * PAGE_SIZE as a temporary buffer of this size is used in the
185 * synchronous path. 'disks' always accounts for both destination
186 * buffers.
187 *
188 * 'blocks' note: if submit->scribble is NULL then the contents of
189 * 'blocks' may be overwritten
190 */
191struct dma_async_tx_descriptor *
192async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
193 size_t len, struct async_submit_ctl *submit)
194{
195 int src_cnt = disks - 2;
196 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
197 &P(blocks, disks), 2,
198 blocks, src_cnt, len);
199 struct dma_device *device = chan ? chan->device : NULL;
200 dma_addr_t *dma_src = NULL;
201
202 BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
203
204 if (submit->scribble)
205 dma_src = submit->scribble;
206 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
207 dma_src = (dma_addr_t *) blocks;
208
209 if (dma_src && device &&
210 (src_cnt <= dma_maxpq(device, 0) ||
211 dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) {
212 /* run the p+q asynchronously */
213 pr_debug("%s: (async) disks: %d len: %zu\n",
214 __func__, disks, len);
215 return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
216 disks, len, dma_src, submit);
217 }
218
219 /* run the pq synchronously */
220 pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
221
222 /* wait for any prerequisite operations */
223 async_tx_quiesce(&submit->depend_tx);
224
225 if (!P(blocks, disks)) {
226 P(blocks, disks) = scribble;
227 BUG_ON(len + offset > PAGE_SIZE);
228 }
229 if (!Q(blocks, disks)) {
230 Q(blocks, disks) = scribble;
231 BUG_ON(len + offset > PAGE_SIZE);
232 }
233 do_sync_gen_syndrome(blocks, offset, disks, len, submit);
234
235 return NULL;
236}
237EXPORT_SYMBOL_GPL(async_gen_syndrome);
238
239/**
240 * async_syndrome_val - asynchronously validate a raid6 syndrome
241 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
242 * @offset: common offset into each block (src and dest) to start transaction
243 * @disks: number of blocks (including missing P or Q, see below)
244 * @len: length of operation in bytes
245 * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
246 * @spare: temporary result buffer for the synchronous case
247 * @submit: submission / completion modifiers
248 *
249 * The same notes from async_gen_syndrome apply to the 'blocks',
250 * and 'disks' parameters of this routine. The synchronous path
251 * requires a temporary result buffer and submit->scribble to be
252 * specified.
253 */
254struct dma_async_tx_descriptor *
255async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
256 size_t len, enum sum_check_flags *pqres, struct page *spare,
257 struct async_submit_ctl *submit)
258{
259 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
260 NULL, 0, blocks, disks,
261 len);
262 struct dma_device *device = chan ? chan->device : NULL;
263 struct dma_async_tx_descriptor *tx;
264 enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
265 dma_addr_t *dma_src = NULL;
266
267 BUG_ON(disks < 4);
268
269 if (submit->scribble)
270 dma_src = submit->scribble;
271 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
272 dma_src = (dma_addr_t *) blocks;
273
274 if (dma_src && device && disks <= dma_maxpq(device, 0)) {
275 struct device *dev = device->dev;
276 dma_addr_t *pq = &dma_src[disks-2];
277 int i;
278
279 pr_debug("%s: (async) disks: %d len: %zu\n",
280 __func__, disks, len);
281 if (!P(blocks, disks))
282 dma_flags |= DMA_PREP_PQ_DISABLE_P;
283 if (!Q(blocks, disks))
284 dma_flags |= DMA_PREP_PQ_DISABLE_Q;
285 for (i = 0; i < disks; i++)
286 if (likely(blocks[i])) {
287 BUG_ON(is_raid6_zero_block(blocks[i]));
288 dma_src[i] = dma_map_page(dev, blocks[i],
289 offset, len,
290 DMA_TO_DEVICE);
291 }
292
293 for (;;) {
294 tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
295 disks - 2,
296 raid6_gfexp,
297 len, pqres,
298 dma_flags);
299 if (likely(tx))
300 break;
301 async_tx_quiesce(&submit->depend_tx);
302 dma_async_issue_pending(chan);
303 }
304 async_tx_submit(chan, tx, submit);
305
306 return tx;
307 } else {
308 struct page *p_src = P(blocks, disks);
309 struct page *q_src = Q(blocks, disks);
310 enum async_tx_flags flags_orig = submit->flags;
311 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
312 void *scribble = submit->scribble;
313 void *cb_param_orig = submit->cb_param;
314 void *p, *q, *s;
315
316 pr_debug("%s: (sync) disks: %d len: %zu\n",
317 __func__, disks, len);
318
319 /* caller must provide a temporary result buffer and
320 * allow the input parameters to be preserved
321 */
322 BUG_ON(!spare || !scribble);
323
324 /* wait for any prerequisite operations */
325 async_tx_quiesce(&submit->depend_tx);
326
327 /* recompute p and/or q into the temporary buffer and then
328 * check to see the result matches the current value
329 */
330 tx = NULL;
331 *pqres = 0;
332 if (p_src) {
333 init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
334 NULL, NULL, scribble);
335 tx = async_xor(spare, blocks, offset, disks-2, len, submit);
336 async_tx_quiesce(&tx);
337 p = page_address(p_src) + offset;
338 s = page_address(spare) + offset;
339 *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
340 }
341
342 if (q_src) {
343 P(blocks, disks) = NULL;
344 Q(blocks, disks) = spare;
345 init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
346 tx = async_gen_syndrome(blocks, offset, disks, len, submit);
347 async_tx_quiesce(&tx);
348 q = page_address(q_src) + offset;
349 s = page_address(spare) + offset;
350 *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
351 }
352
353 /* restore P, Q and submit */
354 P(blocks, disks) = p_src;
355 Q(blocks, disks) = q_src;
356
357 submit->cb_fn = cb_fn_orig;
358 submit->cb_param = cb_param_orig;
359 submit->flags = flags_orig;
360 async_tx_sync_epilog(submit);
361
362 return NULL;
363 }
364}
365EXPORT_SYMBOL_GPL(async_syndrome_val);
366
367static int __init async_pq_init(void)
368{
369 scribble = alloc_page(GFP_KERNEL);
370
371 if (scribble)
372 return 0;
373
374 pr_err("%s: failed to allocate required spare page\n", __func__);
375
376 return -ENOMEM;
377}
378
379static void __exit async_pq_exit(void)
380{
381 put_page(scribble);
382}
383
384module_init(async_pq_init);
385module_exit(async_pq_exit);
386
387MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
388MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
new file mode 100644
index 00000000000..0c14d48c989
--- /dev/null
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -0,0 +1,448 @@
1/*
2 * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
3 * Copyright(c) 2009 Intel Corporation
4 *
5 * based on raid6recov.c:
6 * Copyright 2002 H. Peter Anvin
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
11 * any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 51
20 * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 */
23#include <linux/kernel.h>
24#include <linux/interrupt.h>
25#include <linux/dma-mapping.h>
26#include <linux/raid/pq.h>
27#include <linux/async_tx.h>
28
29static struct dma_async_tx_descriptor *
30async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
31 size_t len, struct async_submit_ctl *submit)
32{
33 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
34 &dest, 1, srcs, 2, len);
35 struct dma_device *dma = chan ? chan->device : NULL;
36 const u8 *amul, *bmul;
37 u8 ax, bx;
38 u8 *a, *b, *c;
39
40 if (dma) {
41 dma_addr_t dma_dest[2];
42 dma_addr_t dma_src[2];
43 struct device *dev = dma->dev;
44 struct dma_async_tx_descriptor *tx;
45 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
46
47 dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
48 dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
49 dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
50 tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
51 len, dma_flags);
52 if (tx) {
53 async_tx_submit(chan, tx, submit);
54 return tx;
55 }
56 }
57
58 /* run the operation synchronously */
59 async_tx_quiesce(&submit->depend_tx);
60 amul = raid6_gfmul[coef[0]];
61 bmul = raid6_gfmul[coef[1]];
62 a = page_address(srcs[0]);
63 b = page_address(srcs[1]);
64 c = page_address(dest);
65
66 while (len--) {
67 ax = amul[*a++];
68 bx = bmul[*b++];
69 *c++ = ax ^ bx;
70 }
71
72 return NULL;
73}
74
75static struct dma_async_tx_descriptor *
76async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
77 struct async_submit_ctl *submit)
78{
79 struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
80 &dest, 1, &src, 1, len);
81 struct dma_device *dma = chan ? chan->device : NULL;
82 const u8 *qmul; /* Q multiplier table */
83 u8 *d, *s;
84
85 if (dma) {
86 dma_addr_t dma_dest[2];
87 dma_addr_t dma_src[1];
88 struct device *dev = dma->dev;
89 struct dma_async_tx_descriptor *tx;
90 enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
91
92 dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
93 dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
94 tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
95 len, dma_flags);
96 if (tx) {
97 async_tx_submit(chan, tx, submit);
98 return tx;
99 }
100 }
101
102 /* no channel available, or failed to allocate a descriptor, so
103 * perform the operation synchronously
104 */
105 async_tx_quiesce(&submit->depend_tx);
106 qmul = raid6_gfmul[coef];
107 d = page_address(dest);
108 s = page_address(src);
109
110 while (len--)
111 *d++ = qmul[*s++];
112
113 return NULL;
114}
115
116static struct dma_async_tx_descriptor *
117__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
118 struct async_submit_ctl *submit)
119{
120 struct dma_async_tx_descriptor *tx = NULL;
121 struct page *p, *q, *a, *b;
122 struct page *srcs[2];
123 unsigned char coef[2];
124 enum async_tx_flags flags = submit->flags;
125 dma_async_tx_callback cb_fn = submit->cb_fn;
126 void *cb_param = submit->cb_param;
127 void *scribble = submit->scribble;
128
129 p = blocks[4-2];
130 q = blocks[4-1];
131
132 a = blocks[faila];
133 b = blocks[failb];
134
135 /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
136 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
137 srcs[0] = p;
138 srcs[1] = q;
139 coef[0] = raid6_gfexi[failb-faila];
140 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
141 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
142 tx = async_sum_product(b, srcs, coef, bytes, submit);
143
144 /* Dy = P+Pxy+Dx */
145 srcs[0] = p;
146 srcs[1] = b;
147 init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
148 cb_param, scribble);
149 tx = async_xor(a, srcs, 0, 2, bytes, submit);
150
151 return tx;
152
153}
154
155static struct dma_async_tx_descriptor *
156__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
157 struct async_submit_ctl *submit)
158{
159 struct dma_async_tx_descriptor *tx = NULL;
160 struct page *p, *q, *g, *dp, *dq;
161 struct page *srcs[2];
162 unsigned char coef[2];
163 enum async_tx_flags flags = submit->flags;
164 dma_async_tx_callback cb_fn = submit->cb_fn;
165 void *cb_param = submit->cb_param;
166 void *scribble = submit->scribble;
167 int uninitialized_var(good);
168 int i;
169
170 for (i = 0; i < 3; i++) {
171 if (i == faila || i == failb)
172 continue;
173 else {
174 good = i;
175 break;
176 }
177 }
178 BUG_ON(i >= 3);
179
180 p = blocks[5-2];
181 q = blocks[5-1];
182 g = blocks[good];
183
184 /* Compute syndrome with zero for the missing data pages
185 * Use the dead data pages as temporary storage for delta p and
186 * delta q
187 */
188 dp = blocks[faila];
189 dq = blocks[failb];
190
191 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
192 tx = async_memcpy(dp, g, 0, 0, bytes, submit);
193 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
194 tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
195
196 /* compute P + Pxy */
197 srcs[0] = dp;
198 srcs[1] = p;
199 init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
200 scribble);
201 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
202
203 /* compute Q + Qxy */
204 srcs[0] = dq;
205 srcs[1] = q;
206 init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
207 scribble);
208 tx = async_xor(dq, srcs, 0, 2, bytes, submit);
209
210 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
211 srcs[0] = dp;
212 srcs[1] = dq;
213 coef[0] = raid6_gfexi[failb-faila];
214 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
215 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
216 tx = async_sum_product(dq, srcs, coef, bytes, submit);
217
218 /* Dy = P+Pxy+Dx */
219 srcs[0] = dp;
220 srcs[1] = dq;
221 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
222 cb_param, scribble);
223 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
224
225 return tx;
226}
227
228static struct dma_async_tx_descriptor *
229__2data_recov_n(int disks, size_t bytes, int faila, int failb,
230 struct page **blocks, struct async_submit_ctl *submit)
231{
232 struct dma_async_tx_descriptor *tx = NULL;
233 struct page *p, *q, *dp, *dq;
234 struct page *srcs[2];
235 unsigned char coef[2];
236 enum async_tx_flags flags = submit->flags;
237 dma_async_tx_callback cb_fn = submit->cb_fn;
238 void *cb_param = submit->cb_param;
239 void *scribble = submit->scribble;
240
241 p = blocks[disks-2];
242 q = blocks[disks-1];
243
244 /* Compute syndrome with zero for the missing data pages
245 * Use the dead data pages as temporary storage for
246 * delta p and delta q
247 */
248 dp = blocks[faila];
249 blocks[faila] = (void *)raid6_empty_zero_page;
250 blocks[disks-2] = dp;
251 dq = blocks[failb];
252 blocks[failb] = (void *)raid6_empty_zero_page;
253 blocks[disks-1] = dq;
254
255 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
256 tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
257
258 /* Restore pointer table */
259 blocks[faila] = dp;
260 blocks[failb] = dq;
261 blocks[disks-2] = p;
262 blocks[disks-1] = q;
263
264 /* compute P + Pxy */
265 srcs[0] = dp;
266 srcs[1] = p;
267 init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
268 scribble);
269 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
270
271 /* compute Q + Qxy */
272 srcs[0] = dq;
273 srcs[1] = q;
274 init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
275 scribble);
276 tx = async_xor(dq, srcs, 0, 2, bytes, submit);
277
278 /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
279 srcs[0] = dp;
280 srcs[1] = dq;
281 coef[0] = raid6_gfexi[failb-faila];
282 coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
283 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
284 tx = async_sum_product(dq, srcs, coef, bytes, submit);
285
286 /* Dy = P+Pxy+Dx */
287 srcs[0] = dp;
288 srcs[1] = dq;
289 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
290 cb_param, scribble);
291 tx = async_xor(dp, srcs, 0, 2, bytes, submit);
292
293 return tx;
294}
295
296/**
297 * async_raid6_2data_recov - asynchronously calculate two missing data blocks
298 * @disks: number of disks in the RAID-6 array
299 * @bytes: block size
300 * @faila: first failed drive index
301 * @failb: second failed drive index
302 * @blocks: array of source pointers where the last two entries are p and q
303 * @submit: submission/completion modifiers
304 */
305struct dma_async_tx_descriptor *
306async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
307 struct page **blocks, struct async_submit_ctl *submit)
308{
309 BUG_ON(faila == failb);
310 if (failb < faila)
311 swap(faila, failb);
312
313 pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
314
315 /* we need to preserve the contents of 'blocks' for the async
316 * case, so punt to synchronous if a scribble buffer is not available
317 */
318 if (!submit->scribble) {
319 void **ptrs = (void **) blocks;
320 int i;
321
322 async_tx_quiesce(&submit->depend_tx);
323 for (i = 0; i < disks; i++)
324 ptrs[i] = page_address(blocks[i]);
325
326 raid6_2data_recov(disks, bytes, faila, failb, ptrs);
327
328 async_tx_sync_epilog(submit);
329
330 return NULL;
331 }
332
333 switch (disks) {
334 case 4:
335 /* dma devices do not uniformly understand a zero source pq
336 * operation (in contrast to the synchronous case), so
337 * explicitly handle the 4 disk special case
338 */
339 return __2data_recov_4(bytes, faila, failb, blocks, submit);
340 case 5:
341 /* dma devices do not uniformly understand a single
342 * source pq operation (in contrast to the synchronous
343 * case), so explicitly handle the 5 disk special case
344 */
345 return __2data_recov_5(bytes, faila, failb, blocks, submit);
346 default:
347 return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
348 }
349}
350EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
351
352/**
353 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
354 * @disks: number of disks in the RAID-6 array
355 * @bytes: block size
356 * @faila: failed drive index
357 * @blocks: array of source pointers where the last two entries are p and q
358 * @submit: submission/completion modifiers
359 */
360struct dma_async_tx_descriptor *
361async_raid6_datap_recov(int disks, size_t bytes, int faila,
362 struct page **blocks, struct async_submit_ctl *submit)
363{
364 struct dma_async_tx_descriptor *tx = NULL;
365 struct page *p, *q, *dq;
366 u8 coef;
367 enum async_tx_flags flags = submit->flags;
368 dma_async_tx_callback cb_fn = submit->cb_fn;
369 void *cb_param = submit->cb_param;
370 void *scribble = submit->scribble;
371 struct page *srcs[2];
372
373 pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
374
375 /* we need to preserve the contents of 'blocks' for the async
376 * case, so punt to synchronous if a scribble buffer is not available
377 */
378 if (!scribble) {
379 void **ptrs = (void **) blocks;
380 int i;
381
382 async_tx_quiesce(&submit->depend_tx);
383 for (i = 0; i < disks; i++)
384 ptrs[i] = page_address(blocks[i]);
385
386 raid6_datap_recov(disks, bytes, faila, ptrs);
387
388 async_tx_sync_epilog(submit);
389
390 return NULL;
391 }
392
393 p = blocks[disks-2];
394 q = blocks[disks-1];
395
396 /* Compute syndrome with zero for the missing data page
397 * Use the dead data page as temporary storage for delta q
398 */
399 dq = blocks[faila];
400 blocks[faila] = (void *)raid6_empty_zero_page;
401 blocks[disks-1] = dq;
402
403 /* in the 4 disk case we only need to perform a single source
404 * multiplication
405 */
406 if (disks == 4) {
407 int good = faila == 0 ? 1 : 0;
408 struct page *g = blocks[good];
409
410 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
411 tx = async_memcpy(p, g, 0, 0, bytes, submit);
412
413 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
414 tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
415 } else {
416 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
417 tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
418 }
419
420 /* Restore pointer table */
421 blocks[faila] = dq;
422 blocks[disks-1] = q;
423
424 /* calculate g^{-faila} */
425 coef = raid6_gfinv[raid6_gfexp[faila]];
426
427 srcs[0] = dq;
428 srcs[1] = q;
429 init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
430 scribble);
431 tx = async_xor(dq, srcs, 0, 2, bytes, submit);
432
433 init_async_submit(submit, 0, tx, NULL, NULL, scribble);
434 tx = async_mult(dq, dq, coef, bytes, submit);
435
436 srcs[0] = p;
437 srcs[1] = dq;
438 init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
439 cb_param, scribble);
440 tx = async_xor(p, srcs, 0, 2, bytes, submit);
441
442 return tx;
443}
444EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
445
446MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
447MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
448MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 06eb6cc09fe..60615fedcf5 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -42,16 +42,21 @@ static void __exit async_tx_exit(void)
42 async_dmaengine_put(); 42 async_dmaengine_put();
43} 43}
44 44
45module_init(async_tx_init);
46module_exit(async_tx_exit);
47
45/** 48/**
46 * __async_tx_find_channel - find a channel to carry out the operation or let 49 * __async_tx_find_channel - find a channel to carry out the operation or let
47 * the transaction execute synchronously 50 * the transaction execute synchronously
48 * @depend_tx: transaction dependency 51 * @submit: transaction dependency and submission modifiers
49 * @tx_type: transaction type 52 * @tx_type: transaction type
50 */ 53 */
51struct dma_chan * 54struct dma_chan *
52__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 55__async_tx_find_channel(struct async_submit_ctl *submit,
53 enum dma_transaction_type tx_type) 56 enum dma_transaction_type tx_type)
54{ 57{
58 struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
59
55 /* see if we can keep the chain on one channel */ 60 /* see if we can keep the chain on one channel */
56 if (depend_tx && 61 if (depend_tx &&
57 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) 62 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
@@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
59 return async_dma_find_channel(tx_type); 64 return async_dma_find_channel(tx_type);
60} 65}
61EXPORT_SYMBOL_GPL(__async_tx_find_channel); 66EXPORT_SYMBOL_GPL(__async_tx_find_channel);
62#else
63static int __init async_tx_init(void)
64{
65 printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
66 return 0;
67}
68
69static void __exit async_tx_exit(void)
70{
71 do { } while (0);
72}
73#endif 67#endif
74 68
75 69
@@ -83,8 +77,8 @@ static void
83async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, 77async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
84 struct dma_async_tx_descriptor *tx) 78 struct dma_async_tx_descriptor *tx)
85{ 79{
86 struct dma_chan *chan; 80 struct dma_chan *chan = depend_tx->chan;
87 struct dma_device *device; 81 struct dma_device *device = chan->device;
88 struct dma_async_tx_descriptor *intr_tx = (void *) ~0; 82 struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
89 83
90 /* first check to see if we can still append to depend_tx */ 84 /* first check to see if we can still append to depend_tx */
@@ -96,11 +90,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
96 } 90 }
97 spin_unlock_bh(&depend_tx->lock); 91 spin_unlock_bh(&depend_tx->lock);
98 92
99 if (!intr_tx) 93 /* attached dependency, flush the parent channel */
94 if (!intr_tx) {
95 device->device_issue_pending(chan);
100 return; 96 return;
101 97 }
102 chan = depend_tx->chan;
103 device = chan->device;
104 98
105 /* see if we can schedule an interrupt 99 /* see if we can schedule an interrupt
106 * otherwise poll for completion 100 * otherwise poll for completion
@@ -134,6 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
134 intr_tx->tx_submit(intr_tx); 128 intr_tx->tx_submit(intr_tx);
135 async_tx_ack(intr_tx); 129 async_tx_ack(intr_tx);
136 } 130 }
131 device->device_issue_pending(chan);
137 } else { 132 } else {
138 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) 133 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
139 panic("%s: DMA_ERROR waiting for depend_tx\n", 134 panic("%s: DMA_ERROR waiting for depend_tx\n",
@@ -144,13 +139,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
144 139
145 140
146/** 141/**
147 * submit_disposition - while holding depend_tx->lock we must avoid submitting 142 * submit_disposition - flags for routing an incoming operation
148 * new operations to prevent a circular locking dependency with
149 * drivers that already hold a channel lock when calling
150 * async_tx_run_dependencies.
151 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock 143 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
152 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch 144 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
153 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly 145 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
146 *
147 * while holding depend_tx->lock we must avoid submitting new operations
148 * to prevent a circular locking dependency with drivers that already
149 * hold a channel lock when calling async_tx_run_dependencies.
154 */ 150 */
155enum submit_disposition { 151enum submit_disposition {
156 ASYNC_TX_SUBMITTED, 152 ASYNC_TX_SUBMITTED,
@@ -160,11 +156,12 @@ enum submit_disposition {
160 156
161void 157void
162async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, 158async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
163 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, 159 struct async_submit_ctl *submit)
164 dma_async_tx_callback cb_fn, void *cb_param)
165{ 160{
166 tx->callback = cb_fn; 161 struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
167 tx->callback_param = cb_param; 162
163 tx->callback = submit->cb_fn;
164 tx->callback_param = submit->cb_param;
168 165
169 if (depend_tx) { 166 if (depend_tx) {
170 enum submit_disposition s; 167 enum submit_disposition s;
@@ -220,30 +217,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
220 tx->tx_submit(tx); 217 tx->tx_submit(tx);
221 } 218 }
222 219
223 if (flags & ASYNC_TX_ACK) 220 if (submit->flags & ASYNC_TX_ACK)
224 async_tx_ack(tx); 221 async_tx_ack(tx);
225 222
226 if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) 223 if (depend_tx)
227 async_tx_ack(depend_tx); 224 async_tx_ack(depend_tx);
228} 225}
229EXPORT_SYMBOL_GPL(async_tx_submit); 226EXPORT_SYMBOL_GPL(async_tx_submit);
230 227
231/** 228/**
232 * async_trigger_callback - schedules the callback function to be run after 229 * async_trigger_callback - schedules the callback function to be run
233 * any dependent operations have been completed. 230 * @submit: submission and completion parameters
234 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 231 *
235 * @depend_tx: 'callback' requires the completion of this transaction 232 * honored flags: ASYNC_TX_ACK
236 * @cb_fn: function to call after depend_tx completes 233 *
237 * @cb_param: parameter to pass to the callback routine 234 * The callback is run after any dependent operations have completed.
238 */ 235 */
239struct dma_async_tx_descriptor * 236struct dma_async_tx_descriptor *
240async_trigger_callback(enum async_tx_flags flags, 237async_trigger_callback(struct async_submit_ctl *submit)
241 struct dma_async_tx_descriptor *depend_tx,
242 dma_async_tx_callback cb_fn, void *cb_param)
243{ 238{
244 struct dma_chan *chan; 239 struct dma_chan *chan;
245 struct dma_device *device; 240 struct dma_device *device;
246 struct dma_async_tx_descriptor *tx; 241 struct dma_async_tx_descriptor *tx;
242 struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
247 243
248 if (depend_tx) { 244 if (depend_tx) {
249 chan = depend_tx->chan; 245 chan = depend_tx->chan;
@@ -262,14 +258,14 @@ async_trigger_callback(enum async_tx_flags flags,
262 if (tx) { 258 if (tx) {
263 pr_debug("%s: (async)\n", __func__); 259 pr_debug("%s: (async)\n", __func__);
264 260
265 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 261 async_tx_submit(chan, tx, submit);
266 } else { 262 } else {
267 pr_debug("%s: (sync)\n", __func__); 263 pr_debug("%s: (sync)\n", __func__);
268 264
269 /* wait for any prerequisite operations */ 265 /* wait for any prerequisite operations */
270 async_tx_quiesce(&depend_tx); 266 async_tx_quiesce(&submit->depend_tx);
271 267
272 async_tx_sync_epilog(cb_fn, cb_param); 268 async_tx_sync_epilog(submit);
273 } 269 }
274 270
275 return tx; 271 return tx;
@@ -295,9 +291,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
295} 291}
296EXPORT_SYMBOL_GPL(async_tx_quiesce); 292EXPORT_SYMBOL_GPL(async_tx_quiesce);
297 293
298module_init(async_tx_init);
299module_exit(async_tx_exit);
300
301MODULE_AUTHOR("Intel Corporation"); 294MODULE_AUTHOR("Intel Corporation");
302MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); 295MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
303MODULE_LICENSE("GPL"); 296MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 95fe2c8d6c5..56b5f98da46 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -33,19 +33,16 @@
33/* do_async_xor - dma map the pages and perform the xor with an engine */ 33/* do_async_xor - dma map the pages and perform the xor with an engine */
34static __async_inline struct dma_async_tx_descriptor * 34static __async_inline struct dma_async_tx_descriptor *
35do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, 35do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
36 unsigned int offset, int src_cnt, size_t len, 36 unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
37 enum async_tx_flags flags, 37 struct async_submit_ctl *submit)
38 struct dma_async_tx_descriptor *depend_tx,
39 dma_async_tx_callback cb_fn, void *cb_param)
40{ 38{
41 struct dma_device *dma = chan->device; 39 struct dma_device *dma = chan->device;
42 dma_addr_t *dma_src = (dma_addr_t *) src_list;
43 struct dma_async_tx_descriptor *tx = NULL; 40 struct dma_async_tx_descriptor *tx = NULL;
44 int src_off = 0; 41 int src_off = 0;
45 int i; 42 int i;
46 dma_async_tx_callback _cb_fn; 43 dma_async_tx_callback cb_fn_orig = submit->cb_fn;
47 void *_cb_param; 44 void *cb_param_orig = submit->cb_param;
48 enum async_tx_flags async_flags; 45 enum async_tx_flags flags_orig = submit->flags;
49 enum dma_ctrl_flags dma_flags; 46 enum dma_ctrl_flags dma_flags;
50 int xor_src_cnt; 47 int xor_src_cnt;
51 dma_addr_t dma_dest; 48 dma_addr_t dma_dest;
@@ -63,23 +60,23 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
63 } 60 }
64 61
65 while (src_cnt) { 62 while (src_cnt) {
66 async_flags = flags; 63 submit->flags = flags_orig;
67 dma_flags = 0; 64 dma_flags = 0;
68 xor_src_cnt = min(src_cnt, dma->max_xor); 65 xor_src_cnt = min(src_cnt, (int)dma->max_xor);
69 /* if we are submitting additional xors, leave the chain open, 66 /* if we are submitting additional xors, leave the chain open,
70 * clear the callback parameters, and leave the destination 67 * clear the callback parameters, and leave the destination
71 * buffer mapped 68 * buffer mapped
72 */ 69 */
73 if (src_cnt > xor_src_cnt) { 70 if (src_cnt > xor_src_cnt) {
74 async_flags &= ~ASYNC_TX_ACK; 71 submit->flags &= ~ASYNC_TX_ACK;
75 dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; 72 dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
76 _cb_fn = NULL; 73 submit->cb_fn = NULL;
77 _cb_param = NULL; 74 submit->cb_param = NULL;
78 } else { 75 } else {
79 _cb_fn = cb_fn; 76 submit->cb_fn = cb_fn_orig;
80 _cb_param = cb_param; 77 submit->cb_param = cb_param_orig;
81 } 78 }
82 if (_cb_fn) 79 if (submit->cb_fn)
83 dma_flags |= DMA_PREP_INTERRUPT; 80 dma_flags |= DMA_PREP_INTERRUPT;
84 81
85 /* Since we have clobbered the src_list we are committed 82 /* Since we have clobbered the src_list we are committed
@@ -90,7 +87,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
90 xor_src_cnt, len, dma_flags); 87 xor_src_cnt, len, dma_flags);
91 88
92 if (unlikely(!tx)) 89 if (unlikely(!tx))
93 async_tx_quiesce(&depend_tx); 90 async_tx_quiesce(&submit->depend_tx);
94 91
95 /* spin wait for the preceeding transactions to complete */ 92 /* spin wait for the preceeding transactions to complete */
96 while (unlikely(!tx)) { 93 while (unlikely(!tx)) {
@@ -101,11 +98,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
101 dma_flags); 98 dma_flags);
102 } 99 }
103 100
104 async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, 101 async_tx_submit(chan, tx, submit);
105 _cb_param); 102 submit->depend_tx = tx;
106
107 depend_tx = tx;
108 flags |= ASYNC_TX_DEP_ACK;
109 103
110 if (src_cnt > xor_src_cnt) { 104 if (src_cnt > xor_src_cnt) {
111 /* drop completed sources */ 105 /* drop completed sources */
@@ -124,23 +118,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
124 118
125static void 119static void
126do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, 120do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
127 int src_cnt, size_t len, enum async_tx_flags flags, 121 int src_cnt, size_t len, struct async_submit_ctl *submit)
128 dma_async_tx_callback cb_fn, void *cb_param)
129{ 122{
130 int i; 123 int i;
131 int xor_src_cnt; 124 int xor_src_cnt;
132 int src_off = 0; 125 int src_off = 0;
133 void *dest_buf; 126 void *dest_buf;
134 void **srcs = (void **) src_list; 127 void **srcs;
128
129 if (submit->scribble)
130 srcs = submit->scribble;
131 else
132 srcs = (void **) src_list;
135 133
136 /* reuse the 'src_list' array to convert to buffer pointers */ 134 /* convert to buffer pointers */
137 for (i = 0; i < src_cnt; i++) 135 for (i = 0; i < src_cnt; i++)
138 srcs[i] = page_address(src_list[i]) + offset; 136 srcs[i] = page_address(src_list[i]) + offset;
139 137
140 /* set destination address */ 138 /* set destination address */
141 dest_buf = page_address(dest) + offset; 139 dest_buf = page_address(dest) + offset;
142 140
143 if (flags & ASYNC_TX_XOR_ZERO_DST) 141 if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
144 memset(dest_buf, 0, len); 142 memset(dest_buf, 0, len);
145 143
146 while (src_cnt > 0) { 144 while (src_cnt > 0) {
@@ -153,61 +151,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
153 src_off += xor_src_cnt; 151 src_off += xor_src_cnt;
154 } 152 }
155 153
156 async_tx_sync_epilog(cb_fn, cb_param); 154 async_tx_sync_epilog(submit);
157} 155}
158 156
159/** 157/**
160 * async_xor - attempt to xor a set of blocks with a dma engine. 158 * async_xor - attempt to xor a set of blocks with a dma engine.
161 * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
162 * flag must be set to not include dest data in the calculation. The
163 * assumption with dma eninges is that they only use the destination
164 * buffer as a source when it is explicity specified in the source list.
165 * @dest: destination page 159 * @dest: destination page
166 * @src_list: array of source pages (if the dest is also a source it must be 160 * @src_list: array of source pages
167 * at index zero). The contents of this array may be overwritten. 161 * @offset: common src/dst offset to start transaction
168 * @offset: offset in pages to start transaction
169 * @src_cnt: number of source pages 162 * @src_cnt: number of source pages
170 * @len: length in bytes 163 * @len: length in bytes
171 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, 164 * @submit: submission / completion modifiers
172 * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 165 *
173 * @depend_tx: xor depends on the result of this transaction. 166 * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
174 * @cb_fn: function to call when the xor completes 167 *
175 * @cb_param: parameter to pass to the callback routine 168 * xor_blocks always uses the dest as a source so the
169 * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
170 * the calculation. The assumption with dma engines is that they only
171 * use the destination buffer as a source when it is explicitly specified
172 * in the source list.
173 *
174 * src_list note: if the dest is also a source it must be at index zero.
175 * The contents of this array will be overwritten if a scribble region
176 * is not specified.
176 */ 177 */
177struct dma_async_tx_descriptor * 178struct dma_async_tx_descriptor *
178async_xor(struct page *dest, struct page **src_list, unsigned int offset, 179async_xor(struct page *dest, struct page **src_list, unsigned int offset,
179 int src_cnt, size_t len, enum async_tx_flags flags, 180 int src_cnt, size_t len, struct async_submit_ctl *submit)
180 struct dma_async_tx_descriptor *depend_tx,
181 dma_async_tx_callback cb_fn, void *cb_param)
182{ 181{
183 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, 182 struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
184 &dest, 1, src_list, 183 &dest, 1, src_list,
185 src_cnt, len); 184 src_cnt, len);
185 dma_addr_t *dma_src = NULL;
186
186 BUG_ON(src_cnt <= 1); 187 BUG_ON(src_cnt <= 1);
187 188
188 if (chan) { 189 if (submit->scribble)
190 dma_src = submit->scribble;
191 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
192 dma_src = (dma_addr_t *) src_list;
193
194 if (dma_src && chan) {
189 /* run the xor asynchronously */ 195 /* run the xor asynchronously */
190 pr_debug("%s (async): len: %zu\n", __func__, len); 196 pr_debug("%s (async): len: %zu\n", __func__, len);
191 197
192 return do_async_xor(chan, dest, src_list, offset, src_cnt, len, 198 return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
193 flags, depend_tx, cb_fn, cb_param); 199 dma_src, submit);
194 } else { 200 } else {
195 /* run the xor synchronously */ 201 /* run the xor synchronously */
196 pr_debug("%s (sync): len: %zu\n", __func__, len); 202 pr_debug("%s (sync): len: %zu\n", __func__, len);
203 WARN_ONCE(chan, "%s: no space for dma address conversion\n",
204 __func__);
197 205
198 /* in the sync case the dest is an implied source 206 /* in the sync case the dest is an implied source
199 * (assumes the dest is the first source) 207 * (assumes the dest is the first source)
200 */ 208 */
201 if (flags & ASYNC_TX_XOR_DROP_DST) { 209 if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
202 src_cnt--; 210 src_cnt--;
203 src_list++; 211 src_list++;
204 } 212 }
205 213
206 /* wait for any prerequisite operations */ 214 /* wait for any prerequisite operations */
207 async_tx_quiesce(&depend_tx); 215 async_tx_quiesce(&submit->depend_tx);
208 216
209 do_sync_xor(dest, src_list, offset, src_cnt, len, 217 do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
210 flags, cb_fn, cb_param);
211 218
212 return NULL; 219 return NULL;
213 } 220 }
@@ -222,104 +229,90 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
222} 229}
223 230
224/** 231/**
225 * async_xor_zero_sum - attempt a xor parity check with a dma engine. 232 * async_xor_val - attempt a xor parity check with a dma engine.
226 * @dest: destination page used if the xor is performed synchronously 233 * @dest: destination page used if the xor is performed synchronously
227 * @src_list: array of source pages. The dest page must be listed as a source 234 * @src_list: array of source pages
228 * at index zero. The contents of this array may be overwritten.
229 * @offset: offset in pages to start transaction 235 * @offset: offset in pages to start transaction
230 * @src_cnt: number of source pages 236 * @src_cnt: number of source pages
231 * @len: length in bytes 237 * @len: length in bytes
232 * @result: 0 if sum == 0 else non-zero 238 * @result: 0 if sum == 0 else non-zero
233 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 239 * @submit: submission / completion modifiers
234 * @depend_tx: xor depends on the result of this transaction. 240 *
235 * @cb_fn: function to call when the xor completes 241 * honored flags: ASYNC_TX_ACK
236 * @cb_param: parameter to pass to the callback routine 242 *
243 * src_list note: if the dest is also a source it must be at index zero.
244 * The contents of this array will be overwritten if a scribble region
245 * is not specified.
237 */ 246 */
238struct dma_async_tx_descriptor * 247struct dma_async_tx_descriptor *
239async_xor_zero_sum(struct page *dest, struct page **src_list, 248async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
240 unsigned int offset, int src_cnt, size_t len, 249 int src_cnt, size_t len, enum sum_check_flags *result,
241 u32 *result, enum async_tx_flags flags, 250 struct async_submit_ctl *submit)
242 struct dma_async_tx_descriptor *depend_tx,
243 dma_async_tx_callback cb_fn, void *cb_param)
244{ 251{
245 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, 252 struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
246 &dest, 1, src_list, 253 &dest, 1, src_list,
247 src_cnt, len); 254 src_cnt, len);
248 struct dma_device *device = chan ? chan->device : NULL; 255 struct dma_device *device = chan ? chan->device : NULL;
249 struct dma_async_tx_descriptor *tx = NULL; 256 struct dma_async_tx_descriptor *tx = NULL;
257 dma_addr_t *dma_src = NULL;
250 258
251 BUG_ON(src_cnt <= 1); 259 BUG_ON(src_cnt <= 1);
252 260
253 if (device && src_cnt <= device->max_xor) { 261 if (submit->scribble)
254 dma_addr_t *dma_src = (dma_addr_t *) src_list; 262 dma_src = submit->scribble;
255 unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; 263 else if (sizeof(dma_addr_t) <= sizeof(struct page *))
264 dma_src = (dma_addr_t *) src_list;
265
266 if (dma_src && device && src_cnt <= device->max_xor) {
267 unsigned long dma_prep_flags;
256 int i; 268 int i;
257 269
258 pr_debug("%s: (async) len: %zu\n", __func__, len); 270 pr_debug("%s: (async) len: %zu\n", __func__, len);
259 271
272 dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
260 for (i = 0; i < src_cnt; i++) 273 for (i = 0; i < src_cnt; i++)
261 dma_src[i] = dma_map_page(device->dev, src_list[i], 274 dma_src[i] = dma_map_page(device->dev, src_list[i],
262 offset, len, DMA_TO_DEVICE); 275 offset, len, DMA_TO_DEVICE);
263 276
264 tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, 277 tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
265 len, result, 278 len, result,
266 dma_prep_flags); 279 dma_prep_flags);
267 if (unlikely(!tx)) { 280 if (unlikely(!tx)) {
268 async_tx_quiesce(&depend_tx); 281 async_tx_quiesce(&submit->depend_tx);
269 282
270 while (!tx) { 283 while (!tx) {
271 dma_async_issue_pending(chan); 284 dma_async_issue_pending(chan);
272 tx = device->device_prep_dma_zero_sum(chan, 285 tx = device->device_prep_dma_xor_val(chan,
273 dma_src, src_cnt, len, result, 286 dma_src, src_cnt, len, result,
274 dma_prep_flags); 287 dma_prep_flags);
275 } 288 }
276 } 289 }
277 290
278 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 291 async_tx_submit(chan, tx, submit);
279 } else { 292 } else {
280 unsigned long xor_flags = flags; 293 enum async_tx_flags flags_orig = submit->flags;
281 294
282 pr_debug("%s: (sync) len: %zu\n", __func__, len); 295 pr_debug("%s: (sync) len: %zu\n", __func__, len);
296 WARN_ONCE(device && src_cnt <= device->max_xor,
297 "%s: no space for dma address conversion\n",
298 __func__);
283 299
284 xor_flags |= ASYNC_TX_XOR_DROP_DST; 300 submit->flags |= ASYNC_TX_XOR_DROP_DST;
285 xor_flags &= ~ASYNC_TX_ACK; 301 submit->flags &= ~ASYNC_TX_ACK;
286 302
287 tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, 303 tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
288 depend_tx, NULL, NULL);
289 304
290 async_tx_quiesce(&tx); 305 async_tx_quiesce(&tx);
291 306
292 *result = page_is_zero(dest, offset, len) ? 0 : 1; 307 *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
293 308
294 async_tx_sync_epilog(cb_fn, cb_param); 309 async_tx_sync_epilog(submit);
310 submit->flags = flags_orig;
295 } 311 }
296 312
297 return tx; 313 return tx;
298} 314}
299EXPORT_SYMBOL_GPL(async_xor_zero_sum); 315EXPORT_SYMBOL_GPL(async_xor_val);
300
301static int __init async_xor_init(void)
302{
303 #ifdef CONFIG_DMA_ENGINE
304 /* To conserve stack space the input src_list (array of page pointers)
305 * is reused to hold the array of dma addresses passed to the driver.
306 * This conversion is only possible when dma_addr_t is less than the
307 * the size of a pointer. HIGHMEM64G is known to violate this
308 * assumption.
309 */
310 BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
311 #endif
312
313 return 0;
314}
315
316static void __exit async_xor_exit(void)
317{
318 do { } while (0);
319}
320
321module_init(async_xor_init);
322module_exit(async_xor_exit);
323 316
324MODULE_AUTHOR("Intel Corporation"); 317MODULE_AUTHOR("Intel Corporation");
325MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); 318MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
new file mode 100644
index 00000000000..98c83ca96c8
--- /dev/null
+++ b/crypto/async_tx/raid6test.c
@@ -0,0 +1,241 @@
1/*
2 * asynchronous raid6 recovery self test
3 * Copyright (c) 2009, Intel Corporation.
4 *
5 * based on drivers/md/raid6test/test.c:
6 * Copyright 2002-2007 H. Peter Anvin
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 */
22#include <linux/async_tx.h>
23#include <linux/random.h>
24
25#undef pr
26#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
27
28#define NDISKS 16 /* Including P and Q */
29
30static struct page *dataptrs[NDISKS];
31static struct page *data[NDISKS+3];
32static struct page *spare;
33static struct page *recovi;
34static struct page *recovj;
35
/* async_tx completion hook: @param is the struct completion to signal */
static void callback(void *param)
{
	complete(param);
}
42
43static void makedata(int disks)
44{
45 int i, j;
46
47 for (i = 0; i < disks; i++) {
48 for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) {
49 u32 *p = page_address(data[i]) + j;
50
51 *p = random32();
52 }
53
54 dataptrs[i] = data[i];
55 }
56}
57
/*
 * Label slot @d of a @disks-wide stripe: 'P' for the second-to-last slot,
 * 'Q' for the last slot, 'D' for everything else.
 */
static char disk_type(int d, int disks)
{
	return d == disks - 2 ? 'P' :
	       d == disks - 1 ? 'Q' : 'D';
}
67
68/* Recover two failed blocks. */
69static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
70{
71 struct async_submit_ctl submit;
72 addr_conv_t addr_conv[disks];
73 struct completion cmp;
74 struct dma_async_tx_descriptor *tx = NULL;
75 enum sum_check_flags result = ~0;
76
77 if (faila > failb)
78 swap(faila, failb);
79
80 if (failb == disks-1) {
81 if (faila == disks-2) {
82 /* P+Q failure. Just rebuild the syndrome. */
83 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
84 tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
85 } else {
86 struct page *blocks[disks];
87 struct page *dest;
88 int count = 0;
89 int i;
90
91 /* data+Q failure. Reconstruct data from P,
92 * then rebuild syndrome
93 */
94 for (i = disks; i-- ; ) {
95 if (i == faila || i == failb)
96 continue;
97 blocks[count++] = ptrs[i];
98 }
99 dest = ptrs[faila];
100 init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
101 NULL, NULL, addr_conv);
102 tx = async_xor(dest, blocks, 0, count, bytes, &submit);
103
104 init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
105 tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
106 }
107 } else {
108 if (failb == disks-2) {
109 /* data+P failure. */
110 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
111 tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
112 } else {
113 /* data+data failure. */
114 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
115 tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
116 }
117 }
118 init_completion(&cmp);
119 init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
120 tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
121 async_tx_issue_pending(tx);
122
123 if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
124 pr("%s: timeout! (faila: %d failb: %d disks: %d)\n",
125 __func__, faila, failb, disks);
126
127 if (result != 0)
128 pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n",
129 __func__, faila, failb, result);
130}
131
132static int test_disks(int i, int j, int disks)
133{
134 int erra, errb;
135
136 memset(page_address(recovi), 0xf0, PAGE_SIZE);
137 memset(page_address(recovj), 0xba, PAGE_SIZE);
138
139 dataptrs[i] = recovi;
140 dataptrs[j] = recovj;
141
142 raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
143
144 erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
145 errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
146
147 pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n",
148 __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks),
149 (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB");
150
151 dataptrs[i] = data[i];
152 dataptrs[j] = data[j];
153
154 return erra || errb;
155}
156
157static int test(int disks, int *tests)
158{
159 addr_conv_t addr_conv[disks];
160 struct dma_async_tx_descriptor *tx;
161 struct async_submit_ctl submit;
162 struct completion cmp;
163 int err = 0;
164 int i, j;
165
166 recovi = data[disks];
167 recovj = data[disks+1];
168 spare = data[disks+2];
169
170 makedata(disks);
171
172 /* Nuke syndromes */
173 memset(page_address(data[disks-2]), 0xee, PAGE_SIZE);
174 memset(page_address(data[disks-1]), 0xee, PAGE_SIZE);
175
176 /* Generate assumed good syndrome */
177 init_completion(&cmp);
178 init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
179 tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
180 async_tx_issue_pending(tx);
181
182 if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
183 pr("error: initial gen_syndrome(%d) timed out\n", disks);
184 return 1;
185 }
186
187 pr("testing the %d-disk case...\n", disks);
188 for (i = 0; i < disks-1; i++)
189 for (j = i+1; j < disks; j++) {
190 (*tests)++;
191 err += test_disks(i, j, disks);
192 }
193
194 return err;
195}
196
197
198static int raid6_test(void)
199{
200 int err = 0;
201 int tests = 0;
202 int i;
203
204 for (i = 0; i < NDISKS+3; i++) {
205 data[i] = alloc_page(GFP_KERNEL);
206 if (!data[i]) {
207 while (i--)
208 put_page(data[i]);
209 return -ENOMEM;
210 }
211 }
212
213 /* the 4-disk and 5-disk cases are special for the recovery code */
214 if (NDISKS > 4)
215 err += test(4, &tests);
216 if (NDISKS > 5)
217 err += test(5, &tests);
218 err += test(NDISKS, &tests);
219
220 pr("\n");
221 pr("complete (%d tests, %d failure%s)\n",
222 tests, err, err == 1 ? "" : "s");
223
224 for (i = 0; i < NDISKS+3; i++)
225 put_page(data[i]);
226
227 return 0;
228}
229
/* module unload hook: intentionally empty */
static void raid6_test_exit(void)
{
}
233
234/* when compiled-in wait for drivers to load first (assumes dma drivers
235 * are also compiled-in)
236 */
237late_initcall(raid6_test);
238module_exit(raid6_test_exit);
239MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
240MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests");
241MODULE_LICENSE("GPL");
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 3b3c01b6f1e..912a51b5cbd 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -4,7 +4,7 @@
4 4
5menuconfig DMADEVICES 5menuconfig DMADEVICES
6 bool "DMA Engine support" 6 bool "DMA Engine support"
7 depends on !HIGHMEM64G && HAS_DMA 7 depends on HAS_DMA
8 help 8 help
9 DMA engines can do asynchronous data transfers without 9 DMA engines can do asynchronous data transfers without
10 involving the host CPU. Currently, this framework can be 10 involving the host CPU. Currently, this framework can be
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 5a87384ea4f..96598479eec 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -644,8 +644,12 @@ int dma_async_device_register(struct dma_device *device)
644 !device->device_prep_dma_memcpy); 644 !device->device_prep_dma_memcpy);
645 BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && 645 BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
646 !device->device_prep_dma_xor); 646 !device->device_prep_dma_xor);
647 BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && 647 BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
648 !device->device_prep_dma_zero_sum); 648 !device->device_prep_dma_xor_val);
649 BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
650 !device->device_prep_dma_pq);
651 BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
652 !device->device_prep_dma_pq_val);
649 BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && 653 BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
650 !device->device_prep_dma_memset); 654 !device->device_prep_dma_memset);
651 BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && 655 BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
@@ -939,49 +943,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init);
939 943
940/* dma_wait_for_async_tx - spin wait for a transaction to complete 944/* dma_wait_for_async_tx - spin wait for a transaction to complete
941 * @tx: in-flight transaction to wait on 945 * @tx: in-flight transaction to wait on
942 *
943 * This routine assumes that tx was obtained from a call to async_memcpy,
944 * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
945 * and submitted). Walking the parent chain is only meant to cover for DMA
946 * drivers that do not implement the DMA_INTERRUPT capability and may race with
947 * the driver's descriptor cleanup routine.
948 */ 946 */
949enum dma_status 947enum dma_status
950dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) 948dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
951{ 949{
952 enum dma_status status; 950 unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
953 struct dma_async_tx_descriptor *iter;
954 struct dma_async_tx_descriptor *parent;
955 951
956 if (!tx) 952 if (!tx)
957 return DMA_SUCCESS; 953 return DMA_SUCCESS;
958 954
959 WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" 955 while (tx->cookie == -EBUSY) {
960 " %s\n", __func__, dma_chan_name(tx->chan)); 956 if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
961 957 pr_err("%s timeout waiting for descriptor submission\n",
962 /* poll through the dependency chain, return when tx is complete */ 958 __func__);
963 do { 959 return DMA_ERROR;
964 iter = tx; 960 }
965 961 cpu_relax();
966 /* find the root of the unsubmitted dependency chain */ 962 }
967 do { 963 return dma_sync_wait(tx->chan, tx->cookie);
968 parent = iter->parent;
969 if (!parent)
970 break;
971 else
972 iter = parent;
973 } while (parent);
974
975 /* there is a small window for ->parent == NULL and
976 * ->cookie == -EBUSY
977 */
978 while (iter->cookie == -EBUSY)
979 cpu_relax();
980
981 status = dma_sync_wait(iter->chan, iter->cookie);
982 } while (status == DMA_IN_PROGRESS || (iter != tx));
983
984 return status;
985} 964}
986EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); 965EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
987 966
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index fb7da5141e9..58e49e41c7a 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -43,6 +43,11 @@ module_param(xor_sources, uint, S_IRUGO);
43MODULE_PARM_DESC(xor_sources, 43MODULE_PARM_DESC(xor_sources,
44 "Number of xor source buffers (default: 3)"); 44 "Number of xor source buffers (default: 3)");
45 45
46static unsigned int pq_sources = 3;
47module_param(pq_sources, uint, S_IRUGO);
48MODULE_PARM_DESC(pq_sources,
49 "Number of p+q source buffers (default: 3)");
50
46/* 51/*
47 * Initialization patterns. All bytes in the source buffer has bit 7 52 * Initialization patterns. All bytes in the source buffer has bit 7
48 * set, all bytes in the destination buffer has bit 7 cleared. 53 * set, all bytes in the destination buffer has bit 7 cleared.
@@ -227,6 +232,7 @@ static int dmatest_func(void *data)
227 dma_cookie_t cookie; 232 dma_cookie_t cookie;
228 enum dma_status status; 233 enum dma_status status;
229 enum dma_ctrl_flags flags; 234 enum dma_ctrl_flags flags;
235 u8 pq_coefs[pq_sources];
230 int ret; 236 int ret;
231 int src_cnt; 237 int src_cnt;
232 int dst_cnt; 238 int dst_cnt;
@@ -243,6 +249,11 @@ static int dmatest_func(void *data)
243 else if (thread->type == DMA_XOR) { 249 else if (thread->type == DMA_XOR) {
244 src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ 250 src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
245 dst_cnt = 1; 251 dst_cnt = 1;
252 } else if (thread->type == DMA_PQ) {
253 src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
254 dst_cnt = 2;
255 for (i = 0; i < pq_sources; i++)
256 pq_coefs[i] = 1;
246 } else 257 } else
247 goto err_srcs; 258 goto err_srcs;
248 259
@@ -310,6 +321,15 @@ static int dmatest_func(void *data)
310 dma_dsts[0] + dst_off, 321 dma_dsts[0] + dst_off,
311 dma_srcs, xor_sources, 322 dma_srcs, xor_sources,
312 len, flags); 323 len, flags);
324 else if (thread->type == DMA_PQ) {
325 dma_addr_t dma_pq[dst_cnt];
326
327 for (i = 0; i < dst_cnt; i++)
328 dma_pq[i] = dma_dsts[i] + dst_off;
329 tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
330 pq_sources, pq_coefs,
331 len, flags);
332 }
313 333
314 if (!tx) { 334 if (!tx) {
315 for (i = 0; i < src_cnt; i++) 335 for (i = 0; i < src_cnt; i++)
@@ -446,6 +466,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
446 op = "copy"; 466 op = "copy";
447 else if (type == DMA_XOR) 467 else if (type == DMA_XOR)
448 op = "xor"; 468 op = "xor";
469 else if (type == DMA_PQ)
470 op = "pq";
449 else 471 else
450 return -EINVAL; 472 return -EINVAL;
451 473
@@ -501,6 +523,10 @@ static int dmatest_add_channel(struct dma_chan *chan)
501 cnt = dmatest_add_threads(dtc, DMA_XOR); 523 cnt = dmatest_add_threads(dtc, DMA_XOR);
502 thread_count += cnt > 0 ?: 0; 524 thread_count += cnt > 0 ?: 0;
503 } 525 }
526 if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
527 cnt = dmatest_add_threads(dtc, DMA_PQ);
528 thread_count += cnt > 0 ?: 0;
529 }
504 530
505 pr_info("dmatest: Started %u threads using %s\n", 531 pr_info("dmatest: Started %u threads using %s\n",
506 thread_count, dma_chan_name(chan)); 532 thread_count, dma_chan_name(chan));
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 2f052265122..4496bc60666 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -660,9 +660,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
660} 660}
661 661
662static struct dma_async_tx_descriptor * 662static struct dma_async_tx_descriptor *
663iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, 663iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
664 unsigned int src_cnt, size_t len, u32 *result, 664 unsigned int src_cnt, size_t len, u32 *result,
665 unsigned long flags) 665 unsigned long flags)
666{ 666{
667 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 667 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
668 struct iop_adma_desc_slot *sw_desc, *grp_start; 668 struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -906,7 +906,7 @@ out:
906 906
907#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ 907#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
908static int __devinit 908static int __devinit
909iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) 909iop_adma_xor_val_self_test(struct iop_adma_device *device)
910{ 910{
911 int i, src_idx; 911 int i, src_idx;
912 struct page *dest; 912 struct page *dest;
@@ -1002,7 +1002,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1002 PAGE_SIZE, DMA_TO_DEVICE); 1002 PAGE_SIZE, DMA_TO_DEVICE);
1003 1003
1004 /* skip zero sum if the capability is not present */ 1004 /* skip zero sum if the capability is not present */
1005 if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) 1005 if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
1006 goto free_resources; 1006 goto free_resources;
1007 1007
1008 /* zero sum the sources with the destintation page */ 1008 /* zero sum the sources with the destintation page */
@@ -1016,10 +1016,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1016 dma_srcs[i] = dma_map_page(dma_chan->device->dev, 1016 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
1017 zero_sum_srcs[i], 0, PAGE_SIZE, 1017 zero_sum_srcs[i], 0, PAGE_SIZE,
1018 DMA_TO_DEVICE); 1018 DMA_TO_DEVICE);
1019 tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, 1019 tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
1020 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, 1020 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
1021 &zero_sum_result, 1021 &zero_sum_result,
1022 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1022 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1023 1023
1024 cookie = iop_adma_tx_submit(tx); 1024 cookie = iop_adma_tx_submit(tx);
1025 iop_adma_issue_pending(dma_chan); 1025 iop_adma_issue_pending(dma_chan);
@@ -1072,10 +1072,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
1072 dma_srcs[i] = dma_map_page(dma_chan->device->dev, 1072 dma_srcs[i] = dma_map_page(dma_chan->device->dev,
1073 zero_sum_srcs[i], 0, PAGE_SIZE, 1073 zero_sum_srcs[i], 0, PAGE_SIZE,
1074 DMA_TO_DEVICE); 1074 DMA_TO_DEVICE);
1075 tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, 1075 tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
1076 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, 1076 IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
1077 &zero_sum_result, 1077 &zero_sum_result,
1078 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 1078 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
1079 1079
1080 cookie = iop_adma_tx_submit(tx); 1080 cookie = iop_adma_tx_submit(tx);
1081 iop_adma_issue_pending(dma_chan); 1081 iop_adma_issue_pending(dma_chan);
@@ -1192,9 +1192,9 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1192 dma_dev->max_xor = iop_adma_get_max_xor(); 1192 dma_dev->max_xor = iop_adma_get_max_xor();
1193 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; 1193 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
1194 } 1194 }
1195 if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) 1195 if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
1196 dma_dev->device_prep_dma_zero_sum = 1196 dma_dev->device_prep_dma_xor_val =
1197 iop_adma_prep_dma_zero_sum; 1197 iop_adma_prep_dma_xor_val;
1198 if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) 1198 if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
1199 dma_dev->device_prep_dma_interrupt = 1199 dma_dev->device_prep_dma_interrupt =
1200 iop_adma_prep_dma_interrupt; 1200 iop_adma_prep_dma_interrupt;
@@ -1249,7 +1249,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1249 1249
1250 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || 1250 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
1251 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { 1251 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
1252 ret = iop_adma_xor_zero_sum_self_test(adev); 1252 ret = iop_adma_xor_val_self_test(adev);
1253 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); 1253 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
1254 if (ret) 1254 if (ret)
1255 goto err_free_iop_chan; 1255 goto err_free_iop_chan;
@@ -1257,12 +1257,12 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
1257 1257
1258 dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " 1258 dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
1259 "( %s%s%s%s%s%s%s%s%s%s)\n", 1259 "( %s%s%s%s%s%s%s%s%s%s)\n",
1260 dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", 1260 dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
1261 dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", 1261 dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
1262 dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", 1262 dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
1263 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", 1263 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
1264 dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", 1264 dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
1265 dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", 1265 dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
1266 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", 1266 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
1267 dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", 1267 dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
1268 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", 1268 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 36e0675be9f..09c0c6e49ab 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -124,6 +124,8 @@ config MD_RAID456
124 select MD_RAID6_PQ 124 select MD_RAID6_PQ
125 select ASYNC_MEMCPY 125 select ASYNC_MEMCPY
126 select ASYNC_XOR 126 select ASYNC_XOR
127 select ASYNC_PQ
128 select ASYNC_RAID6_RECOV
127 ---help--- 129 ---help---
128 A RAID-5 set of N drives with a capacity of C MB per drive provides 130 A RAID-5 set of N drives with a capacity of C MB per drive provides
129 the capacity of C * (N - 1) MB, and protects against a failure 131 the capacity of C * (N - 1) MB, and protects against a failure
@@ -152,9 +154,33 @@ config MD_RAID456
152 154
153 If unsure, say Y. 155 If unsure, say Y.
154 156
157config MULTICORE_RAID456
158 bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
159 depends on MD_RAID456
160 depends on SMP
161 depends on EXPERIMENTAL
162 ---help---
163 Enable the raid456 module to dispatch per-stripe raid operations to a
164 thread pool.
165
166 If unsure, say N.
167
155config MD_RAID6_PQ 168config MD_RAID6_PQ
156 tristate 169 tristate
157 170
171config ASYNC_RAID6_TEST
172 tristate "Self test for hardware accelerated raid6 recovery"
173 depends on MD_RAID6_PQ
174 select ASYNC_RAID6_RECOV
175 ---help---
176 This is a one-shot self test that permutes through the
177 recovery of all the possible two disk failure scenarios for a
178 N-disk array. Recovery is performed with the asynchronous
179 raid6 recovery routines, and will optionally use an offload
180 engine if one is available.
181
182 If unsure, say N.
183
158config MD_MULTIPATH 184config MD_MULTIPATH
159 tristate "Multipath I/O support" 185 tristate "Multipath I/O support"
160 depends on BLK_DEV_MD 186 depends on BLK_DEV_MD
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index bb37fb1b2d8..0a5cf217121 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -47,7 +47,9 @@
47#include <linux/kthread.h> 47#include <linux/kthread.h>
48#include <linux/raid/pq.h> 48#include <linux/raid/pq.h>
49#include <linux/async_tx.h> 49#include <linux/async_tx.h>
50#include <linux/async.h>
50#include <linux/seq_file.h> 51#include <linux/seq_file.h>
52#include <linux/cpu.h>
51#include "md.h" 53#include "md.h"
52#include "raid5.h" 54#include "raid5.h"
53#include "bitmap.h" 55#include "bitmap.h"
@@ -499,11 +501,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
499 struct page *bio_page; 501 struct page *bio_page;
500 int i; 502 int i;
501 int page_offset; 503 int page_offset;
504 struct async_submit_ctl submit;
502 505
503 if (bio->bi_sector >= sector) 506 if (bio->bi_sector >= sector)
504 page_offset = (signed)(bio->bi_sector - sector) * 512; 507 page_offset = (signed)(bio->bi_sector - sector) * 512;
505 else 508 else
506 page_offset = (signed)(sector - bio->bi_sector) * -512; 509 page_offset = (signed)(sector - bio->bi_sector) * -512;
510
511 init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
507 bio_for_each_segment(bvl, bio, i) { 512 bio_for_each_segment(bvl, bio, i) {
508 int len = bio_iovec_idx(bio, i)->bv_len; 513 int len = bio_iovec_idx(bio, i)->bv_len;
509 int clen; 514 int clen;
@@ -525,15 +530,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
525 bio_page = bio_iovec_idx(bio, i)->bv_page; 530 bio_page = bio_iovec_idx(bio, i)->bv_page;
526 if (frombio) 531 if (frombio)
527 tx = async_memcpy(page, bio_page, page_offset, 532 tx = async_memcpy(page, bio_page, page_offset,
528 b_offset, clen, 533 b_offset, clen, &submit);
529 ASYNC_TX_DEP_ACK,
530 tx, NULL, NULL);
531 else 534 else
532 tx = async_memcpy(bio_page, page, b_offset, 535 tx = async_memcpy(bio_page, page, b_offset,
533 page_offset, clen, 536 page_offset, clen, &submit);
534 ASYNC_TX_DEP_ACK,
535 tx, NULL, NULL);
536 } 537 }
538 /* chain the operations */
539 submit.depend_tx = tx;
540
537 if (clen < len) /* hit end of page */ 541 if (clen < len) /* hit end of page */
538 break; 542 break;
539 page_offset += len; 543 page_offset += len;
@@ -592,6 +596,7 @@ static void ops_run_biofill(struct stripe_head *sh)
592{ 596{
593 struct dma_async_tx_descriptor *tx = NULL; 597 struct dma_async_tx_descriptor *tx = NULL;
594 raid5_conf_t *conf = sh->raid_conf; 598 raid5_conf_t *conf = sh->raid_conf;
599 struct async_submit_ctl submit;
595 int i; 600 int i;
596 601
597 pr_debug("%s: stripe %llu\n", __func__, 602 pr_debug("%s: stripe %llu\n", __func__,
@@ -615,22 +620,34 @@ static void ops_run_biofill(struct stripe_head *sh)
615 } 620 }
616 621
617 atomic_inc(&sh->count); 622 atomic_inc(&sh->count);
618 async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 623 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
619 ops_complete_biofill, sh); 624 async_trigger_callback(&submit);
620} 625}
621 626
622static void ops_complete_compute5(void *stripe_head_ref) 627static void mark_target_uptodate(struct stripe_head *sh, int target)
623{ 628{
624 struct stripe_head *sh = stripe_head_ref; 629 struct r5dev *tgt;
625 int target = sh->ops.target;
626 struct r5dev *tgt = &sh->dev[target];
627 630
628 pr_debug("%s: stripe %llu\n", __func__, 631 if (target < 0)
629 (unsigned long long)sh->sector); 632 return;
630 633
634 tgt = &sh->dev[target];
631 set_bit(R5_UPTODATE, &tgt->flags); 635 set_bit(R5_UPTODATE, &tgt->flags);
632 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 636 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
633 clear_bit(R5_Wantcompute, &tgt->flags); 637 clear_bit(R5_Wantcompute, &tgt->flags);
638}
639
640static void ops_complete_compute(void *stripe_head_ref)
641{
642 struct stripe_head *sh = stripe_head_ref;
643
644 pr_debug("%s: stripe %llu\n", __func__,
645 (unsigned long long)sh->sector);
646
647 /* mark the computed target(s) as uptodate */
648 mark_target_uptodate(sh, sh->ops.target);
649 mark_target_uptodate(sh, sh->ops.target2);
650
634 clear_bit(STRIPE_COMPUTE_RUN, &sh->state); 651 clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
635 if (sh->check_state == check_state_compute_run) 652 if (sh->check_state == check_state_compute_run)
636 sh->check_state = check_state_compute_result; 653 sh->check_state = check_state_compute_result;
@@ -638,16 +655,24 @@ static void ops_complete_compute5(void *stripe_head_ref)
638 release_stripe(sh); 655 release_stripe(sh);
639} 656}
640 657
641static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) 658/* return a pointer to the address conversion region of the scribble buffer */
659static addr_conv_t *to_addr_conv(struct stripe_head *sh,
660 struct raid5_percpu *percpu)
661{
662 return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
663}
664
665static struct dma_async_tx_descriptor *
666ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
642{ 667{
643 /* kernel stack size limits the total number of disks */
644 int disks = sh->disks; 668 int disks = sh->disks;
645 struct page *xor_srcs[disks]; 669 struct page **xor_srcs = percpu->scribble;
646 int target = sh->ops.target; 670 int target = sh->ops.target;
647 struct r5dev *tgt = &sh->dev[target]; 671 struct r5dev *tgt = &sh->dev[target];
648 struct page *xor_dest = tgt->page; 672 struct page *xor_dest = tgt->page;
649 int count = 0; 673 int count = 0;
650 struct dma_async_tx_descriptor *tx; 674 struct dma_async_tx_descriptor *tx;
675 struct async_submit_ctl submit;
651 int i; 676 int i;
652 677
653 pr_debug("%s: stripe %llu block: %d\n", 678 pr_debug("%s: stripe %llu block: %d\n",
@@ -660,17 +685,207 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
660 685
661 atomic_inc(&sh->count); 686 atomic_inc(&sh->count);
662 687
688 init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
689 ops_complete_compute, sh, to_addr_conv(sh, percpu));
663 if (unlikely(count == 1)) 690 if (unlikely(count == 1))
664 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 691 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
665 0, NULL, ops_complete_compute5, sh);
666 else 692 else
667 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 693 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
668 ASYNC_TX_XOR_ZERO_DST, NULL,
669 ops_complete_compute5, sh);
670 694
671 return tx; 695 return tx;
672} 696}
673 697
698/* set_syndrome_sources - populate source buffers for gen_syndrome
699 * @srcs - (struct page *) array of size sh->disks
700 * @sh - stripe_head to parse
701 *
702 * Populates srcs in proper layout order for the stripe and returns the
703 * 'count' of sources to be used in a call to async_gen_syndrome. The P
704 * destination buffer is recorded in srcs[count] and the Q destination
705 * is recorded in srcs[count+1]].
706 */
707static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
708{
709 int disks = sh->disks;
710 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
711 int d0_idx = raid6_d0(sh);
712 int count;
713 int i;
714
715 for (i = 0; i < disks; i++)
716 srcs[i] = (void *)raid6_empty_zero_page;
717
718 count = 0;
719 i = d0_idx;
720 do {
721 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
722
723 srcs[slot] = sh->dev[i].page;
724 i = raid6_next_disk(i, disks);
725 } while (i != d0_idx);
726 BUG_ON(count != syndrome_disks);
727
728 return count;
729}
730
731static struct dma_async_tx_descriptor *
732ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
733{
734 int disks = sh->disks;
735 struct page **blocks = percpu->scribble;
736 int target;
737 int qd_idx = sh->qd_idx;
738 struct dma_async_tx_descriptor *tx;
739 struct async_submit_ctl submit;
740 struct r5dev *tgt;
741 struct page *dest;
742 int i;
743 int count;
744
745 if (sh->ops.target < 0)
746 target = sh->ops.target2;
747 else if (sh->ops.target2 < 0)
748 target = sh->ops.target;
749 else
750 /* we should only have one valid target */
751 BUG();
752 BUG_ON(target < 0);
753 pr_debug("%s: stripe %llu block: %d\n",
754 __func__, (unsigned long long)sh->sector, target);
755
756 tgt = &sh->dev[target];
757 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
758 dest = tgt->page;
759
760 atomic_inc(&sh->count);
761
762 if (target == qd_idx) {
763 count = set_syndrome_sources(blocks, sh);
764 blocks[count] = NULL; /* regenerating p is not necessary */
765 BUG_ON(blocks[count+1] != dest); /* q should already be set */
766 init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
767 to_addr_conv(sh, percpu));
768 tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
769 } else {
770 /* Compute any data- or p-drive using XOR */
771 count = 0;
772 for (i = disks; i-- ; ) {
773 if (i == target || i == qd_idx)
774 continue;
775 blocks[count++] = sh->dev[i].page;
776 }
777
778 init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
779 ops_complete_compute, sh,
780 to_addr_conv(sh, percpu));
781 tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
782 }
783
784 return tx;
785}
786
787static struct dma_async_tx_descriptor *
788ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
789{
790 int i, count, disks = sh->disks;
791 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
792 int d0_idx = raid6_d0(sh);
793 int faila = -1, failb = -1;
794 int target = sh->ops.target;
795 int target2 = sh->ops.target2;
796 struct r5dev *tgt = &sh->dev[target];
797 struct r5dev *tgt2 = &sh->dev[target2];
798 struct dma_async_tx_descriptor *tx;
799 struct page **blocks = percpu->scribble;
800 struct async_submit_ctl submit;
801
802 pr_debug("%s: stripe %llu block1: %d block2: %d\n",
803 __func__, (unsigned long long)sh->sector, target, target2);
804 BUG_ON(target < 0 || target2 < 0);
805 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
806 BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
807
808 /* we need to open-code set_syndrome_sources to handle to the
809 * slot number conversion for 'faila' and 'failb'
810 */
811 for (i = 0; i < disks ; i++)
812 blocks[i] = (void *)raid6_empty_zero_page;
813 count = 0;
814 i = d0_idx;
815 do {
816 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
817
818 blocks[slot] = sh->dev[i].page;
819
820 if (i == target)
821 faila = slot;
822 if (i == target2)
823 failb = slot;
824 i = raid6_next_disk(i, disks);
825 } while (i != d0_idx);
826 BUG_ON(count != syndrome_disks);
827
828 BUG_ON(faila == failb);
829 if (failb < faila)
830 swap(faila, failb);
831 pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
832 __func__, (unsigned long long)sh->sector, faila, failb);
833
834 atomic_inc(&sh->count);
835
836 if (failb == syndrome_disks+1) {
837 /* Q disk is one of the missing disks */
838 if (faila == syndrome_disks) {
839 /* Missing P+Q, just recompute */
840 init_async_submit(&submit, 0, NULL, ops_complete_compute,
841 sh, to_addr_conv(sh, percpu));
842 return async_gen_syndrome(blocks, 0, count+2,
843 STRIPE_SIZE, &submit);
844 } else {
845 struct page *dest;
846 int data_target;
847 int qd_idx = sh->qd_idx;
848
849 /* Missing D+Q: recompute D from P, then recompute Q */
850 if (target == qd_idx)
851 data_target = target2;
852 else
853 data_target = target;
854
855 count = 0;
856 for (i = disks; i-- ; ) {
857 if (i == data_target || i == qd_idx)
858 continue;
859 blocks[count++] = sh->dev[i].page;
860 }
861 dest = sh->dev[data_target].page;
862 init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
863 NULL, NULL, to_addr_conv(sh, percpu));
864 tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
865 &submit);
866
867 count = set_syndrome_sources(blocks, sh);
868 init_async_submit(&submit, 0, tx, ops_complete_compute,
869 sh, to_addr_conv(sh, percpu));
870 return async_gen_syndrome(blocks, 0, count+2,
871 STRIPE_SIZE, &submit);
872 }
873 }
874
875 init_async_submit(&submit, 0, NULL, ops_complete_compute, sh,
876 to_addr_conv(sh, percpu));
877 if (failb == syndrome_disks) {
878 /* We're missing D+P. */
879 return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE,
880 faila, blocks, &submit);
881 } else {
882 /* We're missing D+D. */
883 return async_raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE,
884 faila, failb, blocks, &submit);
885 }
886}
887
888
674static void ops_complete_prexor(void *stripe_head_ref) 889static void ops_complete_prexor(void *stripe_head_ref)
675{ 890{
676 struct stripe_head *sh = stripe_head_ref; 891 struct stripe_head *sh = stripe_head_ref;
@@ -680,12 +895,13 @@ static void ops_complete_prexor(void *stripe_head_ref)
680} 895}
681 896
682static struct dma_async_tx_descriptor * 897static struct dma_async_tx_descriptor *
683ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 898ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
899 struct dma_async_tx_descriptor *tx)
684{ 900{
685 /* kernel stack size limits the total number of disks */
686 int disks = sh->disks; 901 int disks = sh->disks;
687 struct page *xor_srcs[disks]; 902 struct page **xor_srcs = percpu->scribble;
688 int count = 0, pd_idx = sh->pd_idx, i; 903 int count = 0, pd_idx = sh->pd_idx, i;
904 struct async_submit_ctl submit;
689 905
690 /* existing parity data subtracted */ 906 /* existing parity data subtracted */
691 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 907 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -700,9 +916,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
700 xor_srcs[count++] = dev->page; 916 xor_srcs[count++] = dev->page;
701 } 917 }
702 918
703 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 919 init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx,
704 ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, 920 ops_complete_prexor, sh, to_addr_conv(sh, percpu));
705 ops_complete_prexor, sh); 921 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
706 922
707 return tx; 923 return tx;
708} 924}
@@ -742,17 +958,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
742 return tx; 958 return tx;
743} 959}
744 960
745static void ops_complete_postxor(void *stripe_head_ref) 961static void ops_complete_reconstruct(void *stripe_head_ref)
746{ 962{
747 struct stripe_head *sh = stripe_head_ref; 963 struct stripe_head *sh = stripe_head_ref;
748 int disks = sh->disks, i, pd_idx = sh->pd_idx; 964 int disks = sh->disks;
965 int pd_idx = sh->pd_idx;
966 int qd_idx = sh->qd_idx;
967 int i;
749 968
750 pr_debug("%s: stripe %llu\n", __func__, 969 pr_debug("%s: stripe %llu\n", __func__,
751 (unsigned long long)sh->sector); 970 (unsigned long long)sh->sector);
752 971
753 for (i = disks; i--; ) { 972 for (i = disks; i--; ) {
754 struct r5dev *dev = &sh->dev[i]; 973 struct r5dev *dev = &sh->dev[i];
755 if (dev->written || i == pd_idx) 974
975 if (dev->written || i == pd_idx || i == qd_idx)
756 set_bit(R5_UPTODATE, &dev->flags); 976 set_bit(R5_UPTODATE, &dev->flags);
757 } 977 }
758 978
@@ -770,12 +990,12 @@ static void ops_complete_postxor(void *stripe_head_ref)
770} 990}
771 991
772static void 992static void
773ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 993ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
994 struct dma_async_tx_descriptor *tx)
774{ 995{
775 /* kernel stack size limits the total number of disks */
776 int disks = sh->disks; 996 int disks = sh->disks;
777 struct page *xor_srcs[disks]; 997 struct page **xor_srcs = percpu->scribble;
778 998 struct async_submit_ctl submit;
779 int count = 0, pd_idx = sh->pd_idx, i; 999 int count = 0, pd_idx = sh->pd_idx, i;
780 struct page *xor_dest; 1000 struct page *xor_dest;
781 int prexor = 0; 1001 int prexor = 0;
@@ -809,18 +1029,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
809 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST 1029 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
810 * for the synchronous xor case 1030 * for the synchronous xor case
811 */ 1031 */
812 flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | 1032 flags = ASYNC_TX_ACK |
813 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); 1033 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
814 1034
815 atomic_inc(&sh->count); 1035 atomic_inc(&sh->count);
816 1036
817 if (unlikely(count == 1)) { 1037 init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
818 flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); 1038 to_addr_conv(sh, percpu));
819 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 1039 if (unlikely(count == 1))
820 flags, tx, ops_complete_postxor, sh); 1040 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
821 } else 1041 else
822 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 1042 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
823 flags, tx, ops_complete_postxor, sh); 1043}
1044
1045static void
1046ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
1047 struct dma_async_tx_descriptor *tx)
1048{
1049 struct async_submit_ctl submit;
1050 struct page **blocks = percpu->scribble;
1051 int count;
1052
1053 pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
1054
1055 count = set_syndrome_sources(blocks, sh);
1056
1057 atomic_inc(&sh->count);
1058
1059 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
1060 sh, to_addr_conv(sh, percpu));
1061 async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
824} 1062}
825 1063
826static void ops_complete_check(void *stripe_head_ref) 1064static void ops_complete_check(void *stripe_head_ref)
@@ -835,63 +1073,115 @@ static void ops_complete_check(void *stripe_head_ref)
835 release_stripe(sh); 1073 release_stripe(sh);
836} 1074}
837 1075
838static void ops_run_check(struct stripe_head *sh) 1076static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
839{ 1077{
840 /* kernel stack size limits the total number of disks */
841 int disks = sh->disks; 1078 int disks = sh->disks;
842 struct page *xor_srcs[disks]; 1079 int pd_idx = sh->pd_idx;
1080 int qd_idx = sh->qd_idx;
1081 struct page *xor_dest;
1082 struct page **xor_srcs = percpu->scribble;
843 struct dma_async_tx_descriptor *tx; 1083 struct dma_async_tx_descriptor *tx;
844 1084 struct async_submit_ctl submit;
845 int count = 0, pd_idx = sh->pd_idx, i; 1085 int count;
846 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 1086 int i;
847 1087
848 pr_debug("%s: stripe %llu\n", __func__, 1088 pr_debug("%s: stripe %llu\n", __func__,
849 (unsigned long long)sh->sector); 1089 (unsigned long long)sh->sector);
850 1090
1091 count = 0;
1092 xor_dest = sh->dev[pd_idx].page;
1093 xor_srcs[count++] = xor_dest;
851 for (i = disks; i--; ) { 1094 for (i = disks; i--; ) {
852 struct r5dev *dev = &sh->dev[i]; 1095 if (i == pd_idx || i == qd_idx)
853 if (i != pd_idx) 1096 continue;
854 xor_srcs[count++] = dev->page; 1097 xor_srcs[count++] = sh->dev[i].page;
855 } 1098 }
856 1099
857 tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 1100 init_async_submit(&submit, 0, NULL, NULL, NULL,
858 &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); 1101 to_addr_conv(sh, percpu));
1102 tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
1103 &sh->ops.zero_sum_result, &submit);
1104
1105 atomic_inc(&sh->count);
1106 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
1107 tx = async_trigger_callback(&submit);
1108}
1109
1110static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
1111{
1112 struct page **srcs = percpu->scribble;
1113 struct async_submit_ctl submit;
1114 int count;
1115
1116 pr_debug("%s: stripe %llu checkp: %d\n", __func__,
1117 (unsigned long long)sh->sector, checkp);
1118
1119 count = set_syndrome_sources(srcs, sh);
1120 if (!checkp)
1121 srcs[count] = NULL;
859 1122
860 atomic_inc(&sh->count); 1123 atomic_inc(&sh->count);
861 tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 1124 init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
862 ops_complete_check, sh); 1125 sh, to_addr_conv(sh, percpu));
1126 async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
1127 &sh->ops.zero_sum_result, percpu->spare_page, &submit);
863} 1128}
864 1129
865static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) 1130static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
866{ 1131{
867 int overlap_clear = 0, i, disks = sh->disks; 1132 int overlap_clear = 0, i, disks = sh->disks;
868 struct dma_async_tx_descriptor *tx = NULL; 1133 struct dma_async_tx_descriptor *tx = NULL;
1134 raid5_conf_t *conf = sh->raid_conf;
1135 int level = conf->level;
1136 struct raid5_percpu *percpu;
1137 unsigned long cpu;
869 1138
1139 cpu = get_cpu();
1140 percpu = per_cpu_ptr(conf->percpu, cpu);
870 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { 1141 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
871 ops_run_biofill(sh); 1142 ops_run_biofill(sh);
872 overlap_clear++; 1143 overlap_clear++;
873 } 1144 }
874 1145
875 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { 1146 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
876 tx = ops_run_compute5(sh); 1147 if (level < 6)
877 /* terminate the chain if postxor is not set to be run */ 1148 tx = ops_run_compute5(sh, percpu);
878 if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) 1149 else {
1150 if (sh->ops.target2 < 0 || sh->ops.target < 0)
1151 tx = ops_run_compute6_1(sh, percpu);
1152 else
1153 tx = ops_run_compute6_2(sh, percpu);
1154 }
1155 /* terminate the chain if reconstruct is not set to be run */
1156 if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
879 async_tx_ack(tx); 1157 async_tx_ack(tx);
880 } 1158 }
881 1159
882 if (test_bit(STRIPE_OP_PREXOR, &ops_request)) 1160 if (test_bit(STRIPE_OP_PREXOR, &ops_request))
883 tx = ops_run_prexor(sh, tx); 1161 tx = ops_run_prexor(sh, percpu, tx);
884 1162
885 if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { 1163 if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
886 tx = ops_run_biodrain(sh, tx); 1164 tx = ops_run_biodrain(sh, tx);
887 overlap_clear++; 1165 overlap_clear++;
888 } 1166 }
889 1167
890 if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) 1168 if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
891 ops_run_postxor(sh, tx); 1169 if (level < 6)
1170 ops_run_reconstruct5(sh, percpu, tx);
1171 else
1172 ops_run_reconstruct6(sh, percpu, tx);
1173 }
892 1174
893 if (test_bit(STRIPE_OP_CHECK, &ops_request)) 1175 if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
894 ops_run_check(sh); 1176 if (sh->check_state == check_state_run)
1177 ops_run_check_p(sh, percpu);
1178 else if (sh->check_state == check_state_run_q)
1179 ops_run_check_pq(sh, percpu, 0);
1180 else if (sh->check_state == check_state_run_pq)
1181 ops_run_check_pq(sh, percpu, 1);
1182 else
1183 BUG();
1184 }
895 1185
896 if (overlap_clear) 1186 if (overlap_clear)
897 for (i = disks; i--; ) { 1187 for (i = disks; i--; ) {
@@ -899,6 +1189,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
899 if (test_and_clear_bit(R5_Overlap, &dev->flags)) 1189 if (test_and_clear_bit(R5_Overlap, &dev->flags))
900 wake_up(&sh->raid_conf->wait_for_overlap); 1190 wake_up(&sh->raid_conf->wait_for_overlap);
901 } 1191 }
1192 put_cpu();
902} 1193}
903 1194
904static int grow_one_stripe(raid5_conf_t *conf) 1195static int grow_one_stripe(raid5_conf_t *conf)
@@ -948,6 +1239,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
948 return 0; 1239 return 0;
949} 1240}
950 1241
1242/**
1243 * scribble_len - return the required size of the scribble region
1244 * @num - total number of disks in the array
1245 *
1246 * The size must be enough to contain:
1247 * 1/ a struct page pointer for each device in the array +2
1248 * 2/ room to convert each entry in (1) to its corresponding dma
1249 * (dma_map_page()) or page (page_address()) address.
1250 *
1251 * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
1252 * calculate over all devices (not just the data blocks), using zeros in place
1253 * of the P and Q blocks.
1254 */
1255static size_t scribble_len(int num)
1256{
1257 size_t len;
1258
1259 len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
1260
1261 return len;
1262}
1263
951static int resize_stripes(raid5_conf_t *conf, int newsize) 1264static int resize_stripes(raid5_conf_t *conf, int newsize)
952{ 1265{
953 /* Make all the stripes able to hold 'newsize' devices. 1266 /* Make all the stripes able to hold 'newsize' devices.
@@ -976,6 +1289,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
976 struct stripe_head *osh, *nsh; 1289 struct stripe_head *osh, *nsh;
977 LIST_HEAD(newstripes); 1290 LIST_HEAD(newstripes);
978 struct disk_info *ndisks; 1291 struct disk_info *ndisks;
1292 unsigned long cpu;
979 int err; 1293 int err;
980 struct kmem_cache *sc; 1294 struct kmem_cache *sc;
981 int i; 1295 int i;
@@ -1041,7 +1355,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1041 /* Step 3. 1355 /* Step 3.
1042 * At this point, we are holding all the stripes so the array 1356 * At this point, we are holding all the stripes so the array
1043 * is completely stalled, so now is a good time to resize 1357 * is completely stalled, so now is a good time to resize
1044 * conf->disks. 1358 * conf->disks and the scribble region
1045 */ 1359 */
1046 ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); 1360 ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
1047 if (ndisks) { 1361 if (ndisks) {
@@ -1052,10 +1366,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
1052 } else 1366 } else
1053 err = -ENOMEM; 1367 err = -ENOMEM;
1054 1368
1369 get_online_cpus();
1370 conf->scribble_len = scribble_len(newsize);
1371 for_each_present_cpu(cpu) {
1372 struct raid5_percpu *percpu;
1373 void *scribble;
1374
1375 percpu = per_cpu_ptr(conf->percpu, cpu);
1376 scribble = kmalloc(conf->scribble_len, GFP_NOIO);
1377
1378 if (scribble) {
1379 kfree(percpu->scribble);
1380 percpu->scribble = scribble;
1381 } else {
1382 err = -ENOMEM;
1383 break;
1384 }
1385 }
1386 put_online_cpus();
1387
1055 /* Step 4, return new stripes to service */ 1388 /* Step 4, return new stripes to service */
1056 while(!list_empty(&newstripes)) { 1389 while(!list_empty(&newstripes)) {
1057 nsh = list_entry(newstripes.next, struct stripe_head, lru); 1390 nsh = list_entry(newstripes.next, struct stripe_head, lru);
1058 list_del_init(&nsh->lru); 1391 list_del_init(&nsh->lru);
1392
1059 for (i=conf->raid_disks; i < newsize; i++) 1393 for (i=conf->raid_disks; i < newsize; i++)
1060 if (nsh->dev[i].page == NULL) { 1394 if (nsh->dev[i].page == NULL) {
1061 struct page *p = alloc_page(GFP_NOIO); 1395 struct page *p = alloc_page(GFP_NOIO);
@@ -1594,258 +1928,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
1594} 1928}
1595 1929
1596 1930
1597
1598/*
1599 * Copy data between a page in the stripe cache, and one or more bion
1600 * The page could align with the middle of the bio, or there could be
1601 * several bion, each with several bio_vecs, which cover part of the page
1602 * Multiple bion are linked together on bi_next. There may be extras
1603 * at the end of this list. We ignore them.
1604 */
1605static void copy_data(int frombio, struct bio *bio,
1606 struct page *page,
1607 sector_t sector)
1608{
1609 char *pa = page_address(page);
1610 struct bio_vec *bvl;
1611 int i;
1612 int page_offset;
1613
1614 if (bio->bi_sector >= sector)
1615 page_offset = (signed)(bio->bi_sector - sector) * 512;
1616 else
1617 page_offset = (signed)(sector - bio->bi_sector) * -512;
1618 bio_for_each_segment(bvl, bio, i) {
1619 int len = bio_iovec_idx(bio,i)->bv_len;
1620 int clen;
1621 int b_offset = 0;
1622
1623 if (page_offset < 0) {
1624 b_offset = -page_offset;
1625 page_offset += b_offset;
1626 len -= b_offset;
1627 }
1628
1629 if (len > 0 && page_offset + len > STRIPE_SIZE)
1630 clen = STRIPE_SIZE - page_offset;
1631 else clen = len;
1632
1633 if (clen > 0) {
1634 char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
1635 if (frombio)
1636 memcpy(pa+page_offset, ba+b_offset, clen);
1637 else
1638 memcpy(ba+b_offset, pa+page_offset, clen);
1639 __bio_kunmap_atomic(ba, KM_USER0);
1640 }
1641 if (clen < len) /* hit end of page */
1642 break;
1643 page_offset += len;
1644 }
1645}
1646
1647#define check_xor() do { \
1648 if (count == MAX_XOR_BLOCKS) { \
1649 xor_blocks(count, STRIPE_SIZE, dest, ptr);\
1650 count = 0; \
1651 } \
1652 } while(0)
1653
1654static void compute_parity6(struct stripe_head *sh, int method)
1655{
1656 raid5_conf_t *conf = sh->raid_conf;
1657 int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
1658 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
1659 struct bio *chosen;
1660 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1661 void *ptrs[syndrome_disks+2];
1662
1663 pd_idx = sh->pd_idx;
1664 qd_idx = sh->qd_idx;
1665 d0_idx = raid6_d0(sh);
1666
1667 pr_debug("compute_parity, stripe %llu, method %d\n",
1668 (unsigned long long)sh->sector, method);
1669
1670 switch(method) {
1671 case READ_MODIFY_WRITE:
1672 BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */
1673 case RECONSTRUCT_WRITE:
1674 for (i= disks; i-- ;)
1675 if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
1676 chosen = sh->dev[i].towrite;
1677 sh->dev[i].towrite = NULL;
1678
1679 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1680 wake_up(&conf->wait_for_overlap);
1681
1682 BUG_ON(sh->dev[i].written);
1683 sh->dev[i].written = chosen;
1684 }
1685 break;
1686 case CHECK_PARITY:
1687 BUG(); /* Not implemented yet */
1688 }
1689
1690 for (i = disks; i--;)
1691 if (sh->dev[i].written) {
1692 sector_t sector = sh->dev[i].sector;
1693 struct bio *wbi = sh->dev[i].written;
1694 while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
1695 copy_data(1, wbi, sh->dev[i].page, sector);
1696 wbi = r5_next_bio(wbi, sector);
1697 }
1698
1699 set_bit(R5_LOCKED, &sh->dev[i].flags);
1700 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1701 }
1702
1703 /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
1704
1705 for (i = 0; i < disks; i++)
1706 ptrs[i] = (void *)raid6_empty_zero_page;
1707
1708 count = 0;
1709 i = d0_idx;
1710 do {
1711 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1712
1713 ptrs[slot] = page_address(sh->dev[i].page);
1714 if (slot < syndrome_disks &&
1715 !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
1716 printk(KERN_ERR "block %d/%d not uptodate "
1717 "on parity calc\n", i, count);
1718 BUG();
1719 }
1720
1721 i = raid6_next_disk(i, disks);
1722 } while (i != d0_idx);
1723 BUG_ON(count != syndrome_disks);
1724
1725 raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
1726
1727 switch(method) {
1728 case RECONSTRUCT_WRITE:
1729 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1730 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1731 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1732 set_bit(R5_LOCKED, &sh->dev[qd_idx].flags);
1733 break;
1734 case UPDATE_PARITY:
1735 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1736 set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
1737 break;
1738 }
1739}
1740
1741
1742/* Compute one missing block */
1743static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1744{
1745 int i, count, disks = sh->disks;
1746 void *ptr[MAX_XOR_BLOCKS], *dest, *p;
1747 int qd_idx = sh->qd_idx;
1748
1749 pr_debug("compute_block_1, stripe %llu, idx %d\n",
1750 (unsigned long long)sh->sector, dd_idx);
1751
1752 if ( dd_idx == qd_idx ) {
1753 /* We're actually computing the Q drive */
1754 compute_parity6(sh, UPDATE_PARITY);
1755 } else {
1756 dest = page_address(sh->dev[dd_idx].page);
1757 if (!nozero) memset(dest, 0, STRIPE_SIZE);
1758 count = 0;
1759 for (i = disks ; i--; ) {
1760 if (i == dd_idx || i == qd_idx)
1761 continue;
1762 p = page_address(sh->dev[i].page);
1763 if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
1764 ptr[count++] = p;
1765 else
1766 printk("compute_block() %d, stripe %llu, %d"
1767 " not present\n", dd_idx,
1768 (unsigned long long)sh->sector, i);
1769
1770 check_xor();
1771 }
1772 if (count)
1773 xor_blocks(count, STRIPE_SIZE, dest, ptr);
1774 if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1775 else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1776 }
1777}
1778
1779/* Compute two missing blocks */
1780static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1781{
1782 int i, count, disks = sh->disks;
1783 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
1784 int d0_idx = raid6_d0(sh);
1785 int faila = -1, failb = -1;
1786 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1787 void *ptrs[syndrome_disks+2];
1788
1789 for (i = 0; i < disks ; i++)
1790 ptrs[i] = (void *)raid6_empty_zero_page;
1791 count = 0;
1792 i = d0_idx;
1793 do {
1794 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1795
1796 ptrs[slot] = page_address(sh->dev[i].page);
1797
1798 if (i == dd_idx1)
1799 faila = slot;
1800 if (i == dd_idx2)
1801 failb = slot;
1802 i = raid6_next_disk(i, disks);
1803 } while (i != d0_idx);
1804 BUG_ON(count != syndrome_disks);
1805
1806 BUG_ON(faila == failb);
1807 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
1808
1809 pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
1810 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
1811 faila, failb);
1812
1813 if (failb == syndrome_disks+1) {
1814 /* Q disk is one of the missing disks */
1815 if (faila == syndrome_disks) {
1816 /* Missing P+Q, just recompute */
1817 compute_parity6(sh, UPDATE_PARITY);
1818 return;
1819 } else {
1820 /* We're missing D+Q; recompute D from P */
1821 compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
1822 dd_idx2 : dd_idx1),
1823 0);
1824 compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
1825 return;
1826 }
1827 }
1828
1829 /* We're missing D+P or D+D; */
1830 if (failb == syndrome_disks) {
1831 /* We're missing D+P. */
1832 raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
1833 } else {
1834 /* We're missing D+D. */
1835 raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
1836 ptrs);
1837 }
1838
1839 /* Both the above update both missing blocks */
1840 set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
1841 set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
1842}
1843
1844static void 1931static void
1845schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, 1932schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
1846 int rcw, int expand) 1933 int rcw, int expand)
1847{ 1934{
1848 int i, pd_idx = sh->pd_idx, disks = sh->disks; 1935 int i, pd_idx = sh->pd_idx, disks = sh->disks;
1936 raid5_conf_t *conf = sh->raid_conf;
1937 int level = conf->level;
1849 1938
1850 if (rcw) { 1939 if (rcw) {
1851 /* if we are not expanding this is a proper write request, and 1940 /* if we are not expanding this is a proper write request, and
@@ -1858,7 +1947,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
1858 } else 1947 } else
1859 sh->reconstruct_state = reconstruct_state_run; 1948 sh->reconstruct_state = reconstruct_state_run;
1860 1949
1861 set_bit(STRIPE_OP_POSTXOR, &s->ops_request); 1950 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
1862 1951
1863 for (i = disks; i--; ) { 1952 for (i = disks; i--; ) {
1864 struct r5dev *dev = &sh->dev[i]; 1953 struct r5dev *dev = &sh->dev[i];
@@ -1871,17 +1960,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
1871 s->locked++; 1960 s->locked++;
1872 } 1961 }
1873 } 1962 }
1874 if (s->locked + 1 == disks) 1963 if (s->locked + conf->max_degraded == disks)
1875 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) 1964 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
1876 atomic_inc(&sh->raid_conf->pending_full_writes); 1965 atomic_inc(&conf->pending_full_writes);
1877 } else { 1966 } else {
1967 BUG_ON(level == 6);
1878 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || 1968 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
1879 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); 1969 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
1880 1970
1881 sh->reconstruct_state = reconstruct_state_prexor_drain_run; 1971 sh->reconstruct_state = reconstruct_state_prexor_drain_run;
1882 set_bit(STRIPE_OP_PREXOR, &s->ops_request); 1972 set_bit(STRIPE_OP_PREXOR, &s->ops_request);
1883 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); 1973 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
1884 set_bit(STRIPE_OP_POSTXOR, &s->ops_request); 1974 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
1885 1975
1886 for (i = disks; i--; ) { 1976 for (i = disks; i--; ) {
1887 struct r5dev *dev = &sh->dev[i]; 1977 struct r5dev *dev = &sh->dev[i];
@@ -1899,13 +1989,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
1899 } 1989 }
1900 } 1990 }
1901 1991
1902 /* keep the parity disk locked while asynchronous operations 1992 /* keep the parity disk(s) locked while asynchronous operations
1903 * are in flight 1993 * are in flight
1904 */ 1994 */
1905 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); 1995 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1906 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 1996 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1907 s->locked++; 1997 s->locked++;
1908 1998
1999 if (level == 6) {
2000 int qd_idx = sh->qd_idx;
2001 struct r5dev *dev = &sh->dev[qd_idx];
2002
2003 set_bit(R5_LOCKED, &dev->flags);
2004 clear_bit(R5_UPTODATE, &dev->flags);
2005 s->locked++;
2006 }
2007
1909 pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", 2008 pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
1910 __func__, (unsigned long long)sh->sector, 2009 __func__, (unsigned long long)sh->sector,
1911 s->locked, s->ops_request); 2010 s->locked, s->ops_request);
@@ -1986,13 +2085,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
1986 2085
1987static void end_reshape(raid5_conf_t *conf); 2086static void end_reshape(raid5_conf_t *conf);
1988 2087
1989static int page_is_zero(struct page *p)
1990{
1991 char *a = page_address(p);
1992 return ((*(u32*)a) == 0 &&
1993 memcmp(a, a+4, STRIPE_SIZE-4)==0);
1994}
1995
1996static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, 2088static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
1997 struct stripe_head *sh) 2089 struct stripe_head *sh)
1998{ 2090{
@@ -2133,9 +2225,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
2133 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); 2225 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2134 set_bit(R5_Wantcompute, &dev->flags); 2226 set_bit(R5_Wantcompute, &dev->flags);
2135 sh->ops.target = disk_idx; 2227 sh->ops.target = disk_idx;
2228 sh->ops.target2 = -1;
2136 s->req_compute = 1; 2229 s->req_compute = 1;
2137 /* Careful: from this point on 'uptodate' is in the eye 2230 /* Careful: from this point on 'uptodate' is in the eye
2138 * of raid5_run_ops which services 'compute' operations 2231 * of raid_run_ops which services 'compute' operations
2139 * before writes. R5_Wantcompute flags a block that will 2232 * before writes. R5_Wantcompute flags a block that will
2140 * be R5_UPTODATE by the time it is needed for a 2233 * be R5_UPTODATE by the time it is needed for a
2141 * subsequent operation. 2234 * subsequent operation.
@@ -2174,61 +2267,104 @@ static void handle_stripe_fill5(struct stripe_head *sh,
2174 set_bit(STRIPE_HANDLE, &sh->state); 2267 set_bit(STRIPE_HANDLE, &sh->state);
2175} 2268}
2176 2269
2177static void handle_stripe_fill6(struct stripe_head *sh, 2270/* fetch_block6 - checks the given member device to see if its data needs
2178 struct stripe_head_state *s, struct r6_state *r6s, 2271 * to be read or computed to satisfy a request.
2179 int disks) 2272 *
2273 * Returns 1 when no more member devices need to be checked, otherwise returns
2274 * 0 to tell the loop in handle_stripe_fill6 to continue
2275 */
2276static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
2277 struct r6_state *r6s, int disk_idx, int disks)
2180{ 2278{
2181 int i; 2279 struct r5dev *dev = &sh->dev[disk_idx];
2182 for (i = disks; i--; ) { 2280 struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
2183 struct r5dev *dev = &sh->dev[i]; 2281 &sh->dev[r6s->failed_num[1]] };
2184 if (!test_bit(R5_LOCKED, &dev->flags) && 2282
2185 !test_bit(R5_UPTODATE, &dev->flags) && 2283 if (!test_bit(R5_LOCKED, &dev->flags) &&
2186 (dev->toread || (dev->towrite && 2284 !test_bit(R5_UPTODATE, &dev->flags) &&
2187 !test_bit(R5_OVERWRITE, &dev->flags)) || 2285 (dev->toread ||
2188 s->syncing || s->expanding || 2286 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
2189 (s->failed >= 1 && 2287 s->syncing || s->expanding ||
2190 (sh->dev[r6s->failed_num[0]].toread || 2288 (s->failed >= 1 &&
2191 s->to_write)) || 2289 (fdev[0]->toread || s->to_write)) ||
2192 (s->failed >= 2 && 2290 (s->failed >= 2 &&
2193 (sh->dev[r6s->failed_num[1]].toread || 2291 (fdev[1]->toread || s->to_write)))) {
2194 s->to_write)))) { 2292 /* we would like to get this block, possibly by computing it,
2195 /* we would like to get this block, possibly 2293 * otherwise read it if the backing disk is insync
2196 * by computing it, but we might not be able to 2294 */
2295 BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
2296 BUG_ON(test_bit(R5_Wantread, &dev->flags));
2297 if ((s->uptodate == disks - 1) &&
2298 (s->failed && (disk_idx == r6s->failed_num[0] ||
2299 disk_idx == r6s->failed_num[1]))) {
2300 /* have disk failed, and we're requested to fetch it;
2301 * do compute it
2197 */ 2302 */
2198 if ((s->uptodate == disks - 1) && 2303 pr_debug("Computing stripe %llu block %d\n",
2199 (s->failed && (i == r6s->failed_num[0] || 2304 (unsigned long long)sh->sector, disk_idx);
2200 i == r6s->failed_num[1]))) { 2305 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
2201 pr_debug("Computing stripe %llu block %d\n", 2306 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2202 (unsigned long long)sh->sector, i); 2307 set_bit(R5_Wantcompute, &dev->flags);
2203 compute_block_1(sh, i, 0); 2308 sh->ops.target = disk_idx;
2204 s->uptodate++; 2309 sh->ops.target2 = -1; /* no 2nd target */
2205 } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { 2310 s->req_compute = 1;
2206 /* Computing 2-failure is *very* expensive; only 2311 s->uptodate++;
2207 * do it if failed >= 2 2312 return 1;
2208 */ 2313 } else if (s->uptodate == disks-2 && s->failed >= 2) {
2209 int other; 2314 /* Computing 2-failure is *very* expensive; only
2210 for (other = disks; other--; ) { 2315 * do it if failed >= 2
2211 if (other == i) 2316 */
2212 continue; 2317 int other;
2213 if (!test_bit(R5_UPTODATE, 2318 for (other = disks; other--; ) {
2214 &sh->dev[other].flags)) 2319 if (other == disk_idx)
2215 break; 2320 continue;
2216 } 2321 if (!test_bit(R5_UPTODATE,
2217 BUG_ON(other < 0); 2322 &sh->dev[other].flags))
2218 pr_debug("Computing stripe %llu blocks %d,%d\n", 2323 break;
2219 (unsigned long long)sh->sector,
2220 i, other);
2221 compute_block_2(sh, i, other);
2222 s->uptodate += 2;
2223 } else if (test_bit(R5_Insync, &dev->flags)) {
2224 set_bit(R5_LOCKED, &dev->flags);
2225 set_bit(R5_Wantread, &dev->flags);
2226 s->locked++;
2227 pr_debug("Reading block %d (sync=%d)\n",
2228 i, s->syncing);
2229 } 2324 }
2325 BUG_ON(other < 0);
2326 pr_debug("Computing stripe %llu blocks %d,%d\n",
2327 (unsigned long long)sh->sector,
2328 disk_idx, other);
2329 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
2330 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2331 set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
2332 set_bit(R5_Wantcompute, &sh->dev[other].flags);
2333 sh->ops.target = disk_idx;
2334 sh->ops.target2 = other;
2335 s->uptodate += 2;
2336 s->req_compute = 1;
2337 return 1;
2338 } else if (test_bit(R5_Insync, &dev->flags)) {
2339 set_bit(R5_LOCKED, &dev->flags);
2340 set_bit(R5_Wantread, &dev->flags);
2341 s->locked++;
2342 pr_debug("Reading block %d (sync=%d)\n",
2343 disk_idx, s->syncing);
2230 } 2344 }
2231 } 2345 }
2346
2347 return 0;
2348}
2349
2350/**
2351 * handle_stripe_fill6 - read or compute data to satisfy pending requests.
2352 */
2353static void handle_stripe_fill6(struct stripe_head *sh,
2354 struct stripe_head_state *s, struct r6_state *r6s,
2355 int disks)
2356{
2357 int i;
2358
2359 /* look for blocks to read/compute, skip this if a compute
2360 * is already in flight, or if the stripe contents are in the
2361 * midst of changing due to a write
2362 */
2363 if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
2364 !sh->reconstruct_state)
2365 for (i = disks; i--; )
2366 if (fetch_block6(sh, s, r6s, i, disks))
2367 break;
2232 set_bit(STRIPE_HANDLE, &sh->state); 2368 set_bit(STRIPE_HANDLE, &sh->state);
2233} 2369}
2234 2370
@@ -2362,114 +2498,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
2362 */ 2498 */
2363 /* since handle_stripe can be called at any time we need to handle the 2499 /* since handle_stripe can be called at any time we need to handle the
2364 * case where a compute block operation has been submitted and then a 2500 * case where a compute block operation has been submitted and then a
2365 * subsequent call wants to start a write request. raid5_run_ops only 2501 * subsequent call wants to start a write request. raid_run_ops only
2366 * handles the case where compute block and postxor are requested 2502 * handles the case where compute block and reconstruct are requested
2367 * simultaneously. If this is not the case then new writes need to be 2503 * simultaneously. If this is not the case then new writes need to be
2368 * held off until the compute completes. 2504 * held off until the compute completes.
2369 */ 2505 */
2370 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && 2506 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
2371 (s->locked == 0 && (rcw == 0 || rmw == 0) && 2507 (s->locked == 0 && (rcw == 0 || rmw == 0) &&
2372 !test_bit(STRIPE_BIT_DELAY, &sh->state))) 2508 !test_bit(STRIPE_BIT_DELAY, &sh->state)))
2373 schedule_reconstruction5(sh, s, rcw == 0, 0); 2509 schedule_reconstruction(sh, s, rcw == 0, 0);
2374} 2510}
2375 2511
2376static void handle_stripe_dirtying6(raid5_conf_t *conf, 2512static void handle_stripe_dirtying6(raid5_conf_t *conf,
2377 struct stripe_head *sh, struct stripe_head_state *s, 2513 struct stripe_head *sh, struct stripe_head_state *s,
2378 struct r6_state *r6s, int disks) 2514 struct r6_state *r6s, int disks)
2379{ 2515{
2380 int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; 2516 int rcw = 0, pd_idx = sh->pd_idx, i;
2381 int qd_idx = sh->qd_idx; 2517 int qd_idx = sh->qd_idx;
2518
2519 set_bit(STRIPE_HANDLE, &sh->state);
2382 for (i = disks; i--; ) { 2520 for (i = disks; i--; ) {
2383 struct r5dev *dev = &sh->dev[i]; 2521 struct r5dev *dev = &sh->dev[i];
2384 /* Would I have to read this buffer for reconstruct_write */ 2522 /* check if we haven't enough data */
2385 if (!test_bit(R5_OVERWRITE, &dev->flags) 2523 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
2386 && i != pd_idx && i != qd_idx 2524 i != pd_idx && i != qd_idx &&
2387 && (!test_bit(R5_LOCKED, &dev->flags) 2525 !test_bit(R5_LOCKED, &dev->flags) &&
2388 ) && 2526 !(test_bit(R5_UPTODATE, &dev->flags) ||
2389 !test_bit(R5_UPTODATE, &dev->flags)) { 2527 test_bit(R5_Wantcompute, &dev->flags))) {
2390 if (test_bit(R5_Insync, &dev->flags)) rcw++; 2528 rcw++;
2391 else { 2529 if (!test_bit(R5_Insync, &dev->flags))
2392 pr_debug("raid6: must_compute: " 2530 continue; /* it's a failed drive */
2393 "disk %d flags=%#lx\n", i, dev->flags); 2531
2394 must_compute++; 2532 if (
2533 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2534 pr_debug("Read_old stripe %llu "
2535 "block %d for Reconstruct\n",
2536 (unsigned long long)sh->sector, i);
2537 set_bit(R5_LOCKED, &dev->flags);
2538 set_bit(R5_Wantread, &dev->flags);
2539 s->locked++;
2540 } else {
2541 pr_debug("Request delayed stripe %llu "
2542 "block %d for Reconstruct\n",
2543 (unsigned long long)sh->sector, i);
2544 set_bit(STRIPE_DELAYED, &sh->state);
2545 set_bit(STRIPE_HANDLE, &sh->state);
2395 } 2546 }
2396 } 2547 }
2397 } 2548 }
2398 pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
2399 (unsigned long long)sh->sector, rcw, must_compute);
2400 set_bit(STRIPE_HANDLE, &sh->state);
2401
2402 if (rcw > 0)
2403 /* want reconstruct write, but need to get some data */
2404 for (i = disks; i--; ) {
2405 struct r5dev *dev = &sh->dev[i];
2406 if (!test_bit(R5_OVERWRITE, &dev->flags)
2407 && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
2408 && !test_bit(R5_LOCKED, &dev->flags) &&
2409 !test_bit(R5_UPTODATE, &dev->flags) &&
2410 test_bit(R5_Insync, &dev->flags)) {
2411 if (
2412 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2413 pr_debug("Read_old stripe %llu "
2414 "block %d for Reconstruct\n",
2415 (unsigned long long)sh->sector, i);
2416 set_bit(R5_LOCKED, &dev->flags);
2417 set_bit(R5_Wantread, &dev->flags);
2418 s->locked++;
2419 } else {
2420 pr_debug("Request delayed stripe %llu "
2421 "block %d for Reconstruct\n",
2422 (unsigned long long)sh->sector, i);
2423 set_bit(STRIPE_DELAYED, &sh->state);
2424 set_bit(STRIPE_HANDLE, &sh->state);
2425 }
2426 }
2427 }
2428 /* now if nothing is locked, and if we have enough data, we can start a 2549 /* now if nothing is locked, and if we have enough data, we can start a
2429 * write request 2550 * write request
2430 */ 2551 */
2431 if (s->locked == 0 && rcw == 0 && 2552 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
2553 s->locked == 0 && rcw == 0 &&
2432 !test_bit(STRIPE_BIT_DELAY, &sh->state)) { 2554 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
2433 if (must_compute > 0) { 2555 schedule_reconstruction(sh, s, 1, 0);
2434 /* We have failed blocks and need to compute them */
2435 switch (s->failed) {
2436 case 0:
2437 BUG();
2438 case 1:
2439 compute_block_1(sh, r6s->failed_num[0], 0);
2440 break;
2441 case 2:
2442 compute_block_2(sh, r6s->failed_num[0],
2443 r6s->failed_num[1]);
2444 break;
2445 default: /* This request should have been failed? */
2446 BUG();
2447 }
2448 }
2449
2450 pr_debug("Computing parity for stripe %llu\n",
2451 (unsigned long long)sh->sector);
2452 compute_parity6(sh, RECONSTRUCT_WRITE);
2453 /* now every locked buffer is ready to be written */
2454 for (i = disks; i--; )
2455 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
2456 pr_debug("Writing stripe %llu block %d\n",
2457 (unsigned long long)sh->sector, i);
2458 s->locked++;
2459 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2460 }
2461 if (s->locked == disks)
2462 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
2463 atomic_inc(&conf->pending_full_writes);
2464 /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
2465 set_bit(STRIPE_INSYNC, &sh->state);
2466
2467 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2468 atomic_dec(&conf->preread_active_stripes);
2469 if (atomic_read(&conf->preread_active_stripes) <
2470 IO_THRESHOLD)
2471 md_wakeup_thread(conf->mddev->thread);
2472 }
2473 } 2556 }
2474} 2557}
2475 2558
@@ -2528,7 +2611,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2528 * we are done. Otherwise update the mismatch count and repair 2611 * we are done. Otherwise update the mismatch count and repair
2529 * parity if !MD_RECOVERY_CHECK 2612 * parity if !MD_RECOVERY_CHECK
2530 */ 2613 */
2531 if (sh->ops.zero_sum_result == 0) 2614 if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
2532 /* parity is correct (on disc, 2615 /* parity is correct (on disc,
2533 * not in buffer any more) 2616 * not in buffer any more)
2534 */ 2617 */
@@ -2545,6 +2628,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2545 set_bit(R5_Wantcompute, 2628 set_bit(R5_Wantcompute,
2546 &sh->dev[sh->pd_idx].flags); 2629 &sh->dev[sh->pd_idx].flags);
2547 sh->ops.target = sh->pd_idx; 2630 sh->ops.target = sh->pd_idx;
2631 sh->ops.target2 = -1;
2548 s->uptodate++; 2632 s->uptodate++;
2549 } 2633 }
2550 } 2634 }
@@ -2561,67 +2645,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2561 2645
2562 2646
2563static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, 2647static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2564 struct stripe_head_state *s, 2648 struct stripe_head_state *s,
2565 struct r6_state *r6s, struct page *tmp_page, 2649 struct r6_state *r6s, int disks)
2566 int disks)
2567{ 2650{
2568 int update_p = 0, update_q = 0;
2569 struct r5dev *dev;
2570 int pd_idx = sh->pd_idx; 2651 int pd_idx = sh->pd_idx;
2571 int qd_idx = sh->qd_idx; 2652 int qd_idx = sh->qd_idx;
2653 struct r5dev *dev;
2572 2654
2573 set_bit(STRIPE_HANDLE, &sh->state); 2655 set_bit(STRIPE_HANDLE, &sh->state);
2574 2656
2575 BUG_ON(s->failed > 2); 2657 BUG_ON(s->failed > 2);
2576 BUG_ON(s->uptodate < disks); 2658
2577 /* Want to check and possibly repair P and Q. 2659 /* Want to check and possibly repair P and Q.
2578 * However there could be one 'failed' device, in which 2660 * However there could be one 'failed' device, in which
2579 * case we can only check one of them, possibly using the 2661 * case we can only check one of them, possibly using the
2580 * other to generate missing data 2662 * other to generate missing data
2581 */ 2663 */
2582 2664
2583 /* If !tmp_page, we cannot do the calculations, 2665 switch (sh->check_state) {
2584 * but as we have set STRIPE_HANDLE, we will soon be called 2666 case check_state_idle:
2585 * by stripe_handle with a tmp_page - just wait until then. 2667 /* start a new check operation if there are < 2 failures */
2586 */
2587 if (tmp_page) {
2588 if (s->failed == r6s->q_failed) { 2668 if (s->failed == r6s->q_failed) {
2589 /* The only possible failed device holds 'Q', so it 2669 /* The only possible failed device holds Q, so it
2590 * makes sense to check P (If anything else were failed, 2670 * makes sense to check P (If anything else were failed,
2591 * we would have used P to recreate it). 2671 * we would have used P to recreate it).
2592 */ 2672 */
2593 compute_block_1(sh, pd_idx, 1); 2673 sh->check_state = check_state_run;
2594 if (!page_is_zero(sh->dev[pd_idx].page)) {
2595 compute_block_1(sh, pd_idx, 0);
2596 update_p = 1;
2597 }
2598 } 2674 }
2599 if (!r6s->q_failed && s->failed < 2) { 2675 if (!r6s->q_failed && s->failed < 2) {
2600 /* q is not failed, and we didn't use it to generate 2676 /* Q is not failed, and we didn't use it to generate
2601 * anything, so it makes sense to check it 2677 * anything, so it makes sense to check it
2602 */ 2678 */
2603 memcpy(page_address(tmp_page), 2679 if (sh->check_state == check_state_run)
2604 page_address(sh->dev[qd_idx].page), 2680 sh->check_state = check_state_run_pq;
2605 STRIPE_SIZE); 2681 else
2606 compute_parity6(sh, UPDATE_PARITY); 2682 sh->check_state = check_state_run_q;
2607 if (memcmp(page_address(tmp_page),
2608 page_address(sh->dev[qd_idx].page),
2609 STRIPE_SIZE) != 0) {
2610 clear_bit(STRIPE_INSYNC, &sh->state);
2611 update_q = 1;
2612 }
2613 } 2683 }
2614 if (update_p || update_q) { 2684
2615 conf->mddev->resync_mismatches += STRIPE_SECTORS; 2685 /* discard potentially stale zero_sum_result */
2616 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) 2686 sh->ops.zero_sum_result = 0;
2617 /* don't try to repair!! */ 2687
2618 update_p = update_q = 0; 2688 if (sh->check_state == check_state_run) {
2689 /* async_xor_zero_sum destroys the contents of P */
2690 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
2691 s->uptodate--;
2619 } 2692 }
2693 if (sh->check_state >= check_state_run &&
2694 sh->check_state <= check_state_run_pq) {
2695 /* async_syndrome_zero_sum preserves P and Q, so
2696 * no need to mark them !uptodate here
2697 */
2698 set_bit(STRIPE_OP_CHECK, &s->ops_request);
2699 break;
2700 }
2701
2702 /* we have 2-disk failure */
2703 BUG_ON(s->failed != 2);
2704 /* fall through */
2705 case check_state_compute_result:
2706 sh->check_state = check_state_idle;
2707
2708 /* check that a write has not made the stripe insync */
2709 if (test_bit(STRIPE_INSYNC, &sh->state))
2710 break;
2620 2711
2621 /* now write out any block on a failed drive, 2712 /* now write out any block on a failed drive,
2622 * or P or Q if they need it 2713 * or P or Q if they were recomputed
2623 */ 2714 */
2624 2715 BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
2625 if (s->failed == 2) { 2716 if (s->failed == 2) {
2626 dev = &sh->dev[r6s->failed_num[1]]; 2717 dev = &sh->dev[r6s->failed_num[1]];
2627 s->locked++; 2718 s->locked++;
@@ -2634,14 +2725,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2634 set_bit(R5_LOCKED, &dev->flags); 2725 set_bit(R5_LOCKED, &dev->flags);
2635 set_bit(R5_Wantwrite, &dev->flags); 2726 set_bit(R5_Wantwrite, &dev->flags);
2636 } 2727 }
2637 2728 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
2638 if (update_p) {
2639 dev = &sh->dev[pd_idx]; 2729 dev = &sh->dev[pd_idx];
2640 s->locked++; 2730 s->locked++;
2641 set_bit(R5_LOCKED, &dev->flags); 2731 set_bit(R5_LOCKED, &dev->flags);
2642 set_bit(R5_Wantwrite, &dev->flags); 2732 set_bit(R5_Wantwrite, &dev->flags);
2643 } 2733 }
2644 if (update_q) { 2734 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
2645 dev = &sh->dev[qd_idx]; 2735 dev = &sh->dev[qd_idx];
2646 s->locked++; 2736 s->locked++;
2647 set_bit(R5_LOCKED, &dev->flags); 2737 set_bit(R5_LOCKED, &dev->flags);
@@ -2650,6 +2740,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2650 clear_bit(STRIPE_DEGRADED, &sh->state); 2740 clear_bit(STRIPE_DEGRADED, &sh->state);
2651 2741
2652 set_bit(STRIPE_INSYNC, &sh->state); 2742 set_bit(STRIPE_INSYNC, &sh->state);
2743 break;
2744 case check_state_run:
2745 case check_state_run_q:
2746 case check_state_run_pq:
2747 break; /* we will be called again upon completion */
2748 case check_state_check_result:
2749 sh->check_state = check_state_idle;
2750
2751 /* handle a successful check operation, if parity is correct
2752 * we are done. Otherwise update the mismatch count and repair
2753 * parity if !MD_RECOVERY_CHECK
2754 */
2755 if (sh->ops.zero_sum_result == 0) {
2756 /* both parities are correct */
2757 if (!s->failed)
2758 set_bit(STRIPE_INSYNC, &sh->state);
2759 else {
2760 /* in contrast to the raid5 case we can validate
2761 * parity, but still have a failure to write
2762 * back
2763 */
2764 sh->check_state = check_state_compute_result;
2765 /* Returning at this point means that we may go
2766 * off and bring p and/or q uptodate again so
2767 * we make sure to check zero_sum_result again
2768 * to verify if p or q need writeback
2769 */
2770 }
2771 } else {
2772 conf->mddev->resync_mismatches += STRIPE_SECTORS;
2773 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
2774 /* don't try to repair!! */
2775 set_bit(STRIPE_INSYNC, &sh->state);
2776 else {
2777 int *target = &sh->ops.target;
2778
2779 sh->ops.target = -1;
2780 sh->ops.target2 = -1;
2781 sh->check_state = check_state_compute_run;
2782 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
2783 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
2784 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
2785 set_bit(R5_Wantcompute,
2786 &sh->dev[pd_idx].flags);
2787 *target = pd_idx;
2788 target = &sh->ops.target2;
2789 s->uptodate++;
2790 }
2791 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
2792 set_bit(R5_Wantcompute,
2793 &sh->dev[qd_idx].flags);
2794 *target = qd_idx;
2795 s->uptodate++;
2796 }
2797 }
2798 }
2799 break;
2800 case check_state_compute_run:
2801 break;
2802 default:
2803 printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
2804 __func__, sh->check_state,
2805 (unsigned long long) sh->sector);
2806 BUG();
2653 } 2807 }
2654} 2808}
2655 2809
@@ -2667,6 +2821,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2667 if (i != sh->pd_idx && i != sh->qd_idx) { 2821 if (i != sh->pd_idx && i != sh->qd_idx) {
2668 int dd_idx, j; 2822 int dd_idx, j;
2669 struct stripe_head *sh2; 2823 struct stripe_head *sh2;
2824 struct async_submit_ctl submit;
2670 2825
2671 sector_t bn = compute_blocknr(sh, i, 1); 2826 sector_t bn = compute_blocknr(sh, i, 1);
2672 sector_t s = raid5_compute_sector(conf, bn, 0, 2827 sector_t s = raid5_compute_sector(conf, bn, 0,
@@ -2686,9 +2841,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2686 } 2841 }
2687 2842
2688 /* place all the copies on one channel */ 2843 /* place all the copies on one channel */
2844 init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
2689 tx = async_memcpy(sh2->dev[dd_idx].page, 2845 tx = async_memcpy(sh2->dev[dd_idx].page,
2690 sh->dev[i].page, 0, 0, STRIPE_SIZE, 2846 sh->dev[i].page, 0, 0, STRIPE_SIZE,
2691 ASYNC_TX_DEP_ACK, tx, NULL, NULL); 2847 &submit);
2692 2848
2693 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); 2849 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
2694 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); 2850 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
@@ -2974,7 +3130,7 @@ static bool handle_stripe5(struct stripe_head *sh)
2974 /* Need to write out all blocks after computing parity */ 3130 /* Need to write out all blocks after computing parity */
2975 sh->disks = conf->raid_disks; 3131 sh->disks = conf->raid_disks;
2976 stripe_set_idx(sh->sector, conf, 0, sh); 3132 stripe_set_idx(sh->sector, conf, 0, sh);
2977 schedule_reconstruction5(sh, &s, 1, 1); 3133 schedule_reconstruction(sh, &s, 1, 1);
2978 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { 3134 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
2979 clear_bit(STRIPE_EXPAND_READY, &sh->state); 3135 clear_bit(STRIPE_EXPAND_READY, &sh->state);
2980 atomic_dec(&conf->reshape_stripes); 3136 atomic_dec(&conf->reshape_stripes);
@@ -2994,7 +3150,7 @@ static bool handle_stripe5(struct stripe_head *sh)
2994 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); 3150 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
2995 3151
2996 if (s.ops_request) 3152 if (s.ops_request)
2997 raid5_run_ops(sh, s.ops_request); 3153 raid_run_ops(sh, s.ops_request);
2998 3154
2999 ops_run_io(sh, &s); 3155 ops_run_io(sh, &s);
3000 3156
@@ -3003,7 +3159,7 @@ static bool handle_stripe5(struct stripe_head *sh)
3003 return blocked_rdev == NULL; 3159 return blocked_rdev == NULL;
3004} 3160}
3005 3161
3006static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) 3162static bool handle_stripe6(struct stripe_head *sh)
3007{ 3163{
3008 raid5_conf_t *conf = sh->raid_conf; 3164 raid5_conf_t *conf = sh->raid_conf;
3009 int disks = sh->disks; 3165 int disks = sh->disks;
@@ -3015,9 +3171,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3015 mdk_rdev_t *blocked_rdev = NULL; 3171 mdk_rdev_t *blocked_rdev = NULL;
3016 3172
3017 pr_debug("handling stripe %llu, state=%#lx cnt=%d, " 3173 pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
3018 "pd_idx=%d, qd_idx=%d\n", 3174 "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
3019 (unsigned long long)sh->sector, sh->state, 3175 (unsigned long long)sh->sector, sh->state,
3020 atomic_read(&sh->count), pd_idx, qd_idx); 3176 atomic_read(&sh->count), pd_idx, qd_idx,
3177 sh->check_state, sh->reconstruct_state);
3021 memset(&s, 0, sizeof(s)); 3178 memset(&s, 0, sizeof(s));
3022 3179
3023 spin_lock(&sh->lock); 3180 spin_lock(&sh->lock);
@@ -3037,35 +3194,24 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3037 3194
3038 pr_debug("check %d: state 0x%lx read %p write %p written %p\n", 3195 pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
3039 i, dev->flags, dev->toread, dev->towrite, dev->written); 3196 i, dev->flags, dev->toread, dev->towrite, dev->written);
3040 /* maybe we can reply to a read */ 3197 /* maybe we can reply to a read
3041 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { 3198 *
3042 struct bio *rbi, *rbi2; 3199 * new wantfill requests are only permitted while
3043 pr_debug("Return read for disc %d\n", i); 3200 * ops_complete_biofill is guaranteed to be inactive
3044 spin_lock_irq(&conf->device_lock); 3201 */
3045 rbi = dev->toread; 3202 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
3046 dev->toread = NULL; 3203 !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
3047 if (test_and_clear_bit(R5_Overlap, &dev->flags)) 3204 set_bit(R5_Wantfill, &dev->flags);
3048 wake_up(&conf->wait_for_overlap);
3049 spin_unlock_irq(&conf->device_lock);
3050 while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
3051 copy_data(0, rbi, dev->page, dev->sector);
3052 rbi2 = r5_next_bio(rbi, dev->sector);
3053 spin_lock_irq(&conf->device_lock);
3054 if (!raid5_dec_bi_phys_segments(rbi)) {
3055 rbi->bi_next = return_bi;
3056 return_bi = rbi;
3057 }
3058 spin_unlock_irq(&conf->device_lock);
3059 rbi = rbi2;
3060 }
3061 }
3062 3205
3063 /* now count some things */ 3206 /* now count some things */
3064 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; 3207 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
3065 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; 3208 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
3209 if (test_bit(R5_Wantcompute, &dev->flags))
3210 BUG_ON(++s.compute > 2);
3066 3211
3067 3212 if (test_bit(R5_Wantfill, &dev->flags)) {
3068 if (dev->toread) 3213 s.to_fill++;
3214 } else if (dev->toread)
3069 s.to_read++; 3215 s.to_read++;
3070 if (dev->towrite) { 3216 if (dev->towrite) {
3071 s.to_write++; 3217 s.to_write++;
@@ -3106,6 +3252,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3106 blocked_rdev = NULL; 3252 blocked_rdev = NULL;
3107 } 3253 }
3108 3254
3255 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
3256 set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
3257 set_bit(STRIPE_BIOFILL_RUN, &sh->state);
3258 }
3259
3109 pr_debug("locked=%d uptodate=%d to_read=%d" 3260 pr_debug("locked=%d uptodate=%d to_read=%d"
3110 " to_write=%d failed=%d failed_num=%d,%d\n", 3261 " to_write=%d failed=%d failed_num=%d,%d\n",
3111 s.locked, s.uptodate, s.to_read, s.to_write, s.failed, 3262 s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3146,19 +3297,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3146 * or to load a block that is being partially written. 3297 * or to load a block that is being partially written.
3147 */ 3298 */
3148 if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || 3299 if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
3149 (s.syncing && (s.uptodate < disks)) || s.expanding) 3300 (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
3150 handle_stripe_fill6(sh, &s, &r6s, disks); 3301 handle_stripe_fill6(sh, &s, &r6s, disks);
3151 3302
3152 /* now to consider writing and what else, if anything should be read */ 3303 /* Now we check to see if any write operations have recently
3153 if (s.to_write) 3304 * completed
3305 */
3306 if (sh->reconstruct_state == reconstruct_state_drain_result) {
3307 int qd_idx = sh->qd_idx;
3308
3309 sh->reconstruct_state = reconstruct_state_idle;
3310 /* All the 'written' buffers and the parity blocks are ready to
3311 * be written back to disk
3312 */
3313 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
3314 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
3315 for (i = disks; i--; ) {
3316 dev = &sh->dev[i];
3317 if (test_bit(R5_LOCKED, &dev->flags) &&
3318 (i == sh->pd_idx || i == qd_idx ||
3319 dev->written)) {
3320 pr_debug("Writing block %d\n", i);
3321 BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
3322 set_bit(R5_Wantwrite, &dev->flags);
3323 if (!test_bit(R5_Insync, &dev->flags) ||
3324 ((i == sh->pd_idx || i == qd_idx) &&
3325 s.failed == 0))
3326 set_bit(STRIPE_INSYNC, &sh->state);
3327 }
3328 }
3329 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
3330 atomic_dec(&conf->preread_active_stripes);
3331 if (atomic_read(&conf->preread_active_stripes) <
3332 IO_THRESHOLD)
3333 md_wakeup_thread(conf->mddev->thread);
3334 }
3335 }
3336
3337 /* Now to consider new write requests and what else, if anything
3338 * should be read. We do not handle new writes when:
3339 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
3340 * 2/ A 'check' operation is in flight, as it may clobber the parity
3341 * block.
3342 */
3343 if (s.to_write && !sh->reconstruct_state && !sh->check_state)
3154 handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); 3344 handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
3155 3345
3156 /* maybe we need to check and possibly fix the parity for this stripe 3346 /* maybe we need to check and possibly fix the parity for this stripe
3157 * Any reads will already have been scheduled, so we just see if enough 3347 * Any reads will already have been scheduled, so we just see if enough
3158 * data is available 3348 * data is available. The parity check is held off while parity
3349 * dependent operations are in flight.
3159 */ 3350 */
3160 if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) 3351 if (sh->check_state ||
3161 handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); 3352 (s.syncing && s.locked == 0 &&
3353 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
3354 !test_bit(STRIPE_INSYNC, &sh->state)))
3355 handle_parity_checks6(conf, sh, &s, &r6s, disks);
3162 3356
3163 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 3357 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
3164 md_done_sync(conf->mddev, STRIPE_SECTORS,1); 3358 md_done_sync(conf->mddev, STRIPE_SECTORS,1);
@@ -3179,15 +3373,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3179 set_bit(R5_Wantwrite, &dev->flags); 3373 set_bit(R5_Wantwrite, &dev->flags);
3180 set_bit(R5_ReWrite, &dev->flags); 3374 set_bit(R5_ReWrite, &dev->flags);
3181 set_bit(R5_LOCKED, &dev->flags); 3375 set_bit(R5_LOCKED, &dev->flags);
3376 s.locked++;
3182 } else { 3377 } else {
3183 /* let's read it back */ 3378 /* let's read it back */
3184 set_bit(R5_Wantread, &dev->flags); 3379 set_bit(R5_Wantread, &dev->flags);
3185 set_bit(R5_LOCKED, &dev->flags); 3380 set_bit(R5_LOCKED, &dev->flags);
3381 s.locked++;
3186 } 3382 }
3187 } 3383 }
3188 } 3384 }
3189 3385
3190 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3386 /* Finish reconstruct operations initiated by the expansion process */
3387 if (sh->reconstruct_state == reconstruct_state_result) {
3388 sh->reconstruct_state = reconstruct_state_idle;
3389 clear_bit(STRIPE_EXPANDING, &sh->state);
3390 for (i = conf->raid_disks; i--; ) {
3391 set_bit(R5_Wantwrite, &sh->dev[i].flags);
3392 set_bit(R5_LOCKED, &sh->dev[i].flags);
3393 s.locked++;
3394 }
3395 }
3396
3397 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
3398 !sh->reconstruct_state) {
3191 struct stripe_head *sh2 3399 struct stripe_head *sh2
3192 = get_active_stripe(conf, sh->sector, 1, 1, 1); 3400 = get_active_stripe(conf, sh->sector, 1, 1, 1);
3193 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { 3401 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3208,14 +3416,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3208 /* Need to write out all blocks after computing P&Q */ 3416 /* Need to write out all blocks after computing P&Q */
3209 sh->disks = conf->raid_disks; 3417 sh->disks = conf->raid_disks;
3210 stripe_set_idx(sh->sector, conf, 0, sh); 3418 stripe_set_idx(sh->sector, conf, 0, sh);
3211 compute_parity6(sh, RECONSTRUCT_WRITE); 3419 schedule_reconstruction(sh, &s, 1, 1);
3212 for (i = conf->raid_disks ; i-- ; ) { 3420 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
3213 set_bit(R5_LOCKED, &sh->dev[i].flags);
3214 s.locked++;
3215 set_bit(R5_Wantwrite, &sh->dev[i].flags);
3216 }
3217 clear_bit(STRIPE_EXPANDING, &sh->state);
3218 } else if (s.expanded) {
3219 clear_bit(STRIPE_EXPAND_READY, &sh->state); 3421 clear_bit(STRIPE_EXPAND_READY, &sh->state);
3220 atomic_dec(&conf->reshape_stripes); 3422 atomic_dec(&conf->reshape_stripes);
3221 wake_up(&conf->wait_for_overlap); 3423 wake_up(&conf->wait_for_overlap);
@@ -3233,6 +3435,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3233 if (unlikely(blocked_rdev)) 3435 if (unlikely(blocked_rdev))
3234 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); 3436 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
3235 3437
3438 if (s.ops_request)
3439 raid_run_ops(sh, s.ops_request);
3440
3236 ops_run_io(sh, &s); 3441 ops_run_io(sh, &s);
3237 3442
3238 return_io(return_bi); 3443 return_io(return_bi);
@@ -3241,16 +3446,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3241} 3446}
3242 3447
3243/* returns true if the stripe was handled */ 3448/* returns true if the stripe was handled */
3244static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page) 3449static bool handle_stripe(struct stripe_head *sh)
3245{ 3450{
3246 if (sh->raid_conf->level == 6) 3451 if (sh->raid_conf->level == 6)
3247 return handle_stripe6(sh, tmp_page); 3452 return handle_stripe6(sh);
3248 else 3453 else
3249 return handle_stripe5(sh); 3454 return handle_stripe5(sh);
3250} 3455}
3251 3456
3252
3253
3254static void raid5_activate_delayed(raid5_conf_t *conf) 3457static void raid5_activate_delayed(raid5_conf_t *conf)
3255{ 3458{
3256 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { 3459 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@ -4046,7 +4249,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
4046 spin_unlock(&sh->lock); 4249 spin_unlock(&sh->lock);
4047 4250
4048 /* wait for any blocked device to be handled */ 4251 /* wait for any blocked device to be handled */
4049 while(unlikely(!handle_stripe(sh, NULL))) 4252 while (unlikely(!handle_stripe(sh)))
4050 ; 4253 ;
4051 release_stripe(sh); 4254 release_stripe(sh);
4052 4255
@@ -4103,7 +4306,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
4103 return handled; 4306 return handled;
4104 } 4307 }
4105 4308
4106 handle_stripe(sh, NULL); 4309 handle_stripe(sh);
4107 release_stripe(sh); 4310 release_stripe(sh);
4108 handled++; 4311 handled++;
4109 } 4312 }
@@ -4117,6 +4320,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
4117 return handled; 4320 return handled;
4118} 4321}
4119 4322
4323#ifdef CONFIG_MULTICORE_RAID456
4324static void __process_stripe(void *param, async_cookie_t cookie)
4325{
4326 struct stripe_head *sh = param;
4327
4328 handle_stripe(sh);
4329 release_stripe(sh);
4330}
4331
4332static void process_stripe(struct stripe_head *sh, struct list_head *domain)
4333{
4334 async_schedule_domain(__process_stripe, sh, domain);
4335}
4336
4337static void synchronize_stripe_processing(struct list_head *domain)
4338{
4339 async_synchronize_full_domain(domain);
4340}
4341#else
4342static void process_stripe(struct stripe_head *sh, struct list_head *domain)
4343{
4344 handle_stripe(sh);
4345 release_stripe(sh);
4346 cond_resched();
4347}
4348
4349static void synchronize_stripe_processing(struct list_head *domain)
4350{
4351}
4352#endif
4120 4353
4121 4354
4122/* 4355/*
@@ -4131,6 +4364,7 @@ static void raid5d(mddev_t *mddev)
4131 struct stripe_head *sh; 4364 struct stripe_head *sh;
4132 raid5_conf_t *conf = mddev_to_conf(mddev); 4365 raid5_conf_t *conf = mddev_to_conf(mddev);
4133 int handled; 4366 int handled;
4367 LIST_HEAD(raid_domain);
4134 4368
4135 pr_debug("+++ raid5d active\n"); 4369 pr_debug("+++ raid5d active\n");
4136 4370
@@ -4167,8 +4401,7 @@ static void raid5d(mddev_t *mddev)
4167 spin_unlock_irq(&conf->device_lock); 4401 spin_unlock_irq(&conf->device_lock);
4168 4402
4169 handled++; 4403 handled++;
4170 handle_stripe(sh, conf->spare_page); 4404 process_stripe(sh, &raid_domain);
4171 release_stripe(sh);
4172 4405
4173 spin_lock_irq(&conf->device_lock); 4406 spin_lock_irq(&conf->device_lock);
4174 } 4407 }
@@ -4176,6 +4409,7 @@ static void raid5d(mddev_t *mddev)
4176 4409
4177 spin_unlock_irq(&conf->device_lock); 4410 spin_unlock_irq(&conf->device_lock);
4178 4411
4412 synchronize_stripe_processing(&raid_domain);
4179 async_tx_issue_pending_all(); 4413 async_tx_issue_pending_all();
4180 unplug_slaves(mddev); 4414 unplug_slaves(mddev);
4181 4415
@@ -4308,6 +4542,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
4308 return sectors * (raid_disks - conf->max_degraded); 4542 return sectors * (raid_disks - conf->max_degraded);
4309} 4543}
4310 4544
4545static void raid5_free_percpu(raid5_conf_t *conf)
4546{
4547 struct raid5_percpu *percpu;
4548 unsigned long cpu;
4549
4550 if (!conf->percpu)
4551 return;
4552
4553 get_online_cpus();
4554 for_each_possible_cpu(cpu) {
4555 percpu = per_cpu_ptr(conf->percpu, cpu);
4556 safe_put_page(percpu->spare_page);
4557 kfree(percpu->scribble);
4558 }
4559#ifdef CONFIG_HOTPLUG_CPU
4560 unregister_cpu_notifier(&conf->cpu_notify);
4561#endif
4562 put_online_cpus();
4563
4564 free_percpu(conf->percpu);
4565}
4566
4567static void free_conf(raid5_conf_t *conf)
4568{
4569 shrink_stripes(conf);
4570 raid5_free_percpu(conf);
4571 kfree(conf->disks);
4572 kfree(conf->stripe_hashtbl);
4573 kfree(conf);
4574}
4575
4576#ifdef CONFIG_HOTPLUG_CPU
4577static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
4578 void *hcpu)
4579{
4580 raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
4581 long cpu = (long)hcpu;
4582 struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
4583
4584 switch (action) {
4585 case CPU_UP_PREPARE:
4586 case CPU_UP_PREPARE_FROZEN:
4587 if (conf->level == 6 && !percpu->spare_page)
4588 percpu->spare_page = alloc_page(GFP_KERNEL);
4589 if (!percpu->scribble)
4590 percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
4591
4592 if (!percpu->scribble ||
4593 (conf->level == 6 && !percpu->spare_page)) {
4594 safe_put_page(percpu->spare_page);
4595 kfree(percpu->scribble);
4596 pr_err("%s: failed memory allocation for cpu%ld\n",
4597 __func__, cpu);
4598 return NOTIFY_BAD;
4599 }
4600 break;
4601 case CPU_DEAD:
4602 case CPU_DEAD_FROZEN:
4603 safe_put_page(percpu->spare_page);
4604 kfree(percpu->scribble);
4605 percpu->spare_page = NULL;
4606 percpu->scribble = NULL;
4607 break;
4608 default:
4609 break;
4610 }
4611 return NOTIFY_OK;
4612}
4613#endif
4614
4615static int raid5_alloc_percpu(raid5_conf_t *conf)
4616{
4617 unsigned long cpu;
4618 struct page *spare_page;
4619 struct raid5_percpu *allcpus;
4620 void *scribble;
4621 int err;
4622
4623 allcpus = alloc_percpu(struct raid5_percpu);
4624 if (!allcpus)
4625 return -ENOMEM;
4626 conf->percpu = allcpus;
4627
4628 get_online_cpus();
4629 err = 0;
4630 for_each_present_cpu(cpu) {
4631 if (conf->level == 6) {
4632 spare_page = alloc_page(GFP_KERNEL);
4633 if (!spare_page) {
4634 err = -ENOMEM;
4635 break;
4636 }
4637 per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
4638 }
4639 scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
4640 if (!scribble) {
4641 err = -ENOMEM;
4642 break;
4643 }
4644 per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
4645 }
4646#ifdef CONFIG_HOTPLUG_CPU
4647 conf->cpu_notify.notifier_call = raid456_cpu_notify;
4648 conf->cpu_notify.priority = 0;
4649 if (err == 0)
4650 err = register_cpu_notifier(&conf->cpu_notify);
4651#endif
4652 put_online_cpus();
4653
4654 return err;
4655}
4656
4311static raid5_conf_t *setup_conf(mddev_t *mddev) 4657static raid5_conf_t *setup_conf(mddev_t *mddev)
4312{ 4658{
4313 raid5_conf_t *conf; 4659 raid5_conf_t *conf;
@@ -4347,6 +4693,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4347 goto abort; 4693 goto abort;
4348 4694
4349 conf->raid_disks = mddev->raid_disks; 4695 conf->raid_disks = mddev->raid_disks;
4696 conf->scribble_len = scribble_len(conf->raid_disks);
4350 if (mddev->reshape_position == MaxSector) 4697 if (mddev->reshape_position == MaxSector)
4351 conf->previous_raid_disks = mddev->raid_disks; 4698 conf->previous_raid_disks = mddev->raid_disks;
4352 else 4699 else
@@ -4362,11 +4709,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4362 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) 4709 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
4363 goto abort; 4710 goto abort;
4364 4711
4365 if (mddev->new_level == 6) { 4712 conf->level = mddev->new_level;
4366 conf->spare_page = alloc_page(GFP_KERNEL); 4713 if (raid5_alloc_percpu(conf) != 0)
4367 if (!conf->spare_page) 4714 goto abort;
4368 goto abort; 4715
4369 }
4370 spin_lock_init(&conf->device_lock); 4716 spin_lock_init(&conf->device_lock);
4371 init_waitqueue_head(&conf->wait_for_stripe); 4717 init_waitqueue_head(&conf->wait_for_stripe);
4372 init_waitqueue_head(&conf->wait_for_overlap); 4718 init_waitqueue_head(&conf->wait_for_overlap);
@@ -4402,7 +4748,6 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4402 } 4748 }
4403 4749
4404 conf->chunk_size = mddev->new_chunk; 4750 conf->chunk_size = mddev->new_chunk;
4405 conf->level = mddev->new_level;
4406 if (conf->level == 6) 4751 if (conf->level == 6)
4407 conf->max_degraded = 2; 4752 conf->max_degraded = 2;
4408 else 4753 else
@@ -4437,11 +4782,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4437 4782
4438 abort: 4783 abort:
4439 if (conf) { 4784 if (conf) {
4440 shrink_stripes(conf); 4785 free_conf(conf);
4441 safe_put_page(conf->spare_page);
4442 kfree(conf->disks);
4443 kfree(conf->stripe_hashtbl);
4444 kfree(conf);
4445 return ERR_PTR(-EIO); 4786 return ERR_PTR(-EIO);
4446 } else 4787 } else
4447 return ERR_PTR(-ENOMEM); 4788 return ERR_PTR(-ENOMEM);
@@ -4607,12 +4948,8 @@ abort:
4607 md_unregister_thread(mddev->thread); 4948 md_unregister_thread(mddev->thread);
4608 mddev->thread = NULL; 4949 mddev->thread = NULL;
4609 if (conf) { 4950 if (conf) {
4610 shrink_stripes(conf);
4611 print_raid5_conf(conf); 4951 print_raid5_conf(conf);
4612 safe_put_page(conf->spare_page); 4952 free_conf(conf);
4613 kfree(conf->disks);
4614 kfree(conf->stripe_hashtbl);
4615 kfree(conf);
4616 } 4953 }
4617 mddev->private = NULL; 4954 mddev->private = NULL;
4618 printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); 4955 printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev));
@@ -4627,13 +4964,10 @@ static int stop(mddev_t *mddev)
4627 4964
4628 md_unregister_thread(mddev->thread); 4965 md_unregister_thread(mddev->thread);
4629 mddev->thread = NULL; 4966 mddev->thread = NULL;
4630 shrink_stripes(conf);
4631 kfree(conf->stripe_hashtbl);
4632 mddev->queue->backing_dev_info.congested_fn = NULL; 4967 mddev->queue->backing_dev_info.congested_fn = NULL;
4633 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 4968 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
4634 sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); 4969 sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
4635 kfree(conf->disks); 4970 free_conf(conf);
4636 kfree(conf);
4637 mddev->private = NULL; 4971 mddev->private = NULL;
4638 return 0; 4972 return 0;
4639} 4973}
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 52ba99954de..116d0b44b2a 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -2,6 +2,7 @@
2#define _RAID5_H 2#define _RAID5_H
3 3
4#include <linux/raid/xor.h> 4#include <linux/raid/xor.h>
5#include <linux/dmaengine.h>
5 6
6/* 7/*
7 * 8 *
@@ -175,7 +176,9 @@
175 */ 176 */
176enum check_states { 177enum check_states {
177 check_state_idle = 0, 178 check_state_idle = 0,
178 check_state_run, /* parity check */ 179 check_state_run, /* xor parity check */
180 check_state_run_q, /* q-parity check */
181 check_state_run_pq, /* pq dual parity check */
179 check_state_check_result, 182 check_state_check_result,
180 check_state_compute_run, /* parity repair */ 183 check_state_compute_run, /* parity repair */
181 check_state_compute_result, 184 check_state_compute_result,
@@ -215,8 +218,8 @@ struct stripe_head {
215 * @target - STRIPE_OP_COMPUTE_BLK target 218 * @target - STRIPE_OP_COMPUTE_BLK target
216 */ 219 */
217 struct stripe_operations { 220 struct stripe_operations {
218 int target; 221 int target, target2;
219 u32 zero_sum_result; 222 enum sum_check_flags zero_sum_result;
220 } ops; 223 } ops;
221 struct r5dev { 224 struct r5dev {
222 struct bio req; 225 struct bio req;
@@ -298,7 +301,7 @@ struct r6_state {
298#define STRIPE_OP_COMPUTE_BLK 1 301#define STRIPE_OP_COMPUTE_BLK 1
299#define STRIPE_OP_PREXOR 2 302#define STRIPE_OP_PREXOR 2
300#define STRIPE_OP_BIODRAIN 3 303#define STRIPE_OP_BIODRAIN 3
301#define STRIPE_OP_POSTXOR 4 304#define STRIPE_OP_RECONSTRUCT 4
302#define STRIPE_OP_CHECK 5 305#define STRIPE_OP_CHECK 5
303 306
304/* 307/*
@@ -383,8 +386,21 @@ struct raid5_private_data {
383 * (fresh device added). 386 * (fresh device added).
384 * Cleared when a sync completes. 387 * Cleared when a sync completes.
385 */ 388 */
386 389 /* per cpu variables */
387 struct page *spare_page; /* Used when checking P/Q in raid6 */ 390 struct raid5_percpu {
391 struct page *spare_page; /* Used when checking P/Q in raid6 */
392 void *scribble; /* space for constructing buffer
393 * lists and performing address
394 * conversions
395 */
396 } *percpu;
397 size_t scribble_len; /* size of scribble region must be
398 * associated with conf to handle
399 * cpu hotplug while reshaping
400 */
401#ifdef CONFIG_HOTPLUG_CPU
402 struct notifier_block cpu_notify;
403#endif
388 404
389 /* 405 /*
390 * Free stripes pool 406 * Free stripes pool
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 5fc2ef8d97f..866e61c4e2e 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -58,25 +58,57 @@ struct dma_chan_ref {
58 * array. 58 * array.
59 * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a 59 * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
60 * dependency chain 60 * dependency chain
61 * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining.
62 */ 61 */
63enum async_tx_flags { 62enum async_tx_flags {
64 ASYNC_TX_XOR_ZERO_DST = (1 << 0), 63 ASYNC_TX_XOR_ZERO_DST = (1 << 0),
65 ASYNC_TX_XOR_DROP_DST = (1 << 1), 64 ASYNC_TX_XOR_DROP_DST = (1 << 1),
66 ASYNC_TX_ACK = (1 << 3), 65 ASYNC_TX_ACK = (1 << 2),
67 ASYNC_TX_DEP_ACK = (1 << 4), 66};
67
68/**
69 * struct async_submit_ctl - async_tx submission/completion modifiers
70 * @flags: submission modifiers
71 * @depend_tx: parent dependency of the current operation being submitted
72 * @cb_fn: callback routine to run at operation completion
73 * @cb_param: parameter for the callback routine
74 * @scribble: caller provided space for dma/page address conversions
75 */
76struct async_submit_ctl {
77 enum async_tx_flags flags;
78 struct dma_async_tx_descriptor *depend_tx;
79 dma_async_tx_callback cb_fn;
80 void *cb_param;
81 void *scribble;
68}; 82};
69 83
70#ifdef CONFIG_DMA_ENGINE 84#ifdef CONFIG_DMA_ENGINE
71#define async_tx_issue_pending_all dma_issue_pending_all 85#define async_tx_issue_pending_all dma_issue_pending_all
86
87/**
88 * async_tx_issue_pending - send pending descriptor to the hardware channel
89 * @tx: descriptor handle to retrieve hardware context
90 *
91 * Note: any dependent operations will have already been issued by
92 * async_tx_channel_switch, or (in the case of no channel switch) will
93 * be already pending on this channel.
94 */
95static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
96{
97 if (likely(tx)) {
98 struct dma_chan *chan = tx->chan;
99 struct dma_device *dma = chan->device;
100
101 dma->device_issue_pending(chan);
102 }
103}
72#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL 104#ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
73#include <asm/async_tx.h> 105#include <asm/async_tx.h>
74#else 106#else
75#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ 107#define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
76 __async_tx_find_channel(dep, type) 108 __async_tx_find_channel(dep, type)
77struct dma_chan * 109struct dma_chan *
78__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 110__async_tx_find_channel(struct async_submit_ctl *submit,
79 enum dma_transaction_type tx_type); 111 enum dma_transaction_type tx_type);
80#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ 112#endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
81#else 113#else
82static inline void async_tx_issue_pending_all(void) 114static inline void async_tx_issue_pending_all(void)
@@ -84,10 +116,16 @@ static inline void async_tx_issue_pending_all(void)
84 do { } while (0); 116 do { } while (0);
85} 117}
86 118
119static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
120{
121 do { } while (0);
122}
123
87static inline struct dma_chan * 124static inline struct dma_chan *
88async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 125async_tx_find_channel(struct async_submit_ctl *submit,
89 enum dma_transaction_type tx_type, struct page **dst, int dst_count, 126 enum dma_transaction_type tx_type, struct page **dst,
90 struct page **src, int src_count, size_t len) 127 int dst_count, struct page **src, int src_count,
128 size_t len)
91{ 129{
92 return NULL; 130 return NULL;
93} 131}
@@ -99,46 +137,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
99 * @cb_fn_param: parameter to pass to the callback routine 137 * @cb_fn_param: parameter to pass to the callback routine
100 */ 138 */
101static inline void 139static inline void
102async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) 140async_tx_sync_epilog(struct async_submit_ctl *submit)
103{ 141{
104 if (cb_fn) 142 if (submit->cb_fn)
105 cb_fn(cb_fn_param); 143 submit->cb_fn(submit->cb_param);
106} 144}
107 145
108void 146typedef union {
109async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, 147 unsigned long addr;
110 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, 148 struct page *page;
111 dma_async_tx_callback cb_fn, void *cb_fn_param); 149 dma_addr_t dma;
150} addr_conv_t;
151
152static inline void
153init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
154 struct dma_async_tx_descriptor *tx,
155 dma_async_tx_callback cb_fn, void *cb_param,
156 addr_conv_t *scribble)
157{
158 args->flags = flags;
159 args->depend_tx = tx;
160 args->cb_fn = cb_fn;
161 args->cb_param = cb_param;
162 args->scribble = scribble;
163}
164
165void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
166 struct async_submit_ctl *submit);
112 167
113struct dma_async_tx_descriptor * 168struct dma_async_tx_descriptor *
114async_xor(struct page *dest, struct page **src_list, unsigned int offset, 169async_xor(struct page *dest, struct page **src_list, unsigned int offset,
115 int src_cnt, size_t len, enum async_tx_flags flags, 170 int src_cnt, size_t len, struct async_submit_ctl *submit);
116 struct dma_async_tx_descriptor *depend_tx,
117 dma_async_tx_callback cb_fn, void *cb_fn_param);
118 171
119struct dma_async_tx_descriptor * 172struct dma_async_tx_descriptor *
120async_xor_zero_sum(struct page *dest, struct page **src_list, 173async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
121 unsigned int offset, int src_cnt, size_t len, 174 int src_cnt, size_t len, enum sum_check_flags *result,
122 u32 *result, enum async_tx_flags flags, 175 struct async_submit_ctl *submit);
123 struct dma_async_tx_descriptor *depend_tx,
124 dma_async_tx_callback cb_fn, void *cb_fn_param);
125 176
126struct dma_async_tx_descriptor * 177struct dma_async_tx_descriptor *
127async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, 178async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
128 unsigned int src_offset, size_t len, enum async_tx_flags flags, 179 unsigned int src_offset, size_t len,
129 struct dma_async_tx_descriptor *depend_tx, 180 struct async_submit_ctl *submit);
130 dma_async_tx_callback cb_fn, void *cb_fn_param);
131 181
132struct dma_async_tx_descriptor * 182struct dma_async_tx_descriptor *
133async_memset(struct page *dest, int val, unsigned int offset, 183async_memset(struct page *dest, int val, unsigned int offset,
134 size_t len, enum async_tx_flags flags, 184 size_t len, struct async_submit_ctl *submit);
135 struct dma_async_tx_descriptor *depend_tx, 185
136 dma_async_tx_callback cb_fn, void *cb_fn_param); 186struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
187
188struct dma_async_tx_descriptor *
189async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
190 size_t len, struct async_submit_ctl *submit);
191
192struct dma_async_tx_descriptor *
193async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
194 size_t len, enum sum_check_flags *pqres, struct page *spare,
195 struct async_submit_ctl *submit);
196
197struct dma_async_tx_descriptor *
198async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
199 struct page **ptrs, struct async_submit_ctl *submit);
137 200
138struct dma_async_tx_descriptor * 201struct dma_async_tx_descriptor *
139async_trigger_callback(enum async_tx_flags flags, 202async_raid6_datap_recov(int src_num, size_t bytes, int faila,
140 struct dma_async_tx_descriptor *depend_tx, 203 struct page **ptrs, struct async_submit_ctl *submit);
141 dma_async_tx_callback cb_fn, void *cb_fn_param);
142 204
143void async_tx_quiesce(struct dma_async_tx_descriptor **tx); 205void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
144#endif /* _ASYNC_TX_H_ */ 206#endif /* _ASYNC_TX_H_ */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ffefba81c81..1012f1abcb5 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -52,11 +52,11 @@ enum dma_status {
52enum dma_transaction_type { 52enum dma_transaction_type {
53 DMA_MEMCPY, 53 DMA_MEMCPY,
54 DMA_XOR, 54 DMA_XOR,
55 DMA_PQ_XOR, 55 DMA_PQ,
56 DMA_DUAL_XOR, 56 DMA_DUAL_XOR,
57 DMA_PQ_UPDATE, 57 DMA_PQ_UPDATE,
58 DMA_ZERO_SUM, 58 DMA_XOR_VAL,
59 DMA_PQ_ZERO_SUM, 59 DMA_PQ_VAL,
60 DMA_MEMSET, 60 DMA_MEMSET,
61 DMA_MEMCPY_CRC32C, 61 DMA_MEMCPY_CRC32C,
62 DMA_INTERRUPT, 62 DMA_INTERRUPT,
@@ -70,18 +70,23 @@ enum dma_transaction_type {
70 70
71/** 71/**
72 * enum dma_ctrl_flags - DMA flags to augment operation preparation, 72 * enum dma_ctrl_flags - DMA flags to augment operation preparation,
73 * control completion, and communicate status. 73 * control completion, and communicate status.
74 * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of 74 * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
75 * this transaction 75 * this transaction
76 * @DMA_CTRL_ACK - the descriptor cannot be reused until the client 76 * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
77 * acknowledges receipt, i.e. has has a chance to establish any 77 * acknowledges receipt, i.e. has had a chance to establish any dependency
78 * dependency chains 78 * chains
79 * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) 79 * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
80 * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) 80 * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
81 * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single 81 * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
82 * (if not set, do the source dma-unmapping as page) 82 * (if not set, do the source dma-unmapping as page)
83 * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single 83 * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
84 * (if not set, do the destination dma-unmapping as page) 84 * (if not set, do the destination dma-unmapping as page)
85 * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
86 * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
87 * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
88 * sources that were the result of a previous operation, in the case of a PQ
89 * operation it continues the calculation with new sources
85 */ 90 */
86enum dma_ctrl_flags { 91enum dma_ctrl_flags {
87 DMA_PREP_INTERRUPT = (1 << 0), 92 DMA_PREP_INTERRUPT = (1 << 0),
@@ -90,9 +95,31 @@ enum dma_ctrl_flags {
90 DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), 95 DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
91 DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), 96 DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
92 DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), 97 DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
98 DMA_PREP_PQ_DISABLE_P = (1 << 6),
99 DMA_PREP_PQ_DISABLE_Q = (1 << 7),
100 DMA_PREP_CONTINUE = (1 << 8),
93}; 101};
94 102
95/** 103/**
104 * enum sum_check_bits - bit position of sum_check_flags
105 */
106enum sum_check_bits {
107 SUM_CHECK_P = 0,
108 SUM_CHECK_Q = 1,
109};
110
111/**
112 * enum sum_check_flags - result of async_{xor,syndrome}_val operations
113 * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
114 * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
115 */
116enum sum_check_flags {
117 SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
118 SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
119};
120
121
122/**
96 * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. 123 * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
97 * See linux/cpumask.h 124 * See linux/cpumask.h
98 */ 125 */
@@ -213,6 +240,7 @@ struct dma_async_tx_descriptor {
213 * @global_node: list_head for global dma_device_list 240 * @global_node: list_head for global dma_device_list
214 * @cap_mask: one or more dma_capability flags 241 * @cap_mask: one or more dma_capability flags
215 * @max_xor: maximum number of xor sources, 0 if no capability 242 * @max_xor: maximum number of xor sources, 0 if no capability
243 * @max_pq: maximum number of PQ sources and PQ-continue capability
216 * @dev_id: unique device ID 244 * @dev_id: unique device ID
217 * @dev: struct device reference for dma mapping api 245 * @dev: struct device reference for dma mapping api
218 * @device_alloc_chan_resources: allocate resources and return the 246 * @device_alloc_chan_resources: allocate resources and return the
@@ -220,7 +248,9 @@ struct dma_async_tx_descriptor {
220 * @device_free_chan_resources: release DMA channel's resources 248 * @device_free_chan_resources: release DMA channel's resources
221 * @device_prep_dma_memcpy: prepares a memcpy operation 249 * @device_prep_dma_memcpy: prepares a memcpy operation
222 * @device_prep_dma_xor: prepares a xor operation 250 * @device_prep_dma_xor: prepares a xor operation
223 * @device_prep_dma_zero_sum: prepares a zero_sum operation 251 * @device_prep_dma_xor_val: prepares a xor validation operation
252 * @device_prep_dma_pq: prepares a pq operation
253 * @device_prep_dma_pq_val: prepares a pq validation operation
224 * @device_prep_dma_memset: prepares a memset operation 254 * @device_prep_dma_memset: prepares a memset operation
225 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation 255 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
226 * @device_prep_slave_sg: prepares a slave dma operation 256 * @device_prep_slave_sg: prepares a slave dma operation
@@ -235,7 +265,9 @@ struct dma_device {
235 struct list_head channels; 265 struct list_head channels;
236 struct list_head global_node; 266 struct list_head global_node;
237 dma_cap_mask_t cap_mask; 267 dma_cap_mask_t cap_mask;
238 int max_xor; 268 unsigned short max_xor;
269 unsigned short max_pq;
270 #define DMA_HAS_PQ_CONTINUE (1 << 15)
239 271
240 int dev_id; 272 int dev_id;
241 struct device *dev; 273 struct device *dev;
@@ -249,9 +281,17 @@ struct dma_device {
249 struct dma_async_tx_descriptor *(*device_prep_dma_xor)( 281 struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
250 struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, 282 struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
251 unsigned int src_cnt, size_t len, unsigned long flags); 283 unsigned int src_cnt, size_t len, unsigned long flags);
252 struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( 284 struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
253 struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, 285 struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
254 size_t len, u32 *result, unsigned long flags); 286 size_t len, enum sum_check_flags *result, unsigned long flags);
287 struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
288 struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
289 unsigned int src_cnt, const unsigned char *scf,
290 size_t len, unsigned long flags);
291 struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
292 struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
293 unsigned int src_cnt, const unsigned char *scf, size_t len,
294 enum sum_check_flags *pqres, unsigned long flags);
255 struct dma_async_tx_descriptor *(*device_prep_dma_memset)( 295 struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
256 struct dma_chan *chan, dma_addr_t dest, int value, size_t len, 296 struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
257 unsigned long flags); 297 unsigned long flags);
@@ -270,6 +310,60 @@ struct dma_device {
270 void (*device_issue_pending)(struct dma_chan *chan); 310 void (*device_issue_pending)(struct dma_chan *chan);
271}; 311};
272 312
313static inline void
314dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
315{
316 dma->max_pq = maxpq;
317 if (has_pq_continue)
318 dma->max_pq |= DMA_HAS_PQ_CONTINUE;
319}
320
321static inline bool dmaf_continue(enum dma_ctrl_flags flags)
322{
323 return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
324}
325
326static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
327{
328 enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
329
330 return (flags & mask) == mask;
331}
332
333static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
334{
335 return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
336}
337
338static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
339{
340 return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
341}
342
343/* dma_maxpq - reduce maxpq in the face of continued operations
344 * @dma - dma device with PQ capability
345 * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
346 *
347 * When an engine does not support native continuation we need 3 extra
348 * source slots to reuse P and Q with the following coefficients:
349 * 1/ {00} * P : remove P from Q', but use it as a source for P'
350 * 2/ {01} * Q : use Q to continue Q' calculation
351 * 3/ {00} * Q : subtract Q from P' to cancel (2)
352 *
353 * In the case where P is disabled we only need 1 extra source:
354 * 1/ {01} * Q : use Q to continue Q' calculation
355 */
356static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
357{
358 if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
359 return dma_dev_to_maxpq(dma);
360 else if (dmaf_p_disabled_continue(flags))
361 return dma_dev_to_maxpq(dma) - 1;
362 else if (dmaf_continue(flags))
363 return dma_dev_to_maxpq(dma) - 3;
364 BUG();
365}
366
273/* --- public DMA engine API --- */ 367/* --- public DMA engine API --- */
274 368
275#ifdef CONFIG_DMA_ENGINE 369#ifdef CONFIG_DMA_ENGINE