author     Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:42:29 -0400
committer  Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:42:29 -0400
commit     f9dd2134374c8de6b911e2b8652c6c9622eaa658 (patch)
tree       c1b8f8d622941606b9e7247ab31d811ba4295011
parent     4b652f0db3be891c7b76b109c3b55003b920fc96 (diff)
parent     07a3b417dc3d00802bd7b4874c3e811f0b015a7d (diff)
Merge branch 'md-raid6-accel' into ioat3.2
Conflicts:
include/linux/dmaengine.h
-rw-r--r--  Documentation/crypto/async-tx-api.txt        |   75
-rw-r--r--  arch/arm/include/asm/hardware/iop3xx-adma.h  |    5
-rw-r--r--  arch/arm/mach-iop13xx/include/mach/adma.h    |   12
-rw-r--r--  arch/arm/mach-iop13xx/setup.c                |   10
-rw-r--r--  arch/arm/plat-iop/adma.c                     |    2
-rw-r--r--  crypto/async_tx/Kconfig                      |    9
-rw-r--r--  crypto/async_tx/Makefile                     |    3
-rw-r--r--  crypto/async_tx/async_memcpy.c               |   39
-rw-r--r--  crypto/async_tx/async_memset.c               |   38
-rw-r--r--  crypto/async_tx/async_pq.c                   |  388
-rw-r--r--  crypto/async_tx/async_raid6_recov.c          |  448
-rw-r--r--  crypto/async_tx/async_tx.c                   |   83
-rw-r--r--  crypto/async_tx/async_xor.c                  |  199
-rw-r--r--  crypto/async_tx/raid6test.c                  |  241
-rw-r--r--  drivers/dma/Kconfig                          |    2
-rw-r--r--  drivers/dma/dmaengine.c                      |   53
-rw-r--r--  drivers/dma/dmatest.c                        |   26
-rw-r--r--  drivers/dma/iop-adma.c                       |   40
-rw-r--r--  drivers/md/Kconfig                           |   26
-rw-r--r--  drivers/md/raid5.c                           | 1486
-rw-r--r--  drivers/md/raid5.h                           |   28
-rw-r--r--  include/linux/async_tx.h                     |  126
-rw-r--r--  include/linux/dmaengine.h                    |  116
23 files changed, 2528 insertions, 927 deletions
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
index 9f59fcbf5d82..ba046b8fa92f 100644
--- a/Documentation/crypto/async-tx-api.txt
+++ b/Documentation/crypto/async-tx-api.txt
@@ -54,20 +54,23 @@ features surfaced as a result: | |||
54 | 54 | ||
55 | 3.1 General format of the API: | 55 | 3.1 General format of the API: |
56 | struct dma_async_tx_descriptor * | 56 | struct dma_async_tx_descriptor * |
57 | async_<operation>(<op specific parameters>, | 57 | async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
58 | enum async_tx_flags flags, | ||
59 | struct dma_async_tx_descriptor *dependency, | ||
60 | dma_async_tx_callback callback_routine, | ||
61 | void *callback_parameter); | ||
62 | 58 | ||
63 | 3.2 Supported operations: | 59 | 3.2 Supported operations: |
64 | memcpy - memory copy between a source and a destination buffer | 60 | memcpy - memory copy between a source and a destination buffer |
65 | memset - fill a destination buffer with a byte value | 61 | memset - fill a destination buffer with a byte value |
66 | xor - xor a series of source buffers and write the result to a | 62 | xor - xor a series of source buffers and write the result to a |
67 | destination buffer | 63 | destination buffer |
68 | xor_zero_sum - xor a series of source buffers and set a flag if the | 64 | xor_val - xor a series of source buffers and set a flag if the |
69 | result is zero. The implementation attempts to prevent | 65 | result is zero. The implementation attempts to prevent |
70 | writes to memory | 66 | writes to memory |
67 | pq - generate the p+q (raid6 syndrome) from a series of source buffers | ||
68 | pq_val - validate that p and/or q buffers are in sync with a given series of | ||
69 | sources | ||
70 | datap - (raid6_datap_recov) recover a raid6 data block and the p block | ||
71 | from the given sources | ||
72 | 2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given | ||
73 | sources | ||
71 | 74 | ||
72 | 3.3 Descriptor management: | 75 | 3.3 Descriptor management: |
73 | The return value is non-NULL and points to a 'descriptor' when the operation | 76 | The return value is non-NULL and points to a 'descriptor' when the operation |
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to | |||
80 | recycle (or free) the descriptor. A descriptor can be acked by one of the | 83 | recycle (or free) the descriptor. A descriptor can be acked by one of the |
81 | following methods: | 84 | following methods: |
82 | 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted | 85 | 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted |
83 | 2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent | 86 | 2/ submitting an unacknowledged descriptor as a dependency to another |
84 | descriptor of a new operation. | 87 | async_tx call will implicitly set the acknowledged state. |
85 | 3/ calling async_tx_ack() on the descriptor. | 88 | 3/ calling async_tx_ack() on the descriptor. |
86 | 89 | ||
87 | 3.4 When does the operation execute? | 90 | 3.4 When does the operation execute? |
@@ -119,30 +122,42 @@ of an operation. | |||
119 | Perform a xor->copy->xor operation where each operation depends on the | 122 | Perform a xor->copy->xor operation where each operation depends on the |
120 | result from the previous operation: | 123 | result from the previous operation: |
121 | 124 | ||
122 | void complete_xor_copy_xor(void *param) | 125 | void callback(void *param) |
123 | { | 126 | { |
124 | printk("complete\n"); | 127 | struct completion *cmp = param; |
128 | |||
129 | complete(cmp); | ||
125 | } | 130 | } |
126 | 131 | ||
127 | int run_xor_copy_xor(struct page **xor_srcs, | 132 | void run_xor_copy_xor(struct page **xor_srcs, |
128 | int xor_src_cnt, | 133 | int xor_src_cnt, |
129 | struct page *xor_dest, | 134 | struct page *xor_dest, |
130 | size_t xor_len, | 135 | size_t xor_len, |
131 | struct page *copy_src, | 136 | struct page *copy_src, |
132 | struct page *copy_dest, | 137 | struct page *copy_dest, |
133 | size_t copy_len) | 138 | size_t copy_len) |
134 | { | 139 | { |
135 | struct dma_async_tx_descriptor *tx; | 140 | struct dma_async_tx_descriptor *tx; |
141 | addr_conv_t addr_conv[xor_src_cnt]; | ||
142 | struct async_submit_ctl submit; | ||
143 | struct completion cmp; | ||
145 | |||
146 | init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL, | ||
147 | addr_conv); | ||
148 | tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit); | ||
136 | 149 | ||
137 | tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, | 150 | submit.depend_tx = tx; |
138 | ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); | 151 | tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit); |
139 | tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, | 152 | |
140 | ASYNC_TX_DEP_ACK, tx, NULL, NULL); | 153 | init_completion(&cmp); |
141 | tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, | 154 | init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, |
142 | ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, | 155 | callback, &cmp, addr_conv); |
143 | tx, complete_xor_copy_xor, NULL); | 156 | tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit); |
144 | 157 | ||
145 | async_tx_issue_pending_all(); | 158 | async_tx_issue_pending_all(); |
159 | |||
160 | wait_for_completion(&cmp); | ||
146 | } | 161 | } |
147 | 162 | ||
148 | See include/linux/async_tx.h for more information on the flags. See the | 163 | See include/linux/async_tx.h for more information on the flags. See the |
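The documentation hunk above captures the central API change of this series: the trailing flags/dependency/callback arguments of every async_<operation> call are folded into a single struct async_submit_ctl. As a point of reference, a minimal sketch of that structure and its initializer, assuming the layout this series adds to include/linux/async_tx.h (the header itself, listed in the diffstat above, is authoritative):

	struct async_submit_ctl {
		enum async_tx_flags flags;
		struct dma_async_tx_descriptor *depend_tx;
		dma_async_tx_callback cb_fn;
		void *cb_param;
		void *scribble;		/* caller-provided scratch space */
	};

	static inline void
	init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
			  struct dma_async_tx_descriptor *tx,
			  dma_async_tx_callback cb_fn, void *cb_param,
			  addr_conv_t *scribble)
	{
		args->flags = flags;
		args->depend_tx = tx;
		args->cb_fn = cb_fn;
		args->cb_param = cb_param;
		args->scribble = scribble;
	}

Reusing one on-stack async_submit_ctl across a chain, as the example above does, works because each async_* call consumes the fields at submission time; the caller can then update depend_tx or re-initialize the structure for the next stage.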
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 83e6ba338e2c..26eefea02314 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -756,13 +756,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, | |||
756 | hw_desc->src[0] = val; | 756 | hw_desc->src[0] = val; |
757 | } | 757 | } |
758 | 758 | ||
759 | static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) | 759 | static inline enum sum_check_flags |
760 | iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) | ||
760 | { | 761 | { |
761 | struct iop3xx_desc_aau *hw_desc = desc->hw_desc; | 762 | struct iop3xx_desc_aau *hw_desc = desc->hw_desc; |
762 | struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; | 763 | struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; |
763 | 764 | ||
764 | iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); | 765 | iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); |
765 | return desc_ctrl.zero_result_err; | 766 | return desc_ctrl.zero_result_err << SUM_CHECK_P; |
766 | } | 767 | } |
767 | 768 | ||
768 | static inline void iop_chan_append(struct iop_adma_chan *chan) | 769 | static inline void iop_chan_append(struct iop_adma_chan *chan) |
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h
index 5722e86f2174..1cd31df8924d 100644
--- a/arch/arm/mach-iop13xx/include/mach/adma.h
+++ b/arch/arm/mach-iop13xx/include/mach/adma.h
@@ -428,18 +428,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, | |||
428 | hw_desc->block_fill_data = val; | 428 | hw_desc->block_fill_data = val; |
429 | } | 429 | } |
430 | 430 | ||
431 | static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) | 431 | static inline enum sum_check_flags |
432 | iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) | ||
432 | { | 433 | { |
433 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; | 434 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; |
434 | struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; | 435 | struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; |
435 | struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; | 436 | struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; |
437 | enum sum_check_flags flags; | ||
436 | 438 | ||
437 | BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); | 439 | BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); |
438 | 440 | ||
439 | if (desc_ctrl.pq_xfer_en) | 441 | flags = byte_count.zero_result_err_q << SUM_CHECK_Q; |
440 | return byte_count.zero_result_err_q; | 442 | flags |= byte_count.zero_result_err << SUM_CHECK_P; |
441 | else | 443 | |
442 | return byte_count.zero_result_err; | 444 | return flags; |
443 | } | 445 | } |
444 | 446 | ||
445 | static inline void iop_chan_append(struct iop_adma_chan *chan) | 447 | static inline void iop_chan_append(struct iop_adma_chan *chan) |
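Both zero-result accessors now return an enum sum_check_flags instead of a plain int, so a pq-capable channel can report P-parity and Q-syndrome check failures independently in one value. A sketch of the bit encoding this relies on, assuming the definitions this series adds to include/linux/dmaengine.h (the handle_* helpers below are placeholders):

	enum sum_check_bits {
		SUM_CHECK_P = 0,	/* xor / P-parity check */
		SUM_CHECK_Q = 1,	/* reed-solomon / Q-syndrome check */
	};

	enum sum_check_flags {
		SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
		SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
	};

	/* caller inspecting the outcome of a validate operation */
	enum sum_check_flags result = iop_desc_get_zero_result(desc);

	if (result & SUM_CHECK_P_RESULT)
		handle_parity_mismatch();
	if (result & SUM_CHECK_Q_RESULT)
		handle_syndrome_mismatch();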
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index bee42c609df6..faaef95342b6 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -478,7 +478,7 @@ void __init iop13xx_platform_init(void) | |||
478 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); | 478 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); |
479 | dma_cap_set(DMA_XOR, plat_data->cap_mask); | 479 | dma_cap_set(DMA_XOR, plat_data->cap_mask); |
480 | dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); | 480 | dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); |
481 | dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); | 481 | dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); |
482 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); | 482 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); |
483 | dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); | 483 | dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); |
484 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); | 484 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); |
@@ -490,7 +490,7 @@ void __init iop13xx_platform_init(void) | |||
490 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); | 490 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); |
491 | dma_cap_set(DMA_XOR, plat_data->cap_mask); | 491 | dma_cap_set(DMA_XOR, plat_data->cap_mask); |
492 | dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); | 492 | dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); |
493 | dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); | 493 | dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); |
494 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); | 494 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); |
495 | dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); | 495 | dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); |
496 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); | 496 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); |
@@ -502,13 +502,13 @@ void __init iop13xx_platform_init(void) | |||
502 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); | 502 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); |
503 | dma_cap_set(DMA_XOR, plat_data->cap_mask); | 503 | dma_cap_set(DMA_XOR, plat_data->cap_mask); |
504 | dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); | 504 | dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); |
505 | dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); | 505 | dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); |
506 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); | 506 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); |
507 | dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); | 507 | dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); |
508 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); | 508 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); |
509 | dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); | 509 | dma_cap_set(DMA_PQ, plat_data->cap_mask); |
510 | dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); | 510 | dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); |
511 | dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask); | 511 | dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); |
512 | break; | 512 | break; |
513 | } | 513 | } |
514 | } | 514 | } |
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index 3c127aabe214..da1dd0dab07c 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -198,7 +198,7 @@ static int __init iop3xx_adma_cap_init(void) | |||
198 | dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); | 198 | dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); |
199 | #else | 199 | #else |
200 | dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); | 200 | dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); |
201 | dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask); | 201 | dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask); |
202 | dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); | 202 | dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); |
203 | dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); | 203 | dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); |
204 | #endif | 204 | #endif |
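The platform changes above are a straight capability rename: DMA_ZERO_SUM becomes DMA_XOR_VAL, DMA_PQ_XOR becomes DMA_PQ, and DMA_PQ_ZERO_SUM becomes DMA_PQ_VAL, matching the 'val' (validate) naming of the new async_tx entry points. For illustration, a client wanting a raid6-capable channel would build its request mask with the renamed flags roughly as follows (a sketch using the existing dmaengine channel-request API, not code from this commit):

	dma_cap_mask_t mask;
	struct dma_chan *chan;

	dma_cap_zero(mask);
	dma_cap_set(DMA_PQ, mask);
	dma_cap_set(DMA_PQ_VAL, mask);

	/* take any channel advertising pq generation and validation */
	chan = dma_request_channel(mask, NULL, NULL);
	if (!chan)
		pr_info("no pq offload channel, using the synchronous path\n");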
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
index d8fb39145986..e5aeb2b79e6f 100644
--- a/crypto/async_tx/Kconfig
+++ b/crypto/async_tx/Kconfig
@@ -14,3 +14,12 @@ config ASYNC_MEMSET | |||
14 | tristate | 14 | tristate |
15 | select ASYNC_CORE | 15 | select ASYNC_CORE |
16 | 16 | ||
17 | config ASYNC_PQ | ||
18 | tristate | ||
19 | select ASYNC_CORE | ||
20 | |||
21 | config ASYNC_RAID6_RECOV | ||
22 | tristate | ||
23 | select ASYNC_CORE | ||
24 | select ASYNC_PQ | ||
25 | |||
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
index 27baa7d52fbc..d1e0e6f72bc1 100644
--- a/crypto/async_tx/Makefile
+++ b/crypto/async_tx/Makefile
@@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o | |||
2 | obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o | 2 | obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o |
3 | obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o | 3 | obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o |
4 | obj-$(CONFIG_ASYNC_XOR) += async_xor.o | 4 | obj-$(CONFIG_ASYNC_XOR) += async_xor.o |
5 | obj-$(CONFIG_ASYNC_PQ) += async_pq.o | ||
6 | obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o | ||
7 | obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o | ||
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index ddccfb01c416..98e15bd0dcb5 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -33,28 +33,28 @@ | |||
33 | * async_memcpy - attempt to copy memory with a dma engine. | 33 | * async_memcpy - attempt to copy memory with a dma engine. |
34 | * @dest: destination page | 34 | * @dest: destination page |
35 | * @src: src page | 35 | * @src: src page |
36 | * @offset: offset in pages to start transaction | 36 | * @dest_offset: offset into 'dest' to start transaction |
37 | * @src_offset: offset into 'src' to start transaction | ||
37 | * @len: length in bytes | 38 | * @len: length in bytes |
38 | * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, | 39 | * @submit: submission / completion modifiers |
39 | * @depend_tx: memcpy depends on the result of this transaction | 40 | * |
40 | * @cb_fn: function to call when the memcpy completes | 41 | * honored flags: ASYNC_TX_ACK |
41 | * @cb_param: parameter to pass to the callback routine | ||
42 | */ | 42 | */ |
43 | struct dma_async_tx_descriptor * | 43 | struct dma_async_tx_descriptor * |
44 | async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, | 44 | async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, |
45 | unsigned int src_offset, size_t len, enum async_tx_flags flags, | 45 | unsigned int src_offset, size_t len, |
46 | struct dma_async_tx_descriptor *depend_tx, | 46 | struct async_submit_ctl *submit) |
47 | dma_async_tx_callback cb_fn, void *cb_param) | ||
48 | { | 47 | { |
49 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, | 48 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY, |
50 | &dest, 1, &src, 1, len); | 49 | &dest, 1, &src, 1, len); |
51 | struct dma_device *device = chan ? chan->device : NULL; | 50 | struct dma_device *device = chan ? chan->device : NULL; |
52 | struct dma_async_tx_descriptor *tx = NULL; | 51 | struct dma_async_tx_descriptor *tx = NULL; |
53 | 52 | ||
54 | if (device) { | 53 | if (device) { |
55 | dma_addr_t dma_dest, dma_src; | 54 | dma_addr_t dma_dest, dma_src; |
56 | unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; | 55 | unsigned long dma_prep_flags; |
57 | 56 | ||
57 | dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; | ||
58 | dma_dest = dma_map_page(device->dev, dest, dest_offset, len, | 58 | dma_dest = dma_map_page(device->dev, dest, dest_offset, len, |
59 | DMA_FROM_DEVICE); | 59 | DMA_FROM_DEVICE); |
60 | 60 | ||
@@ -67,13 +67,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, | |||
67 | 67 | ||
68 | if (tx) { | 68 | if (tx) { |
69 | pr_debug("%s: (async) len: %zu\n", __func__, len); | 69 | pr_debug("%s: (async) len: %zu\n", __func__, len); |
70 | async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); | 70 | async_tx_submit(chan, tx, submit); |
71 | } else { | 71 | } else { |
72 | void *dest_buf, *src_buf; | 72 | void *dest_buf, *src_buf; |
73 | pr_debug("%s: (sync) len: %zu\n", __func__, len); | 73 | pr_debug("%s: (sync) len: %zu\n", __func__, len); |
74 | 74 | ||
75 | /* wait for any prerequisite operations */ | 75 | /* wait for any prerequisite operations */ |
76 | async_tx_quiesce(&depend_tx); | 76 | async_tx_quiesce(&submit->depend_tx); |
77 | 77 | ||
78 | dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; | 78 | dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; |
79 | src_buf = kmap_atomic(src, KM_USER1) + src_offset; | 79 | src_buf = kmap_atomic(src, KM_USER1) + src_offset; |
@@ -83,26 +83,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, | |||
83 | kunmap_atomic(dest_buf, KM_USER0); | 83 | kunmap_atomic(dest_buf, KM_USER0); |
84 | kunmap_atomic(src_buf, KM_USER1); | 84 | kunmap_atomic(src_buf, KM_USER1); |
85 | 85 | ||
86 | async_tx_sync_epilog(cb_fn, cb_param); | 86 | async_tx_sync_epilog(submit); |
87 | } | 87 | } |
88 | 88 | ||
89 | return tx; | 89 | return tx; |
90 | } | 90 | } |
91 | EXPORT_SYMBOL_GPL(async_memcpy); | 91 | EXPORT_SYMBOL_GPL(async_memcpy); |
92 | 92 | ||
93 | static int __init async_memcpy_init(void) | ||
94 | { | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static void __exit async_memcpy_exit(void) | ||
99 | { | ||
100 | do { } while (0); | ||
101 | } | ||
102 | |||
103 | module_init(async_memcpy_init); | ||
104 | module_exit(async_memcpy_exit); | ||
105 | |||
106 | MODULE_AUTHOR("Intel Corporation"); | 93 | MODULE_AUTHOR("Intel Corporation"); |
107 | MODULE_DESCRIPTION("asynchronous memcpy api"); | 94 | MODULE_DESCRIPTION("asynchronous memcpy api"); |
108 | MODULE_LICENSE("GPL"); | 95 | MODULE_LICENSE("GPL"); |
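The conversion above is representative of every async_tx entry point in this series: the flags, dependency and callback arguments move into the async_submit_ctl, and only ASYNC_TX_ACK remains meaningful for memcpy. An illustrative before/after sketch of a call site (the variable names are not from this commit):

	/* old interface */
	tx = async_memcpy(dest, src, 0, 0, len, ASYNC_TX_ACK,
			  depend_tx, callback, cb_arg);

	/* new interface */
	struct async_submit_ctl submit;

	init_async_submit(&submit, ASYNC_TX_ACK, depend_tx, callback, cb_arg,
			  NULL);
	tx = async_memcpy(dest, src, 0, 0, len, &submit);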
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 5b5eb99bb244..b896a6e5f673 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -35,26 +35,23 @@ | |||
35 | * @val: fill value | 35 | * @val: fill value |
36 | * @offset: offset in pages to start transaction | 36 | * @offset: offset in pages to start transaction |
37 | * @len: length in bytes | 37 | * @len: length in bytes |
38 | * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK | 38 | * |
39 | * @depend_tx: memset depends on the result of this transaction | 39 | * honored flags: ASYNC_TX_ACK |
40 | * @cb_fn: function to call when the memcpy completes | ||
41 | * @cb_param: parameter to pass to the callback routine | ||
42 | */ | 40 | */ |
43 | struct dma_async_tx_descriptor * | 41 | struct dma_async_tx_descriptor * |
44 | async_memset(struct page *dest, int val, unsigned int offset, | 42 | async_memset(struct page *dest, int val, unsigned int offset, size_t len, |
45 | size_t len, enum async_tx_flags flags, | 43 | struct async_submit_ctl *submit) |
46 | struct dma_async_tx_descriptor *depend_tx, | ||
47 | dma_async_tx_callback cb_fn, void *cb_param) | ||
48 | { | 44 | { |
49 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, | 45 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, |
50 | &dest, 1, NULL, 0, len); | 46 | &dest, 1, NULL, 0, len); |
51 | struct dma_device *device = chan ? chan->device : NULL; | 47 | struct dma_device *device = chan ? chan->device : NULL; |
52 | struct dma_async_tx_descriptor *tx = NULL; | 48 | struct dma_async_tx_descriptor *tx = NULL; |
53 | 49 | ||
54 | if (device) { | 50 | if (device) { |
55 | dma_addr_t dma_dest; | 51 | dma_addr_t dma_dest; |
56 | unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; | 52 | unsigned long dma_prep_flags; |
57 | 53 | ||
54 | dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; | ||
58 | dma_dest = dma_map_page(device->dev, dest, offset, len, | 55 | dma_dest = dma_map_page(device->dev, dest, offset, len, |
59 | DMA_FROM_DEVICE); | 56 | DMA_FROM_DEVICE); |
60 | 57 | ||
@@ -64,38 +61,25 @@ async_memset(struct page *dest, int val, unsigned int offset, | |||
64 | 61 | ||
65 | if (tx) { | 62 | if (tx) { |
66 | pr_debug("%s: (async) len: %zu\n", __func__, len); | 63 | pr_debug("%s: (async) len: %zu\n", __func__, len); |
67 | async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); | 64 | async_tx_submit(chan, tx, submit); |
68 | } else { /* run the memset synchronously */ | 65 | } else { /* run the memset synchronously */ |
69 | void *dest_buf; | 66 | void *dest_buf; |
70 | pr_debug("%s: (sync) len: %zu\n", __func__, len); | 67 | pr_debug("%s: (sync) len: %zu\n", __func__, len); |
71 | 68 | ||
72 | dest_buf = (void *) (((char *) page_address(dest)) + offset); | 69 | dest_buf = page_address(dest) + offset; |
73 | 70 | ||
74 | /* wait for any prerequisite operations */ | 71 | /* wait for any prerequisite operations */ |
75 | async_tx_quiesce(&depend_tx); | 72 | async_tx_quiesce(&submit->depend_tx); |
76 | 73 | ||
77 | memset(dest_buf, val, len); | 74 | memset(dest_buf, val, len); |
78 | 75 | ||
79 | async_tx_sync_epilog(cb_fn, cb_param); | 76 | async_tx_sync_epilog(submit); |
80 | } | 77 | } |
81 | 78 | ||
82 | return tx; | 79 | return tx; |
83 | } | 80 | } |
84 | EXPORT_SYMBOL_GPL(async_memset); | 81 | EXPORT_SYMBOL_GPL(async_memset); |
85 | 82 | ||
86 | static int __init async_memset_init(void) | ||
87 | { | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static void __exit async_memset_exit(void) | ||
92 | { | ||
93 | do { } while (0); | ||
94 | } | ||
95 | |||
96 | module_init(async_memset_init); | ||
97 | module_exit(async_memset_exit); | ||
98 | |||
99 | MODULE_AUTHOR("Intel Corporation"); | 83 | MODULE_AUTHOR("Intel Corporation"); |
100 | MODULE_DESCRIPTION("asynchronous memset api"); | 84 | MODULE_DESCRIPTION("asynchronous memset api"); |
101 | MODULE_LICENSE("GPL"); | 85 | MODULE_LICENSE("GPL"); |
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
new file mode 100644
index 000000000000..108b21efb499
--- /dev/null
+++ b/crypto/async_tx/async_pq.c
@@ -0,0 +1,388 @@ | |||
1 | /* | ||
2 | * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com> | ||
3 | * Copyright(c) 2009 Intel Corporation | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License as published by the Free | ||
7 | * Software Foundation; either version 2 of the License, or (at your option) | ||
8 | * any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License along with | ||
16 | * this program; if not, write to the Free Software Foundation, Inc., 59 | ||
17 | * Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
18 | * | ||
19 | * The full GNU General Public License is included in this distribution in the | ||
20 | * file called COPYING. | ||
21 | */ | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/dma-mapping.h> | ||
25 | #include <linux/raid/pq.h> | ||
26 | #include <linux/async_tx.h> | ||
27 | |||
28 | /** | ||
29 | * scribble - space to hold throwaway P buffer for synchronous gen_syndrome | ||
30 | */ | ||
31 | static struct page *scribble; | ||
32 | |||
33 | static bool is_raid6_zero_block(struct page *p) | ||
34 | { | ||
35 | return p == (void *) raid6_empty_zero_page; | ||
36 | } | ||
37 | |||
38 | /* the struct page *blocks[] parameter passed to async_gen_syndrome() | ||
39 | * and async_syndrome_val() contains the 'P' destination address at | ||
40 | * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] | ||
41 | * | ||
42 | * note: these are macros as they are used as lvalues | ||
43 | */ | ||
44 | #define P(b, d) (b[d-2]) | ||
45 | #define Q(b, d) (b[d-1]) | ||
46 | |||
47 | /** | ||
48 | * do_async_gen_syndrome - asynchronously calculate P and/or Q | ||
49 | */ | ||
50 | static __async_inline struct dma_async_tx_descriptor * | ||
51 | do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, | ||
52 | const unsigned char *scfs, unsigned int offset, int disks, | ||
53 | size_t len, dma_addr_t *dma_src, | ||
54 | struct async_submit_ctl *submit) | ||
55 | { | ||
56 | struct dma_async_tx_descriptor *tx = NULL; | ||
57 | struct dma_device *dma = chan->device; | ||
58 | enum dma_ctrl_flags dma_flags = 0; | ||
59 | enum async_tx_flags flags_orig = submit->flags; | ||
60 | dma_async_tx_callback cb_fn_orig = submit->cb_fn; | ||
61 | dma_async_tx_callback cb_param_orig = submit->cb_param; | ||
62 | int src_cnt = disks - 2; | ||
63 | unsigned char coefs[src_cnt]; | ||
64 | unsigned short pq_src_cnt; | ||
65 | dma_addr_t dma_dest[2]; | ||
66 | int src_off = 0; | ||
67 | int idx; | ||
68 | int i; | ||
69 | |||
70 | /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ | ||
71 | if (P(blocks, disks)) | ||
72 | dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, | ||
73 | len, DMA_BIDIRECTIONAL); | ||
74 | else | ||
75 | dma_flags |= DMA_PREP_PQ_DISABLE_P; | ||
76 | if (Q(blocks, disks)) | ||
77 | dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, | ||
78 | len, DMA_BIDIRECTIONAL); | ||
79 | else | ||
80 | dma_flags |= DMA_PREP_PQ_DISABLE_Q; | ||
81 | |||
82 | /* convert source addresses being careful to collapse 'empty' | ||
83 | * sources and update the coefficients accordingly | ||
84 | */ | ||
85 | for (i = 0, idx = 0; i < src_cnt; i++) { | ||
86 | if (is_raid6_zero_block(blocks[i])) | ||
87 | continue; | ||
88 | dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, | ||
89 | DMA_TO_DEVICE); | ||
90 | coefs[idx] = scfs[i]; | ||
91 | idx++; | ||
92 | } | ||
93 | src_cnt = idx; | ||
94 | |||
95 | while (src_cnt > 0) { | ||
96 | submit->flags = flags_orig; | ||
97 | pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); | ||
98 | /* if we are submitting additional pqs, leave the chain open, | ||
99 | * clear the callback parameters, and leave the destination | ||
100 | * buffers mapped | ||
101 | */ | ||
102 | if (src_cnt > pq_src_cnt) { | ||
103 | submit->flags &= ~ASYNC_TX_ACK; | ||
104 | dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; | ||
105 | submit->cb_fn = NULL; | ||
106 | submit->cb_param = NULL; | ||
107 | } else { | ||
108 | dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; | ||
109 | submit->cb_fn = cb_fn_orig; | ||
110 | submit->cb_param = cb_param_orig; | ||
111 | if (cb_fn_orig) | ||
112 | dma_flags |= DMA_PREP_INTERRUPT; | ||
113 | } | ||
114 | |||
115 | /* Since we have clobbered the src_list we are committed | ||
116 | * to doing this asynchronously. Drivers force forward | ||
117 | * progress in case they can not provide a descriptor | ||
118 | */ | ||
119 | for (;;) { | ||
120 | tx = dma->device_prep_dma_pq(chan, dma_dest, | ||
121 | &dma_src[src_off], | ||
122 | pq_src_cnt, | ||
123 | &coefs[src_off], len, | ||
124 | dma_flags); | ||
125 | if (likely(tx)) | ||
126 | break; | ||
127 | async_tx_quiesce(&submit->depend_tx); | ||
128 | dma_async_issue_pending(chan); | ||
129 | } | ||
130 | |||
131 | async_tx_submit(chan, tx, submit); | ||
132 | submit->depend_tx = tx; | ||
133 | |||
134 | /* drop completed sources */ | ||
135 | src_cnt -= pq_src_cnt; | ||
136 | src_off += pq_src_cnt; | ||
137 | |||
138 | dma_flags |= DMA_PREP_CONTINUE; | ||
139 | } | ||
140 | |||
141 | return tx; | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome | ||
146 | */ | ||
147 | static void | ||
148 | do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, | ||
149 | size_t len, struct async_submit_ctl *submit) | ||
150 | { | ||
151 | void **srcs; | ||
152 | int i; | ||
153 | |||
154 | if (submit->scribble) | ||
155 | srcs = submit->scribble; | ||
156 | else | ||
157 | srcs = (void **) blocks; | ||
158 | |||
159 | for (i = 0; i < disks; i++) { | ||
160 | if (is_raid6_zero_block(blocks[i])) { | ||
161 | BUG_ON(i > disks - 3); /* P or Q can't be zero */ | ||
162 | srcs[i] = blocks[i]; | ||
163 | } else | ||
164 | srcs[i] = page_address(blocks[i]) + offset; | ||
165 | } | ||
166 | raid6_call.gen_syndrome(disks, len, srcs); | ||
167 | async_tx_sync_epilog(submit); | ||
168 | } | ||
169 | |||
170 | /** | ||
171 | * async_gen_syndrome - asynchronously calculate a raid6 syndrome | ||
172 | * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 | ||
173 | * @offset: common offset into each block (src and dest) to start transaction | ||
174 | * @disks: number of blocks (including missing P or Q, see below) | ||
175 | * @len: length of operation in bytes | ||
176 | * @submit: submission/completion modifiers | ||
177 | * | ||
178 | * General note: This routine assumes a field of GF(2^8) with a | ||
179 | * primitive polynomial of 0x11d and a generator of {02}. | ||
180 | * | ||
181 | * 'disks' note: callers can optionally omit either P or Q (but not | ||
182 | * both) from the calculation by setting blocks[disks-2] or | ||
183 | * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= | ||
184 | * PAGE_SIZE as a temporary buffer of this size is used in the | ||
185 | * synchronous path. 'disks' always accounts for both destination | ||
186 | * buffers. | ||
187 | * | ||
188 | * 'blocks' note: if submit->scribble is NULL then the contents of | ||
189 | * 'blocks' may be overridden | ||
190 | */ | ||
191 | struct dma_async_tx_descriptor * | ||
192 | async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, | ||
193 | size_t len, struct async_submit_ctl *submit) | ||
194 | { | ||
195 | int src_cnt = disks - 2; | ||
196 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, | ||
197 | &P(blocks, disks), 2, | ||
198 | blocks, src_cnt, len); | ||
199 | struct dma_device *device = chan ? chan->device : NULL; | ||
200 | dma_addr_t *dma_src = NULL; | ||
201 | |||
202 | BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); | ||
203 | |||
204 | if (submit->scribble) | ||
205 | dma_src = submit->scribble; | ||
206 | else if (sizeof(dma_addr_t) <= sizeof(struct page *)) | ||
207 | dma_src = (dma_addr_t *) blocks; | ||
208 | |||
209 | if (dma_src && device && | ||
210 | (src_cnt <= dma_maxpq(device, 0) || | ||
211 | dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) { | ||
212 | /* run the p+q asynchronously */ | ||
213 | pr_debug("%s: (async) disks: %d len: %zu\n", | ||
214 | __func__, disks, len); | ||
215 | return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, | ||
216 | disks, len, dma_src, submit); | ||
217 | } | ||
218 | |||
219 | /* run the pq synchronously */ | ||
220 | pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); | ||
221 | |||
222 | /* wait for any prerequisite operations */ | ||
223 | async_tx_quiesce(&submit->depend_tx); | ||
224 | |||
225 | if (!P(blocks, disks)) { | ||
226 | P(blocks, disks) = scribble; | ||
227 | BUG_ON(len + offset > PAGE_SIZE); | ||
228 | } | ||
229 | if (!Q(blocks, disks)) { | ||
230 | Q(blocks, disks) = scribble; | ||
231 | BUG_ON(len + offset > PAGE_SIZE); | ||
232 | } | ||
233 | do_sync_gen_syndrome(blocks, offset, disks, len, submit); | ||
234 | |||
235 | return NULL; | ||
236 | } | ||
237 | EXPORT_SYMBOL_GPL(async_gen_syndrome); | ||
238 | |||
239 | /** | ||
240 | * async_syndrome_val - asynchronously validate a raid6 syndrome | ||
241 | * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 | ||
242 | * @offset: common offset into each block (src and dest) to start transaction | ||
243 | * @disks: number of blocks (including missing P or Q, see below) | ||
244 | * @len: length of operation in bytes | ||
245 | * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set | ||
246 | * @spare: temporary result buffer for the synchronous case | ||
247 | * @submit: submission / completion modifiers | ||
248 | * | ||
249 | * The same notes from async_gen_syndrome apply to the 'blocks', | ||
250 | * and 'disks' parameters of this routine. The synchronous path | ||
251 | * requires a temporary result buffer and submit->scribble to be | ||
252 | * specified. | ||
253 | */ | ||
254 | struct dma_async_tx_descriptor * | ||
255 | async_syndrome_val(struct page **blocks, unsigned int offset, int disks, | ||
256 | size_t len, enum sum_check_flags *pqres, struct page *spare, | ||
257 | struct async_submit_ctl *submit) | ||
258 | { | ||
259 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, | ||
260 | NULL, 0, blocks, disks, | ||
261 | len); | ||
262 | struct dma_device *device = chan ? chan->device : NULL; | ||
263 | struct dma_async_tx_descriptor *tx; | ||
264 | enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; | ||
265 | dma_addr_t *dma_src = NULL; | ||
266 | |||
267 | BUG_ON(disks < 4); | ||
268 | |||
269 | if (submit->scribble) | ||
270 | dma_src = submit->scribble; | ||
271 | else if (sizeof(dma_addr_t) <= sizeof(struct page *)) | ||
272 | dma_src = (dma_addr_t *) blocks; | ||
273 | |||
274 | if (dma_src && device && disks <= dma_maxpq(device, 0)) { | ||
275 | struct device *dev = device->dev; | ||
276 | dma_addr_t *pq = &dma_src[disks-2]; | ||
277 | int i; | ||
278 | |||
279 | pr_debug("%s: (async) disks: %d len: %zu\n", | ||
280 | __func__, disks, len); | ||
281 | if (!P(blocks, disks)) | ||
282 | dma_flags |= DMA_PREP_PQ_DISABLE_P; | ||
283 | if (!Q(blocks, disks)) | ||
284 | dma_flags |= DMA_PREP_PQ_DISABLE_Q; | ||
285 | for (i = 0; i < disks; i++) | ||
286 | if (likely(blocks[i])) { | ||
287 | BUG_ON(is_raid6_zero_block(blocks[i])); | ||
288 | dma_src[i] = dma_map_page(dev, blocks[i], | ||
289 | offset, len, | ||
290 | DMA_TO_DEVICE); | ||
291 | } | ||
292 | |||
293 | for (;;) { | ||
294 | tx = device->device_prep_dma_pq_val(chan, pq, dma_src, | ||
295 | disks - 2, | ||
296 | raid6_gfexp, | ||
297 | len, pqres, | ||
298 | dma_flags); | ||
299 | if (likely(tx)) | ||
300 | break; | ||
301 | async_tx_quiesce(&submit->depend_tx); | ||
302 | dma_async_issue_pending(chan); | ||
303 | } | ||
304 | async_tx_submit(chan, tx, submit); | ||
305 | |||
306 | return tx; | ||
307 | } else { | ||
308 | struct page *p_src = P(blocks, disks); | ||
309 | struct page *q_src = Q(blocks, disks); | ||
310 | enum async_tx_flags flags_orig = submit->flags; | ||
311 | dma_async_tx_callback cb_fn_orig = submit->cb_fn; | ||
312 | void *scribble = submit->scribble; | ||
313 | void *cb_param_orig = submit->cb_param; | ||
314 | void *p, *q, *s; | ||
315 | |||
316 | pr_debug("%s: (sync) disks: %d len: %zu\n", | ||
317 | __func__, disks, len); | ||
318 | |||
319 | /* caller must provide a temporary result buffer and | ||
320 | * allow the input parameters to be preserved | ||
321 | */ | ||
322 | BUG_ON(!spare || !scribble); | ||
323 | |||
324 | /* wait for any prerequisite operations */ | ||
325 | async_tx_quiesce(&submit->depend_tx); | ||
326 | |||
327 | /* recompute p and/or q into the temporary buffer and then | ||
328 | * check to see the result matches the current value | ||
329 | */ | ||
330 | tx = NULL; | ||
331 | *pqres = 0; | ||
332 | if (p_src) { | ||
333 | init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, | ||
334 | NULL, NULL, scribble); | ||
335 | tx = async_xor(spare, blocks, offset, disks-2, len, submit); | ||
336 | async_tx_quiesce(&tx); | ||
337 | p = page_address(p_src) + offset; | ||
338 | s = page_address(spare) + offset; | ||
339 | *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; | ||
340 | } | ||
341 | |||
342 | if (q_src) { | ||
343 | P(blocks, disks) = NULL; | ||
344 | Q(blocks, disks) = spare; | ||
345 | init_async_submit(submit, 0, NULL, NULL, NULL, scribble); | ||
346 | tx = async_gen_syndrome(blocks, offset, disks, len, submit); | ||
347 | async_tx_quiesce(&tx); | ||
348 | q = page_address(q_src) + offset; | ||
349 | s = page_address(spare) + offset; | ||
350 | *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; | ||
351 | } | ||
352 | |||
353 | /* restore P, Q and submit */ | ||
354 | P(blocks, disks) = p_src; | ||
355 | Q(blocks, disks) = q_src; | ||
356 | |||
357 | submit->cb_fn = cb_fn_orig; | ||
358 | submit->cb_param = cb_param_orig; | ||
359 | submit->flags = flags_orig; | ||
360 | async_tx_sync_epilog(submit); | ||
361 | |||
362 | return NULL; | ||
363 | } | ||
364 | } | ||
365 | EXPORT_SYMBOL_GPL(async_syndrome_val); | ||
366 | |||
367 | static int __init async_pq_init(void) | ||
368 | { | ||
369 | scribble = alloc_page(GFP_KERNEL); | ||
370 | |||
371 | if (scribble) | ||
372 | return 0; | ||
373 | |||
374 | pr_err("%s: failed to allocate required spare page\n", __func__); | ||
375 | |||
376 | return -ENOMEM; | ||
377 | } | ||
378 | |||
379 | static void __exit async_pq_exit(void) | ||
380 | { | ||
381 | put_page(scribble); | ||
382 | } | ||
383 | |||
384 | module_init(async_pq_init); | ||
385 | module_exit(async_pq_exit); | ||
386 | |||
387 | MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation"); | ||
388 | MODULE_LICENSE("GPL"); | ||
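For context, a sketch of how a raid6 consumer might drive the two new entry points, loosely modeled on the raid6test.c module added by this series; NDISKS, blocks[], spare_page and the quiescing strategy are illustrative assumptions, not code from this commit:

	struct page *blocks[NDISKS];	/* data 0..NDISKS-3, then P, then Q */
	addr_conv_t addr_conv[NDISKS];
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;
	enum sum_check_flags pqres = 0;

	/* generate P and Q into blocks[NDISKS-2] and blocks[NDISKS-1] */
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, addr_conv);
	tx = async_gen_syndrome(blocks, 0, NDISKS, PAGE_SIZE, &submit);
	async_tx_quiesce(&tx);

	/* check the stored P and Q against the data blocks */
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, addr_conv);
	tx = async_syndrome_val(blocks, 0, NDISKS, PAGE_SIZE, &pqres,
				spare_page, &submit);
	async_tx_quiesce(&tx);

	if (pqres)
		pr_err("pq mismatch: P %s Q %s\n",
		       (pqres & SUM_CHECK_P_RESULT) ? "bad" : "ok",
		       (pqres & SUM_CHECK_Q_RESULT) ? "bad" : "ok");

Note the constraints spelled out in the kernel-doc above: the synchronous fallback of async_syndrome_val() needs both a spare page and a submit->scribble area, and async_gen_syndrome() may clobber the blocks[] array when no scribble space is provided.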
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
new file mode 100644
index 000000000000..0c14d48c9896
--- /dev/null
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -0,0 +1,448 @@ | |||
1 | /* | ||
2 | * Asynchronous RAID-6 recovery calculations ASYNC_TX API. | ||
3 | * Copyright(c) 2009 Intel Corporation | ||
4 | * | ||
5 | * based on raid6recov.c: | ||
6 | * Copyright 2002 H. Peter Anvin | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the Free | ||
10 | * Software Foundation; either version 2 of the License, or (at your option) | ||
11 | * any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
16 | * more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License along with | ||
19 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
20 | * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | */ | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/dma-mapping.h> | ||
26 | #include <linux/raid/pq.h> | ||
27 | #include <linux/async_tx.h> | ||
28 | |||
29 | static struct dma_async_tx_descriptor * | ||
30 | async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, | ||
31 | size_t len, struct async_submit_ctl *submit) | ||
32 | { | ||
33 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, | ||
34 | &dest, 1, srcs, 2, len); | ||
35 | struct dma_device *dma = chan ? chan->device : NULL; | ||
36 | const u8 *amul, *bmul; | ||
37 | u8 ax, bx; | ||
38 | u8 *a, *b, *c; | ||
39 | |||
40 | if (dma) { | ||
41 | dma_addr_t dma_dest[2]; | ||
42 | dma_addr_t dma_src[2]; | ||
43 | struct device *dev = dma->dev; | ||
44 | struct dma_async_tx_descriptor *tx; | ||
45 | enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; | ||
46 | |||
47 | dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); | ||
48 | dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); | ||
49 | dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); | ||
50 | tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, | ||
51 | len, dma_flags); | ||
52 | if (tx) { | ||
53 | async_tx_submit(chan, tx, submit); | ||
54 | return tx; | ||
55 | } | ||
56 | } | ||
57 | |||
58 | /* run the operation synchronously */ | ||
59 | async_tx_quiesce(&submit->depend_tx); | ||
60 | amul = raid6_gfmul[coef[0]]; | ||
61 | bmul = raid6_gfmul[coef[1]]; | ||
62 | a = page_address(srcs[0]); | ||
63 | b = page_address(srcs[1]); | ||
64 | c = page_address(dest); | ||
65 | |||
66 | while (len--) { | ||
67 | ax = amul[*a++]; | ||
68 | bx = bmul[*b++]; | ||
69 | *c++ = ax ^ bx; | ||
70 | } | ||
71 | |||
72 | return NULL; | ||
73 | } | ||
74 | |||
75 | static struct dma_async_tx_descriptor * | ||
76 | async_mult(struct page *dest, struct page *src, u8 coef, size_t len, | ||
77 | struct async_submit_ctl *submit) | ||
78 | { | ||
79 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, | ||
80 | &dest, 1, &src, 1, len); | ||
81 | struct dma_device *dma = chan ? chan->device : NULL; | ||
82 | const u8 *qmul; /* Q multiplier table */ | ||
83 | u8 *d, *s; | ||
84 | |||
85 | if (dma) { | ||
86 | dma_addr_t dma_dest[2]; | ||
87 | dma_addr_t dma_src[1]; | ||
88 | struct device *dev = dma->dev; | ||
89 | struct dma_async_tx_descriptor *tx; | ||
90 | enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; | ||
91 | |||
92 | dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); | ||
93 | dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); | ||
94 | tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, | ||
95 | len, dma_flags); | ||
96 | if (tx) { | ||
97 | async_tx_submit(chan, tx, submit); | ||
98 | return tx; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | /* no channel available, or failed to allocate a descriptor, so | ||
103 | * perform the operation synchronously | ||
104 | */ | ||
105 | async_tx_quiesce(&submit->depend_tx); | ||
106 | qmul = raid6_gfmul[coef]; | ||
107 | d = page_address(dest); | ||
108 | s = page_address(src); | ||
109 | |||
110 | while (len--) | ||
111 | *d++ = qmul[*s++]; | ||
112 | |||
113 | return NULL; | ||
114 | } | ||
115 | |||
116 | static struct dma_async_tx_descriptor * | ||
117 | __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, | ||
118 | struct async_submit_ctl *submit) | ||
119 | { | ||
120 | struct dma_async_tx_descriptor *tx = NULL; | ||
121 | struct page *p, *q, *a, *b; | ||
122 | struct page *srcs[2]; | ||
123 | unsigned char coef[2]; | ||
124 | enum async_tx_flags flags = submit->flags; | ||
125 | dma_async_tx_callback cb_fn = submit->cb_fn; | ||
126 | void *cb_param = submit->cb_param; | ||
127 | void *scribble = submit->scribble; | ||
128 | |||
129 | p = blocks[4-2]; | ||
130 | q = blocks[4-1]; | ||
131 | |||
132 | a = blocks[faila]; | ||
133 | b = blocks[failb]; | ||
134 | |||
135 | /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ | ||
136 | /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ | ||
137 | srcs[0] = p; | ||
138 | srcs[1] = q; | ||
139 | coef[0] = raid6_gfexi[failb-faila]; | ||
140 | coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; | ||
141 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
142 | tx = async_sum_product(b, srcs, coef, bytes, submit); | ||
143 | |||
144 | /* Dy = P+Pxy+Dx */ | ||
145 | srcs[0] = p; | ||
146 | srcs[1] = b; | ||
147 | init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, | ||
148 | cb_param, scribble); | ||
149 | tx = async_xor(a, srcs, 0, 2, bytes, submit); | ||
150 | |||
151 | return tx; | ||
152 | |||
153 | } | ||
154 | |||
155 | static struct dma_async_tx_descriptor * | ||
156 | __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, | ||
157 | struct async_submit_ctl *submit) | ||
158 | { | ||
159 | struct dma_async_tx_descriptor *tx = NULL; | ||
160 | struct page *p, *q, *g, *dp, *dq; | ||
161 | struct page *srcs[2]; | ||
162 | unsigned char coef[2]; | ||
163 | enum async_tx_flags flags = submit->flags; | ||
164 | dma_async_tx_callback cb_fn = submit->cb_fn; | ||
165 | void *cb_param = submit->cb_param; | ||
166 | void *scribble = submit->scribble; | ||
167 | int uninitialized_var(good); | ||
168 | int i; | ||
169 | |||
170 | for (i = 0; i < 3; i++) { | ||
171 | if (i == faila || i == failb) | ||
172 | continue; | ||
173 | else { | ||
174 | good = i; | ||
175 | break; | ||
176 | } | ||
177 | } | ||
178 | BUG_ON(i >= 3); | ||
179 | |||
180 | p = blocks[5-2]; | ||
181 | q = blocks[5-1]; | ||
182 | g = blocks[good]; | ||
183 | |||
184 | /* Compute syndrome with zero for the missing data pages | ||
185 | * Use the dead data pages as temporary storage for delta p and | ||
186 | * delta q | ||
187 | */ | ||
188 | dp = blocks[faila]; | ||
189 | dq = blocks[failb]; | ||
190 | |||
191 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
192 | tx = async_memcpy(dp, g, 0, 0, bytes, submit); | ||
193 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
194 | tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); | ||
195 | |||
196 | /* compute P + Pxy */ | ||
197 | srcs[0] = dp; | ||
198 | srcs[1] = p; | ||
199 | init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, | ||
200 | scribble); | ||
201 | tx = async_xor(dp, srcs, 0, 2, bytes, submit); | ||
202 | |||
203 | /* compute Q + Qxy */ | ||
204 | srcs[0] = dq; | ||
205 | srcs[1] = q; | ||
206 | init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, | ||
207 | scribble); | ||
208 | tx = async_xor(dq, srcs, 0, 2, bytes, submit); | ||
209 | |||
210 | /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ | ||
211 | srcs[0] = dp; | ||
212 | srcs[1] = dq; | ||
213 | coef[0] = raid6_gfexi[failb-faila]; | ||
214 | coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; | ||
215 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
216 | tx = async_sum_product(dq, srcs, coef, bytes, submit); | ||
217 | |||
218 | /* Dy = P+Pxy+Dx */ | ||
219 | srcs[0] = dp; | ||
220 | srcs[1] = dq; | ||
221 | init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, | ||
222 | cb_param, scribble); | ||
223 | tx = async_xor(dp, srcs, 0, 2, bytes, submit); | ||
224 | |||
225 | return tx; | ||
226 | } | ||
227 | |||
228 | static struct dma_async_tx_descriptor * | ||
229 | __2data_recov_n(int disks, size_t bytes, int faila, int failb, | ||
230 | struct page **blocks, struct async_submit_ctl *submit) | ||
231 | { | ||
232 | struct dma_async_tx_descriptor *tx = NULL; | ||
233 | struct page *p, *q, *dp, *dq; | ||
234 | struct page *srcs[2]; | ||
235 | unsigned char coef[2]; | ||
236 | enum async_tx_flags flags = submit->flags; | ||
237 | dma_async_tx_callback cb_fn = submit->cb_fn; | ||
238 | void *cb_param = submit->cb_param; | ||
239 | void *scribble = submit->scribble; | ||
240 | |||
241 | p = blocks[disks-2]; | ||
242 | q = blocks[disks-1]; | ||
243 | |||
244 | /* Compute syndrome with zero for the missing data pages | ||
245 | * Use the dead data pages as temporary storage for | ||
246 | * delta p and delta q | ||
247 | */ | ||
248 | dp = blocks[faila]; | ||
249 | blocks[faila] = (void *)raid6_empty_zero_page; | ||
250 | blocks[disks-2] = dp; | ||
251 | dq = blocks[failb]; | ||
252 | blocks[failb] = (void *)raid6_empty_zero_page; | ||
253 | blocks[disks-1] = dq; | ||
254 | |||
255 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
256 | tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); | ||
257 | |||
258 | /* Restore pointer table */ | ||
259 | blocks[faila] = dp; | ||
260 | blocks[failb] = dq; | ||
261 | blocks[disks-2] = p; | ||
262 | blocks[disks-1] = q; | ||
263 | |||
264 | /* compute P + Pxy */ | ||
265 | srcs[0] = dp; | ||
266 | srcs[1] = p; | ||
267 | init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, | ||
268 | scribble); | ||
269 | tx = async_xor(dp, srcs, 0, 2, bytes, submit); | ||
270 | |||
271 | /* compute Q + Qxy */ | ||
272 | srcs[0] = dq; | ||
273 | srcs[1] = q; | ||
274 | init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, | ||
275 | scribble); | ||
276 | tx = async_xor(dq, srcs, 0, 2, bytes, submit); | ||
277 | |||
278 | /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ | ||
279 | srcs[0] = dp; | ||
280 | srcs[1] = dq; | ||
281 | coef[0] = raid6_gfexi[failb-faila]; | ||
282 | coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; | ||
283 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
284 | tx = async_sum_product(dq, srcs, coef, bytes, submit); | ||
285 | |||
286 | /* Dy = P+Pxy+Dx */ | ||
287 | srcs[0] = dp; | ||
288 | srcs[1] = dq; | ||
289 | init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, | ||
290 | cb_param, scribble); | ||
291 | tx = async_xor(dp, srcs, 0, 2, bytes, submit); | ||
292 | |||
293 | return tx; | ||
294 | } | ||
295 | |||
296 | /** | ||
297 | * async_raid6_2data_recov - asynchronously calculate two missing data blocks | ||
298 | * @disks: number of disks in the RAID-6 array | ||
299 | * @bytes: block size | ||
300 | * @faila: first failed drive index | ||
301 | * @failb: second failed drive index | ||
302 | * @blocks: array of source pointers where the last two entries are p and q | ||
303 | * @submit: submission/completion modifiers | ||
304 | */ | ||
305 | struct dma_async_tx_descriptor * | ||
306 | async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | ||
307 | struct page **blocks, struct async_submit_ctl *submit) | ||
308 | { | ||
309 | BUG_ON(faila == failb); | ||
310 | if (failb < faila) | ||
311 | swap(faila, failb); | ||
312 | |||
313 | pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); | ||
314 | |||
315 | /* we need to preserve the contents of 'blocks' for the async | ||
316 | * case, so punt to synchronous if a scribble buffer is not available | ||
317 | */ | ||
318 | if (!submit->scribble) { | ||
319 | void **ptrs = (void **) blocks; | ||
320 | int i; | ||
321 | |||
322 | async_tx_quiesce(&submit->depend_tx); | ||
323 | for (i = 0; i < disks; i++) | ||
324 | ptrs[i] = page_address(blocks[i]); | ||
325 | |||
326 | raid6_2data_recov(disks, bytes, faila, failb, ptrs); | ||
327 | |||
328 | async_tx_sync_epilog(submit); | ||
329 | |||
330 | return NULL; | ||
331 | } | ||
332 | |||
333 | switch (disks) { | ||
334 | case 4: | ||
335 | /* dma devices do not uniformly understand a zero source pq | ||
336 | * operation (in contrast to the synchronous case), so | ||
337 | * explicitly handle the 4 disk special case | ||
338 | */ | ||
339 | return __2data_recov_4(bytes, faila, failb, blocks, submit); | ||
340 | case 5: | ||
341 | /* dma devices do not uniformly understand a single | ||
342 | * source pq operation (in contrast to the synchronous | ||
343 | * case), so explicitly handle the 5 disk special case | ||
344 | */ | ||
345 | return __2data_recov_5(bytes, faila, failb, blocks, submit); | ||
346 | default: | ||
347 | return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); | ||
348 | } | ||
349 | } | ||
350 | EXPORT_SYMBOL_GPL(async_raid6_2data_recov); | ||
351 | |||
352 | /** | ||
353 | * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block | ||
354 | * @disks: number of disks in the RAID-6 array | ||
355 | * @bytes: block size | ||
356 | * @faila: failed drive index | ||
357 | * @blocks: array of source pointers where the last two entries are p and q | ||
358 | * @submit: submission/completion modifiers | ||
359 | */ | ||
360 | struct dma_async_tx_descriptor * | ||
361 | async_raid6_datap_recov(int disks, size_t bytes, int faila, | ||
362 | struct page **blocks, struct async_submit_ctl *submit) | ||
363 | { | ||
364 | struct dma_async_tx_descriptor *tx = NULL; | ||
365 | struct page *p, *q, *dq; | ||
366 | u8 coef; | ||
367 | enum async_tx_flags flags = submit->flags; | ||
368 | dma_async_tx_callback cb_fn = submit->cb_fn; | ||
369 | void *cb_param = submit->cb_param; | ||
370 | void *scribble = submit->scribble; | ||
371 | struct page *srcs[2]; | ||
372 | |||
373 | pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); | ||
374 | |||
375 | /* we need to preserve the contents of 'blocks' for the async | ||
376 | * case, so punt to synchronous if a scribble buffer is not available | ||
377 | */ | ||
378 | if (!scribble) { | ||
379 | void **ptrs = (void **) blocks; | ||
380 | int i; | ||
381 | |||
382 | async_tx_quiesce(&submit->depend_tx); | ||
383 | for (i = 0; i < disks; i++) | ||
384 | ptrs[i] = page_address(blocks[i]); | ||
385 | |||
386 | raid6_datap_recov(disks, bytes, faila, ptrs); | ||
387 | |||
388 | async_tx_sync_epilog(submit); | ||
389 | |||
390 | return NULL; | ||
391 | } | ||
392 | |||
393 | p = blocks[disks-2]; | ||
394 | q = blocks[disks-1]; | ||
395 | |||
396 | /* Compute syndrome with zero for the missing data page | ||
397 | * Use the dead data page as temporary storage for delta q | ||
398 | */ | ||
399 | dq = blocks[faila]; | ||
400 | blocks[faila] = (void *)raid6_empty_zero_page; | ||
401 | blocks[disks-1] = dq; | ||
402 | |||
403 | /* in the 4 disk case we only need to perform a single source | ||
404 | * multiplication | ||
405 | */ | ||
406 | if (disks == 4) { | ||
407 | int good = faila == 0 ? 1 : 0; | ||
408 | struct page *g = blocks[good]; | ||
409 | |||
410 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
411 | tx = async_memcpy(p, g, 0, 0, bytes, submit); | ||
412 | |||
413 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
414 | tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); | ||
415 | } else { | ||
416 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
417 | tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); | ||
418 | } | ||
419 | |||
420 | /* Restore pointer table */ | ||
421 | blocks[faila] = dq; | ||
422 | blocks[disks-1] = q; | ||
423 | |||
424 | /* calculate g^{-faila} */ | ||
425 | coef = raid6_gfinv[raid6_gfexp[faila]]; | ||
426 | |||
427 | srcs[0] = dq; | ||
428 | srcs[1] = q; | ||
429 | init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, | ||
430 | scribble); | ||
431 | tx = async_xor(dq, srcs, 0, 2, bytes, submit); | ||
432 | |||
433 | init_async_submit(submit, 0, tx, NULL, NULL, scribble); | ||
434 | tx = async_mult(dq, dq, coef, bytes, submit); | ||
435 | |||
436 | srcs[0] = p; | ||
437 | srcs[1] = dq; | ||
438 | init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, | ||
439 | cb_param, scribble); | ||
440 | tx = async_xor(p, srcs, 0, 2, bytes, submit); | ||
441 | |||
442 | return tx; | ||
443 | } | ||
444 | EXPORT_SYMBOL_GPL(async_raid6_datap_recov); | ||
445 | |||
446 | MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); | ||
447 | MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); | ||
448 | MODULE_LICENSE("GPL"); | ||
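Editor's note: the recovery entry points above take a single struct async_submit_ctl instead of the old flags/dependency/callback argument list. Below is a minimal sketch of driving async_raid6_datap_recov() from a caller, following the init_async_submit() / async_tx_issue_pending() / completion pattern used by raid6test.c later in this patch. recover_datap() and recov_done() are illustrative names, blocks[] is laid out as the kernel-doc above describes (last two entries are p and q), and error handling is omitted.

#include <linux/async_tx.h>
#include <linux/completion.h>

static void recov_done(void *param)
{
	complete(param);
}

/* rebuild the data block at index 'faila' plus the p block, in place */
static void recover_datap(struct page **blocks, int disks, size_t bytes,
			  int faila)
{
	addr_conv_t addr_conv[disks];	/* scribble for dma address conversion */
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;
	struct completion cmp;

	init_completion(&cmp);
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, recov_done, &cmp,
			  addr_conv);
	tx = async_raid6_datap_recov(disks, bytes, faila, blocks, &submit);
	async_tx_issue_pending(tx);
	wait_for_completion(&cmp);
}

Passing the addr_conv scribble keeps the caller's blocks[] intact; without it the routine punts to the synchronous raid6_datap_recov() path, as the hunk above shows.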
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 06eb6cc09fef..60615fedcf5e 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c | |||
@@ -42,16 +42,21 @@ static void __exit async_tx_exit(void) | |||
42 | async_dmaengine_put(); | 42 | async_dmaengine_put(); |
43 | } | 43 | } |
44 | 44 | ||
45 | module_init(async_tx_init); | ||
46 | module_exit(async_tx_exit); | ||
47 | |||
45 | /** | 48 | /** |
46 | * __async_tx_find_channel - find a channel to carry out the operation or let | 49 | * __async_tx_find_channel - find a channel to carry out the operation or let |
47 | * the transaction execute synchronously | 50 | * the transaction execute synchronously |
48 | * @depend_tx: transaction dependency | 51 | * @submit: transaction dependency and submission modifiers |
49 | * @tx_type: transaction type | 52 | * @tx_type: transaction type |
50 | */ | 53 | */ |
51 | struct dma_chan * | 54 | struct dma_chan * |
52 | __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | 55 | __async_tx_find_channel(struct async_submit_ctl *submit, |
53 | enum dma_transaction_type tx_type) | 56 | enum dma_transaction_type tx_type) |
54 | { | 57 | { |
58 | struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; | ||
59 | |||
55 | /* see if we can keep the chain on one channel */ | 60 | /* see if we can keep the chain on one channel */ |
56 | if (depend_tx && | 61 | if (depend_tx && |
57 | dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) | 62 | dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) |
@@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | |||
59 | return async_dma_find_channel(tx_type); | 64 | return async_dma_find_channel(tx_type); |
60 | } | 65 | } |
61 | EXPORT_SYMBOL_GPL(__async_tx_find_channel); | 66 | EXPORT_SYMBOL_GPL(__async_tx_find_channel); |
62 | #else | ||
63 | static int __init async_tx_init(void) | ||
64 | { | ||
65 | printk(KERN_INFO "async_tx: api initialized (sync-only)\n"); | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static void __exit async_tx_exit(void) | ||
70 | { | ||
71 | do { } while (0); | ||
72 | } | ||
73 | #endif | 67 | #endif |
74 | 68 | ||
75 | 69 | ||
@@ -83,8 +77,8 @@ static void | |||
83 | async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, | 77 | async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, |
84 | struct dma_async_tx_descriptor *tx) | 78 | struct dma_async_tx_descriptor *tx) |
85 | { | 79 | { |
86 | struct dma_chan *chan; | 80 | struct dma_chan *chan = depend_tx->chan; |
87 | struct dma_device *device; | 81 | struct dma_device *device = chan->device; |
88 | struct dma_async_tx_descriptor *intr_tx = (void *) ~0; | 82 | struct dma_async_tx_descriptor *intr_tx = (void *) ~0; |
89 | 83 | ||
90 | /* first check to see if we can still append to depend_tx */ | 84 | /* first check to see if we can still append to depend_tx */ |
@@ -96,11 +90,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, | |||
96 | } | 90 | } |
97 | spin_unlock_bh(&depend_tx->lock); | 91 | spin_unlock_bh(&depend_tx->lock); |
98 | 92 | ||
99 | if (!intr_tx) | 93 | /* attached dependency, flush the parent channel */ |
94 | if (!intr_tx) { | ||
95 | device->device_issue_pending(chan); | ||
100 | return; | 96 | return; |
101 | 97 | } | |
102 | chan = depend_tx->chan; | ||
103 | device = chan->device; | ||
104 | 98 | ||
105 | /* see if we can schedule an interrupt | 99 | /* see if we can schedule an interrupt |
106 | * otherwise poll for completion | 100 | * otherwise poll for completion |
@@ -134,6 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, | |||
134 | intr_tx->tx_submit(intr_tx); | 128 | intr_tx->tx_submit(intr_tx); |
135 | async_tx_ack(intr_tx); | 129 | async_tx_ack(intr_tx); |
136 | } | 130 | } |
131 | device->device_issue_pending(chan); | ||
137 | } else { | 132 | } else { |
138 | if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) | 133 | if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) |
139 | panic("%s: DMA_ERROR waiting for depend_tx\n", | 134 | panic("%s: DMA_ERROR waiting for depend_tx\n", |
@@ -144,13 +139,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, | |||
144 | 139 | ||
145 | 140 | ||
146 | /** | 141 | /** |
147 | * submit_disposition - while holding depend_tx->lock we must avoid submitting | 142 | * submit_disposition - flags for routing an incoming operation |
148 | * new operations to prevent a circular locking dependency with | ||
149 | * drivers that already hold a channel lock when calling | ||
150 | * async_tx_run_dependencies. | ||
151 | * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock | 143 | * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock |
152 | * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch | 144 | * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch |
153 | * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly | 145 | * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly |
146 | * | ||
147 | * while holding depend_tx->lock we must avoid submitting new operations | ||
148 | * to prevent a circular locking dependency with drivers that already | ||
149 | * hold a channel lock when calling async_tx_run_dependencies. | ||
154 | */ | 150 | */ |
155 | enum submit_disposition { | 151 | enum submit_disposition { |
156 | ASYNC_TX_SUBMITTED, | 152 | ASYNC_TX_SUBMITTED, |
@@ -160,11 +156,12 @@ enum submit_disposition { | |||
160 | 156 | ||
161 | void | 157 | void |
162 | async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, | 158 | async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, |
163 | enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, | 159 | struct async_submit_ctl *submit) |
164 | dma_async_tx_callback cb_fn, void *cb_param) | ||
165 | { | 160 | { |
166 | tx->callback = cb_fn; | 161 | struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; |
167 | tx->callback_param = cb_param; | 162 | |
163 | tx->callback = submit->cb_fn; | ||
164 | tx->callback_param = submit->cb_param; | ||
168 | 165 | ||
169 | if (depend_tx) { | 166 | if (depend_tx) { |
170 | enum submit_disposition s; | 167 | enum submit_disposition s; |
@@ -220,30 +217,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, | |||
220 | tx->tx_submit(tx); | 217 | tx->tx_submit(tx); |
221 | } | 218 | } |
222 | 219 | ||
223 | if (flags & ASYNC_TX_ACK) | 220 | if (submit->flags & ASYNC_TX_ACK) |
224 | async_tx_ack(tx); | 221 | async_tx_ack(tx); |
225 | 222 | ||
226 | if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) | 223 | if (depend_tx) |
227 | async_tx_ack(depend_tx); | 224 | async_tx_ack(depend_tx); |
228 | } | 225 | } |
229 | EXPORT_SYMBOL_GPL(async_tx_submit); | 226 | EXPORT_SYMBOL_GPL(async_tx_submit); |
230 | 227 | ||
231 | /** | 228 | /** |
232 | * async_trigger_callback - schedules the callback function to be run after | 229 | * async_trigger_callback - schedules the callback function to be run |
233 | * any dependent operations have been completed. | 230 | * @submit: submission and completion parameters |
234 | * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK | 231 | * |
235 | * @depend_tx: 'callback' requires the completion of this transaction | 232 | * honored flags: ASYNC_TX_ACK |
236 | * @cb_fn: function to call after depend_tx completes | 233 | * |
237 | * @cb_param: parameter to pass to the callback routine | 234 | * The callback is run after any dependent operations have completed. |
238 | */ | 235 | */ |
239 | struct dma_async_tx_descriptor * | 236 | struct dma_async_tx_descriptor * |
240 | async_trigger_callback(enum async_tx_flags flags, | 237 | async_trigger_callback(struct async_submit_ctl *submit) |
241 | struct dma_async_tx_descriptor *depend_tx, | ||
242 | dma_async_tx_callback cb_fn, void *cb_param) | ||
243 | { | 238 | { |
244 | struct dma_chan *chan; | 239 | struct dma_chan *chan; |
245 | struct dma_device *device; | 240 | struct dma_device *device; |
246 | struct dma_async_tx_descriptor *tx; | 241 | struct dma_async_tx_descriptor *tx; |
242 | struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; | ||
247 | 243 | ||
248 | if (depend_tx) { | 244 | if (depend_tx) { |
249 | chan = depend_tx->chan; | 245 | chan = depend_tx->chan; |
@@ -262,14 +258,14 @@ async_trigger_callback(enum async_tx_flags flags, | |||
262 | if (tx) { | 258 | if (tx) { |
263 | pr_debug("%s: (async)\n", __func__); | 259 | pr_debug("%s: (async)\n", __func__); |
264 | 260 | ||
265 | async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); | 261 | async_tx_submit(chan, tx, submit); |
266 | } else { | 262 | } else { |
267 | pr_debug("%s: (sync)\n", __func__); | 263 | pr_debug("%s: (sync)\n", __func__); |
268 | 264 | ||
269 | /* wait for any prerequisite operations */ | 265 | /* wait for any prerequisite operations */ |
270 | async_tx_quiesce(&depend_tx); | 266 | async_tx_quiesce(&submit->depend_tx); |
271 | 267 | ||
272 | async_tx_sync_epilog(cb_fn, cb_param); | 268 | async_tx_sync_epilog(submit); |
273 | } | 269 | } |
274 | 270 | ||
275 | return tx; | 271 | return tx; |
@@ -295,9 +291,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx) | |||
295 | } | 291 | } |
296 | EXPORT_SYMBOL_GPL(async_tx_quiesce); | 292 | EXPORT_SYMBOL_GPL(async_tx_quiesce); |
297 | 293 | ||
298 | module_init(async_tx_init); | ||
299 | module_exit(async_tx_exit); | ||
300 | |||
301 | MODULE_AUTHOR("Intel Corporation"); | 294 | MODULE_AUTHOR("Intel Corporation"); |
302 | MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); | 295 | MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); |
303 | MODULE_LICENSE("GPL"); | 296 | MODULE_LICENSE("GPL"); |
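Editor's note: async_tx_submit() and async_trigger_callback() now pull the dependency, callback and flags out of the submit descriptor, and dependencies are always acked (ASYNC_TX_DEP_ACK is gone), so a caller chains operations by reinitializing one async_submit_ctl between calls. A short sketch under those assumptions; copy_then_notify() and its arguments are illustrative, the async_memcpy() signature is the one used by the raid5.c hunk later in this patch, and memcpy needs no scribble region.

#include <linux/async_tx.h>

static void copy_then_notify(struct page *dst, struct page *src, size_t len,
			     dma_async_tx_callback done, void *done_arg)
{
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;

	/* first operation: no dependency, no callback yet */
	init_async_submit(&submit, 0, NULL, NULL, NULL, NULL);
	tx = async_memcpy(dst, src, 0, 0, len, &submit);

	/* run 'done' once the copy completes; ASYNC_TX_ACK lets the core
	 * reclaim the callback descriptor after it fires */
	init_async_submit(&submit, ASYNC_TX_ACK, tx, done, done_arg, NULL);
	tx = async_trigger_callback(&submit);
	async_tx_issue_pending(tx);
}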
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 95fe2c8d6c51..56b5f98da463 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c | |||
@@ -33,19 +33,16 @@ | |||
33 | /* do_async_xor - dma map the pages and perform the xor with an engine */ | 33 | /* do_async_xor - dma map the pages and perform the xor with an engine */ |
34 | static __async_inline struct dma_async_tx_descriptor * | 34 | static __async_inline struct dma_async_tx_descriptor * |
35 | do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | 35 | do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, |
36 | unsigned int offset, int src_cnt, size_t len, | 36 | unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src, |
37 | enum async_tx_flags flags, | 37 | struct async_submit_ctl *submit) |
38 | struct dma_async_tx_descriptor *depend_tx, | ||
39 | dma_async_tx_callback cb_fn, void *cb_param) | ||
40 | { | 38 | { |
41 | struct dma_device *dma = chan->device; | 39 | struct dma_device *dma = chan->device; |
42 | dma_addr_t *dma_src = (dma_addr_t *) src_list; | ||
43 | struct dma_async_tx_descriptor *tx = NULL; | 40 | struct dma_async_tx_descriptor *tx = NULL; |
44 | int src_off = 0; | 41 | int src_off = 0; |
45 | int i; | 42 | int i; |
46 | dma_async_tx_callback _cb_fn; | 43 | dma_async_tx_callback cb_fn_orig = submit->cb_fn; |
47 | void *_cb_param; | 44 | void *cb_param_orig = submit->cb_param; |
48 | enum async_tx_flags async_flags; | 45 | enum async_tx_flags flags_orig = submit->flags; |
49 | enum dma_ctrl_flags dma_flags; | 46 | enum dma_ctrl_flags dma_flags; |
50 | int xor_src_cnt; | 47 | int xor_src_cnt; |
51 | dma_addr_t dma_dest; | 48 | dma_addr_t dma_dest; |
@@ -63,23 +60,23 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | |||
63 | } | 60 | } |
64 | 61 | ||
65 | while (src_cnt) { | 62 | while (src_cnt) { |
66 | async_flags = flags; | 63 | submit->flags = flags_orig; |
67 | dma_flags = 0; | 64 | dma_flags = 0; |
68 | xor_src_cnt = min(src_cnt, dma->max_xor); | 65 | xor_src_cnt = min(src_cnt, (int)dma->max_xor); |
69 | /* if we are submitting additional xors, leave the chain open, | 66 | /* if we are submitting additional xors, leave the chain open, |
70 | * clear the callback parameters, and leave the destination | 67 | * clear the callback parameters, and leave the destination |
71 | * buffer mapped | 68 | * buffer mapped |
72 | */ | 69 | */ |
73 | if (src_cnt > xor_src_cnt) { | 70 | if (src_cnt > xor_src_cnt) { |
74 | async_flags &= ~ASYNC_TX_ACK; | 71 | submit->flags &= ~ASYNC_TX_ACK; |
75 | dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; | 72 | dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; |
76 | _cb_fn = NULL; | 73 | submit->cb_fn = NULL; |
77 | _cb_param = NULL; | 74 | submit->cb_param = NULL; |
78 | } else { | 75 | } else { |
79 | _cb_fn = cb_fn; | 76 | submit->cb_fn = cb_fn_orig; |
80 | _cb_param = cb_param; | 77 | submit->cb_param = cb_param_orig; |
81 | } | 78 | } |
82 | if (_cb_fn) | 79 | if (submit->cb_fn) |
83 | dma_flags |= DMA_PREP_INTERRUPT; | 80 | dma_flags |= DMA_PREP_INTERRUPT; |
84 | 81 | ||
85 | /* Since we have clobbered the src_list we are committed | 82 | /* Since we have clobbered the src_list we are committed |
@@ -90,7 +87,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | |||
90 | xor_src_cnt, len, dma_flags); | 87 | xor_src_cnt, len, dma_flags); |
91 | 88 | ||
92 | if (unlikely(!tx)) | 89 | if (unlikely(!tx)) |
93 | async_tx_quiesce(&depend_tx); | 90 | async_tx_quiesce(&submit->depend_tx); |
94 | 91 | ||
95 | /* spin wait for the preceding transactions to complete */ | 92 |
96 | while (unlikely(!tx)) { | 93 | while (unlikely(!tx)) { |
@@ -101,11 +98,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | |||
101 | dma_flags); | 98 | dma_flags); |
102 | } | 99 | } |
103 | 100 | ||
104 | async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, | 101 | async_tx_submit(chan, tx, submit); |
105 | _cb_param); | 102 | submit->depend_tx = tx; |
106 | |||
107 | depend_tx = tx; | ||
108 | flags |= ASYNC_TX_DEP_ACK; | ||
109 | 103 | ||
110 | if (src_cnt > xor_src_cnt) { | 104 | if (src_cnt > xor_src_cnt) { |
111 | /* drop completed sources */ | 105 | /* drop completed sources */ |
@@ -124,23 +118,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | |||
124 | 118 | ||
125 | static void | 119 | static void |
126 | do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, | 120 | do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, |
127 | int src_cnt, size_t len, enum async_tx_flags flags, | 121 | int src_cnt, size_t len, struct async_submit_ctl *submit) |
128 | dma_async_tx_callback cb_fn, void *cb_param) | ||
129 | { | 122 | { |
130 | int i; | 123 | int i; |
131 | int xor_src_cnt; | 124 | int xor_src_cnt; |
132 | int src_off = 0; | 125 | int src_off = 0; |
133 | void *dest_buf; | 126 | void *dest_buf; |
134 | void **srcs = (void **) src_list; | 127 | void **srcs; |
128 | |||
129 | if (submit->scribble) | ||
130 | srcs = submit->scribble; | ||
131 | else | ||
132 | srcs = (void **) src_list; | ||
135 | 133 | ||
136 | /* reuse the 'src_list' array to convert to buffer pointers */ | 134 | /* convert to buffer pointers */ |
137 | for (i = 0; i < src_cnt; i++) | 135 | for (i = 0; i < src_cnt; i++) |
138 | srcs[i] = page_address(src_list[i]) + offset; | 136 | srcs[i] = page_address(src_list[i]) + offset; |
139 | 137 | ||
140 | /* set destination address */ | 138 | /* set destination address */ |
141 | dest_buf = page_address(dest) + offset; | 139 | dest_buf = page_address(dest) + offset; |
142 | 140 | ||
143 | if (flags & ASYNC_TX_XOR_ZERO_DST) | 141 | if (submit->flags & ASYNC_TX_XOR_ZERO_DST) |
144 | memset(dest_buf, 0, len); | 142 | memset(dest_buf, 0, len); |
145 | 143 | ||
146 | while (src_cnt > 0) { | 144 | while (src_cnt > 0) { |
@@ -153,61 +151,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, | |||
153 | src_off += xor_src_cnt; | 151 | src_off += xor_src_cnt; |
154 | } | 152 | } |
155 | 153 | ||
156 | async_tx_sync_epilog(cb_fn, cb_param); | 154 | async_tx_sync_epilog(submit); |
157 | } | 155 | } |
158 | 156 | ||
159 | /** | 157 | /** |
160 | * async_xor - attempt to xor a set of blocks with a dma engine. | 158 | * async_xor - attempt to xor a set of blocks with a dma engine. |
161 | * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST | ||
162 | * flag must be set to not include dest data in the calculation. The | ||
163 | * assumption with dma eninges is that they only use the destination | ||
164 | * buffer as a source when it is explicity specified in the source list. | ||
165 | * @dest: destination page | 159 | * @dest: destination page |
166 | * @src_list: array of source pages (if the dest is also a source it must be | 160 | * @src_list: array of source pages |
167 | * at index zero). The contents of this array may be overwritten. | 161 | * @offset: common src/dst offset to start transaction |
168 | * @offset: offset in pages to start transaction | ||
169 | * @src_cnt: number of source pages | 162 | * @src_cnt: number of source pages |
170 | * @len: length in bytes | 163 | * @len: length in bytes |
171 | * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, | 164 | * @submit: submission / completion modifiers |
172 | * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK | 165 | * |
173 | * @depend_tx: xor depends on the result of this transaction. | 166 | * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST |
174 | * @cb_fn: function to call when the xor completes | 167 | * |
175 | * @cb_param: parameter to pass to the callback routine | 168 | * xor_blocks always uses the dest as a source so the |
169 | * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in | ||
170 | * the calculation. The assumption with dma eninges is that they only | ||
171 | * use the destination buffer as a source when it is explicity specified | ||
172 | * in the source list. | ||
173 | * | ||
174 | * src_list note: if the dest is also a source it must be at index zero. | ||
175 | * The contents of this array will be overwritten if a scribble region | ||
176 | * is not specified. | ||
176 | */ | 177 | */ |
177 | struct dma_async_tx_descriptor * | 178 | struct dma_async_tx_descriptor * |
178 | async_xor(struct page *dest, struct page **src_list, unsigned int offset, | 179 | async_xor(struct page *dest, struct page **src_list, unsigned int offset, |
179 | int src_cnt, size_t len, enum async_tx_flags flags, | 180 | int src_cnt, size_t len, struct async_submit_ctl *submit) |
180 | struct dma_async_tx_descriptor *depend_tx, | ||
181 | dma_async_tx_callback cb_fn, void *cb_param) | ||
182 | { | 181 | { |
183 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, | 182 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, |
184 | &dest, 1, src_list, | 183 | &dest, 1, src_list, |
185 | src_cnt, len); | 184 | src_cnt, len); |
185 | dma_addr_t *dma_src = NULL; | ||
186 | |||
186 | BUG_ON(src_cnt <= 1); | 187 | BUG_ON(src_cnt <= 1); |
187 | 188 | ||
188 | if (chan) { | 189 | if (submit->scribble) |
190 | dma_src = submit->scribble; | ||
191 | else if (sizeof(dma_addr_t) <= sizeof(struct page *)) | ||
192 | dma_src = (dma_addr_t *) src_list; | ||
193 | |||
194 | if (dma_src && chan) { | ||
189 | /* run the xor asynchronously */ | 195 | /* run the xor asynchronously */ |
190 | pr_debug("%s (async): len: %zu\n", __func__, len); | 196 | pr_debug("%s (async): len: %zu\n", __func__, len); |
191 | 197 | ||
192 | return do_async_xor(chan, dest, src_list, offset, src_cnt, len, | 198 | return do_async_xor(chan, dest, src_list, offset, src_cnt, len, |
193 | flags, depend_tx, cb_fn, cb_param); | 199 | dma_src, submit); |
194 | } else { | 200 | } else { |
195 | /* run the xor synchronously */ | 201 | /* run the xor synchronously */ |
196 | pr_debug("%s (sync): len: %zu\n", __func__, len); | 202 | pr_debug("%s (sync): len: %zu\n", __func__, len); |
203 | WARN_ONCE(chan, "%s: no space for dma address conversion\n", | ||
204 | __func__); | ||
197 | 205 | ||
198 | /* in the sync case the dest is an implied source | 206 | /* in the sync case the dest is an implied source |
199 | * (assumes the dest is the first source) | 207 | * (assumes the dest is the first source) |
200 | */ | 208 | */ |
201 | if (flags & ASYNC_TX_XOR_DROP_DST) { | 209 | if (submit->flags & ASYNC_TX_XOR_DROP_DST) { |
202 | src_cnt--; | 210 | src_cnt--; |
203 | src_list++; | 211 | src_list++; |
204 | } | 212 | } |
205 | 213 | ||
206 | /* wait for any prerequisite operations */ | 214 | /* wait for any prerequisite operations */ |
207 | async_tx_quiesce(&depend_tx); | 215 | async_tx_quiesce(&submit->depend_tx); |
208 | 216 | ||
209 | do_sync_xor(dest, src_list, offset, src_cnt, len, | 217 | do_sync_xor(dest, src_list, offset, src_cnt, len, submit); |
210 | flags, cb_fn, cb_param); | ||
211 | 218 | ||
212 | return NULL; | 219 | return NULL; |
213 | } | 220 | } |
@@ -222,104 +229,90 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) | |||
222 | } | 229 | } |
223 | 230 | ||
224 | /** | 231 | /** |
225 | * async_xor_zero_sum - attempt a xor parity check with a dma engine. | 232 | * async_xor_val - attempt a xor parity check with a dma engine. |
226 | * @dest: destination page used if the xor is performed synchronously | 233 | * @dest: destination page used if the xor is performed synchronously |
227 | * @src_list: array of source pages. The dest page must be listed as a source | 234 | * @src_list: array of source pages |
228 | * at index zero. The contents of this array may be overwritten. | ||
229 | * @offset: offset in pages to start transaction | 235 | * @offset: offset in pages to start transaction |
230 | * @src_cnt: number of source pages | 236 | * @src_cnt: number of source pages |
231 | * @len: length in bytes | 237 | * @len: length in bytes |
232 | * @result: 0 if sum == 0 else non-zero | 238 | * @result: 0 if sum == 0 else non-zero |
233 | * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK | 239 | * @submit: submission / completion modifiers |
234 | * @depend_tx: xor depends on the result of this transaction. | 240 | * |
235 | * @cb_fn: function to call when the xor completes | 241 | * honored flags: ASYNC_TX_ACK |
236 | * @cb_param: parameter to pass to the callback routine | 242 | * |
243 | * src_list note: if the dest is also a source it must be at index zero. | ||
244 | * The contents of this array will be overwritten if a scribble region | ||
245 | * is not specified. | ||
237 | */ | 246 | */ |
238 | struct dma_async_tx_descriptor * | 247 | struct dma_async_tx_descriptor * |
239 | async_xor_zero_sum(struct page *dest, struct page **src_list, | 248 | async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, |
240 | unsigned int offset, int src_cnt, size_t len, | 249 | int src_cnt, size_t len, enum sum_check_flags *result, |
241 | u32 *result, enum async_tx_flags flags, | 250 | struct async_submit_ctl *submit) |
242 | struct dma_async_tx_descriptor *depend_tx, | ||
243 | dma_async_tx_callback cb_fn, void *cb_param) | ||
244 | { | 251 | { |
245 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, | 252 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, |
246 | &dest, 1, src_list, | 253 | &dest, 1, src_list, |
247 | src_cnt, len); | 254 | src_cnt, len); |
248 | struct dma_device *device = chan ? chan->device : NULL; | 255 | struct dma_device *device = chan ? chan->device : NULL; |
249 | struct dma_async_tx_descriptor *tx = NULL; | 256 | struct dma_async_tx_descriptor *tx = NULL; |
257 | dma_addr_t *dma_src = NULL; | ||
250 | 258 | ||
251 | BUG_ON(src_cnt <= 1); | 259 | BUG_ON(src_cnt <= 1); |
252 | 260 | ||
253 | if (device && src_cnt <= device->max_xor) { | 261 | if (submit->scribble) |
254 | dma_addr_t *dma_src = (dma_addr_t *) src_list; | 262 | dma_src = submit->scribble; |
255 | unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; | 263 | else if (sizeof(dma_addr_t) <= sizeof(struct page *)) |
264 | dma_src = (dma_addr_t *) src_list; | ||
265 | |||
266 | if (dma_src && device && src_cnt <= device->max_xor) { | ||
267 | unsigned long dma_prep_flags; | ||
256 | int i; | 268 | int i; |
257 | 269 | ||
258 | pr_debug("%s: (async) len: %zu\n", __func__, len); | 270 | pr_debug("%s: (async) len: %zu\n", __func__, len); |
259 | 271 | ||
272 | dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; | ||
260 | for (i = 0; i < src_cnt; i++) | 273 | for (i = 0; i < src_cnt; i++) |
261 | dma_src[i] = dma_map_page(device->dev, src_list[i], | 274 | dma_src[i] = dma_map_page(device->dev, src_list[i], |
262 | offset, len, DMA_TO_DEVICE); | 275 | offset, len, DMA_TO_DEVICE); |
263 | 276 | ||
264 | tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, | 277 | tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, |
265 | len, result, | 278 | len, result, |
266 | dma_prep_flags); | 279 | dma_prep_flags); |
267 | if (unlikely(!tx)) { | 280 | if (unlikely(!tx)) { |
268 | async_tx_quiesce(&depend_tx); | 281 | async_tx_quiesce(&submit->depend_tx); |
269 | 282 | ||
270 | while (!tx) { | 283 | while (!tx) { |
271 | dma_async_issue_pending(chan); | 284 | dma_async_issue_pending(chan); |
272 | tx = device->device_prep_dma_zero_sum(chan, | 285 | tx = device->device_prep_dma_xor_val(chan, |
273 | dma_src, src_cnt, len, result, | 286 | dma_src, src_cnt, len, result, |
274 | dma_prep_flags); | 287 | dma_prep_flags); |
275 | } | 288 | } |
276 | } | 289 | } |
277 | 290 | ||
278 | async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); | 291 | async_tx_submit(chan, tx, submit); |
279 | } else { | 292 | } else { |
280 | unsigned long xor_flags = flags; | 293 | enum async_tx_flags flags_orig = submit->flags; |
281 | 294 | ||
282 | pr_debug("%s: (sync) len: %zu\n", __func__, len); | 295 | pr_debug("%s: (sync) len: %zu\n", __func__, len); |
296 | WARN_ONCE(device && src_cnt <= device->max_xor, | ||
297 | "%s: no space for dma address conversion\n", | ||
298 | __func__); | ||
283 | 299 | ||
284 | xor_flags |= ASYNC_TX_XOR_DROP_DST; | 300 | submit->flags |= ASYNC_TX_XOR_DROP_DST; |
285 | xor_flags &= ~ASYNC_TX_ACK; | 301 | submit->flags &= ~ASYNC_TX_ACK; |
286 | 302 | ||
287 | tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, | 303 | tx = async_xor(dest, src_list, offset, src_cnt, len, submit); |
288 | depend_tx, NULL, NULL); | ||
289 | 304 | ||
290 | async_tx_quiesce(&tx); | 305 | async_tx_quiesce(&tx); |
291 | 306 | ||
292 | *result = page_is_zero(dest, offset, len) ? 0 : 1; | 307 | *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P; |
293 | 308 | ||
294 | async_tx_sync_epilog(cb_fn, cb_param); | 309 | async_tx_sync_epilog(submit); |
310 | submit->flags = flags_orig; | ||
295 | } | 311 | } |
296 | 312 | ||
297 | return tx; | 313 | return tx; |
298 | } | 314 | } |
299 | EXPORT_SYMBOL_GPL(async_xor_zero_sum); | 315 | EXPORT_SYMBOL_GPL(async_xor_val); |
300 | |||
301 | static int __init async_xor_init(void) | ||
302 | { | ||
303 | #ifdef CONFIG_DMA_ENGINE | ||
304 | /* To conserve stack space the input src_list (array of page pointers) | ||
305 | * is reused to hold the array of dma addresses passed to the driver. | ||
306 | * This conversion is only possible when dma_addr_t is less than the | ||
307 | * the size of a pointer. HIGHMEM64G is known to violate this | ||
308 | * assumption. | ||
309 | */ | ||
310 | BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *)); | ||
311 | #endif | ||
312 | |||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | static void __exit async_xor_exit(void) | ||
317 | { | ||
318 | do { } while (0); | ||
319 | } | ||
320 | |||
321 | module_init(async_xor_init); | ||
322 | module_exit(async_xor_exit); | ||
323 | 316 | ||
324 | MODULE_AUTHOR("Intel Corporation"); | 317 | MODULE_AUTHOR("Intel Corporation"); |
325 | MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); | 318 | MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); |
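Editor's note: both xor paths now take the submit descriptor plus an optional scribble region, so the caller's page list survives the dma address conversion, and the renamed async_xor_val() reports through enum sum_check_flags rather than a bare u32. A minimal sketch that generates parity and then validates it, honoring the kernel-doc note above that the destination must also sit at index zero of the source list during validation. xor_and_check(), xor_check_done() and the blocks[] layout (blocks[0] is the parity page, blocks[1..data_cnt] the data pages) are illustrative; data_cnt is assumed to be at least two, and the scribble region is assumed to hold at least data_cnt + 1 entries.

#include <linux/async_tx.h>
#include <linux/completion.h>
#include <linux/errno.h>

static void xor_check_done(void *param)
{
	complete(param);
}

/* blocks[0] is the parity page, blocks[1..data_cnt] are the data pages */
static int xor_and_check(struct page **blocks, int data_cnt, size_t len,
			 addr_conv_t *scribble)
{
	struct page *parity = blocks[0];
	struct async_submit_ctl submit;
	struct dma_async_tx_descriptor *tx;
	enum sum_check_flags result = 0;
	struct completion cmp;

	/* generate: xor the data pages into 'parity', overwriting it */
	init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, NULL, NULL,
			  scribble);
	tx = async_xor(parity, &blocks[1], 0, data_cnt, len, &submit);

	/* validate: parity plus data must xor to zero; the dest page is
	 * also source index 0, as required above */
	init_completion(&cmp);
	init_async_submit(&submit, ASYNC_TX_ACK, tx, xor_check_done, &cmp,
			  scribble);
	tx = async_xor_val(parity, blocks, 0, data_cnt + 1, len, &result,
			   &submit);
	async_tx_issue_pending(tx);
	wait_for_completion(&cmp);

	return result ? -EIO : 0;
}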
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c new file mode 100644 index 000000000000..98c83ca96c83 --- /dev/null +++ b/crypto/async_tx/raid6test.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* | ||
2 | * asynchronous raid6 recovery self test | ||
3 | * Copyright (c) 2009, Intel Corporation. | ||
4 | * | ||
5 | * based on drivers/md/raid6test/test.c: | ||
6 | * Copyright 2002-2007 H. Peter Anvin | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | * | ||
21 | */ | ||
22 | #include <linux/async_tx.h> | ||
23 | #include <linux/random.h> | ||
24 | |||
25 | #undef pr | ||
26 | #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) | ||
27 | |||
28 | #define NDISKS 16 /* Including P and Q */ | ||
29 | |||
30 | static struct page *dataptrs[NDISKS]; | ||
31 | static struct page *data[NDISKS+3]; | ||
32 | static struct page *spare; | ||
33 | static struct page *recovi; | ||
34 | static struct page *recovj; | ||
35 | |||
36 | static void callback(void *param) | ||
37 | { | ||
38 | struct completion *cmp = param; | ||
39 | |||
40 | complete(cmp); | ||
41 | } | ||
42 | |||
43 | static void makedata(int disks) | ||
44 | { | ||
45 | int i, j; | ||
46 | |||
47 | for (i = 0; i < disks; i++) { | ||
48 | for (j = 0; j < PAGE_SIZE; j += sizeof(u32)) { | ||
49 | u32 *p = page_address(data[i]) + j; | ||
50 | |||
51 | *p = random32(); | ||
52 | } | ||
53 | |||
54 | dataptrs[i] = data[i]; | ||
55 | } | ||
56 | } | ||
57 | |||
58 | static char disk_type(int d, int disks) | ||
59 | { | ||
60 | if (d == disks - 2) | ||
61 | return 'P'; | ||
62 | else if (d == disks - 1) | ||
63 | return 'Q'; | ||
64 | else | ||
65 | return 'D'; | ||
66 | } | ||
67 | |||
68 | /* Recover two failed blocks. */ | ||
69 | static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) | ||
70 | { | ||
71 | struct async_submit_ctl submit; | ||
72 | addr_conv_t addr_conv[disks]; | ||
73 | struct completion cmp; | ||
74 | struct dma_async_tx_descriptor *tx = NULL; | ||
75 | enum sum_check_flags result = ~0; | ||
76 | |||
77 | if (faila > failb) | ||
78 | swap(faila, failb); | ||
79 | |||
80 | if (failb == disks-1) { | ||
81 | if (faila == disks-2) { | ||
82 | /* P+Q failure. Just rebuild the syndrome. */ | ||
83 | init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); | ||
84 | tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); | ||
85 | } else { | ||
86 | struct page *blocks[disks]; | ||
87 | struct page *dest; | ||
88 | int count = 0; | ||
89 | int i; | ||
90 | |||
91 | /* data+Q failure. Reconstruct data from P, | ||
92 | * then rebuild syndrome | ||
93 | */ | ||
94 | for (i = disks; i-- ; ) { | ||
95 | if (i == faila || i == failb) | ||
96 | continue; | ||
97 | blocks[count++] = ptrs[i]; | ||
98 | } | ||
99 | dest = ptrs[faila]; | ||
100 | init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, | ||
101 | NULL, NULL, addr_conv); | ||
102 | tx = async_xor(dest, blocks, 0, count, bytes, &submit); | ||
103 | |||
104 | init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv); | ||
105 | tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); | ||
106 | } | ||
107 | } else { | ||
108 | if (failb == disks-2) { | ||
109 | /* data+P failure. */ | ||
110 | init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); | ||
111 | tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit); | ||
112 | } else { | ||
113 | /* data+data failure. */ | ||
114 | init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); | ||
115 | tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit); | ||
116 | } | ||
117 | } | ||
118 | init_completion(&cmp); | ||
119 | init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv); | ||
120 | tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit); | ||
121 | async_tx_issue_pending(tx); | ||
122 | |||
123 | if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) | ||
124 | pr("%s: timeout! (faila: %d failb: %d disks: %d)\n", | ||
125 | __func__, faila, failb, disks); | ||
126 | |||
127 | if (result != 0) | ||
128 | pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n", | ||
129 | __func__, faila, failb, result); | ||
130 | } | ||
131 | |||
132 | static int test_disks(int i, int j, int disks) | ||
133 | { | ||
134 | int erra, errb; | ||
135 | |||
136 | memset(page_address(recovi), 0xf0, PAGE_SIZE); | ||
137 | memset(page_address(recovj), 0xba, PAGE_SIZE); | ||
138 | |||
139 | dataptrs[i] = recovi; | ||
140 | dataptrs[j] = recovj; | ||
141 | |||
142 | raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs); | ||
143 | |||
144 | erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE); | ||
145 | errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE); | ||
146 | |||
147 | pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n", | ||
148 | __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks), | ||
149 | (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB"); | ||
150 | |||
151 | dataptrs[i] = data[i]; | ||
152 | dataptrs[j] = data[j]; | ||
153 | |||
154 | return erra || errb; | ||
155 | } | ||
156 | |||
157 | static int test(int disks, int *tests) | ||
158 | { | ||
159 | addr_conv_t addr_conv[disks]; | ||
160 | struct dma_async_tx_descriptor *tx; | ||
161 | struct async_submit_ctl submit; | ||
162 | struct completion cmp; | ||
163 | int err = 0; | ||
164 | int i, j; | ||
165 | |||
166 | recovi = data[disks]; | ||
167 | recovj = data[disks+1]; | ||
168 | spare = data[disks+2]; | ||
169 | |||
170 | makedata(disks); | ||
171 | |||
172 | /* Nuke syndromes */ | ||
173 | memset(page_address(data[disks-2]), 0xee, PAGE_SIZE); | ||
174 | memset(page_address(data[disks-1]), 0xee, PAGE_SIZE); | ||
175 | |||
176 | /* Generate assumed good syndrome */ | ||
177 | init_completion(&cmp); | ||
178 | init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv); | ||
179 | tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit); | ||
180 | async_tx_issue_pending(tx); | ||
181 | |||
182 | if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) { | ||
183 | pr("error: initial gen_syndrome(%d) timed out\n", disks); | ||
184 | return 1; | ||
185 | } | ||
186 | |||
187 | pr("testing the %d-disk case...\n", disks); | ||
188 | for (i = 0; i < disks-1; i++) | ||
189 | for (j = i+1; j < disks; j++) { | ||
190 | (*tests)++; | ||
191 | err += test_disks(i, j, disks); | ||
192 | } | ||
193 | |||
194 | return err; | ||
195 | } | ||
196 | |||
197 | |||
198 | static int raid6_test(void) | ||
199 | { | ||
200 | int err = 0; | ||
201 | int tests = 0; | ||
202 | int i; | ||
203 | |||
204 | for (i = 0; i < NDISKS+3; i++) { | ||
205 | data[i] = alloc_page(GFP_KERNEL); | ||
206 | if (!data[i]) { | ||
207 | while (i--) | ||
208 | put_page(data[i]); | ||
209 | return -ENOMEM; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | /* the 4-disk and 5-disk cases are special for the recovery code */ | ||
214 | if (NDISKS > 4) | ||
215 | err += test(4, &tests); | ||
216 | if (NDISKS > 5) | ||
217 | err += test(5, &tests); | ||
218 | err += test(NDISKS, &tests); | ||
219 | |||
220 | pr("\n"); | ||
221 | pr("complete (%d tests, %d failure%s)\n", | ||
222 | tests, err, err == 1 ? "" : "s"); | ||
223 | |||
224 | for (i = 0; i < NDISKS+3; i++) | ||
225 | put_page(data[i]); | ||
226 | |||
227 | return 0; | ||
228 | } | ||
229 | |||
230 | static void raid6_test_exit(void) | ||
231 | { | ||
232 | } | ||
233 | |||
234 | /* when compiled-in, wait for drivers to load first (assumes dma drivers | ||
235 | * are also compiled-in) | ||
236 | */ | ||
237 | late_initcall(raid6_test); | ||
238 | module_exit(raid6_test_exit); | ||
239 | MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); | ||
240 | MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests"); | ||
241 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 3b3c01b6f1ee..912a51b5cbd3 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | menuconfig DMADEVICES | 5 | menuconfig DMADEVICES |
6 | bool "DMA Engine support" | 6 | bool "DMA Engine support" |
7 | depends on !HIGHMEM64G && HAS_DMA | 7 | depends on HAS_DMA |
8 | help | 8 | help |
9 | DMA engines can do asynchronous data transfers without | 9 | DMA engines can do asynchronous data transfers without |
10 | involving the host CPU. Currently, this framework can be | 10 | involving the host CPU. Currently, this framework can be |
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 5a87384ea4ff..96598479eece 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c | |||
@@ -644,8 +644,12 @@ int dma_async_device_register(struct dma_device *device) | |||
644 | !device->device_prep_dma_memcpy); | 644 | !device->device_prep_dma_memcpy); |
645 | BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && | 645 | BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && |
646 | !device->device_prep_dma_xor); | 646 | !device->device_prep_dma_xor); |
647 | BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && | 647 | BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && |
648 | !device->device_prep_dma_zero_sum); | 648 | !device->device_prep_dma_xor_val); |
649 | BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && | ||
650 | !device->device_prep_dma_pq); | ||
651 | BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && | ||
652 | !device->device_prep_dma_pq_val); | ||
649 | BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && | 653 | BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && |
650 | !device->device_prep_dma_memset); | 654 | !device->device_prep_dma_memset); |
651 | BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && | 655 | BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && |
@@ -939,49 +943,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init); | |||
939 | 943 | ||
940 | /* dma_wait_for_async_tx - spin wait for a transaction to complete | 944 | /* dma_wait_for_async_tx - spin wait for a transaction to complete |
941 | * @tx: in-flight transaction to wait on | 945 | * @tx: in-flight transaction to wait on |
942 | * | ||
943 | * This routine assumes that tx was obtained from a call to async_memcpy, | ||
944 | * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped | ||
945 | * and submitted). Walking the parent chain is only meant to cover for DMA | ||
946 | * drivers that do not implement the DMA_INTERRUPT capability and may race with | ||
947 | * the driver's descriptor cleanup routine. | ||
948 | */ | 946 | */ |
949 | enum dma_status | 947 | enum dma_status |
950 | dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) | 948 | dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) |
951 | { | 949 | { |
952 | enum dma_status status; | 950 | unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); |
953 | struct dma_async_tx_descriptor *iter; | ||
954 | struct dma_async_tx_descriptor *parent; | ||
955 | 951 | ||
956 | if (!tx) | 952 | if (!tx) |
957 | return DMA_SUCCESS; | 953 | return DMA_SUCCESS; |
958 | 954 | ||
959 | WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" | 955 | while (tx->cookie == -EBUSY) { |
960 | " %s\n", __func__, dma_chan_name(tx->chan)); | 956 | if (time_after_eq(jiffies, dma_sync_wait_timeout)) { |
961 | 957 | pr_err("%s timeout waiting for descriptor submission\n", | |
962 | /* poll through the dependency chain, return when tx is complete */ | 958 | __func__); |
963 | do { | 959 | return DMA_ERROR; |
964 | iter = tx; | 960 | } |
965 | 961 | cpu_relax(); | |
966 | /* find the root of the unsubmitted dependency chain */ | 962 | } |
967 | do { | 963 | return dma_sync_wait(tx->chan, tx->cookie); |
968 | parent = iter->parent; | ||
969 | if (!parent) | ||
970 | break; | ||
971 | else | ||
972 | iter = parent; | ||
973 | } while (parent); | ||
974 | |||
975 | /* there is a small window for ->parent == NULL and | ||
976 | * ->cookie == -EBUSY | ||
977 | */ | ||
978 | while (iter->cookie == -EBUSY) | ||
979 | cpu_relax(); | ||
980 | |||
981 | status = dma_sync_wait(iter->chan, iter->cookie); | ||
982 | } while (status == DMA_IN_PROGRESS || (iter != tx)); | ||
983 | |||
984 | return status; | ||
985 | } | 964 | } |
986 | EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); | 965 | EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); |
987 | 966 | ||
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index fb7da5141e96..58e49e41c7a3 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c | |||
@@ -43,6 +43,11 @@ module_param(xor_sources, uint, S_IRUGO); | |||
43 | MODULE_PARM_DESC(xor_sources, | 43 | MODULE_PARM_DESC(xor_sources, |
44 | "Number of xor source buffers (default: 3)"); | 44 | "Number of xor source buffers (default: 3)"); |
45 | 45 | ||
46 | static unsigned int pq_sources = 3; | ||
47 | module_param(pq_sources, uint, S_IRUGO); | ||
48 | MODULE_PARM_DESC(pq_sources, | ||
49 | "Number of p+q source buffers (default: 3)"); | ||
50 | |||
46 | /* | 51 | /* |
47 | * Initialization patterns. All bytes in the source buffer have bit 7 | 52 |
48 | set, all bytes in the destination buffer have bit 7 cleared. | 53 |
@@ -227,6 +232,7 @@ static int dmatest_func(void *data) | |||
227 | dma_cookie_t cookie; | 232 | dma_cookie_t cookie; |
228 | enum dma_status status; | 233 | enum dma_status status; |
229 | enum dma_ctrl_flags flags; | 234 | enum dma_ctrl_flags flags; |
235 | u8 pq_coefs[pq_sources]; | ||
230 | int ret; | 236 | int ret; |
231 | int src_cnt; | 237 | int src_cnt; |
232 | int dst_cnt; | 238 | int dst_cnt; |
@@ -243,6 +249,11 @@ static int dmatest_func(void *data) | |||
243 | else if (thread->type == DMA_XOR) { | 249 | else if (thread->type == DMA_XOR) { |
244 | src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ | 250 | src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ |
245 | dst_cnt = 1; | 251 | dst_cnt = 1; |
252 | } else if (thread->type == DMA_PQ) { | ||
253 | src_cnt = pq_sources | 1; /* force odd to ensure dst = src */ | ||
254 | dst_cnt = 2; | ||
255 | for (i = 0; i < pq_sources; i++) | ||
256 | pq_coefs[i] = 1; | ||
246 | } else | 257 | } else |
247 | goto err_srcs; | 258 | goto err_srcs; |
248 | 259 | ||
@@ -310,6 +321,15 @@ static int dmatest_func(void *data) | |||
310 | dma_dsts[0] + dst_off, | 321 | dma_dsts[0] + dst_off, |
311 | dma_srcs, xor_sources, | 322 | dma_srcs, xor_sources, |
312 | len, flags); | 323 | len, flags); |
324 | else if (thread->type == DMA_PQ) { | ||
325 | dma_addr_t dma_pq[dst_cnt]; | ||
326 | |||
327 | for (i = 0; i < dst_cnt; i++) | ||
328 | dma_pq[i] = dma_dsts[i] + dst_off; | ||
329 | tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs, | ||
330 | pq_sources, pq_coefs, | ||
331 | len, flags); | ||
332 | } | ||
313 | 333 | ||
314 | if (!tx) { | 334 | if (!tx) { |
315 | for (i = 0; i < src_cnt; i++) | 335 | for (i = 0; i < src_cnt; i++) |
@@ -446,6 +466,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty | |||
446 | op = "copy"; | 466 | op = "copy"; |
447 | else if (type == DMA_XOR) | 467 | else if (type == DMA_XOR) |
448 | op = "xor"; | 468 | op = "xor"; |
469 | else if (type == DMA_PQ) | ||
470 | op = "pq"; | ||
449 | else | 471 | else |
450 | return -EINVAL; | 472 | return -EINVAL; |
451 | 473 | ||
@@ -501,6 +523,10 @@ static int dmatest_add_channel(struct dma_chan *chan) | |||
501 | cnt = dmatest_add_threads(dtc, DMA_XOR); | 523 | cnt = dmatest_add_threads(dtc, DMA_XOR); |
502 | thread_count += cnt > 0 ?: 0; | 524 | thread_count += cnt > 0 ?: 0; |
503 | } | 525 | } |
526 | if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { | ||
527 | cnt = dmatest_add_threads(dtc, DMA_PQ); | ||
528 | thread_count += cnt > 0 ?: 0; | ||
529 | } | ||
504 | 530 | ||
505 | pr_info("dmatest: Started %u threads using %s\n", | 531 | pr_info("dmatest: Started %u threads using %s\n", |
506 | thread_count, dma_chan_name(chan)); | 532 | thread_count, dma_chan_name(chan)); |
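Editor's note: the dmatest changes above exercise the new DMA_PQ capability through the raw dmaengine interface rather than the async_tx wrappers. A sketch of that pattern under the same assumptions as the hunk (the channel advertises DMA_PQ, all coefficients set to 1); gen_pq_once() and its arguments are illustrative, and unmap/error handling is omitted.

#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>

static int gen_pq_once(struct dma_chan *chan, struct page **src_pages,
		       int src_cnt, struct page *p_page, struct page *q_page,
		       size_t len)
{
	struct dma_device *dev = chan->device;
	struct dma_async_tx_descriptor *tx;
	dma_addr_t dma_src[src_cnt];
	dma_addr_t dma_pq[2];
	u8 coefs[src_cnt];
	dma_cookie_t cookie;
	int i;

	for (i = 0; i < src_cnt; i++) {
		dma_src[i] = dma_map_page(dev->dev, src_pages[i], 0, len,
					  DMA_TO_DEVICE);
		coefs[i] = 1;	/* g^0 weight per source, as dmatest uses */
	}
	dma_pq[0] = dma_map_page(dev->dev, p_page, 0, len, DMA_BIDIRECTIONAL);
	dma_pq[1] = dma_map_page(dev->dev, q_page, 0, len, DMA_BIDIRECTIONAL);

	tx = dev->device_prep_dma_pq(chan, dma_pq, dma_src, src_cnt, coefs,
				     len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!tx)
		return -ENOMEM;

	cookie = tx->tx_submit(tx);
	dma_async_issue_pending(chan);

	return dma_sync_wait(chan, cookie) == DMA_SUCCESS ? 0 : -EIO;
}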
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 2f052265122f..4496bc606662 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c | |||
@@ -660,9 +660,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, | |||
660 | } | 660 | } |
661 | 661 | ||
662 | static struct dma_async_tx_descriptor * | 662 | static struct dma_async_tx_descriptor * |
663 | iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, | 663 | iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, |
664 | unsigned int src_cnt, size_t len, u32 *result, | 664 | unsigned int src_cnt, size_t len, u32 *result, |
665 | unsigned long flags) | 665 | unsigned long flags) |
666 | { | 666 | { |
667 | struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); | 667 | struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); |
668 | struct iop_adma_desc_slot *sw_desc, *grp_start; | 668 | struct iop_adma_desc_slot *sw_desc, *grp_start; |
@@ -906,7 +906,7 @@ out: | |||
906 | 906 | ||
907 | #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ | 907 | #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ |
908 | static int __devinit | 908 | static int __devinit |
909 | iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) | 909 | iop_adma_xor_val_self_test(struct iop_adma_device *device) |
910 | { | 910 | { |
911 | int i, src_idx; | 911 | int i, src_idx; |
912 | struct page *dest; | 912 | struct page *dest; |
@@ -1002,7 +1002,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) | |||
1002 | PAGE_SIZE, DMA_TO_DEVICE); | 1002 | PAGE_SIZE, DMA_TO_DEVICE); |
1003 | 1003 | ||
1004 | /* skip zero sum if the capability is not present */ | 1004 | /* skip zero sum if the capability is not present */ |
1005 | if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) | 1005 | if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) |
1006 | goto free_resources; | 1006 | goto free_resources; |
1007 | 1007 | ||
1008 | /* zero sum the sources with the destination page */ | 1008 |
@@ -1016,10 +1016,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) | |||
1016 | dma_srcs[i] = dma_map_page(dma_chan->device->dev, | 1016 | dma_srcs[i] = dma_map_page(dma_chan->device->dev, |
1017 | zero_sum_srcs[i], 0, PAGE_SIZE, | 1017 | zero_sum_srcs[i], 0, PAGE_SIZE, |
1018 | DMA_TO_DEVICE); | 1018 | DMA_TO_DEVICE); |
1019 | tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, | 1019 | tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, |
1020 | IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, | 1020 | IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, |
1021 | &zero_sum_result, | 1021 | &zero_sum_result, |
1022 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | 1022 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); |
1023 | 1023 | ||
1024 | cookie = iop_adma_tx_submit(tx); | 1024 | cookie = iop_adma_tx_submit(tx); |
1025 | iop_adma_issue_pending(dma_chan); | 1025 | iop_adma_issue_pending(dma_chan); |
@@ -1072,10 +1072,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) | |||
1072 | dma_srcs[i] = dma_map_page(dma_chan->device->dev, | 1072 | dma_srcs[i] = dma_map_page(dma_chan->device->dev, |
1073 | zero_sum_srcs[i], 0, PAGE_SIZE, | 1073 | zero_sum_srcs[i], 0, PAGE_SIZE, |
1074 | DMA_TO_DEVICE); | 1074 | DMA_TO_DEVICE); |
1075 | tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, | 1075 | tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, |
1076 | IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, | 1076 | IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, |
1077 | &zero_sum_result, | 1077 | &zero_sum_result, |
1078 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | 1078 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); |
1079 | 1079 | ||
1080 | cookie = iop_adma_tx_submit(tx); | 1080 | cookie = iop_adma_tx_submit(tx); |
1081 | iop_adma_issue_pending(dma_chan); | 1081 | iop_adma_issue_pending(dma_chan); |
@@ -1192,9 +1192,9 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) | |||
1192 | dma_dev->max_xor = iop_adma_get_max_xor(); | 1192 | dma_dev->max_xor = iop_adma_get_max_xor(); |
1193 | dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; | 1193 | dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; |
1194 | } | 1194 | } |
1195 | if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) | 1195 | if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask)) |
1196 | dma_dev->device_prep_dma_zero_sum = | 1196 | dma_dev->device_prep_dma_xor_val = |
1197 | iop_adma_prep_dma_zero_sum; | 1197 | iop_adma_prep_dma_xor_val; |
1198 | if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) | 1198 | if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) |
1199 | dma_dev->device_prep_dma_interrupt = | 1199 | dma_dev->device_prep_dma_interrupt = |
1200 | iop_adma_prep_dma_interrupt; | 1200 | iop_adma_prep_dma_interrupt; |
@@ -1249,7 +1249,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) | |||
1249 | 1249 | ||
1250 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || | 1250 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || |
1251 | dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { | 1251 | dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { |
1252 | ret = iop_adma_xor_zero_sum_self_test(adev); | 1252 | ret = iop_adma_xor_val_self_test(adev); |
1253 | dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); | 1253 | dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); |
1254 | if (ret) | 1254 | if (ret) |
1255 | goto err_free_iop_chan; | 1255 | goto err_free_iop_chan; |
@@ -1257,12 +1257,12 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) | |||
1257 | 1257 | ||
1258 | dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " | 1258 | dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " |
1259 | "( %s%s%s%s%s%s%s%s%s%s)\n", | 1259 | "( %s%s%s%s%s%s%s%s%s%s)\n", |
1260 | dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", | 1260 | dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", |
1261 | dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", | 1261 | dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", |
1262 | dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", | 1262 | dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", |
1263 | dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", | 1263 | dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", |
1264 | dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", | 1264 | dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", |
1265 | dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", | 1265 | dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", |
1266 | dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", | 1266 | dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", |
1267 | dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", | 1267 | dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", |
1268 | dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", | 1268 | dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", |
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 36e0675be9f7..09c0c6e49ab5 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -124,6 +124,8 @@ config MD_RAID456 | |||
124 | select MD_RAID6_PQ | 124 | select MD_RAID6_PQ |
125 | select ASYNC_MEMCPY | 125 | select ASYNC_MEMCPY |
126 | select ASYNC_XOR | 126 | select ASYNC_XOR |
127 | select ASYNC_PQ | ||
128 | select ASYNC_RAID6_RECOV | ||
127 | ---help--- | 129 | ---help--- |
128 | A RAID-5 set of N drives with a capacity of C MB per drive provides | 130 | A RAID-5 set of N drives with a capacity of C MB per drive provides |
129 | the capacity of C * (N - 1) MB, and protects against a failure | 131 | the capacity of C * (N - 1) MB, and protects against a failure |
@@ -152,9 +154,33 @@ config MD_RAID456 | |||
152 | 154 | ||
153 | If unsure, say Y. | 155 | If unsure, say Y. |
154 | 156 | ||
157 | config MULTICORE_RAID456 | ||
158 | bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)" | ||
159 | depends on MD_RAID456 | ||
160 | depends on SMP | ||
161 | depends on EXPERIMENTAL | ||
162 | ---help--- | ||
163 | Enable the raid456 module to dispatch per-stripe raid operations to a | ||
164 | thread pool. | ||
165 | |||
166 | If unsure, say N. | ||
167 | |||
155 | config MD_RAID6_PQ | 168 | config MD_RAID6_PQ |
156 | tristate | 169 | tristate |
157 | 170 | ||
171 | config ASYNC_RAID6_TEST | ||
172 | tristate "Self test for hardware accelerated raid6 recovery" | ||
173 | depends on MD_RAID6_PQ | ||
174 | select ASYNC_RAID6_RECOV | ||
175 | ---help--- | ||
176 | This is a one-shot self test that permutes through the | ||
177 | recovery of all the possible two-disk failure scenarios for an | ||
178 | N-disk array. Recovery is performed with the asynchronous | ||
179 | raid6 recovery routines, and will optionally use an offload | ||
180 | engine if one is available. | ||
181 | |||
182 | If unsure, say N. | ||
183 | |||
158 | config MD_MULTIPATH | 184 | config MD_MULTIPATH |
159 | tristate "Multipath I/O support" | 185 | tristate "Multipath I/O support" |
160 | depends on BLK_DEV_MD | 186 | depends on BLK_DEV_MD |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index bb37fb1b2d82..0a5cf2171214 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -47,7 +47,9 @@ | |||
47 | #include <linux/kthread.h> | 47 | #include <linux/kthread.h> |
48 | #include <linux/raid/pq.h> | 48 | #include <linux/raid/pq.h> |
49 | #include <linux/async_tx.h> | 49 | #include <linux/async_tx.h> |
50 | #include <linux/async.h> | ||
50 | #include <linux/seq_file.h> | 51 | #include <linux/seq_file.h> |
52 | #include <linux/cpu.h> | ||
51 | #include "md.h" | 53 | #include "md.h" |
52 | #include "raid5.h" | 54 | #include "raid5.h" |
53 | #include "bitmap.h" | 55 | #include "bitmap.h" |
@@ -499,11 +501,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, | |||
499 | struct page *bio_page; | 501 | struct page *bio_page; |
500 | int i; | 502 | int i; |
501 | int page_offset; | 503 | int page_offset; |
504 | struct async_submit_ctl submit; | ||
502 | 505 | ||
503 | if (bio->bi_sector >= sector) | 506 | if (bio->bi_sector >= sector) |
504 | page_offset = (signed)(bio->bi_sector - sector) * 512; | 507 | page_offset = (signed)(bio->bi_sector - sector) * 512; |
505 | else | 508 | else |
506 | page_offset = (signed)(sector - bio->bi_sector) * -512; | 509 | page_offset = (signed)(sector - bio->bi_sector) * -512; |
510 | |||
511 | init_async_submit(&submit, 0, tx, NULL, NULL, NULL); | ||
507 | bio_for_each_segment(bvl, bio, i) { | 512 | bio_for_each_segment(bvl, bio, i) { |
508 | int len = bio_iovec_idx(bio, i)->bv_len; | 513 | int len = bio_iovec_idx(bio, i)->bv_len; |
509 | int clen; | 514 | int clen; |
@@ -525,15 +530,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, | |||
525 | bio_page = bio_iovec_idx(bio, i)->bv_page; | 530 | bio_page = bio_iovec_idx(bio, i)->bv_page; |
526 | if (frombio) | 531 | if (frombio) |
527 | tx = async_memcpy(page, bio_page, page_offset, | 532 | tx = async_memcpy(page, bio_page, page_offset, |
528 | b_offset, clen, | 533 | b_offset, clen, &submit); |
529 | ASYNC_TX_DEP_ACK, | ||
530 | tx, NULL, NULL); | ||
531 | else | 534 | else |
532 | tx = async_memcpy(bio_page, page, b_offset, | 535 | tx = async_memcpy(bio_page, page, b_offset, |
533 | page_offset, clen, | 536 | page_offset, clen, &submit); |
534 | ASYNC_TX_DEP_ACK, | ||
535 | tx, NULL, NULL); | ||
536 | } | 537 | } |
538 | /* chain the operations */ | ||
539 | submit.depend_tx = tx; | ||
540 | |||
537 | if (clen < len) /* hit end of page */ | 541 | if (clen < len) /* hit end of page */ |
538 | break; | 542 | break; |
539 | page_offset += len; | 543 | page_offset += len; |
@@ -592,6 +596,7 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
592 | { | 596 | { |
593 | struct dma_async_tx_descriptor *tx = NULL; | 597 | struct dma_async_tx_descriptor *tx = NULL; |
594 | raid5_conf_t *conf = sh->raid_conf; | 598 | raid5_conf_t *conf = sh->raid_conf; |
599 | struct async_submit_ctl submit; | ||
595 | int i; | 600 | int i; |
596 | 601 | ||
597 | pr_debug("%s: stripe %llu\n", __func__, | 602 | pr_debug("%s: stripe %llu\n", __func__, |
@@ -615,22 +620,34 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
615 | } | 620 | } |
616 | 621 | ||
617 | atomic_inc(&sh->count); | 622 | atomic_inc(&sh->count); |
618 | async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, | 623 | init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); |
619 | ops_complete_biofill, sh); | 624 | async_trigger_callback(&submit); |
620 | } | 625 | } |
621 | 626 | ||
622 | static void ops_complete_compute5(void *stripe_head_ref) | 627 | static void mark_target_uptodate(struct stripe_head *sh, int target) |
623 | { | 628 | { |
624 | struct stripe_head *sh = stripe_head_ref; | 629 | struct r5dev *tgt; |
625 | int target = sh->ops.target; | ||
626 | struct r5dev *tgt = &sh->dev[target]; | ||
627 | 630 | ||
628 | pr_debug("%s: stripe %llu\n", __func__, | 631 | if (target < 0) |
629 | (unsigned long long)sh->sector); | 632 | return; |
630 | 633 | ||
634 | tgt = &sh->dev[target]; | ||
631 | set_bit(R5_UPTODATE, &tgt->flags); | 635 | set_bit(R5_UPTODATE, &tgt->flags); |
632 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | 636 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); |
633 | clear_bit(R5_Wantcompute, &tgt->flags); | 637 | clear_bit(R5_Wantcompute, &tgt->flags); |
638 | } | ||
639 | |||
640 | static void ops_complete_compute(void *stripe_head_ref) | ||
641 | { | ||
642 | struct stripe_head *sh = stripe_head_ref; | ||
643 | |||
644 | pr_debug("%s: stripe %llu\n", __func__, | ||
645 | (unsigned long long)sh->sector); | ||
646 | |||
647 | /* mark the computed target(s) as uptodate */ | ||
648 | mark_target_uptodate(sh, sh->ops.target); | ||
649 | mark_target_uptodate(sh, sh->ops.target2); | ||
650 | |||
634 | clear_bit(STRIPE_COMPUTE_RUN, &sh->state); | 651 | clear_bit(STRIPE_COMPUTE_RUN, &sh->state); |
635 | if (sh->check_state == check_state_compute_run) | 652 | if (sh->check_state == check_state_compute_run) |
636 | sh->check_state = check_state_compute_result; | 653 | sh->check_state = check_state_compute_result; |
@@ -638,16 +655,24 @@ static void ops_complete_compute5(void *stripe_head_ref) | |||
638 | release_stripe(sh); | 655 | release_stripe(sh); |
639 | } | 656 | } |
640 | 657 | ||
641 | static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) | 658 | /* return a pointer to the address conversion region of the scribble buffer */ |
659 | static addr_conv_t *to_addr_conv(struct stripe_head *sh, | ||
660 | struct raid5_percpu *percpu) | ||
661 | { | ||
662 | return percpu->scribble + sizeof(struct page *) * (sh->disks + 2); | ||
663 | } | ||
664 | |||
665 | static struct dma_async_tx_descriptor * | ||
666 | ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) | ||
642 | { | 667 | { |
643 | /* kernel stack size limits the total number of disks */ | ||
644 | int disks = sh->disks; | 668 | int disks = sh->disks; |
645 | struct page *xor_srcs[disks]; | 669 | struct page **xor_srcs = percpu->scribble; |
646 | int target = sh->ops.target; | 670 | int target = sh->ops.target; |
647 | struct r5dev *tgt = &sh->dev[target]; | 671 | struct r5dev *tgt = &sh->dev[target]; |
648 | struct page *xor_dest = tgt->page; | 672 | struct page *xor_dest = tgt->page; |
649 | int count = 0; | 673 | int count = 0; |
650 | struct dma_async_tx_descriptor *tx; | 674 | struct dma_async_tx_descriptor *tx; |
675 | struct async_submit_ctl submit; | ||
651 | int i; | 676 | int i; |
652 | 677 | ||
653 | pr_debug("%s: stripe %llu block: %d\n", | 678 | pr_debug("%s: stripe %llu block: %d\n", |
@@ -660,17 +685,207 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) | |||
660 | 685 | ||
661 | atomic_inc(&sh->count); | 686 | atomic_inc(&sh->count); |
662 | 687 | ||
688 | init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, | ||
689 | ops_complete_compute, sh, to_addr_conv(sh, percpu)); | ||
663 | if (unlikely(count == 1)) | 690 | if (unlikely(count == 1)) |
664 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, | 691 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); |
665 | 0, NULL, ops_complete_compute5, sh); | ||
666 | else | 692 | else |
667 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 693 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); |
668 | ASYNC_TX_XOR_ZERO_DST, NULL, | ||
669 | ops_complete_compute5, sh); | ||
670 | 694 | ||
671 | return tx; | 695 | return tx; |
672 | } | 696 | } |
673 | 697 | ||
698 | /* set_syndrome_sources - populate source buffers for gen_syndrome | ||
699 | * @srcs - (struct page *) array of size sh->disks | ||
700 | * @sh - stripe_head to parse | ||
701 | * | ||
702 | * Populates srcs in proper layout order for the stripe and returns the | ||
703 | * 'count' of sources to be used in a call to async_gen_syndrome. The P | ||
704 | * destination buffer is recorded in srcs[count] and the Q destination | ||
705 | is recorded in srcs[count+1]. | ||
706 | */ | ||
707 | static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh) | ||
708 | { | ||
709 | int disks = sh->disks; | ||
710 | int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); | ||
711 | int d0_idx = raid6_d0(sh); | ||
712 | int count; | ||
713 | int i; | ||
714 | |||
715 | for (i = 0; i < disks; i++) | ||
716 | srcs[i] = (void *)raid6_empty_zero_page; | ||
717 | |||
718 | count = 0; | ||
719 | i = d0_idx; | ||
720 | do { | ||
721 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); | ||
722 | |||
723 | srcs[slot] = sh->dev[i].page; | ||
724 | i = raid6_next_disk(i, disks); | ||
725 | } while (i != d0_idx); | ||
726 | BUG_ON(count != syndrome_disks); | ||
727 | |||
728 | return count; | ||
729 | } | ||
730 | |||
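Concretely, for a hypothetical 6-device non-DDF RAID-6 stripe (so syndrome_disks = 4) the helper leaves the scribble-backed srcs[] array laid out as:

	srcs[0..3]   data pages, in syndrome order starting at raid6_d0(sh)
	srcs[4]      P destination page   (slot == count)
	srcs[5]      Q destination page   (slot == count + 1)

and returns 4, which is why callers pass count+2 blocks to async_gen_syndrome().
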
731 | static struct dma_async_tx_descriptor * | ||
732 | ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) | ||
733 | { | ||
734 | int disks = sh->disks; | ||
735 | struct page **blocks = percpu->scribble; | ||
736 | int target; | ||
737 | int qd_idx = sh->qd_idx; | ||
738 | struct dma_async_tx_descriptor *tx; | ||
739 | struct async_submit_ctl submit; | ||
740 | struct r5dev *tgt; | ||
741 | struct page *dest; | ||
742 | int i; | ||
743 | int count; | ||
744 | |||
745 | if (sh->ops.target < 0) | ||
746 | target = sh->ops.target2; | ||
747 | else if (sh->ops.target2 < 0) | ||
748 | target = sh->ops.target; | ||
749 | else | ||
750 | /* we should only have one valid target */ | ||
751 | BUG(); | ||
752 | BUG_ON(target < 0); | ||
753 | pr_debug("%s: stripe %llu block: %d\n", | ||
754 | __func__, (unsigned long long)sh->sector, target); | ||
755 | |||
756 | tgt = &sh->dev[target]; | ||
757 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | ||
758 | dest = tgt->page; | ||
759 | |||
760 | atomic_inc(&sh->count); | ||
761 | |||
762 | if (target == qd_idx) { | ||
763 | count = set_syndrome_sources(blocks, sh); | ||
764 | blocks[count] = NULL; /* regenerating p is not necessary */ | ||
765 | BUG_ON(blocks[count+1] != dest); /* q should already be set */ | ||
766 | init_async_submit(&submit, 0, NULL, ops_complete_compute, sh, | ||
767 | to_addr_conv(sh, percpu)); | ||
768 | tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); | ||
769 | } else { | ||
770 | /* Compute any data- or p-drive using XOR */ | ||
771 | count = 0; | ||
772 | for (i = disks; i-- ; ) { | ||
773 | if (i == target || i == qd_idx) | ||
774 | continue; | ||
775 | blocks[count++] = sh->dev[i].page; | ||
776 | } | ||
777 | |||
778 | init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, | ||
779 | ops_complete_compute, sh, | ||
780 | to_addr_conv(sh, percpu)); | ||
781 | tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); | ||
782 | } | ||
783 | |||
784 | return tx; | ||
785 | } | ||
786 | |||
787 | static struct dma_async_tx_descriptor * | ||
788 | ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) | ||
789 | { | ||
790 | int i, count, disks = sh->disks; | ||
791 | int syndrome_disks = sh->ddf_layout ? disks : disks-2; | ||
792 | int d0_idx = raid6_d0(sh); | ||
793 | int faila = -1, failb = -1; | ||
794 | int target = sh->ops.target; | ||
795 | int target2 = sh->ops.target2; | ||
796 | struct r5dev *tgt = &sh->dev[target]; | ||
797 | struct r5dev *tgt2 = &sh->dev[target2]; | ||
798 | struct dma_async_tx_descriptor *tx; | ||
799 | struct page **blocks = percpu->scribble; | ||
800 | struct async_submit_ctl submit; | ||
801 | |||
802 | pr_debug("%s: stripe %llu block1: %d block2: %d\n", | ||
803 | __func__, (unsigned long long)sh->sector, target, target2); | ||
804 | BUG_ON(target < 0 || target2 < 0); | ||
805 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | ||
806 | BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags)); | ||
807 | |||
808 | /* we need to open-code set_syndrome_sources to handle the | ||
809 | * slot number conversion for 'faila' and 'failb' | ||
810 | */ | ||
811 | for (i = 0; i < disks ; i++) | ||
812 | blocks[i] = (void *)raid6_empty_zero_page; | ||
813 | count = 0; | ||
814 | i = d0_idx; | ||
815 | do { | ||
816 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); | ||
817 | |||
818 | blocks[slot] = sh->dev[i].page; | ||
819 | |||
820 | if (i == target) | ||
821 | faila = slot; | ||
822 | if (i == target2) | ||
823 | failb = slot; | ||
824 | i = raid6_next_disk(i, disks); | ||
825 | } while (i != d0_idx); | ||
826 | BUG_ON(count != syndrome_disks); | ||
827 | |||
828 | BUG_ON(faila == failb); | ||
829 | if (failb < faila) | ||
830 | swap(faila, failb); | ||
831 | pr_debug("%s: stripe: %llu faila: %d failb: %d\n", | ||
832 | __func__, (unsigned long long)sh->sector, faila, failb); | ||
833 | |||
834 | atomic_inc(&sh->count); | ||
835 | |||
836 | if (failb == syndrome_disks+1) { | ||
837 | /* Q disk is one of the missing disks */ | ||
838 | if (faila == syndrome_disks) { | ||
839 | /* Missing P+Q, just recompute */ | ||
840 | init_async_submit(&submit, 0, NULL, ops_complete_compute, | ||
841 | sh, to_addr_conv(sh, percpu)); | ||
842 | return async_gen_syndrome(blocks, 0, count+2, | ||
843 | STRIPE_SIZE, &submit); | ||
844 | } else { | ||
845 | struct page *dest; | ||
846 | int data_target; | ||
847 | int qd_idx = sh->qd_idx; | ||
848 | |||
849 | /* Missing D+Q: recompute D from P, then recompute Q */ | ||
850 | if (target == qd_idx) | ||
851 | data_target = target2; | ||
852 | else | ||
853 | data_target = target; | ||
854 | |||
855 | count = 0; | ||
856 | for (i = disks; i-- ; ) { | ||
857 | if (i == data_target || i == qd_idx) | ||
858 | continue; | ||
859 | blocks[count++] = sh->dev[i].page; | ||
860 | } | ||
861 | dest = sh->dev[data_target].page; | ||
862 | init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, | ||
863 | NULL, NULL, to_addr_conv(sh, percpu)); | ||
864 | tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, | ||
865 | &submit); | ||
866 | |||
867 | count = set_syndrome_sources(blocks, sh); | ||
868 | init_async_submit(&submit, 0, tx, ops_complete_compute, | ||
869 | sh, to_addr_conv(sh, percpu)); | ||
870 | return async_gen_syndrome(blocks, 0, count+2, | ||
871 | STRIPE_SIZE, &submit); | ||
872 | } | ||
873 | } | ||
874 | |||
875 | init_async_submit(&submit, 0, NULL, ops_complete_compute, sh, | ||
876 | to_addr_conv(sh, percpu)); | ||
877 | if (failb == syndrome_disks) { | ||
878 | /* We're missing D+P. */ | ||
879 | return async_raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, | ||
880 | faila, blocks, &submit); | ||
881 | } else { | ||
882 | /* We're missing D+D. */ | ||
883 | return async_raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, | ||
884 | faila, failb, blocks, &submit); | ||
885 | } | ||
886 | } | ||
887 | |||
888 | |||
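To summarize the dispatch above (slot numbers are post-conversion, with P at slot syndrome_disks and Q at syndrome_disks+1, and faila < failb after the swap):

	faila = data, failb = data  ->  async_raid6_2data_recov()
	faila = data, failb = P     ->  async_raid6_datap_recov()
	faila = data, failb = Q     ->  rebuild the data block by xor, then regenerate Q
	                                with async_gen_syndrome() chained behind it
	faila = P,    failb = Q     ->  regenerate both with async_gen_syndrome()
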
674 | static void ops_complete_prexor(void *stripe_head_ref) | 889 | static void ops_complete_prexor(void *stripe_head_ref) |
675 | { | 890 | { |
676 | struct stripe_head *sh = stripe_head_ref; | 891 | struct stripe_head *sh = stripe_head_ref; |
@@ -680,12 +895,13 @@ static void ops_complete_prexor(void *stripe_head_ref) | |||
680 | } | 895 | } |
681 | 896 | ||
682 | static struct dma_async_tx_descriptor * | 897 | static struct dma_async_tx_descriptor * |
683 | ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | 898 | ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu, |
899 | struct dma_async_tx_descriptor *tx) | ||
684 | { | 900 | { |
685 | /* kernel stack size limits the total number of disks */ | ||
686 | int disks = sh->disks; | 901 | int disks = sh->disks; |
687 | struct page *xor_srcs[disks]; | 902 | struct page **xor_srcs = percpu->scribble; |
688 | int count = 0, pd_idx = sh->pd_idx, i; | 903 | int count = 0, pd_idx = sh->pd_idx, i; |
904 | struct async_submit_ctl submit; | ||
689 | 905 | ||
690 | /* existing parity data subtracted */ | 906 | /* existing parity data subtracted */ |
691 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | 907 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; |
@@ -700,9 +916,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
700 | xor_srcs[count++] = dev->page; | 916 | xor_srcs[count++] = dev->page; |
701 | } | 917 | } |
702 | 918 | ||
703 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 919 | init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx, |
704 | ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, | 920 | ops_complete_prexor, sh, to_addr_conv(sh, percpu)); |
705 | ops_complete_prexor, sh); | 921 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); |
706 | 922 | ||
707 | return tx; | 923 | return tx; |
708 | } | 924 | } |
@@ -742,17 +958,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
742 | return tx; | 958 | return tx; |
743 | } | 959 | } |
744 | 960 | ||
745 | static void ops_complete_postxor(void *stripe_head_ref) | 961 | static void ops_complete_reconstruct(void *stripe_head_ref) |
746 | { | 962 | { |
747 | struct stripe_head *sh = stripe_head_ref; | 963 | struct stripe_head *sh = stripe_head_ref; |
748 | int disks = sh->disks, i, pd_idx = sh->pd_idx; | 964 | int disks = sh->disks; |
965 | int pd_idx = sh->pd_idx; | ||
966 | int qd_idx = sh->qd_idx; | ||
967 | int i; | ||
749 | 968 | ||
750 | pr_debug("%s: stripe %llu\n", __func__, | 969 | pr_debug("%s: stripe %llu\n", __func__, |
751 | (unsigned long long)sh->sector); | 970 | (unsigned long long)sh->sector); |
752 | 971 | ||
753 | for (i = disks; i--; ) { | 972 | for (i = disks; i--; ) { |
754 | struct r5dev *dev = &sh->dev[i]; | 973 | struct r5dev *dev = &sh->dev[i]; |
755 | if (dev->written || i == pd_idx) | 974 | |
975 | if (dev->written || i == pd_idx || i == qd_idx) | ||
756 | set_bit(R5_UPTODATE, &dev->flags); | 976 | set_bit(R5_UPTODATE, &dev->flags); |
757 | } | 977 | } |
758 | 978 | ||
@@ -770,12 +990,12 @@ static void ops_complete_postxor(void *stripe_head_ref) | |||
770 | } | 990 | } |
771 | 991 | ||
772 | static void | 992 | static void |
773 | ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | 993 | ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, |
994 | struct dma_async_tx_descriptor *tx) | ||
774 | { | 995 | { |
775 | /* kernel stack size limits the total number of disks */ | ||
776 | int disks = sh->disks; | 996 | int disks = sh->disks; |
777 | struct page *xor_srcs[disks]; | 997 | struct page **xor_srcs = percpu->scribble; |
778 | 998 | struct async_submit_ctl submit; | |
779 | int count = 0, pd_idx = sh->pd_idx, i; | 999 | int count = 0, pd_idx = sh->pd_idx, i; |
780 | struct page *xor_dest; | 1000 | struct page *xor_dest; |
781 | int prexor = 0; | 1001 | int prexor = 0; |
@@ -809,18 +1029,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
809 | * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST | 1029 | * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST |
810 | * for the synchronous xor case | 1030 | * for the synchronous xor case |
811 | */ | 1031 | */ |
812 | flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | | 1032 | flags = ASYNC_TX_ACK | |
813 | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); | 1033 | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); |
814 | 1034 | ||
815 | atomic_inc(&sh->count); | 1035 | atomic_inc(&sh->count); |
816 | 1036 | ||
817 | if (unlikely(count == 1)) { | 1037 | init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh, |
818 | flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); | 1038 | to_addr_conv(sh, percpu)); |
819 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, | 1039 | if (unlikely(count == 1)) |
820 | flags, tx, ops_complete_postxor, sh); | 1040 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); |
821 | } else | 1041 | else |
822 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 1042 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); |
823 | flags, tx, ops_complete_postxor, sh); | 1043 | } |
1044 | |||
1045 | static void | ||
1046 | ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, | ||
1047 | struct dma_async_tx_descriptor *tx) | ||
1048 | { | ||
1049 | struct async_submit_ctl submit; | ||
1050 | struct page **blocks = percpu->scribble; | ||
1051 | int count; | ||
1052 | |||
1053 | pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); | ||
1054 | |||
1055 | count = set_syndrome_sources(blocks, sh); | ||
1056 | |||
1057 | atomic_inc(&sh->count); | ||
1058 | |||
1059 | init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct, | ||
1060 | sh, to_addr_conv(sh, percpu)); | ||
1061 | async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); | ||
824 | } | 1062 | } |
825 | 1063 | ||
826 | static void ops_complete_check(void *stripe_head_ref) | 1064 | static void ops_complete_check(void *stripe_head_ref) |
@@ -835,63 +1073,115 @@ static void ops_complete_check(void *stripe_head_ref) | |||
835 | release_stripe(sh); | 1073 | release_stripe(sh); |
836 | } | 1074 | } |
837 | 1075 | ||
838 | static void ops_run_check(struct stripe_head *sh) | 1076 | static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) |
839 | { | 1077 | { |
840 | /* kernel stack size limits the total number of disks */ | ||
841 | int disks = sh->disks; | 1078 | int disks = sh->disks; |
842 | struct page *xor_srcs[disks]; | 1079 | int pd_idx = sh->pd_idx; |
1080 | int qd_idx = sh->qd_idx; | ||
1081 | struct page *xor_dest; | ||
1082 | struct page **xor_srcs = percpu->scribble; | ||
843 | struct dma_async_tx_descriptor *tx; | 1083 | struct dma_async_tx_descriptor *tx; |
844 | 1084 | struct async_submit_ctl submit; | |
845 | int count = 0, pd_idx = sh->pd_idx, i; | 1085 | int count; |
846 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | 1086 | int i; |
847 | 1087 | ||
848 | pr_debug("%s: stripe %llu\n", __func__, | 1088 | pr_debug("%s: stripe %llu\n", __func__, |
849 | (unsigned long long)sh->sector); | 1089 | (unsigned long long)sh->sector); |
850 | 1090 | ||
1091 | count = 0; | ||
1092 | xor_dest = sh->dev[pd_idx].page; | ||
1093 | xor_srcs[count++] = xor_dest; | ||
851 | for (i = disks; i--; ) { | 1094 | for (i = disks; i--; ) { |
852 | struct r5dev *dev = &sh->dev[i]; | 1095 | if (i == pd_idx || i == qd_idx) |
853 | if (i != pd_idx) | 1096 | continue; |
854 | xor_srcs[count++] = dev->page; | 1097 | xor_srcs[count++] = sh->dev[i].page; |
855 | } | 1098 | } |
856 | 1099 | ||
857 | tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 1100 | init_async_submit(&submit, 0, NULL, NULL, NULL, |
858 | &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); | 1101 | to_addr_conv(sh, percpu)); |
1102 | tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | ||
1103 | &sh->ops.zero_sum_result, &submit); | ||
1104 | |||
1105 | atomic_inc(&sh->count); | ||
1106 | init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); | ||
1107 | tx = async_trigger_callback(&submit); | ||
1108 | } | ||
1109 | |||
1110 | static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp) | ||
1111 | { | ||
1112 | struct page **srcs = percpu->scribble; | ||
1113 | struct async_submit_ctl submit; | ||
1114 | int count; | ||
1115 | |||
1116 | pr_debug("%s: stripe %llu checkp: %d\n", __func__, | ||
1117 | (unsigned long long)sh->sector, checkp); | ||
1118 | |||
1119 | count = set_syndrome_sources(srcs, sh); | ||
1120 | if (!checkp) | ||
1121 | srcs[count] = NULL; | ||
859 | 1122 | ||
860 | atomic_inc(&sh->count); | 1123 | atomic_inc(&sh->count); |
861 | tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, | 1124 | init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, |
862 | ops_complete_check, sh); | 1125 | sh, to_addr_conv(sh, percpu)); |
1126 | async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE, | ||
1127 | &sh->ops.zero_sum_result, percpu->spare_page, &submit); | ||
863 | } | 1128 | } |
864 | 1129 | ||
865 | static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) | 1130 | static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) |
866 | { | 1131 | { |
867 | int overlap_clear = 0, i, disks = sh->disks; | 1132 | int overlap_clear = 0, i, disks = sh->disks; |
868 | struct dma_async_tx_descriptor *tx = NULL; | 1133 | struct dma_async_tx_descriptor *tx = NULL; |
1134 | raid5_conf_t *conf = sh->raid_conf; | ||
1135 | int level = conf->level; | ||
1136 | struct raid5_percpu *percpu; | ||
1137 | unsigned long cpu; | ||
869 | 1138 | ||
1139 | cpu = get_cpu(); | ||
1140 | percpu = per_cpu_ptr(conf->percpu, cpu); | ||
870 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { | 1141 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { |
871 | ops_run_biofill(sh); | 1142 | ops_run_biofill(sh); |
872 | overlap_clear++; | 1143 | overlap_clear++; |
873 | } | 1144 | } |
874 | 1145 | ||
875 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { | 1146 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { |
876 | tx = ops_run_compute5(sh); | 1147 | if (level < 6) |
877 | /* terminate the chain if postxor is not set to be run */ | 1148 | tx = ops_run_compute5(sh, percpu); |
878 | if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) | 1149 | else { |
1150 | if (sh->ops.target2 < 0 || sh->ops.target < 0) | ||
1151 | tx = ops_run_compute6_1(sh, percpu); | ||
1152 | else | ||
1153 | tx = ops_run_compute6_2(sh, percpu); | ||
1154 | } | ||
1155 | /* terminate the chain if reconstruct is not set to be run */ | ||
1156 | if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) | ||
879 | async_tx_ack(tx); | 1157 | async_tx_ack(tx); |
880 | } | 1158 | } |
881 | 1159 | ||
882 | if (test_bit(STRIPE_OP_PREXOR, &ops_request)) | 1160 | if (test_bit(STRIPE_OP_PREXOR, &ops_request)) |
883 | tx = ops_run_prexor(sh, tx); | 1161 | tx = ops_run_prexor(sh, percpu, tx); |
884 | 1162 | ||
885 | if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { | 1163 | if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { |
886 | tx = ops_run_biodrain(sh, tx); | 1164 | tx = ops_run_biodrain(sh, tx); |
887 | overlap_clear++; | 1165 | overlap_clear++; |
888 | } | 1166 | } |
889 | 1167 | ||
890 | if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) | 1168 | if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) { |
891 | ops_run_postxor(sh, tx); | 1169 | if (level < 6) |
1170 | ops_run_reconstruct5(sh, percpu, tx); | ||
1171 | else | ||
1172 | ops_run_reconstruct6(sh, percpu, tx); | ||
1173 | } | ||
892 | 1174 | ||
893 | if (test_bit(STRIPE_OP_CHECK, &ops_request)) | 1175 | if (test_bit(STRIPE_OP_CHECK, &ops_request)) { |
894 | ops_run_check(sh); | 1176 | if (sh->check_state == check_state_run) |
1177 | ops_run_check_p(sh, percpu); | ||
1178 | else if (sh->check_state == check_state_run_q) | ||
1179 | ops_run_check_pq(sh, percpu, 0); | ||
1180 | else if (sh->check_state == check_state_run_pq) | ||
1181 | ops_run_check_pq(sh, percpu, 1); | ||
1182 | else | ||
1183 | BUG(); | ||
1184 | } | ||
895 | 1185 | ||
896 | if (overlap_clear) | 1186 | if (overlap_clear) |
897 | for (i = disks; i--; ) { | 1187 | for (i = disks; i--; ) { |
@@ -899,6 +1189,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) | |||
899 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | 1189 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) |
900 | wake_up(&sh->raid_conf->wait_for_overlap); | 1190 | wake_up(&sh->raid_conf->wait_for_overlap); |
901 | } | 1191 | } |
1192 | put_cpu(); | ||
902 | } | 1193 | } |
903 | 1194 | ||
904 | static int grow_one_stripe(raid5_conf_t *conf) | 1195 | static int grow_one_stripe(raid5_conf_t *conf) |
@@ -948,6 +1239,28 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
948 | return 0; | 1239 | return 0; |
949 | } | 1240 | } |
950 | 1241 | ||
1242 | /** | ||
1243 | * scribble_len - return the required size of the scribble region | ||
1244 | * @num - total number of disks in the array | ||
1245 | * | ||
1246 | * The size must be enough to contain: | ||
1247 | * 1/ a struct page pointer for each device in the array +2 | ||
1248 | * 2/ room to convert each entry in (1) to its corresponding dma | ||
1249 | * (dma_map_page()) or page (page_address()) address. | ||
1250 | * | ||
1251 | * Note: the +2 is for the destination buffers of the ddf/raid6 case where we | ||
1252 | * calculate over all devices (not just the data blocks), using zeros in place | ||
1253 | * of the P and Q blocks. | ||
1254 | */ | ||
1255 | static size_t scribble_len(int num) | ||
1256 | { | ||
1257 | size_t len; | ||
1258 | |||
1259 | len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2); | ||
1260 | |||
1261 | return len; | ||
1262 | } | ||
1263 | |||
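The scribble region sized above is two back-to-back arrays, and to_addr_conv() earlier in this patch simply skips past the first one. For an array with num devices:

	percpu->scribble
	+--------------------------+-------------------------------+
	| struct page *[num + 2]   | addr_conv_t[num + 2]          |
	| source/destination pages | address-conversion space used |
	| (xor_srcs, blocks, srcs) | internally by async_tx        |
	+--------------------------+-------------------------------+
	^                          ^
	percpu->scribble           to_addr_conv(sh, percpu)
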
951 | static int resize_stripes(raid5_conf_t *conf, int newsize) | 1264 | static int resize_stripes(raid5_conf_t *conf, int newsize) |
952 | { | 1265 | { |
953 | /* Make all the stripes able to hold 'newsize' devices. | 1266 | /* Make all the stripes able to hold 'newsize' devices. |
@@ -976,6 +1289,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
976 | struct stripe_head *osh, *nsh; | 1289 | struct stripe_head *osh, *nsh; |
977 | LIST_HEAD(newstripes); | 1290 | LIST_HEAD(newstripes); |
978 | struct disk_info *ndisks; | 1291 | struct disk_info *ndisks; |
1292 | unsigned long cpu; | ||
979 | int err; | 1293 | int err; |
980 | struct kmem_cache *sc; | 1294 | struct kmem_cache *sc; |
981 | int i; | 1295 | int i; |
@@ -1041,7 +1355,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
1041 | /* Step 3. | 1355 | /* Step 3. |
1042 | * At this point, we are holding all the stripes so the array | 1356 | * At this point, we are holding all the stripes so the array |
1043 | * is completely stalled, so now is a good time to resize | 1357 | * is completely stalled, so now is a good time to resize |
1044 | * conf->disks. | 1358 | * conf->disks and the scribble region |
1045 | */ | 1359 | */ |
1046 | ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); | 1360 | ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); |
1047 | if (ndisks) { | 1361 | if (ndisks) { |
@@ -1052,10 +1366,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
1052 | } else | 1366 | } else |
1053 | err = -ENOMEM; | 1367 | err = -ENOMEM; |
1054 | 1368 | ||
1369 | get_online_cpus(); | ||
1370 | conf->scribble_len = scribble_len(newsize); | ||
1371 | for_each_present_cpu(cpu) { | ||
1372 | struct raid5_percpu *percpu; | ||
1373 | void *scribble; | ||
1374 | |||
1375 | percpu = per_cpu_ptr(conf->percpu, cpu); | ||
1376 | scribble = kmalloc(conf->scribble_len, GFP_NOIO); | ||
1377 | |||
1378 | if (scribble) { | ||
1379 | kfree(percpu->scribble); | ||
1380 | percpu->scribble = scribble; | ||
1381 | } else { | ||
1382 | err = -ENOMEM; | ||
1383 | break; | ||
1384 | } | ||
1385 | } | ||
1386 | put_online_cpus(); | ||
1387 | |||
1055 | /* Step 4, return new stripes to service */ | 1388 | /* Step 4, return new stripes to service */ |
1056 | while(!list_empty(&newstripes)) { | 1389 | while(!list_empty(&newstripes)) { |
1057 | nsh = list_entry(newstripes.next, struct stripe_head, lru); | 1390 | nsh = list_entry(newstripes.next, struct stripe_head, lru); |
1058 | list_del_init(&nsh->lru); | 1391 | list_del_init(&nsh->lru); |
1392 | |||
1059 | for (i=conf->raid_disks; i < newsize; i++) | 1393 | for (i=conf->raid_disks; i < newsize; i++) |
1060 | if (nsh->dev[i].page == NULL) { | 1394 | if (nsh->dev[i].page == NULL) { |
1061 | struct page *p = alloc_page(GFP_NOIO); | 1395 | struct page *p = alloc_page(GFP_NOIO); |
@@ -1594,258 +1928,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) | |||
1594 | } | 1928 | } |
1595 | 1929 | ||
1596 | 1930 | ||
1597 | |||
1598 | /* | ||
1599 | * Copy data between a page in the stripe cache, and one or more bion | ||
1600 | * The page could align with the middle of the bio, or there could be | ||
1601 | * several bion, each with several bio_vecs, which cover part of the page | ||
1602 | * Multiple bion are linked together on bi_next. There may be extras | ||
1603 | * at the end of this list. We ignore them. | ||
1604 | */ | ||
1605 | static void copy_data(int frombio, struct bio *bio, | ||
1606 | struct page *page, | ||
1607 | sector_t sector) | ||
1608 | { | ||
1609 | char *pa = page_address(page); | ||
1610 | struct bio_vec *bvl; | ||
1611 | int i; | ||
1612 | int page_offset; | ||
1613 | |||
1614 | if (bio->bi_sector >= sector) | ||
1615 | page_offset = (signed)(bio->bi_sector - sector) * 512; | ||
1616 | else | ||
1617 | page_offset = (signed)(sector - bio->bi_sector) * -512; | ||
1618 | bio_for_each_segment(bvl, bio, i) { | ||
1619 | int len = bio_iovec_idx(bio,i)->bv_len; | ||
1620 | int clen; | ||
1621 | int b_offset = 0; | ||
1622 | |||
1623 | if (page_offset < 0) { | ||
1624 | b_offset = -page_offset; | ||
1625 | page_offset += b_offset; | ||
1626 | len -= b_offset; | ||
1627 | } | ||
1628 | |||
1629 | if (len > 0 && page_offset + len > STRIPE_SIZE) | ||
1630 | clen = STRIPE_SIZE - page_offset; | ||
1631 | else clen = len; | ||
1632 | |||
1633 | if (clen > 0) { | ||
1634 | char *ba = __bio_kmap_atomic(bio, i, KM_USER0); | ||
1635 | if (frombio) | ||
1636 | memcpy(pa+page_offset, ba+b_offset, clen); | ||
1637 | else | ||
1638 | memcpy(ba+b_offset, pa+page_offset, clen); | ||
1639 | __bio_kunmap_atomic(ba, KM_USER0); | ||
1640 | } | ||
1641 | if (clen < len) /* hit end of page */ | ||
1642 | break; | ||
1643 | page_offset += len; | ||
1644 | } | ||
1645 | } | ||
1646 | |||
1647 | #define check_xor() do { \ | ||
1648 | if (count == MAX_XOR_BLOCKS) { \ | ||
1649 | xor_blocks(count, STRIPE_SIZE, dest, ptr);\ | ||
1650 | count = 0; \ | ||
1651 | } \ | ||
1652 | } while(0) | ||
1653 | |||
1654 | static void compute_parity6(struct stripe_head *sh, int method) | ||
1655 | { | ||
1656 | raid5_conf_t *conf = sh->raid_conf; | ||
1657 | int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; | ||
1658 | int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); | ||
1659 | struct bio *chosen; | ||
1660 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | ||
1661 | void *ptrs[syndrome_disks+2]; | ||
1662 | |||
1663 | pd_idx = sh->pd_idx; | ||
1664 | qd_idx = sh->qd_idx; | ||
1665 | d0_idx = raid6_d0(sh); | ||
1666 | |||
1667 | pr_debug("compute_parity, stripe %llu, method %d\n", | ||
1668 | (unsigned long long)sh->sector, method); | ||
1669 | |||
1670 | switch(method) { | ||
1671 | case READ_MODIFY_WRITE: | ||
1672 | BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */ | ||
1673 | case RECONSTRUCT_WRITE: | ||
1674 | for (i= disks; i-- ;) | ||
1675 | if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) { | ||
1676 | chosen = sh->dev[i].towrite; | ||
1677 | sh->dev[i].towrite = NULL; | ||
1678 | |||
1679 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) | ||
1680 | wake_up(&conf->wait_for_overlap); | ||
1681 | |||
1682 | BUG_ON(sh->dev[i].written); | ||
1683 | sh->dev[i].written = chosen; | ||
1684 | } | ||
1685 | break; | ||
1686 | case CHECK_PARITY: | ||
1687 | BUG(); /* Not implemented yet */ | ||
1688 | } | ||
1689 | |||
1690 | for (i = disks; i--;) | ||
1691 | if (sh->dev[i].written) { | ||
1692 | sector_t sector = sh->dev[i].sector; | ||
1693 | struct bio *wbi = sh->dev[i].written; | ||
1694 | while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { | ||
1695 | copy_data(1, wbi, sh->dev[i].page, sector); | ||
1696 | wbi = r5_next_bio(wbi, sector); | ||
1697 | } | ||
1698 | |||
1699 | set_bit(R5_LOCKED, &sh->dev[i].flags); | ||
1700 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | ||
1701 | } | ||
1702 | |||
1703 | /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/ | ||
1704 | |||
1705 | for (i = 0; i < disks; i++) | ||
1706 | ptrs[i] = (void *)raid6_empty_zero_page; | ||
1707 | |||
1708 | count = 0; | ||
1709 | i = d0_idx; | ||
1710 | do { | ||
1711 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); | ||
1712 | |||
1713 | ptrs[slot] = page_address(sh->dev[i].page); | ||
1714 | if (slot < syndrome_disks && | ||
1715 | !test_bit(R5_UPTODATE, &sh->dev[i].flags)) { | ||
1716 | printk(KERN_ERR "block %d/%d not uptodate " | ||
1717 | "on parity calc\n", i, count); | ||
1718 | BUG(); | ||
1719 | } | ||
1720 | |||
1721 | i = raid6_next_disk(i, disks); | ||
1722 | } while (i != d0_idx); | ||
1723 | BUG_ON(count != syndrome_disks); | ||
1724 | |||
1725 | raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs); | ||
1726 | |||
1727 | switch(method) { | ||
1728 | case RECONSTRUCT_WRITE: | ||
1729 | set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | ||
1730 | set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags); | ||
1731 | set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); | ||
1732 | set_bit(R5_LOCKED, &sh->dev[qd_idx].flags); | ||
1733 | break; | ||
1734 | case UPDATE_PARITY: | ||
1735 | set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | ||
1736 | set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags); | ||
1737 | break; | ||
1738 | } | ||
1739 | } | ||
1740 | |||
1741 | |||
1742 | /* Compute one missing block */ | ||
1743 | static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) | ||
1744 | { | ||
1745 | int i, count, disks = sh->disks; | ||
1746 | void *ptr[MAX_XOR_BLOCKS], *dest, *p; | ||
1747 | int qd_idx = sh->qd_idx; | ||
1748 | |||
1749 | pr_debug("compute_block_1, stripe %llu, idx %d\n", | ||
1750 | (unsigned long long)sh->sector, dd_idx); | ||
1751 | |||
1752 | if ( dd_idx == qd_idx ) { | ||
1753 | /* We're actually computing the Q drive */ | ||
1754 | compute_parity6(sh, UPDATE_PARITY); | ||
1755 | } else { | ||
1756 | dest = page_address(sh->dev[dd_idx].page); | ||
1757 | if (!nozero) memset(dest, 0, STRIPE_SIZE); | ||
1758 | count = 0; | ||
1759 | for (i = disks ; i--; ) { | ||
1760 | if (i == dd_idx || i == qd_idx) | ||
1761 | continue; | ||
1762 | p = page_address(sh->dev[i].page); | ||
1763 | if (test_bit(R5_UPTODATE, &sh->dev[i].flags)) | ||
1764 | ptr[count++] = p; | ||
1765 | else | ||
1766 | printk("compute_block() %d, stripe %llu, %d" | ||
1767 | " not present\n", dd_idx, | ||
1768 | (unsigned long long)sh->sector, i); | ||
1769 | |||
1770 | check_xor(); | ||
1771 | } | ||
1772 | if (count) | ||
1773 | xor_blocks(count, STRIPE_SIZE, dest, ptr); | ||
1774 | if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); | ||
1775 | else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); | ||
1776 | } | ||
1777 | } | ||
1778 | |||
1779 | /* Compute two missing blocks */ | ||
1780 | static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | ||
1781 | { | ||
1782 | int i, count, disks = sh->disks; | ||
1783 | int syndrome_disks = sh->ddf_layout ? disks : disks-2; | ||
1784 | int d0_idx = raid6_d0(sh); | ||
1785 | int faila = -1, failb = -1; | ||
1786 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | ||
1787 | void *ptrs[syndrome_disks+2]; | ||
1788 | |||
1789 | for (i = 0; i < disks ; i++) | ||
1790 | ptrs[i] = (void *)raid6_empty_zero_page; | ||
1791 | count = 0; | ||
1792 | i = d0_idx; | ||
1793 | do { | ||
1794 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); | ||
1795 | |||
1796 | ptrs[slot] = page_address(sh->dev[i].page); | ||
1797 | |||
1798 | if (i == dd_idx1) | ||
1799 | faila = slot; | ||
1800 | if (i == dd_idx2) | ||
1801 | failb = slot; | ||
1802 | i = raid6_next_disk(i, disks); | ||
1803 | } while (i != d0_idx); | ||
1804 | BUG_ON(count != syndrome_disks); | ||
1805 | |||
1806 | BUG_ON(faila == failb); | ||
1807 | if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } | ||
1808 | |||
1809 | pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", | ||
1810 | (unsigned long long)sh->sector, dd_idx1, dd_idx2, | ||
1811 | faila, failb); | ||
1812 | |||
1813 | if (failb == syndrome_disks+1) { | ||
1814 | /* Q disk is one of the missing disks */ | ||
1815 | if (faila == syndrome_disks) { | ||
1816 | /* Missing P+Q, just recompute */ | ||
1817 | compute_parity6(sh, UPDATE_PARITY); | ||
1818 | return; | ||
1819 | } else { | ||
1820 | /* We're missing D+Q; recompute D from P */ | ||
1821 | compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ? | ||
1822 | dd_idx2 : dd_idx1), | ||
1823 | 0); | ||
1824 | compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ | ||
1825 | return; | ||
1826 | } | ||
1827 | } | ||
1828 | |||
1829 | /* We're missing D+P or D+D; */ | ||
1830 | if (failb == syndrome_disks) { | ||
1831 | /* We're missing D+P. */ | ||
1832 | raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs); | ||
1833 | } else { | ||
1834 | /* We're missing D+D. */ | ||
1835 | raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb, | ||
1836 | ptrs); | ||
1837 | } | ||
1838 | |||
1839 | /* Both the above update both missing blocks */ | ||
1840 | set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags); | ||
1841 | set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags); | ||
1842 | } | ||
1843 | |||
1844 | static void | 1931 | static void |
1845 | schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, | 1932 | schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, |
1846 | int rcw, int expand) | 1933 | int rcw, int expand) |
1847 | { | 1934 | { |
1848 | int i, pd_idx = sh->pd_idx, disks = sh->disks; | 1935 | int i, pd_idx = sh->pd_idx, disks = sh->disks; |
1936 | raid5_conf_t *conf = sh->raid_conf; | ||
1937 | int level = conf->level; | ||
1849 | 1938 | ||
1850 | if (rcw) { | 1939 | if (rcw) { |
1851 | /* if we are not expanding this is a proper write request, and | 1940 | /* if we are not expanding this is a proper write request, and |
@@ -1858,7 +1947,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, | |||
1858 | } else | 1947 | } else |
1859 | sh->reconstruct_state = reconstruct_state_run; | 1948 | sh->reconstruct_state = reconstruct_state_run; |
1860 | 1949 | ||
1861 | set_bit(STRIPE_OP_POSTXOR, &s->ops_request); | 1950 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); |
1862 | 1951 | ||
1863 | for (i = disks; i--; ) { | 1952 | for (i = disks; i--; ) { |
1864 | struct r5dev *dev = &sh->dev[i]; | 1953 | struct r5dev *dev = &sh->dev[i]; |
@@ -1871,17 +1960,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, | |||
1871 | s->locked++; | 1960 | s->locked++; |
1872 | } | 1961 | } |
1873 | } | 1962 | } |
1874 | if (s->locked + 1 == disks) | 1963 | if (s->locked + conf->max_degraded == disks) |
1875 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) | 1964 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) |
1876 | atomic_inc(&sh->raid_conf->pending_full_writes); | 1965 | atomic_inc(&conf->pending_full_writes); |
1877 | } else { | 1966 | } else { |
1967 | BUG_ON(level == 6); | ||
1878 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || | 1968 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || |
1879 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); | 1969 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); |
1880 | 1970 | ||
1881 | sh->reconstruct_state = reconstruct_state_prexor_drain_run; | 1971 | sh->reconstruct_state = reconstruct_state_prexor_drain_run; |
1882 | set_bit(STRIPE_OP_PREXOR, &s->ops_request); | 1972 | set_bit(STRIPE_OP_PREXOR, &s->ops_request); |
1883 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | 1973 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); |
1884 | set_bit(STRIPE_OP_POSTXOR, &s->ops_request); | 1974 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); |
1885 | 1975 | ||
1886 | for (i = disks; i--; ) { | 1976 | for (i = disks; i--; ) { |
1887 | struct r5dev *dev = &sh->dev[i]; | 1977 | struct r5dev *dev = &sh->dev[i]; |
@@ -1899,13 +1989,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, | |||
1899 | } | 1989 | } |
1900 | } | 1990 | } |
1901 | 1991 | ||
1902 | /* keep the parity disk locked while asynchronous operations | 1992 | /* keep the parity disk(s) locked while asynchronous operations |
1903 | * are in flight | 1993 | * are in flight |
1904 | */ | 1994 | */ |
1905 | set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); | 1995 | set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); |
1906 | clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | 1996 | clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); |
1907 | s->locked++; | 1997 | s->locked++; |
1908 | 1998 | ||
1999 | if (level == 6) { | ||
2000 | int qd_idx = sh->qd_idx; | ||
2001 | struct r5dev *dev = &sh->dev[qd_idx]; | ||
2002 | |||
2003 | set_bit(R5_LOCKED, &dev->flags); | ||
2004 | clear_bit(R5_UPTODATE, &dev->flags); | ||
2005 | s->locked++; | ||
2006 | } | ||
2007 | |||
1909 | pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", | 2008 | pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", |
1910 | __func__, (unsigned long long)sh->sector, | 2009 | __func__, (unsigned long long)sh->sector, |
1911 | s->locked, s->ops_request); | 2010 | s->locked, s->ops_request); |
@@ -1986,13 +2085,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
1986 | 2085 | ||
1987 | static void end_reshape(raid5_conf_t *conf); | 2086 | static void end_reshape(raid5_conf_t *conf); |
1988 | 2087 | ||
1989 | static int page_is_zero(struct page *p) | ||
1990 | { | ||
1991 | char *a = page_address(p); | ||
1992 | return ((*(u32*)a) == 0 && | ||
1993 | memcmp(a, a+4, STRIPE_SIZE-4)==0); | ||
1994 | } | ||
1995 | |||
1996 | static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, | 2088 | static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, |
1997 | struct stripe_head *sh) | 2089 | struct stripe_head *sh) |
1998 | { | 2090 | { |
@@ -2133,9 +2225,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s, | |||
2133 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); | 2225 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); |
2134 | set_bit(R5_Wantcompute, &dev->flags); | 2226 | set_bit(R5_Wantcompute, &dev->flags); |
2135 | sh->ops.target = disk_idx; | 2227 | sh->ops.target = disk_idx; |
2228 | sh->ops.target2 = -1; | ||
2136 | s->req_compute = 1; | 2229 | s->req_compute = 1; |
2137 | /* Careful: from this point on 'uptodate' is in the eye | 2230 | /* Careful: from this point on 'uptodate' is in the eye |
2138 | * of raid5_run_ops which services 'compute' operations | 2231 | * of raid_run_ops which services 'compute' operations |
2139 | * before writes. R5_Wantcompute flags a block that will | 2232 | * before writes. R5_Wantcompute flags a block that will |
2140 | * be R5_UPTODATE by the time it is needed for a | 2233 | * be R5_UPTODATE by the time it is needed for a |
2141 | * subsequent operation. | 2234 | * subsequent operation. |
@@ -2174,61 +2267,104 @@ static void handle_stripe_fill5(struct stripe_head *sh, | |||
2174 | set_bit(STRIPE_HANDLE, &sh->state); | 2267 | set_bit(STRIPE_HANDLE, &sh->state); |
2175 | } | 2268 | } |
2176 | 2269 | ||
2177 | static void handle_stripe_fill6(struct stripe_head *sh, | 2270 | /* fetch_block6 - checks the given member device to see if its data needs |
2178 | struct stripe_head_state *s, struct r6_state *r6s, | 2271 | * to be read or computed to satisfy a request. |
2179 | int disks) | 2272 | * |
2273 | * Returns 1 when no more member devices need to be checked, otherwise returns | ||
2274 | * 0 to tell the loop in handle_stripe_fill6 to continue | ||
2275 | */ | ||
2276 | static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, | ||
2277 | struct r6_state *r6s, int disk_idx, int disks) | ||
2180 | { | 2278 | { |
2181 | int i; | 2279 | struct r5dev *dev = &sh->dev[disk_idx]; |
2182 | for (i = disks; i--; ) { | 2280 | struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]], |
2183 | struct r5dev *dev = &sh->dev[i]; | 2281 | &sh->dev[r6s->failed_num[1]] }; |
2184 | if (!test_bit(R5_LOCKED, &dev->flags) && | 2282 | |
2185 | !test_bit(R5_UPTODATE, &dev->flags) && | 2283 | if (!test_bit(R5_LOCKED, &dev->flags) && |
2186 | (dev->toread || (dev->towrite && | 2284 | !test_bit(R5_UPTODATE, &dev->flags) && |
2187 | !test_bit(R5_OVERWRITE, &dev->flags)) || | 2285 | (dev->toread || |
2188 | s->syncing || s->expanding || | 2286 | (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || |
2189 | (s->failed >= 1 && | 2287 | s->syncing || s->expanding || |
2190 | (sh->dev[r6s->failed_num[0]].toread || | 2288 | (s->failed >= 1 && |
2191 | s->to_write)) || | 2289 | (fdev[0]->toread || s->to_write)) || |
2192 | (s->failed >= 2 && | 2290 | (s->failed >= 2 && |
2193 | (sh->dev[r6s->failed_num[1]].toread || | 2291 | (fdev[1]->toread || s->to_write)))) { |
2194 | s->to_write)))) { | 2292 | /* we would like to get this block, possibly by computing it, |
2195 | /* we would like to get this block, possibly | 2293 | * otherwise read it if the backing disk is insync |
2196 | * by computing it, but we might not be able to | 2294 | */ |
2295 | BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); | ||
2296 | BUG_ON(test_bit(R5_Wantread, &dev->flags)); | ||
2297 | if ((s->uptodate == disks - 1) && | ||
2298 | (s->failed && (disk_idx == r6s->failed_num[0] || | ||
2299 | disk_idx == r6s->failed_num[1]))) { | ||
2300 | /* the disk has failed and we're requested to fetch it; | ||
2301 | * compute it | ||
2197 | */ | 2302 | */ |
2198 | if ((s->uptodate == disks - 1) && | 2303 | pr_debug("Computing stripe %llu block %d\n", |
2199 | (s->failed && (i == r6s->failed_num[0] || | 2304 | (unsigned long long)sh->sector, disk_idx); |
2200 | i == r6s->failed_num[1]))) { | 2305 | set_bit(STRIPE_COMPUTE_RUN, &sh->state); |
2201 | pr_debug("Computing stripe %llu block %d\n", | 2306 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); |
2202 | (unsigned long long)sh->sector, i); | 2307 | set_bit(R5_Wantcompute, &dev->flags); |
2203 | compute_block_1(sh, i, 0); | 2308 | sh->ops.target = disk_idx; |
2204 | s->uptodate++; | 2309 | sh->ops.target2 = -1; /* no 2nd target */ |
2205 | } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { | 2310 | s->req_compute = 1; |
2206 | /* Computing 2-failure is *very* expensive; only | 2311 | s->uptodate++; |
2207 | * do it if failed >= 2 | 2312 | return 1; |
2208 | */ | 2313 | } else if (s->uptodate == disks-2 && s->failed >= 2) { |
2209 | int other; | 2314 | /* Computing 2-failure is *very* expensive; only |
2210 | for (other = disks; other--; ) { | 2315 | * do it if failed >= 2 |
2211 | if (other == i) | 2316 | */ |
2212 | continue; | 2317 | int other; |
2213 | if (!test_bit(R5_UPTODATE, | 2318 | for (other = disks; other--; ) { |
2214 | &sh->dev[other].flags)) | 2319 | if (other == disk_idx) |
2215 | break; | 2320 | continue; |
2216 | } | 2321 | if (!test_bit(R5_UPTODATE, |
2217 | BUG_ON(other < 0); | 2322 | &sh->dev[other].flags)) |
2218 | pr_debug("Computing stripe %llu blocks %d,%d\n", | 2323 | break; |
2219 | (unsigned long long)sh->sector, | ||
2220 | i, other); | ||
2221 | compute_block_2(sh, i, other); | ||
2222 | s->uptodate += 2; | ||
2223 | } else if (test_bit(R5_Insync, &dev->flags)) { | ||
2224 | set_bit(R5_LOCKED, &dev->flags); | ||
2225 | set_bit(R5_Wantread, &dev->flags); | ||
2226 | s->locked++; | ||
2227 | pr_debug("Reading block %d (sync=%d)\n", | ||
2228 | i, s->syncing); | ||
2229 | } | 2324 | } |
2325 | BUG_ON(other < 0); | ||
2326 | pr_debug("Computing stripe %llu blocks %d,%d\n", | ||
2327 | (unsigned long long)sh->sector, | ||
2328 | disk_idx, other); | ||
2329 | set_bit(STRIPE_COMPUTE_RUN, &sh->state); | ||
2330 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); | ||
2331 | set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags); | ||
2332 | set_bit(R5_Wantcompute, &sh->dev[other].flags); | ||
2333 | sh->ops.target = disk_idx; | ||
2334 | sh->ops.target2 = other; | ||
2335 | s->uptodate += 2; | ||
2336 | s->req_compute = 1; | ||
2337 | return 1; | ||
2338 | } else if (test_bit(R5_Insync, &dev->flags)) { | ||
2339 | set_bit(R5_LOCKED, &dev->flags); | ||
2340 | set_bit(R5_Wantread, &dev->flags); | ||
2341 | s->locked++; | ||
2342 | pr_debug("Reading block %d (sync=%d)\n", | ||
2343 | disk_idx, s->syncing); | ||
2230 | } | 2344 | } |
2231 | } | 2345 | } |
2346 | |||
2347 | return 0; | ||
2348 | } | ||
2349 | |||
2350 | /** | ||
2351 | * handle_stripe_fill6 - read or compute data to satisfy pending requests. | ||
2352 | */ | ||
2353 | static void handle_stripe_fill6(struct stripe_head *sh, | ||
2354 | struct stripe_head_state *s, struct r6_state *r6s, | ||
2355 | int disks) | ||
2356 | { | ||
2357 | int i; | ||
2358 | |||
2359 | /* look for blocks to read/compute, skip this if a compute | ||
2360 | * is already in flight, or if the stripe contents are in the | ||
2361 | * midst of changing due to a write | ||
2362 | */ | ||
2363 | if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && | ||
2364 | !sh->reconstruct_state) | ||
2365 | for (i = disks; i--; ) | ||
2366 | if (fetch_block6(sh, s, r6s, i, disks)) | ||
2367 | break; | ||
2232 | set_bit(STRIPE_HANDLE, &sh->state); | 2368 | set_bit(STRIPE_HANDLE, &sh->state); |
2233 | } | 2369 | } |
2234 | 2370 | ||
@@ -2362,114 +2498,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, | |||
2362 | */ | 2498 | */ |
2363 | /* since handle_stripe can be called at any time we need to handle the | 2499 | /* since handle_stripe can be called at any time we need to handle the |
2364 | * case where a compute block operation has been submitted and then a | 2500 | * case where a compute block operation has been submitted and then a |
2365 | * subsequent call wants to start a write request. raid5_run_ops only | 2501 | * subsequent call wants to start a write request. raid_run_ops only |
2366 | * handles the case where compute block and postxor are requested | 2502 | * handles the case where compute block and reconstruct are requested |
2367 | * simultaneously. If this is not the case then new writes need to be | 2503 | * simultaneously. If this is not the case then new writes need to be |
2368 | * held off until the compute completes. | 2504 | * held off until the compute completes. |
2369 | */ | 2505 | */ |
2370 | if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && | 2506 | if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && |
2371 | (s->locked == 0 && (rcw == 0 || rmw == 0) && | 2507 | (s->locked == 0 && (rcw == 0 || rmw == 0) && |
2372 | !test_bit(STRIPE_BIT_DELAY, &sh->state))) | 2508 | !test_bit(STRIPE_BIT_DELAY, &sh->state))) |
2373 | schedule_reconstruction5(sh, s, rcw == 0, 0); | 2509 | schedule_reconstruction(sh, s, rcw == 0, 0); |
2374 | } | 2510 | } |
2375 | 2511 | ||
2376 | static void handle_stripe_dirtying6(raid5_conf_t *conf, | 2512 | static void handle_stripe_dirtying6(raid5_conf_t *conf, |
2377 | struct stripe_head *sh, struct stripe_head_state *s, | 2513 | struct stripe_head *sh, struct stripe_head_state *s, |
2378 | struct r6_state *r6s, int disks) | 2514 | struct r6_state *r6s, int disks) |
2379 | { | 2515 | { |
2380 | int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; | 2516 | int rcw = 0, pd_idx = sh->pd_idx, i; |
2381 | int qd_idx = sh->qd_idx; | 2517 | int qd_idx = sh->qd_idx; |
2518 | |||
2519 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2382 | for (i = disks; i--; ) { | 2520 | for (i = disks; i--; ) { |
2383 | struct r5dev *dev = &sh->dev[i]; | 2521 | struct r5dev *dev = &sh->dev[i]; |
2384 | /* Would I have to read this buffer for reconstruct_write */ | 2522 | /* check if we haven't enough data */ |
2385 | if (!test_bit(R5_OVERWRITE, &dev->flags) | 2523 | if (!test_bit(R5_OVERWRITE, &dev->flags) && |
2386 | && i != pd_idx && i != qd_idx | 2524 | i != pd_idx && i != qd_idx && |
2387 | && (!test_bit(R5_LOCKED, &dev->flags) | 2525 | !test_bit(R5_LOCKED, &dev->flags) && |
2388 | ) && | 2526 | !(test_bit(R5_UPTODATE, &dev->flags) || |
2389 | !test_bit(R5_UPTODATE, &dev->flags)) { | 2527 | test_bit(R5_Wantcompute, &dev->flags))) { |
2390 | if (test_bit(R5_Insync, &dev->flags)) rcw++; | 2528 | rcw++; |
2391 | else { | 2529 | if (!test_bit(R5_Insync, &dev->flags)) |
2392 | pr_debug("raid6: must_compute: " | 2530 | continue; /* it's a failed drive */ |
2393 | "disk %d flags=%#lx\n", i, dev->flags); | 2531 | |
2394 | must_compute++; | 2532 | if ( |
2533 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
2534 | pr_debug("Read_old stripe %llu " | ||
2535 | "block %d for Reconstruct\n", | ||
2536 | (unsigned long long)sh->sector, i); | ||
2537 | set_bit(R5_LOCKED, &dev->flags); | ||
2538 | set_bit(R5_Wantread, &dev->flags); | ||
2539 | s->locked++; | ||
2540 | } else { | ||
2541 | pr_debug("Request delayed stripe %llu " | ||
2542 | "block %d for Reconstruct\n", | ||
2543 | (unsigned long long)sh->sector, i); | ||
2544 | set_bit(STRIPE_DELAYED, &sh->state); | ||
2545 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2395 | } | 2546 | } |
2396 | } | 2547 | } |
2397 | } | 2548 | } |
2398 | pr_debug("for sector %llu, rcw=%d, must_compute=%d\n", | ||
2399 | (unsigned long long)sh->sector, rcw, must_compute); | ||
2400 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2401 | |||
2402 | if (rcw > 0) | ||
2403 | /* want reconstruct write, but need to get some data */ | ||
2404 | for (i = disks; i--; ) { | ||
2405 | struct r5dev *dev = &sh->dev[i]; | ||
2406 | if (!test_bit(R5_OVERWRITE, &dev->flags) | ||
2407 | && !(s->failed == 0 && (i == pd_idx || i == qd_idx)) | ||
2408 | && !test_bit(R5_LOCKED, &dev->flags) && | ||
2409 | !test_bit(R5_UPTODATE, &dev->flags) && | ||
2410 | test_bit(R5_Insync, &dev->flags)) { | ||
2411 | if ( | ||
2412 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
2413 | pr_debug("Read_old stripe %llu " | ||
2414 | "block %d for Reconstruct\n", | ||
2415 | (unsigned long long)sh->sector, i); | ||
2416 | set_bit(R5_LOCKED, &dev->flags); | ||
2417 | set_bit(R5_Wantread, &dev->flags); | ||
2418 | s->locked++; | ||
2419 | } else { | ||
2420 | pr_debug("Request delayed stripe %llu " | ||
2421 | "block %d for Reconstruct\n", | ||
2422 | (unsigned long long)sh->sector, i); | ||
2423 | set_bit(STRIPE_DELAYED, &sh->state); | ||
2424 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2425 | } | ||
2426 | } | ||
2427 | } | ||
2428 | /* now if nothing is locked, and if we have enough data, we can start a | 2549 | /* now if nothing is locked, and if we have enough data, we can start a |
2429 | * write request | 2550 | * write request |
2430 | */ | 2551 | */ |
2431 | if (s->locked == 0 && rcw == 0 && | 2552 | if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && |
2553 | s->locked == 0 && rcw == 0 && | ||
2432 | !test_bit(STRIPE_BIT_DELAY, &sh->state)) { | 2554 | !test_bit(STRIPE_BIT_DELAY, &sh->state)) { |
2433 | if (must_compute > 0) { | 2555 | schedule_reconstruction(sh, s, 1, 0); |
2434 | /* We have failed blocks and need to compute them */ | ||
2435 | switch (s->failed) { | ||
2436 | case 0: | ||
2437 | BUG(); | ||
2438 | case 1: | ||
2439 | compute_block_1(sh, r6s->failed_num[0], 0); | ||
2440 | break; | ||
2441 | case 2: | ||
2442 | compute_block_2(sh, r6s->failed_num[0], | ||
2443 | r6s->failed_num[1]); | ||
2444 | break; | ||
2445 | default: /* This request should have been failed? */ | ||
2446 | BUG(); | ||
2447 | } | ||
2448 | } | ||
2449 | |||
2450 | pr_debug("Computing parity for stripe %llu\n", | ||
2451 | (unsigned long long)sh->sector); | ||
2452 | compute_parity6(sh, RECONSTRUCT_WRITE); | ||
2453 | /* now every locked buffer is ready to be written */ | ||
2454 | for (i = disks; i--; ) | ||
2455 | if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { | ||
2456 | pr_debug("Writing stripe %llu block %d\n", | ||
2457 | (unsigned long long)sh->sector, i); | ||
2458 | s->locked++; | ||
2459 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | ||
2460 | } | ||
2461 | if (s->locked == disks) | ||
2462 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) | ||
2463 | atomic_inc(&conf->pending_full_writes); | ||
2464 | /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ | ||
2465 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2466 | |||
2467 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
2468 | atomic_dec(&conf->preread_active_stripes); | ||
2469 | if (atomic_read(&conf->preread_active_stripes) < | ||
2470 | IO_THRESHOLD) | ||
2471 | md_wakeup_thread(conf->mddev->thread); | ||
2472 | } | ||
2473 | } | 2556 | } |
2474 | } | 2557 | } |
2475 | 2558 | ||
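Editor's note: the reconstruct-write path above no longer computes parity synchronously via compute_parity6(); schedule_reconstruction() queues the work and the async layer generates P and Q (via async_gen_syndrome() when a PQ-capable channel exists, or the synchronous raid6 code otherwise). For readers unfamiliar with the math being offloaded, here is a minimal userspace model of P/Q syndrome generation over GF(2^8); the block size and layout are simplifications for illustration, not the kernel's implementation.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define NDATA  4        /* data disks in this toy stripe */
    #define BLKSZ  16       /* bytes per block; the kernel uses STRIPE_SIZE */

    /* multiply by x (i.e. 2) in GF(2^8) with the RAID-6 polynomial 0x11d */
    static uint8_t gf_mul2(uint8_t v)
    {
            return (uint8_t)((v << 1) ^ ((v & 0x80) ? 0x1d : 0));
    }

    /* compute P (plain xor) and Q (sum of g^i * D_i) for one stripe */
    static void gen_syndrome(uint8_t data[NDATA][BLKSZ], uint8_t *p, uint8_t *q)
    {
            memset(p, 0, BLKSZ);
            memset(q, 0, BLKSZ);
            for (int i = NDATA - 1; i >= 0; i--)    /* Horner's rule for Q */
                    for (int b = 0; b < BLKSZ; b++) {
                            q[b] = gf_mul2(q[b]) ^ data[i][b];
                            p[b] ^= data[i][b];
                    }
    }

    int main(void)
    {
            uint8_t data[NDATA][BLKSZ], p[BLKSZ], q[BLKSZ];

            for (int i = 0; i < NDATA; i++)
                    memset(data[i], i + 1, BLKSZ);
            gen_syndrome(data, p, q);
            printf("P[0]=%02x Q[0]=%02x\n", p[0], q[0]);
            return 0;
    }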
@@ -2528,7 +2611,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2528 | * we are done. Otherwise update the mismatch count and repair | 2611 | * we are done. Otherwise update the mismatch count and repair |
2529 | * parity if !MD_RECOVERY_CHECK | 2612 | * parity if !MD_RECOVERY_CHECK |
2530 | */ | 2613 | */ |
2531 | if (sh->ops.zero_sum_result == 0) | 2614 | if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) |
2532 | /* parity is correct (on disc, | 2615 | /* parity is correct (on disc, |
2533 | * not in buffer any more) | 2616 | * not in buffer any more) |
2534 | */ | 2617 | */ |
@@ -2545,6 +2628,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2545 | set_bit(R5_Wantcompute, | 2628 | set_bit(R5_Wantcompute, |
2546 | &sh->dev[sh->pd_idx].flags); | 2629 | &sh->dev[sh->pd_idx].flags); |
2547 | sh->ops.target = sh->pd_idx; | 2630 | sh->ops.target = sh->pd_idx; |
2631 | sh->ops.target2 = -1; | ||
2548 | s->uptodate++; | 2632 | s->uptodate++; |
2549 | } | 2633 | } |
2550 | } | 2634 | } |
@@ -2561,67 +2645,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2561 | 2645 | ||
2562 | 2646 | ||
2563 | static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, | 2647 | static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, |
2564 | struct stripe_head_state *s, | 2648 | struct stripe_head_state *s, |
2565 | struct r6_state *r6s, struct page *tmp_page, | 2649 | struct r6_state *r6s, int disks) |
2566 | int disks) | ||
2567 | { | 2650 | { |
2568 | int update_p = 0, update_q = 0; | ||
2569 | struct r5dev *dev; | ||
2570 | int pd_idx = sh->pd_idx; | 2651 | int pd_idx = sh->pd_idx; |
2571 | int qd_idx = sh->qd_idx; | 2652 | int qd_idx = sh->qd_idx; |
2653 | struct r5dev *dev; | ||
2572 | 2654 | ||
2573 | set_bit(STRIPE_HANDLE, &sh->state); | 2655 | set_bit(STRIPE_HANDLE, &sh->state); |
2574 | 2656 | ||
2575 | BUG_ON(s->failed > 2); | 2657 | BUG_ON(s->failed > 2); |
2576 | BUG_ON(s->uptodate < disks); | 2658 | |
2577 | /* Want to check and possibly repair P and Q. | 2659 | /* Want to check and possibly repair P and Q. |
2578 | * However there could be one 'failed' device, in which | 2660 | * However there could be one 'failed' device, in which |
2579 | * case we can only check one of them, possibly using the | 2661 | * case we can only check one of them, possibly using the |
2580 | * other to generate missing data | 2662 | * other to generate missing data |
2581 | */ | 2663 | */ |
2582 | 2664 | ||
2583 | /* If !tmp_page, we cannot do the calculations, | 2665 | switch (sh->check_state) { |
2584 | * but as we have set STRIPE_HANDLE, we will soon be called | 2666 | case check_state_idle: |
2585 | * by stripe_handle with a tmp_page - just wait until then. | 2667 | /* start a new check operation if there are < 2 failures */ |
2586 | */ | ||
2587 | if (tmp_page) { | ||
2588 | if (s->failed == r6s->q_failed) { | 2668 | if (s->failed == r6s->q_failed) { |
2589 | /* The only possible failed device holds 'Q', so it | 2669 | /* The only possible failed device holds Q, so it |
2590 | * makes sense to check P (If anything else were failed, | 2670 | * makes sense to check P (If anything else were failed, |
2591 | * we would have used P to recreate it). | 2671 | * we would have used P to recreate it). |
2592 | */ | 2672 | */ |
2593 | compute_block_1(sh, pd_idx, 1); | 2673 | sh->check_state = check_state_run; |
2594 | if (!page_is_zero(sh->dev[pd_idx].page)) { | ||
2595 | compute_block_1(sh, pd_idx, 0); | ||
2596 | update_p = 1; | ||
2597 | } | ||
2598 | } | 2674 | } |
2599 | if (!r6s->q_failed && s->failed < 2) { | 2675 | if (!r6s->q_failed && s->failed < 2) { |
2600 | /* q is not failed, and we didn't use it to generate | 2676 | /* Q is not failed, and we didn't use it to generate |
2601 | * anything, so it makes sense to check it | 2677 | * anything, so it makes sense to check it |
2602 | */ | 2678 | */ |
2603 | memcpy(page_address(tmp_page), | 2679 | if (sh->check_state == check_state_run) |
2604 | page_address(sh->dev[qd_idx].page), | 2680 | sh->check_state = check_state_run_pq; |
2605 | STRIPE_SIZE); | 2681 | else |
2606 | compute_parity6(sh, UPDATE_PARITY); | 2682 | sh->check_state = check_state_run_q; |
2607 | if (memcmp(page_address(tmp_page), | ||
2608 | page_address(sh->dev[qd_idx].page), | ||
2609 | STRIPE_SIZE) != 0) { | ||
2610 | clear_bit(STRIPE_INSYNC, &sh->state); | ||
2611 | update_q = 1; | ||
2612 | } | ||
2613 | } | 2683 | } |
2614 | if (update_p || update_q) { | 2684 | |
2615 | conf->mddev->resync_mismatches += STRIPE_SECTORS; | 2685 | /* discard potentially stale zero_sum_result */ |
2616 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) | 2686 | sh->ops.zero_sum_result = 0; |
2617 | /* don't try to repair!! */ | 2687 | |
2618 | update_p = update_q = 0; | 2688 | if (sh->check_state == check_state_run) { |
2689 | /* async_xor_zero_sum destroys the contents of P */ | ||
2690 | clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | ||
2691 | s->uptodate--; | ||
2619 | } | 2692 | } |
2693 | if (sh->check_state >= check_state_run && | ||
2694 | sh->check_state <= check_state_run_pq) { | ||
2695 | /* async_syndrome_zero_sum preserves P and Q, so | ||
2696 | * no need to mark them !uptodate here | ||
2697 | */ | ||
2698 | set_bit(STRIPE_OP_CHECK, &s->ops_request); | ||
2699 | break; | ||
2700 | } | ||
2701 | |||
2702 | /* we have 2-disk failure */ | ||
2703 | BUG_ON(s->failed != 2); | ||
2704 | /* fall through */ | ||
2705 | case check_state_compute_result: | ||
2706 | sh->check_state = check_state_idle; | ||
2707 | |||
2708 | /* check that a write has not made the stripe insync */ | ||
2709 | if (test_bit(STRIPE_INSYNC, &sh->state)) | ||
2710 | break; | ||
2620 | 2711 | ||
2621 | /* now write out any block on a failed drive, | 2712 | /* now write out any block on a failed drive, |
2622 | * or P or Q if they need it | 2713 | * or P or Q if they were recomputed |
2623 | */ | 2714 | */ |
2624 | 2715 | BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ | |
2625 | if (s->failed == 2) { | 2716 | if (s->failed == 2) { |
2626 | dev = &sh->dev[r6s->failed_num[1]]; | 2717 | dev = &sh->dev[r6s->failed_num[1]]; |
2627 | s->locked++; | 2718 | s->locked++; |
@@ -2634,14 +2725,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, | |||
2634 | set_bit(R5_LOCKED, &dev->flags); | 2725 | set_bit(R5_LOCKED, &dev->flags); |
2635 | set_bit(R5_Wantwrite, &dev->flags); | 2726 | set_bit(R5_Wantwrite, &dev->flags); |
2636 | } | 2727 | } |
2637 | 2728 | if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { | |
2638 | if (update_p) { | ||
2639 | dev = &sh->dev[pd_idx]; | 2729 | dev = &sh->dev[pd_idx]; |
2640 | s->locked++; | 2730 | s->locked++; |
2641 | set_bit(R5_LOCKED, &dev->flags); | 2731 | set_bit(R5_LOCKED, &dev->flags); |
2642 | set_bit(R5_Wantwrite, &dev->flags); | 2732 | set_bit(R5_Wantwrite, &dev->flags); |
2643 | } | 2733 | } |
2644 | if (update_q) { | 2734 | if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { |
2645 | dev = &sh->dev[qd_idx]; | 2735 | dev = &sh->dev[qd_idx]; |
2646 | s->locked++; | 2736 | s->locked++; |
2647 | set_bit(R5_LOCKED, &dev->flags); | 2737 | set_bit(R5_LOCKED, &dev->flags); |
@@ -2650,6 +2740,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, | |||
2650 | clear_bit(STRIPE_DEGRADED, &sh->state); | 2740 | clear_bit(STRIPE_DEGRADED, &sh->state); |
2651 | 2741 | ||
2652 | set_bit(STRIPE_INSYNC, &sh->state); | 2742 | set_bit(STRIPE_INSYNC, &sh->state); |
2743 | break; | ||
2744 | case check_state_run: | ||
2745 | case check_state_run_q: | ||
2746 | case check_state_run_pq: | ||
2747 | break; /* we will be called again upon completion */ | ||
2748 | case check_state_check_result: | ||
2749 | sh->check_state = check_state_idle; | ||
2750 | |||
2751 | /* handle a successful check operation, if parity is correct | ||
2752 | * we are done. Otherwise update the mismatch count and repair | ||
2753 | * parity if !MD_RECOVERY_CHECK | ||
2754 | */ | ||
2755 | if (sh->ops.zero_sum_result == 0) { | ||
2756 | /* both parities are correct */ | ||
2757 | if (!s->failed) | ||
2758 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2759 | else { | ||
2760 | /* in contrast to the raid5 case we can validate | ||
2761 | * parity, but still have a failure to write | ||
2762 | * back | ||
2763 | */ | ||
2764 | sh->check_state = check_state_compute_result; | ||
2765 | /* Returning at this point means that we may go | ||
2766 | * off and bring p and/or q uptodate again so | ||
2767 | * we make sure to check zero_sum_result again | ||
2768 | * to verify if p or q need writeback | ||
2769 | */ | ||
2770 | } | ||
2771 | } else { | ||
2772 | conf->mddev->resync_mismatches += STRIPE_SECTORS; | ||
2773 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) | ||
2774 | /* don't try to repair!! */ | ||
2775 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2776 | else { | ||
2777 | int *target = &sh->ops.target; | ||
2778 | |||
2779 | sh->ops.target = -1; | ||
2780 | sh->ops.target2 = -1; | ||
2781 | sh->check_state = check_state_compute_run; | ||
2782 | set_bit(STRIPE_COMPUTE_RUN, &sh->state); | ||
2783 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); | ||
2784 | if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { | ||
2785 | set_bit(R5_Wantcompute, | ||
2786 | &sh->dev[pd_idx].flags); | ||
2787 | *target = pd_idx; | ||
2788 | target = &sh->ops.target2; | ||
2789 | s->uptodate++; | ||
2790 | } | ||
2791 | if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { | ||
2792 | set_bit(R5_Wantcompute, | ||
2793 | &sh->dev[qd_idx].flags); | ||
2794 | *target = qd_idx; | ||
2795 | s->uptodate++; | ||
2796 | } | ||
2797 | } | ||
2798 | } | ||
2799 | break; | ||
2800 | case check_state_compute_run: | ||
2801 | break; | ||
2802 | default: | ||
2803 | printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n", | ||
2804 | __func__, sh->check_state, | ||
2805 | (unsigned long long) sh->sector); | ||
2806 | BUG(); | ||
2653 | } | 2807 | } |
2654 | } | 2808 | } |
2655 | 2809 | ||
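Editor's note: handle_parity_checks6() is now re-entered once per completed async operation instead of doing everything in one synchronous pass. The sketch below is a simplified userspace model of the state progression only (it ignores failed-drive writeback, MD_RECOVERY_CHECK, and the compute-target bookkeeping); the state names mirror the check_states enum added in raid5.h further down.

    #include <stdio.h>

    /* condensed model of the raid6 check_state progression */
    enum check_states {
            check_state_idle,
            check_state_run,        /* P only: Q belongs to a failed drive */
            check_state_run_q,      /* Q only */
            check_state_run_pq,     /* both parities */
            check_state_check_result,
            check_state_compute_run,
            check_state_compute_result,
    };

    static enum check_states next(enum check_states st, int mismatch, int failed)
    {
            switch (st) {
            case check_state_idle:
                    /* which parities can be validated depends on failures */
                    return failed ? check_state_run : check_state_run_pq;
            case check_state_run:
            case check_state_run_q:
            case check_state_run_pq:
                    return check_state_check_result; /* async check completed */
            case check_state_check_result:
                    if (!mismatch && !failed)
                            return check_state_idle;  /* stripe is in sync */
                    /* recompute mismatching parity, or write back failed blocks */
                    return mismatch ? check_state_compute_run
                                    : check_state_compute_result;
            case check_state_compute_run:
                    return check_state_compute_result; /* async compute done */
            case check_state_compute_result:
            default:
                    return check_state_idle;          /* blocks written back */
            }
    }

    int main(void)
    {
            enum check_states st = check_state_idle;

            /* one mismatching stripe with no failed drives */
            for (int pass = 0; pass < 6; pass++) {
                    printf("pass %d: state %d\n", pass, st);
                    st = next(st, /*mismatch=*/1, /*failed=*/0);
            }
            return 0;
    }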
@@ -2667,6 +2821,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2667 | if (i != sh->pd_idx && i != sh->qd_idx) { | 2821 | if (i != sh->pd_idx && i != sh->qd_idx) { |
2668 | int dd_idx, j; | 2822 | int dd_idx, j; |
2669 | struct stripe_head *sh2; | 2823 | struct stripe_head *sh2; |
2824 | struct async_submit_ctl submit; | ||
2670 | 2825 | ||
2671 | sector_t bn = compute_blocknr(sh, i, 1); | 2826 | sector_t bn = compute_blocknr(sh, i, 1); |
2672 | sector_t s = raid5_compute_sector(conf, bn, 0, | 2827 | sector_t s = raid5_compute_sector(conf, bn, 0, |
@@ -2686,9 +2841,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2686 | } | 2841 | } |
2687 | 2842 | ||
2688 | /* place all the copies on one channel */ | 2843 | /* place all the copies on one channel */ |
2844 | init_async_submit(&submit, 0, tx, NULL, NULL, NULL); | ||
2689 | tx = async_memcpy(sh2->dev[dd_idx].page, | 2845 | tx = async_memcpy(sh2->dev[dd_idx].page, |
2690 | sh->dev[i].page, 0, 0, STRIPE_SIZE, | 2846 | sh->dev[i].page, 0, 0, STRIPE_SIZE, |
2691 | ASYNC_TX_DEP_ACK, tx, NULL, NULL); | 2847 | &submit); |
2692 | 2848 | ||
2693 | set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); | 2849 | set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); |
2694 | set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); | 2850 | set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); |
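Editor's note: the hunk above shows the mechanical half of the API conversion: the flags/dependency/callback tail of each async_* call is replaced by a struct async_submit_ctl prepared with init_async_submit(). A before/after sketch using the signatures declared in the async_tx.h hunk later in this diff (kernel context, not standalone code; dest_page, src_page, callback_fn and callback_arg are illustrative names):

    /* old style: flags, dependency and callback passed positionally */
    tx = async_memcpy(dest_page, src_page, 0, 0, STRIPE_SIZE,
                      ASYNC_TX_DEP_ACK, tx, callback_fn, callback_arg);

    /* new style: the same information travels in async_submit_ctl.
     * ASYNC_TX_DEP_ACK is removed earlier in this series (dependent
     * descriptors are acked implicitly), and a scribble pointer (may be
     * NULL) rides along for dma/page address conversions.
     */
    struct async_submit_ctl submit;

    init_async_submit(&submit, 0, tx, callback_fn, callback_arg, NULL);
    tx = async_memcpy(dest_page, src_page, 0, 0, STRIPE_SIZE, &submit);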
@@ -2974,7 +3130,7 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
2974 | /* Need to write out all blocks after computing parity */ | 3130 | /* Need to write out all blocks after computing parity */ |
2975 | sh->disks = conf->raid_disks; | 3131 | sh->disks = conf->raid_disks; |
2976 | stripe_set_idx(sh->sector, conf, 0, sh); | 3132 | stripe_set_idx(sh->sector, conf, 0, sh); |
2977 | schedule_reconstruction5(sh, &s, 1, 1); | 3133 | schedule_reconstruction(sh, &s, 1, 1); |
2978 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { | 3134 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { |
2979 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 3135 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
2980 | atomic_dec(&conf->reshape_stripes); | 3136 | atomic_dec(&conf->reshape_stripes); |
@@ -2994,7 +3150,7 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
2994 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); | 3150 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); |
2995 | 3151 | ||
2996 | if (s.ops_request) | 3152 | if (s.ops_request) |
2997 | raid5_run_ops(sh, s.ops_request); | 3153 | raid_run_ops(sh, s.ops_request); |
2998 | 3154 | ||
2999 | ops_run_io(sh, &s); | 3155 | ops_run_io(sh, &s); |
3000 | 3156 | ||
@@ -3003,7 +3159,7 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
3003 | return blocked_rdev == NULL; | 3159 | return blocked_rdev == NULL; |
3004 | } | 3160 | } |
3005 | 3161 | ||
3006 | static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | 3162 | static bool handle_stripe6(struct stripe_head *sh) |
3007 | { | 3163 | { |
3008 | raid5_conf_t *conf = sh->raid_conf; | 3164 | raid5_conf_t *conf = sh->raid_conf; |
3009 | int disks = sh->disks; | 3165 | int disks = sh->disks; |
@@ -3015,9 +3171,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3015 | mdk_rdev_t *blocked_rdev = NULL; | 3171 | mdk_rdev_t *blocked_rdev = NULL; |
3016 | 3172 | ||
3017 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " | 3173 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " |
3018 | "pd_idx=%d, qd_idx=%d\n", | 3174 | "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", |
3019 | (unsigned long long)sh->sector, sh->state, | 3175 | (unsigned long long)sh->sector, sh->state, |
3020 | atomic_read(&sh->count), pd_idx, qd_idx); | 3176 | atomic_read(&sh->count), pd_idx, qd_idx, |
3177 | sh->check_state, sh->reconstruct_state); | ||
3021 | memset(&s, 0, sizeof(s)); | 3178 | memset(&s, 0, sizeof(s)); |
3022 | 3179 | ||
3023 | spin_lock(&sh->lock); | 3180 | spin_lock(&sh->lock); |
@@ -3037,35 +3194,24 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3037 | 3194 | ||
3038 | pr_debug("check %d: state 0x%lx read %p write %p written %p\n", | 3195 | pr_debug("check %d: state 0x%lx read %p write %p written %p\n", |
3039 | i, dev->flags, dev->toread, dev->towrite, dev->written); | 3196 | i, dev->flags, dev->toread, dev->towrite, dev->written); |
3040 | /* maybe we can reply to a read */ | 3197 | /* maybe we can reply to a read |
3041 | if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { | 3198 | * |
3042 | struct bio *rbi, *rbi2; | 3199 | * new wantfill requests are only permitted while |
3043 | pr_debug("Return read for disc %d\n", i); | 3200 | * ops_complete_biofill is guaranteed to be inactive |
3044 | spin_lock_irq(&conf->device_lock); | 3201 | */ |
3045 | rbi = dev->toread; | 3202 | if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && |
3046 | dev->toread = NULL; | 3203 | !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) |
3047 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | 3204 | set_bit(R5_Wantfill, &dev->flags); |
3048 | wake_up(&conf->wait_for_overlap); | ||
3049 | spin_unlock_irq(&conf->device_lock); | ||
3050 | while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { | ||
3051 | copy_data(0, rbi, dev->page, dev->sector); | ||
3052 | rbi2 = r5_next_bio(rbi, dev->sector); | ||
3053 | spin_lock_irq(&conf->device_lock); | ||
3054 | if (!raid5_dec_bi_phys_segments(rbi)) { | ||
3055 | rbi->bi_next = return_bi; | ||
3056 | return_bi = rbi; | ||
3057 | } | ||
3058 | spin_unlock_irq(&conf->device_lock); | ||
3059 | rbi = rbi2; | ||
3060 | } | ||
3061 | } | ||
3062 | 3205 | ||
3063 | /* now count some things */ | 3206 | /* now count some things */ |
3064 | if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; | 3207 | if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; |
3065 | if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; | 3208 | if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; |
3209 | if (test_bit(R5_Wantcompute, &dev->flags)) | ||
3210 | BUG_ON(++s.compute > 2); | ||
3066 | 3211 | ||
3067 | 3212 | if (test_bit(R5_Wantfill, &dev->flags)) { | |
3068 | if (dev->toread) | 3213 | s.to_fill++; |
3214 | } else if (dev->toread) | ||
3069 | s.to_read++; | 3215 | s.to_read++; |
3070 | if (dev->towrite) { | 3216 | if (dev->towrite) { |
3071 | s.to_write++; | 3217 | s.to_write++; |
@@ -3106,6 +3252,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3106 | blocked_rdev = NULL; | 3252 | blocked_rdev = NULL; |
3107 | } | 3253 | } |
3108 | 3254 | ||
3255 | if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { | ||
3256 | set_bit(STRIPE_OP_BIOFILL, &s.ops_request); | ||
3257 | set_bit(STRIPE_BIOFILL_RUN, &sh->state); | ||
3258 | } | ||
3259 | |||
3109 | pr_debug("locked=%d uptodate=%d to_read=%d" | 3260 | pr_debug("locked=%d uptodate=%d to_read=%d" |
3110 | " to_write=%d failed=%d failed_num=%d,%d\n", | 3261 | " to_write=%d failed=%d failed_num=%d,%d\n", |
3111 | s.locked, s.uptodate, s.to_read, s.to_write, s.failed, | 3262 | s.locked, s.uptodate, s.to_read, s.to_write, s.failed, |
@@ -3146,19 +3297,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3146 | * or to load a block that is being partially written. | 3297 | * or to load a block that is being partially written. |
3147 | */ | 3298 | */ |
3148 | if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || | 3299 | if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || |
3149 | (s.syncing && (s.uptodate < disks)) || s.expanding) | 3300 | (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) |
3150 | handle_stripe_fill6(sh, &s, &r6s, disks); | 3301 | handle_stripe_fill6(sh, &s, &r6s, disks); |
3151 | 3302 | ||
3152 | /* now to consider writing and what else, if anything should be read */ | 3303 | /* Now we check to see if any write operations have recently |
3153 | if (s.to_write) | 3304 | * completed |
3305 | */ | ||
3306 | if (sh->reconstruct_state == reconstruct_state_drain_result) { | ||
3307 | int qd_idx = sh->qd_idx; | ||
3308 | |||
3309 | sh->reconstruct_state = reconstruct_state_idle; | ||
3310 | /* All the 'written' buffers and the parity blocks are ready to | ||
3311 | * be written back to disk | ||
3312 | */ | ||
3313 | BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); | ||
3314 | BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags)); | ||
3315 | for (i = disks; i--; ) { | ||
3316 | dev = &sh->dev[i]; | ||
3317 | if (test_bit(R5_LOCKED, &dev->flags) && | ||
3318 | (i == sh->pd_idx || i == qd_idx || | ||
3319 | dev->written)) { | ||
3320 | pr_debug("Writing block %d\n", i); | ||
3321 | BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); | ||
3322 | set_bit(R5_Wantwrite, &dev->flags); | ||
3323 | if (!test_bit(R5_Insync, &dev->flags) || | ||
3324 | ((i == sh->pd_idx || i == qd_idx) && | ||
3325 | s.failed == 0)) | ||
3326 | set_bit(STRIPE_INSYNC, &sh->state); | ||
3327 | } | ||
3328 | } | ||
3329 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
3330 | atomic_dec(&conf->preread_active_stripes); | ||
3331 | if (atomic_read(&conf->preread_active_stripes) < | ||
3332 | IO_THRESHOLD) | ||
3333 | md_wakeup_thread(conf->mddev->thread); | ||
3334 | } | ||
3335 | } | ||
3336 | |||
3337 | /* Now to consider new write requests and what else, if anything | ||
3338 | * should be read. We do not handle new writes when: | ||
3339 | * 1/ A 'write' operation (copy+gen_syndrome) is already in flight. | ||
3340 | * 2/ A 'check' operation is in flight, as it may clobber the parity | ||
3341 | * block. | ||
3342 | */ | ||
3343 | if (s.to_write && !sh->reconstruct_state && !sh->check_state) | ||
3154 | handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); | 3344 | handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); |
3155 | 3345 | ||
3156 | /* maybe we need to check and possibly fix the parity for this stripe | 3346 | /* maybe we need to check and possibly fix the parity for this stripe |
3157 | * Any reads will already have been scheduled, so we just see if enough | 3347 | * Any reads will already have been scheduled, so we just see if enough |
3158 | * data is available | 3348 | * data is available. The parity check is held off while parity |
3349 | * dependent operations are in flight. | ||
3159 | */ | 3350 | */ |
3160 | if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) | 3351 | if (sh->check_state || |
3161 | handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); | 3352 | (s.syncing && s.locked == 0 && |
3353 | !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && | ||
3354 | !test_bit(STRIPE_INSYNC, &sh->state))) | ||
3355 | handle_parity_checks6(conf, sh, &s, &r6s, disks); | ||
3162 | 3356 | ||
3163 | if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { | 3357 | if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { |
3164 | md_done_sync(conf->mddev, STRIPE_SECTORS,1); | 3358 | md_done_sync(conf->mddev, STRIPE_SECTORS,1); |
@@ -3179,15 +3373,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3179 | set_bit(R5_Wantwrite, &dev->flags); | 3373 | set_bit(R5_Wantwrite, &dev->flags); |
3180 | set_bit(R5_ReWrite, &dev->flags); | 3374 | set_bit(R5_ReWrite, &dev->flags); |
3181 | set_bit(R5_LOCKED, &dev->flags); | 3375 | set_bit(R5_LOCKED, &dev->flags); |
3376 | s.locked++; | ||
3182 | } else { | 3377 | } else { |
3183 | /* let's read it back */ | 3378 | /* let's read it back */ |
3184 | set_bit(R5_Wantread, &dev->flags); | 3379 | set_bit(R5_Wantread, &dev->flags); |
3185 | set_bit(R5_LOCKED, &dev->flags); | 3380 | set_bit(R5_LOCKED, &dev->flags); |
3381 | s.locked++; | ||
3186 | } | 3382 | } |
3187 | } | 3383 | } |
3188 | } | 3384 | } |
3189 | 3385 | ||
3190 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { | 3386 | /* Finish reconstruct operations initiated by the expansion process */ |
3387 | if (sh->reconstruct_state == reconstruct_state_result) { | ||
3388 | sh->reconstruct_state = reconstruct_state_idle; | ||
3389 | clear_bit(STRIPE_EXPANDING, &sh->state); | ||
3390 | for (i = conf->raid_disks; i--; ) { | ||
3391 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | ||
3392 | set_bit(R5_LOCKED, &sh->dev[i].flags); | ||
3393 | s.locked++; | ||
3394 | } | ||
3395 | } | ||
3396 | |||
3397 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && | ||
3398 | !sh->reconstruct_state) { | ||
3191 | struct stripe_head *sh2 | 3399 | struct stripe_head *sh2 |
3192 | = get_active_stripe(conf, sh->sector, 1, 1, 1); | 3400 | = get_active_stripe(conf, sh->sector, 1, 1, 1); |
3193 | if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { | 3401 | if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { |
@@ -3208,14 +3416,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3208 | /* Need to write out all blocks after computing P&Q */ | 3416 | /* Need to write out all blocks after computing P&Q */ |
3209 | sh->disks = conf->raid_disks; | 3417 | sh->disks = conf->raid_disks; |
3210 | stripe_set_idx(sh->sector, conf, 0, sh); | 3418 | stripe_set_idx(sh->sector, conf, 0, sh); |
3211 | compute_parity6(sh, RECONSTRUCT_WRITE); | 3419 | schedule_reconstruction(sh, &s, 1, 1); |
3212 | for (i = conf->raid_disks ; i-- ; ) { | 3420 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { |
3213 | set_bit(R5_LOCKED, &sh->dev[i].flags); | ||
3214 | s.locked++; | ||
3215 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | ||
3216 | } | ||
3217 | clear_bit(STRIPE_EXPANDING, &sh->state); | ||
3218 | } else if (s.expanded) { | ||
3219 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 3421 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
3220 | atomic_dec(&conf->reshape_stripes); | 3422 | atomic_dec(&conf->reshape_stripes); |
3221 | wake_up(&conf->wait_for_overlap); | 3423 | wake_up(&conf->wait_for_overlap); |
@@ -3233,6 +3435,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3233 | if (unlikely(blocked_rdev)) | 3435 | if (unlikely(blocked_rdev)) |
3234 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); | 3436 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); |
3235 | 3437 | ||
3438 | if (s.ops_request) | ||
3439 | raid_run_ops(sh, s.ops_request); | ||
3440 | |||
3236 | ops_run_io(sh, &s); | 3441 | ops_run_io(sh, &s); |
3237 | 3442 | ||
3238 | return_io(return_bi); | 3443 | return_io(return_bi); |
@@ -3241,16 +3446,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3241 | } | 3446 | } |
3242 | 3447 | ||
3243 | /* returns true if the stripe was handled */ | 3448 | /* returns true if the stripe was handled */ |
3244 | static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page) | 3449 | static bool handle_stripe(struct stripe_head *sh) |
3245 | { | 3450 | { |
3246 | if (sh->raid_conf->level == 6) | 3451 | if (sh->raid_conf->level == 6) |
3247 | return handle_stripe6(sh, tmp_page); | 3452 | return handle_stripe6(sh); |
3248 | else | 3453 | else |
3249 | return handle_stripe5(sh); | 3454 | return handle_stripe5(sh); |
3250 | } | 3455 | } |
3251 | 3456 | ||
3252 | |||
3253 | |||
3254 | static void raid5_activate_delayed(raid5_conf_t *conf) | 3457 | static void raid5_activate_delayed(raid5_conf_t *conf) |
3255 | { | 3458 | { |
3256 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { | 3459 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { |
@@ -4046,7 +4249,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
4046 | spin_unlock(&sh->lock); | 4249 | spin_unlock(&sh->lock); |
4047 | 4250 | ||
4048 | /* wait for any blocked device to be handled */ | 4251 | /* wait for any blocked device to be handled */ |
4049 | while(unlikely(!handle_stripe(sh, NULL))) | 4252 | while (unlikely(!handle_stripe(sh))) |
4050 | ; | 4253 | ; |
4051 | release_stripe(sh); | 4254 | release_stripe(sh); |
4052 | 4255 | ||
@@ -4103,7 +4306,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
4103 | return handled; | 4306 | return handled; |
4104 | } | 4307 | } |
4105 | 4308 | ||
4106 | handle_stripe(sh, NULL); | 4309 | handle_stripe(sh); |
4107 | release_stripe(sh); | 4310 | release_stripe(sh); |
4108 | handled++; | 4311 | handled++; |
4109 | } | 4312 | } |
@@ -4117,6 +4320,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
4117 | return handled; | 4320 | return handled; |
4118 | } | 4321 | } |
4119 | 4322 | ||
4323 | #ifdef CONFIG_MULTICORE_RAID456 | ||
4324 | static void __process_stripe(void *param, async_cookie_t cookie) | ||
4325 | { | ||
4326 | struct stripe_head *sh = param; | ||
4327 | |||
4328 | handle_stripe(sh); | ||
4329 | release_stripe(sh); | ||
4330 | } | ||
4331 | |||
4332 | static void process_stripe(struct stripe_head *sh, struct list_head *domain) | ||
4333 | { | ||
4334 | async_schedule_domain(__process_stripe, sh, domain); | ||
4335 | } | ||
4336 | |||
4337 | static void synchronize_stripe_processing(struct list_head *domain) | ||
4338 | { | ||
4339 | async_synchronize_full_domain(domain); | ||
4340 | } | ||
4341 | #else | ||
4342 | static void process_stripe(struct stripe_head *sh, struct list_head *domain) | ||
4343 | { | ||
4344 | handle_stripe(sh); | ||
4345 | release_stripe(sh); | ||
4346 | cond_resched(); | ||
4347 | } | ||
4348 | |||
4349 | static void synchronize_stripe_processing(struct list_head *domain) | ||
4350 | { | ||
4351 | } | ||
4352 | #endif | ||
4120 | 4353 | ||
4121 | 4354 | ||
4122 | /* | 4355 | /* |
@@ -4131,6 +4364,7 @@ static void raid5d(mddev_t *mddev) | |||
4131 | struct stripe_head *sh; | 4364 | struct stripe_head *sh; |
4132 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4365 | raid5_conf_t *conf = mddev_to_conf(mddev); |
4133 | int handled; | 4366 | int handled; |
4367 | LIST_HEAD(raid_domain); | ||
4134 | 4368 | ||
4135 | pr_debug("+++ raid5d active\n"); | 4369 | pr_debug("+++ raid5d active\n"); |
4136 | 4370 | ||
@@ -4167,8 +4401,7 @@ static void raid5d(mddev_t *mddev) | |||
4167 | spin_unlock_irq(&conf->device_lock); | 4401 | spin_unlock_irq(&conf->device_lock); |
4168 | 4402 | ||
4169 | handled++; | 4403 | handled++; |
4170 | handle_stripe(sh, conf->spare_page); | 4404 | process_stripe(sh, &raid_domain); |
4171 | release_stripe(sh); | ||
4172 | 4405 | ||
4173 | spin_lock_irq(&conf->device_lock); | 4406 | spin_lock_irq(&conf->device_lock); |
4174 | } | 4407 | } |
@@ -4176,6 +4409,7 @@ static void raid5d(mddev_t *mddev) | |||
4176 | 4409 | ||
4177 | spin_unlock_irq(&conf->device_lock); | 4410 | spin_unlock_irq(&conf->device_lock); |
4178 | 4411 | ||
4412 | synchronize_stripe_processing(&raid_domain); | ||
4179 | async_tx_issue_pending_all(); | 4413 | async_tx_issue_pending_all(); |
4180 | unplug_slaves(mddev); | 4414 | unplug_slaves(mddev); |
4181 | 4415 | ||
@@ -4308,6 +4542,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
4308 | return sectors * (raid_disks - conf->max_degraded); | 4542 | return sectors * (raid_disks - conf->max_degraded); |
4309 | } | 4543 | } |
4310 | 4544 | ||
4545 | static void raid5_free_percpu(raid5_conf_t *conf) | ||
4546 | { | ||
4547 | struct raid5_percpu *percpu; | ||
4548 | unsigned long cpu; | ||
4549 | |||
4550 | if (!conf->percpu) | ||
4551 | return; | ||
4552 | |||
4553 | get_online_cpus(); | ||
4554 | for_each_possible_cpu(cpu) { | ||
4555 | percpu = per_cpu_ptr(conf->percpu, cpu); | ||
4556 | safe_put_page(percpu->spare_page); | ||
4557 | kfree(percpu->scribble); | ||
4558 | } | ||
4559 | #ifdef CONFIG_HOTPLUG_CPU | ||
4560 | unregister_cpu_notifier(&conf->cpu_notify); | ||
4561 | #endif | ||
4562 | put_online_cpus(); | ||
4563 | |||
4564 | free_percpu(conf->percpu); | ||
4565 | } | ||
4566 | |||
4567 | static void free_conf(raid5_conf_t *conf) | ||
4568 | { | ||
4569 | shrink_stripes(conf); | ||
4570 | raid5_free_percpu(conf); | ||
4571 | kfree(conf->disks); | ||
4572 | kfree(conf->stripe_hashtbl); | ||
4573 | kfree(conf); | ||
4574 | } | ||
4575 | |||
4576 | #ifdef CONFIG_HOTPLUG_CPU | ||
4577 | static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action, | ||
4578 | void *hcpu) | ||
4579 | { | ||
4580 | raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify); | ||
4581 | long cpu = (long)hcpu; | ||
4582 | struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); | ||
4583 | |||
4584 | switch (action) { | ||
4585 | case CPU_UP_PREPARE: | ||
4586 | case CPU_UP_PREPARE_FROZEN: | ||
4587 | if (conf->level == 6 && !percpu->spare_page) | ||
4588 | percpu->spare_page = alloc_page(GFP_KERNEL); | ||
4589 | if (!percpu->scribble) | ||
4590 | percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL); | ||
4591 | |||
4592 | if (!percpu->scribble || | ||
4593 | (conf->level == 6 && !percpu->spare_page)) { | ||
4594 | safe_put_page(percpu->spare_page); | ||
4595 | kfree(percpu->scribble); | ||
4596 | pr_err("%s: failed memory allocation for cpu%ld\n", | ||
4597 | __func__, cpu); | ||
4598 | return NOTIFY_BAD; | ||
4599 | } | ||
4600 | break; | ||
4601 | case CPU_DEAD: | ||
4602 | case CPU_DEAD_FROZEN: | ||
4603 | safe_put_page(percpu->spare_page); | ||
4604 | kfree(percpu->scribble); | ||
4605 | percpu->spare_page = NULL; | ||
4606 | percpu->scribble = NULL; | ||
4607 | break; | ||
4608 | default: | ||
4609 | break; | ||
4610 | } | ||
4611 | return NOTIFY_OK; | ||
4612 | } | ||
4613 | #endif | ||
4614 | |||
4615 | static int raid5_alloc_percpu(raid5_conf_t *conf) | ||
4616 | { | ||
4617 | unsigned long cpu; | ||
4618 | struct page *spare_page; | ||
4619 | struct raid5_percpu *allcpus; | ||
4620 | void *scribble; | ||
4621 | int err; | ||
4622 | |||
4623 | allcpus = alloc_percpu(struct raid5_percpu); | ||
4624 | if (!allcpus) | ||
4625 | return -ENOMEM; | ||
4626 | conf->percpu = allcpus; | ||
4627 | |||
4628 | get_online_cpus(); | ||
4629 | err = 0; | ||
4630 | for_each_present_cpu(cpu) { | ||
4631 | if (conf->level == 6) { | ||
4632 | spare_page = alloc_page(GFP_KERNEL); | ||
4633 | if (!spare_page) { | ||
4634 | err = -ENOMEM; | ||
4635 | break; | ||
4636 | } | ||
4637 | per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page; | ||
4638 | } | ||
4639 | scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL); | ||
4640 | if (!scribble) { | ||
4641 | err = -ENOMEM; | ||
4642 | break; | ||
4643 | } | ||
4644 | per_cpu_ptr(conf->percpu, cpu)->scribble = scribble; | ||
4645 | } | ||
4646 | #ifdef CONFIG_HOTPLUG_CPU | ||
4647 | conf->cpu_notify.notifier_call = raid456_cpu_notify; | ||
4648 | conf->cpu_notify.priority = 0; | ||
4649 | if (err == 0) | ||
4650 | err = register_cpu_notifier(&conf->cpu_notify); | ||
4651 | #endif | ||
4652 | put_online_cpus(); | ||
4653 | |||
4654 | return err; | ||
4655 | } | ||
4656 | |||
4311 | static raid5_conf_t *setup_conf(mddev_t *mddev) | 4657 | static raid5_conf_t *setup_conf(mddev_t *mddev) |
4312 | { | 4658 | { |
4313 | raid5_conf_t *conf; | 4659 | raid5_conf_t *conf; |
@@ -4347,6 +4693,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4347 | goto abort; | 4693 | goto abort; |
4348 | 4694 | ||
4349 | conf->raid_disks = mddev->raid_disks; | 4695 | conf->raid_disks = mddev->raid_disks; |
4696 | conf->scribble_len = scribble_len(conf->raid_disks); | ||
4350 | if (mddev->reshape_position == MaxSector) | 4697 | if (mddev->reshape_position == MaxSector) |
4351 | conf->previous_raid_disks = mddev->raid_disks; | 4698 | conf->previous_raid_disks = mddev->raid_disks; |
4352 | else | 4699 | else |
@@ -4362,11 +4709,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4362 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) | 4709 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) |
4363 | goto abort; | 4710 | goto abort; |
4364 | 4711 | ||
4365 | if (mddev->new_level == 6) { | 4712 | conf->level = mddev->new_level; |
4366 | conf->spare_page = alloc_page(GFP_KERNEL); | 4713 | if (raid5_alloc_percpu(conf) != 0) |
4367 | if (!conf->spare_page) | 4714 | goto abort; |
4368 | goto abort; | 4715 | |
4369 | } | ||
4370 | spin_lock_init(&conf->device_lock); | 4716 | spin_lock_init(&conf->device_lock); |
4371 | init_waitqueue_head(&conf->wait_for_stripe); | 4717 | init_waitqueue_head(&conf->wait_for_stripe); |
4372 | init_waitqueue_head(&conf->wait_for_overlap); | 4718 | init_waitqueue_head(&conf->wait_for_overlap); |
@@ -4402,7 +4748,6 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4402 | } | 4748 | } |
4403 | 4749 | ||
4404 | conf->chunk_size = mddev->new_chunk; | 4750 | conf->chunk_size = mddev->new_chunk; |
4405 | conf->level = mddev->new_level; | ||
4406 | if (conf->level == 6) | 4751 | if (conf->level == 6) |
4407 | conf->max_degraded = 2; | 4752 | conf->max_degraded = 2; |
4408 | else | 4753 | else |
@@ -4437,11 +4782,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4437 | 4782 | ||
4438 | abort: | 4783 | abort: |
4439 | if (conf) { | 4784 | if (conf) { |
4440 | shrink_stripes(conf); | 4785 | free_conf(conf); |
4441 | safe_put_page(conf->spare_page); | ||
4442 | kfree(conf->disks); | ||
4443 | kfree(conf->stripe_hashtbl); | ||
4444 | kfree(conf); | ||
4445 | return ERR_PTR(-EIO); | 4786 | return ERR_PTR(-EIO); |
4446 | } else | 4787 | } else |
4447 | return ERR_PTR(-ENOMEM); | 4788 | return ERR_PTR(-ENOMEM); |
@@ -4607,12 +4948,8 @@ abort: | |||
4607 | md_unregister_thread(mddev->thread); | 4948 | md_unregister_thread(mddev->thread); |
4608 | mddev->thread = NULL; | 4949 | mddev->thread = NULL; |
4609 | if (conf) { | 4950 | if (conf) { |
4610 | shrink_stripes(conf); | ||
4611 | print_raid5_conf(conf); | 4951 | print_raid5_conf(conf); |
4612 | safe_put_page(conf->spare_page); | 4952 | free_conf(conf); |
4613 | kfree(conf->disks); | ||
4614 | kfree(conf->stripe_hashtbl); | ||
4615 | kfree(conf); | ||
4616 | } | 4953 | } |
4617 | mddev->private = NULL; | 4954 | mddev->private = NULL; |
4618 | printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); | 4955 | printk(KERN_ALERT "raid5: failed to run raid set %s\n", mdname(mddev)); |
@@ -4627,13 +4964,10 @@ static int stop(mddev_t *mddev) | |||
4627 | 4964 | ||
4628 | md_unregister_thread(mddev->thread); | 4965 | md_unregister_thread(mddev->thread); |
4629 | mddev->thread = NULL; | 4966 | mddev->thread = NULL; |
4630 | shrink_stripes(conf); | ||
4631 | kfree(conf->stripe_hashtbl); | ||
4632 | mddev->queue->backing_dev_info.congested_fn = NULL; | 4967 | mddev->queue->backing_dev_info.congested_fn = NULL; |
4633 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 4968 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
4634 | sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); | 4969 | sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); |
4635 | kfree(conf->disks); | 4970 | free_conf(conf); |
4636 | kfree(conf); | ||
4637 | mddev->private = NULL; | 4971 | mddev->private = NULL; |
4638 | return 0; | 4972 | return 0; |
4639 | } | 4973 | } |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 52ba99954dec..116d0b44b2a9 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _RAID5_H | 2 | #define _RAID5_H |
3 | 3 | ||
4 | #include <linux/raid/xor.h> | 4 | #include <linux/raid/xor.h> |
5 | #include <linux/dmaengine.h> | ||
5 | 6 | ||
6 | /* | 7 | /* |
7 | * | 8 | * |
@@ -175,7 +176,9 @@ | |||
175 | */ | 176 | */ |
176 | enum check_states { | 177 | enum check_states { |
177 | check_state_idle = 0, | 178 | check_state_idle = 0, |
178 | check_state_run, /* parity check */ | 179 | check_state_run, /* xor parity check */ |
180 | check_state_run_q, /* q-parity check */ | ||
181 | check_state_run_pq, /* pq dual parity check */ | ||
179 | check_state_check_result, | 182 | check_state_check_result, |
180 | check_state_compute_run, /* parity repair */ | 183 | check_state_compute_run, /* parity repair */ |
181 | check_state_compute_result, | 184 | check_state_compute_result, |
@@ -215,8 +218,8 @@ struct stripe_head { | |||
215 | * @target - STRIPE_OP_COMPUTE_BLK target | 218 | * @target - STRIPE_OP_COMPUTE_BLK target |
216 | */ | 219 | */ |
217 | struct stripe_operations { | 220 | struct stripe_operations { |
218 | int target; | 221 | int target, target2; |
219 | u32 zero_sum_result; | 222 | enum sum_check_flags zero_sum_result; |
220 | } ops; | 223 | } ops; |
221 | struct r5dev { | 224 | struct r5dev { |
222 | struct bio req; | 225 | struct bio req; |
@@ -298,7 +301,7 @@ struct r6_state { | |||
298 | #define STRIPE_OP_COMPUTE_BLK 1 | 301 | #define STRIPE_OP_COMPUTE_BLK 1 |
299 | #define STRIPE_OP_PREXOR 2 | 302 | #define STRIPE_OP_PREXOR 2 |
300 | #define STRIPE_OP_BIODRAIN 3 | 303 | #define STRIPE_OP_BIODRAIN 3 |
301 | #define STRIPE_OP_POSTXOR 4 | 304 | #define STRIPE_OP_RECONSTRUCT 4 |
302 | #define STRIPE_OP_CHECK 5 | 305 | #define STRIPE_OP_CHECK 5 |
303 | 306 | ||
304 | /* | 307 | /* |
@@ -383,8 +386,21 @@ struct raid5_private_data { | |||
383 | * (fresh device added). | 386 | * (fresh device added). |
384 | * Cleared when a sync completes. | 387 | * Cleared when a sync completes. |
385 | */ | 388 | */ |
386 | 389 | /* per cpu variables */ | |
387 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | 390 | struct raid5_percpu { |
391 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | ||
392 | void *scribble; /* space for constructing buffer | ||
393 | * lists and performing address | ||
394 | * conversions | ||
395 | */ | ||
396 | } *percpu; | ||
397 | size_t scribble_len; /* size of scribble region must be | ||
398 | * associated with conf to handle | ||
399 | * cpu hotplug while reshaping | ||
400 | */ | ||
401 | #ifdef CONFIG_HOTPLUG_CPU | ||
402 | struct notifier_block cpu_notify; | ||
403 | #endif | ||
388 | 404 | ||
389 | /* | 405 | /* |
390 | * Free stripes pool | 406 | * Free stripes pool |
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5fc2ef8d97fa..866e61c4e2e0 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h | |||
@@ -58,25 +58,57 @@ struct dma_chan_ref { | |||
58 | * array. | 58 | * array. |
59 | * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a | 59 | * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a |
60 | * dependency chain | 60 | * dependency chain |
61 | * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. | ||
62 | */ | 61 | */ |
63 | enum async_tx_flags { | 62 | enum async_tx_flags { |
64 | ASYNC_TX_XOR_ZERO_DST = (1 << 0), | 63 | ASYNC_TX_XOR_ZERO_DST = (1 << 0), |
65 | ASYNC_TX_XOR_DROP_DST = (1 << 1), | 64 | ASYNC_TX_XOR_DROP_DST = (1 << 1), |
66 | ASYNC_TX_ACK = (1 << 3), | 65 | ASYNC_TX_ACK = (1 << 2), |
67 | ASYNC_TX_DEP_ACK = (1 << 4), | 66 | }; |
67 | |||
68 | /** | ||
69 | * struct async_submit_ctl - async_tx submission/completion modifiers | ||
70 | * @flags: submission modifiers | ||
71 | * @depend_tx: parent dependency of the current operation being submitted | ||
72 | * @cb_fn: callback routine to run at operation completion | ||
73 | * @cb_param: parameter for the callback routine | ||
74 | * @scribble: caller provided space for dma/page address conversions | ||
75 | */ | ||
76 | struct async_submit_ctl { | ||
77 | enum async_tx_flags flags; | ||
78 | struct dma_async_tx_descriptor *depend_tx; | ||
79 | dma_async_tx_callback cb_fn; | ||
80 | void *cb_param; | ||
81 | void *scribble; | ||
68 | }; | 82 | }; |
69 | 83 | ||
70 | #ifdef CONFIG_DMA_ENGINE | 84 | #ifdef CONFIG_DMA_ENGINE |
71 | #define async_tx_issue_pending_all dma_issue_pending_all | 85 | #define async_tx_issue_pending_all dma_issue_pending_all |
86 | |||
87 | /** | ||
88 | * async_tx_issue_pending - send pending descriptor to the hardware channel | ||
89 | * @tx: descriptor handle to retrieve hardware context | ||
90 | * | ||
91 | * Note: any dependent operations will have already been issued by | ||
92 | * async_tx_channel_switch, or (in the case of no channel switch) will | ||
93 | * be already pending on this channel. | ||
94 | */ | ||
95 | static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) | ||
96 | { | ||
97 | if (likely(tx)) { | ||
98 | struct dma_chan *chan = tx->chan; | ||
99 | struct dma_device *dma = chan->device; | ||
100 | |||
101 | dma->device_issue_pending(chan); | ||
102 | } | ||
103 | } | ||
72 | #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL | 104 | #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL |
73 | #include <asm/async_tx.h> | 105 | #include <asm/async_tx.h> |
74 | #else | 106 | #else |
75 | #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ | 107 | #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ |
76 | __async_tx_find_channel(dep, type) | 108 | __async_tx_find_channel(dep, type) |
77 | struct dma_chan * | 109 | struct dma_chan * |
78 | __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | 110 | __async_tx_find_channel(struct async_submit_ctl *submit, |
79 | enum dma_transaction_type tx_type); | 111 | enum dma_transaction_type tx_type); |
80 | #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ | 112 | #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ |
81 | #else | 113 | #else |
82 | static inline void async_tx_issue_pending_all(void) | 114 | static inline void async_tx_issue_pending_all(void) |
@@ -84,10 +116,16 @@ static inline void async_tx_issue_pending_all(void) | |||
84 | do { } while (0); | 116 | do { } while (0); |
85 | } | 117 | } |
86 | 118 | ||
119 | static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) | ||
120 | { | ||
121 | do { } while (0); | ||
122 | } | ||
123 | |||
87 | static inline struct dma_chan * | 124 | static inline struct dma_chan * |
88 | async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | 125 | async_tx_find_channel(struct async_submit_ctl *submit, |
89 | enum dma_transaction_type tx_type, struct page **dst, int dst_count, | 126 | enum dma_transaction_type tx_type, struct page **dst, |
90 | struct page **src, int src_count, size_t len) | 127 | int dst_count, struct page **src, int src_count, |
128 | size_t len) | ||
91 | { | 129 | { |
92 | return NULL; | 130 | return NULL; |
93 | } | 131 | } |
@@ -99,46 +137,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | |||
99 | * @cb_fn_param: parameter to pass to the callback routine | 137 | * @cb_fn_param: parameter to pass to the callback routine |
100 | */ | 138 | */ |
101 | static inline void | 139 | static inline void |
102 | async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) | 140 | async_tx_sync_epilog(struct async_submit_ctl *submit) |
103 | { | 141 | { |
104 | if (cb_fn) | 142 | if (submit->cb_fn) |
105 | cb_fn(cb_fn_param); | 143 | submit->cb_fn(submit->cb_param); |
106 | } | 144 | } |
107 | 145 | ||
108 | void | 146 | typedef union { |
109 | async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, | 147 | unsigned long addr; |
110 | enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, | 148 | struct page *page; |
111 | dma_async_tx_callback cb_fn, void *cb_fn_param); | 149 | dma_addr_t dma; |
150 | } addr_conv_t; | ||
151 | |||
152 | static inline void | ||
153 | init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, | ||
154 | struct dma_async_tx_descriptor *tx, | ||
155 | dma_async_tx_callback cb_fn, void *cb_param, | ||
156 | addr_conv_t *scribble) | ||
157 | { | ||
158 | args->flags = flags; | ||
159 | args->depend_tx = tx; | ||
160 | args->cb_fn = cb_fn; | ||
161 | args->cb_param = cb_param; | ||
162 | args->scribble = scribble; | ||
163 | } | ||
164 | |||
165 | void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, | ||
166 | struct async_submit_ctl *submit); | ||
112 | 167 | ||
113 | struct dma_async_tx_descriptor * | 168 | struct dma_async_tx_descriptor * |
114 | async_xor(struct page *dest, struct page **src_list, unsigned int offset, | 169 | async_xor(struct page *dest, struct page **src_list, unsigned int offset, |
115 | int src_cnt, size_t len, enum async_tx_flags flags, | 170 | int src_cnt, size_t len, struct async_submit_ctl *submit); |
116 | struct dma_async_tx_descriptor *depend_tx, | ||
117 | dma_async_tx_callback cb_fn, void *cb_fn_param); | ||
118 | 171 | ||
119 | struct dma_async_tx_descriptor * | 172 | struct dma_async_tx_descriptor * |
120 | async_xor_zero_sum(struct page *dest, struct page **src_list, | 173 | async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, |
121 | unsigned int offset, int src_cnt, size_t len, | 174 | int src_cnt, size_t len, enum sum_check_flags *result, |
122 | u32 *result, enum async_tx_flags flags, | 175 | struct async_submit_ctl *submit); |
123 | struct dma_async_tx_descriptor *depend_tx, | ||
124 | dma_async_tx_callback cb_fn, void *cb_fn_param); | ||
125 | 176 | ||
126 | struct dma_async_tx_descriptor * | 177 | struct dma_async_tx_descriptor * |
127 | async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, | 178 | async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, |
128 | unsigned int src_offset, size_t len, enum async_tx_flags flags, | 179 | unsigned int src_offset, size_t len, |
129 | struct dma_async_tx_descriptor *depend_tx, | 180 | struct async_submit_ctl *submit); |
130 | dma_async_tx_callback cb_fn, void *cb_fn_param); | ||
131 | 181 | ||
132 | struct dma_async_tx_descriptor * | 182 | struct dma_async_tx_descriptor * |
133 | async_memset(struct page *dest, int val, unsigned int offset, | 183 | async_memset(struct page *dest, int val, unsigned int offset, |
134 | size_t len, enum async_tx_flags flags, | 184 | size_t len, struct async_submit_ctl *submit); |
135 | struct dma_async_tx_descriptor *depend_tx, | 185 | |
136 | dma_async_tx_callback cb_fn, void *cb_fn_param); | 186 | struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); |
187 | |||
188 | struct dma_async_tx_descriptor * | ||
189 | async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, | ||
190 | size_t len, struct async_submit_ctl *submit); | ||
191 | |||
192 | struct dma_async_tx_descriptor * | ||
193 | async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, | ||
194 | size_t len, enum sum_check_flags *pqres, struct page *spare, | ||
195 | struct async_submit_ctl *submit); | ||
196 | |||
197 | struct dma_async_tx_descriptor * | ||
198 | async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, | ||
199 | struct page **ptrs, struct async_submit_ctl *submit); | ||
137 | 200 | ||
138 | struct dma_async_tx_descriptor * | 201 | struct dma_async_tx_descriptor * |
139 | async_trigger_callback(enum async_tx_flags flags, | 202 | async_raid6_datap_recov(int src_num, size_t bytes, int faila, |
140 | struct dma_async_tx_descriptor *depend_tx, | 203 | struct page **ptrs, struct async_submit_ctl *submit); |
141 | dma_async_tx_callback cb_fn, void *cb_fn_param); | ||
142 | 204 | ||
143 | void async_tx_quiesce(struct dma_async_tx_descriptor **tx); | 205 | void async_tx_quiesce(struct dma_async_tx_descriptor **tx); |
144 | #endif /* _ASYNC_TX_H_ */ | 206 | #endif /* _ASYNC_TX_H_ */ |
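Editor's note: with the declarations above, a raid6-style producer chains operations by refilling one async_submit_ctl per step. The fragment below sketches generating and then validating a P/Q pair (kernel context; blocks, scribble, spare_page, disks, done_fn and done_arg are illustrative placeholders, and it assumes the async_pq convention that blocks[] holds the data pages followed by P and Q):

    struct async_submit_ctl submit;
    struct dma_async_tx_descriptor *tx;
    enum sum_check_flags pqres = 0;

    /* generate P and Q for the stripe */
    init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, scribble);
    tx = async_gen_syndrome(blocks, 0, disks, STRIPE_SIZE, &submit);

    /* validate what was just generated, ordered after the generation */
    init_async_submit(&submit, ASYNC_TX_ACK, tx, done_fn, done_arg, scribble);
    tx = async_syndrome_val(blocks, 0, disks, STRIPE_SIZE, &pqres,
                            spare_page, &submit);
    async_tx_issue_pending(tx);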
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ffefba81c818..1012f1abcb54 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h | |||
@@ -52,11 +52,11 @@ enum dma_status { | |||
52 | enum dma_transaction_type { | 52 | enum dma_transaction_type { |
53 | DMA_MEMCPY, | 53 | DMA_MEMCPY, |
54 | DMA_XOR, | 54 | DMA_XOR, |
55 | DMA_PQ_XOR, | 55 | DMA_PQ, |
56 | DMA_DUAL_XOR, | 56 | DMA_DUAL_XOR, |
57 | DMA_PQ_UPDATE, | 57 | DMA_PQ_UPDATE, |
58 | DMA_ZERO_SUM, | 58 | DMA_XOR_VAL, |
59 | DMA_PQ_ZERO_SUM, | 59 | DMA_PQ_VAL, |
60 | DMA_MEMSET, | 60 | DMA_MEMSET, |
61 | DMA_MEMCPY_CRC32C, | 61 | DMA_MEMCPY_CRC32C, |
62 | DMA_INTERRUPT, | 62 | DMA_INTERRUPT, |
@@ -70,18 +70,23 @@ enum dma_transaction_type { | |||
70 | 70 | ||
71 | /** | 71 | /** |
72 | * enum dma_ctrl_flags - DMA flags to augment operation preparation, | 72 | * enum dma_ctrl_flags - DMA flags to augment operation preparation, |
73 | * control completion, and communicate status. | 73 | * control completion, and communicate status. |
74 | * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of | 74 | * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of |
75 | * this transaction | 75 | * this transaction |
76 | * @DMA_CTRL_ACK - the descriptor cannot be reused until the client | 76 | * @DMA_CTRL_ACK - the descriptor cannot be reused until the client |
77 | * acknowledges receipt, i.e. has a chance to establish any | 77 | * acknowledges receipt, i.e. has a chance to establish any dependency |
78 | * dependency chains | 78 | * chains |
79 | * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) | 79 | * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) |
80 | * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) | 80 | * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) |
81 | * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single | 81 | * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single |
82 | * (if not set, do the source dma-unmapping as page) | 82 | * (if not set, do the source dma-unmapping as page) |
83 | * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single | 83 | * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single |
84 | * (if not set, do the destination dma-unmapping as page) | 84 | * (if not set, do the destination dma-unmapping as page) |
85 | * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q | ||
86 | * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P | ||
87 | * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as | ||
88 | * sources that were the result of a previous operation; in the case of a PQ | ||
89 | * operation it continues the calculation with new sources | ||
85 | */ | 90 | */ |
86 | enum dma_ctrl_flags { | 91 | enum dma_ctrl_flags { |
87 | DMA_PREP_INTERRUPT = (1 << 0), | 92 | DMA_PREP_INTERRUPT = (1 << 0), |
@@ -90,9 +95,31 @@ enum dma_ctrl_flags { | |||
90 | DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), | 95 | DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), |
91 | DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), | 96 | DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), |
92 | DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), | 97 | DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), |
98 | DMA_PREP_PQ_DISABLE_P = (1 << 6), | ||
99 | DMA_PREP_PQ_DISABLE_Q = (1 << 7), | ||
100 | DMA_PREP_CONTINUE = (1 << 8), | ||
93 | }; | 101 | }; |
94 | 102 | ||
95 | /** | 103 | /** |
104 | * enum sum_check_bits - bit position of pq_check_flags | ||
105 | */ | ||
106 | enum sum_check_bits { | ||
107 | SUM_CHECK_P = 0, | ||
108 | SUM_CHECK_Q = 1, | ||
109 | }; | ||
110 | |||
111 | /** | ||
112 | * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations | ||
113 | * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise | ||
114 | * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise | ||
115 | */ | ||
116 | enum sum_check_flags { | ||
117 | SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), | ||
118 | SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), | ||
119 | }; | ||
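Editor's note: check operations now report through these typed flags rather than a bare u32; a set bit means the corresponding parity failed validation. A fragment in the style of the raid5/raid6 check-result handlers earlier in this diff (res is an enum sum_check_flags value returned through the *result/*pqres pointer):

    static void report_sum_check(enum sum_check_flags res)
    {
            if (res & SUM_CHECK_P_RESULT)
                    pr_debug("P (xor) parity mismatch\n");
            if (res & SUM_CHECK_Q_RESULT)
                    pr_debug("Q (reed-solomon) parity mismatch\n");
            if (!res)
                    pr_debug("both parities verified\n");
    }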
120 | |||
121 | |||
122 | /** | ||
96 | * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. | 123 | * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. |
97 | * See linux/cpumask.h | 124 | * See linux/cpumask.h |
98 | */ | 125 | */ |
@@ -213,6 +240,7 @@ struct dma_async_tx_descriptor { | |||
213 | * @global_node: list_head for global dma_device_list | 240 | * @global_node: list_head for global dma_device_list |
214 | * @cap_mask: one or more dma_capability flags | 241 | * @cap_mask: one or more dma_capability flags |
215 | * @max_xor: maximum number of xor sources, 0 if no capability | 242 | * @max_xor: maximum number of xor sources, 0 if no capability |
243 | * @max_pq: maximum number of PQ sources and PQ-continue capability | ||
216 | * @dev_id: unique device ID | 244 | * @dev_id: unique device ID |
217 | * @dev: struct device reference for dma mapping api | 245 | * @dev: struct device reference for dma mapping api |
218 | * @device_alloc_chan_resources: allocate resources and return the | 246 | * @device_alloc_chan_resources: allocate resources and return the |
@@ -220,7 +248,9 @@ struct dma_async_tx_descriptor { | |||
220 | * @device_free_chan_resources: release DMA channel's resources | 248 | * @device_free_chan_resources: release DMA channel's resources |
221 | * @device_prep_dma_memcpy: prepares a memcpy operation | 249 | * @device_prep_dma_memcpy: prepares a memcpy operation |
222 | * @device_prep_dma_xor: prepares a xor operation | 250 | * @device_prep_dma_xor: prepares a xor operation |
223 | * @device_prep_dma_zero_sum: prepares a zero_sum operation | 251 | * @device_prep_dma_xor_val: prepares a xor validation operation |
252 | * @device_prep_dma_pq: prepares a pq operation | ||
253 | * @device_prep_dma_pq_val: prepares a pq validation (zero_sum) operation | ||
224 | * @device_prep_dma_memset: prepares a memset operation | 254 | * @device_prep_dma_memset: prepares a memset operation |
225 | * @device_prep_dma_interrupt: prepares an end of chain interrupt operation | 255 | * @device_prep_dma_interrupt: prepares an end of chain interrupt operation |
226 | * @device_prep_slave_sg: prepares a slave dma operation | 256 | * @device_prep_slave_sg: prepares a slave dma operation |
@@ -235,7 +265,9 @@ struct dma_device { | |||
235 | struct list_head channels; | 265 | struct list_head channels; |
236 | struct list_head global_node; | 266 | struct list_head global_node; |
237 | dma_cap_mask_t cap_mask; | 267 | dma_cap_mask_t cap_mask; |
238 | int max_xor; | 268 | unsigned short max_xor; |
269 | unsigned short max_pq; | ||
270 | #define DMA_HAS_PQ_CONTINUE (1 << 15) | ||
239 | 271 | ||
240 | int dev_id; | 272 | int dev_id; |
241 | struct device *dev; | 273 | struct device *dev; |
@@ -249,9 +281,17 @@ struct dma_device { | |||
249 | struct dma_async_tx_descriptor *(*device_prep_dma_xor)( | 281 | struct dma_async_tx_descriptor *(*device_prep_dma_xor)( |
250 | struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, | 282 | struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, |
251 | unsigned int src_cnt, size_t len, unsigned long flags); | 283 | unsigned int src_cnt, size_t len, unsigned long flags); |
252 | struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( | 284 | struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( |
253 | struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, | 285 | struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, |
254 | size_t len, u32 *result, unsigned long flags); | 286 | size_t len, enum sum_check_flags *result, unsigned long flags); |
287 | struct dma_async_tx_descriptor *(*device_prep_dma_pq)( | ||
288 | struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, | ||
289 | unsigned int src_cnt, const unsigned char *scf, | ||
290 | size_t len, unsigned long flags); | ||
291 | struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( | ||
292 | struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, | ||
293 | unsigned int src_cnt, const unsigned char *scf, size_t len, | ||
294 | enum sum_check_flags *pqres, unsigned long flags); | ||
255 | struct dma_async_tx_descriptor *(*device_prep_dma_memset)( | 295 | struct dma_async_tx_descriptor *(*device_prep_dma_memset)( |
256 | struct dma_chan *chan, dma_addr_t dest, int value, size_t len, | 296 | struct dma_chan *chan, dma_addr_t dest, int value, size_t len, |
257 | unsigned long flags); | 297 | unsigned long flags); |
@@ -270,6 +310,60 @@ struct dma_device { | |||
270 | void (*device_issue_pending)(struct dma_chan *chan); | 310 | void (*device_issue_pending)(struct dma_chan *chan); |
271 | }; | 311 | }; |
272 | 312 | ||
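Taken together, the new prepare hooks follow the usual dmaengine submission pattern: prep, submit, issue. A minimal sketch of a raw pq submission against this interface (real users normally go through the async_tx wrappers; submit_pq() is hypothetical):

	static dma_cookie_t submit_pq(struct dma_chan *chan, dma_addr_t *pq,
				      dma_addr_t *src, unsigned int src_cnt,
				      const unsigned char *scf, size_t len)
	{
		struct dma_device *dma = chan->device;
		struct dma_async_tx_descriptor *tx;
		dma_cookie_t cookie;

		tx = dma->device_prep_dma_pq(chan, pq, src, src_cnt, scf, len,
					     DMA_PREP_INTERRUPT);
		if (!tx)
			return -ENOMEM;	/* descriptor ring exhausted */

		cookie = tx->tx_submit(tx);	/* queue on the channel */
		dma_async_issue_pending(chan);	/* kick the hardware */

		return cookie;
	}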
313 | static inline void | ||
314 | dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) | ||
315 | { | ||
316 | dma->max_pq = maxpq; | ||
317 | if (has_pq_continue) | ||
318 | dma->max_pq |= DMA_HAS_PQ_CONTINUE; | ||
319 | } | ||
320 | |||
321 | static inline bool dmaf_continue(enum dma_ctrl_flags flags) | ||
322 | { | ||
323 | return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; | ||
324 | } | ||
325 | |||
326 | static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) | ||
327 | { | ||
328 | enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; | ||
329 | |||
330 | return (flags & mask) == mask; | ||
331 | } | ||
332 | |||
333 | static inline bool dma_dev_has_pq_continue(struct dma_device *dma) | ||
334 | { | ||
335 | return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; | ||
336 | } | ||
337 | |||
338 | static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma) | ||
339 | { | ||
340 | return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; | ||
341 | } | ||
342 | |||
343 | /* dma_maxpq - reduce maxpq in the face of continued operations | ||
344 | * @dma - dma device with PQ capability | ||
345 | * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set | ||
346 | * | ||
347 | * When an engine does not support native continuation we need 3 extra | ||
348 | * source slots to reuse P and Q with the following coefficients: | ||
349 | * 1/ {00} * P : remove P from Q', but use it as a source for P' | ||
350 | * 2/ {01} * Q : use Q to continue Q' calculation | ||
351 | * 3/ {00} * Q : subtract Q from P' to cancel (2) | ||
352 | * | ||
353 | * In the case where P is disabled we only need 1 extra source: | ||
354 | * 1/ {01} * Q : use Q to continue Q' calculation | ||
355 | */ | ||
356 | static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) | ||
357 | { | ||
358 | if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) | ||
359 | return dma_dev_to_maxpq(dma); | ||
360 | else if (dmaf_p_disabled_continue(flags)) | ||
361 | return dma_dev_to_maxpq(dma) - 1; | ||
362 | else if (dmaf_continue(flags)) | ||
363 | return dma_dev_to_maxpq(dma) - 3; | ||
364 | BUG(); | ||
365 | } | ||
366 | |||
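Net effect for clients: when DMA_PREP_CONTINUE is set on an engine without native continuation, three of the advertised source slots are consumed re-injecting P and Q (only one when P is disabled). A minimal capacity check built on dma_maxpq() (pq_fits_in_one_pass() is hypothetical):

	/* e.g. an 8-source engine without native continuation accepts at
	 * most 5 real sources once DMA_PREP_CONTINUE is set, or 7 when
	 * DMA_PREP_PQ_DISABLE_P is set as well.
	 */
	static bool pq_fits_in_one_pass(struct dma_device *dma, int src_cnt,
					enum dma_ctrl_flags flags)
	{
		return src_cnt <= dma_maxpq(dma, flags);
	}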
273 | /* --- public DMA engine API --- */ | 367 | /* --- public DMA engine API --- */ |
274 | 368 | ||
275 | #ifdef CONFIG_DMA_ENGINE | 369 | #ifdef CONFIG_DMA_ENGINE |