diff options
author | Dan Williams <dan.j.williams@intel.com> | 2008-07-16 22:44:56 -0400 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2008-07-17 20:59:55 -0400 |
commit | 1e55db2d6bdef92abc981b68673564e63c80da4d (patch) | |
tree | ac3cd7c249735772df8c08b1d803563bad854527 | |
parent | 20fc190b0ef58bf8b3b0bff9de122083956f82ec (diff) |
async_tx: ensure the xor destination buffer remains dma-mapped
When the number of source buffers for an xor operation exceeds the hardware
channel maximum, async_xor creates a chain of dependent operations. The result
of one operation is reused as an input to the next to continue the xor
calculation. The destination buffer should remain mapped for the duration of
the entire chain. To provide this guarantee the code must no longer be allowed
to fall back to the synchronous path, as this will preclude the buffer from being
unmapped, i.e. the dma-driver will potentially miss the descriptor with
!DMA_COMPL_SKIP_DEST_UNMAP.
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r-- | crypto/async_tx/async_xor.c | 244 |
1 files changed, 113 insertions, 131 deletions
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 3a0dddca5a10..1fcf45ac81ec 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c | |||
@@ -35,72 +35,118 @@ | |||
35 | * when CONFIG_DMA_ENGINE=n | 35 | * when CONFIG_DMA_ENGINE=n |
36 | */ | 36 | */ |
37 | static __always_inline struct dma_async_tx_descriptor * | 37 | static __always_inline struct dma_async_tx_descriptor * |
38 | do_async_xor(struct dma_device *device, | 38 | do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, |
39 | struct dma_chan *chan, struct page *dest, struct page **src_list, | 39 | unsigned int offset, int src_cnt, size_t len, |
40 | unsigned int offset, unsigned int src_cnt, size_t len, | 40 | enum async_tx_flags flags, |
41 | enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, | 41 | struct dma_async_tx_descriptor *depend_tx, |
42 | dma_async_tx_callback cb_fn, void *cb_param) | 42 | dma_async_tx_callback cb_fn, void *cb_param) |
43 | { | 43 | { |
44 | dma_addr_t dma_dest; | 44 | struct dma_device *dma = chan->device; |
45 | dma_addr_t *dma_src = (dma_addr_t *) src_list; | 45 | dma_addr_t *dma_src = (dma_addr_t *) src_list; |
46 | struct dma_async_tx_descriptor *tx; | 46 | struct dma_async_tx_descriptor *tx = NULL; |
47 | int src_off = 0; | ||
47 | int i; | 48 | int i; |
48 | unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; | 49 | dma_async_tx_callback _cb_fn; |
49 | 50 | void *_cb_param; | |
50 | pr_debug("%s: len: %zu\n", __func__, len); | 51 | enum async_tx_flags async_flags; |
51 | 52 | enum dma_ctrl_flags dma_flags; | |
52 | dma_dest = dma_map_page(device->dev, dest, offset, len, | 53 | int xor_src_cnt; |
53 | DMA_FROM_DEVICE); | 54 | dma_addr_t dma_dest; |
54 | 55 | ||
56 | dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_FROM_DEVICE); | ||
55 | for (i = 0; i < src_cnt; i++) | 57 | for (i = 0; i < src_cnt; i++) |
56 | dma_src[i] = dma_map_page(device->dev, src_list[i], offset, | 58 | dma_src[i] = dma_map_page(dma->dev, src_list[i], offset, |
57 | len, DMA_TO_DEVICE); | 59 | len, DMA_TO_DEVICE); |
58 | 60 | ||
59 | /* Since we have clobbered the src_list we are committed | 61 | while (src_cnt) { |
60 | * to doing this asynchronously. Drivers force forward progress | 62 | async_flags = flags; |
61 | * in case they can not provide a descriptor | 63 | dma_flags = 0; |
62 | */ | 64 | xor_src_cnt = min(src_cnt, dma->max_xor); |
63 | tx = device->device_prep_dma_xor(chan, dma_dest, dma_src, src_cnt, len, | 65 | /* if we are submitting additional xors, leave the chain open, |
64 | dma_prep_flags); | 66 | * clear the callback parameters, and leave the destination |
65 | if (!tx) { | 67 | * buffer mapped |
66 | if (depend_tx) | 68 | */ |
69 | if (src_cnt > xor_src_cnt) { | ||
70 | async_flags &= ~ASYNC_TX_ACK; | ||
71 | dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; | ||
72 | _cb_fn = NULL; | ||
73 | _cb_param = NULL; | ||
74 | } else { | ||
75 | _cb_fn = cb_fn; | ||
76 | _cb_param = cb_param; | ||
77 | } | ||
78 | if (_cb_fn) | ||
79 | dma_flags |= DMA_PREP_INTERRUPT; | ||
80 | |||
81 | /* Since we have clobbered the src_list we are committed | ||
82 | * to doing this asynchronously. Drivers force forward progress | ||
83 | * in case they can not provide a descriptor | ||
84 | */ | ||
85 | tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off], | ||
86 | xor_src_cnt, len, dma_flags); | ||
87 | |||
88 | if (unlikely(!tx && depend_tx)) | ||
67 | dma_wait_for_async_tx(depend_tx); | 89 | dma_wait_for_async_tx(depend_tx); |
68 | 90 | ||
69 | while (!tx) | 91 | /* spin wait for the preceeding transactions to complete */ |
70 | tx = device->device_prep_dma_xor(chan, dma_dest, | 92 | while (unlikely(!tx)) |
71 | dma_src, src_cnt, len, | 93 | tx = dma->device_prep_dma_xor(chan, dma_dest, |
72 | dma_prep_flags); | 94 | &dma_src[src_off], |
73 | } | 95 | xor_src_cnt, len, |
96 | dma_flags); | ||
97 | |||
98 | async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, | ||
99 | _cb_param); | ||
100 | |||
101 | depend_tx = tx; | ||
102 | flags |= ASYNC_TX_DEP_ACK; | ||
103 | |||
104 | if (src_cnt > xor_src_cnt) { | ||
105 | /* drop completed sources */ | ||
106 | src_cnt -= xor_src_cnt; | ||
107 | src_off += xor_src_cnt; | ||
74 | 108 | ||
75 | async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); | 109 | /* use the intermediate result a source */ |
110 | dma_src[--src_off] = dma_dest; | ||
111 | src_cnt++; | ||
112 | } else | ||
113 | break; | ||
114 | } | ||
76 | 115 | ||
77 | return tx; | 116 | return tx; |
78 | } | 117 | } |
79 | 118 | ||
80 | static void | 119 | static void |
81 | do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, | 120 | do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, |
82 | unsigned int src_cnt, size_t len, enum async_tx_flags flags, | 121 | int src_cnt, size_t len, enum async_tx_flags flags, |
83 | struct dma_async_tx_descriptor *depend_tx, | 122 | struct dma_async_tx_descriptor *depend_tx, |
84 | dma_async_tx_callback cb_fn, void *cb_param) | 123 | dma_async_tx_callback cb_fn, void *cb_param) |
85 | { | 124 | { |
86 | void *_dest; | ||
87 | int i; | 125 | int i; |
88 | 126 | int xor_src_cnt; | |
89 | pr_debug("%s: len: %zu\n", __func__, len); | 127 | int src_off = 0; |
128 | void *dest_buf; | ||
129 | void **srcs = (void **) src_list; | ||
90 | 130 | ||
91 | /* reuse the 'src_list' array to convert to buffer pointers */ | 131 | /* reuse the 'src_list' array to convert to buffer pointers */ |
92 | for (i = 0; i < src_cnt; i++) | 132 | for (i = 0; i < src_cnt; i++) |
93 | src_list[i] = (struct page *) | 133 | srcs[i] = page_address(src_list[i]) + offset; |
94 | (page_address(src_list[i]) + offset); | ||
95 | 134 | ||
96 | /* set destination address */ | 135 | /* set destination address */ |
97 | _dest = page_address(dest) + offset; | 136 | dest_buf = page_address(dest) + offset; |
98 | 137 | ||
99 | if (flags & ASYNC_TX_XOR_ZERO_DST) | 138 | if (flags & ASYNC_TX_XOR_ZERO_DST) |
100 | memset(_dest, 0, len); | 139 | memset(dest_buf, 0, len); |
140 | |||
141 | while (src_cnt > 0) { | ||
142 | /* process up to 'MAX_XOR_BLOCKS' sources */ | ||
143 | xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); | ||
144 | xor_blocks(xor_src_cnt, len, dest_buf, &srcs[src_off]); | ||
101 | 145 | ||
102 | xor_blocks(src_cnt, len, _dest, | 146 | /* drop completed sources */ |
103 | (void **) src_list); | 147 | src_cnt -= xor_src_cnt; |
148 | src_off += xor_src_cnt; | ||
149 | } | ||
104 | 150 | ||
105 | async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); | 151 | async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); |
106 | } | 152 | } |
@@ -132,106 +178,42 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, | |||
132 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, | 178 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, |
133 | &dest, 1, src_list, | 179 | &dest, 1, src_list, |
134 | src_cnt, len); | 180 | src_cnt, len); |
135 | struct dma_device *device = chan ? chan->device : NULL; | ||
136 | struct dma_async_tx_descriptor *tx = NULL; | ||
137 | dma_async_tx_callback _cb_fn; | ||
138 | void *_cb_param; | ||
139 | unsigned long local_flags; | ||
140 | int xor_src_cnt; | ||
141 | int i = 0, src_off = 0; | ||
142 | |||
143 | BUG_ON(src_cnt <= 1); | 181 | BUG_ON(src_cnt <= 1); |
144 | 182 | ||
145 | while (src_cnt) { | 183 | if (chan) { |
146 | local_flags = flags; | 184 | /* run the xor asynchronously */ |
147 | if (device) { /* run the xor asynchronously */ | 185 | pr_debug("%s (async): len: %zu\n", __func__, len); |
148 | xor_src_cnt = min(src_cnt, device->max_xor); | ||
149 | /* if we are submitting additional xors | ||
150 | * only set the callback on the last transaction | ||
151 | */ | ||
152 | if (src_cnt > xor_src_cnt) { | ||
153 | local_flags &= ~ASYNC_TX_ACK; | ||
154 | _cb_fn = NULL; | ||
155 | _cb_param = NULL; | ||
156 | } else { | ||
157 | _cb_fn = cb_fn; | ||
158 | _cb_param = cb_param; | ||
159 | } | ||
160 | |||
161 | tx = do_async_xor(device, chan, dest, | ||
162 | &src_list[src_off], offset, | ||
163 | xor_src_cnt, len, local_flags, | ||
164 | depend_tx, _cb_fn, _cb_param); | ||
165 | } else { /* run the xor synchronously */ | ||
166 | /* in the sync case the dest is an implied source | ||
167 | * (assumes the dest is at the src_off index) | ||
168 | */ | ||
169 | if (flags & ASYNC_TX_XOR_DROP_DST) { | ||
170 | src_cnt--; | ||
171 | src_off++; | ||
172 | } | ||
173 | |||
174 | /* process up to 'MAX_XOR_BLOCKS' sources */ | ||
175 | xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); | ||
176 | 186 | ||
177 | /* if we are submitting additional xors | 187 | return do_async_xor(chan, dest, src_list, offset, src_cnt, len, |
178 | * only set the callback on the last transaction | 188 | flags, depend_tx, cb_fn, cb_param); |
179 | */ | 189 | } else { |
180 | if (src_cnt > xor_src_cnt) { | 190 | /* run the xor synchronously */ |
181 | local_flags &= ~ASYNC_TX_ACK; | 191 | pr_debug("%s (sync): len: %zu\n", __func__, len); |
182 | _cb_fn = NULL; | ||
183 | _cb_param = NULL; | ||
184 | } else { | ||
185 | _cb_fn = cb_fn; | ||
186 | _cb_param = cb_param; | ||
187 | } | ||
188 | |||
189 | /* wait for any prerequisite operations */ | ||
190 | if (depend_tx) { | ||
191 | /* if ack is already set then we cannot be sure | ||
192 | * we are referring to the correct operation | ||
193 | */ | ||
194 | BUG_ON(async_tx_test_ack(depend_tx)); | ||
195 | if (dma_wait_for_async_tx(depend_tx) == | ||
196 | DMA_ERROR) | ||
197 | panic("%s: DMA_ERROR waiting for " | ||
198 | "depend_tx\n", | ||
199 | __func__); | ||
200 | } | ||
201 | |||
202 | do_sync_xor(dest, &src_list[src_off], offset, | ||
203 | xor_src_cnt, len, local_flags, depend_tx, | ||
204 | _cb_fn, _cb_param); | ||
205 | } | ||
206 | 192 | ||
207 | /* the previous tx is hidden from the client, | 193 | /* in the sync case the dest is an implied source |
208 | * so ack it | 194 | * (assumes the dest is the first source) |
209 | */ | 195 | */ |
210 | if (i && depend_tx) | 196 | if (flags & ASYNC_TX_XOR_DROP_DST) { |
211 | async_tx_ack(depend_tx); | 197 | src_cnt--; |
212 | 198 | src_list++; | |
213 | depend_tx = tx; | 199 | } |
214 | 200 | ||
215 | if (src_cnt > xor_src_cnt) { | 201 | /* wait for any prerequisite operations */ |
216 | /* drop completed sources */ | 202 | if (depend_tx) { |
217 | src_cnt -= xor_src_cnt; | 203 | /* if ack is already set then we cannot be sure |
218 | src_off += xor_src_cnt; | 204 | * we are referring to the correct operation |
205 | */ | ||
206 | BUG_ON(async_tx_test_ack(depend_tx)); | ||
207 | if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) | ||
208 | panic("%s: DMA_ERROR waiting for depend_tx\n", | ||
209 | __func__); | ||
210 | } | ||
219 | 211 | ||
220 | /* unconditionally preserve the destination */ | 212 | do_sync_xor(dest, src_list, offset, src_cnt, len, |
221 | flags &= ~ASYNC_TX_XOR_ZERO_DST; | 213 | flags, depend_tx, cb_fn, cb_param); |
222 | 214 | ||
223 | /* use the intermediate result a source, but remember | 215 | return NULL; |
224 | * it's dropped, because it's implied, in the sync case | ||
225 | */ | ||
226 | src_list[--src_off] = dest; | ||
227 | src_cnt++; | ||
228 | flags |= ASYNC_TX_XOR_DROP_DST; | ||
229 | } else | ||
230 | src_cnt = 0; | ||
231 | i++; | ||
232 | } | 216 | } |
233 | |||
234 | return tx; | ||
235 | } | 217 | } |
236 | EXPORT_SYMBOL_GPL(async_xor); | 218 | EXPORT_SYMBOL_GPL(async_xor); |
237 | 219 | ||