aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author Dan Williams <dan.j.williams@intel.com> 2009-07-14 15:20:36 -0400
committer Dan Williams <dan.j.williams@intel.com> 2009-08-29 22:09:27 -0400
commit b2f46fd8ef3dff2ab30f31126833f78b7480283a (patch)
tree 9f111e3e313b4d142c12d2d8156a2704a36904f8 /include
parent 95475e57113c66aac7583925736ed2e2d58c990d (diff)
async_tx: add support for asynchronous GF multiplication
[ Based on an original patch by Yuri Tikhonov ]

This adds support for doing asynchronous GF multiplication by adding two additional functions to the async_tx API:

async_gen_syndrome() does simultaneous XOR and Galois field multiplication of sources.

async_syndrome_val() validates the given source buffers against known P and Q values.

When a request is made to run async_pq against more than the hardware maximum number of supported sources we need to reuse the previously generated P and Q values as sources into the next operation. Care must be taken to remove Q from P' and P from Q'. For example, to perform a 5 source pq op with hardware that only supports 4 sources at a time the following approach is taken:

    p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))
    p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))

    p' = p + q + q + src4 = p + src4
    q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4

Note: 4 is the minimum acceptable maxpq, otherwise we punt to the synchronous-software path.

The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as sources (in the above manner) and fill the remaining slots up to maxpq with the new sources/coefficients.

Note1: Some devices have native support for P+Q continuation and can skip this extra work. Devices with this capability can advertise it with dma_set_maxpq. It is up to each driver how to handle the DMA_PREP_CONTINUE flag.

Note2: The api supports disabling the generation of P when generating Q; this is ignored by the synchronous path but is implemented by some dma devices to save unnecessary writes. In this case the continuation algorithm is simplified to only reuse Q as a source.

Cc: H. Peter Anvin <hpa@zytor.com>
Cc: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/async_tx.h 9
-rw-r--r-- include/linux/dmaengine.h 87
2 files changed, 90 insertions, 6 deletions
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 12a2efcbd565..e6ce5f004f98 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset,
185 185
186struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); 186struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
187 187
/*
 * async_gen_syndrome - asynchronous P/Q generation: per the commit message,
 * performs simultaneous XOR and Galois field multiplication of the sources.
 * Declaration only; the implementation is not part of this header.
 * NOTE(review): the layout of @blocks (which entries are sources vs. the
 * P and Q destinations) is not visible here - confirm against the
 * implementation.
 */
188struct dma_async_tx_descriptor *
189async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
190 size_t len, struct async_submit_ctl *submit);
191
/*
 * async_syndrome_val - asynchronous P/Q validation: per the commit message,
 * validates the given source buffers against known P and Q values.
 * Declaration only; the implementation is not part of this header.
 * NOTE(review): @pqres presumably receives the check result and @spare is
 * presumably scratch space - neither is shown here, confirm against the
 * implementation.
 */
192struct dma_async_tx_descriptor *
193async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
194 size_t len, enum sum_check_flags *pqres, struct page *spare,
195 struct async_submit_ctl *submit);
196
188void async_tx_quiesce(struct dma_async_tx_descriptor **tx); 197void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
189#endif /* _ASYNC_TX_H_ */ 198#endif /* _ASYNC_TX_H_ */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 02447afcebad..ce010cd991d2 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -52,7 +52,7 @@ enum dma_status {
52enum dma_transaction_type { 52enum dma_transaction_type {
53 DMA_MEMCPY, 53 DMA_MEMCPY,
54 DMA_XOR, 54 DMA_XOR,
55 DMA_PQ_XOR, 55 DMA_PQ,
56 DMA_DUAL_XOR, 56 DMA_DUAL_XOR,
57 DMA_PQ_UPDATE, 57 DMA_PQ_UPDATE,
58 DMA_XOR_VAL, 58 DMA_XOR_VAL,
@@ -70,20 +70,28 @@ enum dma_transaction_type {
70 70
71/** 71/**
72 * enum dma_ctrl_flags - DMA flags to augment operation preparation, 72 * enum dma_ctrl_flags - DMA flags to augment operation preparation,
73 * control completion, and communicate status. 73 * control completion, and communicate status.
74 * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of 74 * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
75 * this transaction 75 * this transaction
76 * @DMA_CTRL_ACK - the descriptor cannot be reused until the client 76 * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
77 * acknowledges receipt, i.e. has a chance to establish any 77 * acknowledges receipt, i.e. has a chance to establish any dependency
78 * dependency chains 78 * chains
79 * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) 79 * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
80 * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) 80 * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
81 * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
82 * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
83 * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
84 * sources that were the result of a previous operation, in the case of a PQ
85 * operation it continues the calculation with new sources
81 */ 86 */
82enum dma_ctrl_flags { 87enum dma_ctrl_flags {
83 DMA_PREP_INTERRUPT = (1 << 0), 88 DMA_PREP_INTERRUPT = (1 << 0),
84 DMA_CTRL_ACK = (1 << 1), 89 DMA_CTRL_ACK = (1 << 1),
85 DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), 90 DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
86 DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), 91 DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
92 DMA_PREP_PQ_DISABLE_P = (1 << 4),
93 DMA_PREP_PQ_DISABLE_Q = (1 << 5),
94 DMA_PREP_CONTINUE = (1 << 6),
87}; 95};
88 96
89/** 97/**
@@ -226,6 +234,7 @@ struct dma_async_tx_descriptor {
226 * @global_node: list_head for global dma_device_list 234 * @global_node: list_head for global dma_device_list
227 * @cap_mask: one or more dma_capability flags 235 * @cap_mask: one or more dma_capability flags
228 * @max_xor: maximum number of xor sources, 0 if no capability 236 * @max_xor: maximum number of xor sources, 0 if no capability
237 * @max_pq: maximum number of PQ sources and PQ-continue capability
229 * @dev_id: unique device ID 238 * @dev_id: unique device ID
230 * @dev: struct device reference for dma mapping api 239 * @dev: struct device reference for dma mapping api
231 * @device_alloc_chan_resources: allocate resources and return the 240 * @device_alloc_chan_resources: allocate resources and return the
@@ -234,6 +243,8 @@ struct dma_async_tx_descriptor {
234 * @device_prep_dma_memcpy: prepares a memcpy operation 243 * @device_prep_dma_memcpy: prepares a memcpy operation
235 * @device_prep_dma_xor: prepares a xor operation 244 * @device_prep_dma_xor: prepares a xor operation
236 * @device_prep_dma_xor_val: prepares a xor validation operation 245 * @device_prep_dma_xor_val: prepares a xor validation operation
246 * @device_prep_dma_pq: prepares a pq operation
247 * @device_prep_dma_pq_val: prepares a pqzero_sum operation
237 * @device_prep_dma_memset: prepares a memset operation 248 * @device_prep_dma_memset: prepares a memset operation
238 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation 249 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
239 * @device_prep_slave_sg: prepares a slave dma operation 250 * @device_prep_slave_sg: prepares a slave dma operation
@@ -248,7 +259,9 @@ struct dma_device {
248 struct list_head channels; 259 struct list_head channels;
249 struct list_head global_node; 260 struct list_head global_node;
250 dma_cap_mask_t cap_mask; 261 dma_cap_mask_t cap_mask;
251 int max_xor; 262 unsigned short max_xor;
263 unsigned short max_pq;
264 #define DMA_HAS_PQ_CONTINUE (1 << 15)
252 265
253 int dev_id; 266 int dev_id;
254 struct device *dev; 267 struct device *dev;
@@ -265,6 +278,14 @@ struct dma_device {
265 struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( 278 struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
266 struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, 279 struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
267 size_t len, enum sum_check_flags *result, unsigned long flags); 280 size_t len, enum sum_check_flags *result, unsigned long flags);
281 struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
282 struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
283 unsigned int src_cnt, const unsigned char *scf,
284 size_t len, unsigned long flags);
285 struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
286 struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
287 unsigned int src_cnt, const unsigned char *scf, size_t len,
288 enum sum_check_flags *pqres, unsigned long flags);
268 struct dma_async_tx_descriptor *(*device_prep_dma_memset)( 289 struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
269 struct dma_chan *chan, dma_addr_t dest, int value, size_t len, 290 struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
270 unsigned long flags); 291 unsigned long flags);
@@ -283,6 +304,60 @@ struct dma_device {
283 void (*device_issue_pending)(struct dma_chan *chan); 304 void (*device_issue_pending)(struct dma_chan *chan);
284}; 305};
285 306
307static inline void
308dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
309{
310 dma->max_pq = maxpq;
311 if (has_pq_continue)
312 dma->max_pq |= DMA_HAS_PQ_CONTINUE;
313}
314
315static inline bool dmaf_continue(enum dma_ctrl_flags flags)
316{
317 return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
318}
319
320static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
321{
322 enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
323
324 return (flags & mask) == mask;
325}
326
327static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
328{
329 return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
330}
331
332static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
333{
334 return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
335}
336
337/* dma_maxpq - reduce maxpq in the face of continued operations
338 * @dma - dma device with PQ capability
339 * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
340 *
341 * When an engine does not support native continuation we need 3 extra
342 * source slots to reuse P and Q with the following coefficients:
343 * 1/ {00} * P : remove P from Q', but use it as a source for P'
344 * 2/ {01} * Q : use Q to continue Q' calculation
345 * 3/ {00} * Q : subtract Q from P' to cancel (2)
346 *
347 * In the case where P is disabled we only need 1 extra source:
348 * 1/ {01} * Q : use Q to continue Q' calculation
349 */
350static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
351{
352 if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
353 return dma_dev_to_maxpq(dma);
354 else if (dmaf_p_disabled_continue(flags))
355 return dma_dev_to_maxpq(dma) - 1;
356 else if (dmaf_continue(flags))
357 return dma_dev_to_maxpq(dma) - 3;
358 BUG();
359}
360
286/* --- public DMA engine API --- */ 361/* --- public DMA engine API --- */
287 362
288#ifdef CONFIG_DMA_ENGINE 363#ifdef CONFIG_DMA_ENGINE