async_tx: add support for asynchronous GF multiplication

[ Based on an original patch by Yuri Tikhonov ]

This adds support for doing asynchronous GF multiplication by adding
two additional functions to the async_tx API:

 async_gen_syndrome() does simultaneous XOR and Galois field
    multiplication of sources.

 async_syndrome_val() validates the given source buffers against known P
    and Q values.

When a request is made to run async_pq against more than the hardware
maximum number of supported sources we need to reuse the previous
generated P and Q values as sources into the next operation. Care must
be taken to remove Q from P' and P from Q'. For example to perform a 5
source pq op with hardware that only supports 4 sources at a time the
following approach is taken:

    p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))
    p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))

    p' = p + q + q + src4 = p + src4
    q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4

Note: 4 is the minimum acceptable maxpq otherwise we punt to
synchronous-software path.

The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as
sources (in the above manner) and fill the remaining slots up to maxpq
with the new sources/coefficients.

Note1: Some devices have native support for P+Q continuation and can skip
this extra work. Devices with this capability can advertise it with
dma_set_maxpq. It is up to each driver how to handle the
DMA_PREP_CONTINUE flag.

Note2: The api supports disabling the generation of P when generating Q,
this is ignored by the synchronous path but is implemented by some dma
devices to save unnecessary writes. In this case the continuation
algorithm is simplified to only reuse Q as a source.

Cc: H. Peter Anvin <hpa@zytor.com>
Cc: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
author: Dan Williams <dan.j.williams@intel.com> 2009-07-14 15:20:36 -0400
committer: Dan Williams <dan.j.williams@intel.com> 2009-08-29 22:09:27 -0400
commit: b2f46fd8ef3dff2ab30f31126833f78b7480283a (patch)
tree: 9f111e3e313b4d142c12d2d8156a2704a36904f8 /include
parent: 95475e57113c66aac7583925736ed2e2d58c990d (diff)
2 files changed, 90 insertions, 6 deletions
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 12a2efcbd565..e6ce5f004f98 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset,
 struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
+                   size_t len, struct async_submit_ctl *submit);
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
+                   size_t len, enum sum_check_flags *pqres, struct page *spare,
+                   struct async_submit_ctl *submit);
 void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
 #endif /* _ASYNC_TX_H_ */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 02447afcebad..ce010cd991d2 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -52,7 +52,7 @@ enum dma_status {
 enum dma_transaction_type {
        DMA_MEMCPY,
        DMA_XOR,
-        DMA_PQ_XOR,
+        DMA_PQ,
        DMA_DUAL_XOR,
        DMA_PQ_UPDATE,
        DMA_XOR_VAL,
@@ -70,20 +70,28 @@ enum dma_transaction_type {
 /**
 * enum dma_ctrl_flags - DMA flags to augment operation preparation,
- *      control completion, and communicate status.
+ *  control completion, and communicate status.
 * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
- *      this transaction
+ *  this transaction
 * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
- *      acknowledges receipt, i.e. has has a chance to establish any
+ *  acknowledges receipt, i.e. has has a chance to establish any dependency
- *      dependency chains
+ *  chains
 * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
 * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
+ * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
+ * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
+ * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
+ *  sources that were the result of a previous operation, in the case of a PQ
+ *  operation it continues the calculation with new sources
 */
 enum dma_ctrl_flags {
        DMA_PREP_INTERRUPT = (1 << 0),
        DMA_CTRL_ACK = (1 << 1),
        DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
        DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
+        DMA_PREP_PQ_DISABLE_P = (1 << 4),
+        DMA_PREP_PQ_DISABLE_Q = (1 << 5),
+        DMA_PREP_CONTINUE = (1 << 6),
 };
 /**
@@ -226,6 +234,7 @@ struct dma_async_tx_descriptor {
 * @global_node: list_head for global dma_device_list
 * @cap_mask: one or more dma_capability flags
 * @max_xor: maximum number of xor sources, 0 if no capability
+ * @max_pq: maximum number of PQ sources and PQ-continue capability
 * @dev_id: unique device ID
 * @dev: struct device reference for dma mapping api
 * @device_alloc_chan_resources: allocate resources and return the
@@ -234,6 +243,8 @@ struct dma_async_tx_descriptor {
 * @device_prep_dma_memcpy: prepares a memcpy operation
 * @device_prep_dma_xor: prepares a xor operation
 * @device_prep_dma_xor_val: prepares a xor validation operation
+ * @device_prep_dma_pq: prepares a pq operation
+ * @device_prep_dma_pq_val: prepares a pqzero_sum operation
 * @device_prep_dma_memset: prepares a memset operation
 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
 * @device_prep_slave_sg: prepares a slave dma operation
@@ -248,7 +259,9 @@ struct dma_device {
        struct list_head channels;
        struct list_head global_node;
        dma_cap_mask_t  cap_mask;
-        int max_xor;
+        unsigned short max_xor;
+        unsigned short max_pq;
+        #define DMA_HAS_PQ_CONTINUE (1 << 15)
        int dev_id;
        struct device *dev;
@@ -265,6 +278,14 @@ struct dma_device {
        struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
                struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
                size_t len, enum sum_check_flags *result, unsigned long flags);
+        struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
+                struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+                unsigned int src_cnt, const unsigned char *scf,
+                size_t len, unsigned long flags);
+        struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
+                struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+                unsigned int src_cnt, const unsigned char *scf, size_t len,
+                enum sum_check_flags *pqres, unsigned long flags);
        struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
                struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
                unsigned long flags);
@@ -283,6 +304,60 @@ struct dma_device {
        void (*device_issue_pending)(struct dma_chan *chan);
 };
+static inline void
+dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
+{
+        dma->max_pq = maxpq;
+        if (has_pq_continue)
+                dma->max_pq |= DMA_HAS_PQ_CONTINUE;
+}
+static inline bool dmaf_continue(enum dma_ctrl_flags flags)
+{
+        return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
+}
+static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
+{
+        enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
+        return (flags & mask) == mask;
+}
+static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
+{
+        return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
+}
+static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
+{
+        return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
+}
+/* dma_maxpq - reduce maxpq in the face of continued operations
+ * @dma - dma device with PQ capability
+ * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
+ *
+ * When an engine does not support native continuation we need 3 extra
+ * source slots to reuse P and Q with the following coefficients:
+ * 1/ {00} * P : remove P from Q', but use it as a source for P'
+ * 2/ {01} * Q : use Q to continue Q' calculation
+ * 3/ {00} * Q : subtract Q from P' to cancel (2)
+ *
+ * In the case where P is disabled we only need 1 extra source:
+ * 1/ {01} * Q : use Q to continue Q' calculation
+ */
+static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
+{
+        if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
+                return dma_dev_to_maxpq(dma);
+        else if (dmaf_p_disabled_continue(flags))
+                return dma_dev_to_maxpq(dma) - 1;
+        else if (dmaf_continue(flags))
+                return dma_dev_to_maxpq(dma) - 3;
+        BUG();
+}
 /* --- public DMA engine API --- */
 #ifdef CONFIG_DMA_ENGINE
author	Dan Williams <dan.j.williams@intel.com>	2009-07-14 15:20:36 -0400
committer	Dan Williams <dan.j.williams@intel.com>	2009-08-29 22:09:27 -0400
commit	b2f46fd8ef3dff2ab30f31126833f78b7480283a (patch)
tree	9f111e3e313b4d142c12d2d8156a2704a36904f8 /include
parent	95475e57113c66aac7583925736ed2e2d58c990d (diff)

diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 12a2efcbd565..e6ce5f004f98 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset,
185		185
186	struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);	186	struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
187		187
		188	struct dma_async_tx_descriptor *
		189	async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
		190	size_t len, struct async_submit_ctl *submit);
		191
		192	struct dma_async_tx_descriptor *
		193	async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
		194	size_t len, enum sum_check_flags *pqres, struct page *spare,
		195	struct async_submit_ctl *submit);
		196
188	void async_tx_quiesce(struct dma_async_tx_descriptor **tx);	197	void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
189	#endif /* _ASYNC_TX_H_ */	198	#endif /* _ASYNC_TX_H_ */


diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 02447afcebad..ce010cd991d2 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -52,7 +52,7 @@ enum dma_status {
52	enum dma_transaction_type {	52	enum dma_transaction_type {
53	DMA_MEMCPY,	53	DMA_MEMCPY,
54	DMA_XOR,	54	DMA_XOR,
55	DMA_PQ_XOR,	55	DMA_PQ,
56	DMA_DUAL_XOR,	56	DMA_DUAL_XOR,
57	DMA_PQ_UPDATE,	57	DMA_PQ_UPDATE,
58	DMA_XOR_VAL,	58	DMA_XOR_VAL,
@@ -70,20 +70,28 @@ enum dma_transaction_type {
70		70
71	/**	71	/**
72	* enum dma_ctrl_flags - DMA flags to augment operation preparation,	72	* enum dma_ctrl_flags - DMA flags to augment operation preparation,
73	* control completion, and communicate status.	73	* control completion, and communicate status.
74	* @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of	74	* @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
75	* this transaction	75	* this transaction
76	* @DMA_CTRL_ACK - the descriptor cannot be reused until the client	76	* @DMA_CTRL_ACK - the descriptor cannot be reused until the client
77	* acknowledges receipt, i.e. has has a chance to establish any	77	* acknowledges receipt, i.e. has has a chance to establish any dependency
78	* dependency chains	78	* chains
79	* @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)	79	* @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
80	* @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)	80	* @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
		81	* @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
		82	* @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
		83	* @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
		84	* sources that were the result of a previous operation, in the case of a PQ
		85	* operation it continues the calculation with new sources
81	*/	86	*/
82	enum dma_ctrl_flags {	87	enum dma_ctrl_flags {
83	DMA_PREP_INTERRUPT = (1 << 0),	88	DMA_PREP_INTERRUPT = (1 << 0),
84	DMA_CTRL_ACK = (1 << 1),	89	DMA_CTRL_ACK = (1 << 1),
85	DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),	90	DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
86	DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),	91	DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
		92	DMA_PREP_PQ_DISABLE_P = (1 << 4),
		93	DMA_PREP_PQ_DISABLE_Q = (1 << 5),
		94	DMA_PREP_CONTINUE = (1 << 6),
87	};	95	};
88		96
89	/**	97	/**
@@ -226,6 +234,7 @@ struct dma_async_tx_descriptor {
226	* @global_node: list_head for global dma_device_list	234	* @global_node: list_head for global dma_device_list
227	* @cap_mask: one or more dma_capability flags	235	* @cap_mask: one or more dma_capability flags
228	* @max_xor: maximum number of xor sources, 0 if no capability	236	* @max_xor: maximum number of xor sources, 0 if no capability
		237	* @max_pq: maximum number of PQ sources and PQ-continue capability
229	* @dev_id: unique device ID	238	* @dev_id: unique device ID
230	* @dev: struct device reference for dma mapping api	239	* @dev: struct device reference for dma mapping api
231	* @device_alloc_chan_resources: allocate resources and return the	240	* @device_alloc_chan_resources: allocate resources and return the
@@ -234,6 +243,8 @@ struct dma_async_tx_descriptor {
234	* @device_prep_dma_memcpy: prepares a memcpy operation	243	* @device_prep_dma_memcpy: prepares a memcpy operation
235	* @device_prep_dma_xor: prepares a xor operation	244	* @device_prep_dma_xor: prepares a xor operation
236	* @device_prep_dma_xor_val: prepares a xor validation operation	245	* @device_prep_dma_xor_val: prepares a xor validation operation
		246	* @device_prep_dma_pq: prepares a pq operation
		247	* @device_prep_dma_pq_val: prepares a pqzero_sum operation
237	* @device_prep_dma_memset: prepares a memset operation	248	* @device_prep_dma_memset: prepares a memset operation
238	* @device_prep_dma_interrupt: prepares an end of chain interrupt operation	249	* @device_prep_dma_interrupt: prepares an end of chain interrupt operation
239	* @device_prep_slave_sg: prepares a slave dma operation	250	* @device_prep_slave_sg: prepares a slave dma operation
@@ -248,7 +259,9 @@ struct dma_device {
248	struct list_head channels;	259	struct list_head channels;
249	struct list_head global_node;	260	struct list_head global_node;
250	dma_cap_mask_t cap_mask;	261	dma_cap_mask_t cap_mask;
251	int max_xor;	262	unsigned short max_xor;
		263	unsigned short max_pq;
		264	#define DMA_HAS_PQ_CONTINUE (1 << 15)
252		265
253	int dev_id;	266	int dev_id;
254	struct device *dev;	267	struct device *dev;
@@ -265,6 +278,14 @@ struct dma_device {
265	struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(	278	struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
266	struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,	279	struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
267	size_t len, enum sum_check_flags *result, unsigned long flags);	280	size_t len, enum sum_check_flags *result, unsigned long flags);
		281	struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
		282	struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
		283	unsigned int src_cnt, const unsigned char *scf,
		284	size_t len, unsigned long flags);
		285	struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
		286	struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
		287	unsigned int src_cnt, const unsigned char *scf, size_t len,
		288	enum sum_check_flags *pqres, unsigned long flags);
268	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(	289	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
269	struct dma_chan *chan, dma_addr_t dest, int value, size_t len,	290	struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
270	unsigned long flags);	291	unsigned long flags);
@@ -283,6 +304,60 @@ struct dma_device {
283	void (*device_issue_pending)(struct dma_chan *chan);	304	void (*device_issue_pending)(struct dma_chan *chan);
284	};	305	};
285		306
		307	static inline void
		308	dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
		309	{
		310	dma->max_pq = maxpq;
		311	if (has_pq_continue)
		312	dma->max_pq |= DMA_HAS_PQ_CONTINUE;
		313	}
		314
		315	static inline bool dmaf_continue(enum dma_ctrl_flags flags)
		316	{
		317	return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
		318	}
		319
		320	static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
		321	{
		322	enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
		323
		324	return (flags & mask) == mask;
		325	}
		326
		327	static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
		328	{
		329	return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
		330	}
		331
		332	static unsigned short dma_dev_to_maxpq(struct dma_device *dma)
		333	{
		334	return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
		335	}
		336
		337	/* dma_maxpq - reduce maxpq in the face of continued operations
		338	* @dma - dma device with PQ capability
		339	* @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
		340	*
		341	* When an engine does not support native continuation we need 3 extra
		342	* source slots to reuse P and Q with the following coefficients:
		343	* 1/ {00} * P : remove P from Q', but use it as a source for P'
		344	* 2/ {01} * Q : use Q to continue Q' calculation
		345	* 3/ {00} * Q : subtract Q from P' to cancel (2)
		346	*
		347	* In the case where P is disabled we only need 1 extra source:
		348	* 1/ {01} * Q : use Q to continue Q' calculation
		349	*/
		350	static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
		351	{
		352	if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
		353	return dma_dev_to_maxpq(dma);
		354	else if (dmaf_p_disabled_continue(flags))
		355	return dma_dev_to_maxpq(dma) - 1;
		356	else if (dmaf_continue(flags))
		357	return dma_dev_to_maxpq(dma) - 3;
		358	BUG();
		359	}
		360
286	/* --- public DMA engine API --- */	361	/* --- public DMA engine API --- */
287		362
288	#ifdef CONFIG_DMA_ENGINE	363	#ifdef CONFIG_DMA_ENGINE