From 6b45e0f24c7b5316dea7ed5995e1b809a3266b43 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Wed, 1 Feb 2017 12:48:42 -0700
Subject: fpga zynq: Check for errors after completing DMA

The completion did not check the interrupt status to see if any error
bits were asserted, check error bits and dump some registers if things
went wrong.

A few fixes are needed to make this work, the IXR_ERROR_FLAGS_MASK was
wrong, it included the done bits, which shows a bug in mask/unmask_irqs
which were using the wrong bits, simplify all of this stuff.

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Moritz Fischer <moritz.fischer@ettus.com>
Acked-by: Alan Tull <atull@opensource.altera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/zynq-fpga.c | 54 ++++++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 22 deletions(-)

(limited to 'drivers/fpga')

diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index 1812bf7614e1..f674e32832ec 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -89,7 +89,7 @@
 #define IXR_D_P_DONE_MASK		BIT(12)
  /* FPGA programmed */
 #define IXR_PCFG_DONE_MASK		BIT(2)
-#define IXR_ERROR_FLAGS_MASK		0x00F0F860
+#define IXR_ERROR_FLAGS_MASK		0x00F0C860
 #define IXR_ALL_MASK			0xF8F7F87F
 
 /* Miscellaneous constant values */
@@ -143,23 +143,10 @@ static inline u32 zynq_fpga_read(const struct zynq_fpga_priv *priv,
 	readl_poll_timeout(priv->io_base + addr, val, cond, sleep_us, \
 			   timeout_us)
 
-static void zynq_fpga_mask_irqs(struct zynq_fpga_priv *priv)
+/* Cause the specified irq mask bits to generate IRQs */
+static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable)
 {
-	u32 intr_mask;
-
-	intr_mask = zynq_fpga_read(priv, INT_MASK_OFFSET);
-	zynq_fpga_write(priv, INT_MASK_OFFSET,
-			intr_mask | IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK);
-}
-
-static void zynq_fpga_unmask_irqs(struct zynq_fpga_priv *priv)
-{
-	u32 intr_mask;
-
-	intr_mask = zynq_fpga_read(priv, INT_MASK_OFFSET);
-	zynq_fpga_write(priv, INT_MASK_OFFSET,
-			intr_mask
-			& ~(IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK));
+	zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable);
 }
 
 static irqreturn_t zynq_fpga_isr(int irq, void *data)
@@ -167,7 +154,7 @@ static irqreturn_t zynq_fpga_isr(int irq, void *data)
 	struct zynq_fpga_priv *priv = data;
 
 	/* disable DMA and error IRQs */
-	zynq_fpga_mask_irqs(priv);
+	zynq_fpga_set_irq(priv, 0);
 
 	complete(&priv->dma_done);
 
@@ -285,6 +272,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 			       const char *buf, size_t count)
 {
 	struct zynq_fpga_priv *priv;
+	const char *why;
 	int err;
 	char *kbuf;
 	size_t in_count;
@@ -312,7 +300,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 	reinit_completion(&priv->dma_done);
 
 	/* enable DMA and error IRQs */
-	zynq_fpga_unmask_irqs(priv);
+	zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
 
 	/* the +1 in the src addr is used to hold off on DMA_DONE IRQ
 	 * until both AXI and PCAP are done ...
@@ -331,11 +319,33 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
 	zynq_fpga_write(priv, INT_STS_OFFSET, intr_status);
 
+	if (intr_status & IXR_ERROR_FLAGS_MASK) {
+		why = "DMA reported error";
+		err = -EIO;
+		goto out_report;
+	}
+
 	if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
-		dev_err(&mgr->dev, "Error configuring FPGA\n");
-		err = -EFAULT;
+		why = "DMA did not complete";
+		err = -EIO;
+		goto out_report;
 	}
 
+	err = 0;
+	goto out_clk;
+
+out_report:
+	dev_err(&mgr->dev,
+		"%s: INT_STS:0x%x CTRL:0x%x LOCK:0x%x INT_MASK:0x%x STATUS:0x%x MCTRL:0x%x\n",
+		why,
+		intr_status,
+		zynq_fpga_read(priv, CTRL_OFFSET),
+		zynq_fpga_read(priv, LOCK_OFFSET),
+		zynq_fpga_read(priv, INT_MASK_OFFSET),
+		zynq_fpga_read(priv, STATUS_OFFSET),
+		zynq_fpga_read(priv, MCTRL_OFFSET));
+
+out_clk:
 	clk_disable(priv->clk);
 
 out_free:
@@ -452,7 +462,7 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 	/* unlock the device */
 	zynq_fpga_write(priv, UNLOCK_OFFSET, UNLOCK_MASK);
 
-	zynq_fpga_write(priv, INT_MASK_OFFSET, 0xFFFFFFFF);
+	zynq_fpga_set_irq(priv, 0);
 	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
 	err = devm_request_irq(dev, priv->irq, zynq_fpga_isr, 0, dev_name(dev),
 			       priv);
-- 
cgit v1.2.2


From b496df86ac1bbe393a55ddbfed35d46e74ef9767 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Wed, 1 Feb 2017 12:48:43 -0700
Subject: fpga zynq: Check the bitstream for validity

There is no sense in sending a bitstream we know will not work, and
with the variety of options for bitstream generation in Xilinx tools
it is not terribly clear what the correct input should be.

This is particularly important for Zynq since auto-correction was
removed from the driver and the Zynq hardware only accepts a bitstream
format that is different from what the Xilinx tools typically produce.

Worse, the hardware provides no indication why the bitstream fails,
it simply times out if the input is wrong.

The best option here is to have the kernel print a message informing
the user they are using a malformed bistream and programming failure
isn't for any of the myriad of other reasons.

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
Acked-by: Alan Tull <atull@opensource.altera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/zynq-fpga.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'drivers/fpga')

diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index f674e32832ec..c3fc2a231e28 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -161,6 +161,19 @@ static irqreturn_t zynq_fpga_isr(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+/* Sanity check the proposed bitstream. It must start with the sync word in
+ * the correct byte order, and be dword aligned. The input is a Xilinx .bin
+ * file with every 32 bit quantity swapped.
+ */
+static bool zynq_fpga_has_sync(const u8 *buf, size_t count)
+{
+	for (; count >= 4; buf += 4, count -= 4)
+		if (buf[0] == 0x66 && buf[1] == 0x55 && buf[2] == 0x99 &&
+		    buf[3] == 0xaa)
+			return true;
+	return false;
+}
+
 static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
 				    struct fpga_image_info *info,
 				    const char *buf, size_t count)
@@ -177,6 +190,13 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
 
 	/* don't globally reset PL if we're doing partial reconfig */
 	if (!(info->flags & FPGA_MGR_PARTIAL_RECONFIG)) {
+		if (!zynq_fpga_has_sync(buf, count)) {
+			dev_err(&mgr->dev,
+				"Invalid bitstream, could not find a sync word. Bitstream must be a byte swapped .bin file\n");
+			err = -EINVAL;
+			goto out_err;
+		}
+
 		/* assert AXI interface resets */
 		regmap_write(priv->slcr, SLCR_FPGA_RST_CTRL_OFFSET,
 			     FPGA_RST_ALL_MASK);
@@ -410,6 +430,7 @@ static enum fpga_mgr_states zynq_fpga_ops_state(struct fpga_manager *mgr)
 }
 
 static const struct fpga_manager_ops zynq_fpga_ops = {
+	.initial_header_size = 128,
 	.state = zynq_fpga_ops_state,
 	.write_init = zynq_fpga_ops_write_init,
 	.write = zynq_fpga_ops_write,
-- 
cgit v1.2.2


From baa6d396635129d8a67793e884f3b2182c7354b3 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Wed, 1 Feb 2017 12:48:44 -0700
Subject: fpga: Add scatterlist based programming

Requiring contiguous kernel memory is not a good idea, this is a limited
resource and allocation can fail under normal work loads.

This introduces a .write_sg op that supporting drivers can provide
to DMA directly from dis-contiguous memory and a new entry point
fpga_mgr_buf_load_sg that users can call to directly provide page
lists.

The full matrix of compatibility is provided, either the linear or sg
interface can be used by the user with a driver supporting either
interface.

A notable change for drivers is that the .write op can now be called
multiple times.

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Alan Tull <atull@opensource.altera.com>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/fpga-mgr.c | 236 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 206 insertions(+), 30 deletions(-)

(limited to 'drivers/fpga')

diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index f0a69d3e60a5..86d2cb203533 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -25,16 +25,106 @@
 #include <linux/of.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
 
 static DEFINE_IDA(fpga_mgr_ida);
 static struct class *fpga_mgr_class;
 
+/*
+ * Call the low level driver's write_init function.  This will do the
+ * device-specific things to get the FPGA into the state where it is ready to
+ * receive an FPGA image. The low level driver only gets to see the first
+ * initial_header_size bytes in the buffer.
+ */
+static int fpga_mgr_write_init_buf(struct fpga_manager *mgr,
+				   struct fpga_image_info *info,
+				   const char *buf, size_t count)
+{
+	int ret;
+
+	mgr->state = FPGA_MGR_STATE_WRITE_INIT;
+	if (!mgr->mops->initial_header_size)
+		ret = mgr->mops->write_init(mgr, info, NULL, 0);
+	else
+		ret = mgr->mops->write_init(
+		    mgr, info, buf, min(mgr->mops->initial_header_size, count));
+
+	if (ret) {
+		dev_err(&mgr->dev, "Error preparing FPGA for writing\n");
+		mgr->state = FPGA_MGR_STATE_WRITE_INIT_ERR;
+		return ret;
+	}
+
+	return 0;
+}
+
+static int fpga_mgr_write_init_sg(struct fpga_manager *mgr,
+				  struct fpga_image_info *info,
+				  struct sg_table *sgt)
+{
+	struct sg_mapping_iter miter;
+	size_t len;
+	char *buf;
+	int ret;
+
+	if (!mgr->mops->initial_header_size)
+		return fpga_mgr_write_init_buf(mgr, info, NULL, 0);
+
+	/*
+	 * First try to use miter to map the first fragment to access the
+	 * header, this is the typical path.
+	 */
+	sg_miter_start(&miter, sgt->sgl, sgt->nents, SG_MITER_FROM_SG);
+	if (sg_miter_next(&miter) &&
+	    miter.length >= mgr->mops->initial_header_size) {
+		ret = fpga_mgr_write_init_buf(mgr, info, miter.addr,
+					      miter.length);
+		sg_miter_stop(&miter);
+		return ret;
+	}
+	sg_miter_stop(&miter);
+
+	/* Otherwise copy the fragments into temporary memory. */
+	buf = kmalloc(mgr->mops->initial_header_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	len = sg_copy_to_buffer(sgt->sgl, sgt->nents, buf,
+				mgr->mops->initial_header_size);
+	ret = fpga_mgr_write_init_buf(mgr, info, buf, len);
+
+	kfree(buf);
+
+	return ret;
+}
+
+/*
+ * After all the FPGA image has been written, do the device specific steps to
+ * finish and set the FPGA into operating mode.
+ */
+static int fpga_mgr_write_complete(struct fpga_manager *mgr,
+				   struct fpga_image_info *info)
+{
+	int ret;
+
+	mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE;
+	ret = mgr->mops->write_complete(mgr, info);
+	if (ret) {
+		dev_err(&mgr->dev, "Error after writing image data to FPGA\n");
+		mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR;
+		return ret;
+	}
+	mgr->state = FPGA_MGR_STATE_OPERATING;
+
+	return 0;
+}
+
 /**
- * fpga_mgr_buf_load - load fpga from image in buffer
+ * fpga_mgr_buf_load_sg - load fpga from image in buffer from a scatter list
  * @mgr:	fpga manager
  * @info:	fpga image specific information
- * @buf:	buffer contain fpga image
- * @count:	byte count of buf
+ * @sgt:	scatterlist table
  *
  * Step the low level fpga manager through the device-specific steps of getting
  * an FPGA ready to be configured, writing the image to it, then doing whatever
@@ -42,54 +132,139 @@ static struct class *fpga_mgr_class;
  * mgr pointer from of_fpga_mgr_get() or fpga_mgr_get() and checked that it is
  * not an error code.
  *
+ * This is the preferred entry point for FPGA programming, it does not require
+ * any contiguous kernel memory.
+ *
  * Return: 0 on success, negative error code otherwise.
  */
-int fpga_mgr_buf_load(struct fpga_manager *mgr, struct fpga_image_info *info,
-		      const char *buf, size_t count)
+int fpga_mgr_buf_load_sg(struct fpga_manager *mgr, struct fpga_image_info *info,
+			 struct sg_table *sgt)
 {
-	struct device *dev = &mgr->dev;
 	int ret;
 
-	/*
-	 * Call the low level driver's write_init function.  This will do the
-	 * device-specific things to get the FPGA into the state where it is
-	 * ready to receive an FPGA image. The low level driver only gets to
-	 * see the first initial_header_size bytes in the buffer.
-	 */
-	mgr->state = FPGA_MGR_STATE_WRITE_INIT;
-	ret = mgr->mops->write_init(mgr, info, buf,
-				    min(mgr->mops->initial_header_size, count));
+	ret = fpga_mgr_write_init_sg(mgr, info, sgt);
+	if (ret)
+		return ret;
+
+	/* Write the FPGA image to the FPGA. */
+	mgr->state = FPGA_MGR_STATE_WRITE;
+	if (mgr->mops->write_sg) {
+		ret = mgr->mops->write_sg(mgr, sgt);
+	} else {
+		struct sg_mapping_iter miter;
+
+		sg_miter_start(&miter, sgt->sgl, sgt->nents, SG_MITER_FROM_SG);
+		while (sg_miter_next(&miter)) {
+			ret = mgr->mops->write(mgr, miter.addr, miter.length);
+			if (ret)
+				break;
+		}
+		sg_miter_stop(&miter);
+	}
+
 	if (ret) {
-		dev_err(dev, "Error preparing FPGA for writing\n");
-		mgr->state = FPGA_MGR_STATE_WRITE_INIT_ERR;
+		dev_err(&mgr->dev, "Error while writing image data to FPGA\n");
+		mgr->state = FPGA_MGR_STATE_WRITE_ERR;
 		return ret;
 	}
 
+	return fpga_mgr_write_complete(mgr, info);
+}
+EXPORT_SYMBOL_GPL(fpga_mgr_buf_load_sg);
+
+static int fpga_mgr_buf_load_mapped(struct fpga_manager *mgr,
+				    struct fpga_image_info *info,
+				    const char *buf, size_t count)
+{
+	int ret;
+
+	ret = fpga_mgr_write_init_buf(mgr, info, buf, count);
+	if (ret)
+		return ret;
+
 	/*
 	 * Write the FPGA image to the FPGA.
 	 */
 	mgr->state = FPGA_MGR_STATE_WRITE;
 	ret = mgr->mops->write(mgr, buf, count);
 	if (ret) {
-		dev_err(dev, "Error while writing image data to FPGA\n");
+		dev_err(&mgr->dev, "Error while writing image data to FPGA\n");
 		mgr->state = FPGA_MGR_STATE_WRITE_ERR;
 		return ret;
 	}
 
+	return fpga_mgr_write_complete(mgr, info);
+}
+
+/**
+ * fpga_mgr_buf_load - load fpga from image in buffer
+ * @mgr:	fpga manager
+ * @flags:	flags setting fpga confuration modes
+ * @buf:	buffer contain fpga image
+ * @count:	byte count of buf
+ *
+ * Step the low level fpga manager through the device-specific steps of getting
+ * an FPGA ready to be configured, writing the image to it, then doing whatever
+ * post-configuration steps necessary.  This code assumes the caller got the
+ * mgr pointer from of_fpga_mgr_get() and checked that it is not an error code.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int fpga_mgr_buf_load(struct fpga_manager *mgr, struct fpga_image_info *info,
+		      const char *buf, size_t count)
+{
+	struct page **pages;
+	struct sg_table sgt;
+	const void *p;
+	int nr_pages;
+	int index;
+	int rc;
+
 	/*
-	 * After all the FPGA image has been written, do the device specific
-	 * steps to finish and set the FPGA into operating mode.
+	 * This is just a fast path if the caller has already created a
+	 * contiguous kernel buffer and the driver doesn't require SG, non-SG
+	 * drivers will still work on the slow path.
 	 */
-	mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE;
-	ret = mgr->mops->write_complete(mgr, info);
-	if (ret) {
-		dev_err(dev, "Error after writing image data to FPGA\n");
-		mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR;
-		return ret;
+	if (mgr->mops->write)
+		return fpga_mgr_buf_load_mapped(mgr, info, buf, count);
+
+	/*
+	 * Convert the linear kernel pointer into a sg_table of pages for use
+	 * by the driver.
+	 */
+	nr_pages = DIV_ROUND_UP((unsigned long)buf + count, PAGE_SIZE) -
+		   (unsigned long)buf / PAGE_SIZE;
+	pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	p = buf - offset_in_page(buf);
+	for (index = 0; index < nr_pages; index++) {
+		if (is_vmalloc_addr(p))
+			pages[index] = vmalloc_to_page(p);
+		else
+			pages[index] = kmap_to_page((void *)p);
+		if (!pages[index]) {
+			kfree(pages);
+			return -EFAULT;
+		}
+		p += PAGE_SIZE;
 	}
-	mgr->state = FPGA_MGR_STATE_OPERATING;
 
-	return 0;
+	/*
+	 * The temporary pages list is used to code share the merging algorithm
+	 * in sg_alloc_table_from_pages
+	 */
+	rc = sg_alloc_table_from_pages(&sgt, pages, index, offset_in_page(buf),
+				       count, GFP_KERNEL);
+	kfree(pages);
+	if (rc)
+		return rc;
+
+	rc = fpga_mgr_buf_load_sg(mgr, info, &sgt);
+	sg_free_table(&sgt);
+
+	return rc;
 }
 EXPORT_SYMBOL_GPL(fpga_mgr_buf_load);
 
@@ -291,8 +466,9 @@ int fpga_mgr_register(struct device *dev, const char *name,
 	struct fpga_manager *mgr;
 	int id, ret;
 
-	if (!mops || !mops->write_init || !mops->write ||
-	    !mops->write_complete || !mops->state) {
+	if (!mops || !mops->write_complete || !mops->state ||
+	    !mops->write_init || (!mops->write && !mops->write_sg) ||
+	    (mops->write && mops->write_sg)) {
 		dev_err(dev, "Attempt to register without fpga_manager_ops\n");
 		return -EINVAL;
 	}
-- 
cgit v1.2.2


From 425902f5c8e3034b2d9d7a714289d8d579c733b2 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Wed, 1 Feb 2017 12:48:45 -0700
Subject: fpga zynq: Use the scatterlist interface

This allows the driver to avoid a high order coherent DMA allocation
and memory copy. With this patch it can DMA directly from the kernel
pages that the bitfile is stored in.

Since this is now a gather DMA operation the driver uses the ISR
to feed the chips DMA queue with each entry from the SGL.

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/zynq-fpga.c | 174 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 135 insertions(+), 39 deletions(-)

(limited to 'drivers/fpga')

diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index c3fc2a231e28..34cb98139442 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -30,6 +30,7 @@
 #include <linux/pm.h>
 #include <linux/regmap.h>
 #include <linux/string.h>
+#include <linux/scatterlist.h>
 
 /* Offsets into SLCR regmap */
 
@@ -80,6 +81,7 @@
 
 /* FPGA init status */
 #define STATUS_DMA_Q_F			BIT(31)
+#define STATUS_DMA_Q_E			BIT(30)
 #define STATUS_PCFG_INIT_MASK		BIT(4)
 
 /* Interrupt Status/Mask Register Bit definitions */
@@ -98,12 +100,16 @@
 #define DMA_INVALID_ADDRESS		GENMASK(31, 0)
 /* Used to unlock the dev */
 #define UNLOCK_MASK			0x757bdf0d
-/* Timeout for DMA to complete */
-#define DMA_DONE_TIMEOUT		msecs_to_jiffies(1000)
 /* Timeout for polling reset bits */
 #define INIT_POLL_TIMEOUT		2500000
 /* Delay for polling reset bits */
 #define INIT_POLL_DELAY			20
+/* Signal this is the last DMA transfer, wait for the AXI and PCAP before
+ * interrupting
+ */
+#define DMA_SRC_LAST_TRANSFER		1
+/* Timeout for DMA completion */
+#define DMA_TIMEOUT_MS			5000
 
 /* Masks for controlling stuff in SLCR */
 /* Disable all Level shifters */
@@ -124,6 +130,11 @@ struct zynq_fpga_priv {
 	void __iomem *io_base;
 	struct regmap *slcr;
 
+	spinlock_t dma_lock;
+	unsigned int dma_elm;
+	unsigned int dma_nelms;
+	struct scatterlist *cur_sg;
+
 	struct completion dma_done;
 };
 
@@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable)
 	zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable);
 }
 
+/* Must be called with dma_lock held */
+static void zynq_step_dma(struct zynq_fpga_priv *priv)
+{
+	u32 addr;
+	u32 len;
+	bool first;
+
+	first = priv->dma_elm == 0;
+	while (priv->cur_sg) {
+		/* Feed the DMA queue until it is full. */
+		if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F)
+			break;
+
+		addr = sg_dma_address(priv->cur_sg);
+		len = sg_dma_len(priv->cur_sg);
+		if (priv->dma_elm + 1 == priv->dma_nelms) {
+			/* The last transfer waits for the PCAP to finish too,
+			 * notice this also changes the irq_mask to ignore
+			 * IXR_DMA_DONE_MASK which ensures we do not trigger
+			 * the completion too early.
+			 */
+			addr |= DMA_SRC_LAST_TRANSFER;
+			priv->cur_sg = NULL;
+		} else {
+			priv->cur_sg = sg_next(priv->cur_sg);
+			priv->dma_elm++;
+		}
+
+		zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr);
+		zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS);
+		zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4);
+		zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+	}
+
+	/* Once the first transfer is queued we can turn on the ISR, future
+	 * calls to zynq_step_dma will happen from the ISR context. The
+	 * dma_lock spinlock guarentees this handover is done coherently, the
+	 * ISR enable is put at the end to avoid another CPU spinning in the
+	 * ISR on this lock.
+	 */
+	if (first && priv->cur_sg) {
+		zynq_fpga_set_irq(priv,
+				  IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+	} else if (!priv->cur_sg) {
+		/* The last transfer changes to DMA & PCAP mode since we do
+		 * not want to continue until everything has been flushed into
+		 * the PCAP.
+		 */
+		zynq_fpga_set_irq(priv,
+				  IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+	}
+}
+
 static irqreturn_t zynq_fpga_isr(int irq, void *data)
 {
 	struct zynq_fpga_priv *priv = data;
+	u32 intr_status;
 
-	/* disable DMA and error IRQs */
-	zynq_fpga_set_irq(priv, 0);
+	/* If anything other than DMA completion is reported stop and hand
+	 * control back to zynq_fpga_ops_write, something went wrong,
+	 * otherwise progress the DMA.
+	 */
+	spin_lock(&priv->dma_lock);
+	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
+	if (!(intr_status & IXR_ERROR_FLAGS_MASK) &&
+	    (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) {
+		zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK);
+		zynq_step_dma(priv);
+		spin_unlock(&priv->dma_lock);
+		return IRQ_HANDLED;
+	}
+	spin_unlock(&priv->dma_lock);
 
+	zynq_fpga_set_irq(priv, 0);
 	complete(&priv->dma_done);
 
 	return IRQ_HANDLED;
@@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
 	zynq_fpga_write(priv, CTRL_OFFSET,
 			(CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl));
 
-	/* check that we have room in the command queue */
+	/* We expect that the command queue is empty right now. */
 	status = zynq_fpga_read(priv, STATUS_OFFSET);
-	if (status & STATUS_DMA_Q_F) {
-		dev_err(&mgr->dev, "DMA command queue full\n");
+	if ((status & STATUS_DMA_Q_F) ||
+	    (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) {
+		dev_err(&mgr->dev, "DMA command queue not right\n");
 		err = -EBUSY;
 		goto out_err;
 	}
@@ -288,27 +367,36 @@ out_err:
 	return err;
 }
 
-static int zynq_fpga_ops_write(struct fpga_manager *mgr,
-			       const char *buf, size_t count)
+static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt)
 {
 	struct zynq_fpga_priv *priv;
 	const char *why;
 	int err;
-	char *kbuf;
-	size_t in_count;
-	dma_addr_t dma_addr;
-	u32 transfer_length;
 	u32 intr_status;
+	unsigned long timeout;
+	unsigned long flags;
+	struct scatterlist *sg;
+	int i;
 
-	in_count = count;
 	priv = mgr->priv;
 
-	kbuf =
-	    dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL);
-	if (!kbuf)
-		return -ENOMEM;
+	/* The hardware can only DMA multiples of 4 bytes, and it requires the
+	 * starting addresses to be aligned to 64 bits (UG585 pg 212).
+	 */
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		if ((sg->offset % 8) || (sg->length % 4)) {
+			dev_err(&mgr->dev,
+			    "Invalid bitstream, chunks must be aligned\n");
+			return -EINVAL;
+		}
+	}
 
-	memcpy(kbuf, buf, count);
+	priv->dma_nelms =
+	    dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
+	if (priv->dma_nelms == 0) {
+		dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n");
+		return -ENOMEM;
+	}
 
 	/* enable clock */
 	err = clk_enable(priv->clk);
@@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 		goto out_free;
 
 	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
-
 	reinit_completion(&priv->dma_done);
 
-	/* enable DMA and error IRQs */
-	zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
-
-	/* the +1 in the src addr is used to hold off on DMA_DONE IRQ
-	 * until both AXI and PCAP are done ...
-	 */
-	zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1);
-	zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS);
-
-	/* convert #bytes to #words */
-	transfer_length = (count + 3) / 4;
+	/* zynq_step_dma will turn on interrupts */
+	spin_lock_irqsave(&priv->dma_lock, flags);
+	priv->dma_elm = 0;
+	priv->cur_sg = sgt->sgl;
+	zynq_step_dma(priv);
+	spin_unlock_irqrestore(&priv->dma_lock, flags);
 
-	zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length);
-	zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+	timeout = wait_for_completion_timeout(&priv->dma_done,
+					      msecs_to_jiffies(DMA_TIMEOUT_MS));
 
-	wait_for_completion(&priv->dma_done);
+	spin_lock_irqsave(&priv->dma_lock, flags);
+	zynq_fpga_set_irq(priv, 0);
+	priv->cur_sg = NULL;
+	spin_unlock_irqrestore(&priv->dma_lock, flags);
 
 	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
-	zynq_fpga_write(priv, INT_STS_OFFSET, intr_status);
+	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
+
+	/* There doesn't seem to be a way to force cancel any DMA, so if
+	 * something went wrong we are relying on the hardware to have halted
+	 * the DMA before we get here, if there was we could use
+	 * wait_for_completion_interruptible too.
+	 */
 
 	if (intr_status & IXR_ERROR_FLAGS_MASK) {
 		why = "DMA reported error";
@@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 		goto out_report;
 	}
 
-	if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
-		why = "DMA did not complete";
+	if (priv->cur_sg ||
+	    !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
+		if (timeout == 0)
+			why = "DMA timed out";
+		else
+			why = "DMA did not complete";
 		err = -EIO;
 		goto out_report;
 	}
@@ -369,7 +464,7 @@ out_clk:
 	clk_disable(priv->clk);
 
 out_free:
-	dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr);
+	dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
 	return err;
 }
 
@@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = {
 	.initial_header_size = 128,
 	.state = zynq_fpga_ops_state,
 	.write_init = zynq_fpga_ops_write_init,
-	.write = zynq_fpga_ops_write,
+	.write_sg = zynq_fpga_ops_write,
 	.write_complete = zynq_fpga_ops_write_complete,
 };
 
@@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
+	spin_lock_init(&priv->dma_lock);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	priv->io_base = devm_ioremap_resource(dev, res);
-- 
cgit v1.2.2