author     Jason Gunthorpe <jgunthorpe@obsidianresearch.com>  2017-02-01 14:48:45 -0500
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>    2017-02-10 09:20:44 -0500
commit     425902f5c8e3034b2d9d7a714289d8d579c733b2
tree       e21330c7880799f64abed26cf1c7737adb515fb1 /drivers/fpga/zynq-fpga.c
parent     baa6d396635129d8a67793e884f3b2182c7354b3
fpga zynq: Use the scatterlist interface
This allows the driver to avoid a high-order coherent DMA allocation
and memory copy. With this patch it can DMA directly from the kernel
pages that the bitfile is stored in.

Since this is now a scatter-gather DMA operation, the driver uses the
ISR to feed the chip's DMA queue with each entry from the SGL.
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
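For orientation, below is a minimal sketch of the consumer side of this interface: wrapping already-allocated kernel pages in an sg_table and handing them to the FPGA manager core, which then invokes the driver's write_sg op. It assumes the fpga_mgr_buf_load_sg() entry point added by the parent commit; load_bitstream_pages() and its parameters are hypothetical, and error handling is trimmed.

#include <linux/scatterlist.h>
#include <linux/fpga/fpga-mgr.h>

/* Hypothetical caller: chain kernel pages into an SGL and load them.
 * No bounce copy and no high-order allocation is needed.
 */
static int load_bitstream_pages(struct fpga_manager *mgr,
                                struct fpga_image_info *info,
                                struct page **pages, unsigned int npages,
                                size_t len)
{
        struct sg_table sgt;
        int ret;

        ret = sg_alloc_table_from_pages(&sgt, pages, npages, 0, len,
                                        GFP_KERNEL);
        if (ret)
                return ret;

        ret = fpga_mgr_buf_load_sg(mgr, info, &sgt);
        sg_free_table(&sgt);
        return ret;
}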
Diffstat (limited to 'drivers/fpga/zynq-fpga.c')
 -rw-r--r--  drivers/fpga/zynq-fpga.c | 174
 1 file changed, 135 insertions(+), 39 deletions(-)
diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index c3fc2a231e28..34cb98139442 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -30,6 +30,7 @@
 #include <linux/pm.h>
 #include <linux/regmap.h>
 #include <linux/string.h>
+#include <linux/scatterlist.h>
 
 /* Offsets into SLCR regmap */
 
@@ -80,6 +81,7 @@
 
 /* FPGA init status */
 #define STATUS_DMA_Q_F          BIT(31)
+#define STATUS_DMA_Q_E          BIT(30)
 #define STATUS_PCFG_INIT_MASK   BIT(4)
 
 /* Interrupt Status/Mask Register Bit definitions */
@@ -98,12 +100,16 @@
 #define DMA_INVALID_ADDRESS     GENMASK(31, 0)
 /* Used to unlock the dev */
 #define UNLOCK_MASK             0x757bdf0d
-/* Timeout for DMA to complete */
-#define DMA_DONE_TIMEOUT        msecs_to_jiffies(1000)
 /* Timeout for polling reset bits */
 #define INIT_POLL_TIMEOUT       2500000
 /* Delay for polling reset bits */
 #define INIT_POLL_DELAY         20
+/* Signal this is the last DMA transfer, wait for the AXI and PCAP before
+ * interrupting
+ */
+#define DMA_SRC_LAST_TRANSFER   1
+/* Timeout for DMA completion */
+#define DMA_TIMEOUT_MS          5000
 
 /* Masks for controlling stuff in SLCR */
 /* Disable all Level shifters */
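Why can bit 0 of the source address carry a flag? The controller requires 64-bit-aligned source addresses, so the low three address bits of any valid chunk are always zero; DMA_SRC_LAST_TRANSFER reuses bit 0 as the in-band "last transfer, wait for AXI and PCAP" marker that the old code wrote as an opaque + 1. A short illustration (tag_last_transfer() is hypothetical):

/* Illustrative only: with 8-byte-aligned addresses, (addr & 0x7) == 0,
 * so bit 0 is free to act as the last-transfer flag.
 */
static u32 tag_last_transfer(u32 addr, bool is_last)
{
        return is_last ? (addr | DMA_SRC_LAST_TRANSFER) : addr;
}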
@@ -124,6 +130,11 @@ struct zynq_fpga_priv {
 	void __iomem *io_base;
 	struct regmap *slcr;
 
+	spinlock_t dma_lock;
+	unsigned int dma_elm;
+	unsigned int dma_nelms;
+	struct scatterlist *cur_sg;
+
 	struct completion dma_done;
 };
 
@@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable)
 	zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable);
 }
 
+/* Must be called with dma_lock held */
+static void zynq_step_dma(struct zynq_fpga_priv *priv)
+{
+	u32 addr;
+	u32 len;
+	bool first;
+
+	first = priv->dma_elm == 0;
+	while (priv->cur_sg) {
+		/* Feed the DMA queue until it is full. */
+		if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F)
+			break;
+
+		addr = sg_dma_address(priv->cur_sg);
+		len = sg_dma_len(priv->cur_sg);
+		if (priv->dma_elm + 1 == priv->dma_nelms) {
+			/* The last transfer waits for the PCAP to finish too,
+			 * notice this also changes the irq_mask to ignore
+			 * IXR_DMA_DONE_MASK which ensures we do not trigger
+			 * the completion too early.
+			 */
+			addr |= DMA_SRC_LAST_TRANSFER;
+			priv->cur_sg = NULL;
+		} else {
+			priv->cur_sg = sg_next(priv->cur_sg);
+			priv->dma_elm++;
+		}
+
+		zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr);
+		zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS);
+		zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4);
+		zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+	}
+
+	/* Once the first transfer is queued we can turn on the ISR, future
+	 * calls to zynq_step_dma will happen from the ISR context. The
+	 * dma_lock spinlock guarantees this handover is done coherently, the
+	 * ISR enable is put at the end to avoid another CPU spinning in the
+	 * ISR on this lock.
+	 */
+	if (first && priv->cur_sg) {
+		zynq_fpga_set_irq(priv,
+				  IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+	} else if (!priv->cur_sg) {
+		/* The last transfer changes to DMA & PCAP mode since we do
+		 * not want to continue until everything has been flushed into
+		 * the PCAP.
+		 */
+		zynq_fpga_set_irq(priv,
+				  IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+	}
+}
+
 static irqreturn_t zynq_fpga_isr(int irq, void *data)
 {
 	struct zynq_fpga_priv *priv = data;
+	u32 intr_status;
 
-	/* disable DMA and error IRQs */
-	zynq_fpga_set_irq(priv, 0);
+	/* If anything other than DMA completion is reported stop and hand
+	 * control back to zynq_fpga_ops_write, something went wrong,
+	 * otherwise progress the DMA.
+	 */
+	spin_lock(&priv->dma_lock);
+	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
+	if (!(intr_status & IXR_ERROR_FLAGS_MASK) &&
+	    (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) {
+		zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK);
+		zynq_step_dma(priv);
+		spin_unlock(&priv->dma_lock);
+		return IRQ_HANDLED;
+	}
+	spin_unlock(&priv->dma_lock);
 
+	zynq_fpga_set_irq(priv, 0);
 	complete(&priv->dma_done);
 
 	return IRQ_HANDLED;
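zynq_step_dma() and the reworked ISR together implement a standard process-context to ISR handover: the write path primes the hardware queue under dma_lock with local interrupts disabled, and refills then happen from hard-IRQ context under the same lock. A stripped-down sketch of the pattern under those assumptions; hw_queue_full(), hw_queue_push() and hw_irq_enable() are hypothetical stand-ins for the register accesses above:

#include <linux/interrupt.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>

struct xfer_state {
        spinlock_t lock;
        struct scatterlist *cur;  /* next SGL entry, NULL when all queued */
};

/* Caller holds st->lock, from either context */
static void feed_queue(struct xfer_state *st)
{
        while (st->cur && !hw_queue_full()) {
                hw_queue_push(sg_dma_address(st->cur), sg_dma_len(st->cur));
                st->cur = sg_next(st->cur);
        }
}

static irqreturn_t xfer_isr(int irq, void *data)
{
        struct xfer_state *st = data;

        /* Hard-IRQ context: local interrupts are already off, so a plain
         * spin_lock() is sufficient here.
         */
        spin_lock(&st->lock);
        feed_queue(st);
        spin_unlock(&st->lock);
        return IRQ_HANDLED;
}

static void xfer_start(struct xfer_state *st, struct scatterlist *sgl)
{
        unsigned long flags;

        /* Process context must block local interrupts while holding the
         * lock, or the ISR could deadlock against us on this CPU. The IRQ
         * is enabled last so no other CPU spins in the ISR on the lock.
         */
        spin_lock_irqsave(&st->lock, flags);
        st->cur = sgl;
        feed_queue(st);
        hw_irq_enable();
        spin_unlock_irqrestore(&st->lock, flags);
}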
@@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
 	zynq_fpga_write(priv, CTRL_OFFSET,
 			(CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl));
 
-	/* check that we have room in the command queue */
+	/* We expect that the command queue is empty right now. */
 	status = zynq_fpga_read(priv, STATUS_OFFSET);
-	if (status & STATUS_DMA_Q_F) {
-		dev_err(&mgr->dev, "DMA command queue full\n");
+	if ((status & STATUS_DMA_Q_F) ||
+	    (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) {
+		dev_err(&mgr->dev, "DMA command queue not right\n");
 		err = -EBUSY;
 		goto out_err;
 	}
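The new check requires the queue to be explicitly empty before programming starts, rather than merely not-full. The same predicate expressed as a helper (zynq_dma_queue_is_empty() is hypothetical; the logic matches the diff):

static bool zynq_dma_queue_is_empty(struct zynq_fpga_priv *priv)
{
        u32 status = zynq_fpga_read(priv, STATUS_OFFSET);

        /* Not full, and the hardware reports the queue empty */
        return !(status & STATUS_DMA_Q_F) && (status & STATUS_DMA_Q_E);
}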
@@ -288,27 +367,36 @@ out_err:
 	return err;
 }
 
-static int zynq_fpga_ops_write(struct fpga_manager *mgr,
-			       const char *buf, size_t count)
+static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt)
 {
 	struct zynq_fpga_priv *priv;
 	const char *why;
 	int err;
-	char *kbuf;
-	size_t in_count;
-	dma_addr_t dma_addr;
-	u32 transfer_length;
 	u32 intr_status;
+	unsigned long timeout;
+	unsigned long flags;
+	struct scatterlist *sg;
+	int i;
 
-	in_count = count;
 	priv = mgr->priv;
 
-	kbuf =
-	    dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL);
-	if (!kbuf)
-		return -ENOMEM;
+	/* The hardware can only DMA multiples of 4 bytes, and it requires the
+	 * starting addresses to be aligned to 64 bits (UG585 pg 212).
+	 */
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		if ((sg->offset % 8) || (sg->length % 4)) {
+			dev_err(&mgr->dev,
+				"Invalid bitstream, chunks must be aligned\n");
+			return -EINVAL;
+		}
+	}
 
-	memcpy(kbuf, buf, count);
+	priv->dma_nelms =
+	    dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
+	if (priv->dma_nelms == 0) {
+		dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n");
+		return -ENOMEM;
+	}
 
 	/* enable clock */
 	err = clk_enable(priv->clk);
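dma_map_sg() may coalesce adjacent entries and returns 0 on failure; a successful map must later be undone with dma_unmap_sg() using the original sgt->nents, not the mapped count, which is exactly what the out_free label further down does. A minimal sketch of that contract (map_and_go() is a hypothetical wrapper):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* Sketch of the dma_map_sg()/dma_unmap_sg() pairing: map returns the
 * (possibly coalesced) element count, 0 on failure, and the unmap takes
 * the original sgt->nents.
 */
static int map_and_go(struct device *dev, struct sg_table *sgt)
{
        int nelms = dma_map_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);

        if (nelms == 0)
                return -ENOMEM;  /* nothing mapped, nothing to unmap */

        /* ... program the hardware with the nelms mapped segments ... */

        dma_unmap_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
        return 0;
}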
@@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 		goto out_free;
 
 	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
-
 	reinit_completion(&priv->dma_done);
 
-	/* enable DMA and error IRQs */
-	zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
-
-	/* the +1 in the src addr is used to hold off on DMA_DONE IRQ
-	 * until both AXI and PCAP are done ...
-	 */
-	zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1);
-	zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS);
-
-	/* convert #bytes to #words */
-	transfer_length = (count + 3) / 4;
+	/* zynq_step_dma will turn on interrupts */
+	spin_lock_irqsave(&priv->dma_lock, flags);
+	priv->dma_elm = 0;
+	priv->cur_sg = sgt->sgl;
+	zynq_step_dma(priv);
+	spin_unlock_irqrestore(&priv->dma_lock, flags);
 
-	zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length);
-	zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+	timeout = wait_for_completion_timeout(&priv->dma_done,
+					      msecs_to_jiffies(DMA_TIMEOUT_MS));
 
-	wait_for_completion(&priv->dma_done);
+	spin_lock_irqsave(&priv->dma_lock, flags);
+	zynq_fpga_set_irq(priv, 0);
+	priv->cur_sg = NULL;
+	spin_unlock_irqrestore(&priv->dma_lock, flags);
 
 	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
-	zynq_fpga_write(priv, INT_STS_OFFSET, intr_status);
+	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
+
+	/* There doesn't seem to be a way to force cancel any DMA, so if
+	 * something went wrong we are relying on the hardware to have halted
+	 * the DMA before we get here, if there was we could use
+	 * wait_for_completion_interruptible too.
+	 */
 
 	if (intr_status & IXR_ERROR_FLAGS_MASK) {
 		why = "DMA reported error";
@@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 		goto out_report;
 	}
 
-	if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
-		why = "DMA did not complete";
+	if (priv->cur_sg ||
+	    !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
+		if (timeout == 0)
+			why = "DMA timed out";
+		else
+			why = "DMA did not complete";
 		err = -EIO;
 		goto out_report;
 	}
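wait_for_completion_timeout() returns 0 when the timeout elapses and the remaining jiffies otherwise, which is how the code above distinguishes "DMA timed out" from "DMA did not complete". A small sketch of the idiom (wait_dma() is hypothetical):

#include <linux/completion.h>
#include <linux/jiffies.h>

/* 0 => the timeout elapsed; non-zero => jiffies left when the
 * completion fired.
 */
static int wait_dma(struct completion *done, unsigned int timeout_ms)
{
        unsigned long left;

        left = wait_for_completion_timeout(done,
                                           msecs_to_jiffies(timeout_ms));
        return left ? 0 : -ETIMEDOUT;
}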
@@ -369,7 +464,7 @@ out_clk:
 	clk_disable(priv->clk);
 
 out_free:
-	dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr);
+	dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
 	return err;
 }
 
@@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = {
 	.initial_header_size = 128,
 	.state = zynq_fpga_ops_state,
 	.write_init = zynq_fpga_ops_write_init,
-	.write = zynq_fpga_ops_write,
+	.write_sg = zynq_fpga_ops_write,
 	.write_complete = zynq_fpga_ops_write_complete,
 };
 
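Switching the ops table from .write to .write_sg tells the FPGA manager core to deliver an sg_table instead of a contiguous buffer; registration itself is unchanged. A probe-time sketch, assuming the fpga_mgr_register() signature of this kernel era (example_probe() is hypothetical, the manager name string matches the real driver, and hardware setup is elided):

static int example_probe(struct platform_device *pdev)
{
        struct zynq_fpga_priv *priv;

        priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;

        /* ... clock, regmap and IRQ setup elided ... */

        return fpga_mgr_register(&pdev->dev, "Xilinx Zynq FPGA Manager",
                                 &zynq_fpga_ops, priv);
}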
@@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
+	spin_lock_init(&priv->dma_lock);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	priv->io_base = devm_ioremap_resource(dev, res);
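spin_lock_init() sits immediately after the allocation, before the IRQ can be requested, because the ISR takes dma_lock unconditionally. A condensed sketch of that ordering (example_probe_tail() is hypothetical; zynq_fpga_isr and the completion are this driver's):

/* Everything the ISR dereferences must be valid before the IRQ line
 * goes live; devm_request_irq() can deliver an interrupt immediately.
 */
static int example_probe_tail(struct device *dev, int irq)
{
        struct zynq_fpga_priv *priv;

        priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;

        spin_lock_init(&priv->dma_lock);   /* taken by zynq_fpga_isr() */
        init_completion(&priv->dma_done);  /* completed from the ISR */

        return devm_request_irq(dev, irq, zynq_fpga_isr, 0, dev_name(dev),
                                priv);
}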