Diffstat (limited to 'drivers/dma/ioat_dma.c')
-rw-r--r--  drivers/dma/ioat_dma.c | 402
1 files changed, 367 insertions, 35 deletions
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index 318e8a22d814..a52156e56886 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -32,6 +32,7 @@
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
 #include "ioatdma.h"
 #include "ioatdma_registers.h"
 #include "ioatdma_hw.h"
@@ -41,11 +42,23 @@
 #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
 #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
 
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
 static int ioat_pending_level = 4;
 module_param(ioat_pending_level, int, 0644);
 MODULE_PARM_DESC(ioat_pending_level,
                  "high-water mark for pushing ioat descriptors (default: 4)");
 
+#define RESET_DELAY  msecs_to_jiffies(100)
+#define WATCHDOG_DELAY  round_jiffies(msecs_to_jiffies(2000))
+static void ioat_dma_chan_reset_part2(struct work_struct *work);
+static void ioat_dma_chan_watchdog(struct work_struct *work);
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
 /* internal functions */
 static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
 static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
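
The new chan_num() macro above derives a channel number from nothing but register-block arithmetic: every channel's registers occupy an 0x80-byte slot, and ioat_dma_enumerate_channels() (next hunks) places channel i at device reg_base + 0x80 * (i + 1), so the macro evaluates to i + 1. A minimal standalone sketch of that arithmetic, using invented stand-in types (fake_device, fake_chan and the mmio array are illustrative only, not driver structures):

    #include <stdio.h>
    #include <stdint.h>

    /* Stand-in types for illustration; the real driver uses ioat_dma_chan/ioatdma_device. */
    struct fake_device { uint8_t *reg_base; };
    struct fake_chan   { uint8_t *reg_base; struct fake_device *device; };

    #define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)

    int main(void)
    {
            static uint8_t mmio[0x1000];            /* pretend MMIO window */
            struct fake_device dev = { .reg_base = mmio };
            struct fake_chan chan = { .device = &dev };
            int i;

            for (i = 0; i < 4; i++) {
                    /* mirrors ioat_dma_enumerate_channels(): channel i at 0x80 * (i + 1) */
                    chan.reg_base = dev.reg_base + 0x80 * (i + 1);
                    printf("enumeration index %d -> chan_num() = %d\n", i, chan_num(&chan));
            }
            return 0;
    }

Compiled as plain C this prints 1 through 4, so the chan%d values in the dev_err() messages added below are these register slots rather than zero-based indices.
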
@@ -122,6 +135,38 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
         int i;
         struct ioat_dma_chan *ioat_chan;
 
+        /*
+         * IOAT ver.3 workarounds
+         */
+        if (device->version == IOAT_VER_3_0) {
+                u32 chan_err_mask;
+                u16 dev_id;
+                u32 dmauncerrsts;
+
+                /*
+                 * Write CHANERRMSK_INT with 3E07h to mask out the errors
+                 * that can cause stability issues for IOAT ver.3
+                 */
+                chan_err_mask = 0x3E07;
+                pci_write_config_dword(device->pdev,
+                        IOAT_PCI_CHANERRMASK_INT_OFFSET,
+                        chan_err_mask);
+
+                /*
+                 * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+                 * (workaround for spurious config parity error after restart)
+                 */
+                pci_read_config_word(device->pdev,
+                        IOAT_PCI_DEVICE_ID_OFFSET,
+                        &dev_id);
+                if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
+                        dmauncerrsts = 0x10;
+                        pci_write_config_dword(device->pdev,
+                                IOAT_PCI_DMAUNCERRSTS_OFFSET,
+                                dmauncerrsts);
+                }
+        }
+
         device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
         xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
         xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
@@ -137,6 +182,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
                 ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
                 ioat_chan->xfercap = xfercap;
                 ioat_chan->desccount = 0;
+                INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
                 if (ioat_chan->device->version != IOAT_VER_1_2) {
                         writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE
                                | IOAT_DMA_DCA_ANY_CPU,
@@ -175,7 +221,7 @@ static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
 {
         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 
-        if (ioat_chan->pending != 0) {
+        if (ioat_chan->pending > 0) {
                 spin_lock_bh(&ioat_chan->desc_lock);
                 __ioat1_dma_memcpy_issue_pending(ioat_chan);
                 spin_unlock_bh(&ioat_chan->desc_lock);
@@ -194,13 +240,228 @@ static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
 {
         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 
-        if (ioat_chan->pending != 0) {
+        if (ioat_chan->pending > 0) {
                 spin_lock_bh(&ioat_chan->desc_lock);
                 __ioat2_dma_memcpy_issue_pending(ioat_chan);
                 spin_unlock_bh(&ioat_chan->desc_lock);
         }
 }
 
+
+/**
+ * ioat_dma_chan_reset_part2 - reinit the channel after a reset
+ */
+static void ioat_dma_chan_reset_part2(struct work_struct *work)
+{
+        struct ioat_dma_chan *ioat_chan =
+                container_of(work, struct ioat_dma_chan, work.work);
+        struct ioat_desc_sw *desc;
+
+        spin_lock_bh(&ioat_chan->cleanup_lock);
+        spin_lock_bh(&ioat_chan->desc_lock);
+
+        ioat_chan->completion_virt->low = 0;
+        ioat_chan->completion_virt->high = 0;
+        ioat_chan->pending = 0;
+
+        /*
+         * count the descriptors waiting, and be sure to do it
+         * right for both the CB1 line and the CB2 ring
+         */
+        ioat_chan->dmacount = 0;
+        if (ioat_chan->used_desc.prev) {
+                desc = to_ioat_desc(ioat_chan->used_desc.prev);
+                do {
+                        ioat_chan->dmacount++;
+                        desc = to_ioat_desc(desc->node.next);
+                } while (&desc->node != ioat_chan->used_desc.next);
+        }
+
+        /*
+         * write the new starting descriptor address
+         * this puts channel engine into ARMED state
+         */
+        desc = to_ioat_desc(ioat_chan->used_desc.prev);
+        switch (ioat_chan->device->version) {
+        case IOAT_VER_1_2:
+                writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+                       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+                writel(((u64) desc->async_tx.phys) >> 32,
+                       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+
+                writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
+                        + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+                break;
+        case IOAT_VER_2_0:
+                writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+                       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+                writel(((u64) desc->async_tx.phys) >> 32,
+                       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+
+                /* tell the engine to go with what's left to be done */
+                writew(ioat_chan->dmacount,
+                       ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+
+                break;
+        }
+        dev_err(&ioat_chan->device->pdev->dev,
+                "chan%d reset - %d descs waiting, %d total desc\n",
+                chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
+
+        spin_unlock_bh(&ioat_chan->desc_lock);
+        spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+/**
+ * ioat_dma_reset_channel - restart a channel
+ * @ioat_chan: IOAT DMA channel handle
+ */
+static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
+{
+        u32 chansts, chanerr;
+
+        if (!ioat_chan->used_desc.prev)
+                return;
+
+        chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+        chansts = (ioat_chan->completion_virt->low
+                                        & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
+        if (chanerr) {
+                dev_err(&ioat_chan->device->pdev->dev,
+                        "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+                        chan_num(ioat_chan), chansts, chanerr);
+                writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+        }
+
+        /*
+         * whack it upside the head with a reset
+         * and wait for things to settle out.
+         * force the pending count to a really big negative
+         * to make sure no one forces an issue_pending
+         * while we're waiting.
+         */
+
+        spin_lock_bh(&ioat_chan->desc_lock);
+        ioat_chan->pending = INT_MIN;
+        writeb(IOAT_CHANCMD_RESET,
+               ioat_chan->reg_base
+               + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+        spin_unlock_bh(&ioat_chan->desc_lock);
+
+        /* schedule the 2nd half instead of sleeping a long time */
+        schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
+}
+
+/**
+ * ioat_dma_chan_watchdog - watch for stuck channels
+ */
+static void ioat_dma_chan_watchdog(struct work_struct *work)
+{
+        struct ioatdma_device *device =
+                container_of(work, struct ioatdma_device, work.work);
+        struct ioat_dma_chan *ioat_chan;
+        int i;
+
+        union {
+                u64 full;
+                struct {
+                        u32 low;
+                        u32 high;
+                };
+        } completion_hw;
+        unsigned long compl_desc_addr_hw;
+
+        for (i = 0; i < device->common.chancnt; i++) {
+                ioat_chan = ioat_lookup_chan_by_index(device, i);
+
+                if (ioat_chan->device->version == IOAT_VER_1_2
+                        /* have we started processing anything yet */
+                    && ioat_chan->last_completion
+                        /* have we completed any since last watchdog cycle? */
+                    && (ioat_chan->last_completion ==
+                                ioat_chan->watchdog_completion)
+                        /* has TCP stuck on one cookie since last watchdog? */
+                    && (ioat_chan->watchdog_tcp_cookie ==
+                                ioat_chan->watchdog_last_tcp_cookie)
+                    && (ioat_chan->watchdog_tcp_cookie !=
+                                ioat_chan->completed_cookie)
+                        /* is there something in the chain to be processed? */
+                        /* CB1 chain always has at least the last one processed */
+                    && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
+                    && ioat_chan->pending == 0) {
+
+                        /*
+                         * check CHANSTS register for completed
+                         * descriptor address.
+                         * if it is different than completion writeback,
+                         * it is not zero
+                         * and it has changed since the last watchdog
+                         * we can assume that channel
+                         * is still working correctly
+                         * and the problem is in completion writeback.
+                         * update completion writeback
+                         * with actual CHANSTS value
+                         * else
+                         * try resetting the channel
+                         */
+
+                        completion_hw.low = readl(ioat_chan->reg_base +
+                                IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
+                        completion_hw.high = readl(ioat_chan->reg_base +
+                                IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
+#if (BITS_PER_LONG == 64)
+                        compl_desc_addr_hw =
+                                completion_hw.full
+                                & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+#else
+                        compl_desc_addr_hw =
+                                completion_hw.low & IOAT_LOW_COMPLETION_MASK;
+#endif
+
+                        if ((compl_desc_addr_hw != 0)
+                           && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
+                           && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
+                                ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
+                                ioat_chan->completion_virt->low = completion_hw.low;
+                                ioat_chan->completion_virt->high = completion_hw.high;
+                        } else {
+                                ioat_dma_reset_channel(ioat_chan);
+                                ioat_chan->watchdog_completion = 0;
+                                ioat_chan->last_compl_desc_addr_hw = 0;
+                        }
+
+                /*
+                 * for version 2.0 if there are descriptors yet to be processed
+                 * and the last completed hasn't changed since the last watchdog
+                 * if they haven't hit the pending level
+                 * issue the pending to push them through
+                 * else
+                 * try resetting the channel
+                 */
+                } else if (ioat_chan->device->version == IOAT_VER_2_0
+                    && ioat_chan->used_desc.prev
+                    && ioat_chan->last_completion
+                    && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
+
+                        if (ioat_chan->pending < ioat_pending_level)
+                                ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
+                        else {
+                                ioat_dma_reset_channel(ioat_chan);
+                                ioat_chan->watchdog_completion = 0;
+                        }
+                } else {
+                        ioat_chan->last_compl_desc_addr_hw = 0;
+                        ioat_chan->watchdog_completion
+                                        = ioat_chan->last_completion;
+                }
+
+                ioat_chan->watchdog_last_tcp_cookie =
+                        ioat_chan->watchdog_tcp_cookie;
+        }
+
+        schedule_delayed_work(&device->work, WATCHDOG_DELAY);
+}
+
 static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
 {
         struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
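
The block comment inside the CB1 branch above describes the recovery policy in prose; condensed into code it is roughly the sketch below (illustrative only, not driver code — classify_cb1() and the enum are invented names):

    /* Condensed restatement of the CB1 watchdog branch above; names are invented. */
    enum wd_action { UPDATE_WRITEBACK, RESET_CHANNEL };

    static enum wd_action classify_cb1(unsigned long chansts_addr,
                                       unsigned long watchdog_completion,
                                       unsigned long last_chansts_addr)
    {
            /*
             * CHANSTS reports a non-zero completed-descriptor address that has
             * moved since the last pass: the engine is still advancing and only
             * the completion writeback is stale, so copy CHANSTS into it.
             */
            if (chansts_addr != 0 &&
                chansts_addr != watchdog_completion &&
                chansts_addr != last_chansts_addr)
                    return UPDATE_WRITEBACK;

            /* otherwise the channel itself looks wedged; reset it */
            return RESET_CHANNEL;
    }
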
@@ -250,6 +511,13 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
                 prev = new;
         } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
 
+        if (!new) {
+                dev_err(&ioat_chan->device->pdev->dev,
+                        "tx submit failed\n");
+                spin_unlock_bh(&ioat_chan->desc_lock);
+                return -ENOMEM;
+        }
+
         hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
         if (new->async_tx.callback) {
                 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
@@ -335,7 +603,14 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
                 desc_count++;
         } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
 
-        hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+        if (!new) {
+                dev_err(&ioat_chan->device->pdev->dev,
+                        "tx submit failed\n");
+                spin_unlock_bh(&ioat_chan->desc_lock);
+                return -ENOMEM;
+        }
+
+        hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
         if (new->async_tx.callback) {
                 hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
                 if (first != new) {
@@ -406,6 +681,7 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
                 desc_sw->async_tx.tx_submit = ioat1_tx_submit;
                 break;
         case IOAT_VER_2_0:
+        case IOAT_VER_3_0:
                 desc_sw->async_tx.tx_submit = ioat2_tx_submit;
                 break;
         }
@@ -452,7 +728,8 @@ static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
  * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
  * @chan: the channel to be filled out
  */
-static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
+static int ioat_dma_alloc_chan_resources(struct dma_chan *chan,
+                                         struct dma_client *client)
 {
         struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
         struct ioat_desc_sw *desc;
@@ -555,6 +832,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
                 }
                 break;
         case IOAT_VER_2_0:
+        case IOAT_VER_3_0:
                 list_for_each_entry_safe(desc, _desc,
                                          ioat_chan->free_desc.next, node) {
                         list_del(&desc->node);
@@ -585,6 +863,10 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
         ioat_chan->last_completion = ioat_chan->completion_addr = 0;
         ioat_chan->pending = 0;
         ioat_chan->dmacount = 0;
+        ioat_chan->watchdog_completion = 0;
+        ioat_chan->last_compl_desc_addr_hw = 0;
+        ioat_chan->watchdog_tcp_cookie =
+                ioat_chan->watchdog_last_tcp_cookie = 0;
 }
 
 /**
@@ -640,7 +922,8 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
 
                 /* set up the noop descriptor */
                 noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
-                noop_desc->hw->size = 0;
+                /* set size to non-zero value (channel returns error when size is 0) */
+                noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
                 noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
                 noop_desc->hw->src_addr = 0;
                 noop_desc->hw->dst_addr = 0;
@@ -690,6 +973,7 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
                 return ioat1_dma_get_next_descriptor(ioat_chan);
                 break;
         case IOAT_VER_2_0:
+        case IOAT_VER_3_0:
                 return ioat2_dma_get_next_descriptor(ioat_chan);
                 break;
         }
@@ -716,8 +1000,12 @@ static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
                 new->src = dma_src;
                 new->async_tx.flags = flags;
                 return &new->async_tx;
-        } else
+        } else {
+                dev_err(&ioat_chan->device->pdev->dev,
+                        "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
+                        chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
                 return NULL;
+        }
 }
 
 static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
@@ -744,8 +1032,13 @@ static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
                 new->src = dma_src;
                 new->async_tx.flags = flags;
                 return &new->async_tx;
-        } else
+        } else {
+                spin_unlock_bh(&ioat_chan->desc_lock);
+                dev_err(&ioat_chan->device->pdev->dev,
+                        "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
+                        chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
                 return NULL;
+        }
 }
 
 static void ioat_dma_cleanup_tasklet(unsigned long data)
@@ -756,6 +1049,27 @@ static void ioat_dma_cleanup_tasklet(unsigned long data)
                chan->reg_base + IOAT_CHANCTRL_OFFSET);
 }
 
+static void
+ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
+{
+        /*
+         * yes we are unmapping both _page and _single
+         * alloc'd regions with unmap_page. Is this
+         * *really* that bad?
+         */
+        if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP))
+                pci_unmap_page(ioat_chan->device->pdev,
+                                pci_unmap_addr(desc, dst),
+                                pci_unmap_len(desc, len),
+                                PCI_DMA_FROMDEVICE);
+
+        if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP))
+                pci_unmap_page(ioat_chan->device->pdev,
+                                pci_unmap_addr(desc, src),
+                                pci_unmap_len(desc, len),
+                                PCI_DMA_TODEVICE);
+}
+
 /**
  * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
  * @chan: ioat channel to be cleaned up
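
ioat_dma_unmap() consolidates the two pci_unmap_page() calls that the cleanup paths below previously open-coded, and it honours the DMA_COMPL_SKIP_DEST_UNMAP / DMA_COMPL_SKIP_SRC_UNMAP descriptor flags. A hypothetical caller (not part of this patch; submit_copy_keep_mapping() is an invented name) that wants to keep its own DMA mappings alive would set those flags when preparing the copy:

    #include <linux/dmaengine.h>

    /* Hypothetical client code, for illustration only; not part of ioat_dma.c. */
    static dma_cookie_t submit_copy_keep_mapping(struct dma_chan *chan,
                                                 dma_addr_t dst, dma_addr_t src,
                                                 size_t len)
    {
            struct dma_async_tx_descriptor *tx;

            /* ask the cleanup path (ioat_dma_unmap above) to leave both mappings
             * alone; the caller is responsible for unmapping them later */
            tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len,
                                                      DMA_COMPL_SKIP_DEST_UNMAP |
                                                      DMA_COMPL_SKIP_SRC_UNMAP);
            if (!tx)
                    return -ENOMEM;

            tx->callback = NULL;
            return tx->tx_submit(tx);
    }
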
@@ -799,11 +1113,27 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
 
         if (phys_complete == ioat_chan->last_completion) {
                 spin_unlock_bh(&ioat_chan->cleanup_lock);
+                /*
+                 * perhaps we're stuck so hard that the watchdog can't go off?
+                 * try to catch it after 2 seconds
+                 */
+                if (ioat_chan->device->version != IOAT_VER_3_0) {
+                        if (time_after(jiffies,
+                                       ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
+                                ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
+                                ioat_chan->last_completion_time = jiffies;
+                        }
+                }
                 return;
         }
+        ioat_chan->last_completion_time = jiffies;
 
         cookie = 0;
-        spin_lock_bh(&ioat_chan->desc_lock);
+        if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
+                spin_unlock_bh(&ioat_chan->cleanup_lock);
+                return;
+        }
+
         switch (ioat_chan->device->version) {
         case IOAT_VER_1_2:
                 list_for_each_entry_safe(desc, _desc,
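
With the spin_trylock_bh() change above, the cleanup routine no longer waits for desc_lock: if submit, reset, or another cleanup pass holds it, cleanup simply drops cleanup_lock and returns, trusting the next tasklet or watchdog pass to retry. The generic shape of that pattern, with invented names, looks like:

    #include <linux/spinlock.h>

    /* Generic form of the trylock-and-bail pattern above; names are invented. */
    static void try_cleanup(spinlock_t *cleanup_lock, spinlock_t *desc_lock)
    {
            spin_lock_bh(cleanup_lock);

            if (!spin_trylock_bh(desc_lock)) {
                    /* someone else owns the descriptor list; don't spin on it
                     * under cleanup_lock, just retry on the next pass */
                    spin_unlock_bh(cleanup_lock);
                    return;
            }

            /* ... walk and reclaim completed descriptors here ... */

            spin_unlock_bh(desc_lock);
            spin_unlock_bh(cleanup_lock);
    }
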
@@ -816,21 +1146,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
                          */
                        if (desc->async_tx.cookie) {
                                cookie = desc->async_tx.cookie;
-
-                               /*
-                                * yes we are unmapping both _page and _single
-                                * alloc'd regions with unmap_page. Is this
-                                * *really* that bad?
-                                */
-                               pci_unmap_page(ioat_chan->device->pdev,
-                                               pci_unmap_addr(desc, dst),
-                                               pci_unmap_len(desc, len),
-                                               PCI_DMA_FROMDEVICE);
-                               pci_unmap_page(ioat_chan->device->pdev,
-                                               pci_unmap_addr(desc, src),
-                                               pci_unmap_len(desc, len),
-                                               PCI_DMA_TODEVICE);
-
+                               ioat_dma_unmap(ioat_chan, desc);
                                if (desc->async_tx.callback) {
                                        desc->async_tx.callback(desc->async_tx.callback_param);
                                        desc->async_tx.callback = NULL;
@@ -862,6 +1178,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
                 }
                 break;
         case IOAT_VER_2_0:
+        case IOAT_VER_3_0:
                 /* has some other thread has already cleaned up? */
                 if (ioat_chan->used_desc.prev == NULL)
                         break;
@@ -889,16 +1206,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
                        if (desc->async_tx.cookie) {
                                cookie = desc->async_tx.cookie;
                                desc->async_tx.cookie = 0;
-
-                               pci_unmap_page(ioat_chan->device->pdev,
-                                               pci_unmap_addr(desc, dst),
-                                               pci_unmap_len(desc, len),
-                                               PCI_DMA_FROMDEVICE);
-                               pci_unmap_page(ioat_chan->device->pdev,
-                                               pci_unmap_addr(desc, src),
-                                               pci_unmap_len(desc, len),
-                                               PCI_DMA_TODEVICE);
-
+                               ioat_dma_unmap(ioat_chan, desc);
                                if (desc->async_tx.callback) {
                                        desc->async_tx.callback(desc->async_tx.callback_param);
                                        desc->async_tx.callback = NULL;
@@ -943,6 +1251,7 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
 
         last_used = chan->cookie;
         last_complete = ioat_chan->completed_cookie;
+        ioat_chan->watchdog_tcp_cookie = cookie;
 
         if (done)
                 *done = last_complete;
@@ -973,10 +1282,19 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
         spin_lock_bh(&ioat_chan->desc_lock);
 
         desc = ioat_dma_get_next_descriptor(ioat_chan);
+
+        if (!desc) {
+                dev_err(&ioat_chan->device->pdev->dev,
+                        "Unable to start null desc - get next desc failed\n");
+                spin_unlock_bh(&ioat_chan->desc_lock);
+                return;
+        }
+
         desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
                 | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
                 | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
-        desc->hw->size = 0;
+        /* set size to non-zero value (channel returns error when size is 0) */
+        desc->hw->size = NULL_DESC_BUFFER_SIZE;
         desc->hw->src_addr = 0;
         desc->hw->dst_addr = 0;
         async_tx_ack(&desc->async_tx);
@@ -994,6 +1312,7 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
                        + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
                 break;
         case IOAT_VER_2_0:
+        case IOAT_VER_3_0:
                 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
                        ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
                 writel(((u64) desc->async_tx.phys) >> 32,
@@ -1049,7 +1368,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
         dma_chan = container_of(device->common.channels.next,
                                 struct dma_chan,
                                 device_node);
-        if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
+        if (device->common.device_alloc_chan_resources(dma_chan, NULL) < 1) {
                 dev_err(&device->pdev->dev,
                         "selftest cannot allocate chan resource\n");
                 err = -ENODEV;
@@ -1312,6 +1631,7 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
                         ioat1_dma_memcpy_issue_pending;
                 break;
         case IOAT_VER_2_0:
+        case IOAT_VER_3_0:
                 device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
                 device->common.device_issue_pending =
                         ioat2_dma_memcpy_issue_pending;
@@ -1331,8 +1651,16 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
         if (err)
                 goto err_self_test;
 
+        ioat_set_tcp_copy_break(device);
+
         dma_async_device_register(&device->common);
 
+        if (device->version != IOAT_VER_3_0) {
+                INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
+                schedule_delayed_work(&device->work,
+                                      WATCHDOG_DELAY);
+        }
+
         return device;
 
 err_self_test:
@@ -1365,6 +1693,10 @@ void ioat_dma_remove(struct ioatdma_device *device)
         pci_release_regions(device->pdev);
         pci_disable_device(device->pdev);
 
+        if (device->version != IOAT_VER_3_0) {
+                cancel_delayed_work(&device->work);
+        }
+
         list_for_each_entry_safe(chan, _chan,
                                  &device->common.channels, device_node) {
                 ioat_chan = to_ioat_chan(chan);