diff options
author | Dan Williams <dan.j.williams@intel.com> | 2009-09-08 15:01:04 -0400 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2009-09-08 20:30:24 -0400 |
commit | 4fb9b9e8d55880523db550043dfb204696dd0422 (patch) | |
tree | 733a672aeb819bb8133b16329a6b5088cf9ee693 /drivers | |
parent | 6df9183a153291a2585a8dfe67597fc18c201147 (diff) |
ioat: cleanup completion status reads
The cleanup path makes an effort to only perform an atomic read of the
64-bit completion address. However, in the 32-bit case it does not
matter if we read the upper and lower 32 bits non-atomically, because
the upper 32 bits will always be zero.
Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/dma/ioat/dma.c | 78 | ||||
-rw-r--r-- | drivers/dma/ioat/dma.h | 10 | ||||
-rw-r--r-- | drivers/dma/ioat/dma_v2.c | 25 | ||||
-rw-r--r-- | drivers/dma/ioat/registers.h | 8 |
4 files changed, 46 insertions, 75 deletions
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index edf4f5e5de73..08417ad4edca 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c | |||
@@ -201,8 +201,7 @@ static void ioat1_reset_part2(struct work_struct *work) | |||
201 | spin_lock_bh(&chan->cleanup_lock); | 201 | spin_lock_bh(&chan->cleanup_lock); |
202 | spin_lock_bh(&ioat->desc_lock); | 202 | spin_lock_bh(&ioat->desc_lock); |
203 | 203 | ||
204 | chan->completion_virt->low = 0; | 204 | *chan->completion = 0; |
205 | chan->completion_virt->high = 0; | ||
206 | ioat->pending = 0; | 205 | ioat->pending = 0; |
207 | 206 | ||
208 | /* count the descriptors waiting */ | 207 | /* count the descriptors waiting */ |
@@ -256,8 +255,7 @@ static void ioat1_reset_channel(struct ioat_dma_chan *ioat) | |||
256 | 255 | ||
257 | dev_dbg(to_dev(chan), "%s\n", __func__); | 256 | dev_dbg(to_dev(chan), "%s\n", __func__); |
258 | chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); | 257 | chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); |
259 | chansts = (chan->completion_virt->low | 258 | chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS; |
260 | & IOAT_CHANSTS_DMA_TRANSFER_STATUS); | ||
261 | if (chanerr) { | 259 | if (chanerr) { |
262 | dev_err(to_dev(chan), | 260 | dev_err(to_dev(chan), |
263 | "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", | 261 | "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", |
@@ -293,14 +291,8 @@ static void ioat1_chan_watchdog(struct work_struct *work) | |||
293 | struct ioat_dma_chan *ioat; | 291 | struct ioat_dma_chan *ioat; |
294 | struct ioat_chan_common *chan; | 292 | struct ioat_chan_common *chan; |
295 | int i; | 293 | int i; |
296 | 294 | u64 completion; | |
297 | union { | 295 | u32 completion_low; |
298 | u64 full; | ||
299 | struct { | ||
300 | u32 low; | ||
301 | u32 high; | ||
302 | }; | ||
303 | } completion_hw; | ||
304 | unsigned long compl_desc_addr_hw; | 296 | unsigned long compl_desc_addr_hw; |
305 | 297 | ||
306 | for (i = 0; i < device->common.chancnt; i++) { | 298 | for (i = 0; i < device->common.chancnt; i++) { |
@@ -334,25 +326,24 @@ static void ioat1_chan_watchdog(struct work_struct *work) | |||
334 | * try resetting the channel | 326 | * try resetting the channel |
335 | */ | 327 | */ |
336 | 328 | ||
337 | completion_hw.low = readl(chan->reg_base + | 329 | /* we need to read the low address first as this |
330 | * causes the chipset to latch the upper bits | ||
331 | * for the subsequent read | ||
332 | */ | ||
333 | completion_low = readl(chan->reg_base + | ||
338 | IOAT_CHANSTS_OFFSET_LOW(chan->device->version)); | 334 | IOAT_CHANSTS_OFFSET_LOW(chan->device->version)); |
339 | completion_hw.high = readl(chan->reg_base + | 335 | completion = readl(chan->reg_base + |
340 | IOAT_CHANSTS_OFFSET_HIGH(chan->device->version)); | 336 | IOAT_CHANSTS_OFFSET_HIGH(chan->device->version)); |
341 | #if (BITS_PER_LONG == 64) | 337 | completion <<= 32; |
342 | compl_desc_addr_hw = | 338 | completion |= completion_low; |
343 | completion_hw.full | 339 | compl_desc_addr_hw = completion & |
344 | & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; | 340 | IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; |
345 | #else | ||
346 | compl_desc_addr_hw = | ||
347 | completion_hw.low & IOAT_LOW_COMPLETION_MASK; | ||
348 | #endif | ||
349 | 341 | ||
350 | if ((compl_desc_addr_hw != 0) | 342 | if ((compl_desc_addr_hw != 0) |
351 | && (compl_desc_addr_hw != chan->watchdog_completion) | 343 | && (compl_desc_addr_hw != chan->watchdog_completion) |
352 | && (compl_desc_addr_hw != chan->last_compl_desc_addr_hw)) { | 344 | && (compl_desc_addr_hw != chan->last_compl_desc_addr_hw)) { |
353 | chan->last_compl_desc_addr_hw = compl_desc_addr_hw; | 345 | chan->last_compl_desc_addr_hw = compl_desc_addr_hw; |
354 | chan->completion_virt->low = completion_hw.low; | 346 | *chan->completion = completion; |
355 | chan->completion_virt->high = completion_hw.high; | ||
356 | } else { | 347 | } else { |
357 | ioat1_reset_channel(ioat); | 348 | ioat1_reset_channel(ioat); |
358 | chan->watchdog_completion = 0; | 349 | chan->watchdog_completion = 0; |
@@ -492,14 +483,12 @@ static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) | |||
492 | 483 | ||
493 | /* allocate a completion writeback area */ | 484 | /* allocate a completion writeback area */ |
494 | /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ | 485 | /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ |
495 | chan->completion_virt = pci_pool_alloc(chan->device->completion_pool, | 486 | chan->completion = pci_pool_alloc(chan->device->completion_pool, |
496 | GFP_KERNEL, | 487 | GFP_KERNEL, &chan->completion_dma); |
497 | &chan->completion_addr); | 488 | memset(chan->completion, 0, sizeof(*chan->completion)); |
498 | memset(chan->completion_virt, 0, | 489 | writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, |
499 | sizeof(*chan->completion_virt)); | ||
500 | writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF, | ||
501 | chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); | 490 | chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); |
502 | writel(((u64) chan->completion_addr) >> 32, | 491 | writel(((u64) chan->completion_dma) >> 32, |
503 | chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); | 492 | chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); |
504 | 493 | ||
505 | tasklet_enable(&chan->cleanup_task); | 494 | tasklet_enable(&chan->cleanup_task); |
@@ -558,15 +547,16 @@ static void ioat1_dma_free_chan_resources(struct dma_chan *c) | |||
558 | spin_unlock_bh(&ioat->desc_lock); | 547 | spin_unlock_bh(&ioat->desc_lock); |
559 | 548 | ||
560 | pci_pool_free(ioatdma_device->completion_pool, | 549 | pci_pool_free(ioatdma_device->completion_pool, |
561 | chan->completion_virt, | 550 | chan->completion, |
562 | chan->completion_addr); | 551 | chan->completion_dma); |
563 | 552 | ||
564 | /* one is ok since we left it on there on purpose */ | 553 | /* one is ok since we left it on there on purpose */ |
565 | if (in_use_descs > 1) | 554 | if (in_use_descs > 1) |
566 | dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", | 555 | dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", |
567 | in_use_descs - 1); | 556 | in_use_descs - 1); |
568 | 557 | ||
569 | chan->last_completion = chan->completion_addr = 0; | 558 | chan->last_completion = 0; |
559 | chan->completion_dma = 0; | ||
570 | chan->watchdog_completion = 0; | 560 | chan->watchdog_completion = 0; |
571 | chan->last_compl_desc_addr_hw = 0; | 561 | chan->last_compl_desc_addr_hw = 0; |
572 | chan->watchdog_tcp_cookie = chan->watchdog_last_tcp_cookie = 0; | 562 | chan->watchdog_tcp_cookie = chan->watchdog_last_tcp_cookie = 0; |
@@ -709,25 +699,15 @@ void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, | |||
709 | unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) | 699 | unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) |
710 | { | 700 | { |
711 | unsigned long phys_complete; | 701 | unsigned long phys_complete; |
702 | u64 completion; | ||
712 | 703 | ||
713 | /* The completion writeback can happen at any time, | 704 | completion = *chan->completion; |
714 | so reads by the driver need to be atomic operations | 705 | phys_complete = completion & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; |
715 | The descriptor physical addresses are limited to 32-bits | ||
716 | when the CPU can only do a 32-bit mov */ | ||
717 | |||
718 | #if (BITS_PER_LONG == 64) | ||
719 | phys_complete = | ||
720 | chan->completion_virt->full | ||
721 | & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; | ||
722 | #else | ||
723 | phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; | ||
724 | #endif | ||
725 | 706 | ||
726 | dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, | 707 | dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, |
727 | (unsigned long long) phys_complete); | 708 | (unsigned long long) phys_complete); |
728 | 709 | ||
729 | if ((chan->completion_virt->full | 710 | if ((completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == |
730 | & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == | ||
731 | IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { | 711 | IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { |
732 | dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", | 712 | dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", |
733 | readl(chan->reg_base + IOAT_CHANERR_OFFSET)); | 713 | readl(chan->reg_base + IOAT_CHANERR_OFFSET)); |
@@ -750,7 +730,7 @@ static void ioat1_cleanup(struct ioat_dma_chan *ioat) | |||
750 | dma_cookie_t cookie = 0; | 730 | dma_cookie_t cookie = 0; |
751 | struct dma_async_tx_descriptor *tx; | 731 | struct dma_async_tx_descriptor *tx; |
752 | 732 | ||
753 | prefetch(chan->completion_virt); | 733 | prefetch(chan->completion); |
754 | 734 | ||
755 | if (!spin_trylock_bh(&chan->cleanup_lock)) | 735 | if (!spin_trylock_bh(&chan->cleanup_lock)) |
756 | return; | 736 | return; |
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 9f9edc2cd079..5fd6e2de84db 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h | |||
@@ -96,14 +96,8 @@ struct ioat_chan_common { | |||
96 | struct ioatdma_device *device; | 96 | struct ioatdma_device *device; |
97 | struct dma_chan common; | 97 | struct dma_chan common; |
98 | 98 | ||
99 | dma_addr_t completion_addr; | 99 | dma_addr_t completion_dma; |
100 | union { | 100 | u64 *completion; |
101 | u64 full; /* HW completion writeback */ | ||
102 | struct { | ||
103 | u32 low; | ||
104 | u32 high; | ||
105 | }; | ||
106 | } *completion_virt; | ||
107 | unsigned long last_compl_desc_addr_hw; | 101 | unsigned long last_compl_desc_addr_hw; |
108 | struct tasklet_struct cleanup_task; | 102 | struct tasklet_struct cleanup_task; |
109 | }; | 103 | }; |
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 58881860f400..ca1134249341 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c | |||
@@ -200,8 +200,7 @@ static void ioat2_reset_channel(struct ioat2_dma_chan *ioat) | |||
200 | return; | 200 | return; |
201 | 201 | ||
202 | chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); | 202 | chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); |
203 | chansts = (chan->completion_virt->low | 203 | chansts = *chan->completion & IOAT_CHANSTS_DMA_TRANSFER_STATUS; |
204 | & IOAT_CHANSTS_DMA_TRANSFER_STATUS); | ||
205 | if (chanerr) { | 204 | if (chanerr) { |
206 | dev_err(to_dev(chan), | 205 | dev_err(to_dev(chan), |
207 | "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", | 206 | "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", |
@@ -281,7 +280,7 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat) | |||
281 | int i; | 280 | int i; |
282 | struct dma_async_tx_descriptor *tx; | 281 | struct dma_async_tx_descriptor *tx; |
283 | 282 | ||
284 | prefetch(chan->completion_virt); | 283 | prefetch(chan->completion); |
285 | 284 | ||
286 | spin_lock_bh(&chan->cleanup_lock); | 285 | spin_lock_bh(&chan->cleanup_lock); |
287 | phys_complete = ioat_get_current_completion(chan); | 286 | phys_complete = ioat_get_current_completion(chan); |
@@ -470,17 +469,15 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) | |||
470 | 469 | ||
471 | /* allocate a completion writeback area */ | 470 | /* allocate a completion writeback area */ |
472 | /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ | 471 | /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ |
473 | chan->completion_virt = pci_pool_alloc(chan->device->completion_pool, | 472 | chan->completion = pci_pool_alloc(chan->device->completion_pool, |
474 | GFP_KERNEL, | 473 | GFP_KERNEL, &chan->completion_dma); |
475 | &chan->completion_addr); | 474 | if (!chan->completion) |
476 | if (!chan->completion_virt) | ||
477 | return -ENOMEM; | 475 | return -ENOMEM; |
478 | 476 | ||
479 | memset(chan->completion_virt, 0, | 477 | memset(chan->completion, 0, sizeof(*chan->completion)); |
480 | sizeof(*chan->completion_virt)); | 478 | writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, |
481 | writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF, | ||
482 | chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); | 479 | chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); |
483 | writel(((u64) chan->completion_addr) >> 32, | 480 | writel(((u64) chan->completion_dma) >> 32, |
484 | chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); | 481 | chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); |
485 | 482 | ||
486 | ioat->alloc_order = ioat_get_alloc_order(); | 483 | ioat->alloc_order = ioat_get_alloc_order(); |
@@ -655,12 +652,12 @@ static void ioat2_free_chan_resources(struct dma_chan *c) | |||
655 | ioat->ring = NULL; | 652 | ioat->ring = NULL; |
656 | ioat->alloc_order = 0; | 653 | ioat->alloc_order = 0; |
657 | pci_pool_free(ioatdma_device->completion_pool, | 654 | pci_pool_free(ioatdma_device->completion_pool, |
658 | chan->completion_virt, | 655 | chan->completion, |
659 | chan->completion_addr); | 656 | chan->completion_dma); |
660 | spin_unlock_bh(&ioat->ring_lock); | 657 | spin_unlock_bh(&ioat->ring_lock); |
661 | 658 | ||
662 | chan->last_completion = 0; | 659 | chan->last_completion = 0; |
663 | chan->completion_addr = 0; | 660 | chan->completion_dma = 0; |
664 | ioat->pending = 0; | 661 | ioat->pending = 0; |
665 | ioat->dmacount = 0; | 662 | ioat->dmacount = 0; |
666 | chan->watchdog_completion = 0; | 663 | chan->watchdog_completion = 0; |
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index 49bc277424f8..a83c7332125c 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h | |||
@@ -94,10 +94,10 @@ | |||
94 | #define IOAT2_CHANSTS_OFFSET_HIGH 0x0C | 94 | #define IOAT2_CHANSTS_OFFSET_HIGH 0x0C |
95 | #define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ | 95 | #define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ |
96 | ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) | 96 | ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) |
97 | #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F | 97 | #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) |
98 | #define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 | 98 | #define IOAT_CHANSTS_SOFT_ERR 0x10ULL |
99 | #define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 | 99 | #define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL |
100 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 | 100 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x7ULL |
101 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 | 101 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 |
102 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 | 102 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 |
103 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 | 103 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 |