Diffstat (limited to 'drivers/dma/ioat/dma_v2.c')
-rw-r--r--  drivers/dma/ioat/dma_v2.c   185
1 file changed, 95 insertions(+), 90 deletions(-)
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index f540e0be7f31..3c8b32a83794 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -27,6 +27,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
@@ -55,8 +56,6 @@ void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
 
        ioat->dmacount += ioat2_ring_pending(ioat);
        ioat->issued = ioat->head;
-       /* make descriptor updates globally visible before notifying channel */
-       wmb();
        writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
        dev_dbg(to_dev(chan),
                "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
@@ -68,9 +67,9 @@ void ioat2_issue_pending(struct dma_chan *c)
        struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 
        if (ioat2_ring_pending(ioat)) {
-               spin_lock_bh(&ioat->ring_lock);
+               spin_lock_bh(&ioat->prep_lock);
                __ioat2_issue_pending(ioat);
-               spin_unlock_bh(&ioat->ring_lock);
+               spin_unlock_bh(&ioat->prep_lock);
        }
 }
 
@@ -79,7 +78,7 @@ void ioat2_issue_pending(struct dma_chan *c)
  * @ioat: ioat2+ channel
  *
  * Check if the number of unsubmitted descriptors has exceeded the
- * watermark. Called with ring_lock held
+ * watermark. Called with prep_lock held
  */
 static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
 {
@@ -91,7 +90,6 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
 {
        struct ioat_ring_ent *desc;
        struct ioat_dma_descriptor *hw;
-       int idx;
 
        if (ioat2_ring_space(ioat) < 1) {
                dev_err(to_dev(&ioat->base),
@@ -101,8 +99,7 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
 
        dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
                __func__, ioat->head, ioat->tail, ioat->issued);
-       idx = ioat2_desc_alloc(ioat, 1);
-       desc = ioat2_get_ring_ent(ioat, idx);
+       desc = ioat2_get_ring_ent(ioat, ioat->head);
 
        hw = desc->hw;
        hw->ctl = 0;
@@ -116,14 +113,16 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
        async_tx_ack(&desc->txd);
        ioat2_set_chainaddr(ioat, desc->txd.phys);
        dump_desc_dbg(ioat, desc);
+       wmb();
+       ioat->head += 1;
        __ioat2_issue_pending(ioat);
 }
 
 static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
 {
-       spin_lock_bh(&ioat->ring_lock);
+       spin_lock_bh(&ioat->prep_lock);
        __ioat2_start_null_desc(ioat);
-       spin_unlock_bh(&ioat->ring_lock);
+       spin_unlock_bh(&ioat->prep_lock);
 }
 
 static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
@@ -133,15 +132,16 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
        struct ioat_ring_ent *desc;
        bool seen_current = false;
        u16 active;
-       int i;
+       int idx = ioat->tail, i;
 
        dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
                __func__, ioat->head, ioat->tail, ioat->issued);
 
        active = ioat2_ring_active(ioat);
        for (i = 0; i < active && !seen_current; i++) {
-               prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
-               desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+               smp_read_barrier_depends();
+               prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
+               desc = ioat2_get_ring_ent(ioat, idx + i);
                tx = &desc->txd;
                dump_desc_dbg(ioat, desc);
                if (tx->cookie) {
@@ -157,11 +157,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
                if (tx->phys == phys_complete)
                        seen_current = true;
        }
-       ioat->tail += i;
+       smp_mb(); /* finish all descriptor reads before incrementing tail */
+       ioat->tail = idx + i;
        BUG_ON(active && !seen_current); /* no active descs have written a completion? */
 
        chan->last_completion = phys_complete;
-       if (ioat->head == ioat->tail) {
+       if (active - i == 0) {
                dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
                        __func__);
                clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
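
The __cleanup() change above walks the completed descriptors from a cached tail index and only then publishes the new tail, with smp_read_barrier_depends() before each dereference and smp_mb() before the store. A minimal user-space sketch of that consumer-side ordering, using C11 atomics and made-up names (struct ring, consume_completed) rather than the driver's types; the release store on tail plays the role of the smp_mb():

#include <stdatomic.h>
#include <stdint.h>

#define RING_ENTRIES 256u               /* power of two, like the ioat ring */

struct ring {
        _Atomic uint16_t head;          /* producer index */
        _Atomic uint16_t tail;          /* consumer index */
        void *ent[RING_ENTRIES];
};

/* Reclaim n_done completed entries, then publish the new tail. */
static void consume_completed(struct ring *r, uint16_t n_done)
{
        uint16_t idx = atomic_load_explicit(&r->tail, memory_order_relaxed);
        uint16_t i;

        for (i = 0; i < n_done; i++) {
                void *desc = r->ent[(uint16_t)(idx + i) & (RING_ENTRIES - 1)];
                /* ... run completion callbacks, unmap buffers ... */
                (void)desc;
        }
        /* finish all descriptor reads before incrementing tail */
        atomic_store_explicit(&r->tail, (uint16_t)(idx + i),
                              memory_order_release);
}

Any producer that observes the updated tail is then guaranteed to see only ring slots whose descriptors have already been fully read back.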
@@ -178,24 +179,9 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
        struct ioat_chan_common *chan = &ioat->base;
        unsigned long phys_complete;
 
-       prefetch(chan->completion);
-
-       if (!spin_trylock_bh(&chan->cleanup_lock))
-               return;
-
-       if (!ioat_cleanup_preamble(chan, &phys_complete)) {
-               spin_unlock_bh(&chan->cleanup_lock);
-               return;
-       }
-
-       if (!spin_trylock_bh(&ioat->ring_lock)) {
-               spin_unlock_bh(&chan->cleanup_lock);
-               return;
-       }
-
-       __cleanup(ioat, phys_complete);
-
-       spin_unlock_bh(&ioat->ring_lock);
+       spin_lock_bh(&chan->cleanup_lock);
+       if (ioat_cleanup_preamble(chan, &phys_complete))
+               __cleanup(ioat, phys_complete);
        spin_unlock_bh(&chan->cleanup_lock);
 }
 
@@ -286,12 +272,10 @@ void ioat2_timer_event(unsigned long data)
        struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
        struct ioat_chan_common *chan = &ioat->base;
 
-       spin_lock_bh(&chan->cleanup_lock);
        if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
                unsigned long phys_complete;
                u64 status;
 
-               spin_lock_bh(&ioat->ring_lock);
                status = ioat_chansts(chan);
 
                /* when halted due to errors check for channel
@@ -310,26 +294,31 @@ void ioat2_timer_event(unsigned long data)
                 * acknowledged a pending completion once, then be more
                 * forceful with a restart
                 */
-               if (ioat_cleanup_preamble(chan, &phys_complete))
+               spin_lock_bh(&chan->cleanup_lock);
+               if (ioat_cleanup_preamble(chan, &phys_complete)) {
                        __cleanup(ioat, phys_complete);
-               else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+               } else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
+                       spin_lock_bh(&ioat->prep_lock);
                        ioat2_restart_channel(ioat);
-               else {
+                       spin_unlock_bh(&ioat->prep_lock);
+               } else {
                        set_bit(IOAT_COMPLETION_ACK, &chan->state);
                        mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
                }
-               spin_unlock_bh(&ioat->ring_lock);
+               spin_unlock_bh(&chan->cleanup_lock);
        } else {
                u16 active;
 
                /* if the ring is idle, empty, and oversized try to step
                 * down the size
                 */
-               spin_lock_bh(&ioat->ring_lock);
+               spin_lock_bh(&chan->cleanup_lock);
+               spin_lock_bh(&ioat->prep_lock);
                active = ioat2_ring_active(ioat);
                if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
                        reshape_ring(ioat, ioat->alloc_order-1);
-               spin_unlock_bh(&ioat->ring_lock);
+               spin_unlock_bh(&ioat->prep_lock);
+               spin_unlock_bh(&chan->cleanup_lock);
 
                /* keep shrinking until we get back to our minimum
                 * default size
@@ -337,7 +326,6 @@ void ioat2_timer_event(unsigned long data)
                if (ioat->alloc_order > ioat_get_alloc_order())
                        mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
        }
-       spin_unlock_bh(&chan->cleanup_lock);
 }
 
 static int ioat2_reset_hw(struct ioat_chan_common *chan)
@@ -391,7 +379,7 @@ int ioat2_enumerate_channels(struct ioatdma_device *device)
 
                ioat_init_channel(device, &ioat->base, i);
                ioat->xfercap_log = xfercap_log;
-               spin_lock_init(&ioat->ring_lock);
+               spin_lock_init(&ioat->prep_lock);
                if (device->reset_hw(&ioat->base)) {
                        i = 0;
                        break;
@@ -417,8 +405,17 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
 
        if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
                mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       /* make descriptor updates visible before advancing ioat->head,
+        * this is purposefully not smp_wmb() since we are also
+        * publishing the descriptor updates to a dma device
+        */
+       wmb();
+
+       ioat->head += ioat->produce;
+
        ioat2_update_pending(ioat);
-       spin_unlock_bh(&ioat->ring_lock);
+       spin_unlock_bh(&ioat->prep_lock);
 
        return cookie;
 }
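
The comment added to ioat2_tx_submit_unlock() above carries the key ordering argument: descriptor writes must be globally visible before ioat->head advances and the doorbell is written, and a full wmb() is used rather than smp_wmb() because the consumer is the DMA engine, not just another CPU. A rough user-space analogue of that producer-side publish sequence, with hypothetical names (struct chan, submit_batch) and a C11 release fence standing in for the kernel barrier:

#include <stdatomic.h>
#include <stdint.h>

struct desc { uint64_t src, dst, len, ctl; };

struct chan {
        struct desc ring[256];
        _Atomic uint16_t head;
        volatile uint16_t *doorbell;    /* stand-in for the DMACOUNT register */
};

static void submit_batch(struct chan *c, const struct desc *batch, uint16_t n)
{
        uint16_t head = atomic_load_explicit(&c->head, memory_order_relaxed);
        uint16_t i;

        for (i = 0; i < n; i++)                         /* fill descriptors */
                c->ring[(uint16_t)(head + i) & 255] = batch[i];

        /* make descriptor updates visible before advancing head */
        atomic_thread_fence(memory_order_release);
        atomic_store_explicit(&c->head, (uint16_t)(head + n),
                              memory_order_relaxed);

        *c->doorbell = (uint16_t)(head + n);            /* notify the engine */
}

In the driver the same ordering is split across the submit path (fill descriptors, wmb(), advance head) and __ioat2_issue_pending(), which writes the DMACOUNT register.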
@@ -530,13 +527,15 @@ int ioat2_alloc_chan_resources(struct dma_chan *c)
        if (!ring)
                return -ENOMEM;
 
-       spin_lock_bh(&ioat->ring_lock);
+       spin_lock_bh(&chan->cleanup_lock);
+       spin_lock_bh(&ioat->prep_lock);
        ioat->ring = ring;
        ioat->head = 0;
        ioat->issued = 0;
        ioat->tail = 0;
        ioat->alloc_order = order;
-       spin_unlock_bh(&ioat->ring_lock);
+       spin_unlock_bh(&ioat->prep_lock);
+       spin_unlock_bh(&chan->cleanup_lock);
 
        tasklet_enable(&chan->cleanup_task);
        ioat2_start_null_desc(ioat);
@@ -552,7 +551,7 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
         */
        struct ioat_chan_common *chan = &ioat->base;
        struct dma_chan *c = &chan->common;
-       const u16 curr_size = ioat2_ring_mask(ioat) + 1;
+       const u16 curr_size = ioat2_ring_size(ioat);
        const u16 active = ioat2_ring_active(ioat);
        const u16 new_size = 1 << order;
        struct ioat_ring_ent **ring;
@@ -652,54 +651,61 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
 }
 
 /**
- * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
- * @idx: gets starting descriptor index on successful allocation
+ * ioat2_check_space_lock - verify space and grab ring producer lock
  * @ioat: ioat2,3 channel (ring) to operate on
  * @num_descs: allocation length
  */
-int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
+int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs)
 {
        struct ioat_chan_common *chan = &ioat->base;
+       bool retry;
 
-       spin_lock_bh(&ioat->ring_lock);
+ retry:
+       spin_lock_bh(&ioat->prep_lock);
        /* never allow the last descriptor to be consumed, we need at
         * least one free at all times to allow for on-the-fly ring
         * resizing.
         */
-       while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
-               if (reshape_ring(ioat, ioat->alloc_order + 1) &&
-                   ioat2_ring_space(ioat) > num_descs)
-                       break;
-
-               if (printk_ratelimit())
-                       dev_dbg(to_dev(chan),
-                               "%s: ring full! num_descs: %d (%x:%x:%x)\n",
-                               __func__, num_descs, ioat->head, ioat->tail,
-                               ioat->issued);
-               spin_unlock_bh(&ioat->ring_lock);
-
-               /* progress reclaim in the allocation failure case we
-                * may be called under bh_disabled so we need to trigger
-                * the timer event directly
-                */
-               spin_lock_bh(&chan->cleanup_lock);
-               if (jiffies > chan->timer.expires &&
-                   timer_pending(&chan->timer)) {
-                       struct ioatdma_device *device = chan->device;
-
-                       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
-                       spin_unlock_bh(&chan->cleanup_lock);
-                       device->timer_fn((unsigned long) &chan->common);
-               } else
-                       spin_unlock_bh(&chan->cleanup_lock);
-               return -ENOMEM;
+       if (likely(ioat2_ring_space(ioat) > num_descs)) {
+               dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
+                       __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+               ioat->produce = num_descs;
+               return 0;  /* with ioat->prep_lock held */
        }
+       retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state);
+       spin_unlock_bh(&ioat->prep_lock);
 
-       dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
-               __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+       /* is another cpu already trying to expand the ring? */
+       if (retry)
+               goto retry;
 
-       *idx = ioat2_desc_alloc(ioat, num_descs);
-       return 0;  /* with ioat->ring_lock held */
+       spin_lock_bh(&chan->cleanup_lock);
+       spin_lock_bh(&ioat->prep_lock);
+       retry = reshape_ring(ioat, ioat->alloc_order + 1);
+       clear_bit(IOAT_RESHAPE_PENDING, &chan->state);
+       spin_unlock_bh(&ioat->prep_lock);
+       spin_unlock_bh(&chan->cleanup_lock);
+
+       /* if we were able to expand the ring retry the allocation */
+       if (retry)
+               goto retry;
+
+       if (printk_ratelimit())
+               dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+                       __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+
+       /* progress reclaim in the allocation failure case we may be
+        * called under bh_disabled so we need to trigger the timer
+        * event directly
+        */
+       if (jiffies > chan->timer.expires && timer_pending(&chan->timer)) {
+               struct ioatdma_device *device = chan->device;
+
+               mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+               device->timer_fn((unsigned long) &chan->common);
+       }
+
+       return -ENOMEM;
 }
 
 struct dma_async_tx_descriptor *
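
ioat2_check_space_lock() above replaces the old alloc-and-lock loop with a check/grow/retry flow: return with prep_lock held if there is room, otherwise serialize ring expansion behind the IOAT_RESHAPE_PENDING bit, reshape under cleanup_lock plus prep_lock, and retry. A compilable user-space skeleton of that control flow, with pthread mutexes and toy helpers (struct ring_chan, grow_ring) standing in for the driver's locks and reshape_ring(); illustrative only, not the driver's API:

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct ring_chan {
        pthread_mutex_t prep;           /* producer lock */
        pthread_mutex_t cleanup;        /* consumer lock */
        atomic_flag reshape_pending;    /* like IOAT_RESHAPE_PENDING */
        int space;                      /* free descriptors (toy model) */
        int order;
};

static bool grow_ring(struct ring_chan *rc)    /* stand-in for reshape_ring() */
{
        rc->space += 1 << rc->order;
        rc->order++;
        return true;
}

static int check_space_lock(struct ring_chan *rc, int num_descs)
{
retry:
        pthread_mutex_lock(&rc->prep);
        if (rc->space > num_descs)
                return 0;                       /* with rc->prep held */
        bool busy = atomic_flag_test_and_set(&rc->reshape_pending);
        pthread_mutex_unlock(&rc->prep);

        if (busy)               /* another thread is resizing; re-check space */
                goto retry;

        pthread_mutex_lock(&rc->cleanup);
        pthread_mutex_lock(&rc->prep);
        bool grew = grow_ring(rc);
        atomic_flag_clear(&rc->reshape_pending);
        pthread_mutex_unlock(&rc->prep);
        pthread_mutex_unlock(&rc->cleanup);

        if (grew)               /* the ring got bigger, retry the reservation */
                goto retry;
        return -ENOMEM;
}

As in the driver, a successful return leaves the producer lock held so the caller can fill descriptors and then release it in the submit path.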
@@ -712,14 +718,11 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
        dma_addr_t dst = dma_dest;
        dma_addr_t src = dma_src;
        size_t total_len = len;
-       int num_descs;
-       u16 idx;
-       int i;
+       int num_descs, idx, i;
 
        num_descs = ioat2_xferlen_to_descs(ioat, len);
-       if (likely(num_descs) &&
-           ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
-               /* pass */;
+       if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
+               idx = ioat->head;
        else
                return NULL;
        i = 0;
@@ -776,7 +779,8 @@ void ioat2_free_chan_resources(struct dma_chan *c)
        device->cleanup_fn((unsigned long) c);
        device->reset_hw(chan);
 
-       spin_lock_bh(&ioat->ring_lock);
+       spin_lock_bh(&chan->cleanup_lock);
+       spin_lock_bh(&ioat->prep_lock);
        descs = ioat2_ring_space(ioat);
        dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
        for (i = 0; i < descs; i++) {
@@ -799,7 +803,8 @@ void ioat2_free_chan_resources(struct dma_chan *c)
        ioat->alloc_order = 0;
        pci_pool_free(device->completion_pool, chan->completion,
                      chan->completion_dma);
-       spin_unlock_bh(&ioat->ring_lock);
+       spin_unlock_bh(&ioat->prep_lock);
+       spin_unlock_bh(&chan->cleanup_lock);
 
        chan->last_completion = 0;
        chan->completion_dma = 0;