Diffstat (limited to 'drivers/dma/ioat/dma_v2.c')
-rw-r--r--	drivers/dma/ioat/dma_v2.c	186
1 file changed, 95 insertions(+), 91 deletions(-)
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index b5ae56c211e6..3c8b32a83794 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -56,8 +56,6 @@ void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
 
 	ioat->dmacount += ioat2_ring_pending(ioat);
 	ioat->issued = ioat->head;
-	/* make descriptor updates globally visible before notifying channel */
-	wmb();
 	writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
 	dev_dbg(to_dev(chan),
 		"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
@@ -69,9 +67,9 @@ void ioat2_issue_pending(struct dma_chan *c)
 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 
 	if (ioat2_ring_pending(ioat)) {
-		spin_lock_bh(&ioat->ring_lock);
+		spin_lock_bh(&ioat->prep_lock);
 		__ioat2_issue_pending(ioat);
-		spin_unlock_bh(&ioat->ring_lock);
+		spin_unlock_bh(&ioat->prep_lock);
 	}
 }
 
@@ -80,7 +78,7 @@ void ioat2_issue_pending(struct dma_chan *c)
  * @ioat: ioat2+ channel
  *
  * Check if the number of unsubmitted descriptors has exceeded the
- * watermark. Called with ring_lock held
+ * watermark. Called with prep_lock held
  */
 static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
 {
@@ -92,7 +90,6 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
 {
 	struct ioat_ring_ent *desc;
 	struct ioat_dma_descriptor *hw;
-	int idx;
 
 	if (ioat2_ring_space(ioat) < 1) {
 		dev_err(to_dev(&ioat->base),
@@ -102,8 +99,7 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
 
 	dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
 		__func__, ioat->head, ioat->tail, ioat->issued);
-	idx = ioat2_desc_alloc(ioat, 1);
-	desc = ioat2_get_ring_ent(ioat, idx);
+	desc = ioat2_get_ring_ent(ioat, ioat->head);
 
 	hw = desc->hw;
 	hw->ctl = 0;
@@ -117,14 +113,16 @@ static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
 	async_tx_ack(&desc->txd);
 	ioat2_set_chainaddr(ioat, desc->txd.phys);
 	dump_desc_dbg(ioat, desc);
+	wmb();
+	ioat->head += 1;
 	__ioat2_issue_pending(ioat);
 }
 
 static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
 {
-	spin_lock_bh(&ioat->ring_lock);
+	spin_lock_bh(&ioat->prep_lock);
 	__ioat2_start_null_desc(ioat);
-	spin_unlock_bh(&ioat->ring_lock);
+	spin_unlock_bh(&ioat->prep_lock);
 }
 
 static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
@@ -134,15 +132,16 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 	struct ioat_ring_ent *desc;
 	bool seen_current = false;
 	u16 active;
-	int i;
+	int idx = ioat->tail, i;
 
 	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
 		__func__, ioat->head, ioat->tail, ioat->issued);
 
 	active = ioat2_ring_active(ioat);
 	for (i = 0; i < active && !seen_current; i++) {
-		prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
-		desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+		smp_read_barrier_depends();
+		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
+		desc = ioat2_get_ring_ent(ioat, idx + i);
 		tx = &desc->txd;
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
@@ -158,11 +157,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 		if (tx->phys == phys_complete)
 			seen_current = true;
 	}
-	ioat->tail += i;
+	smp_mb(); /* finish all descriptor reads before incrementing tail */
+	ioat->tail = idx + i;
 	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
 
 	chan->last_completion = phys_complete;
-	if (ioat->head == ioat->tail) {
+	if (active - i == 0) {
 		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
 			__func__);
 		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
@@ -179,24 +179,9 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
 	struct ioat_chan_common *chan = &ioat->base;
 	unsigned long phys_complete;
 
-	prefetch(chan->completion);
-
-	if (!spin_trylock_bh(&chan->cleanup_lock))
-		return;
-
-	if (!ioat_cleanup_preamble(chan, &phys_complete)) {
-		spin_unlock_bh(&chan->cleanup_lock);
-		return;
-	}
-
-	if (!spin_trylock_bh(&ioat->ring_lock)) {
-		spin_unlock_bh(&chan->cleanup_lock);
-		return;
-	}
-
-	__cleanup(ioat, phys_complete);
-
-	spin_unlock_bh(&ioat->ring_lock);
+	spin_lock_bh(&chan->cleanup_lock);
+	if (ioat_cleanup_preamble(chan, &phys_complete))
+		__cleanup(ioat, phys_complete);
 	spin_unlock_bh(&chan->cleanup_lock);
 }
 
@@ -287,12 +272,10 @@ void ioat2_timer_event(unsigned long data)
 	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
 	struct ioat_chan_common *chan = &ioat->base;
 
-	spin_lock_bh(&chan->cleanup_lock);
 	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
 		unsigned long phys_complete;
 		u64 status;
 
-		spin_lock_bh(&ioat->ring_lock);
 		status = ioat_chansts(chan);
 
 		/* when halted due to errors check for channel
@@ -311,26 +294,31 @@ void ioat2_timer_event(unsigned long data)
 		 * acknowledged a pending completion once, then be more
 		 * forceful with a restart
 		 */
-		if (ioat_cleanup_preamble(chan, &phys_complete))
+		spin_lock_bh(&chan->cleanup_lock);
+		if (ioat_cleanup_preamble(chan, &phys_complete)) {
 			__cleanup(ioat, phys_complete);
-		else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+		} else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
+			spin_lock_bh(&ioat->prep_lock);
 			ioat2_restart_channel(ioat);
-		else {
+			spin_unlock_bh(&ioat->prep_lock);
+		} else {
 			set_bit(IOAT_COMPLETION_ACK, &chan->state);
 			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
 		}
-		spin_unlock_bh(&ioat->ring_lock);
+		spin_unlock_bh(&chan->cleanup_lock);
 	} else {
 		u16 active;
 
 		/* if the ring is idle, empty, and oversized try to step
 		 * down the size
 		 */
-		spin_lock_bh(&ioat->ring_lock);
+		spin_lock_bh(&chan->cleanup_lock);
+		spin_lock_bh(&ioat->prep_lock);
 		active = ioat2_ring_active(ioat);
 		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
 			reshape_ring(ioat, ioat->alloc_order-1);
-		spin_unlock_bh(&ioat->ring_lock);
+		spin_unlock_bh(&ioat->prep_lock);
+		spin_unlock_bh(&chan->cleanup_lock);
 
 		/* keep shrinking until we get back to our minimum
 		 * default size
@@ -338,7 +326,6 @@ void ioat2_timer_event(unsigned long data)
 		if (ioat->alloc_order > ioat_get_alloc_order())
 			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
 	}
-	spin_unlock_bh(&chan->cleanup_lock);
 }
 
 static int ioat2_reset_hw(struct ioat_chan_common *chan)
@@ -392,7 +379,7 @@ int ioat2_enumerate_channels(struct ioatdma_device *device)
 
 		ioat_init_channel(device, &ioat->base, i);
 		ioat->xfercap_log = xfercap_log;
-		spin_lock_init(&ioat->ring_lock);
+		spin_lock_init(&ioat->prep_lock);
 		if (device->reset_hw(&ioat->base)) {
 			i = 0;
 			break;
@@ -418,8 +405,17 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
 
 	if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
 		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+	/* make descriptor updates visible before advancing ioat->head,
+	 * this is purposefully not smp_wmb() since we are also
+	 * publishing the descriptor updates to a dma device
+	 */
+	wmb();
+
+	ioat->head += ioat->produce;
+
 	ioat2_update_pending(ioat);
-	spin_unlock_bh(&ioat->ring_lock);
+	spin_unlock_bh(&ioat->prep_lock);
 
 	return cookie;
 }
@@ -531,13 +527,15 @@ int ioat2_alloc_chan_resources(struct dma_chan *c)
 	if (!ring)
 		return -ENOMEM;
 
-	spin_lock_bh(&ioat->ring_lock);
+	spin_lock_bh(&chan->cleanup_lock);
+	spin_lock_bh(&ioat->prep_lock);
 	ioat->ring = ring;
 	ioat->head = 0;
 	ioat->issued = 0;
 	ioat->tail = 0;
 	ioat->alloc_order = order;
-	spin_unlock_bh(&ioat->ring_lock);
+	spin_unlock_bh(&ioat->prep_lock);
+	spin_unlock_bh(&chan->cleanup_lock);
 
 	tasklet_enable(&chan->cleanup_task);
 	ioat2_start_null_desc(ioat);
@@ -553,7 +551,7 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
 	 */
 	struct ioat_chan_common *chan = &ioat->base;
 	struct dma_chan *c = &chan->common;
-	const u16 curr_size = ioat2_ring_mask(ioat) + 1;
+	const u16 curr_size = ioat2_ring_size(ioat);
 	const u16 active = ioat2_ring_active(ioat);
 	const u16 new_size = 1 << order;
 	struct ioat_ring_ent **ring;
@@ -653,54 +651,61 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
 }
 
 /**
- * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
- * @idx: gets starting descriptor index on successful allocation
+ * ioat2_check_space_lock - verify space and grab ring producer lock
  * @ioat: ioat2,3 channel (ring) to operate on
  * @num_descs: allocation length
  */
-int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
+int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs)
 {
 	struct ioat_chan_common *chan = &ioat->base;
+	bool retry;
 
-	spin_lock_bh(&ioat->ring_lock);
+ retry:
+	spin_lock_bh(&ioat->prep_lock);
 	/* never allow the last descriptor to be consumed, we need at
 	 * least one free at all times to allow for on-the-fly ring
 	 * resizing.
 	 */
-	while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
-		if (reshape_ring(ioat, ioat->alloc_order + 1) &&
-		    ioat2_ring_space(ioat) > num_descs)
-			break;
-
-		if (printk_ratelimit())
-			dev_dbg(to_dev(chan),
-				"%s: ring full! num_descs: %d (%x:%x:%x)\n",
-				__func__, num_descs, ioat->head, ioat->tail,
-				ioat->issued);
-		spin_unlock_bh(&ioat->ring_lock);
-
-		/* progress reclaim in the allocation failure case we
-		 * may be called under bh_disabled so we need to trigger
-		 * the timer event directly
-		 */
-		spin_lock_bh(&chan->cleanup_lock);
-		if (jiffies > chan->timer.expires &&
-		    timer_pending(&chan->timer)) {
-			struct ioatdma_device *device = chan->device;
-
-			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
-			spin_unlock_bh(&chan->cleanup_lock);
-			device->timer_fn((unsigned long) &chan->common);
-		} else
-			spin_unlock_bh(&chan->cleanup_lock);
-		return -ENOMEM;
+	if (likely(ioat2_ring_space(ioat) > num_descs)) {
+		dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
+			__func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+		ioat->produce = num_descs;
+		return 0; /* with ioat->prep_lock held */
 	}
+	retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state);
+	spin_unlock_bh(&ioat->prep_lock);
 
-	dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
-		__func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+	/* is another cpu already trying to expand the ring? */
+	if (retry)
+		goto retry;
 
-	*idx = ioat2_desc_alloc(ioat, num_descs);
-	return 0; /* with ioat->ring_lock held */
+	spin_lock_bh(&chan->cleanup_lock);
+	spin_lock_bh(&ioat->prep_lock);
+	retry = reshape_ring(ioat, ioat->alloc_order + 1);
+	clear_bit(IOAT_RESHAPE_PENDING, &chan->state);
+	spin_unlock_bh(&ioat->prep_lock);
+	spin_unlock_bh(&chan->cleanup_lock);
+
+	/* if we were able to expand the ring retry the allocation */
+	if (retry)
+		goto retry;
+
+	if (printk_ratelimit())
+		dev_dbg(to_dev(chan), "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+			__func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+
+	/* progress reclaim in the allocation failure case we may be
+	 * called under bh_disabled so we need to trigger the timer
+	 * event directly
+	 */
+	if (jiffies > chan->timer.expires && timer_pending(&chan->timer)) {
+		struct ioatdma_device *device = chan->device;
+
+		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+		device->timer_fn((unsigned long) &chan->common);
+	}
+
+	return -ENOMEM;
 }
 
 struct dma_async_tx_descriptor *
@@ -713,14 +718,11 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
 	dma_addr_t dst = dma_dest;
 	dma_addr_t src = dma_src;
 	size_t total_len = len;
-	int num_descs;
-	u16 idx;
-	int i;
+	int num_descs, idx, i;
 
 	num_descs = ioat2_xferlen_to_descs(ioat, len);
-	if (likely(num_descs) &&
-	    ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
-		/* pass */;
+	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
+		idx = ioat->head;
 	else
 		return NULL;
 	i = 0;
@@ -777,7 +779,8 @@ void ioat2_free_chan_resources(struct dma_chan *c)
 	device->cleanup_fn((unsigned long) c);
 	device->reset_hw(chan);
 
-	spin_lock_bh(&ioat->ring_lock);
+	spin_lock_bh(&chan->cleanup_lock);
+	spin_lock_bh(&ioat->prep_lock);
 	descs = ioat2_ring_space(ioat);
 	dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
 	for (i = 0; i < descs; i++) {
@@ -800,7 +803,8 @@ void ioat2_free_chan_resources(struct dma_chan *c)
 	ioat->alloc_order = 0;
 	pci_pool_free(device->completion_pool, chan->completion,
 		      chan->completion_dma);
-	spin_unlock_bh(&ioat->ring_lock);
+	spin_unlock_bh(&ioat->prep_lock);
+	spin_unlock_bh(&chan->cleanup_lock);
 
 	chan->last_completion = 0;
 	chan->completion_dma = 0;
@@ -855,7 +859,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
 	dma->device_issue_pending = ioat2_issue_pending;
 	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
 	dma->device_free_chan_resources = ioat2_free_chan_resources;
-	dma->device_is_tx_complete = ioat_is_dma_complete;
+	dma->device_tx_status = ioat_tx_status;
 
 	err = ioat_probe(device);
 	if (err)
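
A quick orientation on the pattern the diff adopts: descriptor submission now serializes on ioat->prep_lock (producer side, advancing ioat->head after wmb() has published the descriptor writes), while completion reclaim serializes on chan->cleanup_lock (consumer side, advancing ioat->tail), so the two paths no longer contend on the old single ring_lock. The sketch below is a self-contained userspace analogy of that two-lock ring, not the driver's code; struct ring, ring_produce() and ring_consume() are illustrative names only, and pthread mutexes plus C11 atomics stand in for the kernel's spinlocks and memory barriers.

/* Two-lock ring sketch: build with `gcc -O2 -pthread ring_sketch.c` */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define RING_ORDER 4
#define RING_SIZE  (1u << RING_ORDER)

struct ring {
	pthread_mutex_t prep_lock;    /* producer side: serializes head updates */
	pthread_mutex_t cleanup_lock; /* consumer side: serializes tail updates */
	atomic_uint head;             /* next free slot, written only by producers */
	atomic_uint tail;             /* oldest busy slot, written only by the consumer */
	int slots[RING_SIZE];
};

static unsigned int ring_active(struct ring *r)
{
	/* free-running counters; unsigned subtraction handles wrap-around */
	return atomic_load(&r->head) - atomic_load(&r->tail);
}

/* reserve one slot and publish it: the prep_lock/head side of the patch */
static int ring_produce(struct ring *r, int val)
{
	int ret = -1;

	pthread_mutex_lock(&r->prep_lock);
	if (ring_active(r) < RING_SIZE - 1) {   /* always keep one slot free */
		unsigned int head = atomic_load(&r->head);

		r->slots[head & (RING_SIZE - 1)] = val;
		/* make the slot contents visible before advancing head */
		atomic_store_explicit(&r->head, head + 1, memory_order_release);
		ret = 0;
	}
	pthread_mutex_unlock(&r->prep_lock);
	return ret;
}

/* reclaim one completed slot: the cleanup_lock/tail side of the patch */
static int ring_consume(struct ring *r, int *val)
{
	int ret = -1;

	pthread_mutex_lock(&r->cleanup_lock);
	if (ring_active(r)) {
		unsigned int tail = atomic_load_explicit(&r->tail, memory_order_relaxed);

		*val = r->slots[tail & (RING_SIZE - 1)];
		/* finish reading the slot before handing it back to producers */
		atomic_store_explicit(&r->tail, tail + 1, memory_order_release);
		ret = 0;
	}
	pthread_mutex_unlock(&r->cleanup_lock);
	return ret;
}

int main(void)
{
	struct ring r = { .head = 0, .tail = 0 };
	int v;

	pthread_mutex_init(&r.prep_lock, NULL);
	pthread_mutex_init(&r.cleanup_lock, NULL);

	ring_produce(&r, 42);
	if (ring_consume(&r, &v) == 0)
		printf("consumed %d, %u entries still active\n", v, ring_active(&r));
	return 0;
}

As in the patch, each side writes only its own index and reads the other side's index through an ordered load, and one slot is always kept free, which the driver's own comment explains is what allows on-the-fly ring resizing.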