aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Shannon Nelson <shannon.nelson@intel.com> 2007-10-18 06:07:14 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-18 17:37:32 -0400
commit: 7f2b291f56d08e001454d16d3c92e175434898b3 (patch)
tree: 41b0c324e93db47fd5114fed2fddbba963492383
parent: 5149fd010f404889b7d8f79159057791fbb817b1 (diff)
I/OAT: Tighten descriptor setup performance
The change to the async_tx interface cost this driver some performance by spreading the descriptor setup across several functions, including multiple passes over the new descriptor chain. Here we bring the work back into one primary function and only do one pass.

[akpm@linux-foundation.org: cleanups, uninline]
Signed-off-by: Shannon Nelson <shannon.nelson@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/dma/ioat_dma.c 173
-rw-r--r-- drivers/dma/ioatdma.h 6
2 files changed, 95 insertions, 84 deletions
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index c44f5517edbd..15906027c8de 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -46,9 +46,12 @@
46/* internal functions */ 46/* internal functions */
47static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); 47static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
48static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); 48static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
49static struct ioat_desc_sw *
50ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
49 51
50static struct ioat_dma_chan *ioat_lookup_chan_by_index(struct ioatdma_device *device, 52static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
51 int index) 53 struct ioatdma_device *device,
54 int index)
52{ 55{
53 return device->idx[index]; 56 return device->idx[index];
54} 57}
@@ -148,57 +151,90 @@ static void ioat_set_src(dma_addr_t addr,
148 struct dma_async_tx_descriptor *tx, 151 struct dma_async_tx_descriptor *tx,
149 int index) 152 int index)
150{ 153{
151 struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx); 154 tx_to_ioat_desc(tx)->src = addr;
152 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
153
154 pci_unmap_addr_set(desc, src, addr);
155
156 list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
157 iter->hw->src_addr = addr;
158 addr += ioat_chan->xfercap;
159 }
160
161} 155}
162 156
163static void ioat_set_dest(dma_addr_t addr, 157static void ioat_set_dest(dma_addr_t addr,
164 struct dma_async_tx_descriptor *tx, 158 struct dma_async_tx_descriptor *tx,
165 int index) 159 int index)
166{ 160{
167 struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx); 161 tx_to_ioat_desc(tx)->dst = addr;
168 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
169
170 pci_unmap_addr_set(desc, dst, addr);
171
172 list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
173 iter->hw->dst_addr = addr;
174 addr += ioat_chan->xfercap;
175 }
176} 162}
177 163
178static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx) 164static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx)
179{ 165{
180 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); 166 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
181 struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); 167 struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
168 struct ioat_desc_sw *prev, *new;
169 struct ioat_dma_descriptor *hw;
182 int append = 0; 170 int append = 0;
183 dma_cookie_t cookie; 171 dma_cookie_t cookie;
184 struct ioat_desc_sw *group_start; 172 LIST_HEAD(new_chain);
173 u32 copy;
174 size_t len;
175 dma_addr_t src, dst;
176 int orig_ack;
177 unsigned int desc_count = 0;
178
179 /* src and dest and len are stored in the initial descriptor */
180 len = first->len;
181 src = first->src;
182 dst = first->dst;
183 orig_ack = first->async_tx.ack;
184 new = first;
185 185
186 group_start = list_entry(desc->async_tx.tx_list.next,
187 struct ioat_desc_sw, node);
188 spin_lock_bh(&ioat_chan->desc_lock); 186 spin_lock_bh(&ioat_chan->desc_lock);
187 prev = to_ioat_desc(ioat_chan->used_desc.prev);
188 prefetch(prev->hw);
189 do {
190 copy = min((u32) len, ioat_chan->xfercap);
191
192 new->async_tx.ack = 1;
193
194 hw = new->hw;
195 hw->size = copy;
196 hw->ctl = 0;
197 hw->src_addr = src;
198 hw->dst_addr = dst;
199 hw->next = 0;
200
201 /* chain together the physical address list for the HW */
202 wmb();
203 prev->hw->next = (u64) new->async_tx.phys;
204
205 len -= copy;
206 dst += copy;
207 src += copy;
208
209 list_add_tail(&new->node, &new_chain);
210 desc_count++;
211 prev = new;
212 } while (len && (new = ioat_dma_get_next_descriptor(ioat_chan)));
213
214 hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
215 new->tx_cnt = desc_count;
216 new->async_tx.ack = orig_ack; /* client is in control of this ack */
217
218 /* store the original values for use in later cleanup */
219 if (new != first) {
220 new->src = first->src;
221 new->dst = first->dst;
222 new->len = first->len;
223 }
224
189 /* cookie incr and addition to used_list must be atomic */ 225 /* cookie incr and addition to used_list must be atomic */
190 cookie = ioat_chan->common.cookie; 226 cookie = ioat_chan->common.cookie;
191 cookie++; 227 cookie++;
192 if (cookie < 0) 228 if (cookie < 0)
193 cookie = 1; 229 cookie = 1;
194 ioat_chan->common.cookie = desc->async_tx.cookie = cookie; 230 ioat_chan->common.cookie = new->async_tx.cookie = cookie;
195 231
196 /* write address into NextDescriptor field of last desc in chain */ 232 /* write address into NextDescriptor field of last desc in chain */
197 to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = 233 to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
198 group_start->async_tx.phys; 234 first->async_tx.phys;
199 list_splice_init(&desc->async_tx.tx_list, ioat_chan->used_desc.prev); 235 __list_splice(&new_chain, ioat_chan->used_desc.prev);
200 236
201 ioat_chan->pending += desc->tx_cnt; 237 ioat_chan->pending += desc_count;
202 if (ioat_chan->pending >= 4) { 238 if (ioat_chan->pending >= 4) {
203 append = 1; 239 append = 1;
204 ioat_chan->pending = 0; 240 ioat_chan->pending = 0;
@@ -348,6 +384,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
348 ioat_chan->last_completion = ioat_chan->completion_addr = 0; 384 ioat_chan->last_completion = ioat_chan->completion_addr = 0;
349 ioat_chan->pending = 0; 385 ioat_chan->pending = 0;
350} 386}
387
351/** 388/**
352 * ioat_dma_get_next_descriptor - return the next available descriptor 389 * ioat_dma_get_next_descriptor - return the next available descriptor
353 * @ioat_chan: IOAT DMA channel handle 390 * @ioat_chan: IOAT DMA channel handle
@@ -356,8 +393,8 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
356 * channel's desc_lock held. Allocates more descriptors if the channel 393 * channel's desc_lock held. Allocates more descriptors if the channel
357 * has run out. 394 * has run out.
358 */ 395 */
359static struct ioat_desc_sw *ioat_dma_get_next_descriptor( 396static struct ioat_desc_sw *
360 struct ioat_dma_chan *ioat_chan) 397ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
361{ 398{
362 struct ioat_desc_sw *new = NULL; 399 struct ioat_desc_sw *new = NULL;
363 400
@@ -382,51 +419,11 @@ static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy(
382 int int_en) 419 int int_en)
383{ 420{
384 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); 421 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
385 struct ioat_desc_sw *first, *prev, *new; 422 struct ioat_desc_sw *new;
386 LIST_HEAD(new_chain);
387 u32 copy;
388 size_t orig_len;
389 int desc_count = 0;
390
391 if (!len)
392 return NULL;
393
394 orig_len = len;
395
396 first = NULL;
397 prev = NULL;
398 423
399 spin_lock_bh(&ioat_chan->desc_lock); 424 spin_lock_bh(&ioat_chan->desc_lock);
400 while (len) { 425 new = ioat_dma_get_next_descriptor(ioat_chan);
401 new = ioat_dma_get_next_descriptor(ioat_chan); 426 new->len = len;
402 copy = min((u32) len, ioat_chan->xfercap);
403
404 new->hw->size = copy;
405 new->hw->ctl = 0;
406 new->async_tx.cookie = 0;
407 new->async_tx.ack = 1;
408
409 /* chain together the physical address list for the HW */
410 if (!first)
411 first = new;
412 else
413 prev->hw->next = (u64) new->async_tx.phys;
414
415 prev = new;
416 len -= copy;
417 list_add_tail(&new->node, &new_chain);
418 desc_count++;
419 }
420
421 list_splice(&new_chain, &new->async_tx.tx_list);
422
423 new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
424 new->hw->next = 0;
425 new->tx_cnt = desc_count;
426 new->async_tx.ack = 0; /* client is in control of this ack */
427 new->async_tx.cookie = -EBUSY;
428
429 pci_unmap_len_set(new, len, orig_len);
430 spin_unlock_bh(&ioat_chan->desc_lock); 427 spin_unlock_bh(&ioat_chan->desc_lock);
431 428
432 return new ? &new->async_tx : NULL; 429 return new ? &new->async_tx : NULL;
@@ -464,7 +461,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
464 461
465 prefetch(ioat_chan->completion_virt); 462 prefetch(ioat_chan->completion_virt);
466 463
467 if (!spin_trylock(&ioat_chan->cleanup_lock)) 464 if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
468 return; 465 return;
469 466
470 /* The completion writeback can happen at any time, 467 /* The completion writeback can happen at any time,
@@ -474,12 +471,15 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
474 471
475#if (BITS_PER_LONG == 64) 472#if (BITS_PER_LONG == 64)
476 phys_complete = 473 phys_complete =
477 ioat_chan->completion_virt->full & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; 474 ioat_chan->completion_virt->full
475 & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
478#else 476#else
479 phys_complete = ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; 477 phys_complete =
478 ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
480#endif 479#endif
481 480
482 if ((ioat_chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == 481 if ((ioat_chan->completion_virt->full
482 & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
483 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { 483 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
484 dev_err(&ioat_chan->device->pdev->dev, 484 dev_err(&ioat_chan->device->pdev->dev,
485 "Channel halted, chanerr = %x\n", 485 "Channel halted, chanerr = %x\n",
@@ -489,7 +489,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
489 } 489 }
490 490
491 if (phys_complete == ioat_chan->last_completion) { 491 if (phys_complete == ioat_chan->last_completion) {
492 spin_unlock(&ioat_chan->cleanup_lock); 492 spin_unlock_bh(&ioat_chan->cleanup_lock);
493 return; 493 return;
494 } 494 }
495 495
@@ -548,7 +548,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
548 if (cookie != 0) 548 if (cookie != 0)
549 ioat_chan->completed_cookie = cookie; 549 ioat_chan->completed_cookie = cookie;
550 550
551 spin_unlock(&ioat_chan->cleanup_lock); 551 spin_unlock_bh(&ioat_chan->cleanup_lock);
552} 552}
553 553
554static void ioat_dma_dependency_added(struct dma_chan *chan) 554static void ioat_dma_dependency_added(struct dma_chan *chan)
@@ -613,8 +613,13 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
613 spin_lock_bh(&ioat_chan->desc_lock); 613 spin_lock_bh(&ioat_chan->desc_lock);
614 614
615 desc = ioat_dma_get_next_descriptor(ioat_chan); 615 desc = ioat_dma_get_next_descriptor(ioat_chan);
616 desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; 616 desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
617 | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
618 | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
617 desc->hw->next = 0; 619 desc->hw->next = 0;
620 desc->hw->size = 0;
621 desc->hw->src_addr = 0;
622 desc->hw->dst_addr = 0;
618 desc->async_tx.ack = 1; 623 desc->async_tx.ack = 1;
619 624
620 list_add_tail(&desc->node, &ioat_chan->used_desc); 625 list_add_tail(&desc->node, &ioat_chan->used_desc);
@@ -688,6 +693,12 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
688 DMA_FROM_DEVICE); 693 DMA_FROM_DEVICE);
689 ioat_set_dest(addr, tx, 0); 694 ioat_set_dest(addr, tx, 0);
690 cookie = ioat_tx_submit(tx); 695 cookie = ioat_tx_submit(tx);
696 if (cookie < 0) {
697 dev_err(&device->pdev->dev,
698 "Self-test setup failed, disabling\n");
699 err = -ENODEV;
700 goto free_resources;
701 }
691 ioat_dma_memcpy_issue_pending(dma_chan); 702 ioat_dma_memcpy_issue_pending(dma_chan);
692 msleep(1); 703 msleep(1);
693 704
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
index d3643f264507..5f9881e7b0ed 100644
--- a/drivers/dma/ioatdma.h
+++ b/drivers/dma/ioatdma.h
@@ -124,9 +124,9 @@ struct ioat_desc_sw {
124 struct ioat_dma_descriptor *hw; 124 struct ioat_dma_descriptor *hw;
125 struct list_head node; 125 struct list_head node;
126 int tx_cnt; 126 int tx_cnt;
127 DECLARE_PCI_UNMAP_LEN(len) 127 size_t len;
128 DECLARE_PCI_UNMAP_ADDR(src) 128 dma_addr_t src;
129 DECLARE_PCI_UNMAP_ADDR(dst) 129 dma_addr_t dst;
130 struct dma_async_tx_descriptor async_tx; 130 struct dma_async_tx_descriptor async_tx;
131}; 131};
132 132