aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorShannon Nelson <shannon.nelson@intel.com>2007-08-14 20:36:31 -0400
committerDavid S. Miller <davem@davemloft.net>2007-08-14 20:36:31 -0400
commit54a09feb0ebb018dadaebeb51e860154198abc83 (patch)
tree3e85288f49ba8376e58db649250d60c2560e5204 /drivers
parent9c29a377f99b42c59721112cd2388cf27547fc84 (diff)
[IOAT]: Remove redundant struct member to avoid descriptor cache miss
The layout for struct ioat_desc_sw is non-optimal and causes an extra cache hit for every descriptor processed. By tightening up the struct layout and removing one item, we pull in the fields that get used in the speedpath and get a little better performance. Before: ------- struct ioat_desc_sw { struct ioat_dma_descriptor * hw; /* 0 8 */ struct list_head node; /* 8 16 */ int tx_cnt; /* 24 4 */ /* XXX 4 bytes hole, try to pack */ dma_addr_t src; /* 32 8 */ __u32 src_len; /* 40 4 */ /* XXX 4 bytes hole, try to pack */ dma_addr_t dst; /* 48 8 */ __u32 dst_len; /* 56 4 */ /* XXX 4 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dma_async_tx_descriptor async_tx; /* 64 144 */ /* --- cacheline 3 boundary (192 bytes) was 16 bytes ago --- */ /* size: 208, cachelines: 4 */ /* sum members: 196, holes: 3, sum holes: 12 */ /* last cacheline: 16 bytes */ }; /* definitions: 1 */ After: ------ struct ioat_desc_sw { struct ioat_dma_descriptor * hw; /* 0 8 */ struct list_head node; /* 8 16 */ int tx_cnt; /* 24 4 */ __u32 len; /* 28 4 */ dma_addr_t src; /* 32 8 */ dma_addr_t dst; /* 40 8 */ struct dma_async_tx_descriptor async_tx; /* 48 144 */ /* --- cacheline 3 boundary (192 bytes) --- */ /* size: 192, cachelines: 3 */ }; /* definitions: 1 */ Signed-off-by: Shannon Nelson <shannon.nelson@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/dma/ioatdma.c7
-rw-r--r--drivers/dma/ioatdma.h3
2 files changed, 4 insertions, 6 deletions
diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index 5fbe56b5cea0..2d1f17865b64 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -347,8 +347,7 @@ ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
347 new->async_tx.ack = 0; /* client is in control of this ack */ 347 new->async_tx.ack = 0; /* client is in control of this ack */
348 new->async_tx.cookie = -EBUSY; 348 new->async_tx.cookie = -EBUSY;
349 349
350 pci_unmap_len_set(new, src_len, orig_len); 350 pci_unmap_len_set(new, len, orig_len);
351 pci_unmap_len_set(new, dst_len, orig_len);
352 spin_unlock_bh(&ioat_chan->desc_lock); 351 spin_unlock_bh(&ioat_chan->desc_lock);
353 352
354 return new ? &new->async_tx : NULL; 353 return new ? &new->async_tx : NULL;
@@ -423,11 +422,11 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
423 */ 422 */
424 pci_unmap_page(chan->device->pdev, 423 pci_unmap_page(chan->device->pdev,
425 pci_unmap_addr(desc, dst), 424 pci_unmap_addr(desc, dst),
426 pci_unmap_len(desc, dst_len), 425 pci_unmap_len(desc, len),
427 PCI_DMA_FROMDEVICE); 426 PCI_DMA_FROMDEVICE);
428 pci_unmap_page(chan->device->pdev, 427 pci_unmap_page(chan->device->pdev,
429 pci_unmap_addr(desc, src), 428 pci_unmap_addr(desc, src),
430 pci_unmap_len(desc, src_len), 429 pci_unmap_len(desc, len),
431 PCI_DMA_TODEVICE); 430 PCI_DMA_TODEVICE);
432 } 431 }
433 432
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
index d3726478031a..bf4dad70e0f5 100644
--- a/drivers/dma/ioatdma.h
+++ b/drivers/dma/ioatdma.h
@@ -111,10 +111,9 @@ struct ioat_desc_sw {
111 struct ioat_dma_descriptor *hw; 111 struct ioat_dma_descriptor *hw;
112 struct list_head node; 112 struct list_head node;
113 int tx_cnt; 113 int tx_cnt;
114 DECLARE_PCI_UNMAP_LEN(len)
114 DECLARE_PCI_UNMAP_ADDR(src) 115 DECLARE_PCI_UNMAP_ADDR(src)
115 DECLARE_PCI_UNMAP_LEN(src_len)
116 DECLARE_PCI_UNMAP_ADDR(dst) 116 DECLARE_PCI_UNMAP_ADDR(dst)
117 DECLARE_PCI_UNMAP_LEN(dst_len)
118 struct dma_async_tx_descriptor async_tx; 117 struct dma_async_tx_descriptor async_tx;
119}; 118};
120 119