aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/pci
diff options
context:
space:
mode:
author mark gross <mgross@linux.intel.com> 2008-04-18 16:53:58 -0400
committer Greg Kroah-Hartman <gregkh@suse.de> 2008-04-21 00:47:13 -0400
commit 80b20dd853a2b00802e6254dc9f690f0da1a925c (patch)
tree 38f7718fab83b9892aa76be5d782571a3abdd43a /drivers/pci
parent a391f19717984a8f70756b29074298f379fcfdbc (diff)
PCI: pci-iommu-iotlb-flushing-speedup
The following patch is an update to use an array instead of a list of IOVAs in the implementation of deferred iotlb flushes. It takes inspiration from sba_iommu.c.

I like this implementation better as it encapsulates the batch process within intel-iommu.c, and no longer touches iova.h (which is shared).

Performance data: Netperf 32byte UDP streaming

2.6.25-rc3-mm1:
IOMMU-strict : 58Mbps @ 62% cpu
NO-IOMMU : 71Mbps @ 41% cpu
List-based IOMMU-default-batched-IOTLB flush: 66Mbps @ 57% cpu

with this patch:
IOMMU-strict : 73Mbps @ 75% cpu
NO-IOMMU : 74Mbps @ 42% cpu
Array-based IOMMU-default-batched-IOTLB flush: 72Mbps @ 62% cpu

Signed-off-by: <mgross@linux.intel.com>
Cc: Grant Grundler <grundler@parisc-linux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'drivers/pci')
-rw-r--r-- drivers/pci/intel-iommu.c 76
-rw-r--r-- drivers/pci/iova.h 2
2 files changed, 40 insertions, 38 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8690a0d45d7f..301c68fab03b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -59,8 +59,17 @@ static void flush_unmaps_timeout(unsigned long data);
59DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); 59DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
60 60
61static struct intel_iommu *g_iommus; 61static struct intel_iommu *g_iommus;
62
63#define HIGH_WATER_MARK 250
64struct deferred_flush_tables {
65 int next;
66 struct iova *iova[HIGH_WATER_MARK];
67 struct dmar_domain *domain[HIGH_WATER_MARK];
68};
69
70static struct deferred_flush_tables *deferred_flush;
71
62/* bitmap for indexing intel_iommus */ 72/* bitmap for indexing intel_iommus */
63static unsigned long *g_iommus_to_flush;
64static int g_num_of_iommus; 73static int g_num_of_iommus;
65 74
66static DEFINE_SPINLOCK(async_umap_flush_lock); 75static DEFINE_SPINLOCK(async_umap_flush_lock);
@@ -68,10 +77,6 @@ static LIST_HEAD(unmaps_to_do);
68 77
69static int timer_on; 78static int timer_on;
70static long list_size; 79static long list_size;
71static int high_watermark;
72
73static struct dentry *intel_iommu_debug, *debug;
74
75 80
76static void domain_remove_dev_info(struct dmar_domain *domain); 81static void domain_remove_dev_info(struct dmar_domain *domain);
77 82
@@ -1692,7 +1697,7 @@ int __init init_dmars(void)
1692 struct dmar_rmrr_unit *rmrr; 1697 struct dmar_rmrr_unit *rmrr;
1693 struct pci_dev *pdev; 1698 struct pci_dev *pdev;
1694 struct intel_iommu *iommu; 1699 struct intel_iommu *iommu;
1695 int nlongs, i, ret, unit = 0; 1700 int i, ret, unit = 0;
1696 1701
1697 /* 1702 /*
1698 * for each drhd 1703 * for each drhd
@@ -1711,17 +1716,16 @@ int __init init_dmars(void)
1711 */ 1716 */
1712 } 1717 }
1713 1718
1714 nlongs = BITS_TO_LONGS(g_num_of_iommus);
1715 g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
1716 if (!g_iommus_to_flush) {
1717 printk(KERN_ERR "Intel-IOMMU: "
1718 "Allocating bitmap array failed\n");
1719 return -ENOMEM;
1720 }
1721
1722 g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL); 1719 g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
1723 if (!g_iommus) { 1720 if (!g_iommus) {
1724 kfree(g_iommus_to_flush); 1721 ret = -ENOMEM;
1722 goto error;
1723 }
1724
1725 deferred_flush = kzalloc(g_num_of_iommus *
1726 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1727 if (!deferred_flush) {
1728 kfree(g_iommus);
1725 ret = -ENOMEM; 1729 ret = -ENOMEM;
1726 goto error; 1730 goto error;
1727 } 1731 }
@@ -1970,42 +1974,48 @@ error:
1970 1974
1971static void flush_unmaps(void) 1975static void flush_unmaps(void)
1972{ 1976{
1973 struct iova *node, *n; 1977 int i, j;
1974 unsigned long flags;
1975 int i;
1976 1978
1977 spin_lock_irqsave(&async_umap_flush_lock, flags);
1978 timer_on = 0; 1979 timer_on = 0;
1979 1980
1980 /* just flush them all */ 1981 /* just flush them all */
1981 for (i = 0; i < g_num_of_iommus; i++) { 1982 for (i = 0; i < g_num_of_iommus; i++) {
1982 if (test_and_clear_bit(i, g_iommus_to_flush)) 1983 if (deferred_flush[i].next) {
1983 iommu_flush_iotlb_global(&g_iommus[i], 0); 1984 iommu_flush_iotlb_global(&g_iommus[i], 0);
1985 for (j = 0; j < deferred_flush[i].next; j++) {
1986 __free_iova(&deferred_flush[i].domain[j]->iovad,
1987 deferred_flush[i].iova[j]);
1988 }
1989 deferred_flush[i].next = 0;
1990 }
1984 } 1991 }
1985 1992
1986 list_for_each_entry_safe(node, n, &unmaps_to_do, list) {
1987 /* free iova */
1988 list_del(&node->list);
1989 __free_iova(&((struct dmar_domain *)node->dmar)->iovad, node);
1990
1991 }
1992 list_size = 0; 1993 list_size = 0;
1993 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1994} 1994}
1995 1995
1996static void flush_unmaps_timeout(unsigned long data) 1996static void flush_unmaps_timeout(unsigned long data)
1997{ 1997{
1998 unsigned long flags;
1999
2000 spin_lock_irqsave(&async_umap_flush_lock, flags);
1998 flush_unmaps(); 2001 flush_unmaps();
2002 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1999} 2003}
2000 2004
2001static void add_unmap(struct dmar_domain *dom, struct iova *iova) 2005static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2002{ 2006{
2003 unsigned long flags; 2007 unsigned long flags;
2008 int next, iommu_id;
2004 2009
2005 spin_lock_irqsave(&async_umap_flush_lock, flags); 2010 spin_lock_irqsave(&async_umap_flush_lock, flags);
2006 iova->dmar = dom; 2011 if (list_size == HIGH_WATER_MARK)
2007 list_add(&iova->list, &unmaps_to_do); 2012 flush_unmaps();
2008 set_bit((dom->iommu - g_iommus), g_iommus_to_flush); 2013
2014 iommu_id = dom->iommu - g_iommus;
2015 next = deferred_flush[iommu_id].next;
2016 deferred_flush[iommu_id].domain[next] = dom;
2017 deferred_flush[iommu_id].iova[next] = iova;
2018 deferred_flush[iommu_id].next++;
2009 2019
2010 if (!timer_on) { 2020 if (!timer_on) {
2011 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10)); 2021 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
@@ -2054,8 +2064,6 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
2054 * queue up the release of the unmap to save the 1/6th of the 2064 * queue up the release of the unmap to save the 1/6th of the
2055 * cpu used up by the iotlb flush operation... 2065 * cpu used up by the iotlb flush operation...
2056 */ 2066 */
2057 if (list_size > high_watermark)
2058 flush_unmaps();
2059 } 2067 }
2060} 2068}
2061 2069
@@ -2380,10 +2388,6 @@ int __init intel_iommu_init(void)
2380 if (dmar_table_init()) 2388 if (dmar_table_init())
2381 return -ENODEV; 2389 return -ENODEV;
2382 2390
2383 high_watermark = 250;
2384 intel_iommu_debug = debugfs_create_dir("intel_iommu", NULL);
2385 debug = debugfs_create_u32("high_watermark", S_IWUGO | S_IRUGO,
2386 intel_iommu_debug, &high_watermark);
2387 iommu_init_mempool(); 2391 iommu_init_mempool();
2388 dmar_init_reserved_ranges(); 2392 dmar_init_reserved_ranges();
2389 2393
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h
index 2f1317801b20..228f6c94b69c 100644
--- a/drivers/pci/iova.h
+++ b/drivers/pci/iova.h
@@ -24,8 +24,6 @@ struct iova {
24 struct rb_node node; 24 struct rb_node node;
25 unsigned long pfn_hi; /* IOMMU dish out addr hi */ 25 unsigned long pfn_hi; /* IOMMU dish out addr hi */
26 unsigned long pfn_lo; /* IOMMU dish out addr lo */ 26 unsigned long pfn_lo; /* IOMMU dish out addr lo */
27 struct list_head list;
28 void *dmar;
29}; 27};
30 28
31/* holds all the iova translations for a domain */ 29/* holds all the iova translations for a domain */