diff options
author | mark gross <mgross@linux.intel.com> | 2008-04-18 16:53:58 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2008-04-21 00:47:13 -0400 |
commit | 80b20dd853a2b00802e6254dc9f690f0da1a925c (patch) | |
tree | 38f7718fab83b9892aa76be5d782571a3abdd43a | |
parent | a391f19717984a8f70756b29074298f379fcfdbc (diff) |
PCI: pci-iommu-iotlb-flushing-speedup
The following patch is an update to use an array instead of a list of
IOVAs in the implementation of deferred IOTLB flushes. It takes
inspiration from sba_iommu.c.
I like this implementation better as it encapsulates the batch process
within intel-iommu.c, and no longer needs extra fields in struct iova in iova.h (which is shared).
Performance data: Netperf 32-byte UDP streaming
2.6.25-rc3-mm1:
IOMMU-strict : 58Mbps @ 62% cpu
NO-IOMMU : 71Mbps @ 41% cpu
List-based IOMMU-default-batched-IOTLB flush: 66Mbps @ 57% cpu
with this patch:
IOMMU-strict : 73Mbps @ 75% cpu
NO-IOMMU : 74Mbps @ 42% cpu
Array-based IOMMU-default-batched-IOTLB flush: 72Mbps @ 62% cpu
Signed-off-by: <mgross@linux.intel.com>
Cc: Grant Grundler <grundler@parisc-linux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r-- | drivers/pci/intel-iommu.c | 76 | ||||
-rw-r--r-- | drivers/pci/iova.h | 2 |
2 files changed, 40 insertions, 38 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 8690a0d45d7f..301c68fab03b 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c | |||
@@ -59,8 +59,17 @@ static void flush_unmaps_timeout(unsigned long data); | |||
59 | DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); | 59 | DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); |
60 | 60 | ||
61 | static struct intel_iommu *g_iommus; | 61 | static struct intel_iommu *g_iommus; |
62 | |||
63 | #define HIGH_WATER_MARK 250 | ||
64 | struct deferred_flush_tables { | ||
65 | int next; | ||
66 | struct iova *iova[HIGH_WATER_MARK]; | ||
67 | struct dmar_domain *domain[HIGH_WATER_MARK]; | ||
68 | }; | ||
69 | |||
70 | static struct deferred_flush_tables *deferred_flush; | ||
71 | |||
62 | /* bitmap for indexing intel_iommus */ | 72 | /* bitmap for indexing intel_iommus */ |
63 | static unsigned long *g_iommus_to_flush; | ||
64 | static int g_num_of_iommus; | 73 | static int g_num_of_iommus; |
65 | 74 | ||
66 | static DEFINE_SPINLOCK(async_umap_flush_lock); | 75 | static DEFINE_SPINLOCK(async_umap_flush_lock); |
@@ -68,10 +77,6 @@ static LIST_HEAD(unmaps_to_do); | |||
68 | 77 | ||
69 | static int timer_on; | 78 | static int timer_on; |
70 | static long list_size; | 79 | static long list_size; |
71 | static int high_watermark; | ||
72 | |||
73 | static struct dentry *intel_iommu_debug, *debug; | ||
74 | |||
75 | 80 | ||
76 | static void domain_remove_dev_info(struct dmar_domain *domain); | 81 | static void domain_remove_dev_info(struct dmar_domain *domain); |
77 | 82 | ||
@@ -1692,7 +1697,7 @@ int __init init_dmars(void) | |||
1692 | struct dmar_rmrr_unit *rmrr; | 1697 | struct dmar_rmrr_unit *rmrr; |
1693 | struct pci_dev *pdev; | 1698 | struct pci_dev *pdev; |
1694 | struct intel_iommu *iommu; | 1699 | struct intel_iommu *iommu; |
1695 | int nlongs, i, ret, unit = 0; | 1700 | int i, ret, unit = 0; |
1696 | 1701 | ||
1697 | /* | 1702 | /* |
1698 | * for each drhd | 1703 | * for each drhd |
@@ -1711,17 +1716,16 @@ int __init init_dmars(void) | |||
1711 | */ | 1716 | */ |
1712 | } | 1717 | } |
1713 | 1718 | ||
1714 | nlongs = BITS_TO_LONGS(g_num_of_iommus); | ||
1715 | g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL); | ||
1716 | if (!g_iommus_to_flush) { | ||
1717 | printk(KERN_ERR "Intel-IOMMU: " | ||
1718 | "Allocating bitmap array failed\n"); | ||
1719 | return -ENOMEM; | ||
1720 | } | ||
1721 | |||
1722 | g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL); | 1719 | g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL); |
1723 | if (!g_iommus) { | 1720 | if (!g_iommus) { |
1724 | kfree(g_iommus_to_flush); | 1721 | ret = -ENOMEM; |
1722 | goto error; | ||
1723 | } | ||
1724 | |||
1725 | deferred_flush = kzalloc(g_num_of_iommus * | ||
1726 | sizeof(struct deferred_flush_tables), GFP_KERNEL); | ||
1727 | if (!deferred_flush) { | ||
1728 | kfree(g_iommus); | ||
1725 | ret = -ENOMEM; | 1729 | ret = -ENOMEM; |
1726 | goto error; | 1730 | goto error; |
1727 | } | 1731 | } |
@@ -1970,42 +1974,48 @@ error: | |||
1970 | 1974 | ||
1971 | static void flush_unmaps(void) | 1975 | static void flush_unmaps(void) |
1972 | { | 1976 | { |
1973 | struct iova *node, *n; | 1977 | int i, j; |
1974 | unsigned long flags; | ||
1975 | int i; | ||
1976 | 1978 | ||
1977 | spin_lock_irqsave(&async_umap_flush_lock, flags); | ||
1978 | timer_on = 0; | 1979 | timer_on = 0; |
1979 | 1980 | ||
1980 | /* just flush them all */ | 1981 | /* just flush them all */ |
1981 | for (i = 0; i < g_num_of_iommus; i++) { | 1982 | for (i = 0; i < g_num_of_iommus; i++) { |
1982 | if (test_and_clear_bit(i, g_iommus_to_flush)) | 1983 | if (deferred_flush[i].next) { |
1983 | iommu_flush_iotlb_global(&g_iommus[i], 0); | 1984 | iommu_flush_iotlb_global(&g_iommus[i], 0); |
1985 | for (j = 0; j < deferred_flush[i].next; j++) { | ||
1986 | __free_iova(&deferred_flush[i].domain[j]->iovad, | ||
1987 | deferred_flush[i].iova[j]); | ||
1988 | } | ||
1989 | deferred_flush[i].next = 0; | ||
1990 | } | ||
1984 | } | 1991 | } |
1985 | 1992 | ||
1986 | list_for_each_entry_safe(node, n, &unmaps_to_do, list) { | ||
1987 | /* free iova */ | ||
1988 | list_del(&node->list); | ||
1989 | __free_iova(&((struct dmar_domain *)node->dmar)->iovad, node); | ||
1990 | |||
1991 | } | ||
1992 | list_size = 0; | 1993 | list_size = 0; |
1993 | spin_unlock_irqrestore(&async_umap_flush_lock, flags); | ||
1994 | } | 1994 | } |
1995 | 1995 | ||
1996 | static void flush_unmaps_timeout(unsigned long data) | 1996 | static void flush_unmaps_timeout(unsigned long data) |
1997 | { | 1997 | { |
1998 | unsigned long flags; | ||
1999 | |||
2000 | spin_lock_irqsave(&async_umap_flush_lock, flags); | ||
1998 | flush_unmaps(); | 2001 | flush_unmaps(); |
2002 | spin_unlock_irqrestore(&async_umap_flush_lock, flags); | ||
1999 | } | 2003 | } |
2000 | 2004 | ||
2001 | static void add_unmap(struct dmar_domain *dom, struct iova *iova) | 2005 | static void add_unmap(struct dmar_domain *dom, struct iova *iova) |
2002 | { | 2006 | { |
2003 | unsigned long flags; | 2007 | unsigned long flags; |
2008 | int next, iommu_id; | ||
2004 | 2009 | ||
2005 | spin_lock_irqsave(&async_umap_flush_lock, flags); | 2010 | spin_lock_irqsave(&async_umap_flush_lock, flags); |
2006 | iova->dmar = dom; | 2011 | if (list_size == HIGH_WATER_MARK) |
2007 | list_add(&iova->list, &unmaps_to_do); | 2012 | flush_unmaps(); |
2008 | set_bit((dom->iommu - g_iommus), g_iommus_to_flush); | 2013 | |
2014 | iommu_id = dom->iommu - g_iommus; | ||
2015 | next = deferred_flush[iommu_id].next; | ||
2016 | deferred_flush[iommu_id].domain[next] = dom; | ||
2017 | deferred_flush[iommu_id].iova[next] = iova; | ||
2018 | deferred_flush[iommu_id].next++; | ||
2009 | 2019 | ||
2010 | if (!timer_on) { | 2020 | if (!timer_on) { |
2011 | mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10)); | 2021 | mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10)); |
@@ -2054,8 +2064,6 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, | |||
2054 | * queue up the release of the unmap to save the 1/6th of the | 2064 | * queue up the release of the unmap to save the 1/6th of the |
2055 | * cpu used up by the iotlb flush operation... | 2065 | * cpu used up by the iotlb flush operation... |
2056 | */ | 2066 | */ |
2057 | if (list_size > high_watermark) | ||
2058 | flush_unmaps(); | ||
2059 | } | 2067 | } |
2060 | } | 2068 | } |
2061 | 2069 | ||
@@ -2380,10 +2388,6 @@ int __init intel_iommu_init(void) | |||
2380 | if (dmar_table_init()) | 2388 | if (dmar_table_init()) |
2381 | return -ENODEV; | 2389 | return -ENODEV; |
2382 | 2390 | ||
2383 | high_watermark = 250; | ||
2384 | intel_iommu_debug = debugfs_create_dir("intel_iommu", NULL); | ||
2385 | debug = debugfs_create_u32("high_watermark", S_IWUGO | S_IRUGO, | ||
2386 | intel_iommu_debug, &high_watermark); | ||
2387 | iommu_init_mempool(); | 2391 | iommu_init_mempool(); |
2388 | dmar_init_reserved_ranges(); | 2392 | dmar_init_reserved_ranges(); |
2389 | 2393 | ||
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h index 2f1317801b20..228f6c94b69c 100644 --- a/drivers/pci/iova.h +++ b/drivers/pci/iova.h | |||
@@ -24,8 +24,6 @@ struct iova { | |||
24 | struct rb_node node; | 24 | struct rb_node node; |
25 | unsigned long pfn_hi; /* IOMMU dish out addr hi */ | 25 | unsigned long pfn_hi; /* IOMMU dish out addr hi */ |
26 | unsigned long pfn_lo; /* IOMMU dish out addr lo */ | 26 | unsigned long pfn_lo; /* IOMMU dish out addr lo */ |
27 | struct list_head list; | ||
28 | void *dmar; | ||
29 | }; | 27 | }; |
30 | 28 | ||
31 | /* holds all the iova translations for a domain */ | 29 | /* holds all the iova translations for a domain */ |