author		mark gross <mgross@linux.intel.com>	2008-03-04 18:22:08 -0500
committer	Greg Kroah-Hartman <gregkh@suse.de>	2008-04-21 00:47:07 -0400
commit		5e0d2a6fc094a9b5047998deefeb1254c66856ee (patch)
tree		eb4f5bfbd1b5f937685c1f980ca83fc21c377fea
parent		0255f543d9888fb5c5fbcd265ca2eee2d6ecff6a (diff)
PCI: iommu: iotlb flushing
This patch batches up the flushing of the IOTLB for the DMAR implementation
found in Intel VT-d hardware. It works by building a list of to-be-flushed
IOTLB entries and a bitmap of the DMAR engines they belong to.

Once either a high-water mark is crossed (250 entries, accessible via debugfs)
or 10ms have passed, the queued IOVAs are reclaimed and the associated DMAR
engines are IOTLB-flushed.

This approach recovers 15 to 20% of the performance lost when using the IOMMU
in my netperf UDP-stream benchmark with small packets. The batching can be
disabled with the kernel boot parameter "intel_iommu=strict".

Note that deferring the flushes does weaken the IOMMU protections a bit.
Signed-off-by: Mark Gross <mgross@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
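For orientation, a condensed, user-space sketch of the deferred-flush scheme the
patch introduces. This is not the patched kernel code (the real implementation
in intel-iommu.c uses a spinlock-protected list of struct iova, a per-IOMMU
dirty bitmap, and a kernel timer); names such as pending_unmap are illustrative
only.

#include <stdio.h>
#include <stdlib.h>

#define HIGH_WATERMARK 250	/* kernel default, tunable via debugfs */

struct pending_unmap {
	unsigned long iova;		/* I/O virtual address awaiting release */
	struct pending_unmap *next;
};

static struct pending_unmap *unmaps_to_do;	/* deferred-unmap backlog */
static int list_size;

/* Stand-in for flush_unmaps(): one global IOTLB flush, then free the backlog. */
static void flush_unmaps(void)
{
	struct pending_unmap *p = unmaps_to_do;

	printf("IOTLB flush covers %d deferred unmaps\n", list_size);
	while (p) {
		struct pending_unmap *n = p->next;
		free(p);			/* stand-in for __free_iova() */
		p = n;
	}
	unmaps_to_do = NULL;
	list_size = 0;
}

/*
 * Stand-in for add_unmap(): queue the IOVA instead of flushing right away.
 * The kernel code additionally arms a 10ms timer the first time the list
 * goes non-empty, so a quiet system still gets flushed promptly.
 */
static void add_unmap(unsigned long iova)
{
	struct pending_unmap *p = malloc(sizeof(*p));

	if (!p)
		return;
	p->iova = iova;
	p->next = unmaps_to_do;
	unmaps_to_do = p;
	if (++list_size > HIGH_WATERMARK)
		flush_unmaps();		/* high-water mark crossed */
}

int main(void)
{
	unsigned long i;

	for (i = 0; i < 1000; i++)
		add_unmap(i);
	flush_unmaps();			/* what the timer callback would do */
	return 0;
}

In the patch itself the high-water mark defaults to 250 and is exported
read/write through debugfs ("intel_iommu/high_watermark"), so it can be
adjusted at run time; the 10ms timer bounds how long a stale IOTLB entry can
linger when traffic is light.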
-rw-r--r--	Documentation/kernel-parameters.txt |   4
-rw-r--r--	drivers/pci/intel-iommu.c           | 147
-rw-r--r--	drivers/pci/iova.h                  |   2
3 files changed, 135 insertions, 18 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e30d8fe4e4b1..f7492cd10093 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -847,6 +847,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			than 32 bit addressing. The default is to look
 			for translation below 32 bit and if not available
 			then look in the higher range.
+		strict [Default Off]
+			With this option on every unmap_single operation will
+			result in a hardware IOTLB flush operation as opposed
+			to batching them for performance.
 
 	io_delay=	[X86-32,X86-64] I/O delay method
 		0x80
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 4cb949f0ebd9..8690a0d45d7f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -22,6 +22,7 @@
 
 #include <linux/init.h>
 #include <linux/bitmap.h>
+#include <linux/debugfs.h>
 #include <linux/slab.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
@@ -31,6 +32,7 @@
 #include <linux/dmar.h>
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
+#include <linux/timer.h>
 #include "iova.h"
 #include "intel-iommu.h"
 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
@@ -51,11 +53,32 @@
 
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 
+
+static void flush_unmaps_timeout(unsigned long data);
+
+DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
+
+static struct intel_iommu *g_iommus;
+/* bitmap for indexing intel_iommus */
+static unsigned long *g_iommus_to_flush;
+static int g_num_of_iommus;
+
+static DEFINE_SPINLOCK(async_umap_flush_lock);
+static LIST_HEAD(unmaps_to_do);
+
+static int timer_on;
+static long list_size;
+static int high_watermark;
+
+static struct dentry *intel_iommu_debug, *debug;
+
+
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 static int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
+static int intel_iommu_strict;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -74,9 +97,13 @@ static int __init intel_iommu_setup(char *str)
 			printk(KERN_INFO
 				"Intel-IOMMU: disable GFX device mapping\n");
 		} else if (!strncmp(str, "forcedac", 8)) {
-			printk (KERN_INFO
+			printk(KERN_INFO
 				"Intel-IOMMU: Forcing DAC for PCI devices\n");
 			dmar_forcedac = 1;
+		} else if (!strncmp(str, "strict", 6)) {
+			printk(KERN_INFO
+				"Intel-IOMMU: disable batched IOTLB flush\n");
+			intel_iommu_strict = 1;
 		}
 
 		str += strcspn(str, ",");
@@ -966,17 +993,13 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 	set_bit(0, iommu->domain_ids);
 	return 0;
 }
-
-static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
+static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+					struct dmar_drhd_unit *drhd)
 {
-	struct intel_iommu *iommu;
 	int ret;
 	int map_size;
 	u32 ver;
 
-	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-	if (!iommu)
-		return NULL;
 	iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
 	if (!iommu->reg) {
 		printk(KERN_ERR "IOMMU: can't map the region\n");
@@ -1404,7 +1427,7 @@ static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
 	int index;
 
 	while (dev) {
-		for (index = 0; index < cnt; index ++)
+		for (index = 0; index < cnt; index++)
 			if (dev == devices[index])
 				return 1;
 
@@ -1669,7 +1692,7 @@ int __init init_dmars(void)
 	struct dmar_rmrr_unit *rmrr;
 	struct pci_dev *pdev;
 	struct intel_iommu *iommu;
-	int ret, unit = 0;
+	int nlongs, i, ret, unit = 0;
 
 	/*
 	 * for each drhd
@@ -1680,7 +1703,35 @@ int __init init_dmars(void)
 	for_each_drhd_unit(drhd) {
 		if (drhd->ignored)
 			continue;
-		iommu = alloc_iommu(drhd);
+		g_num_of_iommus++;
+		/*
+		 * lock not needed as this is only incremented in the single
+		 * threaded kernel __init code path all other access are read
+		 * only
+		 */
+	}
+
+	nlongs = BITS_TO_LONGS(g_num_of_iommus);
+	g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
+	if (!g_iommus_to_flush) {
+		printk(KERN_ERR "Intel-IOMMU: "
+			"Allocating bitmap array failed\n");
+		return -ENOMEM;
+	}
+
+	g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
+	if (!g_iommus) {
+		kfree(g_iommus_to_flush);
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	i = 0;
+	for_each_drhd_unit(drhd) {
+		if (drhd->ignored)
+			continue;
+		iommu = alloc_iommu(&g_iommus[i], drhd);
+		i++;
 		if (!iommu) {
 			ret = -ENOMEM;
 			goto error;
@@ -1713,7 +1764,6 @@ int __init init_dmars(void)
 	 * endfor
 	 */
 	for_each_rmrr_units(rmrr) {
-		int i;
 		for (i = 0; i < rmrr->devices_cnt; i++) {
 			pdev = rmrr->devices[i];
 			/* some BIOS lists non-exist devices in DMAR table */
@@ -1769,6 +1819,7 @@ error:
 		iommu = drhd->iommu;
 		free_iommu(iommu);
 	}
+	kfree(g_iommus);
 	return ret;
 }
 
@@ -1917,6 +1968,53 @@ error:
 	return 0;
 }
 
+static void flush_unmaps(void)
+{
+	struct iova *node, *n;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&async_umap_flush_lock, flags);
+	timer_on = 0;
+
+	/* just flush them all */
+	for (i = 0; i < g_num_of_iommus; i++) {
+		if (test_and_clear_bit(i, g_iommus_to_flush))
+			iommu_flush_iotlb_global(&g_iommus[i], 0);
+	}
+
+	list_for_each_entry_safe(node, n, &unmaps_to_do, list) {
+		/* free iova */
+		list_del(&node->list);
+		__free_iova(&((struct dmar_domain *)node->dmar)->iovad, node);
+
+	}
+	list_size = 0;
+	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
+}
+
+static void flush_unmaps_timeout(unsigned long data)
+{
+	flush_unmaps();
+}
+
+static void add_unmap(struct dmar_domain *dom, struct iova *iova)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&async_umap_flush_lock, flags);
+	iova->dmar = dom;
+	list_add(&iova->list, &unmaps_to_do);
+	set_bit((dom->iommu - g_iommus), g_iommus_to_flush);
+
+	if (!timer_on) {
+		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
+		timer_on = 1;
+	}
+	list_size++;
+	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
+}
+
 static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
 			       size_t size, int dir)
 {
@@ -1944,13 +2042,21 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
 	dma_pte_clear_range(domain, start_addr, start_addr + size);
 	/* free page tables */
 	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
-
-	if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
-			size >> PAGE_SHIFT_4K, 0))
+	if (intel_iommu_strict) {
+		if (iommu_flush_iotlb_psi(domain->iommu,
+			domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
 		iommu_flush_write_buffer(domain->iommu);
-
-	/* free iova */
-	__free_iova(&domain->iovad, iova);
+		/* free iova */
+		__free_iova(&domain->iovad, iova);
+	} else {
+		add_unmap(domain, iova);
+		/*
+		 * queue up the release of the unmap to save the 1/6th of the
+		 * cpu used up by the iotlb flush operation...
+		 */
+		if (list_size > high_watermark)
+			flush_unmaps();
+	}
 }
 
 static void * intel_alloc_coherent(struct device *hwdev, size_t size,
@@ -2274,6 +2380,10 @@ int __init intel_iommu_init(void)
 	if (dmar_table_init())
 		return -ENODEV;
 
+	high_watermark = 250;
+	intel_iommu_debug = debugfs_create_dir("intel_iommu", NULL);
+	debug = debugfs_create_u32("high_watermark", S_IWUGO | S_IRUGO,
+				    intel_iommu_debug, &high_watermark);
 	iommu_init_mempool();
 	dmar_init_reserved_ranges();
 
@@ -2289,6 +2399,7 @@ int __init intel_iommu_init(void)
 	printk(KERN_INFO
 	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
 
+	init_timer(&unmap_timer);
 	force_iommu = 1;
 	dma_ops = &intel_dma_ops;
 	return 0;
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h
index 228f6c94b69c..2f1317801b20 100644
--- a/drivers/pci/iova.h
+++ b/drivers/pci/iova.h
@@ -24,6 +24,8 @@ struct iova {
 	struct rb_node	node;
 	unsigned long	pfn_hi; /* IOMMU dish out addr hi */
 	unsigned long	pfn_lo; /* IOMMU dish out addr lo */
+	struct list_head list;
+	void *dmar;
 };
 
 /* holds all the iova translations for a domain */