aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorKeshavamurthy, Anil S <anil.s.keshavamurthy@intel.com>2007-10-21 19:41:41 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-22 11:13:18 -0400
commit10e5247f40f3bf7508a0ed2848c9cae37bddf4bc (patch)
treeadca606f00ebcbdbdc5c474f012105d7e59152f6 /drivers
parent89910cccb8fec0c1140d33a743e72a712efd4f05 (diff)
Intel IOMMU: DMAR detection and parsing logic
This patch supports the upcoming Intel IOMMU hardware, a.k.a. Intel(R) Virtualization Technology for Directed I/O Architecture. The hardware spec for the same can be found here: http://www.intel.com/technology/virtualization/index.htm FAQ! (questions from akpm, answers from ak) > So... what's all this code for? > > I assume that the intent here is to speed things up under Xen, etc? Yes in some cases, but not this code. That would be the Xen version of this code that could potentially assign whole devices to guests. I expect this to be only useful in some special cases though, because most hardware is not virtualizable and you typically want a separate instance for each guest. OK, at some point KVM might implement this too; it likely would use this code for this. > Do we > have any benchmark results to help us to decide whether a merge would be > justified? The main advantage of doing it in the normal kernel is not performance, but more safety. Broken devices won't be able to corrupt memory by doing random DMA. Unfortunately that doesn't work for graphics yet; for that, user space interfaces for the X server are needed. There are some potential performance benefits too: - When you have a device that cannot address the complete address range, an IOMMU can remap its memory instead of bounce buffering. Remapping is likely cheaper than copying. - The IOMMU can merge sg lists into a single virtual block. This could potentially speed up SG IO when the device is slow walking SG lists. [I long ago benchmarked 5% on some block benchmark with an old MPT Fusion; but it probably depends a lot on the HBA] And you get better driver debugging because unexpected memory accesses from the devices will cause a trappable event. > > Does it slow anything down? It adds more overhead to each IO, so yes. This patch: Add support for early detection and parsing of DMARs (DMA Remapping) reported to the OS via ACPI tables. 
DMA remapping (DMAR) device support enables independent address translations for Direct Memory Access (DMA) from devices. These DMA remapping devices are reported via ACPI tables, which include the PCI device scope covered by each DMA remapping device. For detailed info on the specification of "Intel(R) Virtualization Technology for Directed I/O Architecture" please see http://www.intel.com/technology/virtualization/index.htm Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> Cc: Andi Kleen <ak@suse.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Muli Ben-Yehuda <muli@il.ibm.com> Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com> Cc: Arjan van de Ven <arjan@infradead.org> Cc: Ashok Raj <ashok.raj@intel.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Christoph Lameter <clameter@sgi.com> Cc: Greg KH <greg@kroah.com> Cc: Len Brown <lenb@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/pci/Makefile3
-rw-r--r--drivers/pci/dmar.c329
2 files changed, 332 insertions, 0 deletions
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 006054a40995..836ab2f250d1 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -20,6 +20,9 @@ obj-$(CONFIG_PCI_MSI) += msi.o
20# Build the Hypertransport interrupt support 20# Build the Hypertransport interrupt support
21obj-$(CONFIG_HT_IRQ) += htirq.o 21obj-$(CONFIG_HT_IRQ) += htirq.o
22 22
23# Build Intel IOMMU support
24obj-$(CONFIG_DMAR) += dmar.o
25
23# 26#
24# Some architectures use the generic PCI setup functions 27# Some architectures use the generic PCI setup functions
25# 28#
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
new file mode 100644
index 000000000000..5dfdfdac92e1
--- /dev/null
+++ b/drivers/pci/dmar.c
@@ -0,0 +1,329 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
20 *
21 * This file implements early detection/parsing of DMA Remapping Devices
22 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
23 * tables.
24 */
25
26#include <linux/pci.h>
27#include <linux/dmar.h>
28
/* Tag placed in front of the printk messages emitted by this file. */
29#undef PREFIX
30#define PREFIX "DMAR:"
31
32/* No locks are needed, as the DMA remapping hardware unit
33 * lists are constructed at boot time and hotplug of
34 * these units is not supported by the architecture.
35 */
36LIST_HEAD(dmar_drhd_units);
37LIST_HEAD(dmar_rmrr_units);
38
/*
 * ACPI DMAR table header. Filled in by early_dmar_detect() through
 * acpi_get_table() and read by parse_dmar_table().
 */
39static struct acpi_table_header * __initdata dmar_tbl;
40
41static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
42{
43 /*
44 * add INCLUDE_ALL at the tail, so scan the list will find it at
45 * the very end.
46 */
47 if (drhd->include_all)
48 list_add_tail(&drhd->list, &dmar_drhd_units);
49 else
50 list_add(&drhd->list, &dmar_drhd_units);
51}
52
53static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
54{
55 list_add(&rmrr->list, &dmar_rmrr_units);
56}
57
58static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
59 struct pci_dev **dev, u16 segment)
60{
61 struct pci_bus *bus;
62 struct pci_dev *pdev = NULL;
63 struct acpi_dmar_pci_path *path;
64 int count;
65
66 bus = pci_find_bus(segment, scope->bus);
67 path = (struct acpi_dmar_pci_path *)(scope + 1);
68 count = (scope->length - sizeof(struct acpi_dmar_device_scope))
69 / sizeof(struct acpi_dmar_pci_path);
70
71 while (count) {
72 if (pdev)
73 pci_dev_put(pdev);
74 /*
75 * Some BIOSes list non-exist devices in DMAR table, just
76 * ignore it
77 */
78 if (!bus) {
79 printk(KERN_WARNING
80 PREFIX "Device scope bus [%d] not found\n",
81 scope->bus);
82 break;
83 }
84 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
85 if (!pdev) {
86 printk(KERN_WARNING PREFIX
87 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
88 segment, bus->number, path->dev, path->fn);
89 break;
90 }
91 path ++;
92 count --;
93 bus = pdev->subordinate;
94 }
95 if (!pdev) {
96 printk(KERN_WARNING PREFIX
97 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
98 segment, scope->bus, path->dev, path->fn);
99 *dev = NULL;
100 return 0;
101 }
102 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
103 pdev->subordinate) || (scope->entry_type == \
104 ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
105 pci_dev_put(pdev);
106 printk(KERN_WARNING PREFIX
107 "Device scope type does not match for %s\n",
108 pci_name(pdev));
109 return -EINVAL;
110 }
111 *dev = pdev;
112 return 0;
113}
114
115static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
116 struct pci_dev ***devices, u16 segment)
117{
118 struct acpi_dmar_device_scope *scope;
119 void * tmp = start;
120 int index;
121 int ret;
122
123 *cnt = 0;
124 while (start < end) {
125 scope = start;
126 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
127 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
128 (*cnt)++;
129 else
130 printk(KERN_WARNING PREFIX
131 "Unsupported device scope\n");
132 start += scope->length;
133 }
134 if (*cnt == 0)
135 return 0;
136
137 *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
138 if (!*devices)
139 return -ENOMEM;
140
141 start = tmp;
142 index = 0;
143 while (start < end) {
144 scope = start;
145 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
146 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
147 ret = dmar_parse_one_dev_scope(scope,
148 &(*devices)[index], segment);
149 if (ret) {
150 kfree(*devices);
151 return ret;
152 }
153 index ++;
154 }
155 start += scope->length;
156 }
157
158 return 0;
159}
160
161/**
162 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
163 * structure which uniquely represent one DMA remapping hardware unit
164 * present in the platform
165 */
166static int __init
167dmar_parse_one_drhd(struct acpi_dmar_header *header)
168{
169 struct acpi_dmar_hardware_unit *drhd;
170 struct dmar_drhd_unit *dmaru;
171 int ret = 0;
172 static int include_all;
173
174 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
175 if (!dmaru)
176 return -ENOMEM;
177
178 drhd = (struct acpi_dmar_hardware_unit *)header;
179 dmaru->reg_base_addr = drhd->address;
180 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
181
182 if (!dmaru->include_all)
183 ret = dmar_parse_dev_scope((void *)(drhd + 1),
184 ((void *)drhd) + header->length,
185 &dmaru->devices_cnt, &dmaru->devices,
186 drhd->segment);
187 else {
188 /* Only allow one INCLUDE_ALL */
189 if (include_all) {
190 printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
191 "device scope is allowed\n");
192 ret = -EINVAL;
193 }
194 include_all = 1;
195 }
196
197 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all))
198 kfree(dmaru);
199 else
200 dmar_register_drhd_unit(dmaru);
201 return ret;
202}
203
204static int __init
205dmar_parse_one_rmrr(struct acpi_dmar_header *header)
206{
207 struct acpi_dmar_reserved_memory *rmrr;
208 struct dmar_rmrr_unit *rmrru;
209 int ret = 0;
210
211 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
212 if (!rmrru)
213 return -ENOMEM;
214
215 rmrr = (struct acpi_dmar_reserved_memory *)header;
216 rmrru->base_address = rmrr->base_address;
217 rmrru->end_address = rmrr->end_address;
218 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
219 ((void *)rmrr) + header->length,
220 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
221
222 if (ret || (rmrru->devices_cnt == 0))
223 kfree(rmrru);
224 else
225 dmar_register_rmrr_unit(rmrru);
226 return ret;
227}
228
229static void __init
230dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
231{
232 struct acpi_dmar_hardware_unit *drhd;
233 struct acpi_dmar_reserved_memory *rmrr;
234
235 switch (header->type) {
236 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
237 drhd = (struct acpi_dmar_hardware_unit *)header;
238 printk (KERN_INFO PREFIX
239 "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
240 drhd->flags, drhd->address);
241 break;
242 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
243 rmrr = (struct acpi_dmar_reserved_memory *)header;
244
245 printk (KERN_INFO PREFIX
246 "RMRR base: 0x%016Lx end: 0x%016Lx\n",
247 rmrr->base_address, rmrr->end_address);
248 break;
249 }
250}
251
252/**
253 * parse_dmar_table - parses the DMA reporting table
254 */
255static int __init
256parse_dmar_table(void)
257{
258 struct acpi_table_dmar *dmar;
259 struct acpi_dmar_header *entry_header;
260 int ret = 0;
261
262 dmar = (struct acpi_table_dmar *)dmar_tbl;
263 if (!dmar)
264 return -ENODEV;
265
266 if (!dmar->width) {
267 printk (KERN_WARNING PREFIX "Zero: Invalid DMAR haw\n");
268 return -EINVAL;
269 }
270
271 printk (KERN_INFO PREFIX "Host address width %d\n",
272 dmar->width + 1);
273
274 entry_header = (struct acpi_dmar_header *)(dmar + 1);
275 while (((unsigned long)entry_header) <
276 (((unsigned long)dmar) + dmar_tbl->length)) {
277 dmar_table_print_dmar_entry(entry_header);
278
279 switch (entry_header->type) {
280 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
281 ret = dmar_parse_one_drhd(entry_header);
282 break;
283 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
284 ret = dmar_parse_one_rmrr(entry_header);
285 break;
286 default:
287 printk(KERN_WARNING PREFIX
288 "Unknown DMAR structure type\n");
289 ret = 0; /* for forward compatibility */
290 break;
291 }
292 if (ret)
293 break;
294
295 entry_header = ((void *)entry_header + entry_header->length);
296 }
297 return ret;
298}
299
300
301int __init dmar_table_init(void)
302{
303
304 parse_dmar_table();
305 if (list_empty(&dmar_drhd_units)) {
306 printk(KERN_INFO PREFIX "No DMAR devices found\n");
307 return -ENODEV;
308 }
309 return 0;
310}
311
312/**
313 * early_dmar_detect - checks to see if the platform supports DMAR devices
314 */
315int __init early_dmar_detect(void)
316{
317 acpi_status status = AE_OK;
318
319 /* if we could find DMAR table, then there are DMAR devices */
320 status = acpi_get_table(ACPI_SIG_DMAR, 0,
321 (struct acpi_table_header **)&dmar_tbl);
322
323 if (ACPI_SUCCESS(status) && !dmar_tbl) {
324 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
325 status = AE_NOT_FOUND;
326 }
327
328 return (ACPI_SUCCESS(status) ? 1 : 0);
329}