aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-05 12:49:07 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-05 12:49:07 -0500
commit7b626acb8f983eb83b396ab96cc24b18d635d487 (patch)
tree8c3320191311e6186d3aa722f1acd12acd47ece8
parent1ebb275afcf5a47092e995541d6c604eef96062a (diff)
parent4528752f49c1f4025473d12bc5fa9181085c3f22 (diff)
Merge branch 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (63 commits) x86, Calgary IOMMU quirk: Find nearest matching Calgary while walking up the PCI tree x86/amd-iommu: Remove amd_iommu_pd_table x86/amd-iommu: Move reset_iommu_command_buffer out of locked code x86/amd-iommu: Cleanup DTE flushing code x86/amd-iommu: Introduce iommu_flush_device() function x86/amd-iommu: Cleanup attach/detach_device code x86/amd-iommu: Keep devices per domain in a list x86/amd-iommu: Add device bind reference counting x86/amd-iommu: Use dev->arch->iommu to store iommu related information x86/amd-iommu: Remove support for domain sharing x86/amd-iommu: Rearrange dma_ops related functions x86/amd-iommu: Move some pte allocation functions in the right section x86/amd-iommu: Remove iommu parameter from dma_ops_domain_alloc x86/amd-iommu: Use get_device_id and check_device where appropriate x86/amd-iommu: Move find_protection_domain to helper functions x86/amd-iommu: Simplify get_device_resources() x86/amd-iommu: Let domain_for_device handle aliases x86/amd-iommu: Remove iommu specific handling from dma_ops path x86/amd-iommu: Remove iommu parameter from __(un)map_single x86/amd-iommu: Make alloc_new_range aware of multiple IOMMUs ...
-rw-r--r--arch/ia64/include/asm/swiotlb.h2
-rw-r--r--arch/ia64/kernel/pci-swiotlb.c4
-rw-r--r--arch/powerpc/kernel/setup_32.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c2
-rw-r--r--arch/x86/include/asm/amd_iommu.h16
-rw-r--r--arch/x86/include/asm/amd_iommu_proto.h38
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h54
-rw-r--r--arch/x86/include/asm/calgary.h2
-rw-r--r--arch/x86/include/asm/device.h2
-rw-r--r--arch/x86/include/asm/dma-mapping.h5
-rw-r--r--arch/x86/include/asm/gart.h9
-rw-r--r--arch/x86/include/asm/iommu.h2
-rw-r--r--arch/x86/include/asm/swiotlb.h9
-rw-r--r--arch/x86/include/asm/x86_init.h10
-rw-r--r--arch/x86/kernel/amd_iommu.c1247
-rw-r--r--arch/x86/kernel/amd_iommu_init.c94
-rw-r--r--arch/x86/kernel/aperture_64.c4
-rw-r--r--arch/x86/kernel/crash.c5
-rw-r--r--arch/x86/kernel/pci-calgary_64.c94
-rw-r--r--arch/x86/kernel/pci-dma.c39
-rw-r--r--arch/x86/kernel/pci-gart_64.c156
-rw-r--r--arch/x86/kernel/pci-nommu.c11
-rw-r--r--arch/x86/kernel/pci-swiotlb.c18
-rw-r--r--arch/x86/kernel/reboot.c4
-rw-r--r--arch/x86/kernel/x86_init.c8
-rw-r--r--drivers/char/agp/Kconfig3
-rw-r--r--drivers/pci/dmar.c7
-rw-r--r--drivers/pci/intel-iommu.c6
-rw-r--r--include/linux/bootmem.h1
-rw-r--r--include/linux/dmar.h15
-rw-r--r--include/linux/swiotlb.h12
-rw-r--r--lib/swiotlb.c46
-rw-r--r--mm/bootmem.c24
33 files changed, 1068 insertions, 883 deletions
diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h
index dcbaea7ce128..f0acde68aaea 100644
--- a/arch/ia64/include/asm/swiotlb.h
+++ b/arch/ia64/include/asm/swiotlb.h
@@ -4,8 +4,6 @@
4#include <linux/dma-mapping.h> 4#include <linux/dma-mapping.h>
5#include <linux/swiotlb.h> 5#include <linux/swiotlb.h>
6 6
7extern int swiotlb_force;
8
9#ifdef CONFIG_SWIOTLB 7#ifdef CONFIG_SWIOTLB
10extern int swiotlb; 8extern int swiotlb;
11extern void pci_swiotlb_init(void); 9extern void pci_swiotlb_init(void);
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
index 285aae8431c6..53292abf846c 100644
--- a/arch/ia64/kernel/pci-swiotlb.c
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = {
41void __init swiotlb_dma_init(void) 41void __init swiotlb_dma_init(void)
42{ 42{
43 dma_ops = &swiotlb_dma_ops; 43 dma_ops = &swiotlb_dma_ops;
44 swiotlb_init(); 44 swiotlb_init(1);
45} 45}
46 46
47void __init pci_swiotlb_init(void) 47void __init pci_swiotlb_init(void)
@@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void)
51 swiotlb = 1; 51 swiotlb = 1;
52 printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); 52 printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
53 machvec_init("dig"); 53 machvec_init("dig");
54 swiotlb_init(); 54 swiotlb_init(1);
55 dma_ops = &swiotlb_dma_ops; 55 dma_ops = &swiotlb_dma_ops;
56#else 56#else
57 panic("Unable to find Intel IOMMU"); 57 panic("Unable to find Intel IOMMU");
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 53bcf3d792db..b152de3e64d4 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p)
345 345
346#ifdef CONFIG_SWIOTLB 346#ifdef CONFIG_SWIOTLB
347 if (ppc_swiotlb_enable) 347 if (ppc_swiotlb_enable)
348 swiotlb_init(); 348 swiotlb_init(1);
349#endif 349#endif
350 350
351 paging_init(); 351 paging_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 04f638d82fb3..df2c9e932b37 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p)
550 550
551#ifdef CONFIG_SWIOTLB 551#ifdef CONFIG_SWIOTLB
552 if (ppc_swiotlb_enable) 552 if (ppc_swiotlb_enable)
553 swiotlb_init(); 553 swiotlb_init(1);
554#endif 554#endif
555 555
556 paging_init(); 556 paging_init();
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index 4b180897e6b5..5af2982133b5 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -23,19 +23,13 @@
23#include <linux/irqreturn.h> 23#include <linux/irqreturn.h>
24 24
25#ifdef CONFIG_AMD_IOMMU 25#ifdef CONFIG_AMD_IOMMU
26extern int amd_iommu_init(void); 26
27extern int amd_iommu_init_dma_ops(void);
28extern int amd_iommu_init_passthrough(void);
29extern void amd_iommu_detect(void); 27extern void amd_iommu_detect(void);
30extern irqreturn_t amd_iommu_int_handler(int irq, void *data); 28
31extern void amd_iommu_flush_all_domains(void);
32extern void amd_iommu_flush_all_devices(void);
33extern void amd_iommu_shutdown(void);
34extern void amd_iommu_apply_erratum_63(u16 devid);
35#else 29#else
36static inline int amd_iommu_init(void) { return -ENODEV; } 30
37static inline void amd_iommu_detect(void) { } 31static inline void amd_iommu_detect(void) { }
38static inline void amd_iommu_shutdown(void) { } 32
39#endif 33#endif
40 34
41#endif /* _ASM_X86_AMD_IOMMU_H */ 35#endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
new file mode 100644
index 000000000000..84786fb9a23b
--- /dev/null
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -0,0 +1,38 @@
1/*
2 * Copyright (C) 2009 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
20#define _ASM_X86_AMD_IOMMU_PROTO_H
21
22struct amd_iommu;
23
24extern int amd_iommu_init_dma_ops(void);
25extern int amd_iommu_init_passthrough(void);
26extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
27extern void amd_iommu_flush_all_domains(void);
28extern void amd_iommu_flush_all_devices(void);
29extern void amd_iommu_apply_erratum_63(u16 devid);
30extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
31
32#ifndef CONFIG_AMD_IOMMU_STATS
33
34static inline void amd_iommu_stats_init(void) { }
35
36#endif /* !CONFIG_AMD_IOMMU_STATS */
37
38#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 2a2cc7a78a81..ba19ad4c47d0 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -25,6 +25,11 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26 26
27/* 27/*
28 * Maximum number of IOMMUs supported
29 */
30#define MAX_IOMMUS 32
31
32/*
28 * some size calculation constants 33 * some size calculation constants
29 */ 34 */
30#define DEV_TABLE_ENTRY_SIZE 32 35#define DEV_TABLE_ENTRY_SIZE 32
@@ -206,6 +211,9 @@ extern bool amd_iommu_dump;
206 printk(KERN_INFO "AMD-Vi: " format, ## arg); \ 211 printk(KERN_INFO "AMD-Vi: " format, ## arg); \
207 } while(0); 212 } while(0);
208 213
214/* global flag if IOMMUs cache non-present entries */
215extern bool amd_iommu_np_cache;
216
209/* 217/*
210 * Make iterating over all IOMMUs easier 218 * Make iterating over all IOMMUs easier
211 */ 219 */
@@ -226,6 +234,8 @@ extern bool amd_iommu_dump;
226 * independent of their use. 234 * independent of their use.
227 */ 235 */
228struct protection_domain { 236struct protection_domain {
237 struct list_head list; /* for list of all protection domains */
238 struct list_head dev_list; /* List of all devices in this domain */
229 spinlock_t lock; /* mostly used to lock the page table*/ 239 spinlock_t lock; /* mostly used to lock the page table*/
230 u16 id; /* the domain id written to the device table */ 240 u16 id; /* the domain id written to the device table */
231 int mode; /* paging mode (0-6 levels) */ 241 int mode; /* paging mode (0-6 levels) */
@@ -233,7 +243,20 @@ struct protection_domain {
233 unsigned long flags; /* flags to find out type of domain */ 243 unsigned long flags; /* flags to find out type of domain */
234 bool updated; /* complete domain flush required */ 244 bool updated; /* complete domain flush required */
235 unsigned dev_cnt; /* devices assigned to this domain */ 245 unsigned dev_cnt; /* devices assigned to this domain */
246 unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
236 void *priv; /* private data */ 247 void *priv; /* private data */
248
249};
250
251/*
252 * This struct contains device specific data for the IOMMU
253 */
254struct iommu_dev_data {
255 struct list_head list; /* For domain->dev_list */
256 struct device *dev; /* Device this data belong to */
257 struct device *alias; /* The Alias Device */
258 struct protection_domain *domain; /* Domain the device is bound to */
259 atomic_t bind; /* Domain attach reverent count */
237}; 260};
238 261
239/* 262/*
@@ -291,6 +314,9 @@ struct dma_ops_domain {
291struct amd_iommu { 314struct amd_iommu {
292 struct list_head list; 315 struct list_head list;
293 316
317 /* Index within the IOMMU array */
318 int index;
319
294 /* locks the accesses to the hardware */ 320 /* locks the accesses to the hardware */
295 spinlock_t lock; 321 spinlock_t lock;
296 322
@@ -357,6 +383,21 @@ struct amd_iommu {
357extern struct list_head amd_iommu_list; 383extern struct list_head amd_iommu_list;
358 384
359/* 385/*
386 * Array with pointers to each IOMMU struct
387 * The indices are referenced in the protection domains
388 */
389extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
390
391/* Number of IOMMUs present in the system */
392extern int amd_iommus_present;
393
394/*
395 * Declarations for the global list of all protection domains
396 */
397extern spinlock_t amd_iommu_pd_lock;
398extern struct list_head amd_iommu_pd_list;
399
400/*
360 * Structure defining one entry in the device table 401 * Structure defining one entry in the device table
361 */ 402 */
362struct dev_table_entry { 403struct dev_table_entry {
@@ -416,15 +457,9 @@ extern unsigned amd_iommu_aperture_order;
416/* largest PCI device id we expect translation requests for */ 457/* largest PCI device id we expect translation requests for */
417extern u16 amd_iommu_last_bdf; 458extern u16 amd_iommu_last_bdf;
418 459
419/* data structures for protection domain handling */
420extern struct protection_domain **amd_iommu_pd_table;
421
422/* allocation bitmap for domain ids */ 460/* allocation bitmap for domain ids */
423extern unsigned long *amd_iommu_pd_alloc_bitmap; 461extern unsigned long *amd_iommu_pd_alloc_bitmap;
424 462
425/* will be 1 if device isolation is enabled */
426extern bool amd_iommu_isolate;
427
428/* 463/*
429 * If true, the addresses will be flushed on unmap time, not when 464 * If true, the addresses will be flushed on unmap time, not when
430 * they are reused 465 * they are reused
@@ -462,11 +497,6 @@ struct __iommu_counter {
462#define ADD_STATS_COUNTER(name, x) 497#define ADD_STATS_COUNTER(name, x)
463#define SUB_STATS_COUNTER(name, x) 498#define SUB_STATS_COUNTER(name, x)
464 499
465static inline void amd_iommu_stats_init(void) { }
466
467#endif /* CONFIG_AMD_IOMMU_STATS */ 500#endif /* CONFIG_AMD_IOMMU_STATS */
468 501
469/* some function prototypes */
470extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
471
472#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ 502#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
index b03bedb62aa7..0918654305af 100644
--- a/arch/x86/include/asm/calgary.h
+++ b/arch/x86/include/asm/calgary.h
@@ -62,10 +62,8 @@ struct cal_chipset_ops {
62extern int use_calgary; 62extern int use_calgary;
63 63
64#ifdef CONFIG_CALGARY_IOMMU 64#ifdef CONFIG_CALGARY_IOMMU
65extern int calgary_iommu_init(void);
66extern void detect_calgary(void); 65extern void detect_calgary(void);
67#else 66#else
68static inline int calgary_iommu_init(void) { return 1; }
69static inline void detect_calgary(void) { return; } 67static inline void detect_calgary(void) { return; }
70#endif 68#endif
71 69
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index cee34e9ca45b..029f230ab637 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -8,7 +8,7 @@ struct dev_archdata {
8#ifdef CONFIG_X86_64 8#ifdef CONFIG_X86_64
9struct dma_map_ops *dma_ops; 9struct dma_map_ops *dma_ops;
10#endif 10#endif
11#ifdef CONFIG_DMAR 11#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU)
12 void *iommu; /* hook for IOMMU specific extension */ 12 void *iommu; /* hook for IOMMU specific extension */
13#endif 13#endif
14}; 14};
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 6a25d5d42836..0f6c02f3b7d4 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -20,7 +20,8 @@
20# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) 20# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32)
21#endif 21#endif
22 22
23extern dma_addr_t bad_dma_address; 23#define DMA_ERROR_CODE 0
24
24extern int iommu_merge; 25extern int iommu_merge;
25extern struct device x86_dma_fallback_dev; 26extern struct device x86_dma_fallback_dev;
26extern int panic_on_overflow; 27extern int panic_on_overflow;
@@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
48 if (ops->mapping_error) 49 if (ops->mapping_error)
49 return ops->mapping_error(dev, dma_addr); 50 return ops->mapping_error(dev, dma_addr);
50 51
51 return (dma_addr == bad_dma_address); 52 return (dma_addr == DMA_ERROR_CODE);
52} 53}
53 54
54#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) 55#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 6cfdafa409d8..4ac5b0f33fc1 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -35,8 +35,7 @@ extern int gart_iommu_aperture_allowed;
35extern int gart_iommu_aperture_disabled; 35extern int gart_iommu_aperture_disabled;
36 36
37extern void early_gart_iommu_check(void); 37extern void early_gart_iommu_check(void);
38extern void gart_iommu_init(void); 38extern int gart_iommu_init(void);
39extern void gart_iommu_shutdown(void);
40extern void __init gart_parse_options(char *); 39extern void __init gart_parse_options(char *);
41extern void gart_iommu_hole_init(void); 40extern void gart_iommu_hole_init(void);
42 41
@@ -48,12 +47,6 @@ extern void gart_iommu_hole_init(void);
48static inline void early_gart_iommu_check(void) 47static inline void early_gart_iommu_check(void)
49{ 48{
50} 49}
51static inline void gart_iommu_init(void)
52{
53}
54static inline void gart_iommu_shutdown(void)
55{
56}
57static inline void gart_parse_options(char *options) 50static inline void gart_parse_options(char *options)
58{ 51{
59} 52}
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index fd6d21bbee6c..345c99cef152 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -1,8 +1,6 @@
1#ifndef _ASM_X86_IOMMU_H 1#ifndef _ASM_X86_IOMMU_H
2#define _ASM_X86_IOMMU_H 2#define _ASM_X86_IOMMU_H
3 3
4extern void pci_iommu_shutdown(void);
5extern void no_iommu_init(void);
6extern struct dma_map_ops nommu_dma_ops; 4extern struct dma_map_ops nommu_dma_ops;
7extern int force_iommu, no_iommu; 5extern int force_iommu, no_iommu;
8extern int iommu_detected; 6extern int iommu_detected;
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index b9e4e20174fb..87ffcb12a1b8 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -3,17 +3,14 @@
3 3
4#include <linux/swiotlb.h> 4#include <linux/swiotlb.h>
5 5
6/* SWIOTLB interface */
7
8extern int swiotlb_force;
9
10#ifdef CONFIG_SWIOTLB 6#ifdef CONFIG_SWIOTLB
11extern int swiotlb; 7extern int swiotlb;
12extern void pci_swiotlb_init(void); 8extern int pci_swiotlb_init(void);
13#else 9#else
14#define swiotlb 0 10#define swiotlb 0
15static inline void pci_swiotlb_init(void) 11static inline int pci_swiotlb_init(void)
16{ 12{
13 return 0;
17} 14}
18#endif 15#endif
19 16
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 2c756fd4ab0e..d8e71459f025 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -91,6 +91,14 @@ struct x86_init_timers {
91}; 91};
92 92
93/** 93/**
94 * struct x86_init_iommu - platform specific iommu setup
95 * @iommu_init: platform specific iommu setup
96 */
97struct x86_init_iommu {
98 int (*iommu_init)(void);
99};
100
101/**
94 * struct x86_init_ops - functions for platform specific setup 102 * struct x86_init_ops - functions for platform specific setup
95 * 103 *
96 */ 104 */
@@ -101,6 +109,7 @@ struct x86_init_ops {
101 struct x86_init_oem oem; 109 struct x86_init_oem oem;
102 struct x86_init_paging paging; 110 struct x86_init_paging paging;
103 struct x86_init_timers timers; 111 struct x86_init_timers timers;
112 struct x86_init_iommu iommu;
104}; 113};
105 114
106/** 115/**
@@ -121,6 +130,7 @@ struct x86_platform_ops {
121 unsigned long (*calibrate_tsc)(void); 130 unsigned long (*calibrate_tsc)(void);
122 unsigned long (*get_wallclock)(void); 131 unsigned long (*get_wallclock)(void);
123 int (*set_wallclock)(unsigned long nowtime); 132 int (*set_wallclock)(unsigned long nowtime);
133 void (*iommu_shutdown)(void);
124}; 134};
125 135
126extern struct x86_init_ops x86_init; 136extern struct x86_init_ops x86_init;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0285521e0a99..32fb09102a13 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -28,6 +28,7 @@
28#include <asm/proto.h> 28#include <asm/proto.h>
29#include <asm/iommu.h> 29#include <asm/iommu.h>
30#include <asm/gart.h> 30#include <asm/gart.h>
31#include <asm/amd_iommu_proto.h>
31#include <asm/amd_iommu_types.h> 32#include <asm/amd_iommu_types.h>
32#include <asm/amd_iommu.h> 33#include <asm/amd_iommu.h>
33 34
@@ -56,20 +57,115 @@ struct iommu_cmd {
56 u32 data[4]; 57 u32 data[4];
57}; 58};
58 59
59static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
60 struct unity_map_entry *e);
61static struct dma_ops_domain *find_protection_domain(u16 devid);
62static u64 *alloc_pte(struct protection_domain *domain,
63 unsigned long address, int end_lvl,
64 u64 **pte_page, gfp_t gfp);
65static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
66 unsigned long start_page,
67 unsigned int pages);
68static void reset_iommu_command_buffer(struct amd_iommu *iommu); 60static void reset_iommu_command_buffer(struct amd_iommu *iommu);
69static u64 *fetch_pte(struct protection_domain *domain,
70 unsigned long address, int map_size);
71static void update_domain(struct protection_domain *domain); 61static void update_domain(struct protection_domain *domain);
72 62
63/****************************************************************************
64 *
65 * Helper functions
66 *
67 ****************************************************************************/
68
69static inline u16 get_device_id(struct device *dev)
70{
71 struct pci_dev *pdev = to_pci_dev(dev);
72
73 return calc_devid(pdev->bus->number, pdev->devfn);
74}
75
76static struct iommu_dev_data *get_dev_data(struct device *dev)
77{
78 return dev->archdata.iommu;
79}
80
81/*
82 * In this function the list of preallocated protection domains is traversed to
83 * find the domain for a specific device
84 */
85static struct dma_ops_domain *find_protection_domain(u16 devid)
86{
87 struct dma_ops_domain *entry, *ret = NULL;
88 unsigned long flags;
89 u16 alias = amd_iommu_alias_table[devid];
90
91 if (list_empty(&iommu_pd_list))
92 return NULL;
93
94 spin_lock_irqsave(&iommu_pd_list_lock, flags);
95
96 list_for_each_entry(entry, &iommu_pd_list, list) {
97 if (entry->target_dev == devid ||
98 entry->target_dev == alias) {
99 ret = entry;
100 break;
101 }
102 }
103
104 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
105
106 return ret;
107}
108
109/*
110 * This function checks if the driver got a valid device from the caller to
111 * avoid dereferencing invalid pointers.
112 */
113static bool check_device(struct device *dev)
114{
115 u16 devid;
116
117 if (!dev || !dev->dma_mask)
118 return false;
119
120 /* No device or no PCI device */
121 if (!dev || dev->bus != &pci_bus_type)
122 return false;
123
124 devid = get_device_id(dev);
125
126 /* Out of our scope? */
127 if (devid > amd_iommu_last_bdf)
128 return false;
129
130 if (amd_iommu_rlookup_table[devid] == NULL)
131 return false;
132
133 return true;
134}
135
136static int iommu_init_device(struct device *dev)
137{
138 struct iommu_dev_data *dev_data;
139 struct pci_dev *pdev;
140 u16 devid, alias;
141
142 if (dev->archdata.iommu)
143 return 0;
144
145 dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
146 if (!dev_data)
147 return -ENOMEM;
148
149 dev_data->dev = dev;
150
151 devid = get_device_id(dev);
152 alias = amd_iommu_alias_table[devid];
153 pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff);
154 if (pdev)
155 dev_data->alias = &pdev->dev;
156
157 atomic_set(&dev_data->bind, 0);
158
159 dev->archdata.iommu = dev_data;
160
161
162 return 0;
163}
164
165static void iommu_uninit_device(struct device *dev)
166{
167 kfree(dev->archdata.iommu);
168}
73#ifdef CONFIG_AMD_IOMMU_STATS 169#ifdef CONFIG_AMD_IOMMU_STATS
74 170
75/* 171/*
@@ -90,7 +186,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem);
90DECLARE_STATS_COUNTER(total_map_requests); 186DECLARE_STATS_COUNTER(total_map_requests);
91 187
92static struct dentry *stats_dir; 188static struct dentry *stats_dir;
93static struct dentry *de_isolate;
94static struct dentry *de_fflush; 189static struct dentry *de_fflush;
95 190
96static void amd_iommu_stats_add(struct __iommu_counter *cnt) 191static void amd_iommu_stats_add(struct __iommu_counter *cnt)
@@ -108,9 +203,6 @@ static void amd_iommu_stats_init(void)
108 if (stats_dir == NULL) 203 if (stats_dir == NULL)
109 return; 204 return;
110 205
111 de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
112 (u32 *)&amd_iommu_isolate);
113
114 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, 206 de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
115 (u32 *)&amd_iommu_unmap_flush); 207 (u32 *)&amd_iommu_unmap_flush);
116 208
@@ -130,12 +222,6 @@ static void amd_iommu_stats_init(void)
130 222
131#endif 223#endif
132 224
133/* returns !0 if the IOMMU is caching non-present entries in its TLB */
134static int iommu_has_npcache(struct amd_iommu *iommu)
135{
136 return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
137}
138
139/**************************************************************************** 225/****************************************************************************
140 * 226 *
141 * Interrupt handling functions 227 * Interrupt handling functions
@@ -199,6 +285,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
199 break; 285 break;
200 case EVENT_TYPE_ILL_CMD: 286 case EVENT_TYPE_ILL_CMD:
201 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); 287 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
288 iommu->reset_in_progress = true;
202 reset_iommu_command_buffer(iommu); 289 reset_iommu_command_buffer(iommu);
203 dump_command(address); 290 dump_command(address);
204 break; 291 break;
@@ -321,11 +408,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)
321 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; 408 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
322 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); 409 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
323 410
324 if (unlikely(i == EXIT_LOOP_COUNT)) { 411 if (unlikely(i == EXIT_LOOP_COUNT))
325 spin_unlock(&iommu->lock); 412 iommu->reset_in_progress = true;
326 reset_iommu_command_buffer(iommu);
327 spin_lock(&iommu->lock);
328 }
329} 413}
330 414
331/* 415/*
@@ -372,26 +456,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
372out: 456out:
373 spin_unlock_irqrestore(&iommu->lock, flags); 457 spin_unlock_irqrestore(&iommu->lock, flags);
374 458
459 if (iommu->reset_in_progress)
460 reset_iommu_command_buffer(iommu);
461
375 return 0; 462 return 0;
376} 463}
377 464
465static void iommu_flush_complete(struct protection_domain *domain)
466{
467 int i;
468
469 for (i = 0; i < amd_iommus_present; ++i) {
470 if (!domain->dev_iommu[i])
471 continue;
472
473 /*
474 * Devices of this domain are behind this IOMMU
475 * We need to wait for completion of all commands.
476 */
477 iommu_completion_wait(amd_iommus[i]);
478 }
479}
480
378/* 481/*
379 * Command send function for invalidating a device table entry 482 * Command send function for invalidating a device table entry
380 */ 483 */
381static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) 484static int iommu_flush_device(struct device *dev)
382{ 485{
486 struct amd_iommu *iommu;
383 struct iommu_cmd cmd; 487 struct iommu_cmd cmd;
384 int ret; 488 u16 devid;
385 489
386 BUG_ON(iommu == NULL); 490 devid = get_device_id(dev);
491 iommu = amd_iommu_rlookup_table[devid];
387 492
493 /* Build command */
388 memset(&cmd, 0, sizeof(cmd)); 494 memset(&cmd, 0, sizeof(cmd));
389 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); 495 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
390 cmd.data[0] = devid; 496 cmd.data[0] = devid;
391 497
392 ret = iommu_queue_command(iommu, &cmd); 498 return iommu_queue_command(iommu, &cmd);
393
394 return ret;
395} 499}
396 500
397static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, 501static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
@@ -430,11 +534,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
430 * It invalidates a single PTE if the range to flush is within a single 534 * It invalidates a single PTE if the range to flush is within a single
431 * page. Otherwise it flushes the whole TLB of the IOMMU. 535 * page. Otherwise it flushes the whole TLB of the IOMMU.
432 */ 536 */
433static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, 537static void __iommu_flush_pages(struct protection_domain *domain,
434 u64 address, size_t size) 538 u64 address, size_t size, int pde)
435{ 539{
436 int s = 0; 540 int s = 0, i;
437 unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); 541 unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);
438 542
439 address &= PAGE_MASK; 543 address &= PAGE_MASK;
440 544
@@ -447,142 +551,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
447 s = 1; 551 s = 1;
448 } 552 }
449 553
450 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
451 554
452 return 0; 555 for (i = 0; i < amd_iommus_present; ++i) {
556 if (!domain->dev_iommu[i])
557 continue;
558
559 /*
560 * Devices of this domain are behind this IOMMU
561 * We need a TLB flush
562 */
563 iommu_queue_inv_iommu_pages(amd_iommus[i], address,
564 domain->id, pde, s);
565 }
566
567 return;
453} 568}
454 569
455/* Flush the whole IO/TLB for a given protection domain */ 570static void iommu_flush_pages(struct protection_domain *domain,
456static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) 571 u64 address, size_t size)
457{ 572{
458 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 573 __iommu_flush_pages(domain, address, size, 0);
459 574}
460 INC_STATS_COUNTER(domain_flush_single);
461 575
462 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); 576/* Flush the whole IO/TLB for a given protection domain */
577static void iommu_flush_tlb(struct protection_domain *domain)
578{
579 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
463} 580}
464 581
465/* Flush the whole IO/TLB for a given protection domain - including PDE */ 582/* Flush the whole IO/TLB for a given protection domain - including PDE */
466static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) 583static void iommu_flush_tlb_pde(struct protection_domain *domain)
467{ 584{
468 u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; 585 __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
469
470 INC_STATS_COUNTER(domain_flush_single);
471
472 iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
473} 586}
474 587
588
475/* 589/*
476 * This function flushes one domain on one IOMMU 590 * This function flushes the DTEs for all devices in domain
477 */ 591 */
478static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) 592static void iommu_flush_domain_devices(struct protection_domain *domain)
479{ 593{
480 struct iommu_cmd cmd; 594 struct iommu_dev_data *dev_data;
481 unsigned long flags; 595 unsigned long flags;
482 596
483 __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 597 spin_lock_irqsave(&domain->lock, flags);
484 domid, 1, 1);
485 598
486 spin_lock_irqsave(&iommu->lock, flags); 599 list_for_each_entry(dev_data, &domain->dev_list, list)
487 __iommu_queue_command(iommu, &cmd); 600 iommu_flush_device(dev_data->dev);
488 __iommu_completion_wait(iommu); 601
489 __iommu_wait_for_completion(iommu); 602 spin_unlock_irqrestore(&domain->lock, flags);
490 spin_unlock_irqrestore(&iommu->lock, flags);
491} 603}
492 604
493static void flush_all_domains_on_iommu(struct amd_iommu *iommu) 605static void iommu_flush_all_domain_devices(void)
494{ 606{
495 int i; 607 struct protection_domain *domain;
608 unsigned long flags;
496 609
497 for (i = 1; i < MAX_DOMAIN_ID; ++i) { 610 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
498 if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) 611
499 continue; 612 list_for_each_entry(domain, &amd_iommu_pd_list, list) {
500 flush_domain_on_iommu(iommu, i); 613 iommu_flush_domain_devices(domain);
614 iommu_flush_complete(domain);
501 } 615 }
502 616
617 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
618}
619
620void amd_iommu_flush_all_devices(void)
621{
622 iommu_flush_all_domain_devices();
503} 623}
504 624
505/* 625/*
506 * This function is used to flush the IO/TLB for a given protection domain 626 * This function uses heavy locking and may disable irqs for some time. But
507 * on every IOMMU in the system 627 * this is no issue because it is only called during resume.
508 */ 628 */
509static void iommu_flush_domain(u16 domid) 629void amd_iommu_flush_all_domains(void)
510{ 630{
511 struct amd_iommu *iommu; 631 struct protection_domain *domain;
632 unsigned long flags;
512 633
513 INC_STATS_COUNTER(domain_flush_all); 634 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
514 635
515 for_each_iommu(iommu) 636 list_for_each_entry(domain, &amd_iommu_pd_list, list) {
516 flush_domain_on_iommu(iommu, domid); 637 spin_lock(&domain->lock);
638 iommu_flush_tlb_pde(domain);
639 iommu_flush_complete(domain);
640 spin_unlock(&domain->lock);
641 }
642
643 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
517} 644}
518 645
519void amd_iommu_flush_all_domains(void) 646static void reset_iommu_command_buffer(struct amd_iommu *iommu)
520{ 647{
521 struct amd_iommu *iommu; 648 pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
522 649
523 for_each_iommu(iommu) 650 if (iommu->reset_in_progress)
524 flush_all_domains_on_iommu(iommu); 651 panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
652
653 amd_iommu_reset_cmd_buffer(iommu);
654 amd_iommu_flush_all_devices();
655 amd_iommu_flush_all_domains();
656
657 iommu->reset_in_progress = false;
525} 658}
526 659
527static void flush_all_devices_for_iommu(struct amd_iommu *iommu) 660/****************************************************************************
661 *
662 * The functions below are used the create the page table mappings for
663 * unity mapped regions.
664 *
665 ****************************************************************************/
666
667/*
668 * This function is used to add another level to an IO page table. Adding
669 * another level increases the size of the address space by 9 bits to a size up
670 * to 64 bits.
671 */
672static bool increase_address_space(struct protection_domain *domain,
673 gfp_t gfp)
528{ 674{
529 int i; 675 u64 *pte;
530 676
531 for (i = 0; i <= amd_iommu_last_bdf; ++i) { 677 if (domain->mode == PAGE_MODE_6_LEVEL)
532 if (iommu != amd_iommu_rlookup_table[i]) 678 /* address space already 64 bit large */
533 continue; 679 return false;
534 680
535 iommu_queue_inv_dev_entry(iommu, i); 681 pte = (void *)get_zeroed_page(gfp);
536 iommu_completion_wait(iommu); 682 if (!pte)
537 } 683 return false;
684
685 *pte = PM_LEVEL_PDE(domain->mode,
686 virt_to_phys(domain->pt_root));
687 domain->pt_root = pte;
688 domain->mode += 1;
689 domain->updated = true;
690
691 return true;
538} 692}
539 693
540static void flush_devices_by_domain(struct protection_domain *domain) 694static u64 *alloc_pte(struct protection_domain *domain,
695 unsigned long address,
696 int end_lvl,
697 u64 **pte_page,
698 gfp_t gfp)
541{ 699{
542 struct amd_iommu *iommu; 700 u64 *pte, *page;
543 int i; 701 int level;
544 702
545 for (i = 0; i <= amd_iommu_last_bdf; ++i) { 703 while (address > PM_LEVEL_SIZE(domain->mode))
546 if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || 704 increase_address_space(domain, gfp);
547 (amd_iommu_pd_table[i] != domain))
548 continue;
549 705
550 iommu = amd_iommu_rlookup_table[i]; 706 level = domain->mode - 1;
551 if (!iommu) 707 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
552 continue;
553 708
554 iommu_queue_inv_dev_entry(iommu, i); 709 while (level > end_lvl) {
555 iommu_completion_wait(iommu); 710 if (!IOMMU_PTE_PRESENT(*pte)) {
711 page = (u64 *)get_zeroed_page(gfp);
712 if (!page)
713 return NULL;
714 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
715 }
716
717 level -= 1;
718
719 pte = IOMMU_PTE_PAGE(*pte);
720
721 if (pte_page && level == end_lvl)
722 *pte_page = pte;
723
724 pte = &pte[PM_LEVEL_INDEX(level, address)];
556 } 725 }
726
727 return pte;
557} 728}
558 729
559static void reset_iommu_command_buffer(struct amd_iommu *iommu) 730/*
731 * This function checks if there is a PTE for a given dma address. If
732 * there is one, it returns the pointer to it.
733 */
734static u64 *fetch_pte(struct protection_domain *domain,
735 unsigned long address, int map_size)
560{ 736{
561 pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); 737 int level;
738 u64 *pte;
562 739
563 if (iommu->reset_in_progress) 740 level = domain->mode - 1;
564 panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); 741 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
565 742
566 iommu->reset_in_progress = true; 743 while (level > map_size) {
744 if (!IOMMU_PTE_PRESENT(*pte))
745 return NULL;
567 746
568 amd_iommu_reset_cmd_buffer(iommu); 747 level -= 1;
569 flush_all_devices_for_iommu(iommu);
570 flush_all_domains_on_iommu(iommu);
571 748
572 iommu->reset_in_progress = false; 749 pte = IOMMU_PTE_PAGE(*pte);
573} 750 pte = &pte[PM_LEVEL_INDEX(level, address)];
574 751
575void amd_iommu_flush_all_devices(void) 752 if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
576{ 753 pte = NULL;
577 flush_devices_by_domain(NULL); 754 break;
578} 755 }
756 }
579 757
580/**************************************************************************** 758 return pte;
581 * 759}
582 * The functions below are used the create the page table mappings for
583 * unity mapped regions.
584 *
585 ****************************************************************************/
586 760
587/* 761/*
588 * Generic mapping functions. It maps a physical address into a DMA 762 * Generic mapping functions. It maps a physical address into a DMA
@@ -654,28 +828,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu,
654} 828}
655 829
656/* 830/*
657 * Init the unity mappings for a specific IOMMU in the system
658 *
659 * Basically iterates over all unity mapping entries and applies them to
660 * the default domain DMA of that IOMMU if necessary.
661 */
662static int iommu_init_unity_mappings(struct amd_iommu *iommu)
663{
664 struct unity_map_entry *entry;
665 int ret;
666
667 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
668 if (!iommu_for_unity_map(iommu, entry))
669 continue;
670 ret = dma_ops_unity_map(iommu->default_dom, entry);
671 if (ret)
672 return ret;
673 }
674
675 return 0;
676}
677
678/*
679 * This function actually applies the mapping to the page table of the 831 * This function actually applies the mapping to the page table of the
680 * dma_ops domain. 832 * dma_ops domain.
681 */ 833 */
@@ -704,6 +856,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
704} 856}
705 857
706/* 858/*
859 * Init the unity mappings for a specific IOMMU in the system
860 *
861 * Basically iterates over all unity mapping entries and applies them to
862 * the default domain DMA of that IOMMU if necessary.
863 */
864static int iommu_init_unity_mappings(struct amd_iommu *iommu)
865{
866 struct unity_map_entry *entry;
867 int ret;
868
869 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
870 if (!iommu_for_unity_map(iommu, entry))
871 continue;
872 ret = dma_ops_unity_map(iommu->default_dom, entry);
873 if (ret)
874 return ret;
875 }
876
877 return 0;
878}
879
880/*
707 * Inits the unity mappings required for a specific device 881 * Inits the unity mappings required for a specific device
708 */ 882 */
709static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, 883static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
@@ -740,34 +914,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
740 */ 914 */
741 915
742/* 916/*
743 * This function checks if there is a PTE for a given dma address. If 917 * Used to reserve address ranges in the aperture (e.g. for exclusion
744 * there is one, it returns the pointer to it. 918 * ranges.
745 */ 919 */
746static u64 *fetch_pte(struct protection_domain *domain, 920static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
747 unsigned long address, int map_size) 921 unsigned long start_page,
922 unsigned int pages)
748{ 923{
749 int level; 924 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
750 u64 *pte;
751
752 level = domain->mode - 1;
753 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
754
755 while (level > map_size) {
756 if (!IOMMU_PTE_PRESENT(*pte))
757 return NULL;
758
759 level -= 1;
760 925
761 pte = IOMMU_PTE_PAGE(*pte); 926 if (start_page + pages > last_page)
762 pte = &pte[PM_LEVEL_INDEX(level, address)]; 927 pages = last_page - start_page;
763 928
764 if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { 929 for (i = start_page; i < start_page + pages; ++i) {
765 pte = NULL; 930 int index = i / APERTURE_RANGE_PAGES;
766 break; 931 int page = i % APERTURE_RANGE_PAGES;
767 } 932 __set_bit(page, dom->aperture[index]->bitmap);
768 } 933 }
769
770 return pte;
771} 934}
772 935
773/* 936/*
@@ -775,11 +938,11 @@ static u64 *fetch_pte(struct protection_domain *domain,
775 * aperture in case of dma_ops domain allocation or address allocation 938 * aperture in case of dma_ops domain allocation or address allocation
776 * failure. 939 * failure.
777 */ 940 */
778static int alloc_new_range(struct amd_iommu *iommu, 941static int alloc_new_range(struct dma_ops_domain *dma_dom,
779 struct dma_ops_domain *dma_dom,
780 bool populate, gfp_t gfp) 942 bool populate, gfp_t gfp)
781{ 943{
782 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; 944 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
945 struct amd_iommu *iommu;
783 int i; 946 int i;
784 947
785#ifdef CONFIG_IOMMU_STRESS 948#ifdef CONFIG_IOMMU_STRESS
@@ -819,14 +982,17 @@ static int alloc_new_range(struct amd_iommu *iommu,
819 dma_dom->aperture_size += APERTURE_RANGE_SIZE; 982 dma_dom->aperture_size += APERTURE_RANGE_SIZE;
820 983
821 /* Intialize the exclusion range if necessary */ 984 /* Intialize the exclusion range if necessary */
822 if (iommu->exclusion_start && 985 for_each_iommu(iommu) {
823 iommu->exclusion_start >= dma_dom->aperture[index]->offset && 986 if (iommu->exclusion_start &&
824 iommu->exclusion_start < dma_dom->aperture_size) { 987 iommu->exclusion_start >= dma_dom->aperture[index]->offset
825 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; 988 && iommu->exclusion_start < dma_dom->aperture_size) {
826 int pages = iommu_num_pages(iommu->exclusion_start, 989 unsigned long startpage;
827 iommu->exclusion_length, 990 int pages = iommu_num_pages(iommu->exclusion_start,
828 PAGE_SIZE); 991 iommu->exclusion_length,
829 dma_ops_reserve_addresses(dma_dom, startpage, pages); 992 PAGE_SIZE);
993 startpage = iommu->exclusion_start >> PAGE_SHIFT;
994 dma_ops_reserve_addresses(dma_dom, startpage, pages);
995 }
830 } 996 }
831 997
832 /* 998 /*
@@ -928,7 +1094,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
928 } 1094 }
929 1095
930 if (unlikely(address == -1)) 1096 if (unlikely(address == -1))
931 address = bad_dma_address; 1097 address = DMA_ERROR_CODE;
932 1098
933 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); 1099 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
934 1100
@@ -973,6 +1139,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
973 * 1139 *
974 ****************************************************************************/ 1140 ****************************************************************************/
975 1141
1142/*
1143 * This function adds a protection domain to the global protection domain list
1144 */
1145static void add_domain_to_list(struct protection_domain *domain)
1146{
1147 unsigned long flags;
1148
1149 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1150 list_add(&domain->list, &amd_iommu_pd_list);
1151 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1152}
1153
1154/*
1155 * This function removes a protection domain to the global
1156 * protection domain list
1157 */
1158static void del_domain_from_list(struct protection_domain *domain)
1159{
1160 unsigned long flags;
1161
1162 spin_lock_irqsave(&amd_iommu_pd_lock, flags);
1163 list_del(&domain->list);
1164 spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
1165}
1166
976static u16 domain_id_alloc(void) 1167static u16 domain_id_alloc(void)
977{ 1168{
978 unsigned long flags; 1169 unsigned long flags;
@@ -1000,26 +1191,6 @@ static void domain_id_free(int id)
1000 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1191 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1001} 1192}
1002 1193
1003/*
1004 * Used to reserve address ranges in the aperture (e.g. for exclusion
1005 * ranges.
1006 */
1007static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1008 unsigned long start_page,
1009 unsigned int pages)
1010{
1011 unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
1012
1013 if (start_page + pages > last_page)
1014 pages = last_page - start_page;
1015
1016 for (i = start_page; i < start_page + pages; ++i) {
1017 int index = i / APERTURE_RANGE_PAGES;
1018 int page = i % APERTURE_RANGE_PAGES;
1019 __set_bit(page, dom->aperture[index]->bitmap);
1020 }
1021}
1022
1023static void free_pagetable(struct protection_domain *domain) 1194static void free_pagetable(struct protection_domain *domain)
1024{ 1195{
1025 int i, j; 1196 int i, j;
@@ -1061,6 +1232,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
1061 if (!dom) 1232 if (!dom)
1062 return; 1233 return;
1063 1234
1235 del_domain_from_list(&dom->domain);
1236
1064 free_pagetable(&dom->domain); 1237 free_pagetable(&dom->domain);
1065 1238
1066 for (i = 0; i < APERTURE_MAX_RANGES; ++i) { 1239 for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
@@ -1078,7 +1251,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
1078 * It also intializes the page table and the address allocator data 1251 * It also intializes the page table and the address allocator data
1079 * structures required for the dma_ops interface 1252 * structures required for the dma_ops interface
1080 */ 1253 */
1081static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) 1254static struct dma_ops_domain *dma_ops_domain_alloc(void)
1082{ 1255{
1083 struct dma_ops_domain *dma_dom; 1256 struct dma_ops_domain *dma_dom;
1084 1257
@@ -1091,6 +1264,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
1091 dma_dom->domain.id = domain_id_alloc(); 1264 dma_dom->domain.id = domain_id_alloc();
1092 if (dma_dom->domain.id == 0) 1265 if (dma_dom->domain.id == 0)
1093 goto free_dma_dom; 1266 goto free_dma_dom;
1267 INIT_LIST_HEAD(&dma_dom->domain.dev_list);
1094 dma_dom->domain.mode = PAGE_MODE_2_LEVEL; 1268 dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
1095 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); 1269 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1096 dma_dom->domain.flags = PD_DMA_OPS_MASK; 1270 dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -1101,7 +1275,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
1101 dma_dom->need_flush = false; 1275 dma_dom->need_flush = false;
1102 dma_dom->target_dev = 0xffff; 1276 dma_dom->target_dev = 0xffff;
1103 1277
1104 if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) 1278 add_domain_to_list(&dma_dom->domain);
1279
1280 if (alloc_new_range(dma_dom, true, GFP_KERNEL))
1105 goto free_dma_dom; 1281 goto free_dma_dom;
1106 1282
1107 /* 1283 /*
@@ -1129,22 +1305,6 @@ static bool dma_ops_domain(struct protection_domain *domain)
1129 return domain->flags & PD_DMA_OPS_MASK; 1305 return domain->flags & PD_DMA_OPS_MASK;
1130} 1306}
1131 1307
1132/*
1133 * Find out the protection domain structure for a given PCI device. This
1134 * will give us the pointer to the page table root for example.
1135 */
1136static struct protection_domain *domain_for_device(u16 devid)
1137{
1138 struct protection_domain *dom;
1139 unsigned long flags;
1140
1141 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1142 dom = amd_iommu_pd_table[devid];
1143 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1144
1145 return dom;
1146}
1147
1148static void set_dte_entry(u16 devid, struct protection_domain *domain) 1308static void set_dte_entry(u16 devid, struct protection_domain *domain)
1149{ 1309{
1150 u64 pte_root = virt_to_phys(domain->pt_root); 1310 u64 pte_root = virt_to_phys(domain->pt_root);
@@ -1156,42 +1316,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain)
1156 amd_iommu_dev_table[devid].data[2] = domain->id; 1316 amd_iommu_dev_table[devid].data[2] = domain->id;
1157 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); 1317 amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1158 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); 1318 amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
1319}
1320
1321static void clear_dte_entry(u16 devid)
1322{
1323 /* remove entry from the device table seen by the hardware */
1324 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1325 amd_iommu_dev_table[devid].data[1] = 0;
1326 amd_iommu_dev_table[devid].data[2] = 0;
1159 1327
1160 amd_iommu_pd_table[devid] = domain; 1328 amd_iommu_apply_erratum_63(devid);
1329}
1330
1331static void do_attach(struct device *dev, struct protection_domain *domain)
1332{
1333 struct iommu_dev_data *dev_data;
1334 struct amd_iommu *iommu;
1335 u16 devid;
1336
1337 devid = get_device_id(dev);
1338 iommu = amd_iommu_rlookup_table[devid];
1339 dev_data = get_dev_data(dev);
1340
1341 /* Update data structures */
1342 dev_data->domain = domain;
1343 list_add(&dev_data->list, &domain->dev_list);
1344 set_dte_entry(devid, domain);
1345
1346 /* Do reference counting */
1347 domain->dev_iommu[iommu->index] += 1;
1348 domain->dev_cnt += 1;
1349
1350 /* Flush the DTE entry */
1351 iommu_flush_device(dev);
1352}
1353
1354static void do_detach(struct device *dev)
1355{
1356 struct iommu_dev_data *dev_data;
1357 struct amd_iommu *iommu;
1358 u16 devid;
1359
1360 devid = get_device_id(dev);
1361 iommu = amd_iommu_rlookup_table[devid];
1362 dev_data = get_dev_data(dev);
1363
1364 /* decrease reference counters */
1365 dev_data->domain->dev_iommu[iommu->index] -= 1;
1366 dev_data->domain->dev_cnt -= 1;
1367
1368 /* Update data structures */
1369 dev_data->domain = NULL;
1370 list_del(&dev_data->list);
1371 clear_dte_entry(devid);
1372
1373 /* Flush the DTE entry */
1374 iommu_flush_device(dev);
1161} 1375}
1162 1376
1163/* 1377/*
1164 * If a device is not yet associated with a domain, this function does 1378 * If a device is not yet associated with a domain, this function does
1165 * assigns it visible for the hardware 1379 * assigns it visible for the hardware
1166 */ 1380 */
1167static void __attach_device(struct amd_iommu *iommu, 1381static int __attach_device(struct device *dev,
1168 struct protection_domain *domain, 1382 struct protection_domain *domain)
1169 u16 devid)
1170{ 1383{
1384 struct iommu_dev_data *dev_data, *alias_data;
1385
1386 dev_data = get_dev_data(dev);
1387 alias_data = get_dev_data(dev_data->alias);
1388
1389 if (!alias_data)
1390 return -EINVAL;
1391
1171 /* lock domain */ 1392 /* lock domain */
1172 spin_lock(&domain->lock); 1393 spin_lock(&domain->lock);
1173 1394
1174 /* update DTE entry */ 1395 /* Some sanity checks */
1175 set_dte_entry(devid, domain); 1396 if (alias_data->domain != NULL &&
1397 alias_data->domain != domain)
1398 return -EBUSY;
1176 1399
1177 domain->dev_cnt += 1; 1400 if (dev_data->domain != NULL &&
1401 dev_data->domain != domain)
1402 return -EBUSY;
1403
1404 /* Do real assignment */
1405 if (dev_data->alias != dev) {
1406 alias_data = get_dev_data(dev_data->alias);
1407 if (alias_data->domain == NULL)
1408 do_attach(dev_data->alias, domain);
1409
1410 atomic_inc(&alias_data->bind);
1411 }
1412
1413 if (dev_data->domain == NULL)
1414 do_attach(dev, domain);
1415
1416 atomic_inc(&dev_data->bind);
1178 1417
1179 /* ready */ 1418 /* ready */
1180 spin_unlock(&domain->lock); 1419 spin_unlock(&domain->lock);
1420
1421 return 0;
1181} 1422}
1182 1423
1183/* 1424/*
1184 * If a device is not yet associated with a domain, this function does 1425 * If a device is not yet associated with a domain, this function does
1185 * assigns it visible for the hardware 1426 * assigns it visible for the hardware
1186 */ 1427 */
1187static void attach_device(struct amd_iommu *iommu, 1428static int attach_device(struct device *dev,
1188 struct protection_domain *domain, 1429 struct protection_domain *domain)
1189 u16 devid)
1190{ 1430{
1191 unsigned long flags; 1431 unsigned long flags;
1432 int ret;
1192 1433
1193 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1434 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1194 __attach_device(iommu, domain, devid); 1435 ret = __attach_device(dev, domain);
1195 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1436 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1196 1437
1197 /* 1438 /*
@@ -1199,98 +1440,125 @@ static void attach_device(struct amd_iommu *iommu,
1199 * left the caches in the IOMMU dirty. So we have to flush 1440 * left the caches in the IOMMU dirty. So we have to flush
1200 * here to evict all dirty stuff. 1441 * here to evict all dirty stuff.
1201 */ 1442 */
1202 iommu_queue_inv_dev_entry(iommu, devid); 1443 iommu_flush_tlb_pde(domain);
1203 iommu_flush_tlb_pde(iommu, domain->id); 1444
1445 return ret;
1204} 1446}
1205 1447
1206/* 1448/*
1207 * Removes a device from a protection domain (unlocked) 1449 * Removes a device from a protection domain (unlocked)
1208 */ 1450 */
1209static void __detach_device(struct protection_domain *domain, u16 devid) 1451static void __detach_device(struct device *dev)
1210{ 1452{
1453 struct iommu_dev_data *dev_data = get_dev_data(dev);
1454 struct iommu_dev_data *alias_data;
1455 unsigned long flags;
1211 1456
1212 /* lock domain */ 1457 BUG_ON(!dev_data->domain);
1213 spin_lock(&domain->lock);
1214
1215 /* remove domain from the lookup table */
1216 amd_iommu_pd_table[devid] = NULL;
1217 1458
1218 /* remove entry from the device table seen by the hardware */ 1459 spin_lock_irqsave(&dev_data->domain->lock, flags);
1219 amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1220 amd_iommu_dev_table[devid].data[1] = 0;
1221 amd_iommu_dev_table[devid].data[2] = 0;
1222 1460
1223 amd_iommu_apply_erratum_63(devid); 1461 if (dev_data->alias != dev) {
1462 alias_data = get_dev_data(dev_data->alias);
1463 if (atomic_dec_and_test(&alias_data->bind))
1464 do_detach(dev_data->alias);
1465 }
1224 1466
1225 /* decrease reference counter */ 1467 if (atomic_dec_and_test(&dev_data->bind))
1226 domain->dev_cnt -= 1; 1468 do_detach(dev);
1227 1469
1228 /* ready */ 1470 spin_unlock_irqrestore(&dev_data->domain->lock, flags);
1229 spin_unlock(&domain->lock);
1230 1471
1231 /* 1472 /*
1232 * If we run in passthrough mode the device must be assigned to the 1473 * If we run in passthrough mode the device must be assigned to the
1233 * passthrough domain if it is detached from any other domain 1474 * passthrough domain if it is detached from any other domain
1234 */ 1475 */
1235 if (iommu_pass_through) { 1476 if (iommu_pass_through && dev_data->domain == NULL)
1236 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; 1477 __attach_device(dev, pt_domain);
1237 __attach_device(iommu, pt_domain, devid);
1238 }
1239} 1478}
1240 1479
1241/* 1480/*
1242 * Removes a device from a protection domain (with devtable_lock held) 1481 * Removes a device from a protection domain (with devtable_lock held)
1243 */ 1482 */
1244static void detach_device(struct protection_domain *domain, u16 devid) 1483static void detach_device(struct device *dev)
1245{ 1484{
1246 unsigned long flags; 1485 unsigned long flags;
1247 1486
1248 /* lock device table */ 1487 /* lock device table */
1249 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 1488 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1250 __detach_device(domain, devid); 1489 __detach_device(dev);
1251 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 1490 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1252} 1491}
1253 1492
1493/*
1494 * Find out the protection domain structure for a given PCI device. This
1495 * will give us the pointer to the page table root for example.
1496 */
1497static struct protection_domain *domain_for_device(struct device *dev)
1498{
1499 struct protection_domain *dom;
1500 struct iommu_dev_data *dev_data, *alias_data;
1501 unsigned long flags;
1502 u16 devid, alias;
1503
1504 devid = get_device_id(dev);
1505 alias = amd_iommu_alias_table[devid];
1506 dev_data = get_dev_data(dev);
1507 alias_data = get_dev_data(dev_data->alias);
1508 if (!alias_data)
1509 return NULL;
1510
1511 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1512 dom = dev_data->domain;
1513 if (dom == NULL &&
1514 alias_data->domain != NULL) {
1515 __attach_device(dev, alias_data->domain);
1516 dom = alias_data->domain;
1517 }
1518
1519 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1520
1521 return dom;
1522}
1523
1254static int device_change_notifier(struct notifier_block *nb, 1524static int device_change_notifier(struct notifier_block *nb,
1255 unsigned long action, void *data) 1525 unsigned long action, void *data)
1256{ 1526{
1257 struct device *dev = data; 1527 struct device *dev = data;
1258 struct pci_dev *pdev = to_pci_dev(dev); 1528 u16 devid;
1259 u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
1260 struct protection_domain *domain; 1529 struct protection_domain *domain;
1261 struct dma_ops_domain *dma_domain; 1530 struct dma_ops_domain *dma_domain;
1262 struct amd_iommu *iommu; 1531 struct amd_iommu *iommu;
1263 unsigned long flags; 1532 unsigned long flags;
1264 1533
1265 if (devid > amd_iommu_last_bdf) 1534 if (!check_device(dev))
1266 goto out; 1535 return 0;
1267
1268 devid = amd_iommu_alias_table[devid];
1269
1270 iommu = amd_iommu_rlookup_table[devid];
1271 if (iommu == NULL)
1272 goto out;
1273
1274 domain = domain_for_device(devid);
1275 1536
1276 if (domain && !dma_ops_domain(domain)) 1537 devid = get_device_id(dev);
1277 WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " 1538 iommu = amd_iommu_rlookup_table[devid];
1278 "to a non-dma-ops domain\n", dev_name(dev));
1279 1539
1280 switch (action) { 1540 switch (action) {
1281 case BUS_NOTIFY_UNBOUND_DRIVER: 1541 case BUS_NOTIFY_UNBOUND_DRIVER:
1542
1543 domain = domain_for_device(dev);
1544
1282 if (!domain) 1545 if (!domain)
1283 goto out; 1546 goto out;
1284 if (iommu_pass_through) 1547 if (iommu_pass_through)
1285 break; 1548 break;
1286 detach_device(domain, devid); 1549 detach_device(dev);
1287 break; 1550 break;
1288 case BUS_NOTIFY_ADD_DEVICE: 1551 case BUS_NOTIFY_ADD_DEVICE:
1552
1553 iommu_init_device(dev);
1554
1555 domain = domain_for_device(dev);
1556
1289 /* allocate a protection domain if a device is added */ 1557 /* allocate a protection domain if a device is added */
1290 dma_domain = find_protection_domain(devid); 1558 dma_domain = find_protection_domain(devid);
1291 if (dma_domain) 1559 if (dma_domain)
1292 goto out; 1560 goto out;
1293 dma_domain = dma_ops_domain_alloc(iommu); 1561 dma_domain = dma_ops_domain_alloc();
1294 if (!dma_domain) 1562 if (!dma_domain)
1295 goto out; 1563 goto out;
1296 dma_domain->target_dev = devid; 1564 dma_domain->target_dev = devid;
@@ -1300,11 +1568,15 @@ static int device_change_notifier(struct notifier_block *nb,
1300 spin_unlock_irqrestore(&iommu_pd_list_lock, flags); 1568 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1301 1569
1302 break; 1570 break;
1571 case BUS_NOTIFY_DEL_DEVICE:
1572
1573 iommu_uninit_device(dev);
1574
1303 default: 1575 default:
1304 goto out; 1576 goto out;
1305 } 1577 }
1306 1578
1307 iommu_queue_inv_dev_entry(iommu, devid); 1579 iommu_flush_device(dev);
1308 iommu_completion_wait(iommu); 1580 iommu_completion_wait(iommu);
1309 1581
1310out: 1582out:
@@ -1322,106 +1594,46 @@ static struct notifier_block device_nb = {
1322 *****************************************************************************/ 1594 *****************************************************************************/
1323 1595
1324/* 1596/*
1325 * This function checks if the driver got a valid device from the caller to
1326 * avoid dereferencing invalid pointers.
1327 */
1328static bool check_device(struct device *dev)
1329{
1330 if (!dev || !dev->dma_mask)
1331 return false;
1332
1333 return true;
1334}
1335
1336/*
1337 * In this function the list of preallocated protection domains is traversed to
1338 * find the domain for a specific device
1339 */
1340static struct dma_ops_domain *find_protection_domain(u16 devid)
1341{
1342 struct dma_ops_domain *entry, *ret = NULL;
1343 unsigned long flags;
1344
1345 if (list_empty(&iommu_pd_list))
1346 return NULL;
1347
1348 spin_lock_irqsave(&iommu_pd_list_lock, flags);
1349
1350 list_for_each_entry(entry, &iommu_pd_list, list) {
1351 if (entry->target_dev == devid) {
1352 ret = entry;
1353 break;
1354 }
1355 }
1356
1357 spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1358
1359 return ret;
1360}
1361
1362/*
1363 * In the dma_ops path we only have the struct device. This function 1597 * In the dma_ops path we only have the struct device. This function
1364 * finds the corresponding IOMMU, the protection domain and the 1598 * finds the corresponding IOMMU, the protection domain and the
1365 * requestor id for a given device. 1599 * requestor id for a given device.
1366 * If the device is not yet associated with a domain this is also done 1600 * If the device is not yet associated with a domain this is also done
1367 * in this function. 1601 * in this function.
1368 */ 1602 */
1369static int get_device_resources(struct device *dev, 1603static struct protection_domain *get_domain(struct device *dev)
1370 struct amd_iommu **iommu,
1371 struct protection_domain **domain,
1372 u16 *bdf)
1373{ 1604{
1605 struct protection_domain *domain;
1374 struct dma_ops_domain *dma_dom; 1606 struct dma_ops_domain *dma_dom;
1375 struct pci_dev *pcidev; 1607 u16 devid = get_device_id(dev);
1376 u16 _bdf;
1377
1378 *iommu = NULL;
1379 *domain = NULL;
1380 *bdf = 0xffff;
1381
1382 if (dev->bus != &pci_bus_type)
1383 return 0;
1384 1608
1385 pcidev = to_pci_dev(dev); 1609 if (!check_device(dev))
1386 _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); 1610 return ERR_PTR(-EINVAL);
1387 1611
1388 /* device not translated by any IOMMU in the system? */ 1612 domain = domain_for_device(dev);
1389 if (_bdf > amd_iommu_last_bdf) 1613 if (domain != NULL && !dma_ops_domain(domain))
1390 return 0; 1614 return ERR_PTR(-EBUSY);
1391 1615
1392 *bdf = amd_iommu_alias_table[_bdf]; 1616 if (domain != NULL)
1617 return domain;
1393 1618
1394 *iommu = amd_iommu_rlookup_table[*bdf]; 1619 /* Device not bount yet - bind it */
1395 if (*iommu == NULL) 1620 dma_dom = find_protection_domain(devid);
1396 return 0; 1621 if (!dma_dom)
1397 *domain = domain_for_device(*bdf); 1622 dma_dom = amd_iommu_rlookup_table[devid]->default_dom;
1398 if (*domain == NULL) { 1623 attach_device(dev, &dma_dom->domain);
1399 dma_dom = find_protection_domain(*bdf); 1624 DUMP_printk("Using protection domain %d for device %s\n",
1400 if (!dma_dom) 1625 dma_dom->domain.id, dev_name(dev));
1401 dma_dom = (*iommu)->default_dom;
1402 *domain = &dma_dom->domain;
1403 attach_device(*iommu, *domain, *bdf);
1404 DUMP_printk("Using protection domain %d for device %s\n",
1405 (*domain)->id, dev_name(dev));
1406 }
1407
1408 if (domain_for_device(_bdf) == NULL)
1409 attach_device(*iommu, *domain, _bdf);
1410 1626
1411 return 1; 1627 return &dma_dom->domain;
1412} 1628}
1413 1629
1414static void update_device_table(struct protection_domain *domain) 1630static void update_device_table(struct protection_domain *domain)
1415{ 1631{
1416 unsigned long flags; 1632 struct iommu_dev_data *dev_data;
1417 int i;
1418 1633
1419 for (i = 0; i <= amd_iommu_last_bdf; ++i) { 1634 list_for_each_entry(dev_data, &domain->dev_list, list) {
1420 if (amd_iommu_pd_table[i] != domain) 1635 u16 devid = get_device_id(dev_data->dev);
1421 continue; 1636 set_dte_entry(devid, domain);
1422 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1423 set_dte_entry(i, domain);
1424 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1425 } 1637 }
1426} 1638}
1427 1639
@@ -1431,76 +1643,13 @@ static void update_domain(struct protection_domain *domain)
1431 return; 1643 return;
1432 1644
1433 update_device_table(domain); 1645 update_device_table(domain);
1434 flush_devices_by_domain(domain); 1646 iommu_flush_domain_devices(domain);
1435 iommu_flush_domain(domain->id); 1647 iommu_flush_tlb_pde(domain);
1436 1648
1437 domain->updated = false; 1649 domain->updated = false;
1438} 1650}
1439 1651
1440/* 1652/*
1441 * This function is used to add another level to an IO page table. Adding
1442 * another level increases the size of the address space by 9 bits to a size up
1443 * to 64 bits.
1444 */
1445static bool increase_address_space(struct protection_domain *domain,
1446 gfp_t gfp)
1447{
1448 u64 *pte;
1449
1450 if (domain->mode == PAGE_MODE_6_LEVEL)
1451 /* address space already 64 bit large */
1452 return false;
1453
1454 pte = (void *)get_zeroed_page(gfp);
1455 if (!pte)
1456 return false;
1457
1458 *pte = PM_LEVEL_PDE(domain->mode,
1459 virt_to_phys(domain->pt_root));
1460 domain->pt_root = pte;
1461 domain->mode += 1;
1462 domain->updated = true;
1463
1464 return true;
1465}
1466
1467static u64 *alloc_pte(struct protection_domain *domain,
1468 unsigned long address,
1469 int end_lvl,
1470 u64 **pte_page,
1471 gfp_t gfp)
1472{
1473 u64 *pte, *page;
1474 int level;
1475
1476 while (address > PM_LEVEL_SIZE(domain->mode))
1477 increase_address_space(domain, gfp);
1478
1479 level = domain->mode - 1;
1480 pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
1481
1482 while (level > end_lvl) {
1483 if (!IOMMU_PTE_PRESENT(*pte)) {
1484 page = (u64 *)get_zeroed_page(gfp);
1485 if (!page)
1486 return NULL;
1487 *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
1488 }
1489
1490 level -= 1;
1491
1492 pte = IOMMU_PTE_PAGE(*pte);
1493
1494 if (pte_page && level == end_lvl)
1495 *pte_page = pte;
1496
1497 pte = &pte[PM_LEVEL_INDEX(level, address)];
1498 }
1499
1500 return pte;
1501}
1502
1503/*
1504 * This function fetches the PTE for a given address in the aperture 1653 * This function fetches the PTE for a given address in the aperture
1505 */ 1654 */
1506static u64* dma_ops_get_pte(struct dma_ops_domain *dom, 1655static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
@@ -1530,8 +1679,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1530 * This is the generic map function. It maps one 4kb page at paddr to 1679 * This is the generic map function. It maps one 4kb page at paddr to
1531 * the given address in the DMA address space for the domain. 1680 * the given address in the DMA address space for the domain.
1532 */ 1681 */
1533static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, 1682static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
1534 struct dma_ops_domain *dom,
1535 unsigned long address, 1683 unsigned long address,
1536 phys_addr_t paddr, 1684 phys_addr_t paddr,
1537 int direction) 1685 int direction)
@@ -1544,7 +1692,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
1544 1692
1545 pte = dma_ops_get_pte(dom, address); 1693 pte = dma_ops_get_pte(dom, address);
1546 if (!pte) 1694 if (!pte)
1547 return bad_dma_address; 1695 return DMA_ERROR_CODE;
1548 1696
1549 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; 1697 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1550 1698
@@ -1565,8 +1713,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
1565/* 1713/*
1566 * The generic unmapping function for on page in the DMA address space. 1714 * The generic unmapping function for on page in the DMA address space.
1567 */ 1715 */
1568static void dma_ops_domain_unmap(struct amd_iommu *iommu, 1716static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
1569 struct dma_ops_domain *dom,
1570 unsigned long address) 1717 unsigned long address)
1571{ 1718{
1572 struct aperture_range *aperture; 1719 struct aperture_range *aperture;
@@ -1597,7 +1744,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
1597 * Must be called with the domain lock held. 1744 * Must be called with the domain lock held.
1598 */ 1745 */
1599static dma_addr_t __map_single(struct device *dev, 1746static dma_addr_t __map_single(struct device *dev,
1600 struct amd_iommu *iommu,
1601 struct dma_ops_domain *dma_dom, 1747 struct dma_ops_domain *dma_dom,
1602 phys_addr_t paddr, 1748 phys_addr_t paddr,
1603 size_t size, 1749 size_t size,
@@ -1625,7 +1771,7 @@ static dma_addr_t __map_single(struct device *dev,
1625retry: 1771retry:
1626 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, 1772 address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
1627 dma_mask); 1773 dma_mask);
1628 if (unlikely(address == bad_dma_address)) { 1774 if (unlikely(address == DMA_ERROR_CODE)) {
1629 /* 1775 /*
1630 * setting next_address here will let the address 1776 * setting next_address here will let the address
1631 * allocator only scan the new allocated range in the 1777 * allocator only scan the new allocated range in the
@@ -1633,7 +1779,7 @@ retry:
1633 */ 1779 */
1634 dma_dom->next_address = dma_dom->aperture_size; 1780 dma_dom->next_address = dma_dom->aperture_size;
1635 1781
1636 if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) 1782 if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
1637 goto out; 1783 goto out;
1638 1784
1639 /* 1785 /*
@@ -1645,8 +1791,8 @@ retry:
1645 1791
1646 start = address; 1792 start = address;
1647 for (i = 0; i < pages; ++i) { 1793 for (i = 0; i < pages; ++i) {
1648 ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); 1794 ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
1649 if (ret == bad_dma_address) 1795 if (ret == DMA_ERROR_CODE)
1650 goto out_unmap; 1796 goto out_unmap;
1651 1797
1652 paddr += PAGE_SIZE; 1798 paddr += PAGE_SIZE;
@@ -1657,10 +1803,10 @@ retry:
1657 ADD_STATS_COUNTER(alloced_io_mem, size); 1803 ADD_STATS_COUNTER(alloced_io_mem, size);
1658 1804
1659 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { 1805 if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
1660 iommu_flush_tlb(iommu, dma_dom->domain.id); 1806 iommu_flush_tlb(&dma_dom->domain);
1661 dma_dom->need_flush = false; 1807 dma_dom->need_flush = false;
1662 } else if (unlikely(iommu_has_npcache(iommu))) 1808 } else if (unlikely(amd_iommu_np_cache))
1663 iommu_flush_pages(iommu, dma_dom->domain.id, address, size); 1809 iommu_flush_pages(&dma_dom->domain, address, size);
1664 1810
1665out: 1811out:
1666 return address; 1812 return address;
@@ -1669,20 +1815,19 @@ out_unmap:
1669 1815
1670 for (--i; i >= 0; --i) { 1816 for (--i; i >= 0; --i) {
1671 start -= PAGE_SIZE; 1817 start -= PAGE_SIZE;
1672 dma_ops_domain_unmap(iommu, dma_dom, start); 1818 dma_ops_domain_unmap(dma_dom, start);
1673 } 1819 }
1674 1820
1675 dma_ops_free_addresses(dma_dom, address, pages); 1821 dma_ops_free_addresses(dma_dom, address, pages);
1676 1822
1677 return bad_dma_address; 1823 return DMA_ERROR_CODE;
1678} 1824}
1679 1825
1680/* 1826/*
1681 * Does the reverse of the __map_single function. Must be called with 1827 * Does the reverse of the __map_single function. Must be called with
1682 * the domain lock held too 1828 * the domain lock held too
1683 */ 1829 */
1684static void __unmap_single(struct amd_iommu *iommu, 1830static void __unmap_single(struct dma_ops_domain *dma_dom,
1685 struct dma_ops_domain *dma_dom,
1686 dma_addr_t dma_addr, 1831 dma_addr_t dma_addr,
1687 size_t size, 1832 size_t size,
1688 int dir) 1833 int dir)
@@ -1690,7 +1835,7 @@ static void __unmap_single(struct amd_iommu *iommu,
1690 dma_addr_t i, start; 1835 dma_addr_t i, start;
1691 unsigned int pages; 1836 unsigned int pages;
1692 1837
1693 if ((dma_addr == bad_dma_address) || 1838 if ((dma_addr == DMA_ERROR_CODE) ||
1694 (dma_addr + size > dma_dom->aperture_size)) 1839 (dma_addr + size > dma_dom->aperture_size))
1695 return; 1840 return;
1696 1841
@@ -1699,7 +1844,7 @@ static void __unmap_single(struct amd_iommu *iommu,
1699 start = dma_addr; 1844 start = dma_addr;
1700 1845
1701 for (i = 0; i < pages; ++i) { 1846 for (i = 0; i < pages; ++i) {
1702 dma_ops_domain_unmap(iommu, dma_dom, start); 1847 dma_ops_domain_unmap(dma_dom, start);
1703 start += PAGE_SIZE; 1848 start += PAGE_SIZE;
1704 } 1849 }
1705 1850
@@ -1708,7 +1853,7 @@ static void __unmap_single(struct amd_iommu *iommu,
1708 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1853 dma_ops_free_addresses(dma_dom, dma_addr, pages);
1709 1854
1710 if (amd_iommu_unmap_flush || dma_dom->need_flush) { 1855 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
1711 iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); 1856 iommu_flush_pages(&dma_dom->domain, dma_addr, size);
1712 dma_dom->need_flush = false; 1857 dma_dom->need_flush = false;
1713 } 1858 }
1714} 1859}
@@ -1722,36 +1867,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
1722 struct dma_attrs *attrs) 1867 struct dma_attrs *attrs)
1723{ 1868{
1724 unsigned long flags; 1869 unsigned long flags;
1725 struct amd_iommu *iommu;
1726 struct protection_domain *domain; 1870 struct protection_domain *domain;
1727 u16 devid;
1728 dma_addr_t addr; 1871 dma_addr_t addr;
1729 u64 dma_mask; 1872 u64 dma_mask;
1730 phys_addr_t paddr = page_to_phys(page) + offset; 1873 phys_addr_t paddr = page_to_phys(page) + offset;
1731 1874
1732 INC_STATS_COUNTER(cnt_map_single); 1875 INC_STATS_COUNTER(cnt_map_single);
1733 1876
1734 if (!check_device(dev)) 1877 domain = get_domain(dev);
1735 return bad_dma_address; 1878 if (PTR_ERR(domain) == -EINVAL)
1736
1737 dma_mask = *dev->dma_mask;
1738
1739 get_device_resources(dev, &iommu, &domain, &devid);
1740
1741 if (iommu == NULL || domain == NULL)
1742 /* device not handled by any AMD IOMMU */
1743 return (dma_addr_t)paddr; 1879 return (dma_addr_t)paddr;
1880 else if (IS_ERR(domain))
1881 return DMA_ERROR_CODE;
1744 1882
1745 if (!dma_ops_domain(domain)) 1883 dma_mask = *dev->dma_mask;
1746 return bad_dma_address;
1747 1884
1748 spin_lock_irqsave(&domain->lock, flags); 1885 spin_lock_irqsave(&domain->lock, flags);
1749 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, 1886
1887 addr = __map_single(dev, domain->priv, paddr, size, dir, false,
1750 dma_mask); 1888 dma_mask);
1751 if (addr == bad_dma_address) 1889 if (addr == DMA_ERROR_CODE)
1752 goto out; 1890 goto out;
1753 1891
1754 iommu_completion_wait(iommu); 1892 iommu_flush_complete(domain);
1755 1893
1756out: 1894out:
1757 spin_unlock_irqrestore(&domain->lock, flags); 1895 spin_unlock_irqrestore(&domain->lock, flags);
@@ -1766,25 +1904,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
1766 enum dma_data_direction dir, struct dma_attrs *attrs) 1904 enum dma_data_direction dir, struct dma_attrs *attrs)
1767{ 1905{
1768 unsigned long flags; 1906 unsigned long flags;
1769 struct amd_iommu *iommu;
1770 struct protection_domain *domain; 1907 struct protection_domain *domain;
1771 u16 devid;
1772 1908
1773 INC_STATS_COUNTER(cnt_unmap_single); 1909 INC_STATS_COUNTER(cnt_unmap_single);
1774 1910
1775 if (!check_device(dev) || 1911 domain = get_domain(dev);
1776 !get_device_resources(dev, &iommu, &domain, &devid)) 1912 if (IS_ERR(domain))
1777 /* device not handled by any AMD IOMMU */
1778 return;
1779
1780 if (!dma_ops_domain(domain))
1781 return; 1913 return;
1782 1914
1783 spin_lock_irqsave(&domain->lock, flags); 1915 spin_lock_irqsave(&domain->lock, flags);
1784 1916
1785 __unmap_single(iommu, domain->priv, dma_addr, size, dir); 1917 __unmap_single(domain->priv, dma_addr, size, dir);
1786 1918
1787 iommu_completion_wait(iommu); 1919 iommu_flush_complete(domain);
1788 1920
1789 spin_unlock_irqrestore(&domain->lock, flags); 1921 spin_unlock_irqrestore(&domain->lock, flags);
1790} 1922}
@@ -1816,9 +1948,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1816 struct dma_attrs *attrs) 1948 struct dma_attrs *attrs)
1817{ 1949{
1818 unsigned long flags; 1950 unsigned long flags;
1819 struct amd_iommu *iommu;
1820 struct protection_domain *domain; 1951 struct protection_domain *domain;
1821 u16 devid;
1822 int i; 1952 int i;
1823 struct scatterlist *s; 1953 struct scatterlist *s;
1824 phys_addr_t paddr; 1954 phys_addr_t paddr;
@@ -1827,25 +1957,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1827 1957
1828 INC_STATS_COUNTER(cnt_map_sg); 1958 INC_STATS_COUNTER(cnt_map_sg);
1829 1959
1830 if (!check_device(dev)) 1960 domain = get_domain(dev);
1961 if (PTR_ERR(domain) == -EINVAL)
1962 return map_sg_no_iommu(dev, sglist, nelems, dir);
1963 else if (IS_ERR(domain))
1831 return 0; 1964 return 0;
1832 1965
1833 dma_mask = *dev->dma_mask; 1966 dma_mask = *dev->dma_mask;
1834 1967
1835 get_device_resources(dev, &iommu, &domain, &devid);
1836
1837 if (!iommu || !domain)
1838 return map_sg_no_iommu(dev, sglist, nelems, dir);
1839
1840 if (!dma_ops_domain(domain))
1841 return 0;
1842
1843 spin_lock_irqsave(&domain->lock, flags); 1968 spin_lock_irqsave(&domain->lock, flags);
1844 1969
1845 for_each_sg(sglist, s, nelems, i) { 1970 for_each_sg(sglist, s, nelems, i) {
1846 paddr = sg_phys(s); 1971 paddr = sg_phys(s);
1847 1972
1848 s->dma_address = __map_single(dev, iommu, domain->priv, 1973 s->dma_address = __map_single(dev, domain->priv,
1849 paddr, s->length, dir, false, 1974 paddr, s->length, dir, false,
1850 dma_mask); 1975 dma_mask);
1851 1976
@@ -1856,7 +1981,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1856 goto unmap; 1981 goto unmap;
1857 } 1982 }
1858 1983
1859 iommu_completion_wait(iommu); 1984 iommu_flush_complete(domain);
1860 1985
1861out: 1986out:
1862 spin_unlock_irqrestore(&domain->lock, flags); 1987 spin_unlock_irqrestore(&domain->lock, flags);
@@ -1865,7 +1990,7 @@ out:
1865unmap: 1990unmap:
1866 for_each_sg(sglist, s, mapped_elems, i) { 1991 for_each_sg(sglist, s, mapped_elems, i) {
1867 if (s->dma_address) 1992 if (s->dma_address)
1868 __unmap_single(iommu, domain->priv, s->dma_address, 1993 __unmap_single(domain->priv, s->dma_address,
1869 s->dma_length, dir); 1994 s->dma_length, dir);
1870 s->dma_address = s->dma_length = 0; 1995 s->dma_address = s->dma_length = 0;
1871 } 1996 }
@@ -1884,30 +2009,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1884 struct dma_attrs *attrs) 2009 struct dma_attrs *attrs)
1885{ 2010{
1886 unsigned long flags; 2011 unsigned long flags;
1887 struct amd_iommu *iommu;
1888 struct protection_domain *domain; 2012 struct protection_domain *domain;
1889 struct scatterlist *s; 2013 struct scatterlist *s;
1890 u16 devid;
1891 int i; 2014 int i;
1892 2015
1893 INC_STATS_COUNTER(cnt_unmap_sg); 2016 INC_STATS_COUNTER(cnt_unmap_sg);
1894 2017
1895 if (!check_device(dev) || 2018 domain = get_domain(dev);
1896 !get_device_resources(dev, &iommu, &domain, &devid)) 2019 if (IS_ERR(domain))
1897 return;
1898
1899 if (!dma_ops_domain(domain))
1900 return; 2020 return;
1901 2021
1902 spin_lock_irqsave(&domain->lock, flags); 2022 spin_lock_irqsave(&domain->lock, flags);
1903 2023
1904 for_each_sg(sglist, s, nelems, i) { 2024 for_each_sg(sglist, s, nelems, i) {
1905 __unmap_single(iommu, domain->priv, s->dma_address, 2025 __unmap_single(domain->priv, s->dma_address,
1906 s->dma_length, dir); 2026 s->dma_length, dir);
1907 s->dma_address = s->dma_length = 0; 2027 s->dma_address = s->dma_length = 0;
1908 } 2028 }
1909 2029
1910 iommu_completion_wait(iommu); 2030 iommu_flush_complete(domain);
1911 2031
1912 spin_unlock_irqrestore(&domain->lock, flags); 2032 spin_unlock_irqrestore(&domain->lock, flags);
1913} 2033}
@@ -1920,49 +2040,44 @@ static void *alloc_coherent(struct device *dev, size_t size,
1920{ 2040{
1921 unsigned long flags; 2041 unsigned long flags;
1922 void *virt_addr; 2042 void *virt_addr;
1923 struct amd_iommu *iommu;
1924 struct protection_domain *domain; 2043 struct protection_domain *domain;
1925 u16 devid;
1926 phys_addr_t paddr; 2044 phys_addr_t paddr;
1927 u64 dma_mask = dev->coherent_dma_mask; 2045 u64 dma_mask = dev->coherent_dma_mask;
1928 2046
1929 INC_STATS_COUNTER(cnt_alloc_coherent); 2047 INC_STATS_COUNTER(cnt_alloc_coherent);
1930 2048
1931 if (!check_device(dev)) 2049 domain = get_domain(dev);
2050 if (PTR_ERR(domain) == -EINVAL) {
2051 virt_addr = (void *)__get_free_pages(flag, get_order(size));
2052 *dma_addr = __pa(virt_addr);
2053 return virt_addr;
2054 } else if (IS_ERR(domain))
1932 return NULL; 2055 return NULL;
1933 2056
1934 if (!get_device_resources(dev, &iommu, &domain, &devid)) 2057 dma_mask = dev->coherent_dma_mask;
1935 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 2058 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2059 flag |= __GFP_ZERO;
1936 2060
1937 flag |= __GFP_ZERO;
1938 virt_addr = (void *)__get_free_pages(flag, get_order(size)); 2061 virt_addr = (void *)__get_free_pages(flag, get_order(size));
1939 if (!virt_addr) 2062 if (!virt_addr)
1940 return NULL; 2063 return NULL;
1941 2064
1942 paddr = virt_to_phys(virt_addr); 2065 paddr = virt_to_phys(virt_addr);
1943 2066
1944 if (!iommu || !domain) {
1945 *dma_addr = (dma_addr_t)paddr;
1946 return virt_addr;
1947 }
1948
1949 if (!dma_ops_domain(domain))
1950 goto out_free;
1951
1952 if (!dma_mask) 2067 if (!dma_mask)
1953 dma_mask = *dev->dma_mask; 2068 dma_mask = *dev->dma_mask;
1954 2069
1955 spin_lock_irqsave(&domain->lock, flags); 2070 spin_lock_irqsave(&domain->lock, flags);
1956 2071
1957 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 2072 *dma_addr = __map_single(dev, domain->priv, paddr,
1958 size, DMA_BIDIRECTIONAL, true, dma_mask); 2073 size, DMA_BIDIRECTIONAL, true, dma_mask);
1959 2074
1960 if (*dma_addr == bad_dma_address) { 2075 if (*dma_addr == DMA_ERROR_CODE) {
1961 spin_unlock_irqrestore(&domain->lock, flags); 2076 spin_unlock_irqrestore(&domain->lock, flags);
1962 goto out_free; 2077 goto out_free;
1963 } 2078 }
1964 2079
1965 iommu_completion_wait(iommu); 2080 iommu_flush_complete(domain);
1966 2081
1967 spin_unlock_irqrestore(&domain->lock, flags); 2082 spin_unlock_irqrestore(&domain->lock, flags);
1968 2083
@@ -1982,28 +2097,19 @@ static void free_coherent(struct device *dev, size_t size,
1982 void *virt_addr, dma_addr_t dma_addr) 2097 void *virt_addr, dma_addr_t dma_addr)
1983{ 2098{
1984 unsigned long flags; 2099 unsigned long flags;
1985 struct amd_iommu *iommu;
1986 struct protection_domain *domain; 2100 struct protection_domain *domain;
1987 u16 devid;
1988 2101
1989 INC_STATS_COUNTER(cnt_free_coherent); 2102 INC_STATS_COUNTER(cnt_free_coherent);
1990 2103
1991 if (!check_device(dev)) 2104 domain = get_domain(dev);
1992 return; 2105 if (IS_ERR(domain))
1993
1994 get_device_resources(dev, &iommu, &domain, &devid);
1995
1996 if (!iommu || !domain)
1997 goto free_mem;
1998
1999 if (!dma_ops_domain(domain))
2000 goto free_mem; 2106 goto free_mem;
2001 2107
2002 spin_lock_irqsave(&domain->lock, flags); 2108 spin_lock_irqsave(&domain->lock, flags);
2003 2109
2004 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 2110 __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2005 2111
2006 iommu_completion_wait(iommu); 2112 iommu_flush_complete(domain);
2007 2113
2008 spin_unlock_irqrestore(&domain->lock, flags); 2114 spin_unlock_irqrestore(&domain->lock, flags);
2009 2115
@@ -2017,22 +2123,7 @@ free_mem:
2017 */ 2123 */
2018static int amd_iommu_dma_supported(struct device *dev, u64 mask) 2124static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2019{ 2125{
2020 u16 bdf; 2126 return check_device(dev);
2021 struct pci_dev *pcidev;
2022
2023 /* No device or no PCI device */
2024 if (!dev || dev->bus != &pci_bus_type)
2025 return 0;
2026
2027 pcidev = to_pci_dev(dev);
2028
2029 bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
2030
2031 /* Out of our scope? */
2032 if (bdf > amd_iommu_last_bdf)
2033 return 0;
2034
2035 return 1;
2036} 2127}
2037 2128
2038/* 2129/*
@@ -2046,25 +2137,30 @@ static void prealloc_protection_domains(void)
2046{ 2137{
2047 struct pci_dev *dev = NULL; 2138 struct pci_dev *dev = NULL;
2048 struct dma_ops_domain *dma_dom; 2139 struct dma_ops_domain *dma_dom;
2049 struct amd_iommu *iommu;
2050 u16 devid; 2140 u16 devid;
2051 2141
2052 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 2142 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
2053 devid = calc_devid(dev->bus->number, dev->devfn); 2143
2054 if (devid > amd_iommu_last_bdf) 2144 /* Do we handle this device? */
2055 continue; 2145 if (!check_device(&dev->dev))
2056 devid = amd_iommu_alias_table[devid];
2057 if (domain_for_device(devid))
2058 continue; 2146 continue;
2059 iommu = amd_iommu_rlookup_table[devid]; 2147
2060 if (!iommu) 2148 iommu_init_device(&dev->dev);
2149
2150 /* Is there already any domain for it? */
2151 if (domain_for_device(&dev->dev))
2061 continue; 2152 continue;
2062 dma_dom = dma_ops_domain_alloc(iommu); 2153
2154 devid = get_device_id(&dev->dev);
2155
2156 dma_dom = dma_ops_domain_alloc();
2063 if (!dma_dom) 2157 if (!dma_dom)
2064 continue; 2158 continue;
2065 init_unity_mappings_for_device(dma_dom, devid); 2159 init_unity_mappings_for_device(dma_dom, devid);
2066 dma_dom->target_dev = devid; 2160 dma_dom->target_dev = devid;
2067 2161
2162 attach_device(&dev->dev, &dma_dom->domain);
2163
2068 list_add_tail(&dma_dom->list, &iommu_pd_list); 2164 list_add_tail(&dma_dom->list, &iommu_pd_list);
2069 } 2165 }
2070} 2166}
@@ -2093,7 +2189,7 @@ int __init amd_iommu_init_dma_ops(void)
2093 * protection domain will be assigned to the default one. 2189 * protection domain will be assigned to the default one.
2094 */ 2190 */
2095 for_each_iommu(iommu) { 2191 for_each_iommu(iommu) {
2096 iommu->default_dom = dma_ops_domain_alloc(iommu); 2192 iommu->default_dom = dma_ops_domain_alloc();
2097 if (iommu->default_dom == NULL) 2193 if (iommu->default_dom == NULL)
2098 return -ENOMEM; 2194 return -ENOMEM;
2099 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; 2195 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -2103,15 +2199,12 @@ int __init amd_iommu_init_dma_ops(void)
2103 } 2199 }
2104 2200
2105 /* 2201 /*
2106 * If device isolation is enabled, pre-allocate the protection 2202 * Pre-allocate the protection domains for each device.
2107 * domains for each device.
2108 */ 2203 */
2109 if (amd_iommu_isolate) 2204 prealloc_protection_domains();
2110 prealloc_protection_domains();
2111 2205
2112 iommu_detected = 1; 2206 iommu_detected = 1;
2113 force_iommu = 1; 2207 swiotlb = 0;
2114 bad_dma_address = 0;
2115#ifdef CONFIG_GART_IOMMU 2208#ifdef CONFIG_GART_IOMMU
2116 gart_iommu_aperture_disabled = 1; 2209 gart_iommu_aperture_disabled = 1;
2117 gart_iommu_aperture = 0; 2210 gart_iommu_aperture = 0;
@@ -2150,14 +2243,17 @@ free_domains:
2150 2243
2151static void cleanup_domain(struct protection_domain *domain) 2244static void cleanup_domain(struct protection_domain *domain)
2152{ 2245{
2246 struct iommu_dev_data *dev_data, *next;
2153 unsigned long flags; 2247 unsigned long flags;
2154 u16 devid;
2155 2248
2156 write_lock_irqsave(&amd_iommu_devtable_lock, flags); 2249 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2157 2250
2158 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) 2251 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2159 if (amd_iommu_pd_table[devid] == domain) 2252 struct device *dev = dev_data->dev;
2160 __detach_device(domain, devid); 2253
2254 do_detach(dev);
2255 atomic_set(&dev_data->bind, 0);
2256 }
2161 2257
2162 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 2258 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2163} 2259}
@@ -2167,6 +2263,8 @@ static void protection_domain_free(struct protection_domain *domain)
2167 if (!domain) 2263 if (!domain)
2168 return; 2264 return;
2169 2265
2266 del_domain_from_list(domain);
2267
2170 if (domain->id) 2268 if (domain->id)
2171 domain_id_free(domain->id); 2269 domain_id_free(domain->id);
2172 2270
@@ -2185,6 +2283,9 @@ static struct protection_domain *protection_domain_alloc(void)
2185 domain->id = domain_id_alloc(); 2283 domain->id = domain_id_alloc();
2186 if (!domain->id) 2284 if (!domain->id)
2187 goto out_err; 2285 goto out_err;
2286 INIT_LIST_HEAD(&domain->dev_list);
2287
2288 add_domain_to_list(domain);
2188 2289
2189 return domain; 2290 return domain;
2190 2291
@@ -2241,26 +2342,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2241static void amd_iommu_detach_device(struct iommu_domain *dom, 2342static void amd_iommu_detach_device(struct iommu_domain *dom,
2242 struct device *dev) 2343 struct device *dev)
2243{ 2344{
2244 struct protection_domain *domain = dom->priv; 2345 struct iommu_dev_data *dev_data = dev->archdata.iommu;
2245 struct amd_iommu *iommu; 2346 struct amd_iommu *iommu;
2246 struct pci_dev *pdev;
2247 u16 devid; 2347 u16 devid;
2248 2348
2249 if (dev->bus != &pci_bus_type) 2349 if (!check_device(dev))
2250 return; 2350 return;
2251 2351
2252 pdev = to_pci_dev(dev); 2352 devid = get_device_id(dev);
2253
2254 devid = calc_devid(pdev->bus->number, pdev->devfn);
2255 2353
2256 if (devid > 0) 2354 if (dev_data->domain != NULL)
2257 detach_device(domain, devid); 2355 detach_device(dev);
2258 2356
2259 iommu = amd_iommu_rlookup_table[devid]; 2357 iommu = amd_iommu_rlookup_table[devid];
2260 if (!iommu) 2358 if (!iommu)
2261 return; 2359 return;
2262 2360
2263 iommu_queue_inv_dev_entry(iommu, devid); 2361 iommu_flush_device(dev);
2264 iommu_completion_wait(iommu); 2362 iommu_completion_wait(iommu);
2265} 2363}
2266 2364
@@ -2268,35 +2366,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
2268 struct device *dev) 2366 struct device *dev)
2269{ 2367{
2270 struct protection_domain *domain = dom->priv; 2368 struct protection_domain *domain = dom->priv;
2271 struct protection_domain *old_domain; 2369 struct iommu_dev_data *dev_data;
2272 struct amd_iommu *iommu; 2370 struct amd_iommu *iommu;
2273 struct pci_dev *pdev; 2371 int ret;
2274 u16 devid; 2372 u16 devid;
2275 2373
2276 if (dev->bus != &pci_bus_type) 2374 if (!check_device(dev))
2277 return -EINVAL; 2375 return -EINVAL;
2278 2376
2279 pdev = to_pci_dev(dev); 2377 dev_data = dev->archdata.iommu;
2280 2378
2281 devid = calc_devid(pdev->bus->number, pdev->devfn); 2379 devid = get_device_id(dev);
2282
2283 if (devid >= amd_iommu_last_bdf ||
2284 devid != amd_iommu_alias_table[devid])
2285 return -EINVAL;
2286 2380
2287 iommu = amd_iommu_rlookup_table[devid]; 2381 iommu = amd_iommu_rlookup_table[devid];
2288 if (!iommu) 2382 if (!iommu)
2289 return -EINVAL; 2383 return -EINVAL;
2290 2384
2291 old_domain = domain_for_device(devid); 2385 if (dev_data->domain)
2292 if (old_domain) 2386 detach_device(dev);
2293 detach_device(old_domain, devid);
2294 2387
2295 attach_device(iommu, domain, devid); 2388 ret = attach_device(dev, domain);
2296 2389
2297 iommu_completion_wait(iommu); 2390 iommu_completion_wait(iommu);
2298 2391
2299 return 0; 2392 return ret;
2300} 2393}
2301 2394
2302static int amd_iommu_map_range(struct iommu_domain *dom, 2395static int amd_iommu_map_range(struct iommu_domain *dom,
@@ -2342,7 +2435,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
2342 iova += PAGE_SIZE; 2435 iova += PAGE_SIZE;
2343 } 2436 }
2344 2437
2345 iommu_flush_domain(domain->id); 2438 iommu_flush_tlb_pde(domain);
2346} 2439}
2347 2440
2348static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 2441static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2393,8 +2486,9 @@ static struct iommu_ops amd_iommu_ops = {
2393 2486
2394int __init amd_iommu_init_passthrough(void) 2487int __init amd_iommu_init_passthrough(void)
2395{ 2488{
2489 struct amd_iommu *iommu;
2396 struct pci_dev *dev = NULL; 2490 struct pci_dev *dev = NULL;
2397 u16 devid, devid2; 2491 u16 devid;
2398 2492
2399 /* allocate passthroug domain */ 2493 /* allocate passthroug domain */
2400 pt_domain = protection_domain_alloc(); 2494 pt_domain = protection_domain_alloc();
@@ -2404,20 +2498,17 @@ int __init amd_iommu_init_passthrough(void)
2404 pt_domain->mode |= PAGE_MODE_NONE; 2498 pt_domain->mode |= PAGE_MODE_NONE;
2405 2499
2406 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 2500 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
2407 struct amd_iommu *iommu;
2408 2501
2409 devid = calc_devid(dev->bus->number, dev->devfn); 2502 if (!check_device(&dev->dev))
2410 if (devid > amd_iommu_last_bdf)
2411 continue; 2503 continue;
2412 2504
2413 devid2 = amd_iommu_alias_table[devid]; 2505 devid = get_device_id(&dev->dev);
2414 2506
2415 iommu = amd_iommu_rlookup_table[devid2]; 2507 iommu = amd_iommu_rlookup_table[devid];
2416 if (!iommu) 2508 if (!iommu)
2417 continue; 2509 continue;
2418 2510
2419 __attach_device(iommu, pt_domain, devid); 2511 attach_device(&dev->dev, pt_domain);
2420 __attach_device(iommu, pt_domain, devid2);
2421 } 2512 }
2422 2513
2423 pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); 2514 pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c20001e4f556..7ffc39965233 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -25,10 +25,12 @@
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/msi.h> 26#include <linux/msi.h>
27#include <asm/pci-direct.h> 27#include <asm/pci-direct.h>
28#include <asm/amd_iommu_proto.h>
28#include <asm/amd_iommu_types.h> 29#include <asm/amd_iommu_types.h>
29#include <asm/amd_iommu.h> 30#include <asm/amd_iommu.h>
30#include <asm/iommu.h> 31#include <asm/iommu.h>
31#include <asm/gart.h> 32#include <asm/gart.h>
33#include <asm/x86_init.h>
32 34
33/* 35/*
34 * definitions for the ACPI scanning code 36 * definitions for the ACPI scanning code
@@ -123,18 +125,24 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
123 to handle */ 125 to handle */
124LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 126LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
125 we find in ACPI */ 127 we find in ACPI */
126#ifdef CONFIG_IOMMU_STRESS
127bool amd_iommu_isolate = false;
128#else
129bool amd_iommu_isolate = true; /* if true, device isolation is
130 enabled */
131#endif
132
133bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ 128bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
134 129
135LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 130LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
136 system */ 131 system */
137 132
133/* Array to assign indices to IOMMUs*/
134struct amd_iommu *amd_iommus[MAX_IOMMUS];
135int amd_iommus_present;
136
137/* IOMMUs have a non-present cache? */
138bool amd_iommu_np_cache __read_mostly;
139
140/*
141 * List of protection domains - used during resume
142 */
143LIST_HEAD(amd_iommu_pd_list);
144spinlock_t amd_iommu_pd_lock;
145
138/* 146/*
139 * Pointer to the device table which is shared by all AMD IOMMUs 147 * Pointer to the device table which is shared by all AMD IOMMUs
140 * it is indexed by the PCI device id or the HT unit id and contains 148 * it is indexed by the PCI device id or the HT unit id and contains
@@ -157,12 +165,6 @@ u16 *amd_iommu_alias_table;
157struct amd_iommu **amd_iommu_rlookup_table; 165struct amd_iommu **amd_iommu_rlookup_table;
158 166
159/* 167/*
160 * The pd table (protection domain table) is used to find the protection domain
161 * data structure a device belongs to. Indexed with the PCI device id too.
162 */
163struct protection_domain **amd_iommu_pd_table;
164
165/*
166 * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap 168 * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap
167 * to know which ones are already in use. 169 * to know which ones are already in use.
168 */ 170 */
@@ -838,7 +840,18 @@ static void __init free_iommu_all(void)
838static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) 840static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
839{ 841{
840 spin_lock_init(&iommu->lock); 842 spin_lock_init(&iommu->lock);
843
844 /* Add IOMMU to internal data structures */
841 list_add_tail(&iommu->list, &amd_iommu_list); 845 list_add_tail(&iommu->list, &amd_iommu_list);
846 iommu->index = amd_iommus_present++;
847
848 if (unlikely(iommu->index >= MAX_IOMMUS)) {
849 WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
850 return -ENOSYS;
851 }
852
853 /* Index is fine - add IOMMU to the array */
854 amd_iommus[iommu->index] = iommu;
842 855
843 /* 856 /*
844 * Copy data from ACPI table entry to the iommu struct 857 * Copy data from ACPI table entry to the iommu struct
@@ -868,6 +881,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
868 init_iommu_from_acpi(iommu, h); 881 init_iommu_from_acpi(iommu, h);
869 init_iommu_devices(iommu); 882 init_iommu_devices(iommu);
870 883
884 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
885 amd_iommu_np_cache = true;
886
871 return pci_enable_device(iommu->dev); 887 return pci_enable_device(iommu->dev);
872} 888}
873 889
@@ -925,7 +941,7 @@ static int __init init_iommu_all(struct acpi_table_header *table)
925 * 941 *
926 ****************************************************************************/ 942 ****************************************************************************/
927 943
928static int __init iommu_setup_msi(struct amd_iommu *iommu) 944static int iommu_setup_msi(struct amd_iommu *iommu)
929{ 945{
930 int r; 946 int r;
931 947
@@ -1176,19 +1192,10 @@ static struct sys_device device_amd_iommu = {
1176 * functions. Finally it prints some information about AMD IOMMUs and 1192 * functions. Finally it prints some information about AMD IOMMUs and
1177 * the driver state and enables the hardware. 1193 * the driver state and enables the hardware.
1178 */ 1194 */
1179int __init amd_iommu_init(void) 1195static int __init amd_iommu_init(void)
1180{ 1196{
1181 int i, ret = 0; 1197 int i, ret = 0;
1182 1198
1183
1184 if (no_iommu) {
1185 printk(KERN_INFO "AMD-Vi disabled by kernel command line\n");
1186 return 0;
1187 }
1188
1189 if (!amd_iommu_detected)
1190 return -ENODEV;
1191
1192 /* 1199 /*
1193 * First parse ACPI tables to find the largest Bus/Dev/Func 1200 * First parse ACPI tables to find the largest Bus/Dev/Func
1194 * we need to handle. Upon this information the shared data 1201 * we need to handle. Upon this information the shared data
@@ -1225,15 +1232,6 @@ int __init amd_iommu_init(void)
1225 if (amd_iommu_rlookup_table == NULL) 1232 if (amd_iommu_rlookup_table == NULL)
1226 goto free; 1233 goto free;
1227 1234
1228 /*
1229 * Protection Domain table - maps devices to protection domains
1230 * This table has the same size as the rlookup_table
1231 */
1232 amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1233 get_order(rlookup_table_size));
1234 if (amd_iommu_pd_table == NULL)
1235 goto free;
1236
1237 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( 1235 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
1238 GFP_KERNEL | __GFP_ZERO, 1236 GFP_KERNEL | __GFP_ZERO,
1239 get_order(MAX_DOMAIN_ID/8)); 1237 get_order(MAX_DOMAIN_ID/8));
@@ -1255,6 +1253,8 @@ int __init amd_iommu_init(void)
1255 */ 1253 */
1256 amd_iommu_pd_alloc_bitmap[0] = 1; 1254 amd_iommu_pd_alloc_bitmap[0] = 1;
1257 1255
1256 spin_lock_init(&amd_iommu_pd_lock);
1257
1258 /* 1258 /*
1259 * now the data structures are allocated and basically initialized 1259 * now the data structures are allocated and basically initialized
1260 * start the real acpi table scan 1260 * start the real acpi table scan
@@ -1286,17 +1286,12 @@ int __init amd_iommu_init(void)
1286 if (iommu_pass_through) 1286 if (iommu_pass_through)
1287 goto out; 1287 goto out;
1288 1288
1289 printk(KERN_INFO "AMD-Vi: device isolation ");
1290 if (amd_iommu_isolate)
1291 printk("enabled\n");
1292 else
1293 printk("disabled\n");
1294
1295 if (amd_iommu_unmap_flush) 1289 if (amd_iommu_unmap_flush)
1296 printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); 1290 printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
1297 else 1291 else
1298 printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); 1292 printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
1299 1293
1294 x86_platform.iommu_shutdown = disable_iommus;
1300out: 1295out:
1301 return ret; 1296 return ret;
1302 1297
@@ -1304,9 +1299,6 @@ free:
1304 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1299 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
1305 get_order(MAX_DOMAIN_ID/8)); 1300 get_order(MAX_DOMAIN_ID/8));
1306 1301
1307 free_pages((unsigned long)amd_iommu_pd_table,
1308 get_order(rlookup_table_size));
1309
1310 free_pages((unsigned long)amd_iommu_rlookup_table, 1302 free_pages((unsigned long)amd_iommu_rlookup_table,
1311 get_order(rlookup_table_size)); 1303 get_order(rlookup_table_size));
1312 1304
@@ -1323,11 +1315,6 @@ free:
1323 goto out; 1315 goto out;
1324} 1316}
1325 1317
1326void amd_iommu_shutdown(void)
1327{
1328 disable_iommus();
1329}
1330
1331/**************************************************************************** 1318/****************************************************************************
1332 * 1319 *
1333 * Early detect code. This code runs at IOMMU detection time in the DMA 1320 * Early detect code. This code runs at IOMMU detection time in the DMA
@@ -1342,16 +1329,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
1342 1329
1343void __init amd_iommu_detect(void) 1330void __init amd_iommu_detect(void)
1344{ 1331{
1345 if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) 1332 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1346 return; 1333 return;
1347 1334
1348 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { 1335 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
1349 iommu_detected = 1; 1336 iommu_detected = 1;
1350 amd_iommu_detected = 1; 1337 amd_iommu_detected = 1;
1351#ifdef CONFIG_GART_IOMMU 1338 x86_init.iommu.iommu_init = amd_iommu_init;
1352 gart_iommu_aperture_disabled = 1;
1353 gart_iommu_aperture = 0;
1354#endif
1355 } 1339 }
1356} 1340}
1357 1341
@@ -1372,10 +1356,6 @@ static int __init parse_amd_iommu_dump(char *str)
1372static int __init parse_amd_iommu_options(char *str) 1356static int __init parse_amd_iommu_options(char *str)
1373{ 1357{
1374 for (; *str; ++str) { 1358 for (; *str; ++str) {
1375 if (strncmp(str, "isolate", 7) == 0)
1376 amd_iommu_isolate = true;
1377 if (strncmp(str, "share", 5) == 0)
1378 amd_iommu_isolate = false;
1379 if (strncmp(str, "fullflush", 9) == 0) 1359 if (strncmp(str, "fullflush", 9) == 0)
1380 amd_iommu_unmap_flush = true; 1360 amd_iommu_unmap_flush = true;
1381 } 1361 }
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 128111d8ffe0..e0dfb6856aa2 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -28,6 +28,7 @@
28#include <asm/pci-direct.h> 28#include <asm/pci-direct.h>
29#include <asm/dma.h> 29#include <asm/dma.h>
30#include <asm/k8.h> 30#include <asm/k8.h>
31#include <asm/x86_init.h>
31 32
32int gart_iommu_aperture; 33int gart_iommu_aperture;
33int gart_iommu_aperture_disabled __initdata; 34int gart_iommu_aperture_disabled __initdata;
@@ -400,6 +401,7 @@ void __init gart_iommu_hole_init(void)
400 401
401 iommu_detected = 1; 402 iommu_detected = 1;
402 gart_iommu_aperture = 1; 403 gart_iommu_aperture = 1;
404 x86_init.iommu.iommu_init = gart_iommu_init;
403 405
404 aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; 406 aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
405 aper_size = (32 * 1024 * 1024) << aper_order; 407 aper_size = (32 * 1024 * 1024) << aper_order;
@@ -456,7 +458,7 @@ out:
456 458
457 if (aper_alloc) { 459 if (aper_alloc) {
458 /* Got the aperture from the AGP bridge */ 460 /* Got the aperture from the AGP bridge */
459 } else if (swiotlb && !valid_agp) { 461 } else if (!valid_agp) {
460 /* Do nothing */ 462 /* Do nothing */
461 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || 463 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
462 force_iommu || 464 force_iommu ||
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 5e409dc298a4..a4849c10a77e 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -27,8 +27,7 @@
27#include <asm/cpu.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30#include <asm/iommu.h> 30#include <asm/x86_init.h>
31
32 31
33#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 32#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
34 33
@@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
106#endif 105#endif
107 106
108#ifdef CONFIG_X86_64 107#ifdef CONFIG_X86_64
109 pci_iommu_shutdown(); 108 x86_platform.iommu_shutdown();
110#endif 109#endif
111 110
112 crash_save_cpu(regs, safe_smp_processor_id()); 111 crash_save_cpu(regs, safe_smp_processor_id());
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 971a3bec47a8..c563e4c8ff39 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -46,6 +46,7 @@
46#include <asm/dma.h> 46#include <asm/dma.h>
47#include <asm/rio.h> 47#include <asm/rio.h>
48#include <asm/bios_ebda.h> 48#include <asm/bios_ebda.h>
49#include <asm/x86_init.h>
49 50
50#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT 51#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
51int use_calgary __read_mostly = 1; 52int use_calgary __read_mostly = 1;
@@ -244,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
244 if (panic_on_overflow) 245 if (panic_on_overflow)
245 panic("Calgary: fix the allocator.\n"); 246 panic("Calgary: fix the allocator.\n");
246 else 247 else
247 return bad_dma_address; 248 return DMA_ERROR_CODE;
248 } 249 }
249 } 250 }
250 251
@@ -260,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
260 void *vaddr, unsigned int npages, int direction) 261 void *vaddr, unsigned int npages, int direction)
261{ 262{
262 unsigned long entry; 263 unsigned long entry;
263 dma_addr_t ret = bad_dma_address; 264 dma_addr_t ret;
264 265
265 entry = iommu_range_alloc(dev, tbl, npages); 266 entry = iommu_range_alloc(dev, tbl, npages);
266 267
267 if (unlikely(entry == bad_dma_address)) 268 if (unlikely(entry == DMA_ERROR_CODE)) {
268 goto error; 269 printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
270 "iommu %p\n", npages, tbl);
271 return DMA_ERROR_CODE;
272 }
269 273
270 /* set the return dma address */ 274 /* set the return dma address */
271 ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); 275 ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
@@ -273,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
273 /* put the TCEs in the HW table */ 277 /* put the TCEs in the HW table */
274 tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, 278 tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
275 direction); 279 direction);
276
277 return ret; 280 return ret;
278
279error:
280 printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
281 "iommu %p\n", npages, tbl);
282 return bad_dma_address;
283} 281}
284 282
285static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, 283static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -290,8 +288,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
290 unsigned long flags; 288 unsigned long flags;
291 289
292 /* were we called with bad_dma_address? */ 290 /* were we called with bad_dma_address? */
293 badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); 291 badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE);
294 if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { 292 if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) {
295 WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " 293 WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
296 "address 0x%Lx\n", dma_addr); 294 "address 0x%Lx\n", dma_addr);
297 return; 295 return;
@@ -318,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
318 316
319 pdev = to_pci_dev(dev); 317 pdev = to_pci_dev(dev);
320 318
319 /* search up the device tree for an iommu */
321 pbus = pdev->bus; 320 pbus = pdev->bus;
322 321 do {
323 /* is the device behind a bridge? Look for the root bus */ 322 tbl = pci_iommu(pbus);
324 while (pbus->parent) 323 if (tbl && tbl->it_busno == pbus->number)
324 break;
325 tbl = NULL;
325 pbus = pbus->parent; 326 pbus = pbus->parent;
326 327 } while (pbus);
327 tbl = pci_iommu(pbus);
328 328
329 BUG_ON(tbl && (tbl->it_busno != pbus->number)); 329 BUG_ON(tbl && (tbl->it_busno != pbus->number));
330 330
@@ -373,7 +373,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
373 npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); 373 npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE);
374 374
375 entry = iommu_range_alloc(dev, tbl, npages); 375 entry = iommu_range_alloc(dev, tbl, npages);
376 if (entry == bad_dma_address) { 376 if (entry == DMA_ERROR_CODE) {
377 /* makes sure unmap knows to stop */ 377 /* makes sure unmap knows to stop */
378 s->dma_length = 0; 378 s->dma_length = 0;
379 goto error; 379 goto error;
@@ -391,7 +391,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
391error: 391error:
392 calgary_unmap_sg(dev, sg, nelems, dir, NULL); 392 calgary_unmap_sg(dev, sg, nelems, dir, NULL);
393 for_each_sg(sg, s, nelems, i) { 393 for_each_sg(sg, s, nelems, i) {
394 sg->dma_address = bad_dma_address; 394 sg->dma_address = DMA_ERROR_CODE;
395 sg->dma_length = 0; 395 sg->dma_length = 0;
396 } 396 }
397 return 0; 397 return 0;
@@ -446,7 +446,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
446 446
447 /* set up tces to cover the allocated range */ 447 /* set up tces to cover the allocated range */
448 mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); 448 mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
449 if (mapping == bad_dma_address) 449 if (mapping == DMA_ERROR_CODE)
450 goto free; 450 goto free;
451 *dma_handle = mapping; 451 *dma_handle = mapping;
452 return ret; 452 return ret;
@@ -727,7 +727,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev)
727 struct iommu_table *tbl = pci_iommu(dev->bus); 727 struct iommu_table *tbl = pci_iommu(dev->bus);
728 728
729 /* reserve EMERGENCY_PAGES from bad_dma_address and up */ 729 /* reserve EMERGENCY_PAGES from bad_dma_address and up */
730 iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); 730 iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES);
731 731
732 /* avoid the BIOS/VGA first 640KB-1MB region */ 732 /* avoid the BIOS/VGA first 640KB-1MB region */
733 /* for CalIOC2 - avoid the entire first MB */ 733 /* for CalIOC2 - avoid the entire first MB */
@@ -1344,6 +1344,23 @@ static void __init get_tce_space_from_tar(void)
1344 return; 1344 return;
1345} 1345}
1346 1346
1347static int __init calgary_iommu_init(void)
1348{
1349 int ret;
1350
1351 /* ok, we're trying to use Calgary - let's roll */
1352 printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
1353
1354 ret = calgary_init();
1355 if (ret) {
1356 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
1357 "falling back to no_iommu\n", ret);
1358 return ret;
1359 }
1360
1361 return 0;
1362}
1363
1347void __init detect_calgary(void) 1364void __init detect_calgary(void)
1348{ 1365{
1349 int bus; 1366 int bus;
@@ -1357,7 +1374,7 @@ void __init detect_calgary(void)
1357 * if the user specified iommu=off or iommu=soft or we found 1374 * if the user specified iommu=off or iommu=soft or we found
1358 * another HW IOMMU already, bail out. 1375 * another HW IOMMU already, bail out.
1359 */ 1376 */
1360 if (swiotlb || no_iommu || iommu_detected) 1377 if (no_iommu || iommu_detected)
1361 return; 1378 return;
1362 1379
1363 if (!use_calgary) 1380 if (!use_calgary)
@@ -1442,9 +1459,7 @@ void __init detect_calgary(void)
1442 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", 1459 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
1443 specified_table_size); 1460 specified_table_size);
1444 1461
1445 /* swiotlb for devices that aren't behind the Calgary. */ 1462 x86_init.iommu.iommu_init = calgary_iommu_init;
1446 if (max_pfn > MAX_DMA32_PFN)
1447 swiotlb = 1;
1448 } 1463 }
1449 return; 1464 return;
1450 1465
@@ -1457,35 +1472,6 @@ cleanup:
1457 } 1472 }
1458} 1473}
1459 1474
1460int __init calgary_iommu_init(void)
1461{
1462 int ret;
1463
1464 if (no_iommu || (swiotlb && !calgary_detected))
1465 return -ENODEV;
1466
1467 if (!calgary_detected)
1468 return -ENODEV;
1469
1470 /* ok, we're trying to use Calgary - let's roll */
1471 printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
1472
1473 ret = calgary_init();
1474 if (ret) {
1475 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
1476 "falling back to no_iommu\n", ret);
1477 return ret;
1478 }
1479
1480 force_iommu = 1;
1481 bad_dma_address = 0x0;
1482 /* dma_ops is set to swiotlb or nommu */
1483 if (!dma_ops)
1484 dma_ops = &nommu_dma_ops;
1485
1486 return 0;
1487}
1488
1489static int __init calgary_parse_options(char *p) 1475static int __init calgary_parse_options(char *p)
1490{ 1476{
1491 unsigned int bridge; 1477 unsigned int bridge;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index a6e804d16c35..afcc58b69c7c 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,10 +11,11 @@
11#include <asm/gart.h> 11#include <asm/gart.h>
12#include <asm/calgary.h> 12#include <asm/calgary.h>
13#include <asm/amd_iommu.h> 13#include <asm/amd_iommu.h>
14#include <asm/x86_init.h>
14 15
15static int forbid_dac __read_mostly; 16static int forbid_dac __read_mostly;
16 17
17struct dma_map_ops *dma_ops; 18struct dma_map_ops *dma_ops = &nommu_dma_ops;
18EXPORT_SYMBOL(dma_ops); 19EXPORT_SYMBOL(dma_ops);
19 20
20static int iommu_sac_force __read_mostly; 21static int iommu_sac_force __read_mostly;
@@ -42,9 +43,6 @@ int iommu_detected __read_mostly = 0;
42 */ 43 */
43int iommu_pass_through __read_mostly; 44int iommu_pass_through __read_mostly;
44 45
45dma_addr_t bad_dma_address __read_mostly = 0;
46EXPORT_SYMBOL(bad_dma_address);
47
48/* Dummy device used for NULL arguments (normally ISA). */ 46/* Dummy device used for NULL arguments (normally ISA). */
49struct device x86_dma_fallback_dev = { 47struct device x86_dma_fallback_dev = {
50 .init_name = "fallback device", 48 .init_name = "fallback device",
@@ -126,20 +124,17 @@ void __init pci_iommu_alloc(void)
126 /* free the range so iommu could get some range less than 4G */ 124 /* free the range so iommu could get some range less than 4G */
127 dma32_free_bootmem(); 125 dma32_free_bootmem();
128#endif 126#endif
127 if (pci_swiotlb_init())
128 return;
129 129
130 /*
131 * The order of these functions is important for
132 * fall-back/fail-over reasons
133 */
134 gart_iommu_hole_init(); 130 gart_iommu_hole_init();
135 131
136 detect_calgary(); 132 detect_calgary();
137 133
138 detect_intel_iommu(); 134 detect_intel_iommu();
139 135
136 /* needs to be called after gart_iommu_hole_init */
140 amd_iommu_detect(); 137 amd_iommu_detect();
141
142 pci_swiotlb_init();
143} 138}
144 139
145void *dma_generic_alloc_coherent(struct device *dev, size_t size, 140void *dma_generic_alloc_coherent(struct device *dev, size_t size,
@@ -214,7 +209,7 @@ static __init int iommu_setup(char *p)
214 if (!strncmp(p, "allowdac", 8)) 209 if (!strncmp(p, "allowdac", 8))
215 forbid_dac = 0; 210 forbid_dac = 0;
216 if (!strncmp(p, "nodac", 5)) 211 if (!strncmp(p, "nodac", 5))
217 forbid_dac = -1; 212 forbid_dac = 1;
218 if (!strncmp(p, "usedac", 6)) { 213 if (!strncmp(p, "usedac", 6)) {
219 forbid_dac = -1; 214 forbid_dac = -1;
220 return 1; 215 return 1;
@@ -289,25 +284,17 @@ static int __init pci_iommu_init(void)
289#ifdef CONFIG_PCI 284#ifdef CONFIG_PCI
290 dma_debug_add_bus(&pci_bus_type); 285 dma_debug_add_bus(&pci_bus_type);
291#endif 286#endif
287 x86_init.iommu.iommu_init();
292 288
293 calgary_iommu_init(); 289 if (swiotlb) {
294 290 printk(KERN_INFO "PCI-DMA: "
295 intel_iommu_init(); 291 "Using software bounce buffering for IO (SWIOTLB)\n");
292 swiotlb_print_info();
293 } else
294 swiotlb_free();
296 295
297 amd_iommu_init();
298
299 gart_iommu_init();
300
301 no_iommu_init();
302 return 0; 296 return 0;
303} 297}
304
305void pci_iommu_shutdown(void)
306{
307 gart_iommu_shutdown();
308
309 amd_iommu_shutdown();
310}
311/* Must execute after PCI subsystem */ 298/* Must execute after PCI subsystem */
312rootfs_initcall(pci_iommu_init); 299rootfs_initcall(pci_iommu_init);
313 300
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a7f1b64f86e0..e6a0d402f171 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -39,6 +39,7 @@
39#include <asm/swiotlb.h> 39#include <asm/swiotlb.h>
40#include <asm/dma.h> 40#include <asm/dma.h>
41#include <asm/k8.h> 41#include <asm/k8.h>
42#include <asm/x86_init.h>
42 43
43static unsigned long iommu_bus_base; /* GART remapping area (physical) */ 44static unsigned long iommu_bus_base; /* GART remapping area (physical) */
44static unsigned long iommu_size; /* size of remapping area bytes */ 45static unsigned long iommu_size; /* size of remapping area bytes */
@@ -46,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */
46 47
47static u32 *iommu_gatt_base; /* Remapping table */ 48static u32 *iommu_gatt_base; /* Remapping table */
48 49
50static dma_addr_t bad_dma_addr;
51
49/* 52/*
50 * If this is disabled the IOMMU will use an optimized flushing strategy 53 * If this is disabled the IOMMU will use an optimized flushing strategy
51 * of only flushing when an mapping is reused. With it true the GART is 54 * of only flushing when an mapping is reused. With it true the GART is
@@ -92,7 +95,7 @@ static unsigned long alloc_iommu(struct device *dev, int size,
92 95
93 base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), 96 base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
94 PAGE_SIZE) >> PAGE_SHIFT; 97 PAGE_SIZE) >> PAGE_SHIFT;
95 boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, 98 boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1,
96 PAGE_SIZE) >> PAGE_SHIFT; 99 PAGE_SIZE) >> PAGE_SHIFT;
97 100
98 spin_lock_irqsave(&iommu_bitmap_lock, flags); 101 spin_lock_irqsave(&iommu_bitmap_lock, flags);
@@ -216,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
216 if (panic_on_overflow) 219 if (panic_on_overflow)
217 panic("dma_map_area overflow %lu bytes\n", size); 220 panic("dma_map_area overflow %lu bytes\n", size);
218 iommu_full(dev, size, dir); 221 iommu_full(dev, size, dir);
219 return bad_dma_address; 222 return bad_dma_addr;
220 } 223 }
221 224
222 for (i = 0; i < npages; i++) { 225 for (i = 0; i < npages; i++) {
@@ -294,7 +297,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
294 int i; 297 int i;
295 298
296#ifdef CONFIG_IOMMU_DEBUG 299#ifdef CONFIG_IOMMU_DEBUG
297 printk(KERN_DEBUG "dma_map_sg overflow\n"); 300 pr_debug("dma_map_sg overflow\n");
298#endif 301#endif
299 302
300 for_each_sg(sg, s, nents, i) { 303 for_each_sg(sg, s, nents, i) {
@@ -302,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
302 305
303 if (nonforced_iommu(dev, addr, s->length)) { 306 if (nonforced_iommu(dev, addr, s->length)) {
304 addr = dma_map_area(dev, addr, s->length, dir, 0); 307 addr = dma_map_area(dev, addr, s->length, dir, 0);
305 if (addr == bad_dma_address) { 308 if (addr == bad_dma_addr) {
306 if (i > 0) 309 if (i > 0)
307 gart_unmap_sg(dev, sg, i, dir, NULL); 310 gart_unmap_sg(dev, sg, i, dir, NULL);
308 nents = 0; 311 nents = 0;
@@ -389,12 +392,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
389 if (!dev) 392 if (!dev)
390 dev = &x86_dma_fallback_dev; 393 dev = &x86_dma_fallback_dev;
391 394
392 out = 0; 395 out = 0;
393 start = 0; 396 start = 0;
394 start_sg = sgmap = sg; 397 start_sg = sg;
395 seg_size = 0; 398 sgmap = sg;
396 max_seg_size = dma_get_max_seg_size(dev); 399 seg_size = 0;
397 ps = NULL; /* shut up gcc */ 400 max_seg_size = dma_get_max_seg_size(dev);
401 ps = NULL; /* shut up gcc */
402
398 for_each_sg(sg, s, nents, i) { 403 for_each_sg(sg, s, nents, i) {
399 dma_addr_t addr = sg_phys(s); 404 dma_addr_t addr = sg_phys(s);
400 405
@@ -417,11 +422,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
417 sgmap, pages, need) < 0) 422 sgmap, pages, need) < 0)
418 goto error; 423 goto error;
419 out++; 424 out++;
420 seg_size = 0; 425
421 sgmap = sg_next(sgmap); 426 seg_size = 0;
422 pages = 0; 427 sgmap = sg_next(sgmap);
423 start = i; 428 pages = 0;
424 start_sg = s; 429 start = i;
430 start_sg = s;
425 } 431 }
426 } 432 }
427 433
@@ -455,7 +461,7 @@ error:
455 461
456 iommu_full(dev, pages << PAGE_SHIFT, dir); 462 iommu_full(dev, pages << PAGE_SHIFT, dir);
457 for_each_sg(sg, s, nents, i) 463 for_each_sg(sg, s, nents, i)
458 s->dma_address = bad_dma_address; 464 s->dma_address = bad_dma_addr;
459 return 0; 465 return 0;
460} 466}
461 467
@@ -479,7 +485,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
479 DMA_BIDIRECTIONAL, align_mask); 485 DMA_BIDIRECTIONAL, align_mask);
480 486
481 flush_gart(); 487 flush_gart();
482 if (paddr != bad_dma_address) { 488 if (paddr != bad_dma_addr) {
483 *dma_addr = paddr; 489 *dma_addr = paddr;
484 return page_address(page); 490 return page_address(page);
485 } 491 }
@@ -499,6 +505,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
499 free_pages((unsigned long)vaddr, get_order(size)); 505 free_pages((unsigned long)vaddr, get_order(size));
500} 506}
501 507
508static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
509{
510 return (dma_addr == bad_dma_addr);
511}
512
502static int no_agp; 513static int no_agp;
503 514
504static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) 515static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -515,7 +526,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
515 iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; 526 iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
516 527
517 if (iommu_size < 64*1024*1024) { 528 if (iommu_size < 64*1024*1024) {
518 printk(KERN_WARNING 529 pr_warning(
519 "PCI-DMA: Warning: Small IOMMU %luMB." 530 "PCI-DMA: Warning: Small IOMMU %luMB."
520 " Consider increasing the AGP aperture in BIOS\n", 531 " Consider increasing the AGP aperture in BIOS\n",
521 iommu_size >> 20); 532 iommu_size >> 20);
@@ -570,28 +581,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
570 aperture_alloc = aper_alloc; 581 aperture_alloc = aper_alloc;
571} 582}
572 583
573static int gart_resume(struct sys_device *dev) 584static void gart_fixup_northbridges(struct sys_device *dev)
574{ 585{
575 printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); 586 int i;
576 587
577 if (fix_up_north_bridges) { 588 if (!fix_up_north_bridges)
578 int i; 589 return;
579 590
580 printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); 591 pr_info("PCI-DMA: Restoring GART aperture settings\n");
581 592
582 for (i = 0; i < num_k8_northbridges; i++) { 593 for (i = 0; i < num_k8_northbridges; i++) {
583 struct pci_dev *dev = k8_northbridges[i]; 594 struct pci_dev *dev = k8_northbridges[i];
584 595
585 /* 596 /*
586 * Don't enable translations just yet. That is the next 597 * Don't enable translations just yet. That is the next
587 * step. Restore the pre-suspend aperture settings. 598 * step. Restore the pre-suspend aperture settings.
588 */ 599 */
589 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, 600 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1);
590 aperture_order << 1); 601 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
591 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE,
592 aperture_alloc >> 25);
593 }
594 } 602 }
603}
604
605static int gart_resume(struct sys_device *dev)
606{
607 pr_info("PCI-DMA: Resuming GART IOMMU\n");
608
609 gart_fixup_northbridges(dev);
595 610
596 enable_gart_translations(); 611 enable_gart_translations();
597 612
@@ -604,15 +619,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state)
604} 619}
605 620
606static struct sysdev_class gart_sysdev_class = { 621static struct sysdev_class gart_sysdev_class = {
607 .name = "gart", 622 .name = "gart",
608 .suspend = gart_suspend, 623 .suspend = gart_suspend,
609 .resume = gart_resume, 624 .resume = gart_resume,
610 625
611}; 626};
612 627
613static struct sys_device device_gart = { 628static struct sys_device device_gart = {
614 .id = 0, 629 .cls = &gart_sysdev_class,
615 .cls = &gart_sysdev_class,
616}; 630};
617 631
618/* 632/*
@@ -627,7 +641,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
627 void *gatt; 641 void *gatt;
628 int i, error; 642 int i, error;
629 643
630 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 644 pr_info("PCI-DMA: Disabling AGP.\n");
645
631 aper_size = aper_base = info->aper_size = 0; 646 aper_size = aper_base = info->aper_size = 0;
632 dev = NULL; 647 dev = NULL;
633 for (i = 0; i < num_k8_northbridges; i++) { 648 for (i = 0; i < num_k8_northbridges; i++) {
@@ -645,6 +660,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
645 } 660 }
646 if (!aper_base) 661 if (!aper_base)
647 goto nommu; 662 goto nommu;
663
648 info->aper_base = aper_base; 664 info->aper_base = aper_base;
649 info->aper_size = aper_size >> 20; 665 info->aper_size = aper_size >> 20;
650 666
@@ -667,14 +683,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
667 683
668 flush_gart(); 684 flush_gart();
669 685
670 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", 686 pr_info("PCI-DMA: aperture base @ %x size %u KB\n",
671 aper_base, aper_size>>10); 687 aper_base, aper_size>>10);
672 688
673 return 0; 689 return 0;
674 690
675 nommu: 691 nommu:
676 /* Should not happen anymore */ 692 /* Should not happen anymore */
677 printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" 693 pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
678 "falling back to iommu=soft.\n"); 694 "falling back to iommu=soft.\n");
679 return -1; 695 return -1;
680} 696}
@@ -686,14 +702,15 @@ static struct dma_map_ops gart_dma_ops = {
686 .unmap_page = gart_unmap_page, 702 .unmap_page = gart_unmap_page,
687 .alloc_coherent = gart_alloc_coherent, 703 .alloc_coherent = gart_alloc_coherent,
688 .free_coherent = gart_free_coherent, 704 .free_coherent = gart_free_coherent,
705 .mapping_error = gart_mapping_error,
689}; 706};
690 707
691void gart_iommu_shutdown(void) 708static void gart_iommu_shutdown(void)
692{ 709{
693 struct pci_dev *dev; 710 struct pci_dev *dev;
694 int i; 711 int i;
695 712
696 if (no_agp && (dma_ops != &gart_dma_ops)) 713 if (no_agp)
697 return; 714 return;
698 715
699 for (i = 0; i < num_k8_northbridges; i++) { 716 for (i = 0; i < num_k8_northbridges; i++) {
@@ -708,7 +725,7 @@ void gart_iommu_shutdown(void)
708 } 725 }
709} 726}
710 727
711void __init gart_iommu_init(void) 728int __init gart_iommu_init(void)
712{ 729{
713 struct agp_kern_info info; 730 struct agp_kern_info info;
714 unsigned long iommu_start; 731 unsigned long iommu_start;
@@ -718,7 +735,7 @@ void __init gart_iommu_init(void)
718 long i; 735 long i;
719 736
720 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) 737 if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0)
721 return; 738 return 0;
722 739
723#ifndef CONFIG_AGP_AMD64 740#ifndef CONFIG_AGP_AMD64
724 no_agp = 1; 741 no_agp = 1;
@@ -730,35 +747,28 @@ void __init gart_iommu_init(void)
730 (agp_copy_info(agp_bridge, &info) < 0); 747 (agp_copy_info(agp_bridge, &info) < 0);
731#endif 748#endif
732 749
733 if (swiotlb)
734 return;
735
736 /* Did we detect a different HW IOMMU? */
737 if (iommu_detected && !gart_iommu_aperture)
738 return;
739
740 if (no_iommu || 750 if (no_iommu ||
741 (!force_iommu && max_pfn <= MAX_DMA32_PFN) || 751 (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
742 !gart_iommu_aperture || 752 !gart_iommu_aperture ||
743 (no_agp && init_k8_gatt(&info) < 0)) { 753 (no_agp && init_k8_gatt(&info) < 0)) {
744 if (max_pfn > MAX_DMA32_PFN) { 754 if (max_pfn > MAX_DMA32_PFN) {
745 printk(KERN_WARNING "More than 4GB of memory " 755 pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
746 "but GART IOMMU not available.\n"); 756 pr_warning("falling back to iommu=soft.\n");
747 printk(KERN_WARNING "falling back to iommu=soft.\n");
748 } 757 }
749 return; 758 return 0;
750 } 759 }
751 760
752 /* need to map that range */ 761 /* need to map that range */
753 aper_size = info.aper_size << 20; 762 aper_size = info.aper_size << 20;
754 aper_base = info.aper_base; 763 aper_base = info.aper_base;
755 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); 764 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
765
756 if (end_pfn > max_low_pfn_mapped) { 766 if (end_pfn > max_low_pfn_mapped) {
757 start_pfn = (aper_base>>PAGE_SHIFT); 767 start_pfn = (aper_base>>PAGE_SHIFT);
758 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); 768 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
759 } 769 }
760 770
761 printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); 771 pr_info("PCI-DMA: using GART IOMMU.\n");
762 iommu_size = check_iommu_size(info.aper_base, aper_size); 772 iommu_size = check_iommu_size(info.aper_base, aper_size);
763 iommu_pages = iommu_size >> PAGE_SHIFT; 773 iommu_pages = iommu_size >> PAGE_SHIFT;
764 774
@@ -773,8 +783,7 @@ void __init gart_iommu_init(void)
773 783
774 ret = dma_debug_resize_entries(iommu_pages); 784 ret = dma_debug_resize_entries(iommu_pages);
775 if (ret) 785 if (ret)
776 printk(KERN_DEBUG 786 pr_debug("PCI-DMA: Cannot trace all the entries\n");
777 "PCI-DMA: Cannot trace all the entries\n");
778 } 787 }
779#endif 788#endif
780 789
@@ -784,15 +793,14 @@ void __init gart_iommu_init(void)
784 */ 793 */
785 iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); 794 iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
786 795
787 agp_memory_reserved = iommu_size; 796 pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
788 printk(KERN_INFO
789 "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
790 iommu_size >> 20); 797 iommu_size >> 20);
791 798
792 iommu_start = aper_size - iommu_size; 799 agp_memory_reserved = iommu_size;
793 iommu_bus_base = info.aper_base + iommu_start; 800 iommu_start = aper_size - iommu_size;
794 bad_dma_address = iommu_bus_base; 801 iommu_bus_base = info.aper_base + iommu_start;
795 iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); 802 bad_dma_addr = iommu_bus_base;
803 iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
796 804
797 /* 805 /*
798 * Unmap the IOMMU part of the GART. The alias of the page is 806 * Unmap the IOMMU part of the GART. The alias of the page is
@@ -814,7 +822,7 @@ void __init gart_iommu_init(void)
814 * the pages as Not-Present: 822 * the pages as Not-Present:
815 */ 823 */
816 wbinvd(); 824 wbinvd();
817 825
818 /* 826 /*
819 * Now all caches are flushed and we can safely enable 827 * Now all caches are flushed and we can safely enable
820 * GART hardware. Doing it early leaves the possibility 828 * GART hardware. Doing it early leaves the possibility
@@ -838,6 +846,10 @@ void __init gart_iommu_init(void)
838 846
839 flush_gart(); 847 flush_gart();
840 dma_ops = &gart_dma_ops; 848 dma_ops = &gart_dma_ops;
849 x86_platform.iommu_shutdown = gart_iommu_shutdown;
850 swiotlb = 0;
851
852 return 0;
841} 853}
842 854
843void __init gart_parse_options(char *p) 855void __init gart_parse_options(char *p)
@@ -856,7 +868,7 @@ void __init gart_parse_options(char *p)
856#endif 868#endif
857 if (isdigit(*p) && get_option(&p, &arg)) 869 if (isdigit(*p) && get_option(&p, &arg))
858 iommu_size = arg; 870 iommu_size = arg;
859 if (!strncmp(p, "fullflush", 8)) 871 if (!strncmp(p, "fullflush", 9))
860 iommu_fullflush = 1; 872 iommu_fullflush = 1;
861 if (!strncmp(p, "nofullflush", 11)) 873 if (!strncmp(p, "nofullflush", 11))
862 iommu_fullflush = 0; 874 iommu_fullflush = 0;
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a3933d4330cd..22be12b60a8f 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
33 dma_addr_t bus = page_to_phys(page) + offset; 33 dma_addr_t bus = page_to_phys(page) + offset;
34 WARN_ON(size == 0); 34 WARN_ON(size == 0);
35 if (!check_addr("map_single", dev, bus, size)) 35 if (!check_addr("map_single", dev, bus, size))
36 return bad_dma_address; 36 return DMA_ERROR_CODE;
37 flush_write_buffers(); 37 flush_write_buffers();
38 return bus; 38 return bus;
39} 39}
@@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = {
103 .sync_sg_for_device = nommu_sync_sg_for_device, 103 .sync_sg_for_device = nommu_sync_sg_for_device,
104 .is_phys = 1, 104 .is_phys = 1,
105}; 105};
106
107void __init no_iommu_init(void)
108{
109 if (dma_ops)
110 return;
111
112 force_iommu = 0; /* no HW IOMMU */
113 dma_ops = &nommu_dma_ops;
114}
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index aaa6b7839f1e..e3c0a66b9e77 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -42,18 +42,28 @@ static struct dma_map_ops swiotlb_dma_ops = {
42 .dma_supported = NULL, 42 .dma_supported = NULL,
43}; 43};
44 44
45void __init pci_swiotlb_init(void) 45/*
46 * pci_swiotlb_init - initialize swiotlb if necessary
47 *
48 * This returns non-zero if we are forced to use swiotlb (by the boot
49 * option).
50 */
51int __init pci_swiotlb_init(void)
46{ 52{
53 int use_swiotlb = swiotlb | swiotlb_force;
54
47 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 55 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
48#ifdef CONFIG_X86_64 56#ifdef CONFIG_X86_64
49 if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) 57 if (!no_iommu && max_pfn > MAX_DMA32_PFN)
50 swiotlb = 1; 58 swiotlb = 1;
51#endif 59#endif
52 if (swiotlb_force) 60 if (swiotlb_force)
53 swiotlb = 1; 61 swiotlb = 1;
62
54 if (swiotlb) { 63 if (swiotlb) {
55 printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); 64 swiotlb_init(0);
56 swiotlb_init();
57 dma_ops = &swiotlb_dma_ops; 65 dma_ops = &swiotlb_dma_ops;
58 } 66 }
67
68 return use_swiotlb;
59} 69}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f93078746e00..2b97fc5b124e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -23,7 +23,7 @@
23# include <linux/ctype.h> 23# include <linux/ctype.h>
24# include <linux/mc146818rtc.h> 24# include <linux/mc146818rtc.h>
25#else 25#else
26# include <asm/iommu.h> 26# include <asm/x86_init.h>
27#endif 27#endif
28 28
29/* 29/*
@@ -622,7 +622,7 @@ void native_machine_shutdown(void)
622#endif 622#endif
623 623
624#ifdef CONFIG_X86_64 624#ifdef CONFIG_X86_64
625 pci_iommu_shutdown(); 625 x86_platform.iommu_shutdown();
626#endif 626#endif
627} 627}
628 628
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 4449a4a2c2ed..d11c5ff7c65e 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -14,10 +14,13 @@
14#include <asm/time.h> 14#include <asm/time.h>
15#include <asm/irq.h> 15#include <asm/irq.h>
16#include <asm/tsc.h> 16#include <asm/tsc.h>
17#include <asm/iommu.h>
17 18
18void __cpuinit x86_init_noop(void) { } 19void __cpuinit x86_init_noop(void) { }
19void __init x86_init_uint_noop(unsigned int unused) { } 20void __init x86_init_uint_noop(unsigned int unused) { }
20void __init x86_init_pgd_noop(pgd_t *unused) { } 21void __init x86_init_pgd_noop(pgd_t *unused) { }
22int __init iommu_init_noop(void) { return 0; }
23void iommu_shutdown_noop(void) { }
21 24
22/* 25/*
23 * The platform setup functions are preset with the default functions 26 * The platform setup functions are preset with the default functions
@@ -62,6 +65,10 @@ struct x86_init_ops x86_init __initdata = {
62 .tsc_pre_init = x86_init_noop, 65 .tsc_pre_init = x86_init_noop,
63 .timer_init = hpet_time_init, 66 .timer_init = hpet_time_init,
64 }, 67 },
68
69 .iommu = {
70 .iommu_init = iommu_init_noop,
71 },
65}; 72};
66 73
67struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 74struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
@@ -72,4 +79,5 @@ struct x86_platform_ops x86_platform = {
72 .calibrate_tsc = native_calibrate_tsc, 79 .calibrate_tsc = native_calibrate_tsc,
73 .get_wallclock = mach_get_cmos_time, 80 .get_wallclock = mach_get_cmos_time,
74 .set_wallclock = mach_set_rtc_mmss, 81 .set_wallclock = mach_set_rtc_mmss,
82 .iommu_shutdown = iommu_shutdown_noop,
75}; 83};
diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig
index ccb1fa89de29..2fb3a480f6b0 100644
--- a/drivers/char/agp/Kconfig
+++ b/drivers/char/agp/Kconfig
@@ -56,9 +56,8 @@ config AGP_AMD
56 X on AMD Irongate, 761, and 762 chipsets. 56 X on AMD Irongate, 761, and 762 chipsets.
57 57
58config AGP_AMD64 58config AGP_AMD64
59 tristate "AMD Opteron/Athlon64 on-CPU GART support" if !GART_IOMMU 59 tristate "AMD Opteron/Athlon64 on-CPU GART support"
60 depends on AGP && X86 60 depends on AGP && X86
61 default y if GART_IOMMU
62 help 61 help
63 This option gives you AGP support for the GLX component of 62 This option gives you AGP support for the GLX component of
64 X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs. 63 X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs.
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index b952ebc7a78b..416f6ac65b76 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -645,10 +645,13 @@ void __init detect_intel_iommu(void)
645 "x2apic and Intr-remapping.\n"); 645 "x2apic and Intr-remapping.\n");
646#endif 646#endif
647#ifdef CONFIG_DMAR 647#ifdef CONFIG_DMAR
648 if (ret && !no_iommu && !iommu_detected && !swiotlb && 648 if (ret && !no_iommu && !iommu_detected && !dmar_disabled)
649 !dmar_disabled)
650 iommu_detected = 1; 649 iommu_detected = 1;
651#endif 650#endif
651#ifdef CONFIG_X86
652 if (ret)
653 x86_init.iommu.iommu_init = intel_iommu_init;
654#endif
652 } 655 }
653 early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); 656 early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
654 dmar_tbl = NULL; 657 dmar_tbl = NULL;
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 1840a0578a42..9261327b49f3 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3266,7 +3266,7 @@ int __init intel_iommu_init(void)
3266 * Check the need for DMA-remapping initialization now. 3266 * Check the need for DMA-remapping initialization now.
3267 * Above initialization will also be used by Interrupt-remapping. 3267 * Above initialization will also be used by Interrupt-remapping.
3268 */ 3268 */
3269 if (no_iommu || swiotlb || dmar_disabled) 3269 if (no_iommu || dmar_disabled)
3270 return -ENODEV; 3270 return -ENODEV;
3271 3271
3272 iommu_init_mempool(); 3272 iommu_init_mempool();
@@ -3287,7 +3287,9 @@ int __init intel_iommu_init(void)
3287 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); 3287 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3288 3288
3289 init_timer(&unmap_timer); 3289 init_timer(&unmap_timer);
3290 force_iommu = 1; 3290#ifdef CONFIG_SWIOTLB
3291 swiotlb = 0;
3292#endif
3291 dma_ops = &intel_dma_ops; 3293 dma_ops = &intel_dma_ops;
3292 3294
3293 init_iommu_sysfs(); 3295 init_iommu_sysfs();
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index dd97fb8408a8..b10ec49ee2dd 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat,
53 unsigned long addr, 53 unsigned long addr,
54 unsigned long size); 54 unsigned long size);
55extern void free_bootmem(unsigned long addr, unsigned long size); 55extern void free_bootmem(unsigned long addr, unsigned long size);
56extern void free_bootmem_late(unsigned long addr, unsigned long size);
56 57
57/* 58/*
58 * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, 59 * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 4a2b162c256a..5de4c9e5856d 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -208,16 +208,9 @@ struct dmar_atsr_unit {
208 u8 include_all:1; /* include all ports */ 208 u8 include_all:1; /* include all ports */
209}; 209};
210 210
211/* Intel DMAR initialization functions */
212extern int intel_iommu_init(void); 211extern int intel_iommu_init(void);
213#else 212#else /* !CONFIG_DMAR: */
214static inline int intel_iommu_init(void) 213static inline int intel_iommu_init(void) { return -ENODEV; }
215{ 214#endif /* CONFIG_DMAR */
216#ifdef CONFIG_INTR_REMAP 215
217 return dmar_dev_scope_init();
218#else
219 return -ENODEV;
220#endif
221}
222#endif /* !CONFIG_DMAR */
223#endif /* __DMAR_H__ */ 216#endif /* __DMAR_H__ */
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 73b1f1cec423..febedcf67c7e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -7,6 +7,8 @@ struct device;
7struct dma_attrs; 7struct dma_attrs;
8struct scatterlist; 8struct scatterlist;
9 9
10extern int swiotlb_force;
11
10/* 12/*
11 * Maximum allowable number of contiguous slabs to map, 13 * Maximum allowable number of contiguous slabs to map,
12 * must be a power of 2. What is the appropriate value ? 14 * must be a power of 2. What is the appropriate value ?
@@ -20,8 +22,7 @@ struct scatterlist;
20 */ 22 */
21#define IO_TLB_SHIFT 11 23#define IO_TLB_SHIFT 11
22 24
23extern void 25extern void swiotlb_init(int verbose);
24swiotlb_init(void);
25 26
26extern void 27extern void
27*swiotlb_alloc_coherent(struct device *hwdev, size_t size, 28*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -88,4 +89,11 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
88extern int 89extern int
89swiotlb_dma_supported(struct device *hwdev, u64 mask); 90swiotlb_dma_supported(struct device *hwdev, u64 mask);
90 91
92#ifdef CONFIG_SWIOTLB
93extern void __init swiotlb_free(void);
94#else
95static inline void swiotlb_free(void) { }
96#endif
97
98extern void swiotlb_print_info(void);
91#endif /* __LINUX_SWIOTLB_H */ 99#endif /* __LINUX_SWIOTLB_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index ac25cd28e807..795472d8ae24 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -97,6 +97,8 @@ static phys_addr_t *io_tlb_orig_addr;
97 */ 97 */
98static DEFINE_SPINLOCK(io_tlb_lock); 98static DEFINE_SPINLOCK(io_tlb_lock);
99 99
100static int late_alloc;
101
100static int __init 102static int __init
101setup_io_tlb_npages(char *str) 103setup_io_tlb_npages(char *str)
102{ 104{
@@ -109,6 +111,7 @@ setup_io_tlb_npages(char *str)
109 ++str; 111 ++str;
110 if (!strcmp(str, "force")) 112 if (!strcmp(str, "force"))
111 swiotlb_force = 1; 113 swiotlb_force = 1;
114
112 return 1; 115 return 1;
113} 116}
114__setup("swiotlb=", setup_io_tlb_npages); 117__setup("swiotlb=", setup_io_tlb_npages);
@@ -121,8 +124,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
121 return phys_to_dma(hwdev, virt_to_phys(address)); 124 return phys_to_dma(hwdev, virt_to_phys(address));
122} 125}
123 126
124static void swiotlb_print_info(unsigned long bytes) 127void swiotlb_print_info(void)
125{ 128{
129 unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
126 phys_addr_t pstart, pend; 130 phys_addr_t pstart, pend;
127 131
128 pstart = virt_to_phys(io_tlb_start); 132 pstart = virt_to_phys(io_tlb_start);
@@ -140,7 +144,7 @@ static void swiotlb_print_info(unsigned long bytes)
140 * structures for the software IO TLB used to implement the DMA API. 144 * structures for the software IO TLB used to implement the DMA API.
141 */ 145 */
142void __init 146void __init
143swiotlb_init_with_default_size(size_t default_size) 147swiotlb_init_with_default_size(size_t default_size, int verbose)
144{ 148{
145 unsigned long i, bytes; 149 unsigned long i, bytes;
146 150
@@ -176,14 +180,14 @@ swiotlb_init_with_default_size(size_t default_size)
176 io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); 180 io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
177 if (!io_tlb_overflow_buffer) 181 if (!io_tlb_overflow_buffer)
178 panic("Cannot allocate SWIOTLB overflow buffer!\n"); 182 panic("Cannot allocate SWIOTLB overflow buffer!\n");
179 183 if (verbose)
180 swiotlb_print_info(bytes); 184 swiotlb_print_info();
181} 185}
182 186
183void __init 187void __init
184swiotlb_init(void) 188swiotlb_init(int verbose)
185{ 189{
186 swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ 190 swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */
187} 191}
188 192
189/* 193/*
@@ -260,7 +264,9 @@ swiotlb_late_init_with_default_size(size_t default_size)
260 if (!io_tlb_overflow_buffer) 264 if (!io_tlb_overflow_buffer)
261 goto cleanup4; 265 goto cleanup4;
262 266
263 swiotlb_print_info(bytes); 267 swiotlb_print_info();
268
269 late_alloc = 1;
264 270
265 return 0; 271 return 0;
266 272
@@ -281,6 +287,32 @@ cleanup1:
281 return -ENOMEM; 287 return -ENOMEM;
282} 288}
283 289
290void __init swiotlb_free(void)
291{
292 if (!io_tlb_overflow_buffer)
293 return;
294
295 if (late_alloc) {
296 free_pages((unsigned long)io_tlb_overflow_buffer,
297 get_order(io_tlb_overflow));
298 free_pages((unsigned long)io_tlb_orig_addr,
299 get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
300 free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
301 sizeof(int)));
302 free_pages((unsigned long)io_tlb_start,
303 get_order(io_tlb_nslabs << IO_TLB_SHIFT));
304 } else {
305 free_bootmem_late(__pa(io_tlb_overflow_buffer),
306 io_tlb_overflow);
307 free_bootmem_late(__pa(io_tlb_orig_addr),
308 io_tlb_nslabs * sizeof(phys_addr_t));
309 free_bootmem_late(__pa(io_tlb_list),
310 io_tlb_nslabs * sizeof(int));
311 free_bootmem_late(__pa(io_tlb_start),
312 io_tlb_nslabs << IO_TLB_SHIFT);
313 }
314}
315
284static int is_swiotlb_buffer(phys_addr_t paddr) 316static int is_swiotlb_buffer(phys_addr_t paddr)
285{ 317{
286 return paddr >= virt_to_phys(io_tlb_start) && 318 return paddr >= virt_to_phys(io_tlb_start) &&
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 555d5d2731c6..d1dc23cc7f10 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
143 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); 143 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
144} 144}
145 145
146/*
147 * free_bootmem_late - free bootmem pages directly to page allocator
148 * @addr: starting address of the range
149 * @size: size of the range in bytes
150 *
151 * This is only useful when the bootmem allocator has already been torn
152 * down, but we are still initializing the system. Pages are given directly
153 * to the page allocator, no bootmem metadata is updated because it is gone.
154 */
155void __init free_bootmem_late(unsigned long addr, unsigned long size)
156{
157 unsigned long cursor, end;
158
159 kmemleak_free_part(__va(addr), size);
160
161 cursor = PFN_UP(addr);
162 end = PFN_DOWN(addr + size);
163
164 for (; cursor < end; cursor++) {
165 __free_pages_bootmem(pfn_to_page(cursor), 0);
166 totalram_pages++;
167 }
168}
169
146static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) 170static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
147{ 171{
148 int aligned; 172 int aligned;