 arch/ia64/Kconfig                                    |   4
 arch/ia64/hp/common/hwsw_iommu.c                     |  13
 arch/ia64/hp/common/sba_iommu.c                      |  45
 arch/ia64/lib/Makefile                               |   2
 arch/x86_64/kernel/Makefile                          |   2
 include/asm-ia64/machvec_hpzx1.h                     |  21
 include/asm-ia64/machvec_hpzx1_swiotlb.h             |   3
 include/asm-x86_64/dma-mapping.h                     |  31
 include/asm-x86_64/swiotlb.h                         |   8
 lib/Makefile                                         |   2
 lib/swiotlb.c (renamed from arch/ia64/lib/swiotlb.c) | 244
 11 files changed, 295 insertions(+), 80 deletions(-)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 945c15a0722b..8f699a2e7981 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -26,6 +26,10 @@ config MMU
 	bool
 	default y
 
+config SWIOTLB
+	bool
+	default y
+
 config RWSEM_XCHGADD_ALGORITHM
 	bool
 	default y
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 80f8ef013939..317c334c5a18 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -17,7 +17,7 @@
 #include <asm/machvec.h>
 
 /* swiotlb declarations & definitions: */
-extern void swiotlb_init_with_default_size (size_t size);
+extern int swiotlb_late_init_with_default_size (size_t size);
 extern ia64_mv_dma_alloc_coherent	swiotlb_alloc_coherent;
 extern ia64_mv_dma_free_coherent	swiotlb_free_coherent;
 extern ia64_mv_dma_map_single		swiotlb_map_single;
@@ -67,7 +67,16 @@ void
 hwsw_init (void)
 {
 	/* default to a smallish 2MB sw I/O TLB */
-	swiotlb_init_with_default_size (2 * (1<<20));
+	if (swiotlb_late_init_with_default_size (2 * (1<<20)) != 0) {
+#ifdef CONFIG_IA64_GENERIC
+		/* Better to have normal DMA than panic */
+		printk(KERN_WARNING "%s: Failed to initialize software I/O TLB,"
+		       " reverting to hpzx1 platform vector\n", __FUNCTION__);
+		machvec_init("hpzx1");
+#else
+		panic("Unable to initialize software I/O TLB services");
+#endif
+	}
 }
 
 void *
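Note: the early initializer reserves its pool from the bootmem allocator (see the swiotlb.c changes below) and so is only usable early in boot, which is why hwsw_init() previously had to run as a boot-time machvec hook. The "late" variant introduced here allocates from the normal page allocator and reports failure instead of failing hard, so it can be called from initcall context, as sba_init() now does. A minimal sketch of the two entry points (call sites illustrative):

	/* early boot: bootmem-backed, no meaningful failure path */
	swiotlb_init();
	/* initcall time: page-allocator-backed, failure is reportable */
	if (swiotlb_late_init_with_default_size(2 * (1 << 20)) != 0)
		/* caller picks a fallback, e.g. machvec_init("hpzx1") above */ ;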
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 11957598a8b9..e64ca04ace89 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -2028,9 +2028,40 @@ static struct acpi_driver acpi_sba_ioc_driver = {
 static int __init
 sba_init(void)
 {
+	if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
+		return 0;
+
 	acpi_bus_register_driver(&acpi_sba_ioc_driver);
-	if (!ioc_list)
+	if (!ioc_list) {
+#ifdef CONFIG_IA64_GENERIC
+		extern int swiotlb_late_init_with_default_size (size_t size);
+
+		/*
+		 * If we didn't find something sba_iommu can claim, we
+		 * need to setup the swiotlb and switch to the dig machvec.
+		 */
+		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
+			panic("Unable to find SBA IOMMU or initialize "
+			      "software I/O TLB: Try machvec=dig boot option");
+		machvec_init("dig");
+#else
+		panic("Unable to find SBA IOMMU: Try a generic or DIG kernel");
+#endif
 		return 0;
+	}
+
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB)
+	/*
+	 * hpzx1_swiotlb needs to have a fairly small swiotlb bounce
+	 * buffer setup to support devices with smaller DMA masks than
+	 * sba_iommu can handle.
+	 */
+	if (ia64_platform_is("hpzx1_swiotlb")) {
+		extern void hwsw_init(void);
+
+		hwsw_init();
+	}
+#endif
 
 #ifdef CONFIG_PCI
 	{
@@ -2048,18 +2079,6 @@ sba_init(void)
 
 subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */
 
-extern void dig_setup(char**);
-/*
- * MAX_DMA_ADDRESS needs to be setup prior to paging_init to do any good,
- * so we use the platform_setup hook to fix it up.
- */
-void __init
-sba_setup(char **cmdline_p)
-{
-	MAX_DMA_ADDRESS = ~0UL;
-	dig_setup(cmdline_p);
-}
-
 static int __init
 nosbagart(char *str)
 {
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
index cb1af597370b..ac64664a1807 100644
--- a/arch/ia64/lib/Makefile
+++ b/arch/ia64/lib/Makefile
@@ -9,7 +9,7 @@ lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
 	bitop.o checksum.o clear_page.o csum_partial_copy.o \
 	clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \
 	flush.o ip_fast_csum.o do_csum.o \
-	memset.o strlen.o swiotlb.o
+	memset.o strlen.o
 
 lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
 lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index bcdd0a805fe7..14328cab5d3a 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -27,7 +27,6 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_GART_IOMMU)	+= pci-gart.o aperture.o
 obj-$(CONFIG_DUMMY_IOMMU)	+= pci-nommu.o pci-dma.o
-obj-$(CONFIG_SWIOTLB)		+= swiotlb.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
 
@@ -41,7 +40,6 @@ CFLAGS_vsyscall.o := $(PROFILING) -g0
 bootflag-y			+= ../../i386/kernel/bootflag.o
 cpuid-$(subst m,y,$(CONFIG_X86_CPUID))		+= ../../i386/kernel/cpuid.o
 topology-y			+= ../../i386/mach-default/topology.o
-swiotlb-$(CONFIG_SWIOTLB)	+= ../../ia64/lib/swiotlb.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE))	+= ../../i386/kernel/microcode.o
 intel_cacheinfo-y		+= ../../i386/kernel/cpu/intel_cacheinfo.o
 quirks-y			+= ../../i386/kernel/quirks.o
diff --git a/include/asm-ia64/machvec_hpzx1.h b/include/asm-ia64/machvec_hpzx1.h
index daafe504c5f4..e90daf9ce340 100644
--- a/include/asm-ia64/machvec_hpzx1.h
+++ b/include/asm-ia64/machvec_hpzx1.h
@@ -1,8 +1,7 @@
 #ifndef _ASM_IA64_MACHVEC_HPZX1_h
 #define _ASM_IA64_MACHVEC_HPZX1_h
 
 extern ia64_mv_setup_t			dig_setup;
-extern ia64_mv_setup_t			sba_setup;
 extern ia64_mv_dma_alloc_coherent	sba_alloc_coherent;
 extern ia64_mv_dma_free_coherent	sba_free_coherent;
 extern ia64_mv_dma_map_single		sba_map_single;
@@ -19,15 +18,15 @@ extern ia64_mv_dma_mapping_error	sba_dma_mapping_error;
  * platform's machvec structure.  When compiling a non-generic kernel,
  * the macros are used directly.
  */
 #define platform_name				"hpzx1"
-#define platform_setup				sba_setup
+#define platform_setup				dig_setup
 #define platform_dma_init			machvec_noop
 #define platform_dma_alloc_coherent		sba_alloc_coherent
 #define platform_dma_free_coherent		sba_free_coherent
 #define platform_dma_map_single			sba_map_single
 #define platform_dma_unmap_single		sba_unmap_single
 #define platform_dma_map_sg			sba_map_sg
 #define platform_dma_unmap_sg			sba_unmap_sg
 #define platform_dma_sync_single_for_cpu	machvec_dma_sync_single
 #define platform_dma_sync_sg_for_cpu		machvec_dma_sync_sg
 #define platform_dma_sync_single_for_device	machvec_dma_sync_single
diff --git a/include/asm-ia64/machvec_hpzx1_swiotlb.h b/include/asm-ia64/machvec_hpzx1_swiotlb.h
index 9924b1b00a6c..f00a34a148ff 100644
--- a/include/asm-ia64/machvec_hpzx1_swiotlb.h
+++ b/include/asm-ia64/machvec_hpzx1_swiotlb.h
@@ -2,7 +2,6 @@
 #define _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h
 
 extern ia64_mv_setup_t			dig_setup;
-extern ia64_mv_dma_init			hwsw_init;
 extern ia64_mv_dma_alloc_coherent	hwsw_alloc_coherent;
 extern ia64_mv_dma_free_coherent	hwsw_free_coherent;
 extern ia64_mv_dma_map_single		hwsw_map_single;
@@ -26,7 +25,7 @@ extern ia64_mv_dma_sync_sg_for_device	hwsw_sync_sg_for_device;
 #define platform_name				"hpzx1_swiotlb"
 
 #define platform_setup				dig_setup
-#define platform_dma_init			hwsw_init
+#define platform_dma_init			machvec_noop
 #define platform_dma_alloc_coherent		hwsw_alloc_coherent
 #define platform_dma_free_coherent		hwsw_free_coherent
 #define platform_dma_map_single			hwsw_map_single
diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h
index e784fdc524f1..45ca88c875c0 100644
--- a/include/asm-x86_64/dma-mapping.h
+++ b/include/asm-x86_64/dma-mapping.h
@@ -85,10 +85,33 @@ static inline void dma_sync_single_for_device(struct device *hwdev,
 	flush_write_buffers();
 }
 
-#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir) \
-	dma_sync_single_for_cpu(dev, dma_handle, size, dir)
-#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \
-	dma_sync_single_for_device(dev, dma_handle, size, dir)
+static inline void dma_sync_single_range_for_cpu(struct device *hwdev,
+						 dma_addr_t dma_handle,
+						 unsigned long offset,
+						 size_t size, int direction)
+{
+	if (direction == DMA_NONE)
+		out_of_line_bug();
+
+	if (swiotlb)
+		return swiotlb_sync_single_range_for_cpu(hwdev,dma_handle,offset,size,direction);
+
+	flush_write_buffers();
+}
+
+static inline void dma_sync_single_range_for_device(struct device *hwdev,
+						    dma_addr_t dma_handle,
+						    unsigned long offset,
+						    size_t size, int direction)
+{
+	if (direction == DMA_NONE)
+		out_of_line_bug();
+
+	if (swiotlb)
+		return swiotlb_sync_single_range_for_device(hwdev,dma_handle,offset,size,direction);
+
+	flush_write_buffers();
+}
 
 static inline void dma_sync_sg_for_cpu(struct device *hwdev,
 				       struct scatterlist *sg,
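Note: the old macros simply dropped the offset argument and synced from the start of the mapping; the inline functions above forward it to the swiotlb when bouncing is active. A minimal usage sketch for the range-sync API, with a hypothetical device that DMA-writes a small status block inside a larger streaming mapping (RING_BYTES and STATUS_OFF are illustrative names, not part of this patch):

	/* illustrative driver fragment */
	dma_addr_t ring = dma_map_single(dev, buf, RING_BYTES, DMA_FROM_DEVICE);

	/* hand the ring to the device; it updates 64 bytes at STATUS_OFF */
	dma_sync_single_range_for_cpu(dev, ring, STATUS_OFF, 64,
				      DMA_FROM_DEVICE);
	/* cpu may now read buf + STATUS_OFF without syncing the whole ring */
	dma_sync_single_range_for_device(dev, ring, STATUS_OFF, 64,
					 DMA_FROM_DEVICE);
	/* ownership of the status block returns to the device */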
diff --git a/include/asm-x86_64/swiotlb.h b/include/asm-x86_64/swiotlb.h
index 36293061f4ed..9b011dd8d19d 100644
--- a/include/asm-x86_64/swiotlb.h
+++ b/include/asm-x86_64/swiotlb.h
@@ -15,6 +15,14 @@ extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
 extern void swiotlb_sync_single_for_device(struct device *hwdev,
 					   dma_addr_t dev_addr,
 					   size_t size, int dir);
+extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev,
+					      dma_addr_t dev_addr,
+					      unsigned long offset,
+					      size_t size, int dir);
+extern void swiotlb_sync_single_range_for_device(struct device *hwdev,
+						 dma_addr_t dev_addr,
+						 unsigned long offset,
+						 size_t size, int dir);
 extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
 				    struct scatterlist *sg, int nelems,
 				    int dir);
diff --git a/lib/Makefile b/lib/Makefile
index 44a46750690a..8535f4d7d1c3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -44,6 +44,8 @@ obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
 obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
 obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
 
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/arch/ia64/lib/swiotlb.c b/lib/swiotlb.c
index a604efc7f6c9..5bdeaaea57fd 100644
--- a/arch/ia64/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -1,7 +1,7 @@
 /*
  * Dynamic DMA mapping support.
  *
- * This implementation is for IA-64 platforms that do not support
+ * This implementation is for IA-64 and EM64T platforms that do not support
  * I/O TLBs (aka DMA address translation hardware).
  * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
  * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
@@ -11,21 +11,23 @@
  * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
  * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
  *			unnecessary i-cache flushing.
  * 04/07/.. ak		Better overflow handling. Assorted fixes.
+ * 05/09/10 linville	Add support for syncing ranges, support syncing for
+ *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
  */
 
 #include <linux/cache.h>
+#include <linux/dma-mapping.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
 
 #include <asm/io.h>
-#include <asm/pci.h>
 #include <asm/dma.h>
+#include <asm/scatterlist.h>
 
 #include <linux/init.h>
 #include <linux/bootmem.h>
@@ -49,6 +51,23 @@
  */
 #define IO_TLB_SHIFT 11
 
+#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
+
+/*
+ * Minimum IO TLB size to bother booting with.  Systems with mainly
+ * 64bit capable cards will only lightly use the swiotlb.  If we can't
+ * allocate a contiguous 1MB, we're probably in trouble anyway.
+ */
+#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+
+/*
+ * Enumeration for sync targets
+ */
+enum dma_sync_target {
+	SYNC_FOR_CPU = 0,
+	SYNC_FOR_DEVICE = 1,
+};
+
 int swiotlb_force;
 
 /*
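The bounce-pool geometry these constants imply, worked through once assuming 4KB pages (PAGE_SHIFT == 12; larger IA-64 page sizes scale SLABS_PER_PAGE up accordingly):

	/* worked example, assuming PAGE_SHIFT == 12 */
	slab size        = 1 << IO_TLB_SHIFT	/* 2048 bytes (2KB)  */
	SLABS_PER_PAGE   = 1 << (12 - 11)	/* 2 slabs per page  */
	IO_TLB_MIN_SLABS = (1 << 20) >> 11	/* 512 slabs == 1MB  */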
@@ -108,7 +127,7 @@ __setup("swiotlb=", setup_io_tlb_npages);
 
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the PCI DMA API.
+ * structures for the software IO TLB used to implement the DMA API.
  */
 void
 swiotlb_init_with_default_size (size_t default_size)
@@ -154,6 +173,99 @@ swiotlb_init (void)
 	swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
 }
 
+/*
+ * Systems with larger DMA zones (those that don't support ISA) can
+ * initialize the swiotlb later using the slab allocator if needed.
+ * This should be just like above, but with some error catching.
+ */
+int
+swiotlb_late_init_with_default_size (size_t default_size)
+{
+	unsigned long i, req_nslabs = io_tlb_nslabs;
+	unsigned int order;
+
+	if (!io_tlb_nslabs) {
+		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	}
+
+	/*
+	 * Get IO TLB memory from the low pages
+	 */
+	order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
+	io_tlb_nslabs = SLABS_PER_PAGE << order;
+
+	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+		io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+							order);
+		if (io_tlb_start)
+			break;
+		order--;
+	}
+
+	if (!io_tlb_start)
+		goto cleanup1;
+
+	if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) {
+		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
+		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
+		io_tlb_nslabs = SLABS_PER_PAGE << order;
+	}
+	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
+	memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT));
+
+	/*
+	 * Allocate and initialize the free list array.  This array is used
+	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+	 * between io_tlb_start and io_tlb_end.
+	 */
+	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
+				get_order(io_tlb_nslabs * sizeof(int)));
+	if (!io_tlb_list)
+		goto cleanup2;
+
+	for (i = 0; i < io_tlb_nslabs; i++)
+		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+	io_tlb_index = 0;
+
+	io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
+				get_order(io_tlb_nslabs * sizeof(char *)));
+	if (!io_tlb_orig_addr)
+		goto cleanup3;
+
+	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
+
+	/*
+	 * Get the overflow emergency buffer
+	 */
+	io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
+				get_order(io_tlb_overflow));
+	if (!io_tlb_overflow_buffer)
+		goto cleanup4;
+
+	printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - "
+	       "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
+	       virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
+
+	return 0;
+
+cleanup4:
+	free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
+							      sizeof(char *)));
+	io_tlb_orig_addr = NULL;
+cleanup3:
+	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+							 sizeof(int)));
+	io_tlb_list = NULL;
+	io_tlb_end = NULL;
+cleanup2:
+	free_pages((unsigned long)io_tlb_start, order);
+	io_tlb_start = NULL;
cleanup1:
+	io_tlb_nslabs = req_nslabs;
+	return -ENOMEM;
+}
+
 static inline int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr)
 {
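To make the sizing concrete, a worked pass through the allocation above for the 64MB default used by the IA-64 generic kernel, assuming 4KB pages (PAGE_SHIFT == 12):

	io_tlb_nslabs = (64 << 20) >> IO_TLB_SHIFT;	/* 32768 slabs of 2KB  */
	order = get_order(32768 << IO_TLB_SHIFT);	/* order 14, i.e. 64MB */
	/* if the order-14 allocation fails, the loop retries at 32MB, 16MB,
	 * ... and gives up once it would drop below IO_TLB_MIN_SLABS (1MB),
	 * returning -ENOMEM so the caller can fall back or panic */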
@@ -295,21 +407,28 @@ unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 }
 
 static void
-sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+sync_single(struct device *hwdev, char *dma_addr, size_t size,
+	    int dir, int target)
 {
 	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
 	char *buffer = io_tlb_orig_addr[index];
 
-	/*
-	 * bounce... copy the data back into/from the original buffer
-	 * XXX How do you handle DMA_BIDIRECTIONAL here ?
-	 */
-	if (dir == DMA_FROM_DEVICE)
-		memcpy(buffer, dma_addr, size);
-	else if (dir == DMA_TO_DEVICE)
-		memcpy(dma_addr, buffer, size);
-	else
+	switch (target) {
+	case SYNC_FOR_CPU:
+		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
+			memcpy(buffer, dma_addr, size);
+		else if (dir != DMA_TO_DEVICE)
+			BUG();
+		break;
+	case SYNC_FOR_DEVICE:
+		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+			memcpy(dma_addr, buffer, size);
+		else if (dir != DMA_FROM_DEVICE)
+			BUG();
+		break;
+	default:
 		BUG();
+	}
 }
 
 void *
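This switch is what removes the old "XXX How do you handle DMA_BIDIRECTIONAL here?" limitation: the sync target, not just the DMA direction, now decides which way the bounce copy runs. A sketch of the round trip a DMA_BIDIRECTIONAL streaming mapping supports after this change (buffer names are illustrative):

	dma_addr_t h = swiotlb_map_single(dev, cmd, CMD_BYTES, DMA_BIDIRECTIONAL);
	/* device reads the command and overwrites it with a reply, then ... */
	swiotlb_sync_single_for_cpu(dev, h, CMD_BYTES, DMA_BIDIRECTIONAL);
	/* ... the cpu reads the reply and edits the buffer in place ... */
	swiotlb_sync_single_for_device(dev, h, CMD_BYTES, DMA_BIDIRECTIONAL);
	/* ... and the device may safely use the buffer again */
	swiotlb_unmap_single(dev, h, CMD_BYTES, DMA_BIDIRECTIONAL);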
@@ -383,24 +502,24 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
 	/*
 	 * Ran out of IOMMU space for this operation. This is very bad.
 	 * Unfortunately the drivers cannot handle this operation properly.
-	 * unless they check for pci_dma_mapping_error (most don't)
+	 * unless they check for dma_mapping_error (most don't)
 	 * When the mapping is small enough return a static buffer to limit
 	 * the damage, or panic when the transfer is too big.
 	 */
-	printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
+	printk(KERN_ERR "DMA: Out of SW-IOMMU space for %lu bytes at "
 	       "device %s\n", size, dev ? dev->bus_id : "?");
 
 	if (size > io_tlb_overflow && do_panic) {
-		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Memory would be corrupted\n");
-		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Random memory would be DMAed\n");
+		if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+			panic("DMA: Memory would be corrupted\n");
+		if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+			panic("DMA: Random memory would be DMAed\n");
 	}
 }
 
 /*
  * Map a single buffer of the indicated size for DMA in streaming mode.  The
- * PCI address to use is returned.
+ * physical address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory until
  * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
@@ -487,39 +606,73 @@ swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
  * after a transfer.
  *
  * If you perform a swiotlb_map_single() but wish to interrogate the buffer
- * using the cpu, yet do not wish to teardown the PCI dma mapping, you must
- * call this function before doing so.  At the next point you give the PCI dma
+ * using the cpu, yet do not wish to teardown the dma mapping, you must
+ * call this function before doing so.  At the next point you give the dma
  * address back to the card, you must first perform a
 * swiotlb_dma_sync_for_device, and then the device again owns the buffer
 */
-void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-			    size_t size, int dir)
+static inline void
+swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
+		    size_t size, int dir, int target)
 {
 	char *dma_addr = phys_to_virt(dev_addr);
 
 	if (dir == DMA_NONE)
 		BUG();
 	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
-		sync_single(hwdev, dma_addr, size, dir);
+		sync_single(hwdev, dma_addr, size, dir, target);
 	else if (dir == DMA_FROM_DEVICE)
 		mark_clean(dma_addr, size);
 }
 
 void
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+			    size_t size, int dir)
+{
+	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
+}
+
+void
 swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
 			       size_t size, int dir)
 {
-	char *dma_addr = phys_to_virt(dev_addr);
+	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
+}
+
+/*
+ * Same as above, but for a sub-range of the mapping.
+ */
+static inline void
+swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
+			  unsigned long offset, size_t size,
+			  int dir, int target)
+{
+	char *dma_addr = phys_to_virt(dev_addr) + offset;
 
 	if (dir == DMA_NONE)
 		BUG();
 	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
-		sync_single(hwdev, dma_addr, size, dir);
+		sync_single(hwdev, dma_addr, size, dir, target);
 	else if (dir == DMA_FROM_DEVICE)
 		mark_clean(dma_addr, size);
 }
 
+void
+swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+				  unsigned long offset, size_t size, int dir)
+{
+	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
+				  SYNC_FOR_CPU);
+}
+
+void
+swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
+				     unsigned long offset, size_t size, int dir)
+{
+	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
+				  SYNC_FOR_DEVICE);
+}
+
 /*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
  * This is the scatter-gather version of the above swiotlb_map_single
@@ -594,9 +747,9 @@ swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
  * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
  * and usage.
 */
-void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-			int nelems, int dir)
+static inline void
+swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg,
+		int nelems, int dir, int target)
 {
 	int i;
 
@@ -606,22 +759,21 @@ swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
 	for (i = 0; i < nelems; i++, sg++)
 		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
 			sync_single(hwdev, (void *) sg->dma_address,
-				    sg->dma_length, dir);
+				    sg->dma_length, dir, target);
+}
+
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+			int nelems, int dir)
+{
+	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
 }
 
 void
 swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 			   int nelems, int dir)
 {
-	int i;
-
-	if (dir == DMA_NONE)
-		BUG();
-
-	for (i = 0; i < nelems; i++, sg++)
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			sync_single(hwdev, (void *) sg->dma_address,
-				    sg->dma_length, dir);
+	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
 }
 
 int
@@ -631,9 +783,9 @@ swiotlb_dma_mapping_error(dma_addr_t dma_addr)
 }
 
 /*
- * Return whether the given PCI device DMA address mask can be supported
+ * Return whether the given device DMA address mask can be supported
  * properly.  For example, if your device can only drive the low 24-bits
- * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
+ * during bus mastering, then you would pass 0x00ffffff as the mask to
  * this function.
 */
 int
@@ -649,6 +801,8 @@ EXPORT_SYMBOL(swiotlb_map_sg);
 EXPORT_SYMBOL(swiotlb_unmap_sg);
 EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
 EXPORT_SYMBOL(swiotlb_sync_single_for_device);
+EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
+EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
 EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
 EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
 EXPORT_SYMBOL(swiotlb_dma_mapping_error);