author	Chris Metcalf <cmetcalf@tilera.com>	2012-06-15 15:23:06 -0400
committer	Chris Metcalf <cmetcalf@tilera.com>	2012-07-18 16:40:17 -0400
commit	41bb38fc5398ae878c799647f3c4b25374029afb (patch)
tree	5d7e01bd4176db1241b801f83cf92f32231b8e8b /arch/tile/include/asm
parent	eef015c8aa74451f848307fe5f65485070533bbb (diff)
tile pci: enable IOMMU to support DMA for legacy devices
This change uses the TRIO IOMMU to map the PCI DMA space and physical
memory at different addresses.  We also now use the dma_mapping_ops to
provide support for non-PCI DMA, PCIe DMA (64-bit) and legacy PCI DMA
(32-bit).  We use the kernel's software I/O TLB framework (i.e. bounce
buffers) for the legacy 32-bit PCI device support, since there are a
limited number of TLB entries in the IOMMU and it is non-trivial to
handle indexing, searching, matching, etc.  For 32-bit devices the
performance impact of bounce buffers should not be a concern.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
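For orientation, the driver-visible flow under this scheme looks roughly like the following sketch (a hypothetical PCI driver fragment, not code from this commit): a 64-bit DMA mask keeps a device on the IOMMU-backed PCIe ops, while falling back to a 32-bit mask makes dma_set_mask() switch it to the swiotlb-backed legacy ops.

/* Hypothetical driver fragment illustrating the two DMA paths. */
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>

static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	dma_addr_t bus_addr;
	void *buf;

	/*
	 * Prefer 64-bit DMA (IOMMU-backed PCIe ops); fall back to 32-bit,
	 * which dma_set_mask() turns into the bounce-buffer legacy ops.
	 */
	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) &&
	    dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
		return -EIO;

	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* Streaming mapping; dispatches through get_dma_ops(&pdev->dev). */
	bus_addr = dma_map_single(&pdev->dev, buf, PAGE_SIZE, DMA_FROM_DEVICE);
	if (dma_mapping_error(&pdev->dev, bus_addr)) {
		kfree(buf);
		return -EIO;
	}

	/* ... program the device with bus_addr, wait for completion ... */

	dma_unmap_single(&pdev->dev, bus_addr, PAGE_SIZE, DMA_FROM_DEVICE);
	kfree(buf);
	return 0;
}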
Diffstat (limited to 'arch/tile/include/asm')
-rw-r--r--	arch/tile/include/asm/Kbuild	1
-rw-r--r--	arch/tile/include/asm/device.h	33
-rw-r--r--	arch/tile/include/asm/dma-mapping.h	146
-rw-r--r--	arch/tile/include/asm/pci.h	76
4 files changed, 198 insertions(+), 58 deletions(-)
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 143473e3a0b..fb7c65ae8de 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -9,7 +9,6 @@ header-y += hardwall.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += cputime.h
-generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
 generic-y += errno.h
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
new file mode 100644
index 00000000000..5182705bd05
--- /dev/null
+++ b/arch/tile/include/asm/device.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for
+ * more details.
+ * Arch specific extensions to struct device
+ */
+
+#ifndef _ASM_TILE_DEVICE_H
+#define _ASM_TILE_DEVICE_H
+
+struct dev_archdata {
+	/* DMA operations on that device */
+	struct dma_map_ops *dma_ops;
+
+	/* Offset of the DMA address from the PA. */
+	dma_addr_t dma_offset;
+
+	/* Highest DMA address that can be generated by this device. */
+	dma_addr_t max_direct_dma_addr;
+};
+
+struct pdev_archdata {
+};
+
+#endif /* _ASM_TILE_DEVICE_H */
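The three archdata fields above are what the rest of this change keys off of. A plausible per-device setup for a TRIO-attached PCIe endpoint, using the helpers introduced in the dma-mapping.h and pci.h hunks below, might look like this sketch (the function name is hypothetical; the real wiring lives in arch/tile kernel code outside this diffstat):

/* Sketch: initializing dev_archdata for a 64-bit capable Gx PCIe device. */
#include <linux/pci.h>
#include <linux/dma-mapping.h>

static void example_setup_dma(struct pci_dev *pdev)
{
	struct device *dev = &pdev->dev;

	/* DMA through the IOMMU-backed PCIe ops by default. */
	set_dma_ops(dev, gx_pci_dma_map_ops);

	/* Bus addresses are the CPA shifted up by the whole PA range. */
	set_dma_offset(dev, TILE_PCI_MEM_MAP_BASE_OFFSET);

	/* Cap for direct DMA if the device later drops to a 32-bit mask. */
	dev->archdata.max_direct_dma_addr = TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
}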
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index eaa06d175b3..4b6247d1a31 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -20,69 +20,80 @@
 #include <linux/cache.h>
 #include <linux/io.h>
 
-/*
- * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
- * that is used for all the DMA operations.  For now, we don't have an
- * equivalent on tile, because we only have a single way of doing DMA.
- * (Tilera bug 7994 to use dma_mapping_ops.)
- */
+extern struct dma_map_ops *tile_dma_map_ops;
+extern struct dma_map_ops *gx_pci_dma_map_ops;
+extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+	if (dev && dev->archdata.dma_ops)
+		return dev->archdata.dma_ops;
+	else
+		return tile_dma_map_ops;
+}
+
+static inline dma_addr_t get_dma_offset(struct device *dev)
+{
+	return dev->archdata.dma_offset;
+}
+
+static inline void set_dma_offset(struct device *dev, dma_addr_t off)
+{
+	dev->archdata.dma_offset = off;
+}
 
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
-			enum dma_data_direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
-			size_t size, enum dma_data_direction);
-extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-			enum dma_data_direction);
-extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-			int nhwentries, enum dma_data_direction);
-extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
-			unsigned long offset, size_t size,
-			enum dma_data_direction);
-extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
-			size_t size, enum dma_data_direction);
-extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-			int nelems, enum dma_data_direction);
-extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-			int nelems, enum dma_data_direction);
-
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			dma_addr_t *dma_handle, gfp_t flag);
-
-void dma_free_coherent(struct device *dev, size_t size,
-			void *vaddr, dma_addr_t dma_handle);
-
-extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
-			enum dma_data_direction);
-extern void dma_sync_single_for_device(struct device *, dma_addr_t,
-			size_t, enum dma_data_direction);
-extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
-			unsigned long offset, size_t,
-			enum dma_data_direction);
-extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
-			unsigned long offset, size_t,
-			enum dma_data_direction);
-extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
-			enum dma_data_direction);
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return paddr + get_dma_offset(dev);
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return daddr - get_dma_offset(dev);
+}
+
+static inline void dma_mark_clean(void *addr, size_t size) {}
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+	dev->archdata.dma_ops = ops;
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+	if (!dev->dma_mask)
+		return 0;
+
+	return addr + size - 1 <= *dev->dma_mask;
+}
 
 static inline int
 dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-	return 0;
+	return get_dma_ops(dev)->mapping_error(dev, dma_addr);
 }
 
 static inline int
 dma_supported(struct device *dev, u64 mask)
 {
-	return 1;
+	return get_dma_ops(dev)->dma_supported(dev, mask);
 }
 
 static inline int
 dma_set_mask(struct device *dev, u64 mask)
 {
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+	/* Handle legacy PCI devices with limited memory addressability. */
+	if ((dma_ops == gx_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) {
+		set_dma_ops(dev, gx_legacy_pci_dma_map_ops);
+		set_dma_offset(dev, 0);
+		if (mask > dev->archdata.max_direct_dma_addr)
+			mask = dev->archdata.max_direct_dma_addr;
+	}
+
 	if (!dev->dma_mask || !dma_supported(dev, mask))
 		return -EIO;
 
@@ -91,4 +102,43 @@ dma_set_mask(struct device *dev, u64 mask)
 	return 0;
 }
 
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flag,
+				    struct dma_attrs *attrs)
+{
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+	void *cpu_addr;
+
+	cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
+
+	debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+
+	return cpu_addr;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *cpu_addr, dma_addr_t dma_handle,
+				  struct dma_attrs *attrs)
+{
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+	debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+
+	dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
+
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+
+/*
+ * dma_alloc_noncoherent() is #defined to return coherent memory,
+ * so there's no need to do any flushing here.
+ */
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+				  enum dma_data_direction direction)
+{
+}
+
 #endif /* _ASM_TILE_DMA_MAPPING_H */
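Taken together, dma_set_mask() and the phys_to_dma()/dma_to_phys() pair above give a device one of two address layouts. The stand-alone sketch below traces the arithmetic; the 40-bit value standing in for CHIP_PA_WIDTH() is an assumption for illustration, not something this diff states.

/* Stand-alone sketch of the bus-address arithmetic (assumes a 40-bit PA width). */
#include <stdio.h>
#include <stdint.h>

#define ASSUMED_PA_WIDTH	40	/* stand-in for CHIP_PA_WIDTH() */
#define MEM_MAP_BASE_OFFSET	(1ULL << ASSUMED_PA_WIDTH)

int main(void)
{
	uint64_t paddr = 0x12345000ULL;	/* an arbitrary physical (CPA) address */

	/* 64-bit device: phys_to_dma() adds the per-device dma_offset. */
	uint64_t dma64 = paddr + MEM_MAP_BASE_OFFSET;

	/*
	 * Legacy 32-bit device: dma_set_mask() resets the offset to 0, so the
	 * bus address equals the CPA, and swiotlb bounces anything above
	 * max_direct_dma_addr (3 GB - 1 with the 1 GB BAR window below 4 GB).
	 */
	uint64_t dma32 = paddr;

	printf("64-bit device bus address: 0x%llx\n", (unsigned long long)dma64);
	printf("32-bit device bus address: 0x%llx\n", (unsigned long long)dma32);
	return 0;
}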
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index 2c224c47d8a..553b7ff018c 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,6 +15,7 @@
 #ifndef _ASM_TILE_PCI_H
 #define _ASM_TILE_PCI_H
 
+#include <linux/dma-mapping.h>
 #include <linux/pci.h>
 #include <linux/numa.h>
 #include <asm-generic/pci_iomap.h>
@@ -53,6 +54,16 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
 
 #define TILE_NUM_PCIE	2
 
+/*
+ * The hypervisor maps the entirety of CPA-space as bus addresses, so
+ * bus addresses are physical addresses.  The networking and block
+ * device layers use this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS     1
+
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+
 #else
 
 #include <asm/page.h>
@@ -85,7 +96,47 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
 /*
  * Each Mem-Map interrupt region occupies 4KB.
  */
-#define MEM_MAP_INTR_REGION_SIZE	(1<< TRIO_MAP_MEM_LIM__ADDR_SHIFT)
+#define MEM_MAP_INTR_REGION_SIZE	(1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT)
+
+/*
+ * Allocate the PCI BAR window right below 4GB.
+ */
+#define TILE_PCI_BAR_WINDOW_TOP		(1ULL << 32)
+
+/*
+ * Allocate 1GB for the PCI BAR window.
+ */
+#define TILE_PCI_BAR_WINDOW_SIZE	(1 << 30)
+
+/*
+ * This is the highest bus address targeting the host memory that
+ * can be generated by legacy PCI devices with 32-bit or less
+ * DMA capability, dictated by the BAR window size and location.
+ */
+#define TILE_PCI_MAX_DIRECT_DMA_ADDRESS \
+	(TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1)
+
+/*
+ * We shift the PCI bus range for all the physical memory up by the whole PA
+ * range. The corresponding CPA of an incoming PCI request will be the PCI
+ * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies
+ * that the 64-bit capable devices will be given DMA addresses as
+ * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit
+ * devices, we create a separate map region that handles the low
+ * 4GB.
+ */
+#define TILE_PCI_MEM_MAP_BASE_OFFSET	(1ULL << CHIP_PA_WIDTH())
+
+/*
+ * End of the PCI memory resource.
+ */
+#define TILE_PCI_MEM_END \
+		((1ULL << CHIP_PA_WIDTH()) + TILE_PCI_BAR_WINDOW_TOP)
+
+/*
+ * Start of the PCI memory resource.
+ */
+#define TILE_PCI_MEM_START	(TILE_PCI_MEM_END - TILE_PCI_BAR_WINDOW_SIZE)
 
 /*
  * Structure of a PCI controller (host bridge) on Gx.
@@ -108,6 +159,8 @@ struct pci_controller {
 	int index;		/* PCI domain number */
 	struct pci_bus *root_bus;
 
+	uint64_t mem_offset;	/* cpu->bus memory mapping offset. */
+
 	int last_busno;
 
 	struct pci_ops *ops;
@@ -126,14 +179,22 @@ extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
 
 extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
 
-#endif /* __tilegx__ */
+extern void
+pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			struct resource *res);
+
+extern void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region);
 
 /*
- * The hypervisor maps the entirety of CPA-space as bus addresses, so
- * bus addresses are physical addresses.  The networking and block
- * device layers use this boolean for bounce buffer decisions.
+ * The PCI address space does not equal the physical memory address
+ * space (we have an IOMMU).  The IDE and SCSI device layers use this
+ * boolean for bounce buffer decisions.
  */
-#define PCI_DMA_BUS_IS_PHYS     1
+#define PCI_DMA_BUS_IS_PHYS     0
+
+#endif /* __tilegx__ */
 
 int __init tile_pci_init(void);
 int __init pcibios_init(void);
@@ -169,7 +230,4 @@ static inline int pcibios_assign_all_busses(void)
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
 #include <asm-generic/pci-dma-compat.h>
 
-/* generic pci stuff */
-#include <asm-generic/pci.h>
-
 #endif /* _ASM_TILE_PCI_H */
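Under the same assumption that CHIP_PA_WIDTH() evaluates to 40 on Gx (an illustration, not stated by this diff), the address-window constants added to pci.h work out as follows in a small self-checking C11 sketch:

/* Worked values for the new pci.h constants, assuming CHIP_PA_WIDTH() == 40. */
#define BAR_WINDOW_TOP		(1ULL << 32)				/* 4 GB:   0x100000000   */
#define BAR_WINDOW_SIZE		(1ULL << 30)				/* 1 GB:   0x40000000    */
#define MAX_DIRECT_DMA_ADDR	(BAR_WINDOW_TOP - BAR_WINDOW_SIZE - 1)	/* 3 GB-1: 0xbfffffff    */
#define MEM_MAP_BASE_OFFSET	(1ULL << 40)				/* 1 TB:   0x10000000000 */
#define MEM_END			((1ULL << 40) + BAR_WINDOW_TOP)		/*         0x10100000000 */
#define MEM_START		(MEM_END - BAR_WINDOW_SIZE)		/*         0x100c0000000 */

_Static_assert(MAX_DIRECT_DMA_ADDR == 0xbfffffffULL, "highest direct DMA address is 3 GB - 1");
_Static_assert(MEM_START == 0x100c0000000ULL, "PCI memory resource starts at top of PA space + 3 GB");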