author     Chris Metcalf <cmetcalf@tilera.com>       2012-06-15 15:23:06 -0400
committer  Chris Metcalf <cmetcalf@tilera.com>       2012-07-18 16:40:17 -0400
commit     41bb38fc5398ae878c799647f3c4b25374029afb
tree       5d7e01bd4176db1241b801f83cf92f32231b8e8b /arch
parent     eef015c8aa74451f848307fe5f65485070533bbb
tile pci: enable IOMMU to support DMA for legacy devices
This change uses the TRIO IOMMU to map the PCI DMA space and physical memory at different addresses. We also now use the dma_mapping_ops to provide support for non-PCI DMA, PCIe DMA (64-bit) and legacy PCI DMA (32-bit). We use the kernel's software I/O TLB framework (i.e. bounce buffers) for the legacy 32-bit PCI device support since there are a limited number of TLB entries in the IOMMU and it is non-trivial to handle indexing, searching, matching, etc. For 32-bit devices the performance impact of bounce buffers should not be a concern.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
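To make the three DMA paths concrete, here is a minimal sketch of a hypothetical tilegx PCI driver probe. my_probe(), my_pdev and the buffer are illustrative names, not part of this patch; only the mask calls and the resulting ops selection reflect behavior added below (see dma-mapping.h and pci-dma.c).

    #include <linux/pci.h>
    #include <linux/dma-mapping.h>

    /* Illustrative sketch only: not part of this patch. */
    static int my_probe(struct pci_dev *my_pdev)
    {
    	dma_addr_t handle;
    	void *buf;

    	if (!pci_set_dma_mask(my_pdev, DMA_BIT_MASK(64))) {
    		/* 64-bit PCIe path: the device keeps gx_pci_dma_map_ops
    		 * and gets bus addresses at CPA +
    		 * TILE_PCI_MEM_MAP_BASE_OFFSET. */
    		pci_set_consistent_dma_mask(my_pdev, DMA_BIT_MASK(64));
    	} else if (!pci_set_dma_mask(my_pdev, DMA_BIT_MASK(32))) {
    		/* Legacy 32-bit path: dma_set_mask() below switches the
    		 * device to gx_legacy_pci_dma_map_ops, i.e. swiotlb
    		 * bounce buffers, and clears its DMA offset. */
    		pci_set_consistent_dma_mask(my_pdev, DMA_BIT_MASK(32));
    	} else {
    		return -EIO;
    	}

    	/* Non-PCI devices keep using tile_dma_map_ops via the default
    	 * in get_dma_ops(); nothing to do for them here. */
    	buf = dma_alloc_coherent(&my_pdev->dev, PAGE_SIZE, &handle,
    				 GFP_KERNEL);
    	return buf ? 0 : -ENOMEM;
    }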
Diffstat (limited to 'arch')
-rw-r--r--  arch/tile/Kconfig                      18
-rw-r--r--  arch/tile/include/asm/Kbuild            1
-rw-r--r--  arch/tile/include/asm/device.h         33
-rw-r--r--  arch/tile/include/asm/dma-mapping.h   146
-rw-r--r--  arch/tile/include/asm/pci.h            76
-rw-r--r--  arch/tile/kernel/pci-dma.c            369
-rw-r--r--  arch/tile/kernel/pci_gx.c             113
-rw-r--r--  arch/tile/kernel/setup.c               35
8 files changed, 588 insertions(+), 203 deletions(-)
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 0ad771f7a7e1..557e3a381ca0 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -3,6 +3,8 @@
3 3
4config TILE 4config TILE
5 def_bool y 5 def_bool y
6 select HAVE_DMA_ATTRS
7 select HAVE_DMA_API_DEBUG
6 select HAVE_KVM if !TILEGX 8 select HAVE_KVM if !TILEGX
7 select GENERIC_FIND_FIRST_BIT 9 select GENERIC_FIND_FIRST_BIT
8 select USE_GENERIC_SMP_HELPERS 10 select USE_GENERIC_SMP_HELPERS
@@ -79,6 +81,9 @@ config ARCH_DMA_ADDR_T_64BIT
79config NEED_DMA_MAP_STATE 81config NEED_DMA_MAP_STATE
80 def_bool y 82 def_bool y
81 83
84config ARCH_HAS_DMA_SET_COHERENT_MASK
85 bool
86
82config LOCKDEP_SUPPORT 87config LOCKDEP_SUPPORT
83 def_bool y 88 def_bool y
84 89
@@ -215,6 +220,19 @@ config HIGHMEM
215config ZONE_DMA 220config ZONE_DMA
216 def_bool y 221 def_bool y
217 222
223config IOMMU_HELPER
224 bool
225
226config NEED_SG_DMA_LENGTH
227 bool
228
229config SWIOTLB
230 bool
231 default TILEGX
232 select IOMMU_HELPER
233 select NEED_SG_DMA_LENGTH
234 select ARCH_HAS_DMA_SET_COHERENT_MASK
235
218# We do not currently support disabling NUMA. 236# We do not currently support disabling NUMA.
219config NUMA 237config NUMA
220 bool # "NUMA Memory Allocation and Scheduler Support" 238 bool # "NUMA Memory Allocation and Scheduler Support"
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 143473e3a0bb..fb7c65ae8de0 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -9,7 +9,6 @@ header-y += hardwall.h
9generic-y += bug.h 9generic-y += bug.h
10generic-y += bugs.h 10generic-y += bugs.h
11generic-y += cputime.h 11generic-y += cputime.h
12generic-y += device.h
13generic-y += div64.h 12generic-y += div64.h
14generic-y += emergency-restart.h 13generic-y += emergency-restart.h
15generic-y += errno.h 14generic-y += errno.h
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
new file mode 100644
index 000000000000..5182705bd056
--- /dev/null
+++ b/arch/tile/include/asm/device.h
@@ -0,0 +1,33 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 * Arch specific extensions to struct device
14 */
15
16#ifndef _ASM_TILE_DEVICE_H
17#define _ASM_TILE_DEVICE_H
18
19struct dev_archdata {
20 /* DMA operations on that device */
21 struct dma_map_ops *dma_ops;
22
23 /* Offset of the DMA address from the PA. */
24 dma_addr_t dma_offset;
25
26 /* Highest DMA address that can be generated by this device. */
27 dma_addr_t max_direct_dma_addr;
28};
29
30struct pdev_archdata {
31};
32
33#endif /* _ASM_TILE_DEVICE_H */
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index eaa06d175b39..4b6247d1a315 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -20,69 +20,80 @@
20#include <linux/cache.h> 20#include <linux/cache.h>
21#include <linux/io.h> 21#include <linux/io.h>
22 22
23/* 23extern struct dma_map_ops *tile_dma_map_ops;
24 * Note that on x86 and powerpc, there is a "struct dma_mapping_ops" 24extern struct dma_map_ops *gx_pci_dma_map_ops;
25 * that is used for all the DMA operations. For now, we don't have an 25extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
26 * equivalent on tile, because we only have a single way of doing DMA. 26
27 * (Tilera bug 7994 to use dma_mapping_ops.) 27static inline struct dma_map_ops *get_dma_ops(struct device *dev)
28 */ 28{
29 if (dev && dev->archdata.dma_ops)
30 return dev->archdata.dma_ops;
31 else
32 return tile_dma_map_ops;
33}
34
35static inline dma_addr_t get_dma_offset(struct device *dev)
36{
37 return dev->archdata.dma_offset;
38}
39
40static inline void set_dma_offset(struct device *dev, dma_addr_t off)
41{
42 dev->archdata.dma_offset = off;
43}
29 44
30#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) 45static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
31#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) 46{
32 47 return paddr + get_dma_offset(dev);
33extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, 48}
34 enum dma_data_direction); 49
35extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, 50static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
36 size_t size, enum dma_data_direction); 51{
37extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 52 return daddr - get_dma_offset(dev);
38 enum dma_data_direction); 53}
39extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg, 54
40 int nhwentries, enum dma_data_direction); 55static inline void dma_mark_clean(void *addr, size_t size) {}
41extern dma_addr_t dma_map_page(struct device *dev, struct page *page, 56
42 unsigned long offset, size_t size, 57#include <asm-generic/dma-mapping-common.h>
43 enum dma_data_direction); 58
44extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address, 59static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
45 size_t size, enum dma_data_direction); 60{
46extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, 61 dev->archdata.dma_ops = ops;
47 int nelems, enum dma_data_direction); 62}
48extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, 63
49 int nelems, enum dma_data_direction); 64static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
50 65{
51 66 if (!dev->dma_mask)
52void *dma_alloc_coherent(struct device *dev, size_t size, 67 return 0;
53 dma_addr_t *dma_handle, gfp_t flag); 68
54 69 return addr + size - 1 <= *dev->dma_mask;
55void dma_free_coherent(struct device *dev, size_t size, 70}
56 void *vaddr, dma_addr_t dma_handle);
57
58extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
59 enum dma_data_direction);
60extern void dma_sync_single_for_device(struct device *, dma_addr_t,
61 size_t, enum dma_data_direction);
62extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
63 unsigned long offset, size_t,
64 enum dma_data_direction);
65extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
66 unsigned long offset, size_t,
67 enum dma_data_direction);
68extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
69 enum dma_data_direction);
70 71
71static inline int 72static inline int
72dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 73dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
73{ 74{
74 return 0; 75 return get_dma_ops(dev)->mapping_error(dev, dma_addr);
75} 76}
76 77
77static inline int 78static inline int
78dma_supported(struct device *dev, u64 mask) 79dma_supported(struct device *dev, u64 mask)
79{ 80{
80 return 1; 81 return get_dma_ops(dev)->dma_supported(dev, mask);
81} 82}
82 83
83static inline int 84static inline int
84dma_set_mask(struct device *dev, u64 mask) 85dma_set_mask(struct device *dev, u64 mask)
85{ 86{
87 struct dma_map_ops *dma_ops = get_dma_ops(dev);
88
89 /* Handle legacy PCI devices with limited memory addressability. */
90 if ((dma_ops == gx_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) {
91 set_dma_ops(dev, gx_legacy_pci_dma_map_ops);
92 set_dma_offset(dev, 0);
93 if (mask > dev->archdata.max_direct_dma_addr)
94 mask = dev->archdata.max_direct_dma_addr;
95 }
96
86 if (!dev->dma_mask || !dma_supported(dev, mask)) 97 if (!dev->dma_mask || !dma_supported(dev, mask))
87 return -EIO; 98 return -EIO;
88 99
@@ -91,4 +102,43 @@ dma_set_mask(struct device *dev, u64 mask)
91 return 0; 102 return 0;
92} 103}
93 104
105static inline void *dma_alloc_attrs(struct device *dev, size_t size,
106 dma_addr_t *dma_handle, gfp_t flag,
107 struct dma_attrs *attrs)
108{
109 struct dma_map_ops *dma_ops = get_dma_ops(dev);
110 void *cpu_addr;
111
112 cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
113
114 debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
115
116 return cpu_addr;
117}
118
119static inline void dma_free_attrs(struct device *dev, size_t size,
120 void *cpu_addr, dma_addr_t dma_handle,
121 struct dma_attrs *attrs)
122{
123 struct dma_map_ops *dma_ops = get_dma_ops(dev);
124
125 debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
126
127 dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
128}
129
130#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
131#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
132#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
133#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
134
135/*
136 * dma_alloc_noncoherent() is #defined to return coherent memory,
137 * so there's no need to do any flushing here.
138 */
139static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
140 enum dma_data_direction direction)
141{
142}
143
94#endif /* _ASM_TILE_DMA_MAPPING_H */ 144#endif /* _ASM_TILE_DMA_MAPPING_H */
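A minimal sketch of the offset helpers in this header, assuming CHIP_PA_WIDTH() evaluates to 40 (that value is an assumption of this sketch, not something the header states); show_offset_math() and its device argument are illustrative only.

    #include <linux/device.h>
    #include <linux/dma-mapping.h>

    /* Sketch: the helpers above are a plain add/subtract of the
     * per-device offset.  1ULL << 40 assumes CHIP_PA_WIDTH() == 40. */
    static void show_offset_math(struct device *dev)
    {
    	phys_addr_t paddr = 0x80000000ULL;	/* some CPA */
    	dma_addr_t bus;

    	set_dma_offset(dev, 1ULL << 40);	/* as pcibios_fixup_final() does */
    	bus = phys_to_dma(dev, paddr);		/* 0x10080000000: paddr + offset */
    	WARN_ON(dma_to_phys(dev, bus) != paddr);
    }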
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index 2c224c47d8ab..553b7ff018c4 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,6 +15,7 @@
15#ifndef _ASM_TILE_PCI_H 15#ifndef _ASM_TILE_PCI_H
16#define _ASM_TILE_PCI_H 16#define _ASM_TILE_PCI_H
17 17
18#include <linux/dma-mapping.h>
18#include <linux/pci.h> 19#include <linux/pci.h>
19#include <linux/numa.h> 20#include <linux/numa.h>
20#include <asm-generic/pci_iomap.h> 21#include <asm-generic/pci_iomap.h>
@@ -53,6 +54,16 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
53 54
54#define TILE_NUM_PCIE 2 55#define TILE_NUM_PCIE 2
55 56
57/*
58 * The hypervisor maps the entirety of CPA-space as bus addresses, so
59 * bus addresses are physical addresses. The networking and block
60 * device layers use this boolean for bounce buffer decisions.
61 */
62#define PCI_DMA_BUS_IS_PHYS 1
63
64/* generic pci stuff */
65#include <asm-generic/pci.h>
66
56#else 67#else
57 68
58#include <asm/page.h> 69#include <asm/page.h>
@@ -85,7 +96,47 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
85/* 96/*
86 * Each Mem-Map interrupt region occupies 4KB. 97 * Each Mem-Map interrupt region occupies 4KB.
87 */ 98 */
88#define MEM_MAP_INTR_REGION_SIZE (1<< TRIO_MAP_MEM_LIM__ADDR_SHIFT) 99#define MEM_MAP_INTR_REGION_SIZE (1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT)
100
101/*
102 * Allocate the PCI BAR window right below 4GB.
103 */
104#define TILE_PCI_BAR_WINDOW_TOP (1ULL << 32)
105
106/*
107 * Allocate 1GB for the PCI BAR window.
108 */
109#define TILE_PCI_BAR_WINDOW_SIZE (1 << 30)
110
111/*
112 * This is the highest bus address targeting the host memory that
113 * can be generated by legacy PCI devices with 32-bit or less
114 * DMA capability, dictated by the BAR window size and location.
115 */
116#define TILE_PCI_MAX_DIRECT_DMA_ADDRESS \
117 (TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1)
118
119/*
120 * We shift the PCI bus range for all the physical memory up by the whole PA
121 * range. The corresponding CPA of an incoming PCI request will be the PCI
122 * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies
123 * that the 64-bit capable devices will be given DMA addresses as
124 * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit
125 * devices, we create a separate map region that handles the low
126 * 4GB.
127 */
128#define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH())
129
130/*
131 * End of the PCI memory resource.
132 */
133#define TILE_PCI_MEM_END \
134 ((1ULL << CHIP_PA_WIDTH()) + TILE_PCI_BAR_WINDOW_TOP)
135
136/*
137 * Start of the PCI memory resource.
138 */
139#define TILE_PCI_MEM_START (TILE_PCI_MEM_END - TILE_PCI_BAR_WINDOW_SIZE)
89 140
90/* 141/*
91 * Structure of a PCI controller (host bridge) on Gx. 142 * Structure of a PCI controller (host bridge) on Gx.
@@ -108,6 +159,8 @@ struct pci_controller {
108 int index; /* PCI domain number */ 159 int index; /* PCI domain number */
109 struct pci_bus *root_bus; 160 struct pci_bus *root_bus;
110 161
162 uint64_t mem_offset; /* cpu->bus memory mapping offset. */
163
111 int last_busno; 164 int last_busno;
112 165
113 struct pci_ops *ops; 166 struct pci_ops *ops;
@@ -126,14 +179,22 @@ extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
126 179
127extern void pci_iounmap(struct pci_dev *dev, void __iomem *); 180extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
128 181
129#endif /* __tilegx__ */ 182extern void
183pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
184 struct resource *res);
185
186extern void
187pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
188 struct pci_bus_region *region);
130 189
131/* 190/*
132 * The hypervisor maps the entirety of CPA-space as bus addresses, so 191 * The PCI address space does not equal the physical memory address
133 * bus addresses are physical addresses. The networking and block 192 * space (we have an IOMMU). The IDE and SCSI device layers use this
134 * device layers use this boolean for bounce buffer decisions. 193 * boolean for bounce buffer decisions.
135 */ 194 */
136#define PCI_DMA_BUS_IS_PHYS 1 195#define PCI_DMA_BUS_IS_PHYS 0
196
197#endif /* __tilegx__ */
137 198
138int __init tile_pci_init(void); 199int __init tile_pci_init(void);
139int __init pcibios_init(void); 200int __init pcibios_init(void);
@@ -169,7 +230,4 @@ static inline int pcibios_assign_all_busses(void)
169/* implement the pci_ DMA API in terms of the generic device dma_ one */ 230/* implement the pci_ DMA API in terms of the generic device dma_ one */
170#include <asm-generic/pci-dma-compat.h> 231#include <asm-generic/pci-dma-compat.h>
171 232
172/* generic pci stuff */
173#include <asm-generic/pci.h>
174
175#endif /* _ASM_TILE_PCI_H */ 233#endif /* _ASM_TILE_PCI_H */
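As a quick sanity check of the constants above, here is the resulting bus-address layout, assuming CHIP_PA_WIDTH() evaluates to 40 (an assumption of this note, not something the header states).

    /* Assuming CHIP_PA_WIDTH() == 40:
     *   TILE_PCI_BAR_WINDOW_TOP         = 0x1_0000_0000    (4GB)
     *   TILE_PCI_BAR_WINDOW_SIZE        = 0x4000_0000      (1GB)
     *   TILE_PCI_MAX_DIRECT_DMA_ADDRESS = 0xBFFF_FFFF      (just below the BAR window)
     *   TILE_PCI_MEM_MAP_BASE_OFFSET    = 0x100_0000_0000  (1ULL << 40)
     *   TILE_PCI_MEM_START              = 0x100_C000_0000
     *   TILE_PCI_MEM_END                = 0x101_0000_0000
     * so 64-bit DMA and the PCI memory resource sit above the whole PA
     * space, while the BAR window and direct 32-bit DMA stay below 4GB.
     */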
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index edd856a000c5..b9fe80ec1089 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -14,6 +14,7 @@
14 14
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/dma-mapping.h> 16#include <linux/dma-mapping.h>
17#include <linux/swiotlb.h>
17#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
18#include <linux/export.h> 19#include <linux/export.h>
19#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
@@ -31,10 +32,9 @@
31#define PAGE_HOME_DMA PAGE_HOME_HASH 32#define PAGE_HOME_DMA PAGE_HOME_HASH
32#endif 33#endif
33 34
34void *dma_alloc_coherent(struct device *dev, 35static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
35 size_t size, 36 dma_addr_t *dma_handle, gfp_t gfp,
36 dma_addr_t *dma_handle, 37 struct dma_attrs *attrs)
37 gfp_t gfp)
38{ 38{
39 u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32); 39 u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
40 int node = dev_to_node(dev); 40 int node = dev_to_node(dev);
@@ -68,19 +68,19 @@ void *dma_alloc_coherent(struct device *dev,
68 } 68 }
69 69
70 *dma_handle = addr; 70 *dma_handle = addr;
71
71 return page_address(pg); 72 return page_address(pg);
72} 73}
73EXPORT_SYMBOL(dma_alloc_coherent);
74 74
75/* 75/*
76 * Free memory that was allocated with dma_alloc_coherent. 76 * Free memory that was allocated with tile_dma_alloc_coherent.
77 */ 77 */
78void dma_free_coherent(struct device *dev, size_t size, 78static void tile_dma_free_coherent(struct device *dev, size_t size,
79 void *vaddr, dma_addr_t dma_handle) 79 void *vaddr, dma_addr_t dma_handle,
80 struct dma_attrs *attrs)
80{ 81{
81 homecache_free_pages((unsigned long)vaddr, get_order(size)); 82 homecache_free_pages((unsigned long)vaddr, get_order(size));
82} 83}
83EXPORT_SYMBOL(dma_free_coherent);
84 84
85/* 85/*
86 * The map routines "map" the specified address range for DMA 86 * The map routines "map" the specified address range for DMA
@@ -199,38 +199,182 @@ static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
199 } 199 }
200} 200}
201 201
202static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
203 int nents, enum dma_data_direction direction,
204 struct dma_attrs *attrs)
205{
206 struct scatterlist *sg;
207 int i;
202 208
203/* 209 BUG_ON(!valid_dma_direction(direction));
204 * dma_map_single can be passed any memory address, and there appear 210
205 * to be no alignment constraints. 211 WARN_ON(nents == 0 || sglist->length == 0);
206 * 212
207 * There is a chance that the start of the buffer will share a cache 213 for_each_sg(sglist, sg, nents, i) {
208 * line with some other data that has been touched in the meantime. 214 sg->dma_address = sg_phys(sg);
209 */ 215 __dma_prep_pa_range(sg->dma_address, sg->length, direction);
210dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, 216#ifdef CONFIG_NEED_SG_DMA_LENGTH
211 enum dma_data_direction direction) 217 sg->dma_length = sg->length;
218#endif
219 }
220
221 return nents;
222}
223
224static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
225 int nents, enum dma_data_direction direction,
226 struct dma_attrs *attrs)
227{
228 struct scatterlist *sg;
229 int i;
230
231 BUG_ON(!valid_dma_direction(direction));
232 for_each_sg(sglist, sg, nents, i) {
233 sg->dma_address = sg_phys(sg);
234 __dma_complete_pa_range(sg->dma_address, sg->length,
235 direction);
236 }
237}
238
239static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
240 unsigned long offset, size_t size,
241 enum dma_data_direction direction,
242 struct dma_attrs *attrs)
212{ 243{
213 dma_addr_t dma_addr = __pa(ptr); 244 BUG_ON(!valid_dma_direction(direction));
245
246 BUG_ON(offset + size > PAGE_SIZE);
247 __dma_prep_page(page, offset, size, direction);
248
249 return page_to_pa(page) + offset;
250}
251
252static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
253 size_t size, enum dma_data_direction direction,
254 struct dma_attrs *attrs)
255{
256 BUG_ON(!valid_dma_direction(direction));
257
258 __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
259 dma_address & PAGE_OFFSET, size, direction);
260}
214 261
262static void tile_dma_sync_single_for_cpu(struct device *dev,
263 dma_addr_t dma_handle,
264 size_t size,
265 enum dma_data_direction direction)
266{
215 BUG_ON(!valid_dma_direction(direction)); 267 BUG_ON(!valid_dma_direction(direction));
216 WARN_ON(size == 0);
217 268
218 __dma_prep_pa_range(dma_addr, size, direction); 269 __dma_complete_pa_range(dma_handle, size, direction);
270}
219 271
220 return dma_addr; 272static void tile_dma_sync_single_for_device(struct device *dev,
273 dma_addr_t dma_handle, size_t size,
274 enum dma_data_direction direction)
275{
276 __dma_prep_pa_range(dma_handle, size, direction);
221} 277}
222EXPORT_SYMBOL(dma_map_single);
223 278
224void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, 279static void tile_dma_sync_sg_for_cpu(struct device *dev,
225 enum dma_data_direction direction) 280 struct scatterlist *sglist, int nelems,
281 enum dma_data_direction direction)
226{ 282{
283 struct scatterlist *sg;
284 int i;
285
227 BUG_ON(!valid_dma_direction(direction)); 286 BUG_ON(!valid_dma_direction(direction));
228 __dma_complete_pa_range(dma_addr, size, direction); 287 WARN_ON(nelems == 0 || sglist->length == 0);
288
289 for_each_sg(sglist, sg, nelems, i) {
290 dma_sync_single_for_cpu(dev, sg->dma_address,
291 sg_dma_len(sg), direction);
292 }
229} 293}
230EXPORT_SYMBOL(dma_unmap_single);
231 294
232int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, 295static void tile_dma_sync_sg_for_device(struct device *dev,
233 enum dma_data_direction direction) 296 struct scatterlist *sglist, int nelems,
297 enum dma_data_direction direction)
298{
299 struct scatterlist *sg;
300 int i;
301
302 BUG_ON(!valid_dma_direction(direction));
303 WARN_ON(nelems == 0 || sglist->length == 0);
304
305 for_each_sg(sglist, sg, nelems, i) {
306 dma_sync_single_for_device(dev, sg->dma_address,
307 sg_dma_len(sg), direction);
308 }
309}
310
311static inline int
312tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
313{
314 return 0;
315}
316
317static inline int
318tile_dma_supported(struct device *dev, u64 mask)
319{
320 return 1;
321}
322
323static struct dma_map_ops tile_default_dma_map_ops = {
324 .alloc = tile_dma_alloc_coherent,
325 .free = tile_dma_free_coherent,
326 .map_page = tile_dma_map_page,
327 .unmap_page = tile_dma_unmap_page,
328 .map_sg = tile_dma_map_sg,
329 .unmap_sg = tile_dma_unmap_sg,
330 .sync_single_for_cpu = tile_dma_sync_single_for_cpu,
331 .sync_single_for_device = tile_dma_sync_single_for_device,
332 .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
333 .sync_sg_for_device = tile_dma_sync_sg_for_device,
334 .mapping_error = tile_dma_mapping_error,
335 .dma_supported = tile_dma_supported
336};
337
338struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
339EXPORT_SYMBOL(tile_dma_map_ops);
340
341/* Generic PCI DMA mapping functions */
342
343static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
344 dma_addr_t *dma_handle, gfp_t gfp,
345 struct dma_attrs *attrs)
346{
347 int node = dev_to_node(dev);
348 int order = get_order(size);
349 struct page *pg;
350 dma_addr_t addr;
351
352 gfp |= __GFP_ZERO;
353
354 pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
355 if (pg == NULL)
356 return NULL;
357
358 addr = page_to_phys(pg);
359
360 *dma_handle = phys_to_dma(dev, addr);
361
362 return page_address(pg);
363}
364
365/*
366 * Free memory that was allocated with tile_pci_dma_alloc_coherent.
367 */
368static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
369 void *vaddr, dma_addr_t dma_handle,
370 struct dma_attrs *attrs)
371{
372 homecache_free_pages((unsigned long)vaddr, get_order(size));
373}
374
375static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
376 int nents, enum dma_data_direction direction,
377 struct dma_attrs *attrs)
234{ 378{
235 struct scatterlist *sg; 379 struct scatterlist *sg;
236 int i; 380 int i;
@@ -242,14 +386,20 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
242 for_each_sg(sglist, sg, nents, i) { 386 for_each_sg(sglist, sg, nents, i) {
243 sg->dma_address = sg_phys(sg); 387 sg->dma_address = sg_phys(sg);
244 __dma_prep_pa_range(sg->dma_address, sg->length, direction); 388 __dma_prep_pa_range(sg->dma_address, sg->length, direction);
389
390 sg->dma_address = phys_to_dma(dev, sg->dma_address);
391#ifdef CONFIG_NEED_SG_DMA_LENGTH
392 sg->dma_length = sg->length;
393#endif
245 } 394 }
246 395
247 return nents; 396 return nents;
248} 397}
249EXPORT_SYMBOL(dma_map_sg);
250 398
251void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, 399static void tile_pci_dma_unmap_sg(struct device *dev,
252 enum dma_data_direction direction) 400 struct scatterlist *sglist, int nents,
401 enum dma_data_direction direction,
402 struct dma_attrs *attrs)
253{ 403{
254 struct scatterlist *sg; 404 struct scatterlist *sg;
255 int i; 405 int i;
@@ -261,46 +411,60 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
261 direction); 411 direction);
262 } 412 }
263} 413}
264EXPORT_SYMBOL(dma_unmap_sg);
265 414
266dma_addr_t dma_map_page(struct device *dev, struct page *page, 415static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
267 unsigned long offset, size_t size, 416 unsigned long offset, size_t size,
268 enum dma_data_direction direction) 417 enum dma_data_direction direction,
418 struct dma_attrs *attrs)
269{ 419{
270 BUG_ON(!valid_dma_direction(direction)); 420 BUG_ON(!valid_dma_direction(direction));
271 421
272 BUG_ON(offset + size > PAGE_SIZE); 422 BUG_ON(offset + size > PAGE_SIZE);
273 __dma_prep_page(page, offset, size, direction); 423 __dma_prep_page(page, offset, size, direction);
274 return page_to_pa(page) + offset; 424
425 return phys_to_dma(dev, page_to_pa(page) + offset);
275} 426}
276EXPORT_SYMBOL(dma_map_page);
277 427
278void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, 428static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
279 enum dma_data_direction direction) 429 size_t size,
430 enum dma_data_direction direction,
431 struct dma_attrs *attrs)
280{ 432{
281 BUG_ON(!valid_dma_direction(direction)); 433 BUG_ON(!valid_dma_direction(direction));
434
435 dma_address = dma_to_phys(dev, dma_address);
436
282 __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)), 437 __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
283 dma_address & PAGE_OFFSET, size, direction); 438 dma_address & PAGE_OFFSET, size, direction);
284} 439}
285EXPORT_SYMBOL(dma_unmap_page);
286 440
287void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, 441static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
288 size_t size, enum dma_data_direction direction) 442 dma_addr_t dma_handle,
443 size_t size,
444 enum dma_data_direction direction)
289{ 445{
290 BUG_ON(!valid_dma_direction(direction)); 446 BUG_ON(!valid_dma_direction(direction));
447
448 dma_handle = dma_to_phys(dev, dma_handle);
449
291 __dma_complete_pa_range(dma_handle, size, direction); 450 __dma_complete_pa_range(dma_handle, size, direction);
292} 451}
293EXPORT_SYMBOL(dma_sync_single_for_cpu);
294 452
295void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, 453static void tile_pci_dma_sync_single_for_device(struct device *dev,
296 size_t size, enum dma_data_direction direction) 454 dma_addr_t dma_handle,
455 size_t size,
456 enum dma_data_direction
457 direction)
297{ 458{
459 dma_handle = dma_to_phys(dev, dma_handle);
460
298 __dma_prep_pa_range(dma_handle, size, direction); 461 __dma_prep_pa_range(dma_handle, size, direction);
299} 462}
300EXPORT_SYMBOL(dma_sync_single_for_device);
301 463
302void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist, 464static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
303 int nelems, enum dma_data_direction direction) 465 struct scatterlist *sglist,
466 int nelems,
467 enum dma_data_direction direction)
304{ 468{
305 struct scatterlist *sg; 469 struct scatterlist *sg;
306 int i; 470 int i;
@@ -313,10 +477,11 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
313 sg_dma_len(sg), direction); 477 sg_dma_len(sg), direction);
314 } 478 }
315} 479}
316EXPORT_SYMBOL(dma_sync_sg_for_cpu);
317 480
318void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, 481static void tile_pci_dma_sync_sg_for_device(struct device *dev,
319 int nelems, enum dma_data_direction direction) 482 struct scatterlist *sglist,
483 int nelems,
484 enum dma_data_direction direction)
320{ 485{
321 struct scatterlist *sg; 486 struct scatterlist *sg;
322 int i; 487 int i;
@@ -329,31 +494,93 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
329 sg_dma_len(sg), direction); 494 sg_dma_len(sg), direction);
330 } 495 }
331} 496}
332EXPORT_SYMBOL(dma_sync_sg_for_device);
333 497
334void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, 498static inline int
335 unsigned long offset, size_t size, 499tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
336 enum dma_data_direction direction)
337{ 500{
338 dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction); 501 return 0;
339} 502}
340EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
341 503
342void dma_sync_single_range_for_device(struct device *dev, 504static inline int
343 dma_addr_t dma_handle, 505tile_pci_dma_supported(struct device *dev, u64 mask)
344 unsigned long offset, size_t size,
345 enum dma_data_direction direction)
346{ 506{
347 dma_sync_single_for_device(dev, dma_handle + offset, size, direction); 507 return 1;
348} 508}
349EXPORT_SYMBOL(dma_sync_single_range_for_device);
350 509
351/* 510static struct dma_map_ops tile_pci_default_dma_map_ops = {
352 * dma_alloc_noncoherent() is #defined to return coherent memory, 511 .alloc = tile_pci_dma_alloc_coherent,
353 * so there's no need to do any flushing here. 512 .free = tile_pci_dma_free_coherent,
354 */ 513 .map_page = tile_pci_dma_map_page,
355void dma_cache_sync(struct device *dev, void *vaddr, size_t size, 514 .unmap_page = tile_pci_dma_unmap_page,
356 enum dma_data_direction direction) 515 .map_sg = tile_pci_dma_map_sg,
516 .unmap_sg = tile_pci_dma_unmap_sg,
517 .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
518 .sync_single_for_device = tile_pci_dma_sync_single_for_device,
519 .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
520 .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
521 .mapping_error = tile_pci_dma_mapping_error,
522 .dma_supported = tile_pci_dma_supported
523};
524
525struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
526EXPORT_SYMBOL(gx_pci_dma_map_ops);
527
528/* PCI DMA mapping functions for legacy PCI devices */
529
530#ifdef CONFIG_SWIOTLB
531static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
532 dma_addr_t *dma_handle, gfp_t gfp,
533 struct dma_attrs *attrs)
534{
535 gfp |= GFP_DMA;
536 return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
537}
538
539static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
540 void *vaddr, dma_addr_t dma_addr,
541 struct dma_attrs *attrs)
357{ 542{
543 swiotlb_free_coherent(dev, size, vaddr, dma_addr);
358} 544}
359EXPORT_SYMBOL(dma_cache_sync); 545
546static struct dma_map_ops pci_swiotlb_dma_ops = {
547 .alloc = tile_swiotlb_alloc_coherent,
548 .free = tile_swiotlb_free_coherent,
549 .map_page = swiotlb_map_page,
550 .unmap_page = swiotlb_unmap_page,
551 .map_sg = swiotlb_map_sg_attrs,
552 .unmap_sg = swiotlb_unmap_sg_attrs,
553 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
554 .sync_single_for_device = swiotlb_sync_single_for_device,
555 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
556 .sync_sg_for_device = swiotlb_sync_sg_for_device,
557 .dma_supported = swiotlb_dma_supported,
558 .mapping_error = swiotlb_dma_mapping_error,
559};
560
561struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
562#else
563struct dma_map_ops *gx_legacy_pci_dma_map_ops;
564#endif
565EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
566
567#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
568int dma_set_coherent_mask(struct device *dev, u64 mask)
569{
570 struct dma_map_ops *dma_ops = get_dma_ops(dev);
571
572 /* Handle legacy PCI devices with limited memory addressability. */
573 if (((dma_ops == gx_pci_dma_map_ops) ||
574 (dma_ops == gx_legacy_pci_dma_map_ops)) &&
575 (mask <= DMA_BIT_MASK(32))) {
576 if (mask > dev->archdata.max_direct_dma_addr)
577 mask = dev->archdata.max_direct_dma_addr;
578 }
579
580 if (!dma_supported(dev, mask))
581 return -EIO;
582 dev->coherent_dma_mask = mask;
583 return 0;
584}
585EXPORT_SYMBOL(dma_set_coherent_mask);
586#endif
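A hedged usage sketch of the coherent-mask handling above; legacy_setup() and lpdev are hypothetical names, and only the ops switch, the offset reset and the mask clamping are behavior from this file and from asm/dma-mapping.h.

    #include <linux/pci.h>
    #include <linux/dma-mapping.h>

    /* Illustrative only: a 32-bit-only PCI device after this patch. */
    static int legacy_setup(struct pci_dev *lpdev)
    {
    	/* dma_set_mask() with a 32-bit mask (see asm/dma-mapping.h)
    	 * switches lpdev to gx_legacy_pci_dma_map_ops and zeroes its
    	 * DMA offset. */
    	if (dma_set_mask(&lpdev->dev, DMA_BIT_MASK(32)))
    		return -EIO;

    	/* dma_set_coherent_mask() above clamps the coherent mask to
    	 * max_direct_dma_addr, so coherent buffers stay below the BAR
    	 * window; streaming buffers above it are bounced by swiotlb. */
    	return dma_set_coherent_mask(&lpdev->dev, DMA_BIT_MASK(32));
    }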
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 1b996bb628f1..27f7ab021137 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -40,22 +40,8 @@
40#include <arch/sim.h> 40#include <arch/sim.h>
41 41
42/* 42/*
43 * Initialization flow and process 43 * This file containes the routines to search for PCI buses,
44 * -------------------------------
45 *
46 * This files containes the routines to search for PCI buses,
47 * enumerate the buses, and configure any attached devices. 44 * enumerate the buses, and configure any attached devices.
48 *
49 * There are two entry points here:
50 * 1) tile_pci_init
51 * This sets up the pci_controller structs, and opens the
52 * FDs to the hypervisor. This is called from setup_arch() early
53 * in the boot process.
54 * 2) pcibios_init
55 * This probes the PCI bus(es) for any attached hardware. It's
56 * called by subsys_initcall. All of the real work is done by the
57 * generic Linux PCI layer.
58 *
59 */ 45 */
60 46
61#define DEBUG_PCI_CFG 0 47#define DEBUG_PCI_CFG 0
@@ -110,6 +96,21 @@ static struct pci_ops tile_cfg_ops;
110/* Mask of CPUs that should receive PCIe interrupts. */ 96/* Mask of CPUs that should receive PCIe interrupts. */
111static struct cpumask intr_cpus_map; 97static struct cpumask intr_cpus_map;
112 98
99/* PCI I/O space support is not implemented. */
100static struct resource pci_ioport_resource = {
101 .name = "PCI IO",
102 .start = 0,
103 .end = 0,
104 .flags = IORESOURCE_IO,
105};
106
107static struct resource pci_iomem_resource = {
108 .name = "PCI mem",
109 .start = TILE_PCI_MEM_START,
110 .end = TILE_PCI_MEM_END,
111 .flags = IORESOURCE_MEM,
112};
113
113/* 114/*
114 * We don't need to worry about the alignment of resources. 115 * We don't need to worry about the alignment of resources.
115 */ 116 */
@@ -334,8 +335,6 @@ free_irqs:
334} 335}
335 336
336/* 337/*
337 * First initialization entry point, called from setup_arch().
338 *
339 * Find valid controllers and fill in pci_controller structs for each 338 * Find valid controllers and fill in pci_controller structs for each
340 * of them. 339 * of them.
341 * 340 *
@@ -583,10 +582,7 @@ static int __devinit setup_pcie_rc_delay(char *str)
583early_param("pcie_rc_delay", setup_pcie_rc_delay); 582early_param("pcie_rc_delay", setup_pcie_rc_delay);
584 583
585/* 584/*
586 * Second PCI initialization entry point, called by subsys_initcall. 585 * PCI initialization entry point, called by subsys_initcall.
587 *
588 * The controllers have been set up by the time we get here, by a call to
589 * tile_pci_init.
590 */ 586 */
591int __init pcibios_init(void) 587int __init pcibios_init(void)
592{ 588{
@@ -594,15 +590,13 @@ int __init pcibios_init(void)
594 LIST_HEAD(resources); 590 LIST_HEAD(resources);
595 int i; 591 int i;
596 592
593 tile_pci_init();
594
597 if (num_rc_controllers == 0 && num_ep_controllers == 0) 595 if (num_rc_controllers == 0 && num_ep_controllers == 0)
598 return 0; 596 return 0;
599 597
600 pr_info("PCI: Probing PCI hardware\n");
601
602 /* 598 /*
603 * We loop over all the TRIO shims and set up the MMIO mappings. 599 * We loop over all the TRIO shims and set up the MMIO mappings.
604 * This step can't be done in tile_pci_init because the MM subsystem
605 * hasn't been initialized then.
606 */ 600 */
607 for (i = 0; i < TILEGX_NUM_TRIO; i++) { 601 for (i = 0; i < TILEGX_NUM_TRIO; i++) {
608 gxio_trio_context_t *context = &trio_contexts[i]; 602 gxio_trio_context_t *context = &trio_contexts[i];
@@ -645,9 +639,7 @@ int __init pcibios_init(void)
645 unsigned int class_code_revision; 639 unsigned int class_code_revision;
646 int trio_index; 640 int trio_index;
647 int mac; 641 int mac;
648#ifndef USE_SHARED_PCIE_CONFIG_REGION
649 int ret; 642 int ret;
650#endif
651 643
652 if (trio_context->fd < 0) 644 if (trio_context->fd < 0)
653 continue; 645 continue;
@@ -802,8 +794,6 @@ int __init pcibios_init(void)
802 pr_err("PCI: PCI CFG PIO alloc failure for mac %d " 794 pr_err("PCI: PCI CFG PIO alloc failure for mac %d "
803 "on TRIO %d, give up\n", mac, trio_index); 795 "on TRIO %d, give up\n", mac, trio_index);
804 796
805 /* TBD: cleanup ... */
806
807 continue; 797 continue;
808 } 798 }
809 799
@@ -819,8 +809,6 @@ int __init pcibios_init(void)
819 pr_err("PCI: PCI CFG PIO init failure for mac %d " 809 pr_err("PCI: PCI CFG PIO init failure for mac %d "
820 "on TRIO %d, give up\n", mac, trio_index); 810 "on TRIO %d, give up\n", mac, trio_index);
821 811
822 /* TBD: cleanup ... */
823
824 continue; 812 continue;
825 } 813 }
826 814
@@ -837,8 +825,6 @@ int __init pcibios_init(void)
837 pr_err("PCI: PIO map failure for mac %d on TRIO %d\n", 825 pr_err("PCI: PIO map failure for mac %d on TRIO %d\n",
838 mac, trio_index); 826 mac, trio_index);
839 827
840 /* TBD: cleanup ... */
841
842 continue; 828 continue;
843 } 829 }
844 830
@@ -852,7 +838,14 @@ int __init pcibios_init(void)
852 continue; 838 continue;
853 } 839 }
854 840
855 pci_add_resource(&resources, &iomem_resource); 841 /*
842 * The PCI memory resource is located above the PA space.
843 * The memory range for the PCI root bus should not overlap
844 * with the physical RAM
845 */
846 pci_add_resource_offset(&resources, &iomem_resource,
847 1ULL << CHIP_PA_WIDTH());
848
856 bus = pci_scan_root_bus(NULL, 0, controller->ops, 849 bus = pci_scan_root_bus(NULL, 0, controller->ops,
857 controller, &resources); 850 controller, &resources);
858 controller->root_bus = bus; 851 controller->root_bus = bus;
@@ -923,11 +916,6 @@ int __init pcibios_init(void)
923 } 916 }
924 917
925 /* 918 /*
926 * We always assign 32-bit PCI bus BAR ranges.
927 */
928 BUG_ON(bus_address_hi != 0);
929
930 /*
931 * Alloc a PIO region for PCI memory access for each RC port. 919 * Alloc a PIO region for PCI memory access for each RC port.
932 */ 920 */
933 ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); 921 ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
@@ -936,8 +924,6 @@ int __init pcibios_init(void)
936 "give up\n", controller->trio_index, 924 "give up\n", controller->trio_index,
937 controller->mac); 925 controller->mac);
938 926
939 /* TBD: cleanup ... */
940
941 continue; 927 continue;
942 } 928 }
943 929
@@ -950,15 +936,13 @@ int __init pcibios_init(void)
950 ret = gxio_trio_init_pio_region_aux(trio_context, 936 ret = gxio_trio_init_pio_region_aux(trio_context,
951 controller->pio_mem_index, 937 controller->pio_mem_index,
952 controller->mac, 938 controller->mac,
953 bus_address_hi, 939 0,
954 0); 940 0);
955 if (ret < 0) { 941 if (ret < 0) {
956 pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, " 942 pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, "
957 "give up\n", controller->trio_index, 943 "give up\n", controller->trio_index,
958 controller->mac); 944 controller->mac);
959 945
960 /* TBD: cleanup ... */
961
962 continue; 946 continue;
963 } 947 }
964 948
@@ -980,8 +964,6 @@ int __init pcibios_init(void)
980 controller->trio_index, 964 controller->trio_index,
981 controller->mac, j); 965 controller->mac, j);
982 966
983 /* TBD: cleanup ... */
984
985 goto alloc_mem_map_failed; 967 goto alloc_mem_map_failed;
986 } 968 }
987 969
@@ -991,9 +973,13 @@ int __init pcibios_init(void)
991 * Initialize the Mem-Map and the I/O MMU so that all 973 * Initialize the Mem-Map and the I/O MMU so that all
992 * the physical memory can be accessed by the endpoint 974 * the physical memory can be accessed by the endpoint
993 * devices. The base bus address is set to the base CPA 975 * devices. The base bus address is set to the base CPA
994 * of this memory controller, so is the base VA. The 976 * of this memory controller plus an offset (see pci.h).
977 * The region's base VA is set to the base CPA. The
995 * I/O MMU table essentially translates the CPA to 978 * I/O MMU table essentially translates the CPA to
996 * the real PA. 979 * the real PA. Implicitly, for node 0, we create
980 * a separate Mem-Map region that serves as the inbound
981 * window for legacy 32-bit devices. This is a direct
982 * map of the low 4GB CPA space.
997 */ 983 */
998 ret = gxio_trio_init_memory_map_mmu_aux(trio_context, 984 ret = gxio_trio_init_memory_map_mmu_aux(trio_context,
999 controller->mem_maps[j], 985 controller->mem_maps[j],
@@ -1001,7 +987,8 @@ int __init pcibios_init(void)
1001 nr_pages << PAGE_SHIFT, 987 nr_pages << PAGE_SHIFT,
1002 trio_context->asid, 988 trio_context->asid,
1003 controller->mac, 989 controller->mac,
1004 start_pfn << PAGE_SHIFT, 990 (start_pfn << PAGE_SHIFT) +
991 TILE_PCI_MEM_MAP_BASE_OFFSET,
1005 j, 992 j,
1006 GXIO_TRIO_ORDER_MODE_UNORDERED); 993 GXIO_TRIO_ORDER_MODE_UNORDERED);
1007 if (ret < 0) { 994 if (ret < 0) {
@@ -1010,11 +997,8 @@ int __init pcibios_init(void)
1010 controller->trio_index, 997 controller->trio_index,
1011 controller->mac, j); 998 controller->mac, j);
1012 999
1013 /* TBD: cleanup ... */
1014
1015 goto alloc_mem_map_failed; 1000 goto alloc_mem_map_failed;
1016 } 1001 }
1017
1018 continue; 1002 continue;
1019 1003
1020alloc_mem_map_failed: 1004alloc_mem_map_failed:
@@ -1028,11 +1012,19 @@ alloc_mem_map_failed:
1028subsys_initcall(pcibios_init); 1012subsys_initcall(pcibios_init);
1029 1013
1030/* 1014/*
1031 * No bus fixups needed. 1015 * PCI scan code calls the arch specific pcibios_fixup_bus() each time it scans
1016 * a new bridge. Called after each bus is probed, but before its children are
1017 * examined.
1032 */ 1018 */
1033void __devinit pcibios_fixup_bus(struct pci_bus *bus) 1019void __devinit pcibios_fixup_bus(struct pci_bus *bus)
1034{ 1020{
1035 /* Nothing needs to be done. */ 1021 struct pci_dev *dev = bus->self;
1022
1023 if (!dev) {
1024 /* This is the root bus. */
1025 bus->resource[0] = &pci_ioport_resource;
1026 bus->resource[1] = &pci_iomem_resource;
1027 }
1036} 1028}
1037 1029
1038/* 1030/*
@@ -1069,6 +1061,17 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
1069 return pci_enable_resources(dev, mask); 1061 return pci_enable_resources(dev, mask);
1070} 1062}
1071 1063
1064/* Called for each device after PCI setup is done. */
1065static void __init
1066pcibios_fixup_final(struct pci_dev *pdev)
1067{
1068 set_dma_ops(&pdev->dev, gx_pci_dma_map_ops);
1069 set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET);
1070 pdev->dev.archdata.max_direct_dma_addr =
1071 TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
1072}
1073DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
1074
1072/* Map a PCI MMIO bus address into VA space. */ 1075/* Map a PCI MMIO bus address into VA space. */
1073void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) 1076void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
1074{ 1077{
@@ -1127,7 +1130,7 @@ got_it:
1127 * We need to keep the PCI bus address's in-page offset in the VA. 1130 * We need to keep the PCI bus address's in-page offset in the VA.
1128 */ 1131 */
1129 return iorpc_ioremap(trio_fd, offset, size) + 1132 return iorpc_ioremap(trio_fd, offset, size) +
1130 (phys_addr & (PAGE_SIZE - 1)); 1133 (phys_addr & (PAGE_SIZE - 1));
1131} 1134}
1132EXPORT_SYMBOL(ioremap); 1135EXPORT_SYMBOL(ioremap);
1133 1136
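To tie the Mem-Map comment above to the constants in pci.h, here is a hedged trace of two inbound DMAs, again assuming CHIP_PA_WIDTH() == 40 (this note's assumption).

    /* Assuming CHIP_PA_WIDTH() == 40:
     *
     *   64-bit endpoint writes to bus address 0x100_8000_0000
     *     -> the Mem-Map region subtracts TILE_PCI_MEM_MAP_BASE_OFFSET
     *        (1ULL << 40), giving CPA 0x8000_0000
     *     -> the I/O MMU translates that CPA to the real PA
     *
     *   32-bit endpoint writes to bus address 0x2000_0000
     *     -> it hits the node-0 low-4GB Mem-Map region, a direct map of
     *        CPA 0x2000_0000; swiotlb has already bounced any buffer
     *        that did not fit below TILE_PCI_MAX_DIRECT_DMA_ADDRESS
     */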
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index fdde3b6986e5..2b8b689e596d 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -23,6 +23,7 @@
23#include <linux/irq.h> 23#include <linux/irq.h>
24#include <linux/kexec.h> 24#include <linux/kexec.h>
25#include <linux/pci.h> 25#include <linux/pci.h>
26#include <linux/swiotlb.h>
26#include <linux/initrd.h> 27#include <linux/initrd.h>
27#include <linux/io.h> 28#include <linux/io.h>
28#include <linux/highmem.h> 29#include <linux/highmem.h>
@@ -109,7 +110,7 @@ static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = {
109}; 110};
110static nodemask_t __initdata isolnodes; 111static nodemask_t __initdata isolnodes;
111 112
112#ifdef CONFIG_PCI 113#if defined(CONFIG_PCI) && !defined(__tilegx__)
113enum { DEFAULT_PCI_RESERVE_MB = 64 }; 114enum { DEFAULT_PCI_RESERVE_MB = 64 };
114static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB; 115static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB;
115unsigned long __initdata pci_reserve_start_pfn = -1U; 116unsigned long __initdata pci_reserve_start_pfn = -1U;
@@ -160,7 +161,7 @@ static int __init setup_isolnodes(char *str)
160} 161}
161early_param("isolnodes", setup_isolnodes); 162early_param("isolnodes", setup_isolnodes);
162 163
163#ifdef CONFIG_PCI 164#if defined(CONFIG_PCI) && !defined(__tilegx__)
164static int __init setup_pci_reserve(char* str) 165static int __init setup_pci_reserve(char* str)
165{ 166{
166 unsigned long mb; 167 unsigned long mb;
@@ -171,7 +172,7 @@ static int __init setup_pci_reserve(char* str)
171 172
172 pci_reserve_mb = mb; 173 pci_reserve_mb = mb;
173 pr_info("Reserving %dMB for PCIE root complex mappings\n", 174 pr_info("Reserving %dMB for PCIE root complex mappings\n",
174 pci_reserve_mb); 175 pci_reserve_mb);
175 return 0; 176 return 0;
176} 177}
177early_param("pci_reserve", setup_pci_reserve); 178early_param("pci_reserve", setup_pci_reserve);
@@ -411,7 +412,7 @@ static void __init setup_memory(void)
411 continue; 412 continue;
412 } 413 }
413#endif 414#endif
414#ifdef CONFIG_PCI 415#if defined(CONFIG_PCI) && !defined(__tilegx__)
415 /* 416 /*
416 * Blocks that overlap the pci reserved region must 417 * Blocks that overlap the pci reserved region must
417 * have enough space to hold the maximum percpu data 418 * have enough space to hold the maximum percpu data
@@ -604,11 +605,9 @@ static void __init setup_bootmem_allocator_node(int i)
604 /* Free all the space back into the allocator. */ 605 /* Free all the space back into the allocator. */
605 free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start)); 606 free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
606 607
607#if defined(CONFIG_PCI) 608#if defined(CONFIG_PCI) && !defined(__tilegx__)
608 /* 609 /*
609 * Throw away any memory aliased by the PCI region. FIXME: this 610 * Throw away any memory aliased by the PCI region.
610 * is a temporary hack to work around bug 10502, and needs to be
611 * fixed properly.
612 */ 611 */
613 if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) 612 if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
614 reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn), 613 reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
@@ -1353,8 +1352,7 @@ void __init setup_arch(char **cmdline_p)
1353 setup_cpu_maps(); 1352 setup_cpu_maps();
1354 1353
1355 1354
1356#ifdef CONFIG_PCI 1355#if defined(CONFIG_PCI) && !defined(__tilegx__)
1357#if !defined (__tilegx__)
1358 /* 1356 /*
1359 * Initialize the PCI structures. This is done before memory 1357 * Initialize the PCI structures. This is done before memory
1360 * setup so that we know whether or not a pci_reserve region 1358 * setup so that we know whether or not a pci_reserve region
@@ -1362,7 +1360,6 @@ void __init setup_arch(char **cmdline_p)
1362 */ 1360 */
1363 if (tile_pci_init() == 0) 1361 if (tile_pci_init() == 0)
1364 pci_reserve_mb = 0; 1362 pci_reserve_mb = 0;
1365#endif
1366 1363
1367 /* PCI systems reserve a region just below 4GB for mapping iomem. */ 1364 /* PCI systems reserve a region just below 4GB for mapping iomem. */
1368 pci_reserve_end_pfn = (1 << (32 - PAGE_SHIFT)); 1365 pci_reserve_end_pfn = (1 << (32 - PAGE_SHIFT));
@@ -1384,6 +1381,10 @@ void __init setup_arch(char **cmdline_p)
1384 * any memory using the bootmem allocator. 1381 * any memory using the bootmem allocator.
1385 */ 1382 */
1386 1383
1384#ifdef CONFIG_SWIOTLB
1385 swiotlb_init(0);
1386#endif
1387
1387 paging_init(); 1388 paging_init();
1388 setup_numa_mapping(); 1389 setup_numa_mapping();
1389 zone_sizes_init(); 1390 zone_sizes_init();
@@ -1391,10 +1392,6 @@ void __init setup_arch(char **cmdline_p)
1391 setup_cpu(1); 1392 setup_cpu(1);
1392 setup_clock(); 1393 setup_clock();
1393 load_hv_initrd(); 1394 load_hv_initrd();
1394
1395#if defined(CONFIG_PCI) && defined (__tilegx__)
1396 tile_pci_init();
1397#endif
1398} 1395}
1399 1396
1400 1397
@@ -1538,11 +1535,11 @@ static struct resource code_resource = {
1538}; 1535};
1539 1536
1540/* 1537/*
1541 * We reserve all resources above 4GB so that PCI won't try to put 1538 * On Pro, we reserve all resources above 4GB so that PCI won't try to put
1542 * mappings above 4GB; the standard allows that for some devices but 1539 * mappings above 4GB; the standard allows that for some devices but
1543 * the probing code trunates values to 32 bits. 1540 * the probing code trunates values to 32 bits.
1544 */ 1541 */
1545#ifdef CONFIG_PCI 1542#if defined(CONFIG_PCI) && !defined(__tilegx__)
1546static struct resource* __init 1543static struct resource* __init
1547insert_non_bus_resource(void) 1544insert_non_bus_resource(void)
1548{ 1545{
@@ -1588,7 +1585,7 @@ static int __init request_standard_resources(void)
1588 enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; 1585 enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
1589 1586
1590 iomem_resource.end = -1LL; 1587 iomem_resource.end = -1LL;
1591#ifdef CONFIG_PCI 1588#if defined(CONFIG_PCI) && !defined(__tilegx__)
1592 insert_non_bus_resource(); 1589 insert_non_bus_resource();
1593#endif 1590#endif
1594 1591
@@ -1596,7 +1593,7 @@ static int __init request_standard_resources(void)
1596 u64 start_pfn = node_start_pfn[i]; 1593 u64 start_pfn = node_start_pfn[i];
1597 u64 end_pfn = node_end_pfn[i]; 1594 u64 end_pfn = node_end_pfn[i];
1598 1595
1599#ifdef CONFIG_PCI 1596#if defined(CONFIG_PCI) && !defined(__tilegx__)
1600 if (start_pfn <= pci_reserve_start_pfn && 1597 if (start_pfn <= pci_reserve_start_pfn &&
1601 end_pfn > pci_reserve_start_pfn) { 1598 end_pfn > pci_reserve_start_pfn) {
1602 if (end_pfn > pci_reserve_end_pfn) 1599 if (end_pfn > pci_reserve_end_pfn)