Diffstat:
 -rw-r--r--  arch/tile/Kconfig                   |  18
 -rw-r--r--  arch/tile/include/asm/Kbuild        |   1
 -rw-r--r--  arch/tile/include/asm/device.h      |  33
 -rw-r--r--  arch/tile/include/asm/dma-mapping.h | 146
 -rw-r--r--  arch/tile/include/asm/pci.h         |  76
 -rw-r--r--  arch/tile/kernel/pci-dma.c          | 369
 -rw-r--r--  arch/tile/kernel/pci_gx.c           | 113
 -rw-r--r--  arch/tile/kernel/setup.c            |  35

 8 files changed, 588 insertions(+), 203 deletions(-)
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 0ad771f7a7e..557e3a381ca 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -3,6 +3,8 @@
 
 config TILE
 	def_bool y
+	select HAVE_DMA_ATTRS
+	select HAVE_DMA_API_DEBUG
 	select HAVE_KVM if !TILEGX
 	select GENERIC_FIND_FIRST_BIT
 	select USE_GENERIC_SMP_HELPERS
@@ -79,6 +81,9 @@ config ARCH_DMA_ADDR_T_64BIT
 config NEED_DMA_MAP_STATE
 	def_bool y
 
+config ARCH_HAS_DMA_SET_COHERENT_MASK
+	bool
+
 config LOCKDEP_SUPPORT
 	def_bool y
 
@@ -215,6 +220,19 @@ config HIGHMEM
 config ZONE_DMA
 	def_bool y
 
+config IOMMU_HELPER
+	bool
+
+config NEED_SG_DMA_LENGTH
+	bool
+
+config SWIOTLB
+	bool
+	default TILEGX
+	select IOMMU_HELPER
+	select NEED_SG_DMA_LENGTH
+	select ARCH_HAS_DMA_SET_COHERENT_MASK
+
 # We do not currently support disabling NUMA.
 config NUMA
 	bool	# "NUMA Memory Allocation and Scheduler Support"
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 143473e3a0b..fb7c65ae8de 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -9,7 +9,6 @@ header-y += hardwall.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += cputime.h
-generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
 generic-y += errno.h
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
new file mode 100644
index 00000000000..5182705bd05
--- /dev/null
+++ b/arch/tile/include/asm/device.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for
+ * more details.
+ * Arch specific extensions to struct device
+ */
+
+#ifndef _ASM_TILE_DEVICE_H
+#define _ASM_TILE_DEVICE_H
+
+struct dev_archdata {
+	/* DMA operations on that device */
+	struct dma_map_ops	*dma_ops;
+
+	/* Offset of the DMA address from the PA. */
+	dma_addr_t		dma_offset;
+
+	/* Highest DMA address that can be generated by this device. */
+	dma_addr_t		max_direct_dma_addr;
+};
+
+struct pdev_archdata {
+};
+
+#endif /* _ASM_TILE_DEVICE_H */
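The three archdata fields added here carry all of the per-device DMA state the rest of the patch relies on: dma_ops selects the operation table, dma_offset feeds the phys_to_dma()/dma_to_phys() helpers, and max_direct_dma_addr bounds what a legacy 32-bit device may address directly. A minimal sketch of how arch code fills them in for a 64-bit capable Gx PCI device follows; it simply mirrors the pcibios_fixup_final() hook added later in this patch, and the function name is hypothetical.

static void example_setup_gx_pci_dma(struct device *dev)
{
	/* Route the generic DMA API through the Gx PCI dma_map_ops. */
	set_dma_ops(dev, gx_pci_dma_map_ops);

	/* Bus address = CPA + TILE_PCI_MEM_MAP_BASE_OFFSET. */
	set_dma_offset(dev, TILE_PCI_MEM_MAP_BASE_OFFSET);

	/* Upper bound applied when a 32-bit mask is requested later. */
	dev->archdata.max_direct_dma_addr = TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
}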
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index eaa06d175b3..4b6247d1a31 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -20,69 +20,80 @@
 #include <linux/cache.h>
 #include <linux/io.h>
 
-/*
- * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
- * that is used for all the DMA operations.  For now, we don't have an
- * equivalent on tile, because we only have a single way of doing DMA.
- * (Tilera bug 7994 to use dma_mapping_ops.)
- */
+extern struct dma_map_ops *tile_dma_map_ops;
+extern struct dma_map_ops *gx_pci_dma_map_ops;
+extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+	if (dev && dev->archdata.dma_ops)
+		return dev->archdata.dma_ops;
+	else
+		return tile_dma_map_ops;
+}
+
+static inline dma_addr_t get_dma_offset(struct device *dev)
+{
+	return dev->archdata.dma_offset;
+}
+
+static inline void set_dma_offset(struct device *dev, dma_addr_t off)
+{
+	dev->archdata.dma_offset = off;
+}
 
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
-				 enum dma_data_direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
-			     size_t size, enum dma_data_direction);
-extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		      enum dma_data_direction);
-extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-			 int nhwentries, enum dma_data_direction);
-extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
-			       unsigned long offset, size_t size,
-			       enum dma_data_direction);
-extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
-			   size_t size, enum dma_data_direction);
-extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-				int nelems, enum dma_data_direction);
-extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-				   int nelems, enum dma_data_direction);
-
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			 dma_addr_t *dma_handle, gfp_t flag);
-
-void dma_free_coherent(struct device *dev, size_t size,
-		       void *vaddr, dma_addr_t dma_handle);
-
-extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
-				    enum dma_data_direction);
-extern void dma_sync_single_for_device(struct device *, dma_addr_t,
-				       size_t, enum dma_data_direction);
-extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
-					  unsigned long offset, size_t,
-					  enum dma_data_direction);
-extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
-					     unsigned long offset, size_t,
-					     enum dma_data_direction);
-extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
-			   enum dma_data_direction);
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return paddr + get_dma_offset(dev);
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return daddr - get_dma_offset(dev);
+}
+
+static inline void dma_mark_clean(void *addr, size_t size) {}
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+	dev->archdata.dma_ops = ops;
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+	if (!dev->dma_mask)
+		return 0;
+
+	return addr + size - 1 <= *dev->dma_mask;
+}
 
 static inline int
 dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-	return 0;
+	return get_dma_ops(dev)->mapping_error(dev, dma_addr);
 }
 
 static inline int
 dma_supported(struct device *dev, u64 mask)
 {
-	return 1;
+	return get_dma_ops(dev)->dma_supported(dev, mask);
 }
 
 static inline int
 dma_set_mask(struct device *dev, u64 mask)
 {
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+	/* Handle legacy PCI devices with limited memory addressability. */
+	if ((dma_ops == gx_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) {
+		set_dma_ops(dev, gx_legacy_pci_dma_map_ops);
+		set_dma_offset(dev, 0);
+		if (mask > dev->archdata.max_direct_dma_addr)
+			mask = dev->archdata.max_direct_dma_addr;
+	}
+
 	if (!dev->dma_mask || !dma_supported(dev, mask))
 		return -EIO;
 
@@ -91,4 +102,43 @@ dma_set_mask(struct device *dev, u64 mask)
 	return 0;
 }
 
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flag,
+				    struct dma_attrs *attrs)
+{
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+	void *cpu_addr;
+
+	cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
+
+	debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+
+	return cpu_addr;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *cpu_addr, dma_addr_t dma_handle,
+				  struct dma_attrs *attrs)
+{
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+	debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+
+	dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
+
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+
+/*
+ * dma_alloc_noncoherent() is #defined to return coherent memory,
+ * so there's no need to do any flushing here.
+ */
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+				  enum dma_data_direction direction)
+{
+}
+
 #endif /* _ASM_TILE_DMA_MAPPING_H */
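With get_dma_ops() dispatch in place, drivers keep calling the ordinary DMA API; the only Gx-specific behaviour sits inside dma_set_mask(), which silently moves a 32-bit-only PCI device onto the swiotlb-backed gx_legacy_pci_dma_map_ops and clamps its mask to max_direct_dma_addr. A minimal driver-side sketch is shown below, assuming an already-probed struct device; the function name is hypothetical and not part of the patch.

static int example_probe_dma(struct device *dev, size_t len,
			     void **cpu, dma_addr_t *bus)
{
	/* On Gx PCI, a 32-bit mask switches the device to the bounce-buffer ops. */
	if (dma_set_mask(dev, DMA_BIT_MASK(32)))
		return -EIO;

	/* Dispatches to get_dma_ops(dev)->alloc() via dma_alloc_attrs(). */
	*cpu = dma_alloc_coherent(dev, len, bus, GFP_KERNEL);
	return *cpu ? 0 : -ENOMEM;
}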
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index 2c224c47d8a..553b7ff018c 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,6 +15,7 @@
 #ifndef _ASM_TILE_PCI_H
 #define _ASM_TILE_PCI_H
 
+#include <linux/dma-mapping.h>
 #include <linux/pci.h>
 #include <linux/numa.h>
 #include <asm-generic/pci_iomap.h>
@@ -53,6 +54,16 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
 
 #define	TILE_NUM_PCIE	2
 
+/*
+ * The hypervisor maps the entirety of CPA-space as bus addresses, so
+ * bus addresses are physical addresses.  The networking and block
+ * device layers use this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS     1
+
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+
 #else
 
 #include <asm/page.h>
@@ -85,7 +96,47 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
 /*
  * Each Mem-Map interrupt region occupies 4KB.
  */
-#define	MEM_MAP_INTR_REGION_SIZE	(1<< TRIO_MAP_MEM_LIM__ADDR_SHIFT)
+#define	MEM_MAP_INTR_REGION_SIZE	(1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT)
+
+/*
+ * Allocate the PCI BAR window right below 4GB.
+ */
+#define	TILE_PCI_BAR_WINDOW_TOP		(1ULL << 32)
+
+/*
+ * Allocate 1GB for the PCI BAR window.
+ */
+#define	TILE_PCI_BAR_WINDOW_SIZE	(1 << 30)
+
+/*
+ * This is the highest bus address targeting the host memory that
+ * can be generated by legacy PCI devices with 32-bit or less
+ * DMA capability, dictated by the BAR window size and location.
+ */
+#define	TILE_PCI_MAX_DIRECT_DMA_ADDRESS \
+	(TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1)
+
+/*
+ * We shift the PCI bus range for all the physical memory up by the whole PA
+ * range. The corresponding CPA of an incoming PCI request will be the PCI
+ * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies
+ * that the 64-bit capable devices will be given DMA addresses as
+ * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit
+ * devices, we create a separate map region that handles the low
+ * 4GB.
+ */
+#define	TILE_PCI_MEM_MAP_BASE_OFFSET	(1ULL << CHIP_PA_WIDTH())
+
+/*
+ * End of the PCI memory resource.
+ */
+#define	TILE_PCI_MEM_END	\
+		((1ULL << CHIP_PA_WIDTH()) + TILE_PCI_BAR_WINDOW_TOP)
+
+/*
+ * Start of the PCI memory resource.
+ */
+#define	TILE_PCI_MEM_START	(TILE_PCI_MEM_END - TILE_PCI_BAR_WINDOW_SIZE)
 
 /*
  * Structure of a PCI controller (host bridge) on Gx.
@@ -108,6 +159,8 @@ struct pci_controller {
 	int index;		/* PCI domain number */
 	struct pci_bus *root_bus;
 
+	uint64_t mem_offset;	/* cpu->bus memory mapping offset. */
+
 	int last_busno;
 
 	struct pci_ops *ops;
@@ -126,14 +179,22 @@ extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
 
 extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
 
-#endif /* __tilegx__ */
+extern void
+pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
+			struct resource *res);
+
+extern void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region);
 
 /*
- * The hypervisor maps the entirety of CPA-space as bus addresses, so
- * bus addresses are physical addresses.  The networking and block
- * device layers use this boolean for bounce buffer decisions.
+ * The PCI address space does not equal the physical memory address
+ * space (we have an IOMMU).  The IDE and SCSI device layers use this
+ * boolean for bounce buffer decisions.
  */
-#define PCI_DMA_BUS_IS_PHYS     1
+#define PCI_DMA_BUS_IS_PHYS     0
+
+#endif /* __tilegx__ */
 
 int __init tile_pci_init(void);
 int __init pcibios_init(void);
@@ -169,7 +230,4 @@ static inline int pcibios_assign_all_busses(void)
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
 #include <asm-generic/pci-dma-compat.h>
 
-/* generic pci stuff */
-#include <asm-generic/pci.h>
-
 #endif /* _ASM_TILE_PCI_H */
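To make the new constants concrete: TILE_PCI_BAR_WINDOW_TOP is 4GB and TILE_PCI_BAR_WINDOW_SIZE is 1GB, so BARs are assigned bus addresses in 0xC000_0000 through 0xFFFF_FFFF and TILE_PCI_MAX_DIRECT_DMA_ADDRESS works out to 0xBFFF_FFFF. Assuming a 40-bit CHIP_PA_WIDTH() purely for illustration, TILE_PCI_MEM_MAP_BASE_OFFSET is 2^40, the CPU-side PCI memory resource runs from TILE_PCI_MEM_START = 0x100_C000_0000 up to TILE_PCI_MEM_END = 0x101_0000_0000, and a 64-bit capable device targeting host CPA 0x1000 uses bus address 0x100_0000_1000.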
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index edd856a000c..b9fe80ec108 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -14,6 +14,7 @@
 
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
 #include <linux/vmalloc.h>
 #include <linux/export.h>
 #include <asm/tlbflush.h>
@@ -31,10 +32,9 @@
 #define PAGE_HOME_DMA PAGE_HOME_HASH
 #endif
 
-void *dma_alloc_coherent(struct device *dev,
-			 size_t size,
-			 dma_addr_t *dma_handle,
-			 gfp_t gfp)
+static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
+				     dma_addr_t *dma_handle, gfp_t gfp,
+				     struct dma_attrs *attrs)
 {
 	u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
 	int node = dev_to_node(dev);
@@ -68,19 +68,19 @@ void *dma_alloc_coherent(struct device *dev,
 	}
 
 	*dma_handle = addr;
+
 	return page_address(pg);
 }
-EXPORT_SYMBOL(dma_alloc_coherent);
 
 /*
- * Free memory that was allocated with dma_alloc_coherent.
+ * Free memory that was allocated with tile_dma_alloc_coherent.
  */
-void dma_free_coherent(struct device *dev, size_t size,
-		       void *vaddr, dma_addr_t dma_handle)
+static void tile_dma_free_coherent(struct device *dev, size_t size,
+				   void *vaddr, dma_addr_t dma_handle,
+				   struct dma_attrs *attrs)
 {
 	homecache_free_pages((unsigned long)vaddr, get_order(size));
 }
-EXPORT_SYMBOL(dma_free_coherent);
 
 /*
  * The map routines "map" the specified address range for DMA
@@ -199,38 +199,182 @@ static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
 	}
 }
 
+static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
+			   int nents, enum dma_data_direction direction,
+			   struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i;
 
-/*
- * dma_map_single can be passed any memory address, and there appear
- * to be no alignment constraints.
- *
- * There is a chance that the start of the buffer will share a cache
- * line with some other data that has been touched in the meantime.
- */
-dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
-			  enum dma_data_direction direction)
+	BUG_ON(!valid_dma_direction(direction));
+
+	WARN_ON(nents == 0 || sglist->length == 0);
+
+	for_each_sg(sglist, sg, nents, i) {
+		sg->dma_address = sg_phys(sg);
+		__dma_prep_pa_range(sg->dma_address, sg->length, direction);
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+		sg->dma_length = sg->length;
+#endif
+	}
+
+	return nents;
+}
+
+static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+			      int nents, enum dma_data_direction direction,
+			      struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(!valid_dma_direction(direction));
+	for_each_sg(sglist, sg, nents, i) {
+		sg->dma_address = sg_phys(sg);
+		__dma_complete_pa_range(sg->dma_address, sg->length,
+					direction);
+	}
+}
+
+static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
+				    unsigned long offset, size_t size,
+				    enum dma_data_direction direction,
+				    struct dma_attrs *attrs)
 {
-	dma_addr_t dma_addr = __pa(ptr);
+	BUG_ON(!valid_dma_direction(direction));
+
+	BUG_ON(offset + size > PAGE_SIZE);
+	__dma_prep_page(page, offset, size, direction);
+
+	return page_to_pa(page) + offset;
+}
+
+static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+				size_t size, enum dma_data_direction direction,
+				struct dma_attrs *attrs)
+{
+	BUG_ON(!valid_dma_direction(direction));
+
+	__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
+			    dma_address & PAGE_OFFSET, size, direction);
+}
 
+static void tile_dma_sync_single_for_cpu(struct device *dev,
+					 dma_addr_t dma_handle,
+					 size_t size,
+					 enum dma_data_direction direction)
+{
 	BUG_ON(!valid_dma_direction(direction));
-	WARN_ON(size == 0);
 
-	__dma_prep_pa_range(dma_addr, size, direction);
+	__dma_complete_pa_range(dma_handle, size, direction);
+}
 
-	return dma_addr;
+static void tile_dma_sync_single_for_device(struct device *dev,
+					    dma_addr_t dma_handle, size_t size,
+					    enum dma_data_direction direction)
+{
+	__dma_prep_pa_range(dma_handle, size, direction);
 }
-EXPORT_SYMBOL(dma_map_single);
 
-void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-		      enum dma_data_direction direction)
+static void tile_dma_sync_sg_for_cpu(struct device *dev,
+				     struct scatterlist *sglist, int nelems,
+				     enum dma_data_direction direction)
 {
+	struct scatterlist *sg;
+	int i;
+
 	BUG_ON(!valid_dma_direction(direction));
-	__dma_complete_pa_range(dma_addr, size, direction);
+	WARN_ON(nelems == 0 || sglist->length == 0);
+
+	for_each_sg(sglist, sg, nelems, i) {
+		dma_sync_single_for_cpu(dev, sg->dma_address,
+					sg_dma_len(sg), direction);
+	}
 }
-EXPORT_SYMBOL(dma_unmap_single);
 
-int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
-	       enum dma_data_direction direction)
+static void tile_dma_sync_sg_for_device(struct device *dev,
+					struct scatterlist *sglist, int nelems,
+					enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(!valid_dma_direction(direction));
+	WARN_ON(nelems == 0 || sglist->length == 0);
+
+	for_each_sg(sglist, sg, nelems, i) {
+		dma_sync_single_for_device(dev, sg->dma_address,
+					   sg_dma_len(sg), direction);
+	}
+}
+
+static inline int
+tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+static inline int
+tile_dma_supported(struct device *dev, u64 mask)
+{
+	return 1;
+}
+
+static struct dma_map_ops tile_default_dma_map_ops = {
+	.alloc = tile_dma_alloc_coherent,
+	.free = tile_dma_free_coherent,
+	.map_page = tile_dma_map_page,
+	.unmap_page = tile_dma_unmap_page,
+	.map_sg = tile_dma_map_sg,
+	.unmap_sg = tile_dma_unmap_sg,
+	.sync_single_for_cpu = tile_dma_sync_single_for_cpu,
+	.sync_single_for_device = tile_dma_sync_single_for_device,
+	.sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
+	.sync_sg_for_device = tile_dma_sync_sg_for_device,
+	.mapping_error = tile_dma_mapping_error,
+	.dma_supported = tile_dma_supported
+};
+
+struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
+EXPORT_SYMBOL(tile_dma_map_ops);
+
+/* Generic PCI DMA mapping functions */
+
+static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
+					 dma_addr_t *dma_handle, gfp_t gfp,
+					 struct dma_attrs *attrs)
+{
+	int node = dev_to_node(dev);
+	int order = get_order(size);
+	struct page *pg;
+	dma_addr_t addr;
+
+	gfp |= __GFP_ZERO;
+
+	pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
+	if (pg == NULL)
+		return NULL;
+
+	addr = page_to_phys(pg);
+
+	*dma_handle = phys_to_dma(dev, addr);
+
+	return page_address(pg);
+}
+
+/*
+ * Free memory that was allocated with tile_pci_dma_alloc_coherent.
+ */
+static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
+				       void *vaddr, dma_addr_t dma_handle,
+				       struct dma_attrs *attrs)
+{
+	homecache_free_pages((unsigned long)vaddr, get_order(size));
+}
+
+static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
+			       int nents, enum dma_data_direction direction,
+			       struct dma_attrs *attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -242,14 +386,20 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	for_each_sg(sglist, sg, nents, i) {
 		sg->dma_address = sg_phys(sg);
 		__dma_prep_pa_range(sg->dma_address, sg->length, direction);
+
+		sg->dma_address = phys_to_dma(dev, sg->dma_address);
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+		sg->dma_length = sg->length;
+#endif
 	}
 
 	return nents;
 }
-EXPORT_SYMBOL(dma_map_sg);
 
-void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
-		  enum dma_data_direction direction)
+static void tile_pci_dma_unmap_sg(struct device *dev,
+				  struct scatterlist *sglist, int nents,
+				  enum dma_data_direction direction,
+				  struct dma_attrs *attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -261,46 +411,60 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
 					direction);
 	}
 }
-EXPORT_SYMBOL(dma_unmap_sg);
 
-dma_addr_t dma_map_page(struct device *dev, struct page *page,
-			unsigned long offset, size_t size,
-			enum dma_data_direction direction)
+static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
+					unsigned long offset, size_t size,
+					enum dma_data_direction direction,
+					struct dma_attrs *attrs)
 {
 	BUG_ON(!valid_dma_direction(direction));
 
 	BUG_ON(offset + size > PAGE_SIZE);
 	__dma_prep_page(page, offset, size, direction);
-	return page_to_pa(page) + offset;
+
+	return phys_to_dma(dev, page_to_pa(page) + offset);
 }
-EXPORT_SYMBOL(dma_map_page);
 
-void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-		    enum dma_data_direction direction)
+static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+				    size_t size,
+				    enum dma_data_direction direction,
+				    struct dma_attrs *attrs)
 {
 	BUG_ON(!valid_dma_direction(direction));
+
+	dma_address = dma_to_phys(dev, dma_address);
+
 	__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
 			    dma_address & PAGE_OFFSET, size, direction);
 }
-EXPORT_SYMBOL(dma_unmap_page);
 
-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
-			     size_t size, enum dma_data_direction direction)
+static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
+					     dma_addr_t dma_handle,
+					     size_t size,
+					     enum dma_data_direction direction)
 {
 	BUG_ON(!valid_dma_direction(direction));
+
+	dma_handle = dma_to_phys(dev, dma_handle);
+
 	__dma_complete_pa_range(dma_handle, size, direction);
 }
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
 
-void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
-				size_t size, enum dma_data_direction direction)
+static void tile_pci_dma_sync_single_for_device(struct device *dev,
+						dma_addr_t dma_handle,
+						size_t size,
+						enum dma_data_direction
+						direction)
 {
+	dma_handle = dma_to_phys(dev, dma_handle);
+
 	__dma_prep_pa_range(dma_handle, size, direction);
 }
-EXPORT_SYMBOL(dma_sync_single_for_device);
 
-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
-			 int nelems, enum dma_data_direction direction)
+static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
+					 struct scatterlist *sglist,
+					 int nelems,
+					 enum dma_data_direction direction)
 {
 	struct scatterlist *sg;
 	int i;
@@ -313,10 +477,11 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
 					sg_dma_len(sg), direction);
 	}
 }
-EXPORT_SYMBOL(dma_sync_sg_for_cpu);
 
-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
-			    int nelems, enum dma_data_direction direction)
+static void tile_pci_dma_sync_sg_for_device(struct device *dev,
+					    struct scatterlist *sglist,
+					    int nelems,
+					    enum dma_data_direction direction)
 {
 	struct scatterlist *sg;
 	int i;
@@ -329,31 +494,93 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
 					sg_dma_len(sg), direction);
 	}
 }
-EXPORT_SYMBOL(dma_sync_sg_for_device);
 
-void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
-				   unsigned long offset, size_t size,
-				   enum dma_data_direction direction)
+static inline int
+tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-	dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction);
+	return 0;
 }
-EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
 
-void dma_sync_single_range_for_device(struct device *dev,
-				      dma_addr_t dma_handle,
-				      unsigned long offset, size_t size,
-				      enum dma_data_direction direction)
+static inline int
+tile_pci_dma_supported(struct device *dev, u64 mask)
 {
-	dma_sync_single_for_device(dev, dma_handle + offset, size, direction);
+	return 1;
 }
-EXPORT_SYMBOL(dma_sync_single_range_for_device);
 
-/*
- * dma_alloc_noncoherent() is #defined to return coherent memory,
- * so there's no need to do any flushing here.
- */
-void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
-		    enum dma_data_direction direction)
+static struct dma_map_ops tile_pci_default_dma_map_ops = {
+	.alloc = tile_pci_dma_alloc_coherent,
+	.free = tile_pci_dma_free_coherent,
+	.map_page = tile_pci_dma_map_page,
+	.unmap_page = tile_pci_dma_unmap_page,
+	.map_sg = tile_pci_dma_map_sg,
+	.unmap_sg = tile_pci_dma_unmap_sg,
+	.sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
+	.sync_single_for_device = tile_pci_dma_sync_single_for_device,
+	.sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
+	.sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
+	.mapping_error = tile_pci_dma_mapping_error,
+	.dma_supported = tile_pci_dma_supported
+};
+
+struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
+EXPORT_SYMBOL(gx_pci_dma_map_ops);
+
+/* PCI DMA mapping functions for legacy PCI devices */
+
+#ifdef CONFIG_SWIOTLB
+static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
+					 dma_addr_t *dma_handle, gfp_t gfp,
+					 struct dma_attrs *attrs)
+{
+	gfp |= GFP_DMA;
+	return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
+}
+
+static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
+				       void *vaddr, dma_addr_t dma_addr,
+				       struct dma_attrs *attrs)
 {
+	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
 }
-EXPORT_SYMBOL(dma_cache_sync);
+
+static struct dma_map_ops pci_swiotlb_dma_ops = {
+	.alloc = tile_swiotlb_alloc_coherent,
+	.free = tile_swiotlb_free_coherent,
+	.map_page = swiotlb_map_page,
+	.unmap_page = swiotlb_unmap_page,
+	.map_sg = swiotlb_map_sg_attrs,
+	.unmap_sg = swiotlb_unmap_sg_attrs,
+	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = swiotlb_sync_sg_for_device,
+	.dma_supported = swiotlb_dma_supported,
+	.mapping_error = swiotlb_dma_mapping_error,
+};
+
+struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
+#else
+struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+#endif
+EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
+
+#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
+int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+	/* Handle legacy PCI devices with limited memory addressability. */
+	if (((dma_ops == gx_pci_dma_map_ops) ||
+	     (dma_ops == gx_legacy_pci_dma_map_ops)) &&
+	    (mask <= DMA_BIT_MASK(32))) {
+		if (mask > dev->archdata.max_direct_dma_addr)
+			mask = dev->archdata.max_direct_dma_addr;
+	}
+
+	if (!dma_supported(dev, mask))
+		return -EIO;
+	dev->coherent_dma_mask = mask;
+	return 0;
+}
+EXPORT_SYMBOL(dma_set_coherent_mask);
+#endif
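Because the asm-generic dma-mapping-common.h wrappers pulled in by the new dma-mapping.h turn dma_map_single()/dma_map_page() calls into ops->map_page() calls, existing drivers pick up the bus-address offset and the swiotlb bounce path without source changes. A minimal streaming-DMA sketch follows; the function name is hypothetical and the buffer is assumed to come from kmalloc().

static int example_stream_to_device(struct device *dev, void *buf, size_t len)
{
	/*
	 * A 64-bit Gx PCI device lands in tile_pci_dma_map_page() and gets
	 * CPA + TILE_PCI_MEM_MAP_BASE_OFFSET; a legacy 32-bit device goes
	 * through swiotlb_map_page() and may be bounced.
	 */
	dma_addr_t bus = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, bus))
		return -ENOMEM;

	/* ... hand "bus" to the hardware and wait for completion ... */

	dma_unmap_single(dev, bus, len, DMA_TO_DEVICE);
	return 0;
}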
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 1b996bb628f..27f7ab02113 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -40,22 +40,8 @@
 #include <arch/sim.h>
 
 /*
- * Initialization flow and process
- * -------------------------------
- *
- * This files containes the routines to search for PCI buses,
+ * This file containes the routines to search for PCI buses,
  * enumerate the buses, and configure any attached devices.
- *
- * There are two entry points here:
- * 1) tile_pci_init
- *    This sets up the pci_controller structs, and opens the
- *    FDs to the hypervisor.  This is called from setup_arch() early
- *    in the boot process.
- * 2) pcibios_init
- *    This probes the PCI bus(es) for any attached hardware.  It's
- *    called by subsys_initcall.  All of the real work is done by the
- *    generic Linux PCI layer.
- *
  */
 
 #define DEBUG_PCI_CFG	0
@@ -110,6 +96,21 @@ static struct pci_ops tile_cfg_ops;
 /* Mask of CPUs that should receive PCIe interrupts. */
 static struct cpumask intr_cpus_map;
 
+/* PCI I/O space support is not implemented. */
+static struct resource pci_ioport_resource = {
+	.name	= "PCI IO",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_IO,
+};
+
+static struct resource pci_iomem_resource = {
+	.name	= "PCI mem",
+	.start	= TILE_PCI_MEM_START,
+	.end	= TILE_PCI_MEM_END,
+	.flags	= IORESOURCE_MEM,
+};
+
 /*
  * We don't need to worry about the alignment of resources.
  */
@@ -334,8 +335,6 @@ free_irqs:
 }
 
 /*
- * First initialization entry point, called from setup_arch().
- *
  * Find valid controllers and fill in pci_controller structs for each
  * of them.
  *
@@ -583,10 +582,7 @@ static int __devinit setup_pcie_rc_delay(char *str)
 early_param("pcie_rc_delay", setup_pcie_rc_delay);
 
 /*
- * Second PCI initialization entry point, called by subsys_initcall.
- *
- * The controllers have been set up by the time we get here, by a call to
- * tile_pci_init.
+ * PCI initialization entry point, called by subsys_initcall.
  */
 int __init pcibios_init(void)
 {
@@ -594,15 +590,13 @@ int __init pcibios_init(void)
 	LIST_HEAD(resources);
 	int i;
 
+	tile_pci_init();
+
 	if (num_rc_controllers == 0 && num_ep_controllers == 0)
 		return 0;
 
-	pr_info("PCI: Probing PCI hardware\n");
-
 	/*
 	 * We loop over all the TRIO shims and set up the MMIO mappings.
-	 * This step can't be done in tile_pci_init because the MM subsystem
-	 * hasn't been initialized then.
 	 */
 	for (i = 0; i < TILEGX_NUM_TRIO; i++) {
 		gxio_trio_context_t *context = &trio_contexts[i];
@@ -645,9 +639,7 @@ int __init pcibios_init(void)
 		unsigned int class_code_revision;
 		int trio_index;
 		int mac;
-#ifndef USE_SHARED_PCIE_CONFIG_REGION
 		int ret;
-#endif
 
 		if (trio_context->fd < 0)
 			continue;
@@ -802,8 +794,6 @@ int __init pcibios_init(void)
 			pr_err("PCI: PCI CFG PIO alloc failure for mac %d "
 			       "on TRIO %d, give up\n", mac, trio_index);
 
-			/* TBD: cleanup ... */
-
 			continue;
 		}
 
@@ -819,8 +809,6 @@ int __init pcibios_init(void)
 			pr_err("PCI: PCI CFG PIO init failure for mac %d "
 			       "on TRIO %d, give up\n", mac, trio_index);
 
-			/* TBD: cleanup ... */
-
 			continue;
 		}
 
@@ -837,8 +825,6 @@ int __init pcibios_init(void)
 			pr_err("PCI: PIO map failure for mac %d on TRIO %d\n",
 			       mac, trio_index);
 
-			/* TBD: cleanup ... */
-
 			continue;
 		}
 
@@ -852,7 +838,14 @@ int __init pcibios_init(void)
 			continue;
 		}
 
-		pci_add_resource(&resources, &iomem_resource);
+		/*
+		 * The PCI memory resource is located above the PA space.
+		 * The memory range for the PCI root bus should not overlap
+		 * with the physical RAM
+		 */
+		pci_add_resource_offset(&resources, &iomem_resource,
+					1ULL << CHIP_PA_WIDTH());
+
 		bus = pci_scan_root_bus(NULL, 0, controller->ops,
 					controller, &resources);
 		controller->root_bus = bus;
@@ -923,11 +916,6 @@ int __init pcibios_init(void)
 		}
 
 		/*
-		 * We always assign 32-bit PCI bus BAR ranges.
-		 */
-		BUG_ON(bus_address_hi != 0);
-
-		/*
 		 * Alloc a PIO region for PCI memory access for each RC port.
 		 */
 		ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
@@ -936,8 +924,6 @@ int __init pcibios_init(void)
 			       "give up\n", controller->trio_index,
 			       controller->mac);
 
-			/* TBD: cleanup ... */
-
 			continue;
 		}
 
@@ -950,15 +936,13 @@ int __init pcibios_init(void)
 		ret = gxio_trio_init_pio_region_aux(trio_context,
 						    controller->pio_mem_index,
 						    controller->mac,
-						    bus_address_hi,
+						    0,
 						    0);
 		if (ret < 0) {
 			pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, "
 			       "give up\n", controller->trio_index,
 			       controller->mac);
 
-			/* TBD: cleanup ... */
-
 			continue;
 		}
 
@@ -980,8 +964,6 @@ int __init pcibios_init(void)
 					controller->trio_index,
 					controller->mac, j);
 
-				/* TBD: cleanup ... */
-
 				goto alloc_mem_map_failed;
 			}
 
@@ -991,9 +973,13 @@ int __init pcibios_init(void)
 			 * Initialize the Mem-Map and the I/O MMU so that all
 			 * the physical memory can be accessed by the endpoint
 			 * devices. The base bus address is set to the base CPA
-			 * of this memory controller, so is the base VA. The
+			 * of this memory controller plus an offset (see pci.h).
+			 * The region's base VA is set to the base CPA. The
 			 * I/O MMU table essentially translates the CPA to
-			 * the real PA.
+			 * the real PA. Implicitly, for node 0, we create
+			 * a separate Mem-Map region that serves as the inbound
+			 * window for legacy 32-bit devices. This is a direct
+			 * map of the low 4GB CPA space.
 			 */
 			ret = gxio_trio_init_memory_map_mmu_aux(trio_context,
 						controller->mem_maps[j],
@@ -1001,7 +987,8 @@ int __init pcibios_init(void)
 						nr_pages << PAGE_SHIFT,
 						trio_context->asid,
 						controller->mac,
-						start_pfn << PAGE_SHIFT,
+						(start_pfn << PAGE_SHIFT) +
+						TILE_PCI_MEM_MAP_BASE_OFFSET,
 						j,
 						GXIO_TRIO_ORDER_MODE_UNORDERED);
 			if (ret < 0) {
@@ -1010,11 +997,8 @@ int __init pcibios_init(void)
 					controller->trio_index,
 					controller->mac, j);
 
-				/* TBD: cleanup ... */
-
 				goto alloc_mem_map_failed;
 			}
-
 			continue;
 
 alloc_mem_map_failed:
@@ -1028,11 +1012,19 @@ alloc_mem_map_failed:
 subsys_initcall(pcibios_init);
 
 /*
- * No bus fixups needed.
+ * PCI scan code calls the arch specific pcibios_fixup_bus() each time it scans
+ * a new bridge. Called after each bus is probed, but before its children are
+ * examined.
 */
 void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
-	/* Nothing needs to be done. */
+	struct pci_dev *dev = bus->self;
+
+	if (!dev) {
+		/* This is the root bus. */
+		bus->resource[0] = &pci_ioport_resource;
+		bus->resource[1] = &pci_iomem_resource;
+	}
 }
 
 /*
@@ -1069,6 +1061,17 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
+/* Called for each device after PCI setup is done. */
+static void __init
+pcibios_fixup_final(struct pci_dev *pdev)
+{
+	set_dma_ops(&pdev->dev, gx_pci_dma_map_ops);
+	set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET);
+	pdev->dev.archdata.max_direct_dma_addr =
+		TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
+
 /* Map a PCI MMIO bus address into VA space. */
 void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
 {
@@ -1127,7 +1130,7 @@ got_it:
 	 * We need to keep the PCI bus address's in-page offset in the VA.
 	 */
 	return iorpc_ioremap(trio_fd, offset, size) +
-					(phys_addr & (PAGE_SIZE - 1));
+		(phys_addr & (PAGE_SIZE - 1));
 }
 EXPORT_SYMBOL(ioremap);
 
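The offset passed to pci_add_resource_offset() is the CPU-to-bus delta for the root bus window, so resource-to-bus conversion subtracts 2^CHIP_PA_WIDTH(). Assuming a 40-bit PA width purely for illustration, a BAR given the CPU-side resource address 0x100_C000_0000 (inside pci_iomem_resource) is programmed with bus address 0xC000_0000, i.e. inside the 1GB window right below 4GB, while inbound DMA through the Mem-Map regions is translated back down by the same TILE_PCI_MEM_MAP_BASE_OFFSET.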
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index fdde3b6986e..2b8b689e596 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -23,6 +23,7 @@
 #include <linux/irq.h>
 #include <linux/kexec.h>
 #include <linux/pci.h>
+#include <linux/swiotlb.h>
 #include <linux/initrd.h>
 #include <linux/io.h>
 #include <linux/highmem.h>
@@ -109,7 +110,7 @@ static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = {
 };
 static nodemask_t __initdata isolnodes;
 
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
 enum { DEFAULT_PCI_RESERVE_MB = 64 };
 static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB;
 unsigned long __initdata pci_reserve_start_pfn = -1U;
@@ -160,7 +161,7 @@ static int __init setup_isolnodes(char *str)
 }
 early_param("isolnodes", setup_isolnodes);
 
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
 static int __init setup_pci_reserve(char* str)
 {
 	unsigned long mb;
@@ -171,7 +172,7 @@ static int __init setup_pci_reserve(char* str)
 
 	pci_reserve_mb = mb;
 	pr_info("Reserving %dMB for PCIE root complex mappings\n",
-	       pci_reserve_mb);
+		pci_reserve_mb);
 	return 0;
 }
 early_param("pci_reserve", setup_pci_reserve);
@@ -411,7 +412,7 @@ static void __init setup_memory(void)
 			continue;
 		}
 #endif
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
 		/*
 		 * Blocks that overlap the pci reserved region must
 		 * have enough space to hold the maximum percpu data
@@ -604,11 +605,9 @@ static void __init setup_bootmem_allocator_node(int i)
 	/* Free all the space back into the allocator. */
 	free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
 
-#if defined(CONFIG_PCI)
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
 	/*
-	 * Throw away any memory aliased by the PCI region.  FIXME: this
-	 * is a temporary hack to work around bug 10502, and needs to be
-	 * fixed properly.
+	 * Throw away any memory aliased by the PCI region.
 	 */
 	if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
 		reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
@@ -1353,8 +1352,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_cpu_maps();
 
 
-#ifdef CONFIG_PCI
-#if !defined (__tilegx__)
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
 	/*
 	 * Initialize the PCI structures. This is done before memory
 	 * setup so that we know whether or not a pci_reserve region
@@ -1362,7 +1360,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	if (tile_pci_init() == 0)
 		pci_reserve_mb = 0;
-#endif
 
 	/* PCI systems reserve a region just below 4GB for mapping iomem. */
 	pci_reserve_end_pfn = (1 << (32 - PAGE_SHIFT));
@@ -1384,6 +1381,10 @@ void __init setup_arch(char **cmdline_p)
 	 * any memory using the bootmem allocator.
 	 */
 
+#ifdef CONFIG_SWIOTLB
+	swiotlb_init(0);
+#endif
+
 	paging_init();
 	setup_numa_mapping();
 	zone_sizes_init();
@@ -1391,10 +1392,6 @@ void __init setup_arch(char **cmdline_p)
 	setup_cpu(1);
 	setup_clock();
 	load_hv_initrd();
-
-#if defined(CONFIG_PCI) && defined (__tilegx__)
-	tile_pci_init();
-#endif
 }
 
 
@@ -1538,11 +1535,11 @@ static struct resource code_resource = {
 };
 
 /*
- * We reserve all resources above 4GB so that PCI won't try to put
+ * On Pro, we reserve all resources above 4GB so that PCI won't try to put
  * mappings above 4GB; the standard allows that for some devices but
  * the probing code trunates values to 32 bits.
 */
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
 static struct resource* __init
 insert_non_bus_resource(void)
 {
@@ -1588,7 +1585,7 @@ static int __init request_standard_resources(void)
 	enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
 
 	iomem_resource.end = -1LL;
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
 	insert_non_bus_resource();
 #endif
 
@@ -1596,7 +1593,7 @@ static int __init request_standard_resources(void)
 		u64 start_pfn = node_start_pfn[i];
 		u64 end_pfn = node_end_pfn[i];
 
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
 		if (start_pfn <= pci_reserve_start_pfn &&
 		    end_pfn > pci_reserve_start_pfn) {
 			if (end_pfn > pci_reserve_end_pfn)