aboutsummaryrefslogtreecommitdiffstats
path: root/arch/tile/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-23 22:10:54 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-23 22:10:54 -0400
commitf0a08fcb5972167e55faa330c4a24fbaa3328b1f (patch)
treee24c42230888bd0e6422b2f81d7991da4373bb5d /arch/tile/kernel
parent474183b188b3c5af45831c71151f819fc70479b8 (diff)
parentf6d2ce00da145ae31ec22d21daca6ca5e22b3c84 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
Pull arch/tile updates from Chris Metcalf: "These changes provide support for PCIe root complex and USB host mode for tilegx's on-chip I/Os. In addition, this pull provides the required underpinning for the on-chip networking support that was pulled into 3.5. The changes have all been through LKML (with several rounds for PCIe RC) and on linux-next." * git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile: tile: updates to pci root complex from community feedback bounce: allow use of bounce pool via config option usb: add host support for the tilegx architecture arch/tile: provide kernel support for the tilegx USB shim tile pci: enable IOMMU to support DMA for legacy devices arch/tile: enable ZONE_DMA for tilegx tilegx pci: support I/O to arbitrarily-cached pages tile: remove unused header arch/tile: tilegx PCI root complex support arch/tile: provide kernel support for the tilegx TRIO shim arch/tile: break out the "csum a long" function to <asm/checksum.h> arch/tile: provide kernel support for the tilegx mPIPE shim arch/tile: common DMA code for the GXIO IORPC subsystem arch/tile: support MMIO-based readb/writeb etc. arch/tile: introduce GXIO IORPC framework for tilegx
Diffstat (limited to 'arch/tile/kernel')
-rw-r--r--arch/tile/kernel/Makefile5
-rw-r--r--arch/tile/kernel/pci-dma.c536
-rw-r--r--arch/tile/kernel/pci_gx.c1543
-rw-r--r--arch/tile/kernel/setup.c45
-rw-r--r--arch/tile/kernel/usb.c69
5 files changed, 2080 insertions, 118 deletions
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 5de99248d8df..5334be8e2538 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -14,4 +14,9 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o
14obj-$(CONFIG_MODULES) += module.o 14obj-$(CONFIG_MODULES) += module.o
15obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 15obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
16obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel_$(BITS).o 16obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel_$(BITS).o
17ifdef CONFIG_TILEGX
18obj-$(CONFIG_PCI) += pci_gx.o
19else
17obj-$(CONFIG_PCI) += pci.o 20obj-$(CONFIG_PCI) += pci.o
21endif
22obj-$(CONFIG_TILE_USB) += usb.o
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index b3ed19f8779c..b9fe80ec1089 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -14,6 +14,7 @@
14 14
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/dma-mapping.h> 16#include <linux/dma-mapping.h>
17#include <linux/swiotlb.h>
17#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
18#include <linux/export.h> 19#include <linux/export.h>
19#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
@@ -22,13 +23,18 @@
22/* Generic DMA mapping functions: */ 23/* Generic DMA mapping functions: */
23 24
24/* 25/*
25 * Allocate what Linux calls "coherent" memory, which for us just 26 * Allocate what Linux calls "coherent" memory. On TILEPro this is
26 * means uncached. 27 * uncached memory; on TILE-Gx it is hash-for-home memory.
27 */ 28 */
28void *dma_alloc_coherent(struct device *dev, 29#ifdef __tilepro__
29 size_t size, 30#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
30 dma_addr_t *dma_handle, 31#else
31 gfp_t gfp) 32#define PAGE_HOME_DMA PAGE_HOME_HASH
33#endif
34
35static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
36 dma_addr_t *dma_handle, gfp_t gfp,
37 struct dma_attrs *attrs)
32{ 38{
33 u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32); 39 u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
34 int node = dev_to_node(dev); 40 int node = dev_to_node(dev);
@@ -39,39 +45,42 @@ void *dma_alloc_coherent(struct device *dev,
39 gfp |= __GFP_ZERO; 45 gfp |= __GFP_ZERO;
40 46
41 /* 47 /*
42 * By forcing NUMA node 0 for 32-bit masks we ensure that the 48 * If the mask specifies that the memory be in the first 4 GB, then
43 * high 32 bits of the resulting PA will be zero. If the mask 49 * we force the allocation to come from the DMA zone. We also
44 * size is, e.g., 24, we may still not be able to guarantee a 50 * force the node to 0 since that's the only node where the DMA
45 * suitable memory address, in which case we will return NULL. 51 * zone isn't empty. If the mask size is smaller than 32 bits, we
46 * But such devices are uncommon. 52 * may still not be able to guarantee a suitable memory address, in
53 * which case we will return NULL. But such devices are uncommon.
47 */ 54 */
48 if (dma_mask <= DMA_BIT_MASK(32)) 55 if (dma_mask <= DMA_BIT_MASK(32)) {
56 gfp |= GFP_DMA;
49 node = 0; 57 node = 0;
58 }
50 59
51 pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_UNCACHED); 60 pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
52 if (pg == NULL) 61 if (pg == NULL)
53 return NULL; 62 return NULL;
54 63
55 addr = page_to_phys(pg); 64 addr = page_to_phys(pg);
56 if (addr + size > dma_mask) { 65 if (addr + size > dma_mask) {
57 homecache_free_pages(addr, order); 66 __homecache_free_pages(pg, order);
58 return NULL; 67 return NULL;
59 } 68 }
60 69
61 *dma_handle = addr; 70 *dma_handle = addr;
71
62 return page_address(pg); 72 return page_address(pg);
63} 73}
64EXPORT_SYMBOL(dma_alloc_coherent);
65 74
66/* 75/*
67 * Free memory that was allocated with dma_alloc_coherent. 76 * Free memory that was allocated with tile_dma_alloc_coherent.
68 */ 77 */
69void dma_free_coherent(struct device *dev, size_t size, 78static void tile_dma_free_coherent(struct device *dev, size_t size,
70 void *vaddr, dma_addr_t dma_handle) 79 void *vaddr, dma_addr_t dma_handle,
80 struct dma_attrs *attrs)
71{ 81{
72 homecache_free_pages((unsigned long)vaddr, get_order(size)); 82 homecache_free_pages((unsigned long)vaddr, get_order(size));
73} 83}
74EXPORT_SYMBOL(dma_free_coherent);
75 84
76/* 85/*
77 * The map routines "map" the specified address range for DMA 86 * The map routines "map" the specified address range for DMA
@@ -87,52 +96,285 @@ EXPORT_SYMBOL(dma_free_coherent);
87 * can count on nothing having been touched. 96 * can count on nothing having been touched.
88 */ 97 */
89 98
90/* Flush a PA range from cache page by page. */ 99/* Set up a single page for DMA access. */
91static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size) 100static void __dma_prep_page(struct page *page, unsigned long offset,
101 size_t size, enum dma_data_direction direction)
102{
103 /*
104 * Flush the page from cache if necessary.
105 * On tilegx, data is delivered to hash-for-home L3; on tilepro,
106 * data is delivered direct to memory.
107 *
108 * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
109 * this to be a "flush" not a "finv" and keep some of the
110 * state in cache across the DMA operation, but it doesn't seem
111 * worth creating the necessary flush_buffer_xxx() infrastructure.
112 */
113 int home = page_home(page);
114 switch (home) {
115 case PAGE_HOME_HASH:
116#ifdef __tilegx__
117 return;
118#endif
119 break;
120 case PAGE_HOME_UNCACHED:
121#ifdef __tilepro__
122 return;
123#endif
124 break;
125 case PAGE_HOME_IMMUTABLE:
126 /* Should be going to the device only. */
127 BUG_ON(direction == DMA_FROM_DEVICE ||
128 direction == DMA_BIDIRECTIONAL);
129 return;
130 case PAGE_HOME_INCOHERENT:
131 /* Incoherent anyway, so no need to work hard here. */
132 return;
133 default:
134 BUG_ON(home < 0 || home >= NR_CPUS);
135 break;
136 }
137 homecache_finv_page(page);
138
139#ifdef DEBUG_ALIGNMENT
140 /* Warn if the region isn't cacheline aligned. */
141 if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
142 pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
143 PFN_PHYS(page_to_pfn(page)) + offset, size);
144#endif
145}
146
147/* Make the page ready to be read by the core. */
148static void __dma_complete_page(struct page *page, unsigned long offset,
149 size_t size, enum dma_data_direction direction)
150{
151#ifdef __tilegx__
152 switch (page_home(page)) {
153 case PAGE_HOME_HASH:
154 /* I/O device delivered data the way the cpu wanted it. */
155 break;
156 case PAGE_HOME_INCOHERENT:
157 /* Incoherent anyway, so no need to work hard here. */
158 break;
159 case PAGE_HOME_IMMUTABLE:
160 /* Extra read-only copies are not a problem. */
161 break;
162 default:
163 /* Flush the bogus hash-for-home I/O entries to memory. */
164 homecache_finv_map_page(page, PAGE_HOME_HASH);
165 break;
166 }
167#endif
168}
169
170static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
171 enum dma_data_direction direction)
92{ 172{
93 struct page *page = pfn_to_page(PFN_DOWN(dma_addr)); 173 struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
94 size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1)); 174 unsigned long offset = dma_addr & (PAGE_SIZE - 1);
175 size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
176
177 while (size != 0) {
178 __dma_prep_page(page, offset, bytes, direction);
179 size -= bytes;
180 ++page;
181 offset = 0;
182 bytes = min((size_t)PAGE_SIZE, size);
183 }
184}
95 185
96 while ((ssize_t)size > 0) { 186static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
97 /* Flush the page. */ 187 enum dma_data_direction direction)
98 homecache_flush_cache(page++, 0); 188{
189 struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
190 unsigned long offset = dma_addr & (PAGE_SIZE - 1);
191 size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
192
193 while (size != 0) {
194 __dma_complete_page(page, offset, bytes, direction);
195 size -= bytes;
196 ++page;
197 offset = 0;
198 bytes = min((size_t)PAGE_SIZE, size);
199 }
200}
201
202static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
203 int nents, enum dma_data_direction direction,
204 struct dma_attrs *attrs)
205{
206 struct scatterlist *sg;
207 int i;
208
209 BUG_ON(!valid_dma_direction(direction));
210
211 WARN_ON(nents == 0 || sglist->length == 0);
99 212
100 /* Figure out if we need to continue on the next page. */ 213 for_each_sg(sglist, sg, nents, i) {
101 size -= bytesleft; 214 sg->dma_address = sg_phys(sg);
102 bytesleft = PAGE_SIZE; 215 __dma_prep_pa_range(sg->dma_address, sg->length, direction);
216#ifdef CONFIG_NEED_SG_DMA_LENGTH
217 sg->dma_length = sg->length;
218#endif
103 } 219 }
220
221 return nents;
104} 222}
105 223
106/* 224static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
107 * dma_map_single can be passed any memory address, and there appear 225 int nents, enum dma_data_direction direction,
108 * to be no alignment constraints. 226 struct dma_attrs *attrs)
109 * 227{
110 * There is a chance that the start of the buffer will share a cache 228 struct scatterlist *sg;
111 * line with some other data that has been touched in the meantime. 229 int i;
112 */ 230
113dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, 231 BUG_ON(!valid_dma_direction(direction));
114 enum dma_data_direction direction) 232 for_each_sg(sglist, sg, nents, i) {
233 sg->dma_address = sg_phys(sg);
234 __dma_complete_pa_range(sg->dma_address, sg->length,
235 direction);
236 }
237}
238
239static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
240 unsigned long offset, size_t size,
241 enum dma_data_direction direction,
242 struct dma_attrs *attrs)
115{ 243{
116 dma_addr_t dma_addr = __pa(ptr); 244 BUG_ON(!valid_dma_direction(direction));
245
246 BUG_ON(offset + size > PAGE_SIZE);
247 __dma_prep_page(page, offset, size, direction);
117 248
249 return page_to_pa(page) + offset;
250}
251
252static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
253 size_t size, enum dma_data_direction direction,
254 struct dma_attrs *attrs)
255{
118 BUG_ON(!valid_dma_direction(direction)); 256 BUG_ON(!valid_dma_direction(direction));
119 WARN_ON(size == 0);
120 257
121 __dma_map_pa_range(dma_addr, size); 258 __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
259 dma_address & PAGE_OFFSET, size, direction);
260}
122 261
123 return dma_addr; 262static void tile_dma_sync_single_for_cpu(struct device *dev,
263 dma_addr_t dma_handle,
264 size_t size,
265 enum dma_data_direction direction)
266{
267 BUG_ON(!valid_dma_direction(direction));
268
269 __dma_complete_pa_range(dma_handle, size, direction);
270}
271
272static void tile_dma_sync_single_for_device(struct device *dev,
273 dma_addr_t dma_handle, size_t size,
274 enum dma_data_direction direction)
275{
276 __dma_prep_pa_range(dma_handle, size, direction);
124} 277}
125EXPORT_SYMBOL(dma_map_single);
126 278
127void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, 279static void tile_dma_sync_sg_for_cpu(struct device *dev,
128 enum dma_data_direction direction) 280 struct scatterlist *sglist, int nelems,
281 enum dma_data_direction direction)
129{ 282{
283 struct scatterlist *sg;
284 int i;
285
286 BUG_ON(!valid_dma_direction(direction));
287 WARN_ON(nelems == 0 || sglist->length == 0);
288
289 for_each_sg(sglist, sg, nelems, i) {
290 dma_sync_single_for_cpu(dev, sg->dma_address,
291 sg_dma_len(sg), direction);
292 }
293}
294
295static void tile_dma_sync_sg_for_device(struct device *dev,
296 struct scatterlist *sglist, int nelems,
297 enum dma_data_direction direction)
298{
299 struct scatterlist *sg;
300 int i;
301
130 BUG_ON(!valid_dma_direction(direction)); 302 BUG_ON(!valid_dma_direction(direction));
303 WARN_ON(nelems == 0 || sglist->length == 0);
304
305 for_each_sg(sglist, sg, nelems, i) {
306 dma_sync_single_for_device(dev, sg->dma_address,
307 sg_dma_len(sg), direction);
308 }
309}
310
311static inline int
312tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
313{
314 return 0;
315}
316
317static inline int
318tile_dma_supported(struct device *dev, u64 mask)
319{
320 return 1;
321}
322
323static struct dma_map_ops tile_default_dma_map_ops = {
324 .alloc = tile_dma_alloc_coherent,
325 .free = tile_dma_free_coherent,
326 .map_page = tile_dma_map_page,
327 .unmap_page = tile_dma_unmap_page,
328 .map_sg = tile_dma_map_sg,
329 .unmap_sg = tile_dma_unmap_sg,
330 .sync_single_for_cpu = tile_dma_sync_single_for_cpu,
331 .sync_single_for_device = tile_dma_sync_single_for_device,
332 .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
333 .sync_sg_for_device = tile_dma_sync_sg_for_device,
334 .mapping_error = tile_dma_mapping_error,
335 .dma_supported = tile_dma_supported
336};
337
338struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
339EXPORT_SYMBOL(tile_dma_map_ops);
340
341/* Generic PCI DMA mapping functions */
342
343static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
344 dma_addr_t *dma_handle, gfp_t gfp,
345 struct dma_attrs *attrs)
346{
347 int node = dev_to_node(dev);
348 int order = get_order(size);
349 struct page *pg;
350 dma_addr_t addr;
351
352 gfp |= __GFP_ZERO;
353
354 pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
355 if (pg == NULL)
356 return NULL;
357
358 addr = page_to_phys(pg);
359
360 *dma_handle = phys_to_dma(dev, addr);
361
362 return page_address(pg);
363}
364
365/*
366 * Free memory that was allocated with tile_pci_dma_alloc_coherent.
367 */
368static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
369 void *vaddr, dma_addr_t dma_handle,
370 struct dma_attrs *attrs)
371{
372 homecache_free_pages((unsigned long)vaddr, get_order(size));
131} 373}
132EXPORT_SYMBOL(dma_unmap_single);
133 374
134int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, 375static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
135 enum dma_data_direction direction) 376 int nents, enum dma_data_direction direction,
377 struct dma_attrs *attrs)
136{ 378{
137 struct scatterlist *sg; 379 struct scatterlist *sg;
138 int i; 380 int i;
@@ -143,73 +385,103 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
143 385
144 for_each_sg(sglist, sg, nents, i) { 386 for_each_sg(sglist, sg, nents, i) {
145 sg->dma_address = sg_phys(sg); 387 sg->dma_address = sg_phys(sg);
146 __dma_map_pa_range(sg->dma_address, sg->length); 388 __dma_prep_pa_range(sg->dma_address, sg->length, direction);
389
390 sg->dma_address = phys_to_dma(dev, sg->dma_address);
391#ifdef CONFIG_NEED_SG_DMA_LENGTH
392 sg->dma_length = sg->length;
393#endif
147 } 394 }
148 395
149 return nents; 396 return nents;
150} 397}
151EXPORT_SYMBOL(dma_map_sg);
152 398
153void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, 399static void tile_pci_dma_unmap_sg(struct device *dev,
154 enum dma_data_direction direction) 400 struct scatterlist *sglist, int nents,
401 enum dma_data_direction direction,
402 struct dma_attrs *attrs)
155{ 403{
404 struct scatterlist *sg;
405 int i;
406
156 BUG_ON(!valid_dma_direction(direction)); 407 BUG_ON(!valid_dma_direction(direction));
408 for_each_sg(sglist, sg, nents, i) {
409 sg->dma_address = sg_phys(sg);
410 __dma_complete_pa_range(sg->dma_address, sg->length,
411 direction);
412 }
157} 413}
158EXPORT_SYMBOL(dma_unmap_sg);
159 414
160dma_addr_t dma_map_page(struct device *dev, struct page *page, 415static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
161 unsigned long offset, size_t size, 416 unsigned long offset, size_t size,
162 enum dma_data_direction direction) 417 enum dma_data_direction direction,
418 struct dma_attrs *attrs)
163{ 419{
164 BUG_ON(!valid_dma_direction(direction)); 420 BUG_ON(!valid_dma_direction(direction));
165 421
166 BUG_ON(offset + size > PAGE_SIZE); 422 BUG_ON(offset + size > PAGE_SIZE);
167 homecache_flush_cache(page, 0); 423 __dma_prep_page(page, offset, size, direction);
168 424
169 return page_to_pa(page) + offset; 425 return phys_to_dma(dev, page_to_pa(page) + offset);
170} 426}
171EXPORT_SYMBOL(dma_map_page);
172 427
173void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, 428static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
174 enum dma_data_direction direction) 429 size_t size,
430 enum dma_data_direction direction,
431 struct dma_attrs *attrs)
175{ 432{
176 BUG_ON(!valid_dma_direction(direction)); 433 BUG_ON(!valid_dma_direction(direction));
434
435 dma_address = dma_to_phys(dev, dma_address);
436
437 __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
438 dma_address & PAGE_OFFSET, size, direction);
177} 439}
178EXPORT_SYMBOL(dma_unmap_page);
179 440
180void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, 441static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
181 size_t size, enum dma_data_direction direction) 442 dma_addr_t dma_handle,
443 size_t size,
444 enum dma_data_direction direction)
182{ 445{
183 BUG_ON(!valid_dma_direction(direction)); 446 BUG_ON(!valid_dma_direction(direction));
447
448 dma_handle = dma_to_phys(dev, dma_handle);
449
450 __dma_complete_pa_range(dma_handle, size, direction);
184} 451}
185EXPORT_SYMBOL(dma_sync_single_for_cpu);
186 452
187void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, 453static void tile_pci_dma_sync_single_for_device(struct device *dev,
188 size_t size, enum dma_data_direction direction) 454 dma_addr_t dma_handle,
455 size_t size,
456 enum dma_data_direction
457 direction)
189{ 458{
190 unsigned long start = PFN_DOWN(dma_handle); 459 dma_handle = dma_to_phys(dev, dma_handle);
191 unsigned long end = PFN_DOWN(dma_handle + size - 1);
192 unsigned long i;
193 460
194 BUG_ON(!valid_dma_direction(direction)); 461 __dma_prep_pa_range(dma_handle, size, direction);
195 for (i = start; i <= end; ++i)
196 homecache_flush_cache(pfn_to_page(i), 0);
197} 462}
198EXPORT_SYMBOL(dma_sync_single_for_device);
199 463
200void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, 464static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
201 enum dma_data_direction direction) 465 struct scatterlist *sglist,
466 int nelems,
467 enum dma_data_direction direction)
202{ 468{
469 struct scatterlist *sg;
470 int i;
471
203 BUG_ON(!valid_dma_direction(direction)); 472 BUG_ON(!valid_dma_direction(direction));
204 WARN_ON(nelems == 0 || sg[0].length == 0); 473 WARN_ON(nelems == 0 || sglist->length == 0);
474
475 for_each_sg(sglist, sg, nelems, i) {
476 dma_sync_single_for_cpu(dev, sg->dma_address,
477 sg_dma_len(sg), direction);
478 }
205} 479}
206EXPORT_SYMBOL(dma_sync_sg_for_cpu);
207 480
208/* 481static void tile_pci_dma_sync_sg_for_device(struct device *dev,
209 * Flush and invalidate cache for scatterlist. 482 struct scatterlist *sglist,
210 */ 483 int nelems,
211void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, 484 enum dma_data_direction direction)
212 int nelems, enum dma_data_direction direction)
213{ 485{
214 struct scatterlist *sg; 486 struct scatterlist *sg;
215 int i; 487 int i;
@@ -222,31 +494,93 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
222 sg_dma_len(sg), direction); 494 sg_dma_len(sg), direction);
223 } 495 }
224} 496}
225EXPORT_SYMBOL(dma_sync_sg_for_device);
226 497
227void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, 498static inline int
228 unsigned long offset, size_t size, 499tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
229 enum dma_data_direction direction)
230{ 500{
231 dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction); 501 return 0;
232} 502}
233EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
234 503
235void dma_sync_single_range_for_device(struct device *dev, 504static inline int
236 dma_addr_t dma_handle, 505tile_pci_dma_supported(struct device *dev, u64 mask)
237 unsigned long offset, size_t size,
238 enum dma_data_direction direction)
239{ 506{
240 dma_sync_single_for_device(dev, dma_handle + offset, size, direction); 507 return 1;
241} 508}
242EXPORT_SYMBOL(dma_sync_single_range_for_device);
243 509
244/* 510static struct dma_map_ops tile_pci_default_dma_map_ops = {
245 * dma_alloc_noncoherent() returns non-cacheable memory, so there's no 511 .alloc = tile_pci_dma_alloc_coherent,
246 * need to do any flushing here. 512 .free = tile_pci_dma_free_coherent,
247 */ 513 .map_page = tile_pci_dma_map_page,
248void dma_cache_sync(struct device *dev, void *vaddr, size_t size, 514 .unmap_page = tile_pci_dma_unmap_page,
249 enum dma_data_direction direction) 515 .map_sg = tile_pci_dma_map_sg,
516 .unmap_sg = tile_pci_dma_unmap_sg,
517 .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
518 .sync_single_for_device = tile_pci_dma_sync_single_for_device,
519 .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
520 .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
521 .mapping_error = tile_pci_dma_mapping_error,
522 .dma_supported = tile_pci_dma_supported
523};
524
525struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
526EXPORT_SYMBOL(gx_pci_dma_map_ops);
527
528/* PCI DMA mapping functions for legacy PCI devices */
529
530#ifdef CONFIG_SWIOTLB
531static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
532 dma_addr_t *dma_handle, gfp_t gfp,
533 struct dma_attrs *attrs)
250{ 534{
535 gfp |= GFP_DMA;
536 return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
537}
538
539static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
540 void *vaddr, dma_addr_t dma_addr,
541 struct dma_attrs *attrs)
542{
543 swiotlb_free_coherent(dev, size, vaddr, dma_addr);
544}
545
546static struct dma_map_ops pci_swiotlb_dma_ops = {
547 .alloc = tile_swiotlb_alloc_coherent,
548 .free = tile_swiotlb_free_coherent,
549 .map_page = swiotlb_map_page,
550 .unmap_page = swiotlb_unmap_page,
551 .map_sg = swiotlb_map_sg_attrs,
552 .unmap_sg = swiotlb_unmap_sg_attrs,
553 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
554 .sync_single_for_device = swiotlb_sync_single_for_device,
555 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
556 .sync_sg_for_device = swiotlb_sync_sg_for_device,
557 .dma_supported = swiotlb_dma_supported,
558 .mapping_error = swiotlb_dma_mapping_error,
559};
560
561struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
562#else
563struct dma_map_ops *gx_legacy_pci_dma_map_ops;
564#endif
565EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
566
#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
/*
 * Set the coherent DMA mask of a device.
 *
 * For devices using either of the Gx PCI dma_map_ops, a mask that fits
 * in 32 bits is additionally capped at archdata.max_direct_dma_addr, to
 * handle legacy PCI devices with limited memory addressability.
 *
 * Returns 0 on success, or -EIO if dma_supported() rejects the
 * (possibly capped) mask, in which case the device is left unchanged.
 */
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	if (mask <= DMA_BIT_MASK(32) &&
	    (ops == gx_pci_dma_map_ops || ops == gx_legacy_pci_dma_map_ops) &&
	    mask > dev->archdata.max_direct_dma_addr)
		mask = dev->archdata.max_direct_dma_addr;

	if (dma_supported(dev, mask)) {
		dev->coherent_dma_mask = mask;
		return 0;
	}

	return -EIO;
}
EXPORT_SYMBOL(dma_set_coherent_mask);
#endif
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
new file mode 100644
index 000000000000..fa75264a82ae
--- /dev/null
+++ b/arch/tile/kernel/pci_gx.c
@@ -0,0 +1,1543 @@
1/*
2 * Copyright 2012 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/kernel.h>
16#include <linux/mmzone.h>
17#include <linux/pci.h>
18#include <linux/delay.h>
19#include <linux/string.h>
20#include <linux/init.h>
21#include <linux/capability.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/irq.h>
25#include <linux/msi.h>
26#include <linux/io.h>
27#include <linux/uaccess.h>
28#include <linux/ctype.h>
29
30#include <asm/processor.h>
31#include <asm/sections.h>
32#include <asm/byteorder.h>
33
34#include <gxio/iorpc_globals.h>
35#include <gxio/kiorpc.h>
36#include <gxio/trio.h>
37#include <gxio/iorpc_trio.h>
38#include <hv/drv_trio_intf.h>
39
40#include <arch/sim.h>
41
42/*
43 * This file contains the routines to search for PCI buses,
44 * enumerate the buses, and configure any attached devices.
45 */
46
/* Set to 1 to log every PCI config-space read and write. */
#define DEBUG_PCI_CFG 0

/*
 * Config-access trace helpers.  They expand to nothing unless
 * DEBUG_PCI_CFG is non-zero.  Every argument is parenthesized so that
 * an expression argument (e.g. "a | b" for offset) is masked as a whole.
 * The trailing semicolon in the expansion is kept as in the original
 * definition so existing call sites behave the same.
 */
#if DEBUG_PCI_CFG
#define TRACE_CFG_WR(size, val, bus, dev, func, offset) \
	pr_info("CFG WR %d-byte VAL %#x to bus %d dev %d func %d addr %u\n", \
		(size), (val), (bus), (dev), (func), (offset) & 0xFFF);
#define TRACE_CFG_RD(size, val, bus, dev, func, offset) \
	pr_info("CFG RD %d-byte VAL %#x from bus %d dev %d func %d addr %u\n", \
		(size), (val), (bus), (dev), (func), (offset) & 0xFFF);
#else
#define TRACE_CFG_WR(...)
#define TRACE_CFG_RD(...)
#endif
60
61static int __devinitdata pci_probe = 1;
62
63/* Information on the PCIe RC ports configuration. */
64static int __devinitdata pcie_rc[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES];
65
66/*
67 * On some platforms with one or more Gx endpoint ports, we need to
68 * delay the PCIe RC port probe for a few seconds to work around
69 * a HW PCIe link-training bug. The exact delay is specified with
70 * a kernel boot argument in the form of "pcie_rc_delay=T,P,S",
71 * where T is the TRIO instance number, P is the port number and S is
72 * the delay in seconds. If the delay is not provided, the value
73 * will be DEFAULT_RC_DELAY.
74 */
75static int __devinitdata rc_delay[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES];
76
77/* Default number of seconds that the PCIe RC port probe can be delayed. */
78#define DEFAULT_RC_DELAY 10
79
80/* Max number of seconds that the PCIe RC port probe can be delayed. */
81#define MAX_RC_DELAY 20
82
83/* Array of the PCIe ports configuration info obtained from the BIB. */
84struct pcie_port_property pcie_ports[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES];
85
86/* All drivers share the TRIO contexts defined here. */
87gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
88
89/* Array of PCIe RC controllers. */
90struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES];
91int num_rc_controllers;
92static int num_ep_controllers;
93
94static struct pci_ops tile_cfg_ops;
95
96/* Mask of CPUs that should receive PCIe interrupts. */
97static struct cpumask intr_cpus_map;
98
99/*
100 * We don't need to worry about the alignment of resources.
101 */
102resource_size_t pcibios_align_resource(void *data, const struct resource *res,
103 resource_size_t size, resource_size_t align)
104{
105 return res->start;
106}
107EXPORT_SYMBOL(pcibios_align_resource);
108
109
110/*
111 * Pick a CPU to receive and handle the PCIe interrupts, based on the IRQ #.
112 * For now, we simply send interrupts to non-dataplane CPUs.
113 * We may implement methods to allow user to specify the target CPUs,
114 * e.g. via boot arguments.
115 */
116static int tile_irq_cpu(int irq)
117{
118 unsigned int count;
119 int i = 0;
120 int cpu;
121
122 count = cpumask_weight(&intr_cpus_map);
123 if (unlikely(count == 0)) {
124 pr_warning("intr_cpus_map empty, interrupts will be"
125 " delievered to dataplane tiles\n");
126 return irq % (smp_height * smp_width);
127 }
128
129 count = irq % count;
130 for_each_cpu(cpu, &intr_cpus_map) {
131 if (i++ == count)
132 break;
133 }
134 return cpu;
135}
136
137/*
138 * Open a file descriptor to the TRIO shim.
139 */
140static int __devinit tile_pcie_open(int trio_index)
141{
142 gxio_trio_context_t *context = &trio_contexts[trio_index];
143 int ret;
144
145 /*
146 * This opens a file descriptor to the TRIO shim.
147 */
148 ret = gxio_trio_init(context, trio_index);
149 if (ret < 0)
150 return ret;
151
152 /*
153 * Allocate an ASID for the kernel.
154 */
155 ret = gxio_trio_alloc_asids(context, 1, 0, 0);
156 if (ret < 0) {
157 pr_err("PCI: ASID alloc failure on TRIO %d, give up\n",
158 trio_index);
159 goto asid_alloc_failure;
160 }
161
162 context->asid = ret;
163
164#ifdef USE_SHARED_PCIE_CONFIG_REGION
165 /*
166 * Alloc a PIO region for config access, shared by all MACs per TRIO.
167 * This shouldn't fail since the kernel is supposed to the first
168 * client of the TRIO's PIO regions.
169 */
170 ret = gxio_trio_alloc_pio_regions(context, 1, 0, 0);
171 if (ret < 0) {
172 pr_err("PCI: CFG PIO alloc failure on TRIO %d, give up\n",
173 trio_index);
174 goto pio_alloc_failure;
175 }
176
177 context->pio_cfg_index = ret;
178
179 /*
180 * For PIO CFG, the bus_address_hi parameter is 0. The mac parameter
181 * is also 0 because it is specified in PIO_REGION_SETUP_CFG_ADDR.
182 */
183 ret = gxio_trio_init_pio_region_aux(context, context->pio_cfg_index,
184 0, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE);
185 if (ret < 0) {
186 pr_err("PCI: CFG PIO init failure on TRIO %d, give up\n",
187 trio_index);
188 goto pio_alloc_failure;
189 }
190#endif
191
192 return ret;
193
194asid_alloc_failure:
195#ifdef USE_SHARED_PCIE_CONFIG_REGION
196pio_alloc_failure:
197#endif
198 hv_dev_close(context->fd);
199
200 return ret;
201}
202
203static void
204tilegx_legacy_irq_ack(struct irq_data *d)
205{
206 __insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq);
207}
208
209static void
210tilegx_legacy_irq_mask(struct irq_data *d)
211{
212 __insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq);
213}
214
215static void
216tilegx_legacy_irq_unmask(struct irq_data *d)
217{
218 __insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq);
219}
220
221static struct irq_chip tilegx_legacy_irq_chip = {
222 .name = "tilegx_legacy_irq",
223 .irq_ack = tilegx_legacy_irq_ack,
224 .irq_mask = tilegx_legacy_irq_mask,
225 .irq_unmask = tilegx_legacy_irq_unmask,
226
227 /* TBD: support set_affinity. */
228};
229
230/*
231 * This is a wrapper function of the kernel level-trigger interrupt
232 * handler handle_level_irq() for PCI legacy interrupts. The TRIO
233 * is configured such that only INTx Assert interrupts are proxied
234 * to Linux which just calls handle_level_irq() after clearing the
235 * MAC INTx Assert status bit associated with this interrupt.
236 */
237static void
238trio_handle_level_irq(unsigned int irq, struct irq_desc *desc)
239{
240 struct pci_controller *controller = irq_desc_get_handler_data(desc);
241 gxio_trio_context_t *trio_context = controller->trio;
242 uint64_t intx = (uint64_t)irq_desc_get_chip_data(desc);
243 int mac = controller->mac;
244 unsigned int reg_offset;
245 uint64_t level_mask;
246
247 handle_level_irq(irq, desc);
248
249 /*
250 * Clear the INTx Level status, otherwise future interrupts are
251 * not sent.
252 */
253 reg_offset = (TRIO_PCIE_INTFC_MAC_INT_STS <<
254 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
255 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
256 TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
257 (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
258
259 level_mask = TRIO_PCIE_INTFC_MAC_INT_STS__INT_LEVEL_MASK << intx;
260
261 __gxio_mmio_write(trio_context->mmio_base_mac + reg_offset, level_mask);
262}
263
264/*
265 * Create kernel irqs and set up the handlers for the legacy interrupts.
266 * Also some minimum initialization for the MSI support.
267 */
268static int __devinit tile_init_irqs(struct pci_controller *controller)
269{
270 int i;
271 int j;
272 int irq;
273 int result;
274
275 cpumask_copy(&intr_cpus_map, cpu_online_mask);
276
277
278 for (i = 0; i < 4; i++) {
279 gxio_trio_context_t *context = controller->trio;
280 int cpu;
281
282 /* Ask the kernel to allocate an IRQ. */
283 irq = create_irq();
284 if (irq < 0) {
285 pr_err("PCI: no free irq vectors, failed for %d\n", i);
286
287 goto free_irqs;
288 }
289 controller->irq_intx_table[i] = irq;
290
291 /* Distribute the 4 IRQs to different tiles. */
292 cpu = tile_irq_cpu(irq);
293
294 /* Configure the TRIO intr binding for this IRQ. */
295 result = gxio_trio_config_legacy_intr(context, cpu_x(cpu),
296 cpu_y(cpu), KERNEL_PL,
297 irq, controller->mac, i);
298 if (result < 0) {
299 pr_err("PCI: MAC intx config failed for %d\n", i);
300
301 goto free_irqs;
302 }
303
304 /*
305 * Register the IRQ handler with the kernel.
306 */
307 irq_set_chip_and_handler(irq, &tilegx_legacy_irq_chip,
308 trio_handle_level_irq);
309 irq_set_chip_data(irq, (void *)(uint64_t)i);
310 irq_set_handler_data(irq, controller);
311 }
312
313 return 0;
314
315free_irqs:
316 for (j = 0; j < i; j++)
317 destroy_irq(controller->irq_intx_table[j]);
318
319 return -1;
320}
321
322/*
323 * Find valid controllers and fill in pci_controller structs for each
324 * of them.
325 *
326 * Returns the number of controllers discovered.
327 */
328int __init tile_pci_init(void)
329{
330 int num_trio_shims = 0;
331 int ctl_index = 0;
332 int i, j;
333
334 if (!pci_probe) {
335 pr_info("PCI: disabled by boot argument\n");
336 return 0;
337 }
338
339 pr_info("PCI: Searching for controllers...\n");
340
341 /*
342 * We loop over all the TRIO shims.
343 */
344 for (i = 0; i < TILEGX_NUM_TRIO; i++) {
345 int ret;
346
347 ret = tile_pcie_open(i);
348 if (ret < 0)
349 continue;
350
351 num_trio_shims++;
352 }
353
354 if (num_trio_shims == 0 || sim_is_simulator())
355 return 0;
356
357 /*
358 * Now determine which PCIe ports are configured to operate in RC mode.
359 * We look at the Board Information Block first and then see if there
360 * are any overriding configuration by the HW strapping pin.
361 */
362 for (i = 0; i < TILEGX_NUM_TRIO; i++) {
363 gxio_trio_context_t *context = &trio_contexts[i];
364 int ret;
365
366 if (context->fd < 0)
367 continue;
368
369 ret = hv_dev_pread(context->fd, 0,
370 (HV_VirtAddr)&pcie_ports[i][0],
371 sizeof(struct pcie_port_property) * TILEGX_TRIO_PCIES,
372 GXIO_TRIO_OP_GET_PORT_PROPERTY);
373 if (ret < 0) {
374 pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d,"
375 " on TRIO %d\n", ret, i);
376 continue;
377 }
378
379 for (j = 0; j < TILEGX_TRIO_PCIES; j++) {
380 if (pcie_ports[i][j].allow_rc) {
381 pcie_rc[i][j] = 1;
382 num_rc_controllers++;
383 }
384 else if (pcie_ports[i][j].allow_ep) {
385 num_ep_controllers++;
386 }
387 }
388 }
389
390 /*
391 * Return if no PCIe ports are configured to operate in RC mode.
392 */
393 if (num_rc_controllers == 0)
394 return 0;
395
396 /*
397 * Set the TRIO pointer and MAC index for each PCIe RC port.
398 */
399 for (i = 0; i < TILEGX_NUM_TRIO; i++) {
400 for (j = 0; j < TILEGX_TRIO_PCIES; j++) {
401 if (pcie_rc[i][j]) {
402 pci_controllers[ctl_index].trio =
403 &trio_contexts[i];
404 pci_controllers[ctl_index].mac = j;
405 pci_controllers[ctl_index].trio_index = i;
406 ctl_index++;
407 if (ctl_index == num_rc_controllers)
408 goto out;
409 }
410 }
411 }
412
413out:
414 /*
415 * Configure each PCIe RC port.
416 */
417 for (i = 0; i < num_rc_controllers; i++) {
418 /*
419 * Configure the PCIe MAC to run in RC mode.
420 */
421
422 struct pci_controller *controller = &pci_controllers[i];
423
424 controller->index = i;
425 controller->ops = &tile_cfg_ops;
426
427 /*
428 * The PCI memory resource is located above the PA space.
429 * For every host bridge, the BAR window or the MMIO aperture
430 * is in range [3GB, 4GB - 1] of a 4GB space beyond the
431 * PA space.
432 */
433
434 controller->mem_offset = TILE_PCI_MEM_START +
435 (i * TILE_PCI_BAR_WINDOW_TOP);
436 controller->mem_space.start = controller->mem_offset +
437 TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE;
438 controller->mem_space.end = controller->mem_offset +
439 TILE_PCI_BAR_WINDOW_TOP - 1;
440 controller->mem_space.flags = IORESOURCE_MEM;
441 snprintf(controller->mem_space_name,
442 sizeof(controller->mem_space_name),
443 "PCI mem domain %d", i);
444 controller->mem_space.name = controller->mem_space_name;
445 }
446
447 return num_rc_controllers;
448}
449
450/*
451 * (pin - 1) converts from the PCI standard's [1:4] convention to
452 * a normal [0:3] range.
453 */
454static int tile_map_irq(const struct pci_dev *dev, u8 device, u8 pin)
455{
456 struct pci_controller *controller =
457 (struct pci_controller *)dev->sysdata;
458 return controller->irq_intx_table[pin - 1];
459}
460
461
462static void __devinit fixup_read_and_payload_sizes(struct pci_controller *
463 controller)
464{
465 gxio_trio_context_t *trio_context = controller->trio;
466 struct pci_bus *root_bus = controller->root_bus;
467 TRIO_PCIE_RC_DEVICE_CONTROL_t dev_control;
468 TRIO_PCIE_RC_DEVICE_CAP_t rc_dev_cap;
469 unsigned int reg_offset;
470 struct pci_bus *child;
471 int mac;
472 int err;
473
474 mac = controller->mac;
475
476 /*
477 * Set our max read request size to be 4KB.
478 */
479 reg_offset =
480 (TRIO_PCIE_RC_DEVICE_CONTROL <<
481 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
482 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
483 TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
484 (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
485
486 dev_control.word = __gxio_mmio_read32(trio_context->mmio_base_mac +
487 reg_offset);
488 dev_control.max_read_req_sz = 5;
489 __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset,
490 dev_control.word);
491
492 /*
493 * Set the max payload size supported by this Gx PCIe MAC.
494 * Though Gx PCIe supports Max Payload Size of up to 1024 bytes,
495 * experiments have shown that setting MPS to 256 yields the
496 * best performance.
497 */
498 reg_offset =
499 (TRIO_PCIE_RC_DEVICE_CAP <<
500 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
501 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
502 TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
503 (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
504
505 rc_dev_cap.word = __gxio_mmio_read32(trio_context->mmio_base_mac +
506 reg_offset);
507 rc_dev_cap.mps_sup = 1;
508 __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset,
509 rc_dev_cap.word);
510
511 /* Configure PCI Express MPS setting. */
512 list_for_each_entry(child, &root_bus->children, node) {
513 struct pci_dev *self = child->self;
514 if (!self)
515 continue;
516
517 pcie_bus_configure_settings(child, self->pcie_mpss);
518 }
519
520 /*
521 * Set the mac_config register in trio based on the MPS/MRS of the link.
522 */
523 reg_offset =
524 (TRIO_PCIE_RC_DEVICE_CONTROL <<
525 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
526 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
527 TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
528 (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
529
530 dev_control.word = __gxio_mmio_read32(trio_context->mmio_base_mac +
531 reg_offset);
532
533 err = gxio_trio_set_mps_mrs(trio_context,
534 dev_control.max_payload_size,
535 dev_control.max_read_req_sz,
536 mac);
537 if (err < 0) {
538 pr_err("PCI: PCIE_CONFIGURE_MAC_MPS_MRS failure, "
539 "MAC %d on TRIO %d\n",
540 mac, controller->trio_index);
541 }
542}
543
544static int __devinit setup_pcie_rc_delay(char *str)
545{
546 unsigned long delay = 0;
547 unsigned long trio_index;
548 unsigned long mac;
549
550 if (str == NULL || !isdigit(*str))
551 return -EINVAL;
552 trio_index = simple_strtoul(str, (char **)&str, 10);
553 if (trio_index >= TILEGX_NUM_TRIO)
554 return -EINVAL;
555
556 if (*str != ',')
557 return -EINVAL;
558
559 str++;
560 if (!isdigit(*str))
561 return -EINVAL;
562 mac = simple_strtoul(str, (char **)&str, 10);
563 if (mac >= TILEGX_TRIO_PCIES)
564 return -EINVAL;
565
566 if (*str != '\0') {
567 if (*str != ',')
568 return -EINVAL;
569
570 str++;
571 if (!isdigit(*str))
572 return -EINVAL;
573 delay = simple_strtoul(str, (char **)&str, 10);
574 if (delay > MAX_RC_DELAY)
575 return -EINVAL;
576 }
577
578 rc_delay[trio_index][mac] = delay ? : DEFAULT_RC_DELAY;
579 pr_info("Delaying PCIe RC link training for %u sec"
580 " on MAC %lu on TRIO %lu\n", rc_delay[trio_index][mac],
581 mac, trio_index);
582 return 0;
583}
584early_param("pcie_rc_delay", setup_pcie_rc_delay);
585
586/*
587 * PCI initialization entry point, called by subsys_initcall.
588 */
589int __init pcibios_init(void)
590{
591 resource_size_t offset;
592 LIST_HEAD(resources);
593 int next_busno;
594 int i;
595
596 tile_pci_init();
597
598 if (num_rc_controllers == 0 && num_ep_controllers == 0)
599 return 0;
600
601 /*
602 * We loop over all the TRIO shims and set up the MMIO mappings.
603 */
604 for (i = 0; i < TILEGX_NUM_TRIO; i++) {
605 gxio_trio_context_t *context = &trio_contexts[i];
606
607 if (context->fd < 0)
608 continue;
609
610 /*
611 * Map in the MMIO space for the MAC.
612 */
613 offset = 0;
614 context->mmio_base_mac =
615 iorpc_ioremap(context->fd, offset,
616 HV_TRIO_CONFIG_IOREMAP_SIZE);
617 if (context->mmio_base_mac == NULL) {
618 pr_err("PCI: MAC map failure on TRIO %d\n", i);
619
620 hv_dev_close(context->fd);
621 context->fd = -1;
622 continue;
623 }
624 }
625
626 /*
627 * Delay a bit in case devices aren't ready. Some devices are
628 * known to require at least 20ms here, but we use a more
629 * conservative value.
630 */
631 msleep(250);
632
633 /* Scan all of the recorded PCI controllers. */
634 for (next_busno = 0, i = 0; i < num_rc_controllers; i++) {
635 struct pci_controller *controller = &pci_controllers[i];
636 gxio_trio_context_t *trio_context = controller->trio;
637 TRIO_PCIE_INTFC_PORT_CONFIG_t port_config;
638 TRIO_PCIE_INTFC_PORT_STATUS_t port_status;
639 TRIO_PCIE_INTFC_TX_FIFO_CTL_t tx_fifo_ctl;
640 struct pci_bus *bus;
641 unsigned int reg_offset;
642 unsigned int class_code_revision;
643 int trio_index;
644 int mac;
645 int ret;
646
647 if (trio_context->fd < 0)
648 continue;
649
650 trio_index = controller->trio_index;
651 mac = controller->mac;
652
653 /*
654 * Check the port strap state which will override the BIB
655 * setting.
656 */
657
658 reg_offset =
659 (TRIO_PCIE_INTFC_PORT_CONFIG <<
660 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
661 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
662 TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
663 (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
664
665 port_config.word =
666 __gxio_mmio_read(trio_context->mmio_base_mac +
667 reg_offset);
668
669 if ((port_config.strap_state !=
670 TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC) &&
671 (port_config.strap_state !=
672 TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1)) {
673 /*
674 * If this is really intended to be an EP port,
675 * record it so that the endpoint driver will know about it.
676 */
677 if (port_config.strap_state ==
678 TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT ||
679 port_config.strap_state ==
680 TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1)
681 pcie_ports[trio_index][mac].allow_ep = 1;
682
683 continue;
684 }
685
686 /*
687 * Delay the RC link training if needed.
688 */
689 if (rc_delay[trio_index][mac])
690 msleep(rc_delay[trio_index][mac] * 1000);
691
692 ret = gxio_trio_force_rc_link_up(trio_context, mac);
693 if (ret < 0)
694 pr_err("PCI: PCIE_FORCE_LINK_UP failure, "
695 "MAC %d on TRIO %d\n", mac, trio_index);
696
697 pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n", i,
698 trio_index, controller->mac);
699
700 /*
701 * Wait a bit here because some EP devices take longer
702 * to come up.
703 */
704 msleep(1000);
705
706 /*
707 * Check for PCIe link-up status.
708 */
709
710 reg_offset =
711 (TRIO_PCIE_INTFC_PORT_STATUS <<
712 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
713 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
714 TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
715 (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
716
717 port_status.word =
718 __gxio_mmio_read(trio_context->mmio_base_mac +
719 reg_offset);
720 if (!port_status.dl_up) {
721 pr_err("PCI: link is down, MAC %d on TRIO %d\n",
722 mac, trio_index);
723 continue;
724 }
725
726 /*
727 * Ensure that the link can come out of L1 power down state.
728 * Strictly speaking, this is needed only in the case of
729 * heavy RC-initiated DMAs.
730 */
731 reg_offset =
732 (TRIO_PCIE_INTFC_TX_FIFO_CTL <<
733 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
734 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
735 TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
736 (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
737 tx_fifo_ctl.word =
738 __gxio_mmio_read(trio_context->mmio_base_mac +
739 reg_offset);
740 tx_fifo_ctl.min_p_credits = 0;
741 __gxio_mmio_write(trio_context->mmio_base_mac + reg_offset,
742 tx_fifo_ctl.word);
743
744 /*
745 * Change the device ID so that Linux bus crawl doesn't confuse
746 * the internal bridge with any Tilera endpoints.
747 */
748
749 reg_offset =
750 (TRIO_PCIE_RC_DEVICE_ID_VEN_ID <<
751 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
752 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
753 TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
754 (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
755
756 __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset,
757 (TILERA_GX36_RC_DEV_ID <<
758 TRIO_PCIE_RC_DEVICE_ID_VEN_ID__DEV_ID_SHIFT) |
759 TILERA_VENDOR_ID);
760
761 /*
762 * Set the internal P2P bridge class code.
763 */
764
765 reg_offset =
766 (TRIO_PCIE_RC_REVISION_ID <<
767 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
768 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
769 TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
770 (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
771
772 class_code_revision =
773 __gxio_mmio_read32(trio_context->mmio_base_mac +
774 reg_offset);
775 class_code_revision = (class_code_revision & 0xff ) |
776 (PCI_CLASS_BRIDGE_PCI << 16);
777
778 __gxio_mmio_write32(trio_context->mmio_base_mac +
779 reg_offset, class_code_revision);
780
781#ifdef USE_SHARED_PCIE_CONFIG_REGION
782
783 /*
784 * Map in the MMIO space for the PIO region.
785 */
786 offset = HV_TRIO_PIO_OFFSET(trio_context->pio_cfg_index) |
787 (((unsigned long long)mac) <<
788 TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT);
789
790#else
791
792 /*
793 * Alloc a PIO region for PCI config access per MAC.
794 */
795 ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
796 if (ret < 0) {
797 pr_err("PCI: PCI CFG PIO alloc failure for mac %d "
798 "on TRIO %d, give up\n", mac, trio_index);
799
800 continue;
801 }
802
803 trio_context->pio_cfg_index[mac] = ret;
804
805 /*
806 * For PIO CFG, the bus_address_hi parameter is 0.
807 */
808 ret = gxio_trio_init_pio_region_aux(trio_context,
809 trio_context->pio_cfg_index[mac],
810 mac, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE);
811 if (ret < 0) {
812 pr_err("PCI: PCI CFG PIO init failure for mac %d "
813 "on TRIO %d, give up\n", mac, trio_index);
814
815 continue;
816 }
817
818 offset = HV_TRIO_PIO_OFFSET(trio_context->pio_cfg_index[mac]) |
819 (((unsigned long long)mac) <<
820 TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT);
821
822#endif
823
824 trio_context->mmio_base_pio_cfg[mac] =
825 iorpc_ioremap(trio_context->fd, offset,
826 (1 << TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT));
827 if (trio_context->mmio_base_pio_cfg[mac] == NULL) {
828 pr_err("PCI: PIO map failure for mac %d on TRIO %d\n",
829 mac, trio_index);
830
831 continue;
832 }
833
834 /*
835 * Initialize the PCIe interrupts.
836 */
837 if (tile_init_irqs(controller)) {
838 pr_err("PCI: IRQs init failure for mac %d on TRIO %d\n",
839 mac, trio_index);
840
841 continue;
842 }
843
844 /*
845 * The PCI memory resource is located above the PA space.
846 * The memory range for the PCI root bus should not overlap
847 * with the physical RAM
848 */
849 pci_add_resource_offset(&resources, &controller->mem_space,
850 controller->mem_offset);
851
852 controller->first_busno = next_busno;
853 bus = pci_scan_root_bus(NULL, next_busno, controller->ops,
854 controller, &resources);
855 controller->root_bus = bus;
856 next_busno = bus->subordinate + 1;
857
858 }
859
860 /* Do machine dependent PCI interrupt routing */
861 pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
862
863 /*
864 * This comes from the generic Linux PCI driver.
865 *
866 * It allocates all of the resources (I/O memory, etc)
867 * associated with the devices read in above.
868 */
869
870 pci_assign_unassigned_resources();
871
872 /* Record the I/O resources in the PCI controller structure. */
873 for (i = 0; i < num_rc_controllers; i++) {
874 struct pci_controller *controller = &pci_controllers[i];
875 gxio_trio_context_t *trio_context = controller->trio;
876 struct pci_bus *root_bus = pci_controllers[i].root_bus;
877 struct pci_bus *next_bus;
878 uint32_t bus_address_hi;
879 struct pci_dev *dev;
880 int ret;
881 int j;
882
883 /*
884 * Skip controllers that are not properly initialized or
885 * have down links.
886 */
887 if (root_bus == NULL)
888 continue;
889
890 /* Configure the max_payload_size values for this domain. */
891 fixup_read_and_payload_sizes(controller);
892
893 list_for_each_entry(dev, &root_bus->devices, bus_list) {
894 /* Find the PCI host controller, ie. the 1st bridge. */
895 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
896 (PCI_SLOT(dev->devfn) == 0)) {
897 next_bus = dev->subordinate;
898 pci_controllers[i].mem_resources[0] =
899 *next_bus->resource[0];
900 pci_controllers[i].mem_resources[1] =
901 *next_bus->resource[1];
902 pci_controllers[i].mem_resources[2] =
903 *next_bus->resource[2];
904
905 break;
906 }
907 }
908
909 if (pci_controllers[i].mem_resources[1].flags & IORESOURCE_MEM)
910 bus_address_hi =
911 pci_controllers[i].mem_resources[1].start >> 32;
912 else if (pci_controllers[i].mem_resources[2].flags & IORESOURCE_PREFETCH)
913 bus_address_hi =
914 pci_controllers[i].mem_resources[2].start >> 32;
915 else {
916 /* This is unlikely. */
917 pr_err("PCI: no memory resources on TRIO %d mac %d\n",
918 controller->trio_index, controller->mac);
919 continue;
920 }
921
922 /*
923 * Alloc a PIO region for PCI memory access for each RC port.
924 */
925 ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
926 if (ret < 0) {
927 pr_err("PCI: MEM PIO alloc failure on TRIO %d mac %d, "
928 "give up\n", controller->trio_index,
929 controller->mac);
930
931 continue;
932 }
933
934 controller->pio_mem_index = ret;
935
936 /*
937 * For PIO MEM, the bus_address_hi parameter is hard-coded 0
938 * because we always assign 32-bit PCI bus BAR ranges.
939 */
940 ret = gxio_trio_init_pio_region_aux(trio_context,
941 controller->pio_mem_index,
942 controller->mac,
943 0,
944 0);
945 if (ret < 0) {
946 pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, "
947 "give up\n", controller->trio_index,
948 controller->mac);
949
950 continue;
951 }
952
953 /*
954 * Configure a Mem-Map region for each memory controller so
955 * that Linux can map all of its PA space to the PCI bus.
956 * Use the IOMMU to handle hash-for-home memory.
957 */
958 for_each_online_node(j) {
959 unsigned long start_pfn = node_start_pfn[j];
960 unsigned long end_pfn = node_end_pfn[j];
961 unsigned long nr_pages = end_pfn - start_pfn;
962
963 ret = gxio_trio_alloc_memory_maps(trio_context, 1, 0,
964 0);
965 if (ret < 0) {
966 pr_err("PCI: Mem-Map alloc failure on TRIO %d "
967 "mac %d for MC %d, give up\n",
968 controller->trio_index,
969 controller->mac, j);
970
971 goto alloc_mem_map_failed;
972 }
973
974 controller->mem_maps[j] = ret;
975
976 /*
977 * Initialize the Mem-Map and the I/O MMU so that all
978 * the physical memory can be accessed by the endpoint
979 * devices. The base bus address is set to the base CPA
980 * of this memory controller plus an offset (see pci.h).
981 * The region's base VA is set to the base CPA. The
982 * I/O MMU table essentially translates the CPA to
983 * the real PA. Implicitly, for node 0, we create
984 * a separate Mem-Map region that serves as the inbound
985 * window for legacy 32-bit devices. This is a direct
986 * map of the low 4GB CPA space.
987 */
988 ret = gxio_trio_init_memory_map_mmu_aux(trio_context,
989 controller->mem_maps[j],
990 start_pfn << PAGE_SHIFT,
991 nr_pages << PAGE_SHIFT,
992 trio_context->asid,
993 controller->mac,
994 (start_pfn << PAGE_SHIFT) +
995 TILE_PCI_MEM_MAP_BASE_OFFSET,
996 j,
997 GXIO_TRIO_ORDER_MODE_UNORDERED);
998 if (ret < 0) {
999 pr_err("PCI: Mem-Map init failure on TRIO %d "
1000 "mac %d for MC %d, give up\n",
1001 controller->trio_index,
1002 controller->mac, j);
1003
1004 goto alloc_mem_map_failed;
1005 }
1006 continue;
1007
1008alloc_mem_map_failed:
1009 break;
1010 }
1011
1012 }
1013
1014 return 0;
1015}
1016subsys_initcall(pcibios_init);
1017
1018/* Note: to be deleted after Linux 3.6 merge. */
1019void __devinit pcibios_fixup_bus(struct pci_bus *bus)
1020{
1021}
1022
1023/*
1024 * This can be called from the generic PCI layer, but doesn't need to
1025 * do anything.
1026 */
1027char __devinit *pcibios_setup(char *str)
1028{
1029 if (!strcmp(str, "off")) {
1030 pci_probe = 0;
1031 return NULL;
1032 }
1033 return str;
1034}
1035
1036/*
1037 * This is called from the generic Linux layer.
1038 */
1039void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
1040{
1041 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
1042}
1043
1044/*
1045 * Enable memory address decoding, as appropriate, for the
1046 * device described by the 'dev' struct. The I/O decoding
1047 * is disabled, though the TILE-Gx supports I/O addressing.
1048 *
1049 * This is called from the generic PCI layer, and can be called
1050 * for bridges or endpoints.
1051 */
1052int pcibios_enable_device(struct pci_dev *dev, int mask)
1053{
1054 return pci_enable_resources(dev, mask);
1055}
1056
1057/* Called for each device after PCI setup is done. */
1058static void __init
1059pcibios_fixup_final(struct pci_dev *pdev)
1060{
1061 set_dma_ops(&pdev->dev, gx_pci_dma_map_ops);
1062 set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET);
1063 pdev->dev.archdata.max_direct_dma_addr =
1064 TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
1065}
1066DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
1067
1068/* Map a PCI MMIO bus address into VA space. */
1069void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
1070{
1071 struct pci_controller *controller = NULL;
1072 resource_size_t bar_start;
1073 resource_size_t bar_end;
1074 resource_size_t offset;
1075 resource_size_t start;
1076 resource_size_t end;
1077 int trio_fd;
1078 int i, j;
1079
1080 start = phys_addr;
1081 end = phys_addr + size - 1;
1082
1083 /*
1084 * In the following, each PCI controller's mem_resources[1]
1085 * represents its (non-prefetchable) PCI memory resource and
1086 * mem_resources[2] refers to its prefetchable PCI memory resource.
1087 * By searching phys_addr in each controller's mem_resources[], we can
1088 * determine the controller that should accept the PCI memory access.
1089 */
1090
1091 for (i = 0; i < num_rc_controllers; i++) {
1092 /*
1093 * Skip controllers that are not properly initialized or
1094 * have down links.
1095 */
1096 if (pci_controllers[i].root_bus == NULL)
1097 continue;
1098
1099 for (j = 1; j < 3; j++) {
1100 bar_start =
1101 pci_controllers[i].mem_resources[j].start;
1102 bar_end =
1103 pci_controllers[i].mem_resources[j].end;
1104
1105 if ((start >= bar_start) && (end <= bar_end)) {
1106
1107 controller = &pci_controllers[i];
1108
1109 goto got_it;
1110 }
1111 }
1112 }
1113
1114 if (controller == NULL)
1115 return NULL;
1116
1117got_it:
1118 trio_fd = controller->trio->fd;
1119
1120 /* Convert the resource start to the bus address offset. */
1121 start = phys_addr - controller->mem_offset;
1122
1123 offset = HV_TRIO_PIO_OFFSET(controller->pio_mem_index) + start;
1124
1125 /*
1126 * We need to keep the PCI bus address's in-page offset in the VA.
1127 */
1128 return iorpc_ioremap(trio_fd, offset, size) +
1129 (phys_addr & (PAGE_SIZE - 1));
1130}
1131EXPORT_SYMBOL(ioremap);
1132
1133void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
1134{
1135 iounmap(addr);
1136}
1137EXPORT_SYMBOL(pci_iounmap);
1138
1139/****************************************************************
1140 *
1141 * Tile PCI config space read/write routines
1142 *
1143 ****************************************************************/
1144
1145/*
1146 * These are the normal read and write ops
1147 * These are expanded with macros from pci_bus_read_config_byte() etc.
1148 *
1149 * devfn is the combined PCI device & function.
1150 *
1151 * offset is in bytes, from the start of config space for the
1152 * specified bus & device.
1153 */
1154
1155static int __devinit tile_cfg_read(struct pci_bus *bus,
1156 unsigned int devfn,
1157 int offset,
1158 int size,
1159 u32 *val)
1160{
1161 struct pci_controller *controller = bus->sysdata;
1162 gxio_trio_context_t *trio_context = controller->trio;
1163 int busnum = bus->number & 0xff;
1164 int device = PCI_SLOT(devfn);
1165 int function = PCI_FUNC(devfn);
1166 int config_type = 1;
1167 TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR_t cfg_addr;
1168 void *mmio_addr;
1169
1170 /*
1171 * Map all accesses to the local device on root bus into the
1172 * MMIO space of the MAC. Accesses to the downstream devices
1173 * go to the PIO space.
1174 */
1175 if (pci_is_root_bus(bus)) {
1176 if (device == 0) {
1177 /*
1178 * This is the internal downstream P2P bridge,
1179 * access directly.
1180 */
1181 unsigned int reg_offset;
1182
1183 reg_offset = ((offset & 0xFFF) <<
1184 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
1185 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_PROTECTED
1186 << TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
1187 (controller->mac <<
1188 TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
1189
1190 mmio_addr = trio_context->mmio_base_mac + reg_offset;
1191
1192 goto valid_device;
1193
1194 } else {
1195 /*
1196 * We fake an empty device for (device > 0),
1197 * since there is only one device on bus 0.
1198 */
1199 goto invalid_device;
1200 }
1201 }
1202
1203 /*
1204 * Accesses to the directly attached device have to be
1205 * sent as type-0 configs.
1206 */
1207
1208 if (busnum == (controller->first_busno + 1)) {
1209 /*
1210 * There is only one device off of our built-in P2P bridge.
1211 */
1212 if (device != 0)
1213 goto invalid_device;
1214
1215 config_type = 0;
1216 }
1217
1218 cfg_addr.word = 0;
1219 cfg_addr.reg_addr = (offset & 0xFFF);
1220 cfg_addr.fn = function;
1221 cfg_addr.dev = device;
1222 cfg_addr.bus = busnum;
1223 cfg_addr.type = config_type;
1224
1225 /*
1226 * Note that we don't set the mac field in cfg_addr because the
1227 * mapping is per port.
1228 */
1229
1230 mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] +
1231 cfg_addr.word;
1232
1233valid_device:
1234
1235 switch (size) {
1236 case 4:
1237 *val = __gxio_mmio_read32(mmio_addr);
1238 break;
1239
1240 case 2:
1241 *val = __gxio_mmio_read16(mmio_addr);
1242 break;
1243
1244 case 1:
1245 *val = __gxio_mmio_read8(mmio_addr);
1246 break;
1247
1248 default:
1249 return PCIBIOS_FUNC_NOT_SUPPORTED;
1250 }
1251
1252 TRACE_CFG_RD(size, *val, busnum, device, function, offset);
1253
1254 return 0;
1255
1256invalid_device:
1257
1258 switch (size) {
1259 case 4:
1260 *val = 0xFFFFFFFF;
1261 break;
1262
1263 case 2:
1264 *val = 0xFFFF;
1265 break;
1266
1267 case 1:
1268 *val = 0xFF;
1269 break;
1270
1271 default:
1272 return PCIBIOS_FUNC_NOT_SUPPORTED;
1273 }
1274
1275 return 0;
1276}
1277
1278
1279/*
1280 * See tile_cfg_read() for relevent comments.
1281 * Note that "val" is the value to write, not a pointer to that value.
1282 */
1283static int __devinit tile_cfg_write(struct pci_bus *bus,
1284 unsigned int devfn,
1285 int offset,
1286 int size,
1287 u32 val)
1288{
1289 struct pci_controller *controller = bus->sysdata;
1290 gxio_trio_context_t *trio_context = controller->trio;
1291 int busnum = bus->number & 0xff;
1292 int device = PCI_SLOT(devfn);
1293 int function = PCI_FUNC(devfn);
1294 int config_type = 1;
1295 TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR_t cfg_addr;
1296 void *mmio_addr;
1297 u32 val_32 = (u32)val;
1298 u16 val_16 = (u16)val;
1299 u8 val_8 = (u8)val;
1300
1301 /*
1302 * Map all accesses to the local device on root bus into the
1303 * MMIO space of the MAC. Accesses to the downstream devices
1304 * go to the PIO space.
1305 */
1306 if (pci_is_root_bus(bus)) {
1307 if (device == 0) {
1308 /*
1309 * This is the internal downstream P2P bridge,
1310 * access directly.
1311 */
1312 unsigned int reg_offset;
1313
1314 reg_offset = ((offset & 0xFFF) <<
1315 TRIO_CFG_REGION_ADDR__REG_SHIFT) |
1316 (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_PROTECTED
1317 << TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
1318 (controller->mac <<
1319 TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
1320
1321 mmio_addr = trio_context->mmio_base_mac + reg_offset;
1322
1323 goto valid_device;
1324
1325 } else {
1326 /*
1327 * We fake an empty device for (device > 0),
1328 * since there is only one device on bus 0.
1329 */
1330 goto invalid_device;
1331 }
1332 }
1333
1334 /*
1335 * Accesses to the directly attached device have to be
1336 * sent as type-0 configs.
1337 */
1338
1339 if (busnum == (controller->first_busno + 1)) {
1340 /*
1341 * There is only one device off of our built-in P2P bridge.
1342 */
1343 if (device != 0)
1344 goto invalid_device;
1345
1346 config_type = 0;
1347 }
1348
1349 cfg_addr.word = 0;
1350 cfg_addr.reg_addr = (offset & 0xFFF);
1351 cfg_addr.fn = function;
1352 cfg_addr.dev = device;
1353 cfg_addr.bus = busnum;
1354 cfg_addr.type = config_type;
1355
1356 /*
1357 * Note that we don't set the mac field in cfg_addr because the
1358 * mapping is per port.
1359 */
1360
1361 mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] +
1362 cfg_addr.word;
1363
1364valid_device:
1365
1366 switch (size) {
1367 case 4:
1368 __gxio_mmio_write32(mmio_addr, val_32);
1369 TRACE_CFG_WR(size, val_32, busnum, device, function, offset);
1370 break;
1371
1372 case 2:
1373 __gxio_mmio_write16(mmio_addr, val_16);
1374 TRACE_CFG_WR(size, val_16, busnum, device, function, offset);
1375 break;
1376
1377 case 1:
1378 __gxio_mmio_write8(mmio_addr, val_8);
1379 TRACE_CFG_WR(size, val_8, busnum, device, function, offset);
1380 break;
1381
1382 default:
1383 return PCIBIOS_FUNC_NOT_SUPPORTED;
1384 }
1385
1386invalid_device:
1387
1388 return 0;
1389}
1390
1391
1392static struct pci_ops tile_cfg_ops = {
1393 .read = tile_cfg_read,
1394 .write = tile_cfg_write,
1395};
1396
1397
1398/*
1399 * MSI support starts here.
1400 */
1401static unsigned int
1402tilegx_msi_startup(struct irq_data *d)
1403{
1404 if (d->msi_desc)
1405 unmask_msi_irq(d);
1406
1407 return 0;
1408}
1409
1410static void
1411tilegx_msi_ack(struct irq_data *d)
1412{
1413 __insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq);
1414}
1415
1416static void
1417tilegx_msi_mask(struct irq_data *d)
1418{
1419 mask_msi_irq(d);
1420 __insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq);
1421}
1422
1423static void
1424tilegx_msi_unmask(struct irq_data *d)
1425{
1426 __insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq);
1427 unmask_msi_irq(d);
1428}
1429
1430static struct irq_chip tilegx_msi_chip = {
1431 .name = "tilegx_msi",
1432 .irq_startup = tilegx_msi_startup,
1433 .irq_ack = tilegx_msi_ack,
1434 .irq_mask = tilegx_msi_mask,
1435 .irq_unmask = tilegx_msi_unmask,
1436
1437 /* TBD: support set_affinity. */
1438};
1439
1440int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
1441{
1442 struct pci_controller *controller;
1443 gxio_trio_context_t *trio_context;
1444 struct msi_msg msg;
1445 int default_irq;
1446 uint64_t mem_map_base;
1447 uint64_t mem_map_limit;
1448 u64 msi_addr;
1449 int mem_map;
1450 int cpu;
1451 int irq;
1452 int ret;
1453
1454 irq = create_irq();
1455 if (irq < 0)
1456 return irq;
1457
1458 /*
1459 * Since we use a 64-bit Mem-Map to accept the MSI write, we fail
1460 * devices that are not capable of generating a 64-bit message address.
1461 * These devices will fall back to using the legacy interrupts.
1462 * Most PCIe endpoint devices do support 64-bit message addressing.
1463 */
1464 if (desc->msi_attrib.is_64 == 0) {
1465 dev_printk(KERN_INFO, &pdev->dev,
1466 "64-bit MSI message address not supported, "
1467 "falling back to legacy interrupts.\n");
1468
1469 ret = -ENOMEM;
1470 goto is_64_failure;
1471 }
1472
1473 default_irq = desc->msi_attrib.default_irq;
1474 controller = irq_get_handler_data(default_irq);
1475
1476 BUG_ON(!controller);
1477
1478 trio_context = controller->trio;
1479
1480 /*
1481 * Allocate the Mem-Map that will accept the MSI write and
1482 * trigger the TILE-side interrupts.
1483 */
1484 mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0);
1485 if (mem_map < 0) {
1486 dev_printk(KERN_INFO, &pdev->dev,
1487 "%s Mem-Map alloc failure. "
1488 "Failed to initialize MSI interrupts. "
1489 "Falling back to legacy interrupts.\n",
1490 desc->msi_attrib.is_msix ? "MSI-X" : "MSI");
1491
1492 ret = -ENOMEM;
1493 goto msi_mem_map_alloc_failure;
1494 }
1495
1496 /* We try to distribute different IRQs to different tiles. */
1497 cpu = tile_irq_cpu(irq);
1498
1499 /*
1500 * Now call up to the HV to configure the Mem-Map interrupt and
1501 * set up the IPI binding.
1502 */
1503 mem_map_base = MEM_MAP_INTR_REGIONS_BASE +
1504 mem_map * MEM_MAP_INTR_REGION_SIZE;
1505 mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1;
1506
1507 ret = gxio_trio_config_msi_intr(trio_context, cpu_x(cpu), cpu_y(cpu),
1508 KERNEL_PL, irq, controller->mac,
1509 mem_map, mem_map_base, mem_map_limit,
1510 trio_context->asid);
1511 if (ret < 0) {
1512 dev_printk(KERN_INFO, &pdev->dev, "HV MSI config failed.\n");
1513
1514 goto hv_msi_config_failure;
1515 }
1516
1517 irq_set_msi_desc(irq, desc);
1518
1519 msi_addr = mem_map_base + TRIO_MAP_MEM_REG_INT3 - TRIO_MAP_MEM_REG_INT0;
1520
1521 msg.address_hi = msi_addr >> 32;
1522 msg.address_lo = msi_addr & 0xffffffff;
1523
1524 msg.data = mem_map;
1525
1526 write_msi_msg(irq, &msg);
1527 irq_set_chip_and_handler(irq, &tilegx_msi_chip, handle_level_irq);
1528 irq_set_handler_data(irq, controller);
1529
1530 return 0;
1531
1532hv_msi_config_failure:
1533 /* Free mem-map */
1534msi_mem_map_alloc_failure:
1535is_64_failure:
1536 destroy_irq(irq);
1537 return ret;
1538}
1539
/*
 * Tear down an MSI interrupt: this only destroys the IRQ via
 * destroy_irq(); nothing else is released here.
 */
void arch_teardown_msi_irq(unsigned int irq)
{
	destroy_irq(irq);
}
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index dd87f3420390..6a649a4462d3 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -23,6 +23,7 @@
23#include <linux/irq.h> 23#include <linux/irq.h>
24#include <linux/kexec.h> 24#include <linux/kexec.h>
25#include <linux/pci.h> 25#include <linux/pci.h>
26#include <linux/swiotlb.h>
26#include <linux/initrd.h> 27#include <linux/initrd.h>
27#include <linux/io.h> 28#include <linux/io.h>
28#include <linux/highmem.h> 29#include <linux/highmem.h>
@@ -109,7 +110,7 @@ static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = {
109}; 110};
110static nodemask_t __initdata isolnodes; 111static nodemask_t __initdata isolnodes;
111 112
112#ifdef CONFIG_PCI 113#if defined(CONFIG_PCI) && !defined(__tilegx__)
113enum { DEFAULT_PCI_RESERVE_MB = 64 }; 114enum { DEFAULT_PCI_RESERVE_MB = 64 };
114static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB; 115static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB;
115unsigned long __initdata pci_reserve_start_pfn = -1U; 116unsigned long __initdata pci_reserve_start_pfn = -1U;
@@ -160,7 +161,7 @@ static int __init setup_isolnodes(char *str)
160} 161}
161early_param("isolnodes", setup_isolnodes); 162early_param("isolnodes", setup_isolnodes);
162 163
163#ifdef CONFIG_PCI 164#if defined(CONFIG_PCI) && !defined(__tilegx__)
164static int __init setup_pci_reserve(char* str) 165static int __init setup_pci_reserve(char* str)
165{ 166{
166 unsigned long mb; 167 unsigned long mb;
@@ -171,7 +172,7 @@ static int __init setup_pci_reserve(char* str)
171 172
172 pci_reserve_mb = mb; 173 pci_reserve_mb = mb;
173 pr_info("Reserving %dMB for PCIE root complex mappings\n", 174 pr_info("Reserving %dMB for PCIE root complex mappings\n",
174 pci_reserve_mb); 175 pci_reserve_mb);
175 return 0; 176 return 0;
176} 177}
177early_param("pci_reserve", setup_pci_reserve); 178early_param("pci_reserve", setup_pci_reserve);
@@ -411,7 +412,7 @@ static void __init setup_memory(void)
411 continue; 412 continue;
412 } 413 }
413#endif 414#endif
414#ifdef CONFIG_PCI 415#if defined(CONFIG_PCI) && !defined(__tilegx__)
415 /* 416 /*
416 * Blocks that overlap the pci reserved region must 417 * Blocks that overlap the pci reserved region must
417 * have enough space to hold the maximum percpu data 418 * have enough space to hold the maximum percpu data
@@ -604,11 +605,9 @@ static void __init setup_bootmem_allocator_node(int i)
604 /* Free all the space back into the allocator. */ 605 /* Free all the space back into the allocator. */
605 free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start)); 606 free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
606 607
607#if defined(CONFIG_PCI) 608#if defined(CONFIG_PCI) && !defined(__tilegx__)
608 /* 609 /*
609 * Throw away any memory aliased by the PCI region. FIXME: this 610 * Throw away any memory aliased by the PCI region.
610 * is a temporary hack to work around bug 10502, and needs to be
611 * fixed properly.
612 */ 611 */
613 if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) 612 if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
614 reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn), 613 reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
@@ -658,6 +657,8 @@ static void __init zone_sizes_init(void)
658 unsigned long zones_size[MAX_NR_ZONES] = { 0 }; 657 unsigned long zones_size[MAX_NR_ZONES] = { 0 };
659 int size = percpu_size(); 658 int size = percpu_size();
660 int num_cpus = smp_height * smp_width; 659 int num_cpus = smp_height * smp_width;
660 const unsigned long dma_end = (1UL << (32 - PAGE_SHIFT));
661
661 int i; 662 int i;
662 663
663 for (i = 0; i < num_cpus; ++i) 664 for (i = 0; i < num_cpus; ++i)
@@ -729,6 +730,14 @@ static void __init zone_sizes_init(void)
729 zones_size[ZONE_NORMAL] = end - start; 730 zones_size[ZONE_NORMAL] = end - start;
730#endif 731#endif
731 732
733 if (start < dma_end) {
734 zones_size[ZONE_DMA] = min(zones_size[ZONE_NORMAL],
735 dma_end - start);
736 zones_size[ZONE_NORMAL] -= zones_size[ZONE_DMA];
737 } else {
738 zones_size[ZONE_DMA] = 0;
739 }
740
732 /* Take zone metadata from controller 0 if we're isolnode. */ 741 /* Take zone metadata from controller 0 if we're isolnode. */
733 if (node_isset(i, isolnodes)) 742 if (node_isset(i, isolnodes))
734 NODE_DATA(i)->bdata = &bootmem_node_data[0]; 743 NODE_DATA(i)->bdata = &bootmem_node_data[0];
@@ -738,7 +747,7 @@ static void __init zone_sizes_init(void)
738 PFN_UP(node_percpu[i])); 747 PFN_UP(node_percpu[i]));
739 748
740 /* Track the type of memory on each node */ 749 /* Track the type of memory on each node */
741 if (zones_size[ZONE_NORMAL]) 750 if (zones_size[ZONE_NORMAL] || zones_size[ZONE_DMA])
742 node_set_state(i, N_NORMAL_MEMORY); 751 node_set_state(i, N_NORMAL_MEMORY);
743#ifdef CONFIG_HIGHMEM 752#ifdef CONFIG_HIGHMEM
744 if (end != start) 753 if (end != start)
@@ -1343,7 +1352,7 @@ void __init setup_arch(char **cmdline_p)
1343 setup_cpu_maps(); 1352 setup_cpu_maps();
1344 1353
1345 1354
1346#ifdef CONFIG_PCI 1355#if defined(CONFIG_PCI) && !defined(__tilegx__)
1347 /* 1356 /*
1348 * Initialize the PCI structures. This is done before memory 1357 * Initialize the PCI structures. This is done before memory
1349 * setup so that we know whether or not a pci_reserve region 1358 * setup so that we know whether or not a pci_reserve region
@@ -1372,6 +1381,10 @@ void __init setup_arch(char **cmdline_p)
1372 * any memory using the bootmem allocator. 1381 * any memory using the bootmem allocator.
1373 */ 1382 */
1374 1383
1384#ifdef CONFIG_SWIOTLB
1385 swiotlb_init(0);
1386#endif
1387
1375 paging_init(); 1388 paging_init();
1376 setup_numa_mapping(); 1389 setup_numa_mapping();
1377 zone_sizes_init(); 1390 zone_sizes_init();
@@ -1522,11 +1535,10 @@ static struct resource code_resource = {
1522}; 1535};
1523 1536
1524/* 1537/*
1525 * We reserve all resources above 4GB so that PCI won't try to put 1538 * On Pro, we reserve all resources above 4GB so that PCI won't try to put
1526 * mappings above 4GB; the standard allows that for some devices but 1539 * mappings above 4GB.
1527 * the probing code trunates values to 32 bits.
1528 */ 1540 */
1529#ifdef CONFIG_PCI 1541#if defined(CONFIG_PCI) && !defined(__tilegx__)
1530static struct resource* __init 1542static struct resource* __init
1531insert_non_bus_resource(void) 1543insert_non_bus_resource(void)
1532{ 1544{
@@ -1571,8 +1583,7 @@ static int __init request_standard_resources(void)
1571 int i; 1583 int i;
1572 enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; 1584 enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
1573 1585
1574 iomem_resource.end = -1LL; 1586#if defined(CONFIG_PCI) && !defined(__tilegx__)
1575#ifdef CONFIG_PCI
1576 insert_non_bus_resource(); 1587 insert_non_bus_resource();
1577#endif 1588#endif
1578 1589
@@ -1580,7 +1591,7 @@ static int __init request_standard_resources(void)
1580 u64 start_pfn = node_start_pfn[i]; 1591 u64 start_pfn = node_start_pfn[i];
1581 u64 end_pfn = node_end_pfn[i]; 1592 u64 end_pfn = node_end_pfn[i];
1582 1593
1583#ifdef CONFIG_PCI 1594#if defined(CONFIG_PCI) && !defined(__tilegx__)
1584 if (start_pfn <= pci_reserve_start_pfn && 1595 if (start_pfn <= pci_reserve_start_pfn &&
1585 end_pfn > pci_reserve_start_pfn) { 1596 end_pfn > pci_reserve_start_pfn) {
1586 if (end_pfn > pci_reserve_end_pfn) 1597 if (end_pfn > pci_reserve_end_pfn)
diff --git a/arch/tile/kernel/usb.c b/arch/tile/kernel/usb.c
new file mode 100644
index 000000000000..5af8debc6a71
--- /dev/null
+++ b/arch/tile/kernel/usb.c
@@ -0,0 +1,69 @@
1/*
2 * Copyright 2012 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 *
14 * Register the Tile-Gx USB interfaces as platform devices.
15 *
16 * The actual USB driver is just some glue (in
17 * drivers/usb/host/[eo]hci-tilegx.c) which makes the registers available
18 * to the standard kernel EHCI and OHCI drivers.
19 */
20
21#include <linux/dma-mapping.h>
22#include <linux/platform_device.h>
23#include <linux/usb/tilegx.h>
24#include <linux/types.h>
25
26static u64 ehci_dmamask = DMA_BIT_MASK(32);
27
28#define USB_HOST_DEF(unit, type, dmamask) \
29 static struct \
30 tilegx_usb_platform_data tilegx_usb_platform_data_ ## type ## \
31 hci ## unit = { \
32 .dev_index = unit, \
33 }; \
34 \
35 static struct platform_device tilegx_usb_ ## type ## hci ## unit = { \
36 .name = "tilegx-" #type "hci", \
37 .id = unit, \
38 .dev = { \
39 .dma_mask = dmamask, \
40 .coherent_dma_mask = DMA_BIT_MASK(32), \
41 .platform_data = \
42 &tilegx_usb_platform_data_ ## type ## hci ## \
43 unit, \
44 }, \
45 };
46
47USB_HOST_DEF(0, e, &ehci_dmamask)
48USB_HOST_DEF(0, o, NULL)
49USB_HOST_DEF(1, e, &ehci_dmamask)
50USB_HOST_DEF(1, o, NULL)
51
52#undef USB_HOST_DEF
53
54static struct platform_device *tilegx_usb_devices[] __initdata = {
55 &tilegx_usb_ehci0,
56 &tilegx_usb_ehci1,
57 &tilegx_usb_ohci0,
58 &tilegx_usb_ohci1,
59};
60
61/** Add our set of possible USB devices. */
62static int __init tilegx_usb_init(void)
63{
64 platform_add_devices(tilegx_usb_devices,
65 ARRAY_SIZE(tilegx_usb_devices));
66
67 return 0;
68}
69arch_initcall(tilegx_usb_init);