aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
authorJon Mason <jdmason@us.ibm.com>2006-06-26 07:58:14 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 13:48:19 -0400
commite465058d55a88feb4c7ecabe63eea7ea7147e206 (patch)
treed431ed689e072415915694eecdfbcb9304287f01 /arch/x86_64
parent0dc243ae10c8309c170a3af9f1adad1924a9f217 (diff)
[PATCH] x86_64: Calgary IOMMU - Calgary specific bits
This patch hooks Calgary into the build, the x86-64 IOMMU initialization paths, and introduces the Calgary specific bits. The implementation draws inspiration from both PPC (which has support for the same chip but requires firmware support which we don't have on x86-64) and gart. Calgary is different from gart in that it support a translation table per PHB, as opposed to the single gart aperture. Changes from previous version: * Addition of boot-time disablement for bus-level translation/isolation (e.g, enable userspace DMA for things like X) * Usage of newer IOMMU abstraction functions Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com> Signed-off-by: Jon Mason <jdmason@us.ibm.com> Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig19
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/pci-calgary.c1018
-rw-r--r--arch/x86_64/kernel/pci-dma.c9
-rw-r--r--arch/x86_64/kernel/tce.c202
5 files changed, 1249 insertions, 0 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index e8c52a1eec06..ccc4a7fb97a3 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -405,6 +405,25 @@ config IOMMU
405 device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified 405 device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified
406 too. 406 too.
407 407
408config CALGARY_IOMMU
409 bool "IBM Calgary IOMMU support"
410 default y
411 select SWIOTLB
412 depends on PCI && EXPERIMENTAL
413 help
414 Support for hardware IOMMUs in IBM's xSeries x366 and x460
415 systems. Needed to run systems with more than 3GB of memory
416 properly with 32-bit PCI devices that do not support DAC
417 (Double Address Cycle). Calgary also supports bus level
418 isolation, where all DMAs pass through the IOMMU. This
419 prevents them from going anywhere except their intended
420 destination. This catches hard-to-find kernel bugs and
421 mis-behaving drivers and devices that do not use the DMA-API
422 properly to set up their DMA buffers. The IOMMU can be
423 turned off at boot time with the iommu=off parameter.
424 Normally the kernel will make the right choice by itself.
425 If unsure, say Y.
426
408# need this always selected by IOMMU for the VIA workaround 427# need this always selected by IOMMU for the VIA workaround
409config SWIOTLB 428config SWIOTLB
410 bool 429 bool
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index fd106bdddd6d..aeb9c560be88 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
29obj-$(CONFIG_CPU_FREQ) += cpufreq/ 29obj-$(CONFIG_CPU_FREQ) += cpufreq/
30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 30obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
31obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o 31obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o
32obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o
32obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o 33obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
33obj-$(CONFIG_KPROBES) += kprobes.o 34obj-$(CONFIG_KPROBES) += kprobes.o
34obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o 35obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
new file mode 100644
index 000000000000..d91cb843f54d
--- /dev/null
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -0,0 +1,1018 @@
1/*
2 * Derived from arch/powerpc/kernel/iommu.c
3 *
4 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
5 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/config.h>
23#include <linux/kernel.h>
24#include <linux/init.h>
25#include <linux/types.h>
26#include <linux/slab.h>
27#include <linux/mm.h>
28#include <linux/spinlock.h>
29#include <linux/string.h>
30#include <linux/dma-mapping.h>
31#include <linux/init.h>
32#include <linux/bitops.h>
33#include <linux/pci_ids.h>
34#include <linux/pci.h>
35#include <linux/delay.h>
36#include <asm/proto.h>
37#include <asm/calgary.h>
38#include <asm/tce.h>
39#include <asm/pci-direct.h>
40#include <asm/system.h>
41#include <asm/dma.h>
42
43#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
44#define PCI_VENDOR_DEVICE_ID_CALGARY \
45 (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16)
46
47/* we need these for register space address calculation */
48#define START_ADDRESS 0xfe000000
49#define CHASSIS_BASE 0
50#define ONE_BASED_CHASSIS_NUM 1
51
52/* register offsets inside the host bridge space */
53#define PHB_CSR_OFFSET 0x0110
54#define PHB_PLSSR_OFFSET 0x0120
55#define PHB_CONFIG_RW_OFFSET 0x0160
56#define PHB_IOBASE_BAR_LOW 0x0170
57#define PHB_IOBASE_BAR_HIGH 0x0180
58#define PHB_MEM_1_LOW 0x0190
59#define PHB_MEM_1_HIGH 0x01A0
60#define PHB_IO_ADDR_SIZE 0x01B0
61#define PHB_MEM_1_SIZE 0x01C0
62#define PHB_MEM_ST_OFFSET 0x01D0
63#define PHB_AER_OFFSET 0x0200
64#define PHB_CONFIG_0_HIGH 0x0220
65#define PHB_CONFIG_0_LOW 0x0230
66#define PHB_CONFIG_0_END 0x0240
67#define PHB_MEM_2_LOW 0x02B0
68#define PHB_MEM_2_HIGH 0x02C0
69#define PHB_MEM_2_SIZE_HIGH 0x02D0
70#define PHB_MEM_2_SIZE_LOW 0x02E0
71#define PHB_DOSHOLE_OFFSET 0x08E0
72
73/* PHB_CONFIG_RW */
74#define PHB_TCE_ENABLE 0x20000000
75#define PHB_SLOT_DISABLE 0x1C000000
76#define PHB_DAC_DISABLE 0x01000000
77#define PHB_MEM2_ENABLE 0x00400000
78#define PHB_MCSR_ENABLE 0x00100000
79/* TAR (Table Address Register) */
80#define TAR_SW_BITS 0x0000ffffffff800fUL
81#define TAR_VALID 0x0000000000000008UL
82/* CSR (Channel/DMA Status Register) */
83#define CSR_AGENT_MASK 0xffe0ffff
84
85#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
86#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */
87#define PHBS_PER_CALGARY 4
88
89/* register offsets in Calgary's internal register space */
90static const unsigned long tar_offsets[] = {
91 0x0580 /* TAR0 */,
92 0x0588 /* TAR1 */,
93 0x0590 /* TAR2 */,
94 0x0598 /* TAR3 */
95};
96
97static const unsigned long split_queue_offsets[] = {
98 0x4870 /* SPLIT QUEUE 0 */,
99 0x5870 /* SPLIT QUEUE 1 */,
100 0x6870 /* SPLIT QUEUE 2 */,
101 0x7870 /* SPLIT QUEUE 3 */
102};
103
104static const unsigned long phb_offsets[] = {
105 0x8000 /* PHB0 */,
106 0x9000 /* PHB1 */,
107 0xA000 /* PHB2 */,
108 0xB000 /* PHB3 */
109};
110
111void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES];
112unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
113static int translate_empty_slots __read_mostly = 0;
114static int calgary_detected __read_mostly = 0;
115
116/*
117 * the bitmap of PHBs the user requested that we disable
118 * translation on.
119 */
120static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM);
121
122static void tce_cache_blast(struct iommu_table *tbl);
123
124/* enable this to stress test the chip's TCE cache */
125#ifdef CONFIG_IOMMU_DEBUG
126static inline void tce_cache_blast_stress(struct iommu_table *tbl)
127{
128 tce_cache_blast(tbl);
129}
130#else
131static inline void tce_cache_blast_stress(struct iommu_table *tbl)
132{
133}
134#endif /* BLAST_TCE_CACHE_ON_UNMAP */
135
136static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
137{
138 unsigned int npages;
139
140 npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK);
141 npages >>= PAGE_SHIFT;
142
143 return npages;
144}
145
146static inline int translate_phb(struct pci_dev* dev)
147{
148 int disabled = test_bit(dev->bus->number, translation_disabled);
149 return !disabled;
150}
151
152static void iommu_range_reserve(struct iommu_table *tbl,
153 unsigned long start_addr, unsigned int npages)
154{
155 unsigned long index;
156 unsigned long end;
157
158 index = start_addr >> PAGE_SHIFT;
159
160 /* bail out if we're asked to reserve a region we don't cover */
161 if (index >= tbl->it_size)
162 return;
163
164 end = index + npages;
165 if (end > tbl->it_size) /* don't go off the table */
166 end = tbl->it_size;
167
168 while (index < end) {
169 if (test_bit(index, tbl->it_map))
170 printk(KERN_ERR "Calgary: entry already allocated at "
171 "0x%lx tbl %p dma 0x%lx npages %u\n",
172 index, tbl, start_addr, npages);
173 ++index;
174 }
175 set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages);
176}
177
178static unsigned long iommu_range_alloc(struct iommu_table *tbl,
179 unsigned int npages)
180{
181 unsigned long offset;
182
183 BUG_ON(npages == 0);
184
185 offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
186 tbl->it_size, npages);
187 if (offset == ~0UL) {
188 tce_cache_blast(tbl);
189 offset = find_next_zero_string(tbl->it_map, 0,
190 tbl->it_size, npages);
191 if (offset == ~0UL) {
192 printk(KERN_WARNING "Calgary: IOMMU full.\n");
193 if (panic_on_overflow)
194 panic("Calgary: fix the allocator.\n");
195 else
196 return bad_dma_address;
197 }
198 }
199
200 set_bit_string(tbl->it_map, offset, npages);
201 tbl->it_hint = offset + npages;
202 BUG_ON(tbl->it_hint > tbl->it_size);
203
204 return offset;
205}
206
207static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
208 unsigned int npages, int direction)
209{
210 unsigned long entry, flags;
211 dma_addr_t ret = bad_dma_address;
212
213 spin_lock_irqsave(&tbl->it_lock, flags);
214
215 entry = iommu_range_alloc(tbl, npages);
216
217 if (unlikely(entry == bad_dma_address))
218 goto error;
219
220 /* set the return dma address */
221 ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
222
223 /* put the TCEs in the HW table */
224 tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
225 direction);
226
227 spin_unlock_irqrestore(&tbl->it_lock, flags);
228
229 return ret;
230
231error:
232 spin_unlock_irqrestore(&tbl->it_lock, flags);
233 printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
234 "iommu %p\n", npages, tbl);
235 return bad_dma_address;
236}
237
238static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
239 unsigned int npages)
240{
241 unsigned long entry;
242 unsigned long i;
243
244 entry = dma_addr >> PAGE_SHIFT;
245
246 BUG_ON(entry + npages > tbl->it_size);
247
248 tce_free(tbl, entry, npages);
249
250 for (i = 0; i < npages; ++i) {
251 if (!test_bit(entry + i, tbl->it_map))
252 printk(KERN_ERR "Calgary: bit is off at 0x%lx "
253 "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
254 entry + i, tbl, dma_addr, entry, npages);
255 }
256
257 __clear_bit_string(tbl->it_map, entry, npages);
258
259 tce_cache_blast_stress(tbl);
260}
261
262static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
263 unsigned int npages)
264{
265 unsigned long flags;
266
267 spin_lock_irqsave(&tbl->it_lock, flags);
268
269 __iommu_free(tbl, dma_addr, npages);
270
271 spin_unlock_irqrestore(&tbl->it_lock, flags);
272}
273
274static void __calgary_unmap_sg(struct iommu_table *tbl,
275 struct scatterlist *sglist, int nelems, int direction)
276{
277 while (nelems--) {
278 unsigned int npages;
279 dma_addr_t dma = sglist->dma_address;
280 unsigned int dmalen = sglist->dma_length;
281
282 if (dmalen == 0)
283 break;
284
285 npages = num_dma_pages(dma, dmalen);
286 __iommu_free(tbl, dma, npages);
287 sglist++;
288 }
289}
290
291void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
292 int nelems, int direction)
293{
294 unsigned long flags;
295 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
296
297 if (!translate_phb(to_pci_dev(dev)))
298 return;
299
300 spin_lock_irqsave(&tbl->it_lock, flags);
301
302 __calgary_unmap_sg(tbl, sglist, nelems, direction);
303
304 spin_unlock_irqrestore(&tbl->it_lock, flags);
305}
306
307static int calgary_nontranslate_map_sg(struct device* dev,
308 struct scatterlist *sg, int nelems, int direction)
309{
310 int i;
311
312 for (i = 0; i < nelems; i++ ) {
313 struct scatterlist *s = &sg[i];
314 BUG_ON(!s->page);
315 s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
316 s->dma_length = s->length;
317 }
318 return nelems;
319}
320
321int calgary_map_sg(struct device *dev, struct scatterlist *sg,
322 int nelems, int direction)
323{
324 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
325 unsigned long flags;
326 unsigned long vaddr;
327 unsigned int npages;
328 unsigned long entry;
329 int i;
330
331 if (!translate_phb(to_pci_dev(dev)))
332 return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
333
334 spin_lock_irqsave(&tbl->it_lock, flags);
335
336 for (i = 0; i < nelems; i++ ) {
337 struct scatterlist *s = &sg[i];
338 BUG_ON(!s->page);
339
340 vaddr = (unsigned long)page_address(s->page) + s->offset;
341 npages = num_dma_pages(vaddr, s->length);
342
343 entry = iommu_range_alloc(tbl, npages);
344 if (entry == bad_dma_address) {
345 /* makes sure unmap knows to stop */
346 s->dma_length = 0;
347 goto error;
348 }
349
350 s->dma_address = (entry << PAGE_SHIFT) | s->offset;
351
352 /* insert into HW table */
353 tce_build(tbl, entry, npages, vaddr & PAGE_MASK,
354 direction);
355
356 s->dma_length = s->length;
357 }
358
359 spin_unlock_irqrestore(&tbl->it_lock, flags);
360
361 return nelems;
362error:
363 __calgary_unmap_sg(tbl, sg, nelems, direction);
364 for (i = 0; i < nelems; i++) {
365 sg[i].dma_address = bad_dma_address;
366 sg[i].dma_length = 0;
367 }
368 spin_unlock_irqrestore(&tbl->it_lock, flags);
369 return 0;
370}
371
372dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
373 size_t size, int direction)
374{
375 dma_addr_t dma_handle = bad_dma_address;
376 unsigned long uaddr;
377 unsigned int npages;
378 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
379
380 uaddr = (unsigned long)vaddr;
381 npages = num_dma_pages(uaddr, size);
382
383 if (translate_phb(to_pci_dev(dev)))
384 dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
385 else
386 dma_handle = virt_to_bus(vaddr);
387
388 return dma_handle;
389}
390
391void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
392 size_t size, int direction)
393{
394 struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
395 unsigned int npages;
396
397 if (!translate_phb(to_pci_dev(dev)))
398 return;
399
400 npages = num_dma_pages(dma_handle, size);
401 iommu_free(tbl, dma_handle, npages);
402}
403
404void* calgary_alloc_coherent(struct device *dev, size_t size,
405 dma_addr_t *dma_handle, gfp_t flag)
406{
407 void *ret = NULL;
408 dma_addr_t mapping;
409 unsigned int npages, order;
410 struct iommu_table *tbl;
411
412 tbl = to_pci_dev(dev)->bus->self->sysdata;
413
414 size = PAGE_ALIGN(size); /* size rounded up to full pages */
415 npages = size >> PAGE_SHIFT;
416 order = get_order(size);
417
418 /* alloc enough pages (and possibly more) */
419 ret = (void *)__get_free_pages(flag, order);
420 if (!ret)
421 goto error;
422 memset(ret, 0, size);
423
424 if (translate_phb(to_pci_dev(dev))) {
425 /* set up tces to cover the allocated range */
426 mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
427 if (mapping == bad_dma_address)
428 goto free;
429
430 *dma_handle = mapping;
431 } else /* non translated slot */
432 *dma_handle = virt_to_bus(ret);
433
434 return ret;
435
436free:
437 free_pages((unsigned long)ret, get_order(size));
438 ret = NULL;
439error:
440 return ret;
441}
442
443static struct dma_mapping_ops calgary_dma_ops = {
444 .alloc_coherent = calgary_alloc_coherent,
445 .map_single = calgary_map_single,
446 .unmap_single = calgary_unmap_single,
447 .map_sg = calgary_map_sg,
448 .unmap_sg = calgary_unmap_sg,
449};
450
451static inline int busno_to_phbid(unsigned char num)
452{
453 return bus_to_phb(num) % PHBS_PER_CALGARY;
454}
455
456static inline unsigned long split_queue_offset(unsigned char num)
457{
458 size_t idx = busno_to_phbid(num);
459
460 return split_queue_offsets[idx];
461}
462
463static inline unsigned long tar_offset(unsigned char num)
464{
465 size_t idx = busno_to_phbid(num);
466
467 return tar_offsets[idx];
468}
469
470static inline unsigned long phb_offset(unsigned char num)
471{
472 size_t idx = busno_to_phbid(num);
473
474 return phb_offsets[idx];
475}
476
477static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
478{
479 unsigned long target = ((unsigned long)bar) | offset;
480 return (void __iomem*)target;
481}
482
483static void tce_cache_blast(struct iommu_table *tbl)
484{
485 u64 val;
486 u32 aer;
487 int i = 0;
488 void __iomem *bbar = tbl->bbar;
489 void __iomem *target;
490
491 /* disable arbitration on the bus */
492 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
493 aer = readl(target);
494 writel(0, target);
495
496 /* read plssr to ensure it got there */
497 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
498 val = readl(target);
499
500 /* poll split queues until all DMA activity is done */
501 target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
502 do {
503 val = readq(target);
504 i++;
505 } while ((val & 0xff) != 0xff && i < 100);
506 if (i == 100)
507 printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
508 "continuing anyway\n");
509
510 /* invalidate TCE cache */
511 target = calgary_reg(bbar, tar_offset(tbl->it_busno));
512 writeq(tbl->tar_val, target);
513
514 /* enable arbitration */
515 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
516 writel(aer, target);
517 (void)readl(target); /* flush */
518}
519
520static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
521 u64 limit)
522{
523 unsigned int numpages;
524
525 limit = limit | 0xfffff;
526 limit++;
527
528 numpages = ((limit - start) >> PAGE_SHIFT);
529 iommu_range_reserve(dev->sysdata, start, numpages);
530}
531
532static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
533{
534 void __iomem *target;
535 u64 low, high, sizelow;
536 u64 start, limit;
537 struct iommu_table *tbl = dev->sysdata;
538 unsigned char busnum = dev->bus->number;
539 void __iomem *bbar = tbl->bbar;
540
541 /* peripheral MEM_1 region */
542 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
543 low = be32_to_cpu(readl(target));
544 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
545 high = be32_to_cpu(readl(target));
546 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
547 sizelow = be32_to_cpu(readl(target));
548
549 start = (high << 32) | low;
550 limit = sizelow;
551
552 calgary_reserve_mem_region(dev, start, limit);
553}
554
555static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
556{
557 void __iomem *target;
558 u32 val32;
559 u64 low, high, sizelow, sizehigh;
560 u64 start, limit;
561 struct iommu_table *tbl = dev->sysdata;
562 unsigned char busnum = dev->bus->number;
563 void __iomem *bbar = tbl->bbar;
564
565 /* is it enabled? */
566 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
567 val32 = be32_to_cpu(readl(target));
568 if (!(val32 & PHB_MEM2_ENABLE))
569 return;
570
571 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
572 low = be32_to_cpu(readl(target));
573 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
574 high = be32_to_cpu(readl(target));
575 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
576 sizelow = be32_to_cpu(readl(target));
577 target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
578 sizehigh = be32_to_cpu(readl(target));
579
580 start = (high << 32) | low;
581 limit = (sizehigh << 32) | sizelow;
582
583 calgary_reserve_mem_region(dev, start, limit);
584}
585
586/*
587 * some regions of the IO address space do not get translated, so we
588 * must not give devices IO addresses in those regions. The regions
589 * are the 640KB-1MB region and the two PCI peripheral memory holes.
590 * Reserve all of them in the IOMMU bitmap to avoid giving them out
591 * later.
592 */
593static void __init calgary_reserve_regions(struct pci_dev *dev)
594{
595 unsigned int npages;
596 void __iomem *bbar;
597 unsigned char busnum;
598 u64 start;
599 struct iommu_table *tbl = dev->sysdata;
600
601 bbar = tbl->bbar;
602 busnum = dev->bus->number;
603
604 /* reserve bad_dma_address in case it's a legal address */
605 iommu_range_reserve(tbl, bad_dma_address, 1);
606
607 /* avoid the BIOS/VGA first 640KB-1MB region */
608 start = (640 * 1024);
609 npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
610 iommu_range_reserve(tbl, start, npages);
611
612 /* reserve the two PCI peripheral memory regions in IO space */
613 calgary_reserve_peripheral_mem_1(dev);
614 calgary_reserve_peripheral_mem_2(dev);
615}
616
617static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
618{
619 u64 val64;
620 u64 table_phys;
621 void __iomem *target;
622 int ret;
623 struct iommu_table *tbl;
624
625 /* build TCE tables for each PHB */
626 ret = build_tce_table(dev, bbar);
627 if (ret)
628 return ret;
629
630 calgary_reserve_regions(dev);
631
632 /* set TARs for each PHB */
633 target = calgary_reg(bbar, tar_offset(dev->bus->number));
634 val64 = be64_to_cpu(readq(target));
635
636 /* zero out all TAR bits under sw control */
637 val64 &= ~TAR_SW_BITS;
638
639 tbl = dev->sysdata;
640 table_phys = (u64)__pa(tbl->it_base);
641 val64 |= table_phys;
642
643 BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
644 val64 |= (u64) specified_table_size;
645
646 tbl->tar_val = cpu_to_be64(val64);
647 writeq(tbl->tar_val, target);
648 readq(target); /* flush */
649
650 return 0;
651}
652
653static void __init calgary_free_tar(struct pci_dev *dev)
654{
655 u64 val64;
656 struct iommu_table *tbl = dev->sysdata;
657 void __iomem *target;
658
659 target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
660 val64 = be64_to_cpu(readq(target));
661 val64 &= ~TAR_SW_BITS;
662 writeq(cpu_to_be64(val64), target);
663 readq(target); /* flush */
664
665 kfree(tbl);
666 dev->sysdata = NULL;
667}
668
669static void calgary_watchdog(unsigned long data)
670{
671 struct pci_dev *dev = (struct pci_dev *)data;
672 struct iommu_table *tbl = dev->sysdata;
673 void __iomem *bbar = tbl->bbar;
674 u32 val32;
675 void __iomem *target;
676
677 target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
678 val32 = be32_to_cpu(readl(target));
679
680 /* If no error, the agent ID in the CSR is not valid */
681 if (val32 & CSR_AGENT_MASK) {
682 printk(KERN_EMERG "calgary_watchdog: DMA error on bus %d, "
683 "CSR = %#x\n", dev->bus->number, val32);
684 writel(0, target);
685
686 /* Disable bus that caused the error */
687 target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
688 PHB_CONFIG_RW_OFFSET);
689 val32 = be32_to_cpu(readl(target));
690 val32 |= PHB_SLOT_DISABLE;
691 writel(cpu_to_be32(val32), target);
692 readl(target); /* flush */
693 } else {
694 /* Reset the timer */
695 mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
696 }
697}
698
699static void __init calgary_enable_translation(struct pci_dev *dev)
700{
701 u32 val32;
702 unsigned char busnum;
703 void __iomem *target;
704 void __iomem *bbar;
705 struct iommu_table *tbl;
706
707 busnum = dev->bus->number;
708 tbl = dev->sysdata;
709 bbar = tbl->bbar;
710
711 /* enable TCE in PHB Config Register */
712 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
713 val32 = be32_to_cpu(readl(target));
714 val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
715
716 printk(KERN_INFO "Calgary: enabling translation on PHB %d\n", busnum);
717 printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
718 "bus.\n");
719
720 writel(cpu_to_be32(val32), target);
721 readl(target); /* flush */
722
723 init_timer(&tbl->watchdog_timer);
724 tbl->watchdog_timer.function = &calgary_watchdog;
725 tbl->watchdog_timer.data = (unsigned long)dev;
726 mod_timer(&tbl->watchdog_timer, jiffies);
727}
728
729static void __init calgary_disable_translation(struct pci_dev *dev)
730{
731 u32 val32;
732 unsigned char busnum;
733 void __iomem *target;
734 void __iomem *bbar;
735 struct iommu_table *tbl;
736
737 busnum = dev->bus->number;
738 tbl = dev->sysdata;
739 bbar = tbl->bbar;
740
741 /* disable TCE in PHB Config Register */
742 target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
743 val32 = be32_to_cpu(readl(target));
744 val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
745
746 printk(KERN_INFO "Calgary: disabling translation on PHB %d!\n", busnum);
747 writel(cpu_to_be32(val32), target);
748 readl(target); /* flush */
749
750 del_timer_sync(&tbl->watchdog_timer);
751}
752
753static inline unsigned int __init locate_register_space(struct pci_dev *dev)
754{
755 int rionodeid;
756 u32 address;
757
758 rionodeid = (dev->bus->number % 15 > 4) ? 3 : 2;
759 /*
760 * register space address calculation as follows:
761 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
762 * ChassisBase is always zero for x366/x260/x460
763 * RioNodeId is 2 for first Calgary, 3 for second Calgary
764 */
765 address = START_ADDRESS -
766 (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 15)) +
767 (0x100000) * (rionodeid - CHASSIS_BASE);
768 return address;
769}
770
771static int __init calgary_init_one_nontraslated(struct pci_dev *dev)
772{
773 dev->sysdata = NULL;
774 dev->bus->self = dev;
775
776 return 0;
777}
778
779static int __init calgary_init_one(struct pci_dev *dev)
780{
781 u32 address;
782 void __iomem *bbar;
783 int ret;
784
785 address = locate_register_space(dev);
786 /* map entire 1MB of Calgary config space */
787 bbar = ioremap_nocache(address, 1024 * 1024);
788 if (!bbar) {
789 ret = -ENODATA;
790 goto done;
791 }
792
793 ret = calgary_setup_tar(dev, bbar);
794 if (ret)
795 goto iounmap;
796
797 dev->bus->self = dev;
798 calgary_enable_translation(dev);
799
800 return 0;
801
802iounmap:
803 iounmap(bbar);
804done:
805 return ret;
806}
807
808static int __init calgary_init(void)
809{
810 int i, ret = -ENODEV;
811 struct pci_dev *dev = NULL;
812
813 for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) {
814 dev = pci_get_device(PCI_VENDOR_ID_IBM,
815 PCI_DEVICE_ID_IBM_CALGARY,
816 dev);
817 if (!dev)
818 break;
819 if (!translate_phb(dev)) {
820 calgary_init_one_nontraslated(dev);
821 continue;
822 }
823 if (!tce_table_kva[i] && !translate_empty_slots) {
824 pci_dev_put(dev);
825 continue;
826 }
827 ret = calgary_init_one(dev);
828 if (ret)
829 goto error;
830 }
831
832 return ret;
833
834error:
835 for (i--; i >= 0; i--) {
836 dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
837 PCI_DEVICE_ID_IBM_CALGARY,
838 dev);
839 if (!translate_phb(dev)) {
840 pci_dev_put(dev);
841 continue;
842 }
843 if (!tce_table_kva[i] && !translate_empty_slots)
844 continue;
845 calgary_disable_translation(dev);
846 calgary_free_tar(dev);
847 pci_dev_put(dev);
848 }
849
850 return ret;
851}
852
853static inline int __init determine_tce_table_size(u64 ram)
854{
855 int ret;
856
857 if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
858 return specified_table_size;
859
860 /*
861 * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
862 * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
863 * larger table size has twice as many entries, so shift the
864 * max ram address by 13 to divide by 8K and then look at the
865 * order of the result to choose between 0-7.
866 */
867 ret = get_order(ram >> 13);
868 if (ret > TCE_TABLE_SIZE_8M)
869 ret = TCE_TABLE_SIZE_8M;
870
871 return ret;
872}
873
874void __init detect_calgary(void)
875{
876 u32 val;
877 int bus, table_idx;
878 void *tbl;
879 int detected = 0;
880
881 /*
882 * if the user specified iommu=off or iommu=soft or we found
883 * another HW IOMMU already, bail out.
884 */
885 if (swiotlb || no_iommu || iommu_detected)
886 return;
887
888 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
889
890 for (bus = 0, table_idx = 0;
891 bus <= num_online_nodes() * MAX_PHB_BUS_NUM;
892 bus++) {
893 BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM);
894 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
895 continue;
896 if (test_bit(bus, translation_disabled)) {
897 printk(KERN_INFO "Calgary: translation is disabled for "
898 "PHB 0x%x\n", bus);
899 /* skip this phb, don't allocate a tbl for it */
900 tce_table_kva[table_idx] = NULL;
901 table_idx++;
902 continue;
903 }
904 /*
905 * scan the first slot of the PCI bus to see if there
906 * are any devices present
907 */
908 val = read_pci_config(bus, 1, 0, 0);
909 if (val != 0xffffffff || translate_empty_slots) {
910 tbl = alloc_tce_table();
911 if (!tbl)
912 goto cleanup;
913 detected = 1;
914 } else
915 tbl = NULL;
916
917 tce_table_kva[table_idx] = tbl;
918 table_idx++;
919 }
920
921 if (detected) {
922 iommu_detected = 1;
923 calgary_detected = 1;
924 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. "
925 "TCE table spec is %d.\n", specified_table_size);
926 }
927 return;
928
929cleanup:
930 for (--table_idx; table_idx >= 0; --table_idx)
931 if (tce_table_kva[table_idx])
932 free_tce_table(tce_table_kva[table_idx]);
933}
934
935int __init calgary_iommu_init(void)
936{
937 int ret;
938
939 if (no_iommu || swiotlb)
940 return -ENODEV;
941
942 if (!calgary_detected)
943 return -ENODEV;
944
945 /* ok, we're trying to use Calgary - let's roll */
946 printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
947
948 ret = calgary_init();
949 if (ret) {
950 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
951 "falling back to no_iommu\n", ret);
952 if (end_pfn > MAX_DMA32_PFN)
953 printk(KERN_ERR "WARNING more than 4GB of memory, "
954 "32bit PCI may malfunction.\n");
955 return ret;
956 }
957
958 force_iommu = 1;
959 dma_ops = &calgary_dma_ops;
960
961 return 0;
962}
963
964static int __init calgary_parse_options(char *p)
965{
966 unsigned int bridge;
967 size_t len;
968 char* endp;
969
970 while (*p) {
971 if (!strncmp(p, "64k", 3))
972 specified_table_size = TCE_TABLE_SIZE_64K;
973 else if (!strncmp(p, "128k", 4))
974 specified_table_size = TCE_TABLE_SIZE_128K;
975 else if (!strncmp(p, "256k", 4))
976 specified_table_size = TCE_TABLE_SIZE_256K;
977 else if (!strncmp(p, "512k", 4))
978 specified_table_size = TCE_TABLE_SIZE_512K;
979 else if (!strncmp(p, "1M", 2))
980 specified_table_size = TCE_TABLE_SIZE_1M;
981 else if (!strncmp(p, "2M", 2))
982 specified_table_size = TCE_TABLE_SIZE_2M;
983 else if (!strncmp(p, "4M", 2))
984 specified_table_size = TCE_TABLE_SIZE_4M;
985 else if (!strncmp(p, "8M", 2))
986 specified_table_size = TCE_TABLE_SIZE_8M;
987
988 len = strlen("translate_empty_slots");
989 if (!strncmp(p, "translate_empty_slots", len))
990 translate_empty_slots = 1;
991
992 len = strlen("disable");
993 if (!strncmp(p, "disable", len)) {
994 p += len;
995 if (*p == '=')
996 ++p;
997 if (*p == '\0')
998 break;
999 bridge = simple_strtol(p, &endp, 0);
1000 if (p == endp)
1001 break;
1002
1003 if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) {
1004 printk(KERN_INFO "Calgary: disabling "
1005 "translation for PHB 0x%x\n", bridge);
1006 set_bit(bridge, translation_disabled);
1007 }
1008 }
1009
1010 p = strpbrk(p, ",");
1011 if (!p)
1012 break;
1013
1014 p++; /* skip ',' */
1015 }
1016 return 1;
1017}
1018__setup("calgary=", calgary_parse_options);
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index a45844c7e3a3..9c44f4f2433d 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/proto.h> 11#include <asm/proto.h>
12#include <asm/calgary.h>
12 13
13int iommu_merge __read_mostly = 0; 14int iommu_merge __read_mostly = 0;
14EXPORT_SYMBOL(iommu_merge); 15EXPORT_SYMBOL(iommu_merge);
@@ -291,6 +292,10 @@ void __init pci_iommu_alloc(void)
291 iommu_hole_init(); 292 iommu_hole_init();
292#endif 293#endif
293 294
295#ifdef CONFIG_CALGARY_IOMMU
296 detect_calgary();
297#endif
298
294#ifdef CONFIG_SWIOTLB 299#ifdef CONFIG_SWIOTLB
295 pci_swiotlb_init(); 300 pci_swiotlb_init();
296#endif 301#endif
@@ -298,6 +303,10 @@ void __init pci_iommu_alloc(void)
298 303
299static int __init pci_iommu_init(void) 304static int __init pci_iommu_init(void)
300{ 305{
306#ifdef CONFIG_CALGARY_IOMMU
307 calgary_iommu_init();
308#endif
309
301#ifdef CONFIG_IOMMU 310#ifdef CONFIG_IOMMU
302 gart_iommu_init(); 311 gart_iommu_init();
303#endif 312#endif
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
new file mode 100644
index 000000000000..8d4c67f61b8e
--- /dev/null
+++ b/arch/x86_64/kernel/tce.c
@@ -0,0 +1,202 @@
1/*
2 * Derived from arch/powerpc/platforms/pseries/iommu.c
3 *
4 * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
5 * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/config.h>
23#include <linux/types.h>
24#include <linux/slab.h>
25#include <linux/mm.h>
26#include <linux/spinlock.h>
27#include <linux/string.h>
28#include <linux/pci.h>
29#include <linux/dma-mapping.h>
30#include <linux/bootmem.h>
31#include <asm/tce.h>
32#include <asm/calgary.h>
33#include <asm/proto.h>
34
35/* flush a tce at 'tceaddr' to main memory */
36static inline void flush_tce(void* tceaddr)
37{
38 /* a single tce can't cross a cache line */
39 if (cpu_has_clflush)
40 asm volatile("clflush (%0)" :: "r" (tceaddr));
41 else
42 asm volatile("wbinvd":::"memory");
43}
44
45void tce_build(struct iommu_table *tbl, unsigned long index,
46 unsigned int npages, unsigned long uaddr, int direction)
47{
48 u64* tp;
49 u64 t;
50 u64 rpn;
51
52 t = (1 << TCE_READ_SHIFT);
53 if (direction != DMA_TO_DEVICE)
54 t |= (1 << TCE_WRITE_SHIFT);
55
56 tp = ((u64*)tbl->it_base) + index;
57
58 while (npages--) {
59 rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
60 t &= ~TCE_RPN_MASK;
61 t |= (rpn << TCE_RPN_SHIFT);
62
63 *tp = cpu_to_be64(t);
64 flush_tce(tp);
65
66 uaddr += PAGE_SIZE;
67 tp++;
68 }
69}
70
71void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
72{
73 u64* tp;
74
75 tp = ((u64*)tbl->it_base) + index;
76
77 while (npages--) {
78 *tp = cpu_to_be64(0);
79 flush_tce(tp);
80 tp++;
81 }
82}
83
84static inline unsigned int table_size_to_number_of_entries(unsigned char size)
85{
86 /*
87 * size is the order of the table, 0-7
88 * smallest table is 8K entries, so shift result by 13 to
89 * multiply by 8K
90 */
91 return (1 << size) << 13;
92}
93
94static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
95{
96 unsigned int bitmapsz;
97 unsigned int tce_table_index;
98 unsigned long bmppages;
99 int ret;
100
101 tbl->it_busno = dev->bus->number;
102
103 /* set the tce table size - measured in entries */
104 tbl->it_size = table_size_to_number_of_entries(specified_table_size);
105
106 tce_table_index = bus_to_phb(tbl->it_busno);
107 tbl->it_base = (unsigned long)tce_table_kva[tce_table_index];
108 if (!tbl->it_base) {
109 printk(KERN_ERR "Calgary: iommu_table_setparms: "
110 "no table allocated?!\n");
111 ret = -ENOMEM;
112 goto done;
113 }
114
115 /*
116 * number of bytes needed for the bitmap size in number of
117 * entries; we need one bit per entry
118 */
119 bitmapsz = tbl->it_size / BITS_PER_BYTE;
120 bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz));
121 if (!bmppages) {
122 printk(KERN_ERR "Calgary: cannot allocate bitmap\n");
123 ret = -ENOMEM;
124 goto done;
125 }
126
127 tbl->it_map = (unsigned long*)bmppages;
128
129 memset(tbl->it_map, 0, bitmapsz);
130
131 tbl->it_hint = 0;
132
133 spin_lock_init(&tbl->it_lock);
134
135 return 0;
136
137done:
138 return ret;
139}
140
141int build_tce_table(struct pci_dev *dev, void __iomem *bbar)
142{
143 struct iommu_table *tbl;
144 int ret;
145
146 if (dev->sysdata) {
147 printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
148 dev, dev->sysdata);
149 BUG();
150 }
151
152 tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL);
153 if (!tbl) {
154 printk(KERN_ERR "Calgary: error allocating iommu_table\n");
155 ret = -ENOMEM;
156 goto done;
157 }
158
159 ret = tce_table_setparms(dev, tbl);
160 if (ret)
161 goto free_tbl;
162
163 tce_free(tbl, 0, tbl->it_size);
164
165 tbl->bbar = bbar;
166
167 /*
168 * NUMA is already using the bus's sysdata pointer, so we use
169 * the bus's pci_dev's sysdata instead.
170 */
171 dev->sysdata = tbl;
172
173 return 0;
174
175free_tbl:
176 kfree(tbl);
177done:
178 return ret;
179}
180
181void* alloc_tce_table(void)
182{
183 unsigned int size;
184
185 size = table_size_to_number_of_entries(specified_table_size);
186 size *= TCE_ENTRY_SIZE;
187
188 return __alloc_bootmem_low(size, size, 0);
189}
190
191void free_tce_table(void *tbl)
192{
193 unsigned int size;
194
195 if (!tbl)
196 return;
197
198 size = table_size_to_number_of_entries(specified_table_size);
199 size *= TCE_ENTRY_SIZE;
200
201 free_bootmem(__pa(tbl), size);
202}