aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/cell/iommu.c
diff options
context:
space:
mode:
authorJeremy Kerr <jk@ozlabs.org>2006-11-11 01:25:18 -0500
committerPaul Mackerras <paulus@samba.org>2006-12-04 04:39:02 -0500
commit165785e5c0be3ad43e8b8eadfbd25e92c2cd002a (patch)
treee582b911574b193544aa3aaf5527fd9311d3c108 /arch/powerpc/platforms/cell/iommu.c
parentacfd946a1aaffdec346c2864f596d4d92125d1ad (diff)
[POWERPC] Cell iommu support
This patch adds full cell iommu support (and iommu disabled mode). It implements mapping/unmapping of iommu pages on demand using the standard powerpc iommu framework. It also supports running with iommu disabled for machines with less than 2GB of memory. (The default is off in that case, though it can be forced on with the kernel command line option iommu=force). Signed-off-by: Jeremy Kerr <jk@ozlabs.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/platforms/cell/iommu.c')
-rw-r--r--arch/powerpc/platforms/cell/iommu.c1005
1 files changed, 653 insertions, 352 deletions
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 6a97fe1319d..b43466ba809 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -1,446 +1,747 @@
1/* 1/*
2 * IOMMU implementation for Cell Broadband Processor Architecture 2 * IOMMU implementation for Cell Broadband Processor Architecture
3 * We just establish a linear mapping at boot by setting all the
4 * IOPT cache entries in the CPU.
5 * The mapping functions should be identical to pci_direct_iommu,
6 * except for the handling of the high order bit that is required
7 * by the Spider bridge. These should be split into a separate
8 * file at the point where we get a different bridge chip.
9 * 3 *
10 * Copyright (C) 2005 IBM Deutschland Entwicklung GmbH, 4 * (C) Copyright IBM Corporation 2006
11 * Arnd Bergmann <arndb@de.ibm.com>
12 * 5 *
13 * Based on linear mapping 6 * Author: Jeremy Kerr <jk@ozlabs.org>
14 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
15 * 7 *
16 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or modify
17 * modify it under the terms of the GNU General Public License 9 * it under the terms of the GNU General Public License as published by
18 * as published by the Free Software Foundation; either version 10 * the Free Software Foundation; either version 2, or (at your option)
19 * 2 of the License, or (at your option) any later version. 11 * any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */ 21 */
21 22
22#undef DEBUG 23#undef DEBUG
23 24
24#include <linux/kernel.h> 25#include <linux/kernel.h>
25#include <linux/pci.h>
26#include <linux/delay.h>
27#include <linux/string.h>
28#include <linux/init.h> 26#include <linux/init.h>
29#include <linux/bootmem.h> 27#include <linux/interrupt.h>
30#include <linux/mm.h> 28#include <linux/notifier.h>
31#include <linux/dma-mapping.h>
32#include <linux/kernel.h>
33#include <linux/compiler.h>
34 29
35#include <asm/sections.h>
36#include <asm/iommu.h>
37#include <asm/io.h>
38#include <asm/prom.h> 30#include <asm/prom.h>
39#include <asm/pci-bridge.h> 31#include <asm/iommu.h>
40#include <asm/machdep.h> 32#include <asm/machdep.h>
41#include <asm/pmac_feature.h> 33#include <asm/pci-bridge.h>
42#include <asm/abs_addr.h>
43#include <asm/system.h>
44#include <asm/ppc-pci.h>
45#include <asm/udbg.h> 34#include <asm/udbg.h>
35#include <asm/of_platform.h>
36#include <asm/lmb.h>
46 37
47#include "iommu.h" 38#include "cbe_regs.h"
39#include "interrupt.h"
48 40
49static inline unsigned long 41/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages
50get_iopt_entry(unsigned long real_address, unsigned long ioid, 42 * instead of leaving them mapped to some dummy page. This can be
51 unsigned long prot) 43 * enabled once the appropriate workarounds for spider bugs have
52{ 44 * been enabled
53 return (prot & IOPT_PROT_MASK) 45 */
54 | (IOPT_COHERENT) 46#define CELL_IOMMU_REAL_UNMAP
55 | (IOPT_ORDER_VC) 47
56 | (real_address & IOPT_RPN_MASK) 48/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of
57 | (ioid & IOPT_IOID_MASK); 49 * IO PTEs based on the transfer direction. That can be enabled
58} 50 * once spider-net has been fixed to pass the correct direction
51 * to the DMA mapping functions
52 */
53#define CELL_IOMMU_STRICT_PROTECTION
54
55
56#define NR_IOMMUS 2
57
58/* IOC mmap registers */
59#define IOC_Reg_Size 0x2000
60
61#define IOC_IOPT_CacheInvd 0x908
62#define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul
63#define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul
64#define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul
65
66#define IOC_IOST_Origin 0x918
67#define IOC_IOST_Origin_E 0x8000000000000000ul
68#define IOC_IOST_Origin_HW 0x0000000000000800ul
69#define IOC_IOST_Origin_HL 0x0000000000000400ul
70
71#define IOC_IO_ExcpStat 0x920
72#define IOC_IO_ExcpStat_V 0x8000000000000000ul
73#define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul
74#define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul
75#define IOC_IO_ExcpStat_SPF_P 0x4000000000000000ul
76#define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul
77#define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul
78#define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful
79
80#define IOC_IO_ExcpMask 0x928
81#define IOC_IO_ExcpMask_SFE 0x4000000000000000ul
82#define IOC_IO_ExcpMask_PFE 0x2000000000000000ul
83
84#define IOC_IOCmd_Offset 0x1000
85
86#define IOC_IOCmd_Cfg 0xc00
87#define IOC_IOCmd_Cfg_TE 0x0000800000000000ul
88
89
90/* Segment table entries */
91#define IOSTE_V 0x8000000000000000ul /* valid */
92#define IOSTE_H 0x4000000000000000ul /* cache hint */
93#define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */
94#define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. pages in IOPT */
95#define IOSTE_PS_Mask 0x0000000000000007ul /* page size */
96#define IOSTE_PS_4K 0x0000000000000001ul /* - 4kB */
97#define IOSTE_PS_64K 0x0000000000000003ul /* - 64kB */
98#define IOSTE_PS_1M 0x0000000000000005ul /* - 1MB */
99#define IOSTE_PS_16M 0x0000000000000007ul /* - 16MB */
100
101/* Page table entries */
102#define IOPTE_PP_W 0x8000000000000000ul /* protection: write */
103#define IOPTE_PP_R 0x4000000000000000ul /* protection: read */
104#define IOPTE_M 0x2000000000000000ul /* coherency required */
105#define IOPTE_SO_R 0x1000000000000000ul /* ordering: writes */
106#define IOPTE_SO_RW 0x1800000000000000ul /* ordering: r & w */
107#define IOPTE_RPN_Mask 0x07fffffffffff000ul /* RPN */
108#define IOPTE_H 0x0000000000000800ul /* cache hint */
109#define IOPTE_IOID_Mask 0x00000000000007fful /* ioid */
110
111
112/* IOMMU sizing */
113#define IO_SEGMENT_SHIFT 28
114#define IO_PAGENO_BITS (IO_SEGMENT_SHIFT - IOMMU_PAGE_SHIFT)
115
116/* The high bit needs to be set on every DMA address */
117#define SPIDER_DMA_OFFSET 0x80000000ul
118
119struct iommu_window {
120 struct list_head list;
121 struct cbe_iommu *iommu;
122 unsigned long offset;
123 unsigned long size;
124 unsigned long pte_offset;
125 unsigned int ioid;
126 struct iommu_table table;
127};
59 128
60typedef struct { 129#define NAMESIZE 8
61 unsigned long val; 130struct cbe_iommu {
62} ioste; 131 int nid;
132 char name[NAMESIZE];
133 void __iomem *xlate_regs;
134 void __iomem *cmd_regs;
135 unsigned long *stab;
136 unsigned long *ptab;
137 void *pad_page;
138 struct list_head windows;
139};
63 140
64static inline ioste 141/* Static array of iommus, one per node
65mk_ioste(unsigned long val) 142 * each contains a list of windows, keyed from dma_window property
66{ 143 * - on bus setup, look for a matching window, or create one
67 ioste ioste = { .val = val, }; 144 * - on dev setup, assign iommu_table ptr
68 return ioste; 145 */
69} 146static struct cbe_iommu iommus[NR_IOMMUS];
147static int cbe_nr_iommus;
70 148
71static inline ioste 149static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
72get_iost_entry(unsigned long iopt_base, unsigned long io_address, unsigned page_size) 150 long n_ptes)
73{ 151{
74 unsigned long ps; 152 unsigned long *reg, val;
75 unsigned long iostep; 153 long n;
76 unsigned long nnpt;
77 unsigned long shift;
78
79 switch (page_size) {
80 case 0x1000000:
81 ps = IOST_PS_16M;
82 nnpt = 0; /* one page per segment */
83 shift = 5; /* segment has 16 iopt entries */
84 break;
85
86 case 0x100000:
87 ps = IOST_PS_1M;
88 nnpt = 0; /* one page per segment */
89 shift = 1; /* segment has 256 iopt entries */
90 break;
91
92 case 0x10000:
93 ps = IOST_PS_64K;
94 nnpt = 0x07; /* 8 pages per io page table */
95 shift = 0; /* all entries are used */
96 break;
97
98 case 0x1000:
99 ps = IOST_PS_4K;
100 nnpt = 0x7f; /* 128 pages per io page table */
101 shift = 0; /* all entries are used */
102 break;
103
104 default: /* not a known compile time constant */
105 {
106 /* BUILD_BUG_ON() is not usable here */
107 extern void __get_iost_entry_bad_page_size(void);
108 __get_iost_entry_bad_page_size();
109 }
110 break;
111 }
112 154
113 iostep = iopt_base + 155 reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
114 /* need 8 bytes per iopte */
115 (((io_address / page_size * 8)
116 /* align io page tables on 4k page boundaries */
117 << shift)
118 /* nnpt+1 pages go into each iopt */
119 & ~(nnpt << 12));
120
121 nnpt++; /* this seems to work, but the documentation is not clear
122 about whether we put nnpt or nnpt-1 into the ioste bits.
123 In theory, this can't work for 4k pages. */
124 return mk_ioste(IOST_VALID_MASK
125 | (iostep & IOST_PT_BASE_MASK)
126 | ((nnpt << 5) & IOST_NNPT_MASK)
127 | (ps & IOST_PS_MASK));
128}
129 156
130/* compute the address of an io pte */ 157 while (n_ptes > 0) {
131static inline unsigned long 158 /* we can invalidate up to 1 << 11 PTEs at once */
132get_ioptep(ioste iost_entry, unsigned long io_address) 159 n = min(n_ptes, 1l << 11);
133{ 160 val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask)
134 unsigned long iopt_base; 161 | (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask)
135 unsigned long page_size; 162 | IOC_IOPT_CacheInvd_Busy;
136 unsigned long page_number;
137 unsigned long iopt_offset;
138
139 iopt_base = iost_entry.val & IOST_PT_BASE_MASK;
140 page_size = iost_entry.val & IOST_PS_MASK;
141
142 /* decode page size to compute page number */
143 page_number = (io_address & 0x0fffffff) >> (10 + 2 * page_size);
144 /* page number is an offset into the io page table */
145 iopt_offset = (page_number << 3) & 0x7fff8ul;
146 return iopt_base + iopt_offset;
147}
148 163
149/* compute the tag field of the iopt cache entry */ 164 out_be64(reg, val);
150static inline unsigned long 165 while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy)
151get_ioc_tag(ioste iost_entry, unsigned long io_address) 166 ;
152{
153 unsigned long iopte = get_ioptep(iost_entry, io_address);
154 167
155 return IOPT_VALID_MASK 168 n_ptes -= n;
156 | ((iopte & 0x00000000000000ff8ul) >> 3) 169 pte += n;
157 | ((iopte & 0x0000003fffffc0000ul) >> 9); 170 }
158} 171}
159 172
160/* compute the hashed 6 bit index for the 4-way associative pte cache */ 173static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
161static inline unsigned long 174 unsigned long uaddr, enum dma_data_direction direction)
162get_ioc_hash(ioste iost_entry, unsigned long io_address)
163{ 175{
164 unsigned long iopte = get_ioptep(iost_entry, io_address); 176 int i;
165 177 unsigned long *io_pte, base_pte;
166 return ((iopte & 0x000000000000001f8ul) >> 3) 178 struct iommu_window *window =
167 ^ ((iopte & 0x00000000000020000ul) >> 17) 179 container_of(tbl, struct iommu_window, table);
168 ^ ((iopte & 0x00000000000010000ul) >> 15) 180
169 ^ ((iopte & 0x00000000000008000ul) >> 13) 181 /* implementing proper protection causes problems with the spidernet
170 ^ ((iopte & 0x00000000000004000ul) >> 11) 182 * driver - check mapping directions later, but allow read & write by
171 ^ ((iopte & 0x00000000000002000ul) >> 9) 183 * default for now.*/
172 ^ ((iopte & 0x00000000000001000ul) >> 7); 184#ifdef CELL_IOMMU_STRICT_PROTECTION
185 /* to avoid referencing a global, we use a trick here to setup the
186 * protection bit. "prot" is set up to be 3 fields of 4 bits appended
187 * together for each of the 3 supported direction values. It is then
188 * shifted left so that the fields matching the desired direction
189 * lands on the appropriate bits, and other bits are masked out.
190 */
191 const unsigned long prot = 0xc48;
192 base_pte =
193 ((prot << (52 + 4 * direction)) & (IOPTE_PP_W | IOPTE_PP_R))
194 | IOPTE_M | IOPTE_SO_RW | (window->ioid & IOPTE_IOID_Mask);
195#else
196 base_pte = IOPTE_PP_W | IOPTE_PP_R | IOPTE_M | IOPTE_SO_RW |
197 (window->ioid & IOPTE_IOID_Mask);
198#endif
199
200 io_pte = (unsigned long *)tbl->it_base + (index - window->pte_offset);
201
202 for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE)
203 io_pte[i] = base_pte | (__pa(uaddr) & IOPTE_RPN_Mask);
204
205 mb();
206
207 invalidate_tce_cache(window->iommu, io_pte, npages);
208
209 pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
210 index, npages, direction, base_pte);
173} 211}
174 212
175/* same as above, but pretend that we have a simpler 1-way associative 213static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
176 pte cache with an 8 bit index */
177static inline unsigned long
178get_ioc_hash_1way(ioste iost_entry, unsigned long io_address)
179{ 214{
180 unsigned long iopte = get_ioptep(iost_entry, io_address);
181
182 return ((iopte & 0x000000000000001f8ul) >> 3)
183 ^ ((iopte & 0x00000000000020000ul) >> 17)
184 ^ ((iopte & 0x00000000000010000ul) >> 15)
185 ^ ((iopte & 0x00000000000008000ul) >> 13)
186 ^ ((iopte & 0x00000000000004000ul) >> 11)
187 ^ ((iopte & 0x00000000000002000ul) >> 9)
188 ^ ((iopte & 0x00000000000001000ul) >> 7)
189 ^ ((iopte & 0x0000000000000c000ul) >> 8);
190}
191 215
192static inline ioste 216 int i;
193get_iost_cache(void __iomem *base, unsigned long index) 217 unsigned long *io_pte, pte;
194{ 218 struct iommu_window *window =
195 unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR); 219 container_of(tbl, struct iommu_window, table);
196 return mk_ioste(in_be64(&p[index]));
197}
198 220
199static inline void 221 pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages);
200set_iost_cache(void __iomem *base, unsigned long index, ioste ste)
201{
202 unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR);
203 pr_debug("ioste %02lx was %016lx, store %016lx", index,
204 get_iost_cache(base, index).val, ste.val);
205 out_be64(&p[index], ste.val);
206 pr_debug(" now %016lx\n", get_iost_cache(base, index).val);
207}
208 222
209static inline unsigned long 223#ifdef CELL_IOMMU_REAL_UNMAP
210get_iopt_cache(void __iomem *base, unsigned long index, unsigned long *tag) 224 pte = 0;
211{ 225#else
212 unsigned long __iomem *tags = (void *)(base + IOC_PT_CACHE_DIR); 226 /* spider bridge does PCI reads after freeing - insert a mapping
213 unsigned long __iomem *p = (void *)(base + IOC_PT_CACHE_REG); 227 * to a scratch page instead of an invalid entry */
228 pte = IOPTE_PP_R | IOPTE_M | IOPTE_SO_RW | __pa(window->iommu->pad_page)
229 | (window->ioid & IOPTE_IOID_Mask);
230#endif
214 231
215 *tag = tags[index]; 232 io_pte = (unsigned long *)tbl->it_base + (index - window->pte_offset);
216 rmb();
217 return *p;
218}
219 233
220static inline void 234 for (i = 0; i < npages; i++)
221set_iopt_cache(void __iomem *base, unsigned long index, 235 io_pte[i] = pte;
222 unsigned long tag, unsigned long val) 236
223{ 237 mb();
224 unsigned long __iomem *tags = base + IOC_PT_CACHE_DIR;
225 unsigned long __iomem *p = base + IOC_PT_CACHE_REG;
226 238
227 out_be64(p, val); 239 invalidate_tce_cache(window->iommu, io_pte, npages);
228 out_be64(&tags[index], tag);
229} 240}
230 241
231static inline void 242static irqreturn_t ioc_interrupt(int irq, void *data)
232set_iost_origin(void __iomem *base)
233{ 243{
234 unsigned long __iomem *p = base + IOC_ST_ORIGIN; 244 unsigned long stat;
235 unsigned long origin = IOSTO_ENABLE | IOSTO_SW; 245 struct cbe_iommu *iommu = data;
236 246
237 pr_debug("iost_origin %016lx, now %016lx\n", in_be64(p), origin); 247 stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
238 out_be64(p, origin); 248
249 /* Might want to rate limit it */
250 printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat);
251 printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n",
252 !!(stat & IOC_IO_ExcpStat_V),
253 (stat & IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ',
254 (stat & IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ',
255 (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write",
256 (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask));
257 printk(KERN_ERR " page=0x%016lx\n",
258 stat & IOC_IO_ExcpStat_ADDR_Mask);
259
260 /* clear interrupt */
261 stat &= ~IOC_IO_ExcpStat_V;
262 out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat);
263
264 return IRQ_HANDLED;
239} 265}
240 266
241static inline void 267static int cell_iommu_find_ioc(int nid, unsigned long *base)
242set_iocmd_config(void __iomem *base)
243{ 268{
244 unsigned long __iomem *p = base + 0xc00; 269 struct device_node *np;
245 unsigned long conf; 270 struct resource r;
271
272 *base = 0;
273
274 /* First look for new style /be nodes */
275 for_each_node_by_name(np, "ioc") {
276 if (of_node_to_nid(np) != nid)
277 continue;
278 if (of_address_to_resource(np, 0, &r)) {
279 printk(KERN_ERR "iommu: can't get address for %s\n",
280 np->full_name);
281 continue;
282 }
283 *base = r.start;
284 of_node_put(np);
285 return 0;
286 }
287
288 /* Ok, let's try the old way */
289 for_each_node_by_type(np, "cpu") {
290 const unsigned int *nidp;
291 const unsigned long *tmp;
292
293 nidp = get_property(np, "node-id", NULL);
294 if (nidp && *nidp == nid) {
295 tmp = get_property(np, "ioc-translation", NULL);
296 if (tmp) {
297 *base = *tmp;
298 of_node_put(np);
299 return 0;
300 }
301 }
302 }
246 303
247 conf = in_be64(p); 304 return -ENODEV;
248 pr_debug("iost_conf %016lx, now %016lx\n", conf, conf | IOCMD_CONF_TE);
249 out_be64(p, conf | IOCMD_CONF_TE);
250} 305}
251 306
252static void enable_mapping(void __iomem *base, void __iomem *mmio_base) 307static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, unsigned long size)
253{ 308{
254 set_iocmd_config(base); 309 struct page *page;
255 set_iost_origin(mmio_base); 310 int ret, i;
256} 311 unsigned long reg, segments, pages_per_segment, ptab_size, n_pte_pages;
312 unsigned long xlate_base;
313 unsigned int virq;
314
315 if (cell_iommu_find_ioc(iommu->nid, &xlate_base))
316 panic("%s: missing IOC register mappings for node %d\n",
317 __FUNCTION__, iommu->nid);
318
319 iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size);
320 iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset;
321
322 segments = size >> IO_SEGMENT_SHIFT;
323 pages_per_segment = 1ull << IO_PAGENO_BITS;
324
325 pr_debug("%s: iommu[%d]: segments: %lu, pages per segment: %lu\n",
326 __FUNCTION__, iommu->nid, segments, pages_per_segment);
327
328 /* set up the segment table */
329 page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0);
330 BUG_ON(!page);
331 iommu->stab = page_address(page);
332 clear_page(iommu->stab);
333
334 /* ... and the page tables. Since these are contiguous, we can treat
335 * the page tables as one array of ptes, like pSeries does.
336 */
337 ptab_size = segments * pages_per_segment * sizeof(unsigned long);
338 pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __FUNCTION__,
339 iommu->nid, ptab_size, get_order(ptab_size));
340 page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size));
341 BUG_ON(!page);
342
343 iommu->ptab = page_address(page);
344 memset(iommu->ptab, 0, ptab_size);
345
346 /* allocate a bogus page for the end of each mapping */
347 page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0);
348 BUG_ON(!page);
349 iommu->pad_page = page_address(page);
350 clear_page(iommu->pad_page);
351
352 /* number of pages needed for a page table */
353 n_pte_pages = (pages_per_segment *
354 sizeof(unsigned long)) >> IOMMU_PAGE_SHIFT;
355
356 pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n",
357 __FUNCTION__, iommu->nid, iommu->stab, iommu->ptab,
358 n_pte_pages);
359
360 /* initialise the STEs */
361 reg = IOSTE_V | ((n_pte_pages - 1) << 5);
362
363 if (IOMMU_PAGE_SIZE == 0x1000)
364 reg |= IOSTE_PS_4K;
365 else if (IOMMU_PAGE_SIZE == 0x10000)
366 reg |= IOSTE_PS_64K;
367 else {
368 extern void __unknown_page_size_error(void);
369 __unknown_page_size_error();
370 }
257 371
258struct cell_iommu { 372 pr_debug("Setting up IOMMU stab:\n");
259 unsigned long base; 373 for (i = 0; i * (1ul << IO_SEGMENT_SHIFT) < size; i++) {
260 unsigned long mmio_base; 374 iommu->stab[i] = reg |
261 void __iomem *mapped_base; 375 (__pa(iommu->ptab) + n_pte_pages * IOMMU_PAGE_SIZE * i);
262 void __iomem *mapped_mmio_base; 376 pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]);
263}; 377 }
378
379 /* ensure that the STEs have updated */
380 mb();
381
382 /* setup interrupts for the iommu. */
383 reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
384 out_be64(iommu->xlate_regs + IOC_IO_ExcpStat,
385 reg & ~IOC_IO_ExcpStat_V);
386 out_be64(iommu->xlate_regs + IOC_IO_ExcpMask,
387 IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE);
388
389 virq = irq_create_mapping(NULL,
390 IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT));
391 BUG_ON(virq == NO_IRQ);
392
393 ret = request_irq(virq, ioc_interrupt, IRQF_DISABLED,
394 iommu->name, iommu);
395 BUG_ON(ret);
264 396
265static struct cell_iommu cell_iommus[NR_CPUS]; 397 /* set the IOC segment table origin register (and turn on the iommu) */
398 reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW;
399 out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg);
400 in_be64(iommu->xlate_regs + IOC_IOST_Origin);
266 401
267/* initialize the iommu to support a simple linear mapping 402 /* turn on IO translation */
268 * for each DMA window used by any device. For now, we 403 reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE;
269 * happen to know that there is only one DMA window in use, 404 out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg);
270 * starting at iopt_phys_offset. */ 405}
271static void cell_do_map_iommu(struct cell_iommu *iommu, 406
272 unsigned int ioid, 407#if 0/* Unused for now */
273 unsigned long map_start, 408static struct iommu_window *find_window(struct cbe_iommu *iommu,
274 unsigned long map_size) 409 unsigned long offset, unsigned long size)
275{ 410{
276 unsigned long io_address, real_address; 411 struct iommu_window *window;
277 void __iomem *ioc_base, *ioc_mmio_base;
278 ioste ioste;
279 unsigned long index;
280 412
281 /* we pretend the io page table was at a very high address */ 413 /* todo: check for overlapping (but not equal) windows) */
282 const unsigned long fake_iopt = 0x10000000000ul; 414
283 const unsigned long io_page_size = 0x1000000; /* use 16M pages */ 415 list_for_each_entry(window, &(iommu->windows), list) {
284 const unsigned long io_segment_size = 0x10000000; /* 256M */ 416 if (window->offset == offset && window->size == size)
285 417 return window;
286 ioc_base = iommu->mapped_base;
287 ioc_mmio_base = iommu->mapped_mmio_base;
288
289 for (real_address = 0, io_address = map_start;
290 io_address <= map_start + map_size;
291 real_address += io_page_size, io_address += io_page_size) {
292 ioste = get_iost_entry(fake_iopt, io_address, io_page_size);
293 if ((real_address % io_segment_size) == 0) /* segment start */
294 set_iost_cache(ioc_mmio_base,
295 io_address >> 28, ioste);
296 index = get_ioc_hash_1way(ioste, io_address);
297 pr_debug("addr %08lx, index %02lx, ioste %016lx\n",
298 io_address, index, ioste.val);
299 set_iopt_cache(ioc_mmio_base,
300 get_ioc_hash_1way(ioste, io_address),
301 get_ioc_tag(ioste, io_address),
302 get_iopt_entry(real_address, ioid, IOPT_PROT_RW));
303 } 418 }
419
420 return NULL;
304} 421}
422#endif
305 423
306static void pci_dma_cell_bus_setup(struct pci_bus *b) 424static struct iommu_window * __init
425cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
426 unsigned long offset, unsigned long size,
427 unsigned long pte_offset)
307{ 428{
429 struct iommu_window *window;
308 const unsigned int *ioid; 430 const unsigned int *ioid;
309 unsigned long map_start, map_size, token;
310 const unsigned long *dma_window;
311 struct cell_iommu *iommu;
312 struct device_node *d;
313
314 d = pci_bus_to_OF_node(b);
315 431
316 ioid = get_property(d, "ioid", NULL); 432 ioid = get_property(np, "ioid", NULL);
317 if (!ioid) 433 if (ioid == NULL)
318 pr_debug("No ioid entry found !\n"); 434 printk(KERN_WARNING "iommu: missing ioid for %s using 0\n",
435 np->full_name);
436
437 window = kmalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid);
438 BUG_ON(window == NULL);
439
440 window->offset = offset;
441 window->size = size;
442 window->ioid = ioid ? *ioid : 0;
443 window->iommu = iommu;
444 window->pte_offset = pte_offset;
445
446 window->table.it_blocksize = 16;
447 window->table.it_base = (unsigned long)iommu->ptab;
448 window->table.it_index = iommu->nid;
449 window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT) +
450 window->pte_offset;
451 window->table.it_size = size >> IOMMU_PAGE_SHIFT;
452
453 iommu_init_table(&window->table, iommu->nid);
454
455 pr_debug("\tioid %d\n", window->ioid);
456 pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
457 pr_debug("\tbase 0x%016lx\n", window->table.it_base);
458 pr_debug("\toffset 0x%lx\n", window->table.it_offset);
459 pr_debug("\tsize %ld\n", window->table.it_size);
460
461 list_add(&window->list, &iommu->windows);
462
463 if (offset != 0)
464 return window;
465
466 /* We need to map and reserve the first IOMMU page since it's used
467 * by the spider workaround. In theory, we only need to do that when
468 * running on spider but it doesn't really matter.
469 *
470 * This code also assumes that we have a window that starts at 0,
471 * which is the case on all spider based blades.
472 */
473 __set_bit(0, window->table.it_map);
474 tce_build_cell(&window->table, window->table.it_offset, 1,
475 (unsigned long)iommu->pad_page, DMA_TO_DEVICE);
476 window->table.it_hint = window->table.it_blocksize;
477
478 return window;
479}
319 480
320 dma_window = get_property(d, "ibm,dma-window", NULL); 481static struct cbe_iommu *cell_iommu_for_node(int nid)
321 if (!dma_window) 482{
322 pr_debug("No ibm,dma-window entry found !\n"); 483 int i;
323 484
324 map_start = dma_window[1]; 485 for (i = 0; i < cbe_nr_iommus; i++)
325 map_size = dma_window[2]; 486 if (iommus[i].nid == nid)
326 token = dma_window[0] >> 32; 487 return &iommus[i];
488 return NULL;
489}
327 490
328 iommu = &cell_iommus[token]; 491static void cell_dma_dev_setup(struct device *dev)
492{
493 struct iommu_window *window;
494 struct cbe_iommu *iommu;
495 struct dev_archdata *archdata = &dev->archdata;
496
497 /* If we run without iommu, no need to do anything */
498 if (pci_dma_ops == &dma_direct_ops)
499 return;
500
501 /* Current implementation uses the first window available in that
502 * node's iommu. We -might- do something smarter later though it may
503 * never be necessary
504 */
505 iommu = cell_iommu_for_node(archdata->numa_node);
506 if (iommu == NULL || list_empty(&iommu->windows)) {
507 printk(KERN_ERR "iommu: missing iommu for %s (node %d)\n",
508 archdata->of_node ? archdata->of_node->full_name : "?",
509 archdata->numa_node);
510 return;
511 }
512 window = list_entry(iommu->windows.next, struct iommu_window, list);
329 513
330 cell_do_map_iommu(iommu, *ioid, map_start, map_size); 514 archdata->dma_data = &window->table;
331} 515}
332 516
333 517static void cell_pci_dma_dev_setup(struct pci_dev *dev)
334static int cell_map_iommu_hardcoded(int num_nodes)
335{ 518{
336 struct cell_iommu *iommu = NULL; 519 cell_dma_dev_setup(&dev->dev);
520}
337 521
338 pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__); 522static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
523 void *data)
524{
525 struct device *dev = data;
339 526
340 /* node 0 */ 527 /* We are only interested in device addition */
341 iommu = &cell_iommus[0]; 528 if (action != BUS_NOTIFY_ADD_DEVICE)
342 iommu->mapped_base = ioremap(0x20000511000ul, 0x1000); 529 return 0;
343 iommu->mapped_mmio_base = ioremap(0x20000510000ul, 0x1000);
344 530
345 enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); 531 /* We use the PCI DMA ops */
532 dev->archdata.dma_ops = pci_dma_ops;
346 533
347 cell_do_map_iommu(iommu, 0x048a, 534 cell_dma_dev_setup(dev);
348 0x20000000ul,0x20000000ul);
349 535
350 if (num_nodes < 2) 536 return 0;
351 return 0; 537}
352 538
353 /* node 1 */ 539static struct notifier_block cell_of_bus_notifier = {
354 iommu = &cell_iommus[1]; 540 .notifier_call = cell_of_bus_notify
355 iommu->mapped_base = ioremap(0x30000511000ul, 0x1000); 541};
356 iommu->mapped_mmio_base = ioremap(0x30000510000ul, 0x1000);
357 542
358 enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); 543static int __init cell_iommu_get_window(struct device_node *np,
544 unsigned long *base,
545 unsigned long *size)
546{
547 const void *dma_window;
548 unsigned long index;
359 549
360 cell_do_map_iommu(iommu, 0x048a, 550 /* Use ibm,dma-window if available, else, hard code ! */
361 0x20000000,0x20000000ul); 551 dma_window = get_property(np, "ibm,dma-window", NULL);
552 if (dma_window == NULL) {
553 *base = 0;
554 *size = 0x80000000u;
555 return -ENODEV;
556 }
362 557
558 of_parse_dma_window(np, dma_window, &index, base, size);
363 return 0; 559 return 0;
364} 560}
365 561
366 562static void __init cell_iommu_init_one(struct device_node *np, unsigned long offset)
367static int cell_map_iommu(void)
368{ 563{
369 unsigned int num_nodes = 0; 564 struct cbe_iommu *iommu;
370 const unsigned int *node_id; 565 unsigned long base, size;
371 const unsigned long *base, *mmio_base; 566 int nid, i;
372 struct device_node *dn; 567
373 struct cell_iommu *iommu = NULL; 568 /* Get node ID */
374 569 nid = of_node_to_nid(np);
375 /* determine number of nodes (=iommus) */ 570 if (nid < 0) {
376 pr_debug("%s(%d): determining number of nodes...", __FUNCTION__, __LINE__); 571 printk(KERN_ERR "iommu: failed to get node for %s\n",
377 for(dn = of_find_node_by_type(NULL, "cpu"); 572 np->full_name);
378 dn; 573 return;
379 dn = of_find_node_by_type(dn, "cpu")) { 574 }
380 node_id = get_property(dn, "node-id", NULL); 575 pr_debug("iommu: setting up iommu for node %d (%s)\n",
381 576 nid, np->full_name);
382 if (num_nodes < *node_id) 577
383 num_nodes = *node_id; 578 /* XXX todo: If we can have multiple windows on the same IOMMU, which
384 } 579 * isn't the case today, we probably want here to check wether the
580 * iommu for that node is already setup.
581 * However, there might be issue with getting the size right so let's
582 * ignore that for now. We might want to completely get rid of the
583 * multiple window support since the cell iommu supports per-page ioids
584 */
585
586 if (cbe_nr_iommus >= NR_IOMMUS) {
587 printk(KERN_ERR "iommu: too many IOMMUs detected ! (%s)\n",
588 np->full_name);
589 return;
590 }
591
592 /* Init base fields */
593 i = cbe_nr_iommus++;
594 iommu = &iommus[i];
595 iommu->stab = 0;
596 iommu->nid = nid;
597 snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i);
598 INIT_LIST_HEAD(&iommu->windows);
385 599
386 num_nodes++; 600 /* Obtain a window for it */
387 pr_debug("%i found.\n", num_nodes); 601 cell_iommu_get_window(np, &base, &size);
388 602
389 /* map the iommu registers for each node */ 603 pr_debug("\ttranslating window 0x%lx...0x%lx\n",
390 pr_debug("%s(%d): Looping through nodes\n", __FUNCTION__, __LINE__); 604 base, base + size - 1);
391 for(dn = of_find_node_by_type(NULL, "cpu");
392 dn;
393 dn = of_find_node_by_type(dn, "cpu")) {
394 605
395 node_id = get_property(dn, "node-id", NULL); 606 /* Initialize the hardware */
396 base = get_property(dn, "ioc-cache", NULL); 607 cell_iommu_setup_hardware(iommu, size);
397 mmio_base = get_property(dn, "ioc-translation", NULL);
398 608
399 if (!base || !mmio_base || !node_id) 609 /* Setup the iommu_table */
400 return cell_map_iommu_hardcoded(num_nodes); 610 cell_iommu_setup_window(iommu, np, base, size,
611 offset >> IOMMU_PAGE_SHIFT);
612}
401 613
402 iommu = &cell_iommus[*node_id]; 614static void __init cell_disable_iommus(void)
403 iommu->base = *base; 615{
404 iommu->mmio_base = *mmio_base; 616 int node;
617 unsigned long base, val;
618 void __iomem *xregs, *cregs;
619
620 /* Make sure IOC translation is disabled on all nodes */
621 for_each_online_node(node) {
622 if (cell_iommu_find_ioc(node, &base))
623 continue;
624 xregs = ioremap(base, IOC_Reg_Size);
625 if (xregs == NULL)
626 continue;
627 cregs = xregs + IOC_IOCmd_Offset;
628
629 pr_debug("iommu: cleaning up iommu on node %d\n", node);
630
631 out_be64(xregs + IOC_IOST_Origin, 0);
632 (void)in_be64(xregs + IOC_IOST_Origin);
633 val = in_be64(cregs + IOC_IOCmd_Cfg);
634 val &= ~IOC_IOCmd_Cfg_TE;
635 out_be64(cregs + IOC_IOCmd_Cfg, val);
636 (void)in_be64(cregs + IOC_IOCmd_Cfg);
637
638 iounmap(xregs);
639 }
640}
405 641
406 iommu->mapped_base = ioremap(*base, 0x1000); 642static int __init cell_iommu_init_disabled(void)
407 iommu->mapped_mmio_base = ioremap(*mmio_base, 0x1000); 643{
644 struct device_node *np = NULL;
645 unsigned long base = 0, size;
408 646
409 enable_mapping(iommu->mapped_base, 647 /* When no iommu is present, we use direct DMA ops */
410 iommu->mapped_mmio_base); 648 pci_dma_ops = &dma_direct_ops;
411 649
412 /* everything else will be done in iommu_bus_setup */ 650 /* First make sure all IOC translation is turned off */
651 cell_disable_iommus();
652
653 /* If we have no Axon, we set up the spider DMA magic offset */
654 if (of_find_node_by_name(NULL, "axon") == NULL)
655 dma_direct_offset = SPIDER_DMA_OFFSET;
656
657 /* Now we need to check to see where the memory is mapped
658 * in PCI space. We assume that all busses use the same dma
659 * window which is always the case so far on Cell, thus we
660 * pick up the first pci-internal node we can find and check
661 * the DMA window from there.
662 */
663 for_each_node_by_name(np, "axon") {
664 if (np->parent == NULL || np->parent->parent != NULL)
665 continue;
666 if (cell_iommu_get_window(np, &base, &size) == 0)
667 break;
668 }
669 if (np == NULL) {
670 for_each_node_by_name(np, "pci-internal") {
671 if (np->parent == NULL || np->parent->parent != NULL)
672 continue;
673 if (cell_iommu_get_window(np, &base, &size) == 0)
674 break;
675 }
676 }
677 of_node_put(np);
678
679 /* If we found a DMA window, we check if it's big enough to enclose
680 * all of physical memory. If not, we force enable IOMMU
681 */
682 if (np && size < lmb_end_of_DRAM()) {
683 printk(KERN_WARNING "iommu: force-enabled, dma window"
684 " (%ldMB) smaller than total memory (%ldMB)\n",
685 size >> 20, lmb_end_of_DRAM() >> 20);
686 return -ENODEV;
413 } 687 }
414 688
415 return 1; 689 dma_direct_offset += base;
690
691 printk("iommu: disabled, direct DMA offset is 0x%lx\n",
692 dma_direct_offset);
693
694 return 0;
416} 695}
417 696
418void cell_init_iommu(void) 697static int __init cell_iommu_init(void)
419{ 698{
420 int setup_bus = 0; 699 struct device_node *np;
421 700
422 if (of_find_node_by_path("/mambo")) { 701 if (!machine_is(cell))
423 pr_info("Not using iommu on systemsim\n"); 702 return -ENODEV;
424 } else { 703
425 /* If we don't have an Axon bridge, we assume we have a 704 /* If IOMMU is disabled or we have little enough RAM to not need
426 * spider which requires a DMA offset 705 * to enable it, we set up a direct mapping.
427 */ 706 *
428 if (of_find_node_by_name(NULL, "axon") == NULL) 707 * Note: should we make sure we have the IOMMU actually disabled ?
429 dma_direct_offset = SPIDER_DMA_VALID; 708 */
430 709 if (iommu_is_off ||
431 if (!(of_chosen && 710 (!iommu_force_on && lmb_end_of_DRAM() <= 0x80000000ull))
432 get_property(of_chosen, "linux,iommu-off", NULL))) 711 if (cell_iommu_init_disabled() == 0)
433 setup_bus = cell_map_iommu(); 712 goto bail;
434 713
435 if (setup_bus) { 714 /* Setup various ppc_md. callbacks */
436 pr_debug("%s: IOMMU mapping activated\n", __FUNCTION__); 715 ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup;
437 ppc_md.pci_dma_bus_setup = pci_dma_cell_bus_setup; 716 ppc_md.tce_build = tce_build_cell;
438 } else { 717 ppc_md.tce_free = tce_free_cell;
439 pr_debug("%s: IOMMU mapping activated, " 718
440 "no device action necessary\n", __FUNCTION__); 719 /* Create an iommu for each /axon node. */
441 /* Direct I/O, IOMMU off */ 720 for_each_node_by_name(np, "axon") {
442 } 721 if (np->parent == NULL || np->parent->parent != NULL)
722 continue;
723 cell_iommu_init_one(np, 0);
443 } 724 }
444 725
445 pci_dma_ops = &dma_direct_ops; 726 /* Create an iommu for each toplevel /pci-internal node for
727 * old hardware/firmware
728 */
729 for_each_node_by_name(np, "pci-internal") {
730 if (np->parent == NULL || np->parent->parent != NULL)
731 continue;
732 cell_iommu_init_one(np, SPIDER_DMA_OFFSET);
733 }
734
735 /* Setup default PCI iommu ops */
736 pci_dma_ops = &dma_iommu_ops;
737
738 bail:
739 /* Register callbacks on OF platform device addition/removal
740 * to handle linking them to the right DMA operations
741 */
742 bus_register_notifier(&of_platform_bus_type, &cell_of_bus_notifier);
743
744 return 0;
446} 745}
746arch_initcall(cell_iommu_init);
747