author	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2007-06-04 01:15:36 -0400
committer	Paul Mackerras <paulus@samba.org>	2007-06-14 08:29:56 -0400
commit	3d5134ee8341bffc4f539049abb9e90d469b448d (patch)
tree	037958e0daa97b4ef350908a53182167ee2c8a03 /arch/powerpc/mm/pgtable_64.c
parent	c19c03fc749147f565e807fa65f1729066800571 (diff)
[POWERPC] Rewrite IO allocation & mapping on powerpc64
This rewrites pretty much from scratch the handling of MMIO and PIO
space allocations on powerpc64. The main goals are:
- Get rid of imalloc and use more common code where possible
- Simplify the current mess so that PIO space is allocated and
mapped in a single place for PCI bridges
- Handle allocation constraints of PIO for all bridges including
hot plugged ones within the 2GB space reserved for IO ports,
so that devices on hotplugged busses will now work with drivers
that assume IO ports fit in an int.
- Cleanup and separate tracking of the ISA space in the reserved
low 64K of IO space. No ISA -> Nothing mapped there.
So far I have booted a cell blade with IDE on PIO and MMIO, and a
dual G5; that's it :-)
With this patch, all allocations are done using the code in
mm/vmalloc.c, though we use the low level __get_vm_area with
explicit start/stop constraints in order to manage separate
areas for vmalloc/vmap, ioremap, and PCI IOs.
This greatly simplifies a lot of things, as you can see in the
diffstat of that patch :-)
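For reference, the core of the new allocation path, condensed from the
__ioremap() hunk in the diff below (error handling and the early-boot
!mem_init_done case trimmed), is simply:

	struct vm_struct *area;

	/* Carve a virtual range out of the dedicated ioremap window
	 * [ioremap_bot, IOREMAP_END) instead of going through imalloc.
	 */
	area = __get_vm_area(size, VM_IOREMAP, ioremap_bot, IOREMAP_END);
	if (area == NULL)
		return NULL;
	ret = __ioremap_at(paligned, area->addr, size, flags);
	if (!ret)
		vunmap(area->addr);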
A new pair of functions, pcibios_map/unmap_io_space(), now replaces
all of the previous code that used to manipulate PCI IO space.
The allocation is done at mapping time, and the mapping is now done
from scan_phb's, just before the devices are probed (instead of
after, which is by itself a bug fix). The only other caller is the
PCI hotplug code, for hot adding PCI-PCI bridges (slots).
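For illustration, a scan_phb-style caller would do roughly the
following (sketch only, assuming pcibios_map_io_space() takes the
PHB's root pci_bus and returns 0 on success; this is not the actual
implementation):

	static void scan_phb_sketch(struct pci_controller *hose)
	{
		struct pci_bus *bus = hose->bus;	/* the PHB's root bus */

		/* Allocate and map the bridge's IO space first... */
		if (pcibios_map_io_space(bus))
			printk(KERN_WARNING "PCI: failed to map IO space\n");

		/* ...then probe the devices behind it, so that drivers
		 * already see valid, mapped IO ports. */
		pci_scan_child_bus(bus);
	}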
imalloc is gone, as is the "sub-allocation" thing, but I do believe
that hotplug should still work: the space allocation is always done
by the PHB, and if you unmap a child bus of this PHB (which seems to
be possible), the code should properly tear down all the HPTE
mappings for that area of the PHB's allocated IO space.
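As a minimal sketch (the helper name, offset and size here are purely
illustrative), tearing down such a sub-range boils down to calling the
new __iounmap_at() added by this patch:

	/* Hypothetical helper: unmap a child bus window that lives inside
	 * the PHB's already-allocated IO space.  Only the page tables /
	 * HPTEs for that sub-range are torn down; the PHB keeps its
	 * allocation.  offset and size must be page aligned.
	 */
	static void unmap_child_io_sketch(struct pci_controller *hose,
					  unsigned long offset,
					  unsigned long size)
	{
		__iounmap_at((void *)hose->io_base_virt + offset, size);
	}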
I now always reserve the first 64K of IO space for the bridge with
the ISA bus on it. I have moved the code for tracking ISA into a
separate file, which should also make it smarter if we are ever
capable of hot unplugging or re-plugging an ISA bridge.
This should have a side effect on platforms like powermac: VGA IOs
will no longer work. This is done on purpose, though, as they would
only have worked semi-randomly before. The idea at this point is to
isolate the drivers that might need to access those and fix them by
providing a proper function to obtain an offset to the legacy IOs of
a given bus.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/mm/pgtable_64.c')
-rw-r--r--	arch/powerpc/mm/pgtable_64.c	204
1 file changed, 49 insertions, 155 deletions
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index fa5c828d3876..a895de73beae 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -34,41 +34,27 @@
 #include <linux/stddef.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/highmem.h>
-#include <linux/idr.h>
-#include <linux/nodemask.h>
-#include <linux/module.h>
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
 #include <asm/prom.h>
-#include <asm/lmb.h>
-#include <asm/rtas.h>
 #include <asm/io.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
-#include <asm/uaccess.h>
 #include <asm/smp.h>
 #include <asm/machdep.h>
 #include <asm/tlb.h>
-#include <asm/eeh.h>
 #include <asm/processor.h>
-#include <asm/mmzone.h>
 #include <asm/cputable.h>
 #include <asm/sections.h>
 #include <asm/system.h>
-#include <asm/iommu.h>
 #include <asm/abs_addr.h>
-#include <asm/vdso.h>
 #include <asm/firmware.h>
 
 #include "mmu_decl.h"
 
-unsigned long ioremap_bot = IMALLOC_BASE;
-static unsigned long phbs_io_bot = PHBS_IO_BASE;
+unsigned long ioremap_bot = IOREMAP_BASE;
 
 /*
  * map_io_page currently only called by __ioremap
@@ -102,8 +88,8 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
		 * entry in the hardware page table.
		 *
		 */
-		if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
-				      mmu_io_psize)) {
+		if (htab_bolt_mapping(ea, (unsigned long)ea + PAGE_SIZE,
+				      pa, flags, mmu_io_psize)) {
			printk(KERN_ERR "Failed to do bolted mapping IO "
			       "memory at %016lx !\n", pa);
			return -ENOMEM;
@@ -113,8 +99,11 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
 }
 
 
-static void __iomem * __ioremap_com(phys_addr_t addr, unsigned long pa,
-			    unsigned long ea, unsigned long size,
+/**
+ * __ioremap_at - Low level function to establish the page tables
+ *                for an IO mapping
+ */
+void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
			    unsigned long flags)
 {
	unsigned long i;
@@ -122,17 +111,35 @@ static void __iomem * __ioremap_com(phys_addr_t addr, unsigned long pa,
	if ((flags & _PAGE_PRESENT) == 0)
		flags |= pgprot_val(PAGE_KERNEL);
 
+	WARN_ON(pa & ~PAGE_MASK);
+	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+	WARN_ON(size & ~PAGE_MASK);
+
	for (i = 0; i < size; i += PAGE_SIZE)
-		if (map_io_page(ea+i, pa+i, flags))
+		if (map_io_page((unsigned long)ea+i, pa+i, flags))
			return NULL;
 
-	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
+	return (void __iomem *)ea;
+}
+
+/**
+ * __iounmap_from - Low level function to tear down the page tables
+ *                  for an IO mapping. This is used for mappings that
+ *                  are manipulated manually, like partial unmapping of
+ *                  PCI IOs or ISA space.
+ */
+void __iounmap_at(void *ea, unsigned long size)
+{
+	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
+	WARN_ON(size & ~PAGE_MASK);
+
+	unmap_kernel_range((unsigned long)ea, size);
 }
 
 void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
			 unsigned long flags)
 {
-	unsigned long pa, ea;
+	phys_addr_t paligned;
	void __iomem *ret;
 
	/*
@@ -144,27 +151,30 @@ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
	 * IMALLOC_END
	 *
	 */
-	pa = addr & PAGE_MASK;
-	size = PAGE_ALIGN(addr + size) - pa;
+	paligned = addr & PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - paligned;
 
-	if ((size == 0) || (pa == 0))
+	if ((size == 0) || (paligned == 0))
		return NULL;
 
	if (mem_init_done) {
		struct vm_struct *area;
-		area = im_get_free_area(size);
+
+		area = __get_vm_area(size, VM_IOREMAP,
+				     ioremap_bot, IOREMAP_END);
		if (area == NULL)
			return NULL;
-		ea = (unsigned long)(area->addr);
-		ret = __ioremap_com(addr, pa, ea, size, flags);
+		ret = __ioremap_at(paligned, area->addr, size, flags);
		if (!ret)
-			im_free(area->addr);
+			vunmap(area->addr);
	} else {
-		ea = ioremap_bot;
-		ret = __ioremap_com(addr, pa, ea, size, flags);
+		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
		if (ret)
			ioremap_bot += size;
	}
+
+	if (ret)
+		ret += addr & ~PAGE_MASK;
	return ret;
 }
 
@@ -187,61 +197,9 @@ void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
 }
 
 
-#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
-
-int __ioremap_explicit(phys_addr_t pa, unsigned long ea,
-		       unsigned long size, unsigned long flags)
-{
-	struct vm_struct *area;
-	void __iomem *ret;
-
-	/* For now, require page-aligned values for pa, ea, and size */
-	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
-	    !IS_PAGE_ALIGNED(size)) {
-		printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__);
-		return 1;
-	}
-
-	if (!mem_init_done) {
-		/* Two things to consider in this case:
-		 * 1) No records will be kept (imalloc, etc) that the region
-		 *    has been remapped
-		 * 2) It won't be easy to iounmap() the region later (because
-		 *    of 1)
-		 */
-		;
-	} else {
-		area = im_get_area(ea, size,
-			IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
-		if (area == NULL) {
-			/* Expected when PHB-dlpar is in play */
-			return 1;
-		}
-		if (ea != (unsigned long) area->addr) {
-			printk(KERN_ERR "unexpected addr return from "
-			       "im_get_area\n");
-			return 1;
-		}
-	}
-
-	ret = __ioremap_com(pa, pa, ea, size, flags);
-	if (ret == NULL) {
-		printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
-		return 1;
-	}
-	if (ret != (void *) ea) {
-		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
-		return 1;
-	}
-
-	return 0;
-}
-
 /*
  * Unmap an IO region and remove it from imalloc'd list.
  * Access to IO memory should be serialized by driver.
- *
- * XXX what about calls before mem_init_done (ie python_countermeasures())
  */
 void __iounmap(volatile void __iomem *token)
 {
@@ -250,9 +208,14 @@ void __iounmap(volatile void __iomem *token)
	if (!mem_init_done)
		return;
 
-	addr = (void *) ((unsigned long __force) token & PAGE_MASK);
-
-	im_free(addr);
+	addr = (void *) ((unsigned long __force)
+			 PCI_FIX_ADDR(token) & PAGE_MASK);
+	if ((unsigned long)addr < ioremap_bot) {
+		printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
+		       " at 0x%p\n", addr);
+		return;
+	}
+	vunmap(addr);
 }
 
 void iounmap(volatile void __iomem *token)
@@ -263,77 +226,8 @@ void iounmap(volatile void __iomem *token)
	__iounmap(token);
 }
 
-static int iounmap_subset_regions(unsigned long addr, unsigned long size)
-{
-	struct vm_struct *area;
-
-	/* Check whether subsets of this region exist */
-	area = im_get_area(addr, size, IM_REGION_SUPERSET);
-	if (area == NULL)
-		return 1;
-
-	while (area) {
-		iounmap((void __iomem *) area->addr);
-		area = im_get_area(addr, size,
-				IM_REGION_SUPERSET);
-	}
-
-	return 0;
-}
-
-int __iounmap_explicit(volatile void __iomem *start, unsigned long size)
-{
-	struct vm_struct *area;
-	unsigned long addr;
-	int rc;
-
-	addr = (unsigned long __force) start & PAGE_MASK;
-
-	/* Verify that the region either exists or is a subset of an existing
-	 * region.  In the latter case, split the parent region to create
-	 * the exact region
-	 */
-	area = im_get_area(addr, size,
-			    IM_REGION_EXISTS | IM_REGION_SUBSET);
-	if (area == NULL) {
-		/* Determine whether subset regions exist.  If so, unmap */
-		rc = iounmap_subset_regions(addr, size);
-		if (rc) {
-			printk(KERN_ERR
-			       "%s() cannot unmap nonexistent range 0x%lx\n",
-			       __FUNCTION__, addr);
-			return 1;
-		}
-	} else {
-		iounmap((void __iomem *) area->addr);
-	}
-	/*
-	 * FIXME! This can't be right:
-	iounmap(area->addr);
-	 * Maybe it should be "iounmap(area);"
-	 */
-	return 0;
-}
-
 EXPORT_SYMBOL(ioremap);
 EXPORT_SYMBOL(ioremap_flags);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(__iounmap);
-
-static DEFINE_SPINLOCK(phb_io_lock);
-
-void __iomem * reserve_phb_iospace(unsigned long size)
-{
-	void __iomem *virt_addr;
-
-	if (phbs_io_bot >= IMALLOC_BASE)
-		panic("reserve_phb_iospace(): phb io space overflow\n");
-
-	spin_lock(&phb_io_lock);
-	virt_addr = (void __iomem *) phbs_io_bot;
-	phbs_io_bot += size;
-	spin_unlock(&phb_io_lock);
-
-	return virt_addr;
-}