aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile18
-rw-r--r--arch/powerpc/kernel/dma_64.c151
-rw-r--r--arch/powerpc/kernel/iomap.c146
-rw-r--r--arch/powerpc/kernel/iommu.c572
-rw-r--r--arch/powerpc/kernel/kprobes.c459
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c358
-rw-r--r--arch/powerpc/kernel/module_64.c455
-rw-r--r--arch/powerpc/kernel/pci_64.c1319
-rw-r--r--arch/powerpc/kernel/pci_direct_iommu.c94
-rw-r--r--arch/powerpc/kernel/pci_dn.c230
-rw-r--r--arch/powerpc/kernel/pci_iommu.c128
11 files changed, 3925 insertions, 5 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 046b4bf1f21e..4970e3721a84 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -49,12 +49,23 @@ extra-y += vmlinux.lds
49obj-y += process.o init_task.o time.o \ 49obj-y += process.o init_task.o time.o \
50 prom.o traps.o setup-common.o 50 prom.o traps.o setup-common.o
51obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o 51obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o
52obj-$(CONFIG_PPC64) += misc_64.o 52obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o
53obj-$(CONFIG_PPC_OF) += prom_init.o 53obj-$(CONFIG_PPC_OF) += prom_init.o
54obj-$(CONFIG_MODULES) += ppc_ksyms.o 54obj-$(CONFIG_MODULES) += ppc_ksyms.o
55obj-$(CONFIG_BOOTX_TEXT) += btext.o 55obj-$(CONFIG_BOOTX_TEXT) += btext.o
56obj-$(CONFIG_6xx) += idle_6xx.o 56obj-$(CONFIG_6xx) += idle_6xx.o
57obj-$(CONFIG_SMP) += smp.o 57obj-$(CONFIG_SMP) += smp.o
58obj-$(CONFIG_KPROBES) += kprobes.o
59
60module-$(CONFIG_PPC64) += module_64.o
61obj-$(CONFIG_MODULES) += $(module-y)
62
63pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \
64 pci_direct_iommu.o iomap.o
65obj-$(CONFIG_PCI) += $(pci64-y)
66
67kexec64-$(CONFIG_PPC64) += machine_kexec_64.o
68obj-$(CONFIG_KEXEC) += $(kexec64-y)
58 69
59ifeq ($(CONFIG_PPC_ISERIES),y) 70ifeq ($(CONFIG_PPC_ISERIES),y)
60$(obj)/head_64.o: $(obj)/lparmap.s 71$(obj)/head_64.o: $(obj)/lparmap.s
@@ -62,11 +73,8 @@ AFLAGS_head_64.o += -I$(obj)
62endif 73endif
63 74
64else 75else
65# stuff used from here for ARCH=ppc or ARCH=ppc64 76# stuff used from here for ARCH=ppc
66smpobj-$(CONFIG_SMP) += smp.o 77smpobj-$(CONFIG_SMP) += smp.o
67obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o \
68 setup-common.o $(smpobj-y)
69
70 78
71endif 79endif
72 80
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
new file mode 100644
index 000000000000..7c3419656ccc
--- /dev/null
+++ b/arch/powerpc/kernel/dma_64.c
@@ -0,0 +1,151 @@
1/*
2 * Copyright (C) 2004 IBM Corporation
3 *
4 * Implements the generic device dma API for ppc64. Handles
5 * the pci and vio busses
6 */
7
8#include <linux/device.h>
9#include <linux/dma-mapping.h>
10/* Include the busses we support */
11#include <linux/pci.h>
12#include <asm/vio.h>
13#include <asm/scatterlist.h>
14#include <asm/bug.h>
15
16static struct dma_mapping_ops *get_dma_ops(struct device *dev)
17{
18#ifdef CONFIG_PCI
19 if (dev->bus == &pci_bus_type)
20 return &pci_dma_ops;
21#endif
22#ifdef CONFIG_IBMVIO
23 if (dev->bus == &vio_bus_type)
24 return &vio_dma_ops;
25#endif
26 return NULL;
27}
28
29int dma_supported(struct device *dev, u64 mask)
30{
31 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
32
33 if (dma_ops)
34 return dma_ops->dma_supported(dev, mask);
35 BUG();
36 return 0;
37}
38EXPORT_SYMBOL(dma_supported);
39
40int dma_set_mask(struct device *dev, u64 dma_mask)
41{
42#ifdef CONFIG_PCI
43 if (dev->bus == &pci_bus_type)
44 return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
45#endif
46#ifdef CONFIG_IBMVIO
47 if (dev->bus == &vio_bus_type)
48 return -EIO;
49#endif /* CONFIG_IBMVIO */
50 BUG();
51 return 0;
52}
53EXPORT_SYMBOL(dma_set_mask);
54
55void *dma_alloc_coherent(struct device *dev, size_t size,
56 dma_addr_t *dma_handle, gfp_t flag)
57{
58 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
59
60 if (dma_ops)
61 return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
62 BUG();
63 return NULL;
64}
65EXPORT_SYMBOL(dma_alloc_coherent);
66
67void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
68 dma_addr_t dma_handle)
69{
70 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
71
72 if (dma_ops)
73 dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
74 else
75 BUG();
76}
77EXPORT_SYMBOL(dma_free_coherent);
78
79dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
80 enum dma_data_direction direction)
81{
82 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
83
84 if (dma_ops)
85 return dma_ops->map_single(dev, cpu_addr, size, direction);
86 BUG();
87 return (dma_addr_t)0;
88}
89EXPORT_SYMBOL(dma_map_single);
90
91void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
92 enum dma_data_direction direction)
93{
94 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
95
96 if (dma_ops)
97 dma_ops->unmap_single(dev, dma_addr, size, direction);
98 else
99 BUG();
100}
101EXPORT_SYMBOL(dma_unmap_single);
102
103dma_addr_t dma_map_page(struct device *dev, struct page *page,
104 unsigned long offset, size_t size,
105 enum dma_data_direction direction)
106{
107 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
108
109 if (dma_ops)
110 return dma_ops->map_single(dev,
111 (page_address(page) + offset), size, direction);
112 BUG();
113 return (dma_addr_t)0;
114}
115EXPORT_SYMBOL(dma_map_page);
116
117void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
118 enum dma_data_direction direction)
119{
120 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
121
122 if (dma_ops)
123 dma_ops->unmap_single(dev, dma_address, size, direction);
124 else
125 BUG();
126}
127EXPORT_SYMBOL(dma_unmap_page);
128
129int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
130 enum dma_data_direction direction)
131{
132 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
133
134 if (dma_ops)
135 return dma_ops->map_sg(dev, sg, nents, direction);
136 BUG();
137 return 0;
138}
139EXPORT_SYMBOL(dma_map_sg);
140
141void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
142 enum dma_data_direction direction)
143{
144 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
145
146 if (dma_ops)
147 dma_ops->unmap_sg(dev, sg, nhwentries, direction);
148 else
149 BUG();
150}
151EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
new file mode 100644
index 000000000000..6160c8dbb7c5
--- /dev/null
+++ b/arch/powerpc/kernel/iomap.c
@@ -0,0 +1,146 @@
1/*
2 * arch/ppc64/kernel/iomap.c
3 *
4 * ppc64 "iomap" interface implementation.
5 *
6 * (C) Copyright 2004 Linus Torvalds
7 */
8#include <linux/init.h>
9#include <linux/pci.h>
10#include <linux/mm.h>
11#include <asm/io.h>
12
13/*
14 * Here comes the ppc64 implementation of the IOMAP
15 * interfaces.
16 */
17unsigned int fastcall ioread8(void __iomem *addr)
18{
19 return readb(addr);
20}
21unsigned int fastcall ioread16(void __iomem *addr)
22{
23 return readw(addr);
24}
25unsigned int fastcall ioread16be(void __iomem *addr)
26{
27 return in_be16(addr);
28}
29unsigned int fastcall ioread32(void __iomem *addr)
30{
31 return readl(addr);
32}
33unsigned int fastcall ioread32be(void __iomem *addr)
34{
35 return in_be32(addr);
36}
37EXPORT_SYMBOL(ioread8);
38EXPORT_SYMBOL(ioread16);
39EXPORT_SYMBOL(ioread16be);
40EXPORT_SYMBOL(ioread32);
41EXPORT_SYMBOL(ioread32be);
42
43void fastcall iowrite8(u8 val, void __iomem *addr)
44{
45 writeb(val, addr);
46}
47void fastcall iowrite16(u16 val, void __iomem *addr)
48{
49 writew(val, addr);
50}
51void fastcall iowrite16be(u16 val, void __iomem *addr)
52{
53 out_be16(addr, val);
54}
55void fastcall iowrite32(u32 val, void __iomem *addr)
56{
57 writel(val, addr);
58}
59void fastcall iowrite32be(u32 val, void __iomem *addr)
60{
61 out_be32(addr, val);
62}
63EXPORT_SYMBOL(iowrite8);
64EXPORT_SYMBOL(iowrite16);
65EXPORT_SYMBOL(iowrite16be);
66EXPORT_SYMBOL(iowrite32);
67EXPORT_SYMBOL(iowrite32be);
68
69/*
70 * These are the "repeat read/write" functions. Note the
71 * non-CPU byte order. We do things in "IO byteorder"
72 * here.
73 *
74 * FIXME! We could make these do EEH handling if we really
75 * wanted. Not clear if we do.
76 */
77void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
78{
79 _insb((u8 __iomem *) addr, dst, count);
80}
81void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
82{
83 _insw_ns((u16 __iomem *) addr, dst, count);
84}
85void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
86{
87 _insl_ns((u32 __iomem *) addr, dst, count);
88}
89EXPORT_SYMBOL(ioread8_rep);
90EXPORT_SYMBOL(ioread16_rep);
91EXPORT_SYMBOL(ioread32_rep);
92
93void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
94{
95 _outsb((u8 __iomem *) addr, src, count);
96}
97void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
98{
99 _outsw_ns((u16 __iomem *) addr, src, count);
100}
101void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
102{
103 _outsl_ns((u32 __iomem *) addr, src, count);
104}
105EXPORT_SYMBOL(iowrite8_rep);
106EXPORT_SYMBOL(iowrite16_rep);
107EXPORT_SYMBOL(iowrite32_rep);
108
109void __iomem *ioport_map(unsigned long port, unsigned int len)
110{
111 if (!_IO_IS_VALID(port))
112 return NULL;
113 return (void __iomem *) (port+pci_io_base);
114}
115
116void ioport_unmap(void __iomem *addr)
117{
118 /* Nothing to do */
119}
120EXPORT_SYMBOL(ioport_map);
121EXPORT_SYMBOL(ioport_unmap);
122
123void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
124{
125 unsigned long start = pci_resource_start(dev, bar);
126 unsigned long len = pci_resource_len(dev, bar);
127 unsigned long flags = pci_resource_flags(dev, bar);
128
129 if (!len)
130 return NULL;
131 if (max && len > max)
132 len = max;
133 if (flags & IORESOURCE_IO)
134 return ioport_map(start, len);
135 if (flags & IORESOURCE_MEM)
136 return ioremap(start, len);
137 /* What? */
138 return NULL;
139}
140
141void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
142{
143 /* Nothing to do */
144}
145EXPORT_SYMBOL(pci_iomap);
146EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
new file mode 100644
index 000000000000..4d9b4388918b
--- /dev/null
+++ b/arch/powerpc/kernel/iommu.c
@@ -0,0 +1,572 @@
1/*
2 * arch/ppc64/kernel/iommu.c
3 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
4 *
5 * Rewrite, cleanup, new allocation schemes, virtual merging:
6 * Copyright (C) 2004 Olof Johansson, IBM Corporation
7 * and Ben. Herrenschmidt, IBM Corporation
8 *
9 * Dynamic DMA mapping support, bus-independent parts.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26
27#include <linux/config.h>
28#include <linux/init.h>
29#include <linux/types.h>
30#include <linux/slab.h>
31#include <linux/mm.h>
32#include <linux/spinlock.h>
33#include <linux/string.h>
34#include <linux/dma-mapping.h>
35#include <linux/init.h>
36#include <linux/bitops.h>
37#include <asm/io.h>
38#include <asm/prom.h>
39#include <asm/iommu.h>
40#include <asm/pci-bridge.h>
41#include <asm/machdep.h>
42
43#define DBG(...)
44
45#ifdef CONFIG_IOMMU_VMERGE
46static int novmerge = 0;
47#else
48static int novmerge = 1;
49#endif
50
51static int __init setup_iommu(char *str)
52{
53 if (!strcmp(str, "novmerge"))
54 novmerge = 1;
55 else if (!strcmp(str, "vmerge"))
56 novmerge = 0;
57 return 1;
58}
59
60__setup("iommu=", setup_iommu);
61
62static unsigned long iommu_range_alloc(struct iommu_table *tbl,
63 unsigned long npages,
64 unsigned long *handle,
65 unsigned int align_order)
66{
67 unsigned long n, end, i, start;
68 unsigned long limit;
69 int largealloc = npages > 15;
70 int pass = 0;
71 unsigned long align_mask;
72
73 align_mask = 0xffffffffffffffffl >> (64 - align_order);
74
75 /* This allocator was derived from x86_64's bit string search */
76
77 /* Sanity check */
78 if (unlikely(npages) == 0) {
79 if (printk_ratelimit())
80 WARN_ON(1);
81 return DMA_ERROR_CODE;
82 }
83
84 if (handle && *handle)
85 start = *handle;
86 else
87 start = largealloc ? tbl->it_largehint : tbl->it_hint;
88
89 /* Use only half of the table for small allocs (15 pages or less) */
90 limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
91
92 if (largealloc && start < tbl->it_halfpoint)
93 start = tbl->it_halfpoint;
94
95 /* The case below can happen if we have a small segment appended
96 * to a large, or when the previous alloc was at the very end of
97 * the available space. If so, go back to the initial start.
98 */
99 if (start >= limit)
100 start = largealloc ? tbl->it_largehint : tbl->it_hint;
101
102 again:
103
104 n = find_next_zero_bit(tbl->it_map, limit, start);
105
106 /* Align allocation */
107 n = (n + align_mask) & ~align_mask;
108
109 end = n + npages;
110
111 if (unlikely(end >= limit)) {
112 if (likely(pass < 2)) {
113 /* First failure, just rescan the half of the table.
114 * Second failure, rescan the other half of the table.
115 */
116 start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
117 limit = pass ? tbl->it_size : limit;
118 pass++;
119 goto again;
120 } else {
121 /* Third failure, give up */
122 return DMA_ERROR_CODE;
123 }
124 }
125
126 for (i = n; i < end; i++)
127 if (test_bit(i, tbl->it_map)) {
128 start = i+1;
129 goto again;
130 }
131
132 for (i = n; i < end; i++)
133 __set_bit(i, tbl->it_map);
134
135 /* Bump the hint to a new block for small allocs. */
136 if (largealloc) {
137 /* Don't bump to new block to avoid fragmentation */
138 tbl->it_largehint = end;
139 } else {
140 /* Overflow will be taken care of at the next allocation */
141 tbl->it_hint = (end + tbl->it_blocksize - 1) &
142 ~(tbl->it_blocksize - 1);
143 }
144
145 /* Update handle for SG allocations */
146 if (handle)
147 *handle = end;
148
149 return n;
150}
151
152static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
153 unsigned int npages, enum dma_data_direction direction,
154 unsigned int align_order)
155{
156 unsigned long entry, flags;
157 dma_addr_t ret = DMA_ERROR_CODE;
158
159 spin_lock_irqsave(&(tbl->it_lock), flags);
160
161 entry = iommu_range_alloc(tbl, npages, NULL, align_order);
162
163 if (unlikely(entry == DMA_ERROR_CODE)) {
164 spin_unlock_irqrestore(&(tbl->it_lock), flags);
165 return DMA_ERROR_CODE;
166 }
167
168 entry += tbl->it_offset; /* Offset into real TCE table */
169 ret = entry << PAGE_SHIFT; /* Set the return dma address */
170
171 /* Put the TCEs in the HW table */
172 ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
173 direction);
174
175
176 /* Flush/invalidate TLB caches if necessary */
177 if (ppc_md.tce_flush)
178 ppc_md.tce_flush(tbl);
179
180 spin_unlock_irqrestore(&(tbl->it_lock), flags);
181
182 /* Make sure updates are seen by hardware */
183 mb();
184
185 return ret;
186}
187
188static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
189 unsigned int npages)
190{
191 unsigned long entry, free_entry;
192 unsigned long i;
193
194 entry = dma_addr >> PAGE_SHIFT;
195 free_entry = entry - tbl->it_offset;
196
197 if (((free_entry + npages) > tbl->it_size) ||
198 (entry < tbl->it_offset)) {
199 if (printk_ratelimit()) {
200 printk(KERN_INFO "iommu_free: invalid entry\n");
201 printk(KERN_INFO "\tentry = 0x%lx\n", entry);
202 printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
203 printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
204 printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno);
205 printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size);
206 printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset);
207 printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index);
208 WARN_ON(1);
209 }
210 return;
211 }
212
213 ppc_md.tce_free(tbl, entry, npages);
214
215 for (i = 0; i < npages; i++)
216 __clear_bit(free_entry+i, tbl->it_map);
217}
218
219static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
220 unsigned int npages)
221{
222 unsigned long flags;
223
224 spin_lock_irqsave(&(tbl->it_lock), flags);
225
226 __iommu_free(tbl, dma_addr, npages);
227
228 /* Make sure TLB cache is flushed if the HW needs it. We do
229 * not do an mb() here on purpose, it is not needed on any of
230 * the current platforms.
231 */
232 if (ppc_md.tce_flush)
233 ppc_md.tce_flush(tbl);
234
235 spin_unlock_irqrestore(&(tbl->it_lock), flags);
236}
237
238int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
239 struct scatterlist *sglist, int nelems,
240 enum dma_data_direction direction)
241{
242 dma_addr_t dma_next = 0, dma_addr;
243 unsigned long flags;
244 struct scatterlist *s, *outs, *segstart;
245 int outcount, incount;
246 unsigned long handle;
247
248 BUG_ON(direction == DMA_NONE);
249
250 if ((nelems == 0) || !tbl)
251 return 0;
252
253 outs = s = segstart = &sglist[0];
254 outcount = 1;
255 incount = nelems;
256 handle = 0;
257
258 /* Init first segment length for backout at failure */
259 outs->dma_length = 0;
260
261 DBG("mapping %d elements:\n", nelems);
262
263 spin_lock_irqsave(&(tbl->it_lock), flags);
264
265 for (s = outs; nelems; nelems--, s++) {
266 unsigned long vaddr, npages, entry, slen;
267
268 slen = s->length;
269 /* Sanity check */
270 if (slen == 0) {
271 dma_next = 0;
272 continue;
273 }
274 /* Allocate iommu entries for that segment */
275 vaddr = (unsigned long)page_address(s->page) + s->offset;
276 npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
277 npages >>= PAGE_SHIFT;
278 entry = iommu_range_alloc(tbl, npages, &handle, 0);
279
280 DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
281
282 /* Handle failure */
283 if (unlikely(entry == DMA_ERROR_CODE)) {
284 if (printk_ratelimit())
285 printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
286 " npages %lx\n", tbl, vaddr, npages);
287 goto failure;
288 }
289
290 /* Convert entry to a dma_addr_t */
291 entry += tbl->it_offset;
292 dma_addr = entry << PAGE_SHIFT;
293 dma_addr |= s->offset;
294
295 DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n",
296 npages, entry, dma_addr);
297
298 /* Insert into HW table */
299 ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
300
301 /* If we are in an open segment, try merging */
302 if (segstart != s) {
303 DBG(" - trying merge...\n");
304 /* We cannot merge if:
305 * - allocated dma_addr isn't contiguous to previous allocation
306 */
307 if (novmerge || (dma_addr != dma_next)) {
308 /* Can't merge: create a new segment */
309 segstart = s;
310 outcount++; outs++;
311 DBG(" can't merge, new segment.\n");
312 } else {
313 outs->dma_length += s->length;
314 DBG(" merged, new len: %lx\n", outs->dma_length);
315 }
316 }
317
318 if (segstart == s) {
319 /* This is a new segment, fill entries */
320 DBG(" - filling new segment.\n");
321 outs->dma_address = dma_addr;
322 outs->dma_length = slen;
323 }
324
325 /* Calculate next page pointer for contiguous check */
326 dma_next = dma_addr + slen;
327
328 DBG(" - dma next is: %lx\n", dma_next);
329 }
330
331 /* Flush/invalidate TLB caches if necessary */
332 if (ppc_md.tce_flush)
333 ppc_md.tce_flush(tbl);
334
335 spin_unlock_irqrestore(&(tbl->it_lock), flags);
336
337 /* Make sure updates are seen by hardware */
338 mb();
339
340 DBG("mapped %d elements:\n", outcount);
341
342 /* For the sake of iommu_unmap_sg, we clear out the length in the
343 * next entry of the sglist if we didn't fill the list completely
344 */
345 if (outcount < incount) {
346 outs++;
347 outs->dma_address = DMA_ERROR_CODE;
348 outs->dma_length = 0;
349 }
350 return outcount;
351
352 failure:
353 for (s = &sglist[0]; s <= outs; s++) {
354 if (s->dma_length != 0) {
355 unsigned long vaddr, npages;
356
357 vaddr = s->dma_address & PAGE_MASK;
358 npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
359 >> PAGE_SHIFT;
360 __iommu_free(tbl, vaddr, npages);
361 }
362 }
363 spin_unlock_irqrestore(&(tbl->it_lock), flags);
364 return 0;
365}
366
367
368void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
369 int nelems, enum dma_data_direction direction)
370{
371 unsigned long flags;
372
373 BUG_ON(direction == DMA_NONE);
374
375 if (!tbl)
376 return;
377
378 spin_lock_irqsave(&(tbl->it_lock), flags);
379
380 while (nelems--) {
381 unsigned int npages;
382 dma_addr_t dma_handle = sglist->dma_address;
383
384 if (sglist->dma_length == 0)
385 break;
386 npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
387 - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
388 __iommu_free(tbl, dma_handle, npages);
389 sglist++;
390 }
391
392 /* Flush/invalidate TLBs if necessary. As for iommu_free(), we
393 * do not do an mb() here, the affected platforms do not need it
394 * when freeing.
395 */
396 if (ppc_md.tce_flush)
397 ppc_md.tce_flush(tbl);
398
399 spin_unlock_irqrestore(&(tbl->it_lock), flags);
400}
401
402/*
403 * Build a iommu_table structure. This contains a bit map which
404 * is used to manage allocation of the tce space.
405 */
406struct iommu_table *iommu_init_table(struct iommu_table *tbl)
407{
408 unsigned long sz;
409 static int welcomed = 0;
410
411 /* Set aside 1/4 of the table for large allocations. */
412 tbl->it_halfpoint = tbl->it_size * 3 / 4;
413
414 /* number of bytes needed for the bitmap */
415 sz = (tbl->it_size + 7) >> 3;
416
417 tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
418 if (!tbl->it_map)
419 panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
420
421 memset(tbl->it_map, 0, sz);
422
423 tbl->it_hint = 0;
424 tbl->it_largehint = tbl->it_halfpoint;
425 spin_lock_init(&tbl->it_lock);
426
427 /* Clear the hardware table in case firmware left allocations in it */
428 ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
429
430 if (!welcomed) {
431 printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
432 novmerge ? "disabled" : "enabled");
433 welcomed = 1;
434 }
435
436 return tbl;
437}
438
439void iommu_free_table(struct device_node *dn)
440{
441 struct pci_dn *pdn = dn->data;
442 struct iommu_table *tbl = pdn->iommu_table;
443 unsigned long bitmap_sz, i;
444 unsigned int order;
445
446 if (!tbl || !tbl->it_map) {
447 printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
448 dn->full_name);
449 return;
450 }
451
452 /* verify that table contains no entries */
453 /* it_size is in entries, and we're examining 64 at a time */
454 for (i = 0; i < (tbl->it_size/64); i++) {
455 if (tbl->it_map[i] != 0) {
456 printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
457 __FUNCTION__, dn->full_name);
458 break;
459 }
460 }
461
462 /* calculate bitmap size in bytes */
463 bitmap_sz = (tbl->it_size + 7) / 8;
464
465 /* free bitmap */
466 order = get_order(bitmap_sz);
467 free_pages((unsigned long) tbl->it_map, order);
468
469 /* free table */
470 kfree(tbl);
471}
472
473/* Creates TCEs for a user provided buffer. The user buffer must be
474 * contiguous real kernel storage (not vmalloc). The address of the buffer
475 * passed here is the kernel (virtual) address of the buffer. The buffer
476 * need not be page aligned, the dma_addr_t returned will point to the same
477 * byte within the page as vaddr.
478 */
479dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
480 size_t size, enum dma_data_direction direction)
481{
482 dma_addr_t dma_handle = DMA_ERROR_CODE;
483 unsigned long uaddr;
484 unsigned int npages;
485
486 BUG_ON(direction == DMA_NONE);
487
488 uaddr = (unsigned long)vaddr;
489 npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
490 npages >>= PAGE_SHIFT;
491
492 if (tbl) {
493 dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0);
494 if (dma_handle == DMA_ERROR_CODE) {
495 if (printk_ratelimit()) {
496 printk(KERN_INFO "iommu_alloc failed, "
497 "tbl %p vaddr %p npages %d\n",
498 tbl, vaddr, npages);
499 }
500 } else
501 dma_handle |= (uaddr & ~PAGE_MASK);
502 }
503
504 return dma_handle;
505}
506
507void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
508 size_t size, enum dma_data_direction direction)
509{
510 BUG_ON(direction == DMA_NONE);
511
512 if (tbl)
513 iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
514 (dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
515}
516
517/* Allocates a contiguous real buffer and creates mappings over it.
518 * Returns the virtual address of the buffer and sets dma_handle
519 * to the dma address (mapping) of the first page.
520 */
521void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
522 dma_addr_t *dma_handle, gfp_t flag)
523{
524 void *ret = NULL;
525 dma_addr_t mapping;
526 unsigned int npages, order;
527
528 size = PAGE_ALIGN(size);
529 npages = size >> PAGE_SHIFT;
530 order = get_order(size);
531
532 /*
533 * Client asked for way too much space. This is checked later
534 * anyway. It is easier to debug here for the drivers than in
535 * the tce tables.
536 */
537 if (order >= IOMAP_MAX_ORDER) {
538 printk("iommu_alloc_consistent size too large: 0x%lx\n", size);
539 return NULL;
540 }
541
542 if (!tbl)
543 return NULL;
544
545 /* Alloc enough pages (and possibly more) */
546 ret = (void *)__get_free_pages(flag, order);
547 if (!ret)
548 return NULL;
549 memset(ret, 0, size);
550
551 /* Set up tces to cover the allocated range */
552 mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order);
553 if (mapping == DMA_ERROR_CODE) {
554 free_pages((unsigned long)ret, order);
555 ret = NULL;
556 } else
557 *dma_handle = mapping;
558 return ret;
559}
560
561void iommu_free_coherent(struct iommu_table *tbl, size_t size,
562 void *vaddr, dma_addr_t dma_handle)
563{
564 unsigned int npages;
565
566 if (tbl) {
567 size = PAGE_ALIGN(size);
568 npages = size >> PAGE_SHIFT;
569 iommu_free(tbl, dma_handle, npages);
570 free_pages((unsigned long)vaddr, get_order(size));
571 }
572}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
new file mode 100644
index 000000000000..511af54e6230
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes.c
@@ -0,0 +1,459 @@
1/*
2 * Kernel Probes (KProbes)
3 * arch/ppc64/kernel/kprobes.c
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * Copyright (C) IBM Corporation, 2002, 2004
20 *
21 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
22 * Probes initial implementation ( includes contributions from
23 * Rusty Russell).
24 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
25 * interface to access function arguments.
26 * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
27 * for PPC64
28 */
29
30#include <linux/config.h>
31#include <linux/kprobes.h>
32#include <linux/ptrace.h>
33#include <linux/preempt.h>
34#include <asm/cacheflush.h>
35#include <asm/kdebug.h>
36#include <asm/sstep.h>
37
38static DECLARE_MUTEX(kprobe_mutex);
39DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
40DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
41
42int __kprobes arch_prepare_kprobe(struct kprobe *p)
43{
44 int ret = 0;
45 kprobe_opcode_t insn = *p->addr;
46
47 if ((unsigned long)p->addr & 0x03) {
48 printk("Attempt to register kprobe at an unaligned address\n");
49 ret = -EINVAL;
50 } else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
51 printk("Cannot register a kprobe on rfid or mtmsrd\n");
52 ret = -EINVAL;
53 }
54
55 /* insn must be on a special executable page on ppc64 */
56 if (!ret) {
57 down(&kprobe_mutex);
58 p->ainsn.insn = get_insn_slot();
59 up(&kprobe_mutex);
60 if (!p->ainsn.insn)
61 ret = -ENOMEM;
62 }
63 return ret;
64}
65
66void __kprobes arch_copy_kprobe(struct kprobe *p)
67{
68 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
69 p->opcode = *p->addr;
70}
71
72void __kprobes arch_arm_kprobe(struct kprobe *p)
73{
74 *p->addr = BREAKPOINT_INSTRUCTION;
75 flush_icache_range((unsigned long) p->addr,
76 (unsigned long) p->addr + sizeof(kprobe_opcode_t));
77}
78
79void __kprobes arch_disarm_kprobe(struct kprobe *p)
80{
81 *p->addr = p->opcode;
82 flush_icache_range((unsigned long) p->addr,
83 (unsigned long) p->addr + sizeof(kprobe_opcode_t));
84}
85
86void __kprobes arch_remove_kprobe(struct kprobe *p)
87{
88 down(&kprobe_mutex);
89 free_insn_slot(p->ainsn.insn);
90 up(&kprobe_mutex);
91}
92
93static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
94{
95 kprobe_opcode_t insn = *p->ainsn.insn;
96
97 regs->msr |= MSR_SE;
98
99 /* single step inline if it is a trap variant */
100 if (is_trap(insn))
101 regs->nip = (unsigned long)p->addr;
102 else
103 regs->nip = (unsigned long)p->ainsn.insn;
104}
105
106static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
107{
108 kcb->prev_kprobe.kp = kprobe_running();
109 kcb->prev_kprobe.status = kcb->kprobe_status;
110 kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
111}
112
113static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
114{
115 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
116 kcb->kprobe_status = kcb->prev_kprobe.status;
117 kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
118}
119
120static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
121 struct kprobe_ctlblk *kcb)
122{
123 __get_cpu_var(current_kprobe) = p;
124 kcb->kprobe_saved_msr = regs->msr;
125}
126
127/* Called with kretprobe_lock held */
128void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
129 struct pt_regs *regs)
130{
131 struct kretprobe_instance *ri;
132
133 if ((ri = get_free_rp_inst(rp)) != NULL) {
134 ri->rp = rp;
135 ri->task = current;
136 ri->ret_addr = (kprobe_opcode_t *)regs->link;
137
138 /* Replace the return addr with trampoline addr */
139 regs->link = (unsigned long)kretprobe_trampoline;
140 add_rp_inst(ri);
141 } else {
142 rp->nmissed++;
143 }
144}
145
146static inline int kprobe_handler(struct pt_regs *regs)
147{
148 struct kprobe *p;
149 int ret = 0;
150 unsigned int *addr = (unsigned int *)regs->nip;
151 struct kprobe_ctlblk *kcb;
152
153 /*
154 * We don't want to be preempted for the entire
155 * duration of kprobe processing
156 */
157 preempt_disable();
158 kcb = get_kprobe_ctlblk();
159
160 /* Check we're not actually recursing */
161 if (kprobe_running()) {
162 p = get_kprobe(addr);
163 if (p) {
164 kprobe_opcode_t insn = *p->ainsn.insn;
165 if (kcb->kprobe_status == KPROBE_HIT_SS &&
166 is_trap(insn)) {
167 regs->msr &= ~MSR_SE;
168 regs->msr |= kcb->kprobe_saved_msr;
169 goto no_kprobe;
170 }
171 /* We have reentered the kprobe_handler(), since
172 * another probe was hit while within the handler.
173 * We here save the original kprobes variables and
174 * just single step on the instruction of the new probe
175 * without calling any user handlers.
176 */
177 save_previous_kprobe(kcb);
178 set_current_kprobe(p, regs, kcb);
179 kcb->kprobe_saved_msr = regs->msr;
180 p->nmissed++;
181 prepare_singlestep(p, regs);
182 kcb->kprobe_status = KPROBE_REENTER;
183 return 1;
184 } else {
185 p = __get_cpu_var(current_kprobe);
186 if (p->break_handler && p->break_handler(p, regs)) {
187 goto ss_probe;
188 }
189 }
190 goto no_kprobe;
191 }
192
193 p = get_kprobe(addr);
194 if (!p) {
195 if (*addr != BREAKPOINT_INSTRUCTION) {
196 /*
197 * PowerPC has multiple variants of the "trap"
198 * instruction. If the current instruction is a
199 * trap variant, it could belong to someone else
200 */
201 kprobe_opcode_t cur_insn = *addr;
202 if (is_trap(cur_insn))
203 goto no_kprobe;
204 /*
205 * The breakpoint instruction was removed right
206 * after we hit it. Another cpu has removed
207 * either a probepoint or a debugger breakpoint
208 * at this address. In either case, no further
209 * handling of this interrupt is appropriate.
210 */
211 ret = 1;
212 }
213 /* Not one of ours: let kernel handle it */
214 goto no_kprobe;
215 }
216
217 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
218 set_current_kprobe(p, regs, kcb);
219 if (p->pre_handler && p->pre_handler(p, regs))
220 /* handler has already set things up, so skip ss setup */
221 return 1;
222
223ss_probe:
224 prepare_singlestep(p, regs);
225 kcb->kprobe_status = KPROBE_HIT_SS;
226 return 1;
227
228no_kprobe:
229 preempt_enable_no_resched();
230 return ret;
231}
232
233/*
234 * Function return probe trampoline:
235 * - init_kprobes() establishes a probepoint here
236 * - When the probed function returns, this probe
237 * causes the handlers to fire
238 */
239void kretprobe_trampoline_holder(void)
240{
241 asm volatile(".global kretprobe_trampoline\n"
242 "kretprobe_trampoline:\n"
243 "nop\n");
244}
245
246/*
247 * Called when the probe at kretprobe trampoline is hit
248 */
249int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
250{
251 struct kretprobe_instance *ri = NULL;
252 struct hlist_head *head;
253 struct hlist_node *node, *tmp;
254 unsigned long flags, orig_ret_address = 0;
255 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
256
257 spin_lock_irqsave(&kretprobe_lock, flags);
258 head = kretprobe_inst_table_head(current);
259
260 /*
261 * It is possible to have multiple instances associated with a given
262 * task either because an multiple functions in the call path
263 * have a return probe installed on them, and/or more then one return
264 * return probe was registered for a target function.
265 *
266 * We can handle this because:
267 * - instances are always inserted at the head of the list
268 * - when multiple return probes are registered for the same
269 * function, the first instance's ret_addr will point to the
270 * real return address, and all the rest will point to
271 * kretprobe_trampoline
272 */
273 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
274 if (ri->task != current)
275 /* another task is sharing our hash bucket */
276 continue;
277
278 if (ri->rp && ri->rp->handler)
279 ri->rp->handler(ri, regs);
280
281 orig_ret_address = (unsigned long)ri->ret_addr;
282 recycle_rp_inst(ri);
283
284 if (orig_ret_address != trampoline_address)
285 /*
286 * This is the real return address. Any other
287 * instances associated with this task are for
288 * other calls deeper on the call stack
289 */
290 break;
291 }
292
293 BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
294 regs->nip = orig_ret_address;
295
296 reset_current_kprobe();
297 spin_unlock_irqrestore(&kretprobe_lock, flags);
298 preempt_enable_no_resched();
299
300 /*
301 * By returning a non-zero value, we are telling
302 * kprobe_handler() that we don't want the post_handler
303 * to run (and have re-enabled preemption)
304 */
305 return 1;
306}
307
308/*
309 * Called after single-stepping. p->addr is the address of the
310 * instruction whose first byte has been replaced by the "breakpoint"
311 * instruction. To avoid the SMP problems that can occur when we
312 * temporarily put back the original opcode to single-step, we
313 * single-stepped a copy of the instruction. The address of this
314 * copy is p->ainsn.insn.
315 */
316static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
317{
318 int ret;
319 unsigned int insn = *p->ainsn.insn;
320
321 regs->nip = (unsigned long)p->addr;
322 ret = emulate_step(regs, insn);
323 if (ret == 0)
324 regs->nip = (unsigned long)p->addr + 4;
325}
326
327static inline int post_kprobe_handler(struct pt_regs *regs)
328{
329 struct kprobe *cur = kprobe_running();
330 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
331
332 if (!cur)
333 return 0;
334
335 if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
336 kcb->kprobe_status = KPROBE_HIT_SSDONE;
337 cur->post_handler(cur, regs, 0);
338 }
339
340 resume_execution(cur, regs);
341 regs->msr |= kcb->kprobe_saved_msr;
342
343 /*Restore back the original saved kprobes variables and continue. */
344 if (kcb->kprobe_status == KPROBE_REENTER) {
345 restore_previous_kprobe(kcb);
346 goto out;
347 }
348 reset_current_kprobe();
349out:
350 preempt_enable_no_resched();
351
352 /*
353 * if somebody else is singlestepping across a probe point, msr
354 * will have SE set, in which case, continue the remaining processing
355 * of do_debug, as if this is not a probe hit.
356 */
357 if (regs->msr & MSR_SE)
358 return 0;
359
360 return 1;
361}
362
363static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
364{
365 struct kprobe *cur = kprobe_running();
366 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
367
368 if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
369 return 1;
370
371 if (kcb->kprobe_status & KPROBE_HIT_SS) {
372 resume_execution(cur, regs);
373 regs->msr &= ~MSR_SE;
374 regs->msr |= kcb->kprobe_saved_msr;
375
376 reset_current_kprobe();
377 preempt_enable_no_resched();
378 }
379 return 0;
380}
381
382/*
383 * Wrapper routine to for handling exceptions.
384 */
385int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
386 unsigned long val, void *data)
387{
388 struct die_args *args = (struct die_args *)data;
389 int ret = NOTIFY_DONE;
390
391 switch (val) {
392 case DIE_BPT:
393 if (kprobe_handler(args->regs))
394 ret = NOTIFY_STOP;
395 break;
396 case DIE_SSTEP:
397 if (post_kprobe_handler(args->regs))
398 ret = NOTIFY_STOP;
399 break;
400 case DIE_PAGE_FAULT:
401 /* kprobe_running() needs smp_processor_id() */
402 preempt_disable();
403 if (kprobe_running() &&
404 kprobe_fault_handler(args->regs, args->trapnr))
405 ret = NOTIFY_STOP;
406 preempt_enable();
407 break;
408 default:
409 break;
410 }
411 return ret;
412}
413
414int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
415{
416 struct jprobe *jp = container_of(p, struct jprobe, kp);
417 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
418
419 memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
420
421 /* setup return addr to the jprobe handler routine */
422 regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
423 regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
424
425 return 1;
426}
427
428void __kprobes jprobe_return(void)
429{
430 asm volatile("trap" ::: "memory");
431}
432
433void __kprobes jprobe_return_end(void)
434{
435};
436
437int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
438{
439 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
440
441 /*
442 * FIXME - we should ideally be validating that we got here 'cos
443 * of the "trap" in jprobe_return() above, before restoring the
444 * saved regs...
445 */
446 memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
447 preempt_enable_no_resched();
448 return 1;
449}
450
451static struct kprobe trampoline_p = {
452 .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
453 .pre_handler = trampoline_probe_handler
454};
455
456int __init arch_init_kprobes(void)
457{
458 return register_kprobe(&trampoline_p);
459}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
new file mode 100644
index 000000000000..97c51e452be7
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -0,0 +1,358 @@
1/*
2 * machine_kexec.c - handle transition of Linux booting another kernel
3 *
4 * Copyright (C) 2004-2005, IBM Corp.
5 *
6 * Created by: Milton D Miller II
7 *
8 * This source code is licensed under the GNU General Public License,
9 * Version 2. See the file COPYING for more details.
10 */
11
12
13#include <linux/cpumask.h>
14#include <linux/kexec.h>
15#include <linux/smp.h>
16#include <linux/thread_info.h>
17#include <linux/errno.h>
18
19#include <asm/page.h>
20#include <asm/current.h>
21#include <asm/machdep.h>
22#include <asm/cacheflush.h>
23#include <asm/paca.h>
24#include <asm/mmu.h>
25#include <asm/sections.h> /* _end */
26#include <asm/prom.h>
27#include <asm/smp.h>
28
29#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */
30
31/* Have this around till we move it into crash specific file */
32note_buf_t crash_notes[NR_CPUS];
33
34/* Dummy for now. Not sure if we need to have a crash shutdown in here
35 * and if what it will achieve. Letting it be now to compile the code
36 * in generic kexec environment
37 */
38void machine_crash_shutdown(struct pt_regs *regs)
39{
40 /* do nothing right now */
41 /* smp_relase_cpus() if we want smp on panic kernel */
42 /* cpu_irq_down to isolate us until we are ready */
43}
44
45int machine_kexec_prepare(struct kimage *image)
46{
47 int i;
48 unsigned long begin, end; /* limits of segment */
49 unsigned long low, high; /* limits of blocked memory range */
50 struct device_node *node;
51 unsigned long *basep;
52 unsigned int *sizep;
53
54 if (!ppc_md.hpte_clear_all)
55 return -ENOENT;
56
57 /*
58 * Since we use the kernel fault handlers and paging code to
59 * handle the virtual mode, we must make sure no destination
60 * overlaps kernel static data or bss.
61 */
62 for (i = 0; i < image->nr_segments; i++)
63 if (image->segment[i].mem < __pa(_end))
64 return -ETXTBSY;
65
66 /*
67 * For non-LPAR, we absolutely can not overwrite the mmu hash
68 * table, since we are still using the bolted entries in it to
69 * do the copy. Check that here.
70 *
71 * It is safe if the end is below the start of the blocked
72 * region (end <= low), or if the beginning is after the
73 * end of the blocked region (begin >= high). Use the
74 * boolean identity !(a || b) === (!a && !b).
75 */
76 if (htab_address) {
77 low = __pa(htab_address);
78 high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
79
80 for (i = 0; i < image->nr_segments; i++) {
81 begin = image->segment[i].mem;
82 end = begin + image->segment[i].memsz;
83
84 if ((begin < high) && (end > low))
85 return -ETXTBSY;
86 }
87 }
88
89 /* We also should not overwrite the tce tables */
90 for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
91 node = of_find_node_by_type(node, "pci")) {
92 basep = (unsigned long *)get_property(node, "linux,tce-base",
93 NULL);
94 sizep = (unsigned int *)get_property(node, "linux,tce-size",
95 NULL);
96 if (basep == NULL || sizep == NULL)
97 continue;
98
99 low = *basep;
100 high = low + (*sizep);
101
102 for (i = 0; i < image->nr_segments; i++) {
103 begin = image->segment[i].mem;
104 end = begin + image->segment[i].memsz;
105
106 if ((begin < high) && (end > low))
107 return -ETXTBSY;
108 }
109 }
110
111 return 0;
112}
113
114void machine_kexec_cleanup(struct kimage *image)
115{
116 /* we do nothing in prepare that needs to be undone */
117}
118
119#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
120
121static void copy_segments(unsigned long ind)
122{
123 unsigned long entry;
124 unsigned long *ptr;
125 void *dest;
126 void *addr;
127
128 /*
129 * We rely on kexec_load to create a lists that properly
130 * initializes these pointers before they are used.
131 * We will still crash if the list is wrong, but at least
132 * the compiler will be quiet.
133 */
134 ptr = NULL;
135 dest = NULL;
136
137 for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
138 addr = __va(entry & PAGE_MASK);
139
140 switch (entry & IND_FLAGS) {
141 case IND_DESTINATION:
142 dest = addr;
143 break;
144 case IND_INDIRECTION:
145 ptr = addr;
146 break;
147 case IND_SOURCE:
148 copy_page(dest, addr);
149 dest += PAGE_SIZE;
150 }
151 }
152}
153
154void kexec_copy_flush(struct kimage *image)
155{
156 long i, nr_segments = image->nr_segments;
157 struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
158
159 /* save the ranges on the stack to efficiently flush the icache */
160 memcpy(ranges, image->segment, sizeof(ranges));
161
162 /*
163 * After this call we may not use anything allocated in dynamic
164 * memory, including *image.
165 *
166 * Only globals and the stack are allowed.
167 */
168 copy_segments(image->head);
169
170 /*
171 * we need to clear the icache for all dest pages sometime,
172 * including ones that were in place on the original copy
173 */
174 for (i = 0; i < nr_segments; i++)
175 flush_icache_range(ranges[i].mem + KERNELBASE,
176 ranges[i].mem + KERNELBASE +
177 ranges[i].memsz);
178}
179
180#ifdef CONFIG_SMP
181
182/* FIXME: we should schedule this function to be called on all cpus based
183 * on calling the interrupts, but we would like to call it off irq level
184 * so that the interrupt controller is clean.
185 */
186void kexec_smp_down(void *arg)
187{
188 if (ppc_md.kexec_cpu_down)
189 ppc_md.kexec_cpu_down(0, 1);
190
191 local_irq_disable();
192 kexec_smp_wait();
193 /* NOTREACHED */
194}
195
196static void kexec_prepare_cpus(void)
197{
198 int my_cpu, i, notified=-1;
199
200 smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
201 my_cpu = get_cpu();
202
203 /* check the others cpus are now down (via paca hw cpu id == -1) */
204 for (i=0; i < NR_CPUS; i++) {
205 if (i == my_cpu)
206 continue;
207
208 while (paca[i].hw_cpu_id != -1) {
209 barrier();
210 if (!cpu_possible(i)) {
211 printk("kexec: cpu %d hw_cpu_id %d is not"
212 " possible, ignoring\n",
213 i, paca[i].hw_cpu_id);
214 break;
215 }
216 if (!cpu_online(i)) {
217 /* Fixme: this can be spinning in
218 * pSeries_secondary_wait with a paca
219 * waiting for it to go online.
220 */
221 printk("kexec: cpu %d hw_cpu_id %d is not"
222 " online, ignoring\n",
223 i, paca[i].hw_cpu_id);
224 break;
225 }
226 if (i != notified) {
227 printk( "kexec: waiting for cpu %d (physical"
228 " %d) to go down\n",
229 i, paca[i].hw_cpu_id);
230 notified = i;
231 }
232 }
233 }
234
235 /* after we tell the others to go down */
236 if (ppc_md.kexec_cpu_down)
237 ppc_md.kexec_cpu_down(0, 0);
238
239 put_cpu();
240
241 local_irq_disable();
242}
243
244#else /* ! SMP */
245
246static void kexec_prepare_cpus(void)
247{
248 /*
249 * move the secondarys to us so that we can copy
250 * the new kernel 0-0x100 safely
251 *
252 * do this if kexec in setup.c ?
253 *
254 * We need to release the cpus if we are ever going from an
255 * UP to an SMP kernel.
256 */
257 smp_release_cpus();
258 if (ppc_md.kexec_cpu_down)
259 ppc_md.kexec_cpu_down(0, 0);
260 local_irq_disable();
261}
262
263#endif /* SMP */
264
265/*
266 * kexec thread structure and stack.
267 *
268 * We need to make sure that this is 16384-byte aligned due to the
269 * way process stacks are handled. It also must be statically allocated
270 * or allocated as part of the kimage, because everything else may be
271 * overwritten when we copy the kexec image. We piggyback on the
272 * "init_task" linker section here to statically allocate a stack.
273 *
274 * We could use a smaller stack if we don't care about anything using
275 * current, but that audit has not been performed.
276 */
277union thread_union kexec_stack
278 __attribute__((__section__(".data.init_task"))) = { };
279
280/* Our assembly helper, in kexec_stub.S */
281extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
282 void *image, void *control,
283 void (*clear_all)(void)) ATTRIB_NORET;
284
285/* too late to fail here */
286void machine_kexec(struct kimage *image)
287{
288
289 /* prepare control code if any */
290
291 /* shutdown other cpus into our wait loop and quiesce interrupts */
292 kexec_prepare_cpus();
293
294 /* switch to a staticly allocated stack. Based on irq stack code.
295 * XXX: the task struct will likely be invalid once we do the copy!
296 */
297 kexec_stack.thread_info.task = current_thread_info()->task;
298 kexec_stack.thread_info.flags = 0;
299
300 /* Some things are best done in assembly. Finding globals with
301 * a toc is easier in C, so pass in what we can.
302 */
303 kexec_sequence(&kexec_stack, image->start, image,
304 page_address(image->control_code_page),
305 ppc_md.hpte_clear_all);
306 /* NOTREACHED */
307}
308
309/* Values we need to export to the second kernel via the device tree. */
310static unsigned long htab_base, htab_size, kernel_end;
311
312static struct property htab_base_prop = {
313 .name = "linux,htab-base",
314 .length = sizeof(unsigned long),
315 .value = (unsigned char *)&htab_base,
316};
317
318static struct property htab_size_prop = {
319 .name = "linux,htab-size",
320 .length = sizeof(unsigned long),
321 .value = (unsigned char *)&htab_size,
322};
323
324static struct property kernel_end_prop = {
325 .name = "linux,kernel-end",
326 .length = sizeof(unsigned long),
327 .value = (unsigned char *)&kernel_end,
328};
329
330static void __init export_htab_values(void)
331{
332 struct device_node *node;
333
334 node = of_find_node_by_path("/chosen");
335 if (!node)
336 return;
337
338 kernel_end = __pa(_end);
339 prom_add_property(node, &kernel_end_prop);
340
341 /* On machines with no htab htab_address is NULL */
342 if (NULL == htab_address)
343 goto out;
344
345 htab_base = __pa(htab_address);
346 prom_add_property(node, &htab_base_prop);
347
348 htab_size = 1UL << ppc64_pft_size;
349 prom_add_property(node, &htab_size_prop);
350
351 out:
352 of_node_put(node);
353}
354
355void __init kexec_setup(void)
356{
357 export_htab_values();
358}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
new file mode 100644
index 000000000000..928b8581fcb0
--- /dev/null
+++ b/arch/powerpc/kernel/module_64.c
@@ -0,0 +1,455 @@
1/* Kernel module help for PPC64.
2 Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18#include <linux/module.h>
19#include <linux/elf.h>
20#include <linux/moduleloader.h>
21#include <linux/err.h>
22#include <linux/vmalloc.h>
23#include <asm/module.h>
24#include <asm/uaccess.h>
25
26/* FIXME: We don't do .init separately. To do this, we'd need to have
27 a separate r2 value in the init and core section, and stub between
28 them, too.
29
30 Using a magic allocator which places modules within 32MB solves
31 this, and makes other things simpler. Anton?
32 --RR. */
33#if 0
34#define DEBUGP printk
35#else
36#define DEBUGP(fmt , ...)
37#endif
38
39/* There's actually a third entry here, but it's unused */
40struct ppc64_opd_entry
41{
42 unsigned long funcaddr;
43 unsigned long r2;
44};
45
46/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
47 the kernel itself). But on PPC64, these need to be used for every
48 jump, actually, to reset r2 (TOC+0x8000). */
49struct ppc64_stub_entry
50{
51 /* 28 byte jump instruction sequence (7 instructions) */
52 unsigned char jump[28];
53 unsigned char unused[4];
54 /* Data for the above code */
55 struct ppc64_opd_entry opd;
56};
57
58/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
59 function which may be more than 24-bits away. We could simply
60 patch the new r2 value and function pointer into the stub, but it's
61 significantly shorter to put these values at the end of the stub
62 code, and patch the stub address (32-bits relative to the TOC ptr,
63 r2) into the stub. */
64static struct ppc64_stub_entry ppc64_stub =
65{ .jump = {
66 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */
67 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */
68 /* Save current r2 value in magic place on the stack. */
69 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */
70 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */
71 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */
72 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */
73 0x4e, 0x80, 0x04, 0x20 /* bctr */
74} };
75
76/* Count how many different 24-bit relocations (different symbol,
77 different addend) */
78static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
79{
80 unsigned int i, j, ret = 0;
81
82 /* FIXME: Only count external ones --RR */
83 /* Sure, this is order(n^2), but it's usually short, and not
84 time critical */
85 for (i = 0; i < num; i++) {
86 /* Only count 24-bit relocs, others don't need stubs */
87 if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
88 continue;
89 for (j = 0; j < i; j++) {
90 /* If this addend appeared before, it's
91 already been counted */
92 if (rela[i].r_info == rela[j].r_info
93 && rela[i].r_addend == rela[j].r_addend)
94 break;
95 }
96 if (j == i) ret++;
97 }
98 return ret;
99}
100
101void *module_alloc(unsigned long size)
102{
103 if (size == 0)
104 return NULL;
105
106 return vmalloc_exec(size);
107}
108
109/* Free memory returned from module_alloc */
110void module_free(struct module *mod, void *module_region)
111{
112 vfree(module_region);
113 /* FIXME: If module_region == mod->init_region, trim exception
114 table entries. */
115}
116
117/* Get size of potential trampolines required. */
118static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
119 const Elf64_Shdr *sechdrs)
120{
121 /* One extra reloc so it's always 0-funcaddr terminated */
122 unsigned long relocs = 1;
123 unsigned i;
124
125 /* Every relocated section... */
126 for (i = 1; i < hdr->e_shnum; i++) {
127 if (sechdrs[i].sh_type == SHT_RELA) {
128 DEBUGP("Found relocations in section %u\n", i);
129 DEBUGP("Ptr: %p. Number: %lu\n",
130 (void *)sechdrs[i].sh_addr,
131 sechdrs[i].sh_size / sizeof(Elf64_Rela));
132 relocs += count_relocs((void *)sechdrs[i].sh_addr,
133 sechdrs[i].sh_size
134 / sizeof(Elf64_Rela));
135 }
136 }
137
138 DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
139 return relocs * sizeof(struct ppc64_stub_entry);
140}
141
142static void dedotify_versions(struct modversion_info *vers,
143 unsigned long size)
144{
145 struct modversion_info *end;
146
147 for (end = (void *)vers + size; vers < end; vers++)
148 if (vers->name[0] == '.')
149 memmove(vers->name, vers->name+1, strlen(vers->name));
150}
151
152/* Undefined symbols which refer to .funcname, hack to funcname */
153static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
154{
155 unsigned int i;
156
157 for (i = 1; i < numsyms; i++) {
158 if (syms[i].st_shndx == SHN_UNDEF) {
159 char *name = strtab + syms[i].st_name;
160 if (name[0] == '.')
161 memmove(name, name+1, strlen(name));
162 }
163 }
164}
165
166int module_frob_arch_sections(Elf64_Ehdr *hdr,
167 Elf64_Shdr *sechdrs,
168 char *secstrings,
169 struct module *me)
170{
171 unsigned int i;
172
173 /* Find .toc and .stubs sections, symtab and strtab */
174 for (i = 1; i < hdr->e_shnum; i++) {
175 char *p;
176 if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
177 me->arch.stubs_section = i;
178 else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
179 me->arch.toc_section = i;
180 else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
181 dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
182 sechdrs[i].sh_size);
183
184 /* We don't handle .init for the moment: rename to _init */
185 while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
186 p[0] = '_';
187
188 if (sechdrs[i].sh_type == SHT_SYMTAB)
189 dedotify((void *)hdr + sechdrs[i].sh_offset,
190 sechdrs[i].sh_size / sizeof(Elf64_Sym),
191 (void *)hdr
192 + sechdrs[sechdrs[i].sh_link].sh_offset);
193 }
194 if (!me->arch.stubs_section || !me->arch.toc_section) {
195 printk("%s: doesn't contain .toc or .stubs.\n", me->name);
196 return -ENOEXEC;
197 }
198
199 /* Override the stubs size */
200 sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
201 return 0;
202}
203
204int apply_relocate(Elf64_Shdr *sechdrs,
205 const char *strtab,
206 unsigned int symindex,
207 unsigned int relsec,
208 struct module *me)
209{
210 printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name);
211 return -ENOEXEC;
212}
213
214/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
215 gives the value maximum span in an instruction which uses a signed
216 offset) */
217static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
218{
219 return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
220}
221
222/* Both low and high 16 bits are added as SIGNED additions, so if low
223 16 bits has high bit set, high 16 bits must be adjusted. These
224 macros do that (stolen from binutils). */
225#define PPC_LO(v) ((v) & 0xffff)
226#define PPC_HI(v) (((v) >> 16) & 0xffff)
227#define PPC_HA(v) PPC_HI ((v) + 0x8000)
228
229/* Patch stub to reference function and correct r2 value. */
230static inline int create_stub(Elf64_Shdr *sechdrs,
231 struct ppc64_stub_entry *entry,
232 struct ppc64_opd_entry *opd,
233 struct module *me)
234{
235 Elf64_Half *loc1, *loc2;
236 long reladdr;
237
238 *entry = ppc64_stub;
239
240 loc1 = (Elf64_Half *)&entry->jump[2];
241 loc2 = (Elf64_Half *)&entry->jump[6];
242
243 /* Stub uses address relative to r2. */
244 reladdr = (unsigned long)entry - my_r2(sechdrs, me);
245 if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
246 printk("%s: Address %p of stub out of range of %p.\n",
247 me->name, (void *)reladdr, (void *)my_r2);
248 return 0;
249 }
250 DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
251
252 *loc1 = PPC_HA(reladdr);
253 *loc2 = PPC_LO(reladdr);
254 entry->opd.funcaddr = opd->funcaddr;
255 entry->opd.r2 = opd->r2;
256 return 1;
257}
258
259/* Create stub to jump to function described in this OPD: we need the
260 stub to set up the TOC ptr (r2) for the function. */
261static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
262 unsigned long opdaddr,
263 struct module *me)
264{
265 struct ppc64_stub_entry *stubs;
266 struct ppc64_opd_entry *opd = (void *)opdaddr;
267 unsigned int i, num_stubs;
268
269 num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
270
271 /* Find this stub, or if that fails, the next avail. entry */
272 stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
273 for (i = 0; stubs[i].opd.funcaddr; i++) {
274 BUG_ON(i >= num_stubs);
275
276 if (stubs[i].opd.funcaddr == opd->funcaddr)
277 return (unsigned long)&stubs[i];
278 }
279
280 if (!create_stub(sechdrs, &stubs[i], opd, me))
281 return 0;
282
283 return (unsigned long)&stubs[i];
284}
285
286/* We expect a noop next: if it is, replace it with instruction to
287 restore r2. */
288static int restore_r2(u32 *instruction, struct module *me)
289{
290 if (*instruction != 0x60000000) {
291 printk("%s: Expect noop after relocate, got %08x\n",
292 me->name, *instruction);
293 return 0;
294 }
295 *instruction = 0xe8410028; /* ld r2,40(r1) */
296 return 1;
297}
298
299int apply_relocate_add(Elf64_Shdr *sechdrs,
300 const char *strtab,
301 unsigned int symindex,
302 unsigned int relsec,
303 struct module *me)
304{
305 unsigned int i;
306 Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
307 Elf64_Sym *sym;
308 unsigned long *location;
309 unsigned long value;
310
311 DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
312 sechdrs[relsec].sh_info);
313 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
314 /* This is where to make the change */
315 location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
316 + rela[i].r_offset;
317 /* This is the symbol it is referring to */
318 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
319 + ELF64_R_SYM(rela[i].r_info);
320
321 DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
322 location, (long)ELF64_R_TYPE(rela[i].r_info),
323 strtab + sym->st_name, (unsigned long)sym->st_value,
324 (long)rela[i].r_addend);
325
326 /* `Everything is relative'. */
327 value = sym->st_value + rela[i].r_addend;
328
329 switch (ELF64_R_TYPE(rela[i].r_info)) {
330 case R_PPC64_ADDR32:
331 /* Simply set it */
332 *(u32 *)location = value;
333 break;
334
335 case R_PPC64_ADDR64:
336 /* Simply set it */
337 *(unsigned long *)location = value;
338 break;
339
340 case R_PPC64_TOC:
341 *(unsigned long *)location = my_r2(sechdrs, me);
342 break;
343
344 case R_PPC64_TOC16:
345 /* Subtact TOC pointer */
346 value -= my_r2(sechdrs, me);
347 if (value + 0x8000 > 0xffff) {
348 printk("%s: bad TOC16 relocation (%lu)\n",
349 me->name, value);
350 return -ENOEXEC;
351 }
352 *((uint16_t *) location)
353 = (*((uint16_t *) location) & ~0xffff)
354 | (value & 0xffff);
355 break;
356
357 case R_PPC64_TOC16_DS:
358 /* Subtact TOC pointer */
359 value -= my_r2(sechdrs, me);
360 if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
361 printk("%s: bad TOC16_DS relocation (%lu)\n",
362 me->name, value);
363 return -ENOEXEC;
364 }
365 *((uint16_t *) location)
366 = (*((uint16_t *) location) & ~0xfffc)
367 | (value & 0xfffc);
368 break;
369
370 case R_PPC_REL24:
371 /* FIXME: Handle weak symbols here --RR */
372 if (sym->st_shndx == SHN_UNDEF) {
373 /* External: go via stub */
374 value = stub_for_addr(sechdrs, value, me);
375 if (!value)
376 return -ENOENT;
377 if (!restore_r2((u32 *)location + 1, me))
378 return -ENOEXEC;
379 }
380
381 /* Convert value to relative */
382 value -= (unsigned long)location;
383 if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
384 printk("%s: REL24 %li out of range!\n",
385 me->name, (long int)value);
386 return -ENOEXEC;
387 }
388
389 /* Only replace bits 2 through 26 */
390 *(uint32_t *)location
391 = (*(uint32_t *)location & ~0x03fffffc)
392 | (value & 0x03fffffc);
393 break;
394
395 default:
396 printk("%s: Unknown ADD relocation: %lu\n",
397 me->name,
398 (unsigned long)ELF64_R_TYPE(rela[i].r_info));
399 return -ENOEXEC;
400 }
401 }
402
403 return 0;
404}
405
406LIST_HEAD(module_bug_list);
407
408int module_finalize(const Elf_Ehdr *hdr,
409 const Elf_Shdr *sechdrs, struct module *me)
410{
411 char *secstrings;
412 unsigned int i;
413
414 me->arch.bug_table = NULL;
415 me->arch.num_bugs = 0;
416
417 /* Find the __bug_table section, if present */
418 secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
419 for (i = 1; i < hdr->e_shnum; i++) {
420 if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
421 continue;
422 me->arch.bug_table = (void *) sechdrs[i].sh_addr;
423 me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
424 break;
425 }
426
427 /*
428 * Strictly speaking this should have a spinlock to protect against
429 * traversals, but since we only traverse on BUG()s, a spinlock
430 * could potentially lead to deadlock and thus be counter-productive.
431 */
432 list_add(&me->arch.bug_list, &module_bug_list);
433
434 return 0;
435}
436
437void module_arch_cleanup(struct module *mod)
438{
439 list_del(&mod->arch.bug_list);
440}
441
442struct bug_entry *module_find_bug(unsigned long bugaddr)
443{
444 struct mod_arch_specific *mod;
445 unsigned int i;
446 struct bug_entry *bug;
447
448 list_for_each_entry(mod, &module_bug_list, bug_list) {
449 bug = mod->bug_table;
450 for (i = 0; i < mod->num_bugs; ++i, ++bug)
451 if (bugaddr == bug->bug_addr)
452 return bug;
453 }
454 return NULL;
455}
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
new file mode 100644
index 000000000000..3cef1b8f57f0
--- /dev/null
+++ b/arch/powerpc/kernel/pci_64.c
@@ -0,0 +1,1319 @@
1/*
2 * Port for PPC64 David Engebretsen, IBM Corp.
3 * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
4 *
5 * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
6 * Rework, based on alpha PCI code.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#undef DEBUG
15
16#include <linux/config.h>
17#include <linux/kernel.h>
18#include <linux/pci.h>
19#include <linux/string.h>
20#include <linux/init.h>
21#include <linux/bootmem.h>
22#include <linux/mm.h>
23#include <linux/list.h>
24#include <linux/syscalls.h>
25
26#include <asm/processor.h>
27#include <asm/io.h>
28#include <asm/prom.h>
29#include <asm/pci-bridge.h>
30#include <asm/byteorder.h>
31#include <asm/irq.h>
32#include <asm/machdep.h>
33#include <asm/udbg.h>
34#include <asm/ppc-pci.h>
35
36#ifdef DEBUG
37#define DBG(fmt...) udbg_printf(fmt)
38#else
39#define DBG(fmt...)
40#endif
41
42unsigned long pci_probe_only = 1;
43unsigned long pci_assign_all_buses = 0;
44
45/*
46 * legal IO pages under MAX_ISA_PORT. This is to ensure we don't touch
47 * devices we don't have access to.
48 */
49unsigned long io_page_mask;
50
51EXPORT_SYMBOL(io_page_mask);
52
53#ifdef CONFIG_PPC_MULTIPLATFORM
54static void fixup_resource(struct resource *res, struct pci_dev *dev);
55static void do_bus_setup(struct pci_bus *bus);
56#endif
57
58unsigned int pcibios_assign_all_busses(void)
59{
60 return pci_assign_all_buses;
61}
62
63/* pci_io_base -- the base address from which io bars are offsets.
64 * This is the lowest I/O base address (so bar values are always positive),
65 * and it *must* be the start of ISA space if an ISA bus exists because
66 * ISA drivers use hard coded offsets. If no ISA bus exists a dummy
67 * page is mapped and isa_io_limit prevents access to it.
68 */
69unsigned long isa_io_base; /* NULL if no ISA bus */
70EXPORT_SYMBOL(isa_io_base);
71unsigned long pci_io_base;
72EXPORT_SYMBOL(pci_io_base);
73
74void iSeries_pcibios_init(void);
75
76LIST_HEAD(hose_list);
77
78struct dma_mapping_ops pci_dma_ops;
79EXPORT_SYMBOL(pci_dma_ops);
80
81int global_phb_number; /* Global phb counter */
82
83/* Cached ISA bridge dev. */
84struct pci_dev *ppc64_isabridge_dev = NULL;
85
86static void fixup_broken_pcnet32(struct pci_dev* dev)
87{
88 if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
89 dev->vendor = PCI_VENDOR_ID_AMD;
90 pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
91 }
92}
93DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
94
95void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
96 struct resource *res)
97{
98 unsigned long offset = 0;
99 struct pci_controller *hose = pci_bus_to_host(dev->bus);
100
101 if (!hose)
102 return;
103
104 if (res->flags & IORESOURCE_IO)
105 offset = (unsigned long)hose->io_base_virt - pci_io_base;
106
107 if (res->flags & IORESOURCE_MEM)
108 offset = hose->pci_mem_offset;
109
110 region->start = res->start - offset;
111 region->end = res->end - offset;
112}
113
114void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
115 struct pci_bus_region *region)
116{
117 unsigned long offset = 0;
118 struct pci_controller *hose = pci_bus_to_host(dev->bus);
119
120 if (!hose)
121 return;
122
123 if (res->flags & IORESOURCE_IO)
124 offset = (unsigned long)hose->io_base_virt - pci_io_base;
125
126 if (res->flags & IORESOURCE_MEM)
127 offset = hose->pci_mem_offset;
128
129 res->start = region->start + offset;
130 res->end = region->end + offset;
131}
132
133#ifdef CONFIG_HOTPLUG
134EXPORT_SYMBOL(pcibios_resource_to_bus);
135EXPORT_SYMBOL(pcibios_bus_to_resource);
136#endif
137
138/*
139 * We need to avoid collisions with `mirrored' VGA ports
140 * and other strange ISA hardware, so we always want the
141 * addresses to be allocated in the 0x000-0x0ff region
142 * modulo 0x400.
143 *
144 * Why? Because some silly external IO cards only decode
145 * the low 10 bits of the IO address. The 0x00-0xff region
146 * is reserved for motherboard devices that decode all 16
147 * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
148 * but we want to try to avoid allocating at 0x2900-0x2bff
149 * which might have be mirrored at 0x0100-0x03ff..
150 */
151void pcibios_align_resource(void *data, struct resource *res,
152 unsigned long size, unsigned long align)
153{
154 struct pci_dev *dev = data;
155 struct pci_controller *hose = pci_bus_to_host(dev->bus);
156 unsigned long start = res->start;
157 unsigned long alignto;
158
159 if (res->flags & IORESOURCE_IO) {
160 unsigned long offset = (unsigned long)hose->io_base_virt -
161 pci_io_base;
162 /* Make sure we start at our min on all hoses */
163 if (start - offset < PCIBIOS_MIN_IO)
164 start = PCIBIOS_MIN_IO + offset;
165
166 /*
167 * Put everything into 0x00-0xff region modulo 0x400
168 */
169 if (start & 0x300)
170 start = (start + 0x3ff) & ~0x3ff;
171
172 } else if (res->flags & IORESOURCE_MEM) {
173 /* Make sure we start at our min on all hoses */
174 if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
175 start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
176
177 /* Align to multiple of size of minimum base. */
178 alignto = max(0x1000UL, align);
179 start = ALIGN(start, alignto);
180 }
181
182 res->start = start;
183}
184
185static DEFINE_SPINLOCK(hose_spinlock);
186
187/*
188 * pci_controller(phb) initialized common variables.
189 */
190void __devinit pci_setup_pci_controller(struct pci_controller *hose)
191{
192 memset(hose, 0, sizeof(struct pci_controller));
193
194 spin_lock(&hose_spinlock);
195 hose->global_number = global_phb_number++;
196 list_add_tail(&hose->list_node, &hose_list);
197 spin_unlock(&hose_spinlock);
198}
199
200static void __init pcibios_claim_one_bus(struct pci_bus *b)
201{
202 struct pci_dev *dev;
203 struct pci_bus *child_bus;
204
205 list_for_each_entry(dev, &b->devices, bus_list) {
206 int i;
207
208 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
209 struct resource *r = &dev->resource[i];
210
211 if (r->parent || !r->start || !r->flags)
212 continue;
213 pci_claim_resource(dev, i);
214 }
215 }
216
217 list_for_each_entry(child_bus, &b->children, node)
218 pcibios_claim_one_bus(child_bus);
219}
220
221#ifndef CONFIG_PPC_ISERIES
222static void __init pcibios_claim_of_setup(void)
223{
224 struct pci_bus *b;
225
226 list_for_each_entry(b, &pci_root_buses, node)
227 pcibios_claim_one_bus(b);
228}
229#endif
230
231#ifdef CONFIG_PPC_MULTIPLATFORM
232static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
233{
234 u32 *prop;
235 int len;
236
237 prop = (u32 *) get_property(np, name, &len);
238 if (prop && len >= 4)
239 return *prop;
240 return def;
241}
242
243static unsigned int pci_parse_of_flags(u32 addr0)
244{
245 unsigned int flags = 0;
246
247 if (addr0 & 0x02000000) {
248 flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
249 flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
250 flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
251 if (addr0 & 0x40000000)
252 flags |= IORESOURCE_PREFETCH
253 | PCI_BASE_ADDRESS_MEM_PREFETCH;
254 } else if (addr0 & 0x01000000)
255 flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
256 return flags;
257}
258
259#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
260
261static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
262{
263 u64 base, size;
264 unsigned int flags;
265 struct resource *res;
266 u32 *addrs, i;
267 int proplen;
268
269 addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
270 if (!addrs)
271 return;
272 for (; proplen >= 20; proplen -= 20, addrs += 5) {
273 flags = pci_parse_of_flags(addrs[0]);
274 if (!flags)
275 continue;
276 base = GET_64BIT(addrs, 1);
277 size = GET_64BIT(addrs, 3);
278 if (!size)
279 continue;
280 i = addrs[0] & 0xff;
281 if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
282 res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
283 } else if (i == dev->rom_base_reg) {
284 res = &dev->resource[PCI_ROM_RESOURCE];
285 flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
286 } else {
287 printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
288 continue;
289 }
290 res->start = base;
291 res->end = base + size - 1;
292 res->flags = flags;
293 res->name = pci_name(dev);
294 fixup_resource(res, dev);
295 }
296}
297
298struct pci_dev *of_create_pci_dev(struct device_node *node,
299 struct pci_bus *bus, int devfn)
300{
301 struct pci_dev *dev;
302 const char *type;
303
304 dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL);
305 if (!dev)
306 return NULL;
307 type = get_property(node, "device_type", NULL);
308 if (type == NULL)
309 type = "";
310
311 memset(dev, 0, sizeof(struct pci_dev));
312 dev->bus = bus;
313 dev->sysdata = node;
314 dev->dev.parent = bus->bridge;
315 dev->dev.bus = &pci_bus_type;
316 dev->devfn = devfn;
317 dev->multifunction = 0; /* maybe a lie? */
318
319 dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
320 dev->device = get_int_prop(node, "device-id", 0xffff);
321 dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
322 dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
323
324 dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/
325
326 sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
327 dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
328 dev->class = get_int_prop(node, "class-code", 0);
329
330 dev->current_state = 4; /* unknown power state */
331
332 if (!strcmp(type, "pci")) {
333 /* a PCI-PCI bridge */
334 dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
335 dev->rom_base_reg = PCI_ROM_ADDRESS1;
336 } else if (!strcmp(type, "cardbus")) {
337 dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
338 } else {
339 dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
340 dev->rom_base_reg = PCI_ROM_ADDRESS;
341 dev->irq = NO_IRQ;
342 if (node->n_intrs > 0) {
343 dev->irq = node->intrs[0].line;
344 pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
345 dev->irq);
346 }
347 }
348
349 pci_parse_of_addrs(node, dev);
350
351 pci_device_add(dev, bus);
352
353 /* XXX pci_scan_msi_device(dev); */
354
355 return dev;
356}
357EXPORT_SYMBOL(of_create_pci_dev);
358
359void __devinit of_scan_bus(struct device_node *node,
360 struct pci_bus *bus)
361{
362 struct device_node *child = NULL;
363 u32 *reg;
364 int reglen, devfn;
365 struct pci_dev *dev;
366
367 while ((child = of_get_next_child(node, child)) != NULL) {
368 reg = (u32 *) get_property(child, "reg", &reglen);
369 if (reg == NULL || reglen < 20)
370 continue;
371 devfn = (reg[0] >> 8) & 0xff;
372 /* create a new pci_dev for this device */
373 dev = of_create_pci_dev(child, bus, devfn);
374 if (!dev)
375 continue;
376 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
377 dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
378 of_scan_pci_bridge(child, dev);
379 }
380
381 do_bus_setup(bus);
382}
383EXPORT_SYMBOL(of_scan_bus);
384
385void __devinit of_scan_pci_bridge(struct device_node *node,
386 struct pci_dev *dev)
387{
388 struct pci_bus *bus;
389 u32 *busrange, *ranges;
390 int len, i, mode;
391 struct resource *res;
392 unsigned int flags;
393 u64 size;
394
395 /* parse bus-range property */
396 busrange = (u32 *) get_property(node, "bus-range", &len);
397 if (busrange == NULL || len != 8) {
398 printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n",
399 node->full_name);
400 return;
401 }
402 ranges = (u32 *) get_property(node, "ranges", &len);
403 if (ranges == NULL) {
404 printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n",
405 node->full_name);
406 return;
407 }
408
409 bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
410 if (!bus) {
411 printk(KERN_ERR "Failed to create pci bus for %s\n",
412 node->full_name);
413 return;
414 }
415
416 bus->primary = dev->bus->number;
417 bus->subordinate = busrange[1];
418 bus->bridge_ctl = 0;
419 bus->sysdata = node;
420
421 /* parse ranges property */
422 /* PCI #address-cells == 3 and #size-cells == 2 always */
423 res = &dev->resource[PCI_BRIDGE_RESOURCES];
424 for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
425 res->flags = 0;
426 bus->resource[i] = res;
427 ++res;
428 }
429 i = 1;
430 for (; len >= 32; len -= 32, ranges += 8) {
431 flags = pci_parse_of_flags(ranges[0]);
432 size = GET_64BIT(ranges, 6);
433 if (flags == 0 || size == 0)
434 continue;
435 if (flags & IORESOURCE_IO) {
436 res = bus->resource[0];
437 if (res->flags) {
438 printk(KERN_ERR "PCI: ignoring extra I/O range"
439 " for bridge %s\n", node->full_name);
440 continue;
441 }
442 } else {
443 if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
444 printk(KERN_ERR "PCI: too many memory ranges"
445 " for bridge %s\n", node->full_name);
446 continue;
447 }
448 res = bus->resource[i];
449 ++i;
450 }
451 res->start = GET_64BIT(ranges, 1);
452 res->end = res->start + size - 1;
453 res->flags = flags;
454 fixup_resource(res, dev);
455 }
456 sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
457 bus->number);
458
459 mode = PCI_PROBE_NORMAL;
460 if (ppc_md.pci_probe_mode)
461 mode = ppc_md.pci_probe_mode(bus);
462 if (mode == PCI_PROBE_DEVTREE)
463 of_scan_bus(node, bus);
464 else if (mode == PCI_PROBE_NORMAL)
465 pci_scan_child_bus(bus);
466}
467EXPORT_SYMBOL(of_scan_pci_bridge);
468#endif /* CONFIG_PPC_MULTIPLATFORM */
469
470void __devinit scan_phb(struct pci_controller *hose)
471{
472 struct pci_bus *bus;
473 struct device_node *node = hose->arch_data;
474 int i, mode;
475 struct resource *res;
476
477 bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node);
478 if (bus == NULL) {
479 printk(KERN_ERR "Failed to create bus for PCI domain %04x\n",
480 hose->global_number);
481 return;
482 }
483 bus->secondary = hose->first_busno;
484 hose->bus = bus;
485
486 bus->resource[0] = res = &hose->io_resource;
487 if (res->flags && request_resource(&ioport_resource, res))
488 printk(KERN_ERR "Failed to request PCI IO region "
489 "on PCI domain %04x\n", hose->global_number);
490
491 for (i = 0; i < 3; ++i) {
492 res = &hose->mem_resources[i];
493 bus->resource[i+1] = res;
494 if (res->flags && request_resource(&iomem_resource, res))
495 printk(KERN_ERR "Failed to request PCI memory region "
496 "on PCI domain %04x\n", hose->global_number);
497 }
498
499 mode = PCI_PROBE_NORMAL;
500#ifdef CONFIG_PPC_MULTIPLATFORM
501 if (ppc_md.pci_probe_mode)
502 mode = ppc_md.pci_probe_mode(bus);
503 if (mode == PCI_PROBE_DEVTREE) {
504 bus->subordinate = hose->last_busno;
505 of_scan_bus(node, bus);
506 }
507#endif /* CONFIG_PPC_MULTIPLATFORM */
508 if (mode == PCI_PROBE_NORMAL)
509 hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
510 pci_bus_add_devices(bus);
511}
512
513static int __init pcibios_init(void)
514{
515 struct pci_controller *hose, *tmp;
516
517 /* For now, override phys_mem_access_prot. If we need it,
518 * later, we may move that initialization to each ppc_md
519 */
520 ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
521
522#ifdef CONFIG_PPC_ISERIES
523 iSeries_pcibios_init();
524#endif
525
526 printk("PCI: Probing PCI hardware\n");
527
528 /* Scan all of the recorded PCI controllers. */
529 list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
530 scan_phb(hose);
531
532#ifndef CONFIG_PPC_ISERIES
533 if (pci_probe_only)
534 pcibios_claim_of_setup();
535 else
536 /* FIXME: `else' will be removed when
537 pci_assign_unassigned_resources() is able to work
538 correctly with [partially] allocated PCI tree. */
539 pci_assign_unassigned_resources();
540#endif /* !CONFIG_PPC_ISERIES */
541
542 /* Call machine dependent final fixup */
543 if (ppc_md.pcibios_fixup)
544 ppc_md.pcibios_fixup();
545
546 /* Cache the location of the ISA bridge (if we have one) */
547 ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
548 if (ppc64_isabridge_dev != NULL)
549 printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
550
551#ifdef CONFIG_PPC_MULTIPLATFORM
552 /* map in PCI I/O space */
553 phbs_remap_io();
554#endif
555
556 printk("PCI: Probing PCI hardware done\n");
557
558 return 0;
559}
560
561subsys_initcall(pcibios_init);
562
563char __init *pcibios_setup(char *str)
564{
565 return str;
566}
567
568int pcibios_enable_device(struct pci_dev *dev, int mask)
569{
570 u16 cmd, oldcmd;
571 int i;
572
573 pci_read_config_word(dev, PCI_COMMAND, &cmd);
574 oldcmd = cmd;
575
576 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
577 struct resource *res = &dev->resource[i];
578
579 /* Only set up the requested stuff */
580 if (!(mask & (1<<i)))
581 continue;
582
583 if (res->flags & IORESOURCE_IO)
584 cmd |= PCI_COMMAND_IO;
585 if (res->flags & IORESOURCE_MEM)
586 cmd |= PCI_COMMAND_MEMORY;
587 }
588
589 if (cmd != oldcmd) {
590 printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
591 pci_name(dev), cmd);
592 /* Enable the appropriate bits in the PCI command register. */
593 pci_write_config_word(dev, PCI_COMMAND, cmd);
594 }
595 return 0;
596}
597
598/*
599 * Return the domain number for this bus.
600 */
601int pci_domain_nr(struct pci_bus *bus)
602{
603#ifdef CONFIG_PPC_ISERIES
604 return 0;
605#else
606 struct pci_controller *hose = pci_bus_to_host(bus);
607
608 return hose->global_number;
609#endif
610}
611
612EXPORT_SYMBOL(pci_domain_nr);
613
614/* Decide whether to display the domain number in /proc */
615int pci_proc_domain(struct pci_bus *bus)
616{
617#ifdef CONFIG_PPC_ISERIES
618 return 0;
619#else
620 struct pci_controller *hose = pci_bus_to_host(bus);
621 return hose->buid;
622#endif
623}
624
625/*
626 * Platform support for /proc/bus/pci/X/Y mmap()s,
627 * modelled on the sparc64 implementation by Dave Miller.
628 * -- paulus.
629 */
630
631/*
632 * Adjust vm_pgoff of VMA such that it is the physical page offset
633 * corresponding to the 32-bit pci bus offset for DEV requested by the user.
634 *
635 * Basically, the user finds the base address for his device which he wishes
636 * to mmap. They read the 32-bit value from the config space base register,
637 * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
638 * offset parameter of mmap on /proc/bus/pci/XXX for that device.
639 *
640 * Returns negative error code on failure, zero on success.
641 */
642static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
643 unsigned long *offset,
644 enum pci_mmap_state mmap_state)
645{
646 struct pci_controller *hose = pci_bus_to_host(dev->bus);
647 unsigned long io_offset = 0;
648 int i, res_bit;
649
650 if (hose == 0)
651 return NULL; /* should never happen */
652
653 /* If memory, add on the PCI bridge address offset */
654 if (mmap_state == pci_mmap_mem) {
655 *offset += hose->pci_mem_offset;
656 res_bit = IORESOURCE_MEM;
657 } else {
658 io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
659 *offset += io_offset;
660 res_bit = IORESOURCE_IO;
661 }
662
663 /*
664 * Check that the offset requested corresponds to one of the
665 * resources of the device.
666 */
667 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
668 struct resource *rp = &dev->resource[i];
669 int flags = rp->flags;
670
671 /* treat ROM as memory (should be already) */
672 if (i == PCI_ROM_RESOURCE)
673 flags |= IORESOURCE_MEM;
674
675 /* Active and same type? */
676 if ((flags & res_bit) == 0)
677 continue;
678
679 /* In the range of this resource? */
680 if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
681 continue;
682
683 /* found it! construct the final physical address */
684 if (mmap_state == pci_mmap_io)
685 *offset += hose->io_base_phys - io_offset;
686 return rp;
687 }
688
689 return NULL;
690}
691
692/*
693 * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
694 * device mapping.
695 */
696static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
697 pgprot_t protection,
698 enum pci_mmap_state mmap_state,
699 int write_combine)
700{
701 unsigned long prot = pgprot_val(protection);
702
703 /* Write combine is always 0 on non-memory space mappings. On
704 * memory space, if the user didn't pass 1, we check for a
705 * "prefetchable" resource. This is a bit hackish, but we use
706 * this to workaround the inability of /sysfs to provide a write
707 * combine bit
708 */
709 if (mmap_state != pci_mmap_mem)
710 write_combine = 0;
711 else if (write_combine == 0) {
712 if (rp->flags & IORESOURCE_PREFETCH)
713 write_combine = 1;
714 }
715
716 /* XXX would be nice to have a way to ask for write-through */
717 prot |= _PAGE_NO_CACHE;
718 if (write_combine)
719 prot &= ~_PAGE_GUARDED;
720 else
721 prot |= _PAGE_GUARDED;
722
723 printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start,
724 prot);
725
726 return __pgprot(prot);
727}
728
729/*
730 * This one is used by /dev/mem and fbdev who have no clue about the
731 * PCI device, it tries to find the PCI device first and calls the
732 * above routine
733 */
734pgprot_t pci_phys_mem_access_prot(struct file *file,
735 unsigned long pfn,
736 unsigned long size,
737 pgprot_t protection)
738{
739 struct pci_dev *pdev = NULL;
740 struct resource *found = NULL;
741 unsigned long prot = pgprot_val(protection);
742 unsigned long offset = pfn << PAGE_SHIFT;
743 int i;
744
745 if (page_is_ram(pfn))
746 return __pgprot(prot);
747
748 prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
749
750 for_each_pci_dev(pdev) {
751 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
752 struct resource *rp = &pdev->resource[i];
753 int flags = rp->flags;
754
755 /* Active and same type? */
756 if ((flags & IORESOURCE_MEM) == 0)
757 continue;
758 /* In the range of this resource? */
759 if (offset < (rp->start & PAGE_MASK) ||
760 offset > rp->end)
761 continue;
762 found = rp;
763 break;
764 }
765 if (found)
766 break;
767 }
768 if (found) {
769 if (found->flags & IORESOURCE_PREFETCH)
770 prot &= ~_PAGE_GUARDED;
771 pci_dev_put(pdev);
772 }
773
774 DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
775
776 return __pgprot(prot);
777}
778
779
780/*
781 * Perform the actual remap of the pages for a PCI device mapping, as
782 * appropriate for this architecture. The region in the process to map
783 * is described by vm_start and vm_end members of VMA, the base physical
784 * address is found in vm_pgoff.
785 * The pci device structure is provided so that architectures may make mapping
786 * decisions on a per-device or per-bus basis.
787 *
788 * Returns a negative error code on failure, zero on success.
789 */
790int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
791 enum pci_mmap_state mmap_state,
792 int write_combine)
793{
794 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
795 struct resource *rp;
796 int ret;
797
798 rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
799 if (rp == NULL)
800 return -EINVAL;
801
802 vma->vm_pgoff = offset >> PAGE_SHIFT;
803 vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO;
804 vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
805 vma->vm_page_prot,
806 mmap_state, write_combine);
807
808 ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
809 vma->vm_end - vma->vm_start, vma->vm_page_prot);
810
811 return ret;
812}
813
814#ifdef CONFIG_PPC_MULTIPLATFORM
815static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
816{
817 struct pci_dev *pdev;
818 struct device_node *np;
819
820 pdev = to_pci_dev (dev);
821 np = pci_device_to_OF_node(pdev);
822 if (np == NULL || np->full_name == NULL)
823 return 0;
824 return sprintf(buf, "%s", np->full_name);
825}
826static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
827#endif /* CONFIG_PPC_MULTIPLATFORM */
828
829void pcibios_add_platform_entries(struct pci_dev *pdev)
830{
831#ifdef CONFIG_PPC_MULTIPLATFORM
832 device_create_file(&pdev->dev, &dev_attr_devspec);
833#endif /* CONFIG_PPC_MULTIPLATFORM */
834}
835
836#ifdef CONFIG_PPC_MULTIPLATFORM
837
838#define ISA_SPACE_MASK 0x1
839#define ISA_SPACE_IO 0x1
840
841static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
842 unsigned long phb_io_base_phys,
843 void __iomem * phb_io_base_virt)
844{
845 struct isa_range *range;
846 unsigned long pci_addr;
847 unsigned int isa_addr;
848 unsigned int size;
849 int rlen = 0;
850
851 range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
852 if (range == NULL || (rlen < sizeof(struct isa_range))) {
853 printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
854 "mapping 64k\n");
855 __ioremap_explicit(phb_io_base_phys,
856 (unsigned long)phb_io_base_virt,
857 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
858 return;
859 }
860
861 /* From "ISA Binding to 1275"
862 * The ranges property is laid out as an array of elements,
863 * each of which comprises:
864 * cells 0 - 1: an ISA address
865 * cells 2 - 4: a PCI address
866 * (size depending on dev->n_addr_cells)
867 * cell 5: the size of the range
868 */
869 if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) {
870 isa_addr = range->isa_addr.a_lo;
871 pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
872 range->pci_addr.a_lo;
873
874 /* Assume these are both zero */
875 if ((pci_addr != 0) || (isa_addr != 0)) {
876 printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
877 __FUNCTION__);
878 return;
879 }
880
881 size = PAGE_ALIGN(range->size);
882
883 __ioremap_explicit(phb_io_base_phys,
884 (unsigned long) phb_io_base_virt,
885 size, _PAGE_NO_CACHE | _PAGE_GUARDED);
886 }
887}
888
889void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
890 struct device_node *dev, int prim)
891{
892 unsigned int *ranges, pci_space;
893 unsigned long size;
894 int rlen = 0;
895 int memno = 0;
896 struct resource *res;
897 int np, na = prom_n_addr_cells(dev);
898 unsigned long pci_addr, cpu_phys_addr;
899
900 np = na + 5;
901
902 /* From "PCI Binding to 1275"
903 * The ranges property is laid out as an array of elements,
904 * each of which comprises:
905 * cells 0 - 2: a PCI address
906 * cells 3 or 3+4: a CPU physical address
907 * (size depending on dev->n_addr_cells)
908 * cells 4+5 or 5+6: the size of the range
909 */
910 rlen = 0;
911 hose->io_base_phys = 0;
912 ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
913 while ((rlen -= np * sizeof(unsigned int)) >= 0) {
914 res = NULL;
915 pci_space = ranges[0];
916 pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2];
917
918 cpu_phys_addr = ranges[3];
919 if (na >= 2)
920 cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4];
921
922 size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4];
923 ranges += np;
924 if (size == 0)
925 continue;
926
927 /* Now consume following elements while they are contiguous */
928 while (rlen >= np * sizeof(unsigned int)) {
929 unsigned long addr, phys;
930
931 if (ranges[0] != pci_space)
932 break;
933 addr = ((unsigned long)ranges[1] << 32) | ranges[2];
934 phys = ranges[3];
935 if (na >= 2)
936 phys = (phys << 32) | ranges[4];
937 if (addr != pci_addr + size ||
938 phys != cpu_phys_addr + size)
939 break;
940
941 size += ((unsigned long)ranges[na+3] << 32)
942 | ranges[na+4];
943 ranges += np;
944 rlen -= np * sizeof(unsigned int);
945 }
946
947 switch ((pci_space >> 24) & 0x3) {
948 case 1: /* I/O space */
949 hose->io_base_phys = cpu_phys_addr;
950 hose->pci_io_size = size;
951
952 res = &hose->io_resource;
953 res->flags = IORESOURCE_IO;
954 res->start = pci_addr;
955 DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number,
956 res->start, res->start + size - 1);
957 break;
958 case 2: /* memory space */
959 memno = 0;
960 while (memno < 3 && hose->mem_resources[memno].flags)
961 ++memno;
962
963 if (memno == 0)
964 hose->pci_mem_offset = cpu_phys_addr - pci_addr;
965 if (memno < 3) {
966 res = &hose->mem_resources[memno];
967 res->flags = IORESOURCE_MEM;
968 res->start = cpu_phys_addr;
969 DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number,
970 res->start, res->start + size - 1);
971 }
972 break;
973 }
974 if (res != NULL) {
975 res->name = dev->full_name;
976 res->end = res->start + size - 1;
977 res->parent = NULL;
978 res->sibling = NULL;
979 res->child = NULL;
980 }
981 }
982}
983
984void __init pci_setup_phb_io(struct pci_controller *hose, int primary)
985{
986 unsigned long size = hose->pci_io_size;
987 unsigned long io_virt_offset;
988 struct resource *res;
989 struct device_node *isa_dn;
990
991 hose->io_base_virt = reserve_phb_iospace(size);
992 DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
993 hose->global_number, hose->io_base_phys,
994 (unsigned long) hose->io_base_virt);
995
996 if (primary) {
997 pci_io_base = (unsigned long)hose->io_base_virt;
998 isa_dn = of_find_node_by_type(NULL, "isa");
999 if (isa_dn) {
1000 isa_io_base = pci_io_base;
1001 pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
1002 hose->io_base_virt);
1003 of_node_put(isa_dn);
1004 /* Allow all IO */
1005 io_page_mask = -1;
1006 }
1007 }
1008
1009 io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
1010 res = &hose->io_resource;
1011 res->start += io_virt_offset;
1012 res->end += io_virt_offset;
1013}
1014
1015void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
1016 int primary)
1017{
1018 unsigned long size = hose->pci_io_size;
1019 unsigned long io_virt_offset;
1020 struct resource *res;
1021
1022 hose->io_base_virt = __ioremap(hose->io_base_phys, size,
1023 _PAGE_NO_CACHE | _PAGE_GUARDED);
1024 DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
1025 hose->global_number, hose->io_base_phys,
1026 (unsigned long) hose->io_base_virt);
1027
1028 if (primary)
1029 pci_io_base = (unsigned long)hose->io_base_virt;
1030
1031 io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
1032 res = &hose->io_resource;
1033 res->start += io_virt_offset;
1034 res->end += io_virt_offset;
1035}
1036
1037
1038static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
1039 unsigned long *start_virt, unsigned long *size)
1040{
1041 struct pci_controller *hose = pci_bus_to_host(bus);
1042 struct pci_bus_region region;
1043 struct resource *res;
1044
1045 if (bus->self) {
1046 res = bus->resource[0];
1047 pcibios_resource_to_bus(bus->self, &region, res);
1048 *start_phys = hose->io_base_phys + region.start;
1049 *start_virt = (unsigned long) hose->io_base_virt +
1050 region.start;
1051 if (region.end > region.start)
1052 *size = region.end - region.start + 1;
1053 else {
1054 printk("%s(): unexpected region 0x%lx->0x%lx\n",
1055 __FUNCTION__, region.start, region.end);
1056 return 1;
1057 }
1058
1059 } else {
1060 /* Root Bus */
1061 res = &hose->io_resource;
1062 *start_phys = hose->io_base_phys;
1063 *start_virt = (unsigned long) hose->io_base_virt;
1064 if (res->end > res->start)
1065 *size = res->end - res->start + 1;
1066 else {
1067 printk("%s(): unexpected region 0x%lx->0x%lx\n",
1068 __FUNCTION__, res->start, res->end);
1069 return 1;
1070 }
1071 }
1072
1073 return 0;
1074}
1075
1076int unmap_bus_range(struct pci_bus *bus)
1077{
1078 unsigned long start_phys;
1079 unsigned long start_virt;
1080 unsigned long size;
1081
1082 if (!bus) {
1083 printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
1084 return 1;
1085 }
1086
1087 if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
1088 return 1;
1089 if (iounmap_explicit((void __iomem *) start_virt, size))
1090 return 1;
1091
1092 return 0;
1093}
1094EXPORT_SYMBOL(unmap_bus_range);
1095
1096int remap_bus_range(struct pci_bus *bus)
1097{
1098 unsigned long start_phys;
1099 unsigned long start_virt;
1100 unsigned long size;
1101
1102 if (!bus) {
1103 printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
1104 return 1;
1105 }
1106
1107
1108 if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
1109 return 1;
1110 printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
1111 if (__ioremap_explicit(start_phys, start_virt, size,
1112 _PAGE_NO_CACHE | _PAGE_GUARDED))
1113 return 1;
1114
1115 return 0;
1116}
1117EXPORT_SYMBOL(remap_bus_range);
1118
1119void phbs_remap_io(void)
1120{
1121 struct pci_controller *hose, *tmp;
1122
1123 list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
1124 remap_bus_range(hose->bus);
1125}
1126
1127/*
1128 * ppc64 can have multifunction devices that do not respond to function 0.
1129 * In this case we must scan all functions.
1130 * XXX this can go now, we use the OF device tree in all the
1131 * cases that caused problems. -- paulus
1132 */
1133int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
1134{
1135 return 0;
1136}
1137
1138static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
1139{
1140 struct pci_controller *hose = pci_bus_to_host(dev->bus);
1141 unsigned long start, end, mask, offset;
1142
1143 if (res->flags & IORESOURCE_IO) {
1144 offset = (unsigned long)hose->io_base_virt - pci_io_base;
1145
1146 start = res->start += offset;
1147 end = res->end += offset;
1148
1149 /* Need to allow IO access to pages that are in the
1150 ISA range */
1151 if (start < MAX_ISA_PORT) {
1152 if (end > MAX_ISA_PORT)
1153 end = MAX_ISA_PORT;
1154
1155 start >>= PAGE_SHIFT;
1156 end >>= PAGE_SHIFT;
1157
1158 /* get the range of pages for the map */
1159 mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1);
1160 io_page_mask |= mask;
1161 }
1162 } else if (res->flags & IORESOURCE_MEM) {
1163 res->start += hose->pci_mem_offset;
1164 res->end += hose->pci_mem_offset;
1165 }
1166}
1167
1168void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
1169 struct pci_bus *bus)
1170{
1171 /* Update device resources. */
1172 int i;
1173
1174 for (i = 0; i < PCI_NUM_RESOURCES; i++)
1175 if (dev->resource[i].flags)
1176 fixup_resource(&dev->resource[i], dev);
1177}
1178EXPORT_SYMBOL(pcibios_fixup_device_resources);
1179
1180static void __devinit do_bus_setup(struct pci_bus *bus)
1181{
1182 struct pci_dev *dev;
1183
1184 ppc_md.iommu_bus_setup(bus);
1185
1186 list_for_each_entry(dev, &bus->devices, bus_list)
1187 ppc_md.iommu_dev_setup(dev);
1188
1189 if (ppc_md.irq_bus_setup)
1190 ppc_md.irq_bus_setup(bus);
1191}
1192
1193void __devinit pcibios_fixup_bus(struct pci_bus *bus)
1194{
1195 struct pci_dev *dev = bus->self;
1196
1197 if (dev && pci_probe_only &&
1198 (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
1199 /* This is a subordinate bridge */
1200
1201 pci_read_bridge_bases(bus);
1202 pcibios_fixup_device_resources(dev, bus);
1203 }
1204
1205 do_bus_setup(bus);
1206
1207 if (!pci_probe_only)
1208 return;
1209
1210 list_for_each_entry(dev, &bus->devices, bus_list)
1211 if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
1212 pcibios_fixup_device_resources(dev, bus);
1213}
1214EXPORT_SYMBOL(pcibios_fixup_bus);
1215
1216/*
1217 * Reads the interrupt pin to determine if interrupt is use by card.
1218 * If the interrupt is used, then gets the interrupt line from the
1219 * openfirmware and sets it in the pci_dev and pci_config line.
1220 */
1221int pci_read_irq_line(struct pci_dev *pci_dev)
1222{
1223 u8 intpin;
1224 struct device_node *node;
1225
1226 pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
1227 if (intpin == 0)
1228 return 0;
1229
1230 node = pci_device_to_OF_node(pci_dev);
1231 if (node == NULL)
1232 return -1;
1233
1234 if (node->n_intrs == 0)
1235 return -1;
1236
1237 pci_dev->irq = node->intrs[0].line;
1238
1239 pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
1240
1241 return 0;
1242}
1243EXPORT_SYMBOL(pci_read_irq_line);
1244
1245void pci_resource_to_user(const struct pci_dev *dev, int bar,
1246 const struct resource *rsrc,
1247 u64 *start, u64 *end)
1248{
1249 struct pci_controller *hose = pci_bus_to_host(dev->bus);
1250 unsigned long offset = 0;
1251
1252 if (hose == NULL)
1253 return;
1254
1255 if (rsrc->flags & IORESOURCE_IO)
1256 offset = pci_io_base - (unsigned long)hose->io_base_virt +
1257 hose->io_base_phys;
1258
1259 *start = rsrc->start + offset;
1260 *end = rsrc->end + offset;
1261}
1262
1263#endif /* CONFIG_PPC_MULTIPLATFORM */
1264
1265
1266#define IOBASE_BRIDGE_NUMBER 0
1267#define IOBASE_MEMORY 1
1268#define IOBASE_IO 2
1269#define IOBASE_ISA_IO 3
1270#define IOBASE_ISA_MEM 4
1271
1272long sys_pciconfig_iobase(long which, unsigned long in_bus,
1273 unsigned long in_devfn)
1274{
1275 struct pci_controller* hose;
1276 struct list_head *ln;
1277 struct pci_bus *bus = NULL;
1278 struct device_node *hose_node;
1279
1280 /* Argh ! Please forgive me for that hack, but that's the
1281 * simplest way to get existing XFree to not lockup on some
1282 * G5 machines... So when something asks for bus 0 io base
1283 * (bus 0 is HT root), we return the AGP one instead.
1284 */
1285 if (machine_is_compatible("MacRISC4"))
1286 if (in_bus == 0)
1287 in_bus = 0xf0;
1288
1289 /* That syscall isn't quite compatible with PCI domains, but it's
1290 * used on pre-domains setup. We return the first match
1291 */
1292
1293 for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
1294 bus = pci_bus_b(ln);
1295 if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate))
1296 break;
1297 bus = NULL;
1298 }
1299 if (bus == NULL || bus->sysdata == NULL)
1300 return -ENODEV;
1301
1302 hose_node = (struct device_node *)bus->sysdata;
1303 hose = PCI_DN(hose_node)->phb;
1304
1305 switch (which) {
1306 case IOBASE_BRIDGE_NUMBER:
1307 return (long)hose->first_busno;
1308 case IOBASE_MEMORY:
1309 return (long)hose->pci_mem_offset;
1310 case IOBASE_IO:
1311 return (long)hose->io_base_phys;
1312 case IOBASE_ISA_IO:
1313 return (long)isa_io_base;
1314 case IOBASE_ISA_MEM:
1315 return -EINVAL;
1316 }
1317
1318 return -EOPNOTSUPP;
1319}
diff --git a/arch/powerpc/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c
new file mode 100644
index 000000000000..e1a32f802c0b
--- /dev/null
+++ b/arch/powerpc/kernel/pci_direct_iommu.c
@@ -0,0 +1,94 @@
1/*
2 * Support for DMA from PCI devices to main memory on
3 * machines without an iommu or with directly addressable
4 * RAM (typically a pmac with 2Gb of RAM or less)
5 *
6 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/delay.h>
17#include <linux/string.h>
18#include <linux/init.h>
19#include <linux/bootmem.h>
20#include <linux/mm.h>
21#include <linux/dma-mapping.h>
22
23#include <asm/sections.h>
24#include <asm/io.h>
25#include <asm/prom.h>
26#include <asm/pci-bridge.h>
27#include <asm/machdep.h>
28#include <asm/pmac_feature.h>
29#include <asm/abs_addr.h>
30#include <asm/ppc-pci.h>
31
32static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
33 dma_addr_t *dma_handle, gfp_t flag)
34{
35 void *ret;
36
37 ret = (void *)__get_free_pages(flag, get_order(size));
38 if (ret != NULL) {
39 memset(ret, 0, size);
40 *dma_handle = virt_to_abs(ret);
41 }
42 return ret;
43}
44
45static void pci_direct_free_coherent(struct device *hwdev, size_t size,
46 void *vaddr, dma_addr_t dma_handle)
47{
48 free_pages((unsigned long)vaddr, get_order(size));
49}
50
51static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr,
52 size_t size, enum dma_data_direction direction)
53{
54 return virt_to_abs(ptr);
55}
56
57static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
58 size_t size, enum dma_data_direction direction)
59{
60}
61
62static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg,
63 int nents, enum dma_data_direction direction)
64{
65 int i;
66
67 for (i = 0; i < nents; i++, sg++) {
68 sg->dma_address = page_to_phys(sg->page) + sg->offset;
69 sg->dma_length = sg->length;
70 }
71
72 return nents;
73}
74
75static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg,
76 int nents, enum dma_data_direction direction)
77{
78}
79
80static int pci_direct_dma_supported(struct device *dev, u64 mask)
81{
82 return mask < 0x100000000ull;
83}
84
85void __init pci_direct_iommu_init(void)
86{
87 pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent;
88 pci_dma_ops.free_coherent = pci_direct_free_coherent;
89 pci_dma_ops.map_single = pci_direct_map_single;
90 pci_dma_ops.unmap_single = pci_direct_unmap_single;
91 pci_dma_ops.map_sg = pci_direct_map_sg;
92 pci_dma_ops.unmap_sg = pci_direct_unmap_sg;
93 pci_dma_ops.dma_supported = pci_direct_dma_supported;
94}
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
new file mode 100644
index 000000000000..12c4c9e9bbc7
--- /dev/null
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -0,0 +1,230 @@
1/*
2 * pci_dn.c
3 *
4 * Copyright (C) 2001 Todd Inglett, IBM Corporation
5 *
6 * PCI manipulation via device_nodes.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22#include <linux/kernel.h>
23#include <linux/pci.h>
24#include <linux/string.h>
25#include <linux/init.h>
26#include <linux/slab.h>
27#include <linux/bootmem.h>
28
29#include <asm/io.h>
30#include <asm/prom.h>
31#include <asm/pci-bridge.h>
32#include <asm/pSeries_reconfig.h>
33#include <asm/ppc-pci.h>
34
35/*
36 * Traverse_func that inits the PCI fields of the device node.
37 * NOTE: this *must* be done before read/write config to the device.
38 */
39static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
40{
41 struct pci_controller *phb = data;
42 int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
43 u32 *regs;
44 struct pci_dn *pdn;
45
46 if (mem_init_done)
47 pdn = kmalloc(sizeof(*pdn), GFP_KERNEL);
48 else
49 pdn = alloc_bootmem(sizeof(*pdn));
50 if (pdn == NULL)
51 return NULL;
52 memset(pdn, 0, sizeof(*pdn));
53 dn->data = pdn;
54 pdn->node = dn;
55 pdn->phb = phb;
56 regs = (u32 *)get_property(dn, "reg", NULL);
57 if (regs) {
58 /* First register entry is addr (00BBSS00) */
59 pdn->busno = (regs[0] >> 16) & 0xff;
60 pdn->devfn = (regs[0] >> 8) & 0xff;
61 }
62
63 pdn->pci_ext_config_space = (type && *type == 1);
64 return NULL;
65}
66
67/*
68 * Traverse a device tree stopping each PCI device in the tree.
69 * This is done depth first. As each node is processed, a "pre"
70 * function is called and the children are processed recursively.
71 *
72 * The "pre" func returns a value. If non-zero is returned from
73 * the "pre" func, the traversal stops and this value is returned.
74 * This return value is useful when using traverse as a method of
75 * finding a device.
76 *
77 * NOTE: we do not run the func for devices that do not appear to
78 * be PCI except for the start node which we assume (this is good
79 * because the start node is often a phb which may be missing PCI
80 * properties).
81 * We use the class-code as an indicator. If we run into
82 * one of these nodes we also assume its siblings are non-pci for
83 * performance.
84 */
85void *traverse_pci_devices(struct device_node *start, traverse_func pre,
86 void *data)
87{
88 struct device_node *dn, *nextdn;
89 void *ret;
90
91 /* We started with a phb, iterate all childs */
92 for (dn = start->child; dn; dn = nextdn) {
93 u32 *classp, class;
94
95 nextdn = NULL;
96 classp = (u32 *)get_property(dn, "class-code", NULL);
97 class = classp ? *classp : 0;
98
99 if (pre && ((ret = pre(dn, data)) != NULL))
100 return ret;
101
102 /* If we are a PCI bridge, go down */
103 if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
104 (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
105 /* Depth first...do children */
106 nextdn = dn->child;
107 else if (dn->sibling)
108 /* ok, try next sibling instead. */
109 nextdn = dn->sibling;
110 if (!nextdn) {
111 /* Walk up to next valid sibling. */
112 do {
113 dn = dn->parent;
114 if (dn == start)
115 return NULL;
116 } while (dn->sibling == NULL);
117 nextdn = dn->sibling;
118 }
119 }
120 return NULL;
121}
122
123/**
124 * pci_devs_phb_init_dynamic - setup pci devices under this PHB
125 * phb: pci-to-host bridge (top-level bridge connecting to cpu)
126 *
127 * This routine is called both during boot, (before the memory
128 * subsystem is set up, before kmalloc is valid) and during the
129 * dynamic lpar operation of adding a PHB to a running system.
130 */
131void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
132{
133 struct device_node * dn = (struct device_node *) phb->arch_data;
134 struct pci_dn *pdn;
135
136 /* PHB nodes themselves must not match */
137 update_dn_pci_info(dn, phb);
138 pdn = dn->data;
139 if (pdn) {
140 pdn->devfn = pdn->busno = -1;
141 pdn->phb = phb;
142 }
143
144 /* Update dn->phb ptrs for new phb and children devices */
145 traverse_pci_devices(dn, update_dn_pci_info, phb);
146}
147
148/*
149 * Traversal func that looks for a <busno,devfcn> value.
150 * If found, the pci_dn is returned (thus terminating the traversal).
151 */
152static void *is_devfn_node(struct device_node *dn, void *data)
153{
154 int busno = ((unsigned long)data >> 8) & 0xff;
155 int devfn = ((unsigned long)data) & 0xff;
156 struct pci_dn *pci = dn->data;
157
158 if (pci && (devfn == pci->devfn) && (busno == pci->busno))
159 return dn;
160 return NULL;
161}
162
163/*
164 * This is the "slow" path for looking up a device_node from a
165 * pci_dev. It will hunt for the device under its parent's
166 * phb and then update sysdata for a future fastpath.
167 *
168 * It may also do fixups on the actual device since this happens
169 * on the first read/write.
170 *
171 * Note that it also must deal with devices that don't exist.
172 * In this case it may probe for real hardware ("just in case")
173 * and add a device_node to the device tree if necessary.
174 *
175 */
176struct device_node *fetch_dev_dn(struct pci_dev *dev)
177{
178 struct device_node *orig_dn = dev->sysdata;
179 struct device_node *dn;
180 unsigned long searchval = (dev->bus->number << 8) | dev->devfn;
181
182 dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval);
183 if (dn)
184 dev->sysdata = dn;
185 return dn;
186}
187EXPORT_SYMBOL(fetch_dev_dn);
188
189static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
190{
191 struct device_node *np = node;
192 struct pci_dn *pci = NULL;
193 int err = NOTIFY_OK;
194
195 switch (action) {
196 case PSERIES_RECONFIG_ADD:
197 pci = np->parent->data;
198 if (pci)
199 update_dn_pci_info(np, pci->phb);
200 break;
201 default:
202 err = NOTIFY_DONE;
203 break;
204 }
205 return err;
206}
207
208static struct notifier_block pci_dn_reconfig_nb = {
209 .notifier_call = pci_dn_reconfig_notifier,
210};
211
212/**
213 * pci_devs_phb_init - Initialize phbs and pci devs under them.
214 *
215 * This routine walks over all phb's (pci-host bridges) on the
216 * system, and sets up assorted pci-related structures
217 * (including pci info in the device node structs) for each
218 * pci device found underneath. This routine runs once,
219 * early in the boot sequence.
220 */
221void __init pci_devs_phb_init(void)
222{
223 struct pci_controller *phb, *tmp;
224
225 /* This must be done first so the device nodes have valid pci info! */
226 list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
227 pci_devs_phb_init_dynamic(phb);
228
229 pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
230}
diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c
new file mode 100644
index 000000000000..bdf15dbbf4f0
--- /dev/null
+++ b/arch/powerpc/kernel/pci_iommu.c
@@ -0,0 +1,128 @@
1/*
2 * arch/ppc64/kernel/pci_iommu.c
3 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
4 *
5 * Rewrite, cleanup, new allocation schemes:
6 * Copyright (C) 2004 Olof Johansson, IBM Corporation
7 *
8 * Dynamic DMA mapping support, platform-independent parts.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25
26#include <linux/init.h>
27#include <linux/types.h>
28#include <linux/slab.h>
29#include <linux/mm.h>
30#include <linux/spinlock.h>
31#include <linux/string.h>
32#include <linux/pci.h>
33#include <linux/dma-mapping.h>
34#include <asm/io.h>
35#include <asm/prom.h>
36#include <asm/iommu.h>
37#include <asm/pci-bridge.h>
38#include <asm/machdep.h>
39#include <asm/ppc-pci.h>
40
41/*
42 * We can use ->sysdata directly and avoid the extra work in
43 * pci_device_to_OF_node since ->sysdata will have been initialised
44 * in the iommu init code for all devices.
45 */
46#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata))
47
48static inline struct iommu_table *devnode_table(struct device *dev)
49{
50 struct pci_dev *pdev;
51
52 if (!dev) {
53 pdev = ppc64_isabridge_dev;
54 if (!pdev)
55 return NULL;
56 } else
57 pdev = to_pci_dev(dev);
58
59 return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
60}
61
62
63/* Allocates a contiguous real buffer and creates mappings over it.
64 * Returns the virtual address of the buffer and sets dma_handle
65 * to the dma address (mapping) of the first page.
66 */
67static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size,
68 dma_addr_t *dma_handle, gfp_t flag)
69{
70 return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle,
71 flag);
72}
73
74static void pci_iommu_free_coherent(struct device *hwdev, size_t size,
75 void *vaddr, dma_addr_t dma_handle)
76{
77 iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle);
78}
79
80/* Creates TCEs for a user provided buffer. The user buffer must be
81 * contiguous real kernel storage (not vmalloc). The address of the buffer
82 * passed here is the kernel (virtual) address of the buffer. The buffer
83 * need not be page aligned, the dma_addr_t returned will point to the same
84 * byte within the page as vaddr.
85 */
86static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr,
87 size_t size, enum dma_data_direction direction)
88{
89 return iommu_map_single(devnode_table(hwdev), vaddr, size, direction);
90}
91
92
93static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle,
94 size_t size, enum dma_data_direction direction)
95{
96 iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction);
97}
98
99
100static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist,
101 int nelems, enum dma_data_direction direction)
102{
103 return iommu_map_sg(pdev, devnode_table(pdev), sglist,
104 nelems, direction);
105}
106
107static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist,
108 int nelems, enum dma_data_direction direction)
109{
110 iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction);
111}
112
113/* We support DMA to/from any memory page via the iommu */
114static int pci_iommu_dma_supported(struct device *dev, u64 mask)
115{
116 return 1;
117}
118
119void pci_iommu_init(void)
120{
121 pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent;
122 pci_dma_ops.free_coherent = pci_iommu_free_coherent;
123 pci_dma_ops.map_single = pci_iommu_map_single;
124 pci_dma_ops.unmap_single = pci_iommu_unmap_single;
125 pci_dma_ops.map_sg = pci_iommu_map_sg;
126 pci_dma_ops.unmap_sg = pci_iommu_unmap_sg;
127 pci_dma_ops.dma_supported = pci_iommu_dma_supported;
128}