diff options
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- | arch/powerpc/kernel/Makefile | 18 | ||||
-rw-r--r-- | arch/powerpc/kernel/dma_64.c | 151 | ||||
-rw-r--r-- | arch/powerpc/kernel/iomap.c | 146 | ||||
-rw-r--r-- | arch/powerpc/kernel/iommu.c | 572 | ||||
-rw-r--r-- | arch/powerpc/kernel/kprobes.c | 459 | ||||
-rw-r--r-- | arch/powerpc/kernel/machine_kexec_64.c | 358 | ||||
-rw-r--r-- | arch/powerpc/kernel/module_64.c | 455 | ||||
-rw-r--r-- | arch/powerpc/kernel/pci_64.c | 1319 | ||||
-rw-r--r-- | arch/powerpc/kernel/pci_direct_iommu.c | 94 | ||||
-rw-r--r-- | arch/powerpc/kernel/pci_dn.c | 230 | ||||
-rw-r--r-- | arch/powerpc/kernel/pci_iommu.c | 128 |
11 files changed, 3925 insertions, 5 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 046b4bf1f21e..4970e3721a84 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile | |||
@@ -49,12 +49,23 @@ extra-y += vmlinux.lds | |||
49 | obj-y += process.o init_task.o time.o \ | 49 | obj-y += process.o init_task.o time.o \ |
50 | prom.o traps.o setup-common.o | 50 | prom.o traps.o setup-common.o |
51 | obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o | 51 | obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o |
52 | obj-$(CONFIG_PPC64) += misc_64.o | 52 | obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o |
53 | obj-$(CONFIG_PPC_OF) += prom_init.o | 53 | obj-$(CONFIG_PPC_OF) += prom_init.o |
54 | obj-$(CONFIG_MODULES) += ppc_ksyms.o | 54 | obj-$(CONFIG_MODULES) += ppc_ksyms.o |
55 | obj-$(CONFIG_BOOTX_TEXT) += btext.o | 55 | obj-$(CONFIG_BOOTX_TEXT) += btext.o |
56 | obj-$(CONFIG_6xx) += idle_6xx.o | 56 | obj-$(CONFIG_6xx) += idle_6xx.o |
57 | obj-$(CONFIG_SMP) += smp.o | 57 | obj-$(CONFIG_SMP) += smp.o |
58 | obj-$(CONFIG_KPROBES) += kprobes.o | ||
59 | |||
60 | module-$(CONFIG_PPC64) += module_64.o | ||
61 | obj-$(CONFIG_MODULES) += $(module-y) | ||
62 | |||
63 | pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \ | ||
64 | pci_direct_iommu.o iomap.o | ||
65 | obj-$(CONFIG_PCI) += $(pci64-y) | ||
66 | |||
67 | kexec64-$(CONFIG_PPC64) += machine_kexec_64.o | ||
68 | obj-$(CONFIG_KEXEC) += $(kexec64-y) | ||
58 | 69 | ||
59 | ifeq ($(CONFIG_PPC_ISERIES),y) | 70 | ifeq ($(CONFIG_PPC_ISERIES),y) |
60 | $(obj)/head_64.o: $(obj)/lparmap.s | 71 | $(obj)/head_64.o: $(obj)/lparmap.s |
@@ -62,11 +73,8 @@ AFLAGS_head_64.o += -I$(obj) | |||
62 | endif | 73 | endif |
63 | 74 | ||
64 | else | 75 | else |
65 | # stuff used from here for ARCH=ppc or ARCH=ppc64 | 76 | # stuff used from here for ARCH=ppc |
66 | smpobj-$(CONFIG_SMP) += smp.o | 77 | smpobj-$(CONFIG_SMP) += smp.o |
67 | obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o \ | ||
68 | setup-common.o $(smpobj-y) | ||
69 | |||
70 | 78 | ||
71 | endif | 79 | endif |
72 | 80 | ||
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c new file mode 100644 index 000000000000..7c3419656ccc --- /dev/null +++ b/arch/powerpc/kernel/dma_64.c | |||
@@ -0,0 +1,151 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004 IBM Corporation | ||
3 | * | ||
4 | * Implements the generic device dma API for ppc64. Handles | ||
5 | * the pci and vio busses | ||
6 | */ | ||
7 | |||
8 | #include <linux/device.h> | ||
9 | #include <linux/dma-mapping.h> | ||
10 | /* Include the busses we support */ | ||
11 | #include <linux/pci.h> | ||
12 | #include <asm/vio.h> | ||
13 | #include <asm/scatterlist.h> | ||
14 | #include <asm/bug.h> | ||
15 | |||
16 | static struct dma_mapping_ops *get_dma_ops(struct device *dev) | ||
17 | { | ||
18 | #ifdef CONFIG_PCI | ||
19 | if (dev->bus == &pci_bus_type) | ||
20 | return &pci_dma_ops; | ||
21 | #endif | ||
22 | #ifdef CONFIG_IBMVIO | ||
23 | if (dev->bus == &vio_bus_type) | ||
24 | return &vio_dma_ops; | ||
25 | #endif | ||
26 | return NULL; | ||
27 | } | ||
28 | |||
29 | int dma_supported(struct device *dev, u64 mask) | ||
30 | { | ||
31 | struct dma_mapping_ops *dma_ops = get_dma_ops(dev); | ||
32 | |||
33 | if (dma_ops) | ||
34 | return dma_ops->dma_supported(dev, mask); | ||
35 | BUG(); | ||
36 | return 0; | ||
37 | } | ||
38 | EXPORT_SYMBOL(dma_supported); | ||
39 | |||
40 | int dma_set_mask(struct device *dev, u64 dma_mask) | ||
41 | { | ||
42 | #ifdef CONFIG_PCI | ||
43 | if (dev->bus == &pci_bus_type) | ||
44 | return pci_set_dma_mask(to_pci_dev(dev), dma_mask); | ||
45 | #endif | ||
46 | #ifdef CONFIG_IBMVIO | ||
47 | if (dev->bus == &vio_bus_type) | ||
48 | return -EIO; | ||
49 | #endif /* CONFIG_IBMVIO */ | ||
50 | BUG(); | ||
51 | return 0; | ||
52 | } | ||
53 | EXPORT_SYMBOL(dma_set_mask); | ||
54 | |||
55 | void *dma_alloc_coherent(struct device *dev, size_t size, | ||
56 | dma_addr_t *dma_handle, gfp_t flag) | ||
57 | { | ||
58 | struct dma_mapping_ops *dma_ops = get_dma_ops(dev); | ||
59 | |||
60 | if (dma_ops) | ||
61 | return dma_ops->alloc_coherent(dev, size, dma_handle, flag); | ||
62 | BUG(); | ||
63 | return NULL; | ||
64 | } | ||
65 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
66 | |||
67 | void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, | ||
68 | dma_addr_t dma_handle) | ||
69 | { | ||
70 | struct dma_mapping_ops *dma_ops = get_dma_ops(dev); | ||
71 | |||
72 | if (dma_ops) | ||
73 | dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); | ||
74 | else | ||
75 | BUG(); | ||
76 | } | ||
77 | EXPORT_SYMBOL(dma_free_coherent); | ||
78 | |||
79 | dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size, | ||
80 | enum dma_data_direction direction) | ||
81 | { | ||
82 | struct dma_mapping_ops *dma_ops = get_dma_ops(dev); | ||
83 | |||
84 | if (dma_ops) | ||
85 | return dma_ops->map_single(dev, cpu_addr, size, direction); | ||
86 | BUG(); | ||
87 | return (dma_addr_t)0; | ||
88 | } | ||
89 | EXPORT_SYMBOL(dma_map_single); | ||
90 | |||
91 | void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, | ||
92 | enum dma_data_direction direction) | ||
93 | { | ||
94 | struct dma_mapping_ops *dma_ops = get_dma_ops(dev); | ||
95 | |||
96 | if (dma_ops) | ||
97 | dma_ops->unmap_single(dev, dma_addr, size, direction); | ||
98 | else | ||
99 | BUG(); | ||
100 | } | ||
101 | EXPORT_SYMBOL(dma_unmap_single); | ||
102 | |||
103 | dma_addr_t dma_map_page(struct device *dev, struct page *page, | ||
104 | unsigned long offset, size_t size, | ||
105 | enum dma_data_direction direction) | ||
106 | { | ||
107 | struct dma_mapping_ops *dma_ops = get_dma_ops(dev); | ||
108 | |||
109 | if (dma_ops) | ||
110 | return dma_ops->map_single(dev, | ||
111 | (page_address(page) + offset), size, direction); | ||
112 | BUG(); | ||
113 | return (dma_addr_t)0; | ||
114 | } | ||
115 | EXPORT_SYMBOL(dma_map_page); | ||
116 | |||
117 | void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, | ||
118 | enum dma_data_direction direction) | ||
119 | { | ||
120 | struct dma_mapping_ops *dma_ops = get_dma_ops(dev); | ||
121 | |||
122 | if (dma_ops) | ||
123 | dma_ops->unmap_single(dev, dma_address, size, direction); | ||
124 | else | ||
125 | BUG(); | ||
126 | } | ||
127 | EXPORT_SYMBOL(dma_unmap_page); | ||
128 | |||
129 | int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, | ||
130 | enum dma_data_direction direction) | ||
131 | { | ||
132 | struct dma_mapping_ops *dma_ops = get_dma_ops(dev); | ||
133 | |||
134 | if (dma_ops) | ||
135 | return dma_ops->map_sg(dev, sg, nents, direction); | ||
136 | BUG(); | ||
137 | return 0; | ||
138 | } | ||
139 | EXPORT_SYMBOL(dma_map_sg); | ||
140 | |||
141 | void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, | ||
142 | enum dma_data_direction direction) | ||
143 | { | ||
144 | struct dma_mapping_ops *dma_ops = get_dma_ops(dev); | ||
145 | |||
146 | if (dma_ops) | ||
147 | dma_ops->unmap_sg(dev, sg, nhwentries, direction); | ||
148 | else | ||
149 | BUG(); | ||
150 | } | ||
151 | EXPORT_SYMBOL(dma_unmap_sg); | ||
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c new file mode 100644 index 000000000000..6160c8dbb7c5 --- /dev/null +++ b/arch/powerpc/kernel/iomap.c | |||
@@ -0,0 +1,146 @@ | |||
1 | /* | ||
2 | * arch/powerpc/kernel/iomap.c | |
3 | * | ||
4 | * ppc64 "iomap" interface implementation. | ||
5 | * | ||
6 | * (C) Copyright 2004 Linus Torvalds | ||
7 | */ | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/pci.h> | ||
10 | #include <linux/mm.h> | ||
11 | #include <asm/io.h> | ||
12 | |||
13 | /* | ||
14 | * Here comes the ppc64 implementation of the IOMAP | ||
15 | * interfaces. | ||
16 | */ | ||
17 | unsigned int fastcall ioread8(void __iomem *addr) | ||
18 | { | ||
19 | return readb(addr); | ||
20 | } | ||
21 | unsigned int fastcall ioread16(void __iomem *addr) | ||
22 | { | ||
23 | return readw(addr); | ||
24 | } | ||
25 | unsigned int fastcall ioread16be(void __iomem *addr) | ||
26 | { | ||
27 | return in_be16(addr); | ||
28 | } | ||
29 | unsigned int fastcall ioread32(void __iomem *addr) | ||
30 | { | ||
31 | return readl(addr); | ||
32 | } | ||
33 | unsigned int fastcall ioread32be(void __iomem *addr) | ||
34 | { | ||
35 | return in_be32(addr); | ||
36 | } | ||
37 | EXPORT_SYMBOL(ioread8); | ||
38 | EXPORT_SYMBOL(ioread16); | ||
39 | EXPORT_SYMBOL(ioread16be); | ||
40 | EXPORT_SYMBOL(ioread32); | ||
41 | EXPORT_SYMBOL(ioread32be); | ||
42 | |||
43 | void fastcall iowrite8(u8 val, void __iomem *addr) | ||
44 | { | ||
45 | writeb(val, addr); | ||
46 | } | ||
47 | void fastcall iowrite16(u16 val, void __iomem *addr) | ||
48 | { | ||
49 | writew(val, addr); | ||
50 | } | ||
51 | void fastcall iowrite16be(u16 val, void __iomem *addr) | ||
52 | { | ||
53 | out_be16(addr, val); | ||
54 | } | ||
55 | void fastcall iowrite32(u32 val, void __iomem *addr) | ||
56 | { | ||
57 | writel(val, addr); | ||
58 | } | ||
59 | void fastcall iowrite32be(u32 val, void __iomem *addr) | ||
60 | { | ||
61 | out_be32(addr, val); | ||
62 | } | ||
63 | EXPORT_SYMBOL(iowrite8); | ||
64 | EXPORT_SYMBOL(iowrite16); | ||
65 | EXPORT_SYMBOL(iowrite16be); | ||
66 | EXPORT_SYMBOL(iowrite32); | ||
67 | EXPORT_SYMBOL(iowrite32be); | ||
68 | |||
69 | /* | ||
70 | * These are the "repeat read/write" functions. Note the | ||
71 | * non-CPU byte order. We do things in "IO byteorder" | ||
72 | * here. | ||
73 | * | ||
74 | * FIXME! We could make these do EEH handling if we really | ||
75 | * wanted. Not clear if we do. | ||
76 | */ | ||
77 | void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) | ||
78 | { | ||
79 | _insb((u8 __iomem *) addr, dst, count); | ||
80 | } | ||
81 | void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) | ||
82 | { | ||
83 | _insw_ns((u16 __iomem *) addr, dst, count); | ||
84 | } | ||
85 | void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) | ||
86 | { | ||
87 | _insl_ns((u32 __iomem *) addr, dst, count); | ||
88 | } | ||
89 | EXPORT_SYMBOL(ioread8_rep); | ||
90 | EXPORT_SYMBOL(ioread16_rep); | ||
91 | EXPORT_SYMBOL(ioread32_rep); | ||
92 | |||
93 | void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) | ||
94 | { | ||
95 | _outsb((u8 __iomem *) addr, src, count); | ||
96 | } | ||
97 | void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) | ||
98 | { | ||
99 | _outsw_ns((u16 __iomem *) addr, src, count); | ||
100 | } | ||
101 | void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) | ||
102 | { | ||
103 | _outsl_ns((u32 __iomem *) addr, src, count); | ||
104 | } | ||
105 | EXPORT_SYMBOL(iowrite8_rep); | ||
106 | EXPORT_SYMBOL(iowrite16_rep); | ||
107 | EXPORT_SYMBOL(iowrite32_rep); | ||
108 | |||
109 | void __iomem *ioport_map(unsigned long port, unsigned int len) | ||
110 | { | ||
111 | if (!_IO_IS_VALID(port)) | ||
112 | return NULL; | ||
113 | return (void __iomem *) (port+pci_io_base); | ||
114 | } | ||
115 | |||
116 | void ioport_unmap(void __iomem *addr) | ||
117 | { | ||
118 | /* Nothing to do */ | ||
119 | } | ||
120 | EXPORT_SYMBOL(ioport_map); | ||
121 | EXPORT_SYMBOL(ioport_unmap); | ||
122 | |||
123 | void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) | ||
124 | { | ||
125 | unsigned long start = pci_resource_start(dev, bar); | ||
126 | unsigned long len = pci_resource_len(dev, bar); | ||
127 | unsigned long flags = pci_resource_flags(dev, bar); | ||
128 | |||
129 | if (!len) | ||
130 | return NULL; | ||
131 | if (max && len > max) | ||
132 | len = max; | ||
133 | if (flags & IORESOURCE_IO) | ||
134 | return ioport_map(start, len); | ||
135 | if (flags & IORESOURCE_MEM) | ||
136 | return ioremap(start, len); | ||
137 | /* What? */ | ||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | void pci_iounmap(struct pci_dev *dev, void __iomem *addr) | ||
142 | { | ||
143 | /* Nothing to do */ | ||
144 | } | ||
145 | EXPORT_SYMBOL(pci_iomap); | ||
146 | EXPORT_SYMBOL(pci_iounmap); | ||
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c new file mode 100644 index 000000000000..4d9b4388918b --- /dev/null +++ b/arch/powerpc/kernel/iommu.c | |||
@@ -0,0 +1,572 @@ | |||
1 | /* | ||
2 | * arch/powerpc/kernel/iommu.c | |
3 | * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation | ||
4 | * | ||
5 | * Rewrite, cleanup, new allocation schemes, virtual merging: | ||
6 | * Copyright (C) 2004 Olof Johansson, IBM Corporation | ||
7 | * and Ben. Herrenschmidt, IBM Corporation | ||
8 | * | ||
9 | * Dynamic DMA mapping support, bus-independent parts. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
24 | */ | ||
25 | |||
26 | |||
27 | #include <linux/config.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/slab.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/spinlock.h> | ||
33 | #include <linux/string.h> | ||
34 | #include <linux/dma-mapping.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/bitops.h> | ||
37 | #include <asm/io.h> | ||
38 | #include <asm/prom.h> | ||
39 | #include <asm/iommu.h> | ||
40 | #include <asm/pci-bridge.h> | ||
41 | #include <asm/machdep.h> | ||
42 | |||
43 | #define DBG(...) | ||
44 | |||
45 | #ifdef CONFIG_IOMMU_VMERGE | ||
46 | static int novmerge = 0; | ||
47 | #else | ||
48 | static int novmerge = 1; | ||
49 | #endif | ||
50 | |||
51 | static int __init setup_iommu(char *str) | ||
52 | { | ||
53 | if (!strcmp(str, "novmerge")) | ||
54 | novmerge = 1; | ||
55 | else if (!strcmp(str, "vmerge")) | ||
56 | novmerge = 0; | ||
57 | return 1; | ||
58 | } | ||
59 | |||
60 | __setup("iommu=", setup_iommu); | ||
61 | |||
62 | static unsigned long iommu_range_alloc(struct iommu_table *tbl, | ||
63 | unsigned long npages, | ||
64 | unsigned long *handle, | ||
65 | unsigned int align_order) | ||
66 | { | ||
67 | unsigned long n, end, i, start; | ||
68 | unsigned long limit; | ||
69 | int largealloc = npages > 15; | ||
70 | int pass = 0; | ||
71 | unsigned long align_mask; | ||
72 | |||
73 | align_mask = 0xffffffffffffffffl >> (64 - align_order); | ||
74 | |||
75 | /* This allocator was derived from x86_64's bit string search */ | ||
76 | |||
77 | /* Sanity check */ | ||
78 | if (unlikely(npages) == 0) { | ||
79 | if (printk_ratelimit()) | ||
80 | WARN_ON(1); | ||
81 | return DMA_ERROR_CODE; | ||
82 | } | ||
83 | |||
84 | if (handle && *handle) | ||
85 | start = *handle; | ||
86 | else | ||
87 | start = largealloc ? tbl->it_largehint : tbl->it_hint; | ||
88 | |||
89 | /* Use only half of the table for small allocs (15 pages or less) */ | ||
90 | limit = largealloc ? tbl->it_size : tbl->it_halfpoint; | ||
91 | |||
92 | if (largealloc && start < tbl->it_halfpoint) | ||
93 | start = tbl->it_halfpoint; | ||
94 | |||
95 | /* The case below can happen if we have a small segment appended | ||
96 | * to a large, or when the previous alloc was at the very end of | ||
97 | * the available space. If so, go back to the initial start. | ||
98 | */ | ||
99 | if (start >= limit) | ||
100 | start = largealloc ? tbl->it_largehint : tbl->it_hint; | ||
101 | |||
102 | again: | ||
103 | |||
104 | n = find_next_zero_bit(tbl->it_map, limit, start); | ||
105 | |||
106 | /* Align allocation */ | ||
107 | n = (n + align_mask) & ~align_mask; | ||
108 | |||
109 | end = n + npages; | ||
110 | |||
111 | if (unlikely(end >= limit)) { | ||
112 | if (likely(pass < 2)) { | ||
113 | /* First failure, just rescan the half of the table. | ||
114 | * Second failure, rescan the other half of the table. | ||
115 | */ | ||
116 | start = (largealloc ^ pass) ? tbl->it_halfpoint : 0; | ||
117 | limit = pass ? tbl->it_size : limit; | ||
118 | pass++; | ||
119 | goto again; | ||
120 | } else { | ||
121 | /* Third failure, give up */ | ||
122 | return DMA_ERROR_CODE; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | for (i = n; i < end; i++) | ||
127 | if (test_bit(i, tbl->it_map)) { | ||
128 | start = i+1; | ||
129 | goto again; | ||
130 | } | ||
131 | |||
132 | for (i = n; i < end; i++) | ||
133 | __set_bit(i, tbl->it_map); | ||
134 | |||
135 | /* Bump the hint to a new block for small allocs. */ | ||
136 | if (largealloc) { | ||
137 | /* Don't bump to new block to avoid fragmentation */ | ||
138 | tbl->it_largehint = end; | ||
139 | } else { | ||
140 | /* Overflow will be taken care of at the next allocation */ | ||
141 | tbl->it_hint = (end + tbl->it_blocksize - 1) & | ||
142 | ~(tbl->it_blocksize - 1); | ||
143 | } | ||
144 | |||
145 | /* Update handle for SG allocations */ | ||
146 | if (handle) | ||
147 | *handle = end; | ||
148 | |||
149 | return n; | ||
150 | } | ||
151 | |||
152 | static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page, | ||
153 | unsigned int npages, enum dma_data_direction direction, | ||
154 | unsigned int align_order) | ||
155 | { | ||
156 | unsigned long entry, flags; | ||
157 | dma_addr_t ret = DMA_ERROR_CODE; | ||
158 | |||
159 | spin_lock_irqsave(&(tbl->it_lock), flags); | ||
160 | |||
161 | entry = iommu_range_alloc(tbl, npages, NULL, align_order); | ||
162 | |||
163 | if (unlikely(entry == DMA_ERROR_CODE)) { | ||
164 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | ||
165 | return DMA_ERROR_CODE; | ||
166 | } | ||
167 | |||
168 | entry += tbl->it_offset; /* Offset into real TCE table */ | ||
169 | ret = entry << PAGE_SHIFT; /* Set the return dma address */ | ||
170 | |||
171 | /* Put the TCEs in the HW table */ | ||
172 | ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK, | ||
173 | direction); | ||
174 | |||
175 | |||
176 | /* Flush/invalidate TLB caches if necessary */ | ||
177 | if (ppc_md.tce_flush) | ||
178 | ppc_md.tce_flush(tbl); | ||
179 | |||
180 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | ||
181 | |||
182 | /* Make sure updates are seen by hardware */ | ||
183 | mb(); | ||
184 | |||
185 | return ret; | ||
186 | } | ||
187 | |||
188 | static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | ||
189 | unsigned int npages) | ||
190 | { | ||
191 | unsigned long entry, free_entry; | ||
192 | unsigned long i; | ||
193 | |||
194 | entry = dma_addr >> PAGE_SHIFT; | ||
195 | free_entry = entry - tbl->it_offset; | ||
196 | |||
197 | if (((free_entry + npages) > tbl->it_size) || | ||
198 | (entry < tbl->it_offset)) { | ||
199 | if (printk_ratelimit()) { | ||
200 | printk(KERN_INFO "iommu_free: invalid entry\n"); | ||
201 | printk(KERN_INFO "\tentry = 0x%lx\n", entry); | ||
202 | printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr); | ||
203 | printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl); | ||
204 | printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno); | ||
205 | printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size); | ||
206 | printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset); | ||
207 | printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index); | ||
208 | WARN_ON(1); | ||
209 | } | ||
210 | return; | ||
211 | } | ||
212 | |||
213 | ppc_md.tce_free(tbl, entry, npages); | ||
214 | |||
215 | for (i = 0; i < npages; i++) | ||
216 | __clear_bit(free_entry+i, tbl->it_map); | ||
217 | } | ||
218 | |||
219 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | ||
220 | unsigned int npages) | ||
221 | { | ||
222 | unsigned long flags; | ||
223 | |||
224 | spin_lock_irqsave(&(tbl->it_lock), flags); | ||
225 | |||
226 | __iommu_free(tbl, dma_addr, npages); | ||
227 | |||
228 | /* Make sure TLB cache is flushed if the HW needs it. We do | ||
229 | * not do an mb() here on purpose, it is not needed on any of | ||
230 | * the current platforms. | ||
231 | */ | ||
232 | if (ppc_md.tce_flush) | ||
233 | ppc_md.tce_flush(tbl); | ||
234 | |||
235 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | ||
236 | } | ||
237 | |||
238 | int iommu_map_sg(struct device *dev, struct iommu_table *tbl, | ||
239 | struct scatterlist *sglist, int nelems, | ||
240 | enum dma_data_direction direction) | ||
241 | { | ||
242 | dma_addr_t dma_next = 0, dma_addr; | ||
243 | unsigned long flags; | ||
244 | struct scatterlist *s, *outs, *segstart; | ||
245 | int outcount, incount; | ||
246 | unsigned long handle; | ||
247 | |||
248 | BUG_ON(direction == DMA_NONE); | ||
249 | |||
250 | if ((nelems == 0) || !tbl) | ||
251 | return 0; | ||
252 | |||
253 | outs = s = segstart = &sglist[0]; | ||
254 | outcount = 1; | ||
255 | incount = nelems; | ||
256 | handle = 0; | ||
257 | |||
258 | /* Init first segment length for backout at failure */ | ||
259 | outs->dma_length = 0; | ||
260 | |||
261 | DBG("mapping %d elements:\n", nelems); | ||
262 | |||
263 | spin_lock_irqsave(&(tbl->it_lock), flags); | ||
264 | |||
265 | for (s = outs; nelems; nelems--, s++) { | ||
266 | unsigned long vaddr, npages, entry, slen; | ||
267 | |||
268 | slen = s->length; | ||
269 | /* Sanity check */ | ||
270 | if (slen == 0) { | ||
271 | dma_next = 0; | ||
272 | continue; | ||
273 | } | ||
274 | /* Allocate iommu entries for that segment */ | ||
275 | vaddr = (unsigned long)page_address(s->page) + s->offset; | ||
276 | npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK); | ||
277 | npages >>= PAGE_SHIFT; | ||
278 | entry = iommu_range_alloc(tbl, npages, &handle, 0); | ||
279 | |||
280 | DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); | ||
281 | |||
282 | /* Handle failure */ | ||
283 | if (unlikely(entry == DMA_ERROR_CODE)) { | ||
284 | if (printk_ratelimit()) | ||
285 | printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx" | ||
286 | " npages %lx\n", tbl, vaddr, npages); | ||
287 | goto failure; | ||
288 | } | ||
289 | |||
290 | /* Convert entry to a dma_addr_t */ | ||
291 | entry += tbl->it_offset; | ||
292 | dma_addr = entry << PAGE_SHIFT; | ||
293 | dma_addr |= s->offset; | ||
294 | |||
295 | DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n", | ||
296 | npages, entry, dma_addr); | ||
297 | |||
298 | /* Insert into HW table */ | ||
299 | ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction); | ||
300 | |||
301 | /* If we are in an open segment, try merging */ | ||
302 | if (segstart != s) { | ||
303 | DBG(" - trying merge...\n"); | ||
304 | /* We cannot merge if: | ||
305 | * - allocated dma_addr isn't contiguous to previous allocation | ||
306 | */ | ||
307 | if (novmerge || (dma_addr != dma_next)) { | ||
308 | /* Can't merge: create a new segment */ | ||
309 | segstart = s; | ||
310 | outcount++; outs++; | ||
311 | DBG(" can't merge, new segment.\n"); | ||
312 | } else { | ||
313 | outs->dma_length += s->length; | ||
314 | DBG(" merged, new len: %lx\n", outs->dma_length); | ||
315 | } | ||
316 | } | ||
317 | |||
318 | if (segstart == s) { | ||
319 | /* This is a new segment, fill entries */ | ||
320 | DBG(" - filling new segment.\n"); | ||
321 | outs->dma_address = dma_addr; | ||
322 | outs->dma_length = slen; | ||
323 | } | ||
324 | |||
325 | /* Calculate next page pointer for contiguous check */ | ||
326 | dma_next = dma_addr + slen; | ||
327 | |||
328 | DBG(" - dma next is: %lx\n", dma_next); | ||
329 | } | ||
330 | |||
331 | /* Flush/invalidate TLB caches if necessary */ | ||
332 | if (ppc_md.tce_flush) | ||
333 | ppc_md.tce_flush(tbl); | ||
334 | |||
335 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | ||
336 | |||
337 | /* Make sure updates are seen by hardware */ | ||
338 | mb(); | ||
339 | |||
340 | DBG("mapped %d elements:\n", outcount); | ||
341 | |||
342 | /* For the sake of iommu_unmap_sg, we clear out the length in the | ||
343 | * next entry of the sglist if we didn't fill the list completely | ||
344 | */ | ||
345 | if (outcount < incount) { | ||
346 | outs++; | ||
347 | outs->dma_address = DMA_ERROR_CODE; | ||
348 | outs->dma_length = 0; | ||
349 | } | ||
350 | return outcount; | ||
351 | |||
352 | failure: | ||
353 | for (s = &sglist[0]; s <= outs; s++) { | ||
354 | if (s->dma_length != 0) { | ||
355 | unsigned long vaddr, npages; | ||
356 | |||
357 | vaddr = s->dma_address & PAGE_MASK; | ||
358 | npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr) | ||
359 | >> PAGE_SHIFT; | ||
360 | __iommu_free(tbl, vaddr, npages); | ||
361 | } | ||
362 | } | ||
363 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | ||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | |||
368 | void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, | ||
369 | int nelems, enum dma_data_direction direction) | ||
370 | { | ||
371 | unsigned long flags; | ||
372 | |||
373 | BUG_ON(direction == DMA_NONE); | ||
374 | |||
375 | if (!tbl) | ||
376 | return; | ||
377 | |||
378 | spin_lock_irqsave(&(tbl->it_lock), flags); | ||
379 | |||
380 | while (nelems--) { | ||
381 | unsigned int npages; | ||
382 | dma_addr_t dma_handle = sglist->dma_address; | ||
383 | |||
384 | if (sglist->dma_length == 0) | ||
385 | break; | ||
386 | npages = (PAGE_ALIGN(dma_handle + sglist->dma_length) | ||
387 | - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT; | ||
388 | __iommu_free(tbl, dma_handle, npages); | ||
389 | sglist++; | ||
390 | } | ||
391 | |||
392 | /* Flush/invalidate TLBs if necessary. As for iommu_free(), we | ||
393 | * do not do an mb() here, the affected platforms do not need it | ||
394 | * when freeing. | ||
395 | */ | ||
396 | if (ppc_md.tce_flush) | ||
397 | ppc_md.tce_flush(tbl); | ||
398 | |||
399 | spin_unlock_irqrestore(&(tbl->it_lock), flags); | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * Build a iommu_table structure. This contains a bit map which | ||
404 | * is used to manage allocation of the tce space. | ||
405 | */ | ||
406 | struct iommu_table *iommu_init_table(struct iommu_table *tbl) | ||
407 | { | ||
408 | unsigned long sz; | ||
409 | static int welcomed = 0; | ||
410 | |||
411 | /* Set aside 1/4 of the table for large allocations. */ | ||
412 | tbl->it_halfpoint = tbl->it_size * 3 / 4; | ||
413 | |||
414 | /* number of bytes needed for the bitmap */ | ||
415 | sz = (tbl->it_size + 7) >> 3; | ||
416 | |||
417 | tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz)); | ||
418 | if (!tbl->it_map) | ||
419 | panic("iommu_init_table: Can't allocate %ld bytes\n", sz); | ||
420 | |||
421 | memset(tbl->it_map, 0, sz); | ||
422 | |||
423 | tbl->it_hint = 0; | ||
424 | tbl->it_largehint = tbl->it_halfpoint; | ||
425 | spin_lock_init(&tbl->it_lock); | ||
426 | |||
427 | /* Clear the hardware table in case firmware left allocations in it */ | ||
428 | ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); | ||
429 | |||
430 | if (!welcomed) { | ||
431 | printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", | ||
432 | novmerge ? "disabled" : "enabled"); | ||
433 | welcomed = 1; | ||
434 | } | ||
435 | |||
436 | return tbl; | ||
437 | } | ||
438 | |||
439 | void iommu_free_table(struct device_node *dn) | ||
440 | { | ||
441 | struct pci_dn *pdn = dn->data; | ||
442 | struct iommu_table *tbl = pdn->iommu_table; | ||
443 | unsigned long bitmap_sz, i; | ||
444 | unsigned int order; | ||
445 | |||
446 | if (!tbl || !tbl->it_map) { | ||
447 | printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__, | ||
448 | dn->full_name); | ||
449 | return; | ||
450 | } | ||
451 | |||
452 | /* verify that table contains no entries */ | ||
453 | /* it_size is in entries, and we're examining 64 at a time */ | ||
454 | for (i = 0; i < (tbl->it_size/64); i++) { | ||
455 | if (tbl->it_map[i] != 0) { | ||
456 | printk(KERN_WARNING "%s: Unexpected TCEs for %s\n", | ||
457 | __FUNCTION__, dn->full_name); | ||
458 | break; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | /* calculate bitmap size in bytes */ | ||
463 | bitmap_sz = (tbl->it_size + 7) / 8; | ||
464 | |||
465 | /* free bitmap */ | ||
466 | order = get_order(bitmap_sz); | ||
467 | free_pages((unsigned long) tbl->it_map, order); | ||
468 | |||
469 | /* free table */ | ||
470 | kfree(tbl); | ||
471 | } | ||
472 | |||
473 | /* Creates TCEs for a user provided buffer. The user buffer must be | ||
474 | * contiguous real kernel storage (not vmalloc). The address of the buffer | ||
475 | * passed here is the kernel (virtual) address of the buffer. The buffer | ||
476 | * need not be page aligned, the dma_addr_t returned will point to the same | ||
477 | * byte within the page as vaddr. | ||
478 | */ | ||
479 | dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, | ||
480 | size_t size, enum dma_data_direction direction) | ||
481 | { | ||
482 | dma_addr_t dma_handle = DMA_ERROR_CODE; | ||
483 | unsigned long uaddr; | ||
484 | unsigned int npages; | ||
485 | |||
486 | BUG_ON(direction == DMA_NONE); | ||
487 | |||
488 | uaddr = (unsigned long)vaddr; | ||
489 | npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK); | ||
490 | npages >>= PAGE_SHIFT; | ||
491 | |||
492 | if (tbl) { | ||
493 | dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0); | ||
494 | if (dma_handle == DMA_ERROR_CODE) { | ||
495 | if (printk_ratelimit()) { | ||
496 | printk(KERN_INFO "iommu_alloc failed, " | ||
497 | "tbl %p vaddr %p npages %d\n", | ||
498 | tbl, vaddr, npages); | ||
499 | } | ||
500 | } else | ||
501 | dma_handle |= (uaddr & ~PAGE_MASK); | ||
502 | } | ||
503 | |||
504 | return dma_handle; | ||
505 | } | ||
506 | |||
507 | void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, | ||
508 | size_t size, enum dma_data_direction direction) | ||
509 | { | ||
510 | BUG_ON(direction == DMA_NONE); | ||
511 | |||
512 | if (tbl) | ||
513 | iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) - | ||
514 | (dma_handle & PAGE_MASK)) >> PAGE_SHIFT); | ||
515 | } | ||
516 | |||
517 | /* Allocates a contiguous real buffer and creates mappings over it. | ||
518 | * Returns the virtual address of the buffer and sets dma_handle | ||
519 | * to the dma address (mapping) of the first page. | ||
520 | */ | ||
521 | void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, | ||
522 | dma_addr_t *dma_handle, gfp_t flag) | ||
523 | { | ||
524 | void *ret = NULL; | ||
525 | dma_addr_t mapping; | ||
526 | unsigned int npages, order; | ||
527 | |||
528 | size = PAGE_ALIGN(size); | ||
529 | npages = size >> PAGE_SHIFT; | ||
530 | order = get_order(size); | ||
531 | |||
532 | /* | ||
533 | * Client asked for way too much space. This is checked later | ||
534 | * anyway. It is easier to debug here for the drivers than in | ||
535 | * the tce tables. | ||
536 | */ | ||
537 | if (order >= IOMAP_MAX_ORDER) { | ||
538 | printk("iommu_alloc_consistent size too large: 0x%lx\n", size); | ||
539 | return NULL; | ||
540 | } | ||
541 | |||
542 | if (!tbl) | ||
543 | return NULL; | ||
544 | |||
545 | /* Alloc enough pages (and possibly more) */ | ||
546 | ret = (void *)__get_free_pages(flag, order); | ||
547 | if (!ret) | ||
548 | return NULL; | ||
549 | memset(ret, 0, size); | ||
550 | |||
551 | /* Set up tces to cover the allocated range */ | ||
552 | mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order); | ||
553 | if (mapping == DMA_ERROR_CODE) { | ||
554 | free_pages((unsigned long)ret, order); | ||
555 | ret = NULL; | ||
556 | } else | ||
557 | *dma_handle = mapping; | ||
558 | return ret; | ||
559 | } | ||
560 | |||
561 | void iommu_free_coherent(struct iommu_table *tbl, size_t size, | ||
562 | void *vaddr, dma_addr_t dma_handle) | ||
563 | { | ||
564 | unsigned int npages; | ||
565 | |||
566 | if (tbl) { | ||
567 | size = PAGE_ALIGN(size); | ||
568 | npages = size >> PAGE_SHIFT; | ||
569 | iommu_free(tbl, dma_handle, npages); | ||
570 | free_pages((unsigned long)vaddr, get_order(size)); | ||
571 | } | ||
572 | } | ||
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c new file mode 100644 index 000000000000..511af54e6230 --- /dev/null +++ b/arch/powerpc/kernel/kprobes.c | |||
@@ -0,0 +1,459 @@ | |||
1 | /* | ||
2 | * Kernel Probes (KProbes) | ||
3 | * arch/ppc64/kernel/kprobes.c | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
18 | * | ||
19 | * Copyright (C) IBM Corporation, 2002, 2004 | ||
20 | * | ||
21 | * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel | ||
22 | * Probes initial implementation ( includes contributions from | ||
23 | * Rusty Russell). | ||
24 | * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes | ||
25 | * interface to access function arguments. | ||
26 | * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port | ||
27 | * for PPC64 | ||
28 | */ | ||
29 | |||
30 | #include <linux/config.h> | ||
31 | #include <linux/kprobes.h> | ||
32 | #include <linux/ptrace.h> | ||
33 | #include <linux/preempt.h> | ||
34 | #include <asm/cacheflush.h> | ||
35 | #include <asm/kdebug.h> | ||
36 | #include <asm/sstep.h> | ||
37 | |||
38 | static DECLARE_MUTEX(kprobe_mutex); | ||
39 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | ||
40 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | ||
41 | |||
42 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | ||
43 | { | ||
44 | int ret = 0; | ||
45 | kprobe_opcode_t insn = *p->addr; | ||
46 | |||
47 | if ((unsigned long)p->addr & 0x03) { | ||
48 | printk("Attempt to register kprobe at an unaligned address\n"); | ||
49 | ret = -EINVAL; | ||
50 | } else if (IS_MTMSRD(insn) || IS_RFID(insn)) { | ||
51 | printk("Cannot register a kprobe on rfid or mtmsrd\n"); | ||
52 | ret = -EINVAL; | ||
53 | } | ||
54 | |||
55 | /* insn must be on a special executable page on ppc64 */ | ||
56 | if (!ret) { | ||
57 | down(&kprobe_mutex); | ||
58 | p->ainsn.insn = get_insn_slot(); | ||
59 | up(&kprobe_mutex); | ||
60 | if (!p->ainsn.insn) | ||
61 | ret = -ENOMEM; | ||
62 | } | ||
63 | return ret; | ||
64 | } | ||
65 | |||
66 | void __kprobes arch_copy_kprobe(struct kprobe *p) | ||
67 | { | ||
68 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
69 | p->opcode = *p->addr; | ||
70 | } | ||
71 | |||
72 | void __kprobes arch_arm_kprobe(struct kprobe *p) | ||
73 | { | ||
74 | *p->addr = BREAKPOINT_INSTRUCTION; | ||
75 | flush_icache_range((unsigned long) p->addr, | ||
76 | (unsigned long) p->addr + sizeof(kprobe_opcode_t)); | ||
77 | } | ||
78 | |||
79 | void __kprobes arch_disarm_kprobe(struct kprobe *p) | ||
80 | { | ||
81 | *p->addr = p->opcode; | ||
82 | flush_icache_range((unsigned long) p->addr, | ||
83 | (unsigned long) p->addr + sizeof(kprobe_opcode_t)); | ||
84 | } | ||
85 | |||
86 | void __kprobes arch_remove_kprobe(struct kprobe *p) | ||
87 | { | ||
88 | down(&kprobe_mutex); | ||
89 | free_insn_slot(p->ainsn.insn); | ||
90 | up(&kprobe_mutex); | ||
91 | } | ||
92 | |||
93 | static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | ||
94 | { | ||
95 | kprobe_opcode_t insn = *p->ainsn.insn; | ||
96 | |||
97 | regs->msr |= MSR_SE; | ||
98 | |||
99 | /* single step inline if it is a trap variant */ | ||
100 | if (is_trap(insn)) | ||
101 | regs->nip = (unsigned long)p->addr; | ||
102 | else | ||
103 | regs->nip = (unsigned long)p->ainsn.insn; | ||
104 | } | ||
105 | |||
106 | static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
107 | { | ||
108 | kcb->prev_kprobe.kp = kprobe_running(); | ||
109 | kcb->prev_kprobe.status = kcb->kprobe_status; | ||
110 | kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr; | ||
111 | } | ||
112 | |||
113 | static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) | ||
114 | { | ||
115 | __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; | ||
116 | kcb->kprobe_status = kcb->prev_kprobe.status; | ||
117 | kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; | ||
118 | } | ||
119 | |||
120 | static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | ||
121 | struct kprobe_ctlblk *kcb) | ||
122 | { | ||
123 | __get_cpu_var(current_kprobe) = p; | ||
124 | kcb->kprobe_saved_msr = regs->msr; | ||
125 | } | ||
126 | |||
127 | /* Called with kretprobe_lock held */ | ||
128 | void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, | ||
129 | struct pt_regs *regs) | ||
130 | { | ||
131 | struct kretprobe_instance *ri; | ||
132 | |||
133 | if ((ri = get_free_rp_inst(rp)) != NULL) { | ||
134 | ri->rp = rp; | ||
135 | ri->task = current; | ||
136 | ri->ret_addr = (kprobe_opcode_t *)regs->link; | ||
137 | |||
138 | /* Replace the return addr with trampoline addr */ | ||
139 | regs->link = (unsigned long)kretprobe_trampoline; | ||
140 | add_rp_inst(ri); | ||
141 | } else { | ||
142 | rp->nmissed++; | ||
143 | } | ||
144 | } | ||
145 | |||
/*
 * Breakpoint-side entry point, called from kprobe_exceptions_notify() for
 * DIE_BPT.
 *
 * Returns 1 when the trap was consumed here; on those paths this function
 * does not re-enable preemption.  Every path through no_kprobe re-enables
 * preemption and returns ret, which is 0 unless the word at nip is no
 * longer a breakpoint or any other trap variant.
 */
146 | static inline int kprobe_handler(struct pt_regs *regs) | ||
147 | { | ||
148 | struct kprobe *p; | ||
149 | int ret = 0; | ||
150 | unsigned int *addr = (unsigned int *)regs->nip; | ||
151 | struct kprobe_ctlblk *kcb; | ||
152 | |||
153 | /* | ||
154 | * We don't want to be preempted for the entire | ||
155 | * duration of kprobe processing | ||
156 | */ | ||
157 | preempt_disable(); | ||
158 | kcb = get_kprobe_ctlblk(); | ||
159 | |||
160 | /* Check we're not actually recursing */ | ||
161 | if (kprobe_running()) { | ||
162 | p = get_kprobe(addr); | ||
163 | if (p) { | ||
164 | kprobe_opcode_t insn = *p->ainsn.insn; | ||
/* Trap hit while stepping a probed trap variant: restore msr and pass it on. */
165 | if (kcb->kprobe_status == KPROBE_HIT_SS && | ||
166 | is_trap(insn)) { | ||
167 | regs->msr &= ~MSR_SE; | ||
168 | regs->msr |= kcb->kprobe_saved_msr; | ||
169 | goto no_kprobe; | ||
170 | } | ||
171 | /* We have reentered the kprobe_handler(), since | ||
172 | * another probe was hit while within the handler. | ||
173 | * We here save the original kprobes variables and | ||
174 | * just single step on the instruction of the new probe | ||
175 | * without calling any user handlers. | ||
176 | */ | ||
177 | save_previous_kprobe(kcb); | ||
178 | set_current_kprobe(p, regs, kcb); | ||
/* set_current_kprobe() already stored regs->msr; the next line repeats that store. */
179 | kcb->kprobe_saved_msr = regs->msr; | ||
180 | p->nmissed++; | ||
181 | prepare_singlestep(p, regs); | ||
182 | kcb->kprobe_status = KPROBE_REENTER; | ||
183 | return 1; | ||
184 | } else { | ||
/* No probe at this address: offer the trap to the running probe's break_handler. */
185 | p = __get_cpu_var(current_kprobe); | ||
186 | if (p->break_handler && p->break_handler(p, regs)) { | ||
187 | goto ss_probe; | ||
188 | } | ||
189 | } | ||
190 | goto no_kprobe; | ||
191 | } | ||
192 | |||
193 | p = get_kprobe(addr); | ||
194 | if (!p) { | ||
195 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
196 | /* | ||
197 | * PowerPC has multiple variants of the "trap" | ||
198 | * instruction. If the current instruction is a | ||
199 | * trap variant, it could belong to someone else | ||
200 | */ | ||
201 | kprobe_opcode_t cur_insn = *addr; | ||
202 | if (is_trap(cur_insn)) | ||
203 | goto no_kprobe; | ||
204 | /* | ||
205 | * The breakpoint instruction was removed right | ||
206 | * after we hit it. Another cpu has removed | ||
207 | * either a probepoint or a debugger breakpoint | ||
208 | * at this address. In either case, no further | ||
209 | * handling of this interrupt is appropriate. | ||
210 | */ | ||
211 | ret = 1; | ||
212 | } | ||
213 | /* Not one of ours: let kernel handle it */ | ||
214 | goto no_kprobe; | ||
215 | } | ||
216 | |||
217 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
218 | set_current_kprobe(p, regs, kcb); | ||
219 | if (p->pre_handler && p->pre_handler(p, regs)) | ||
220 | /* handler has already set things up, so skip ss setup */ | ||
221 | return 1; | ||
222 | |||
223 | ss_probe: | ||
224 | prepare_singlestep(p, regs); | ||
225 | kcb->kprobe_status = KPROBE_HIT_SS; | ||
226 | return 1; | ||
227 | |||
228 | no_kprobe: | ||
229 | preempt_enable_no_resched(); | ||
230 | return ret; | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * Function return probe trampoline: | ||
235 | * - init_kprobes() establishes a probepoint here | ||
236 | * - When the probed function returns, this probe | ||
237 | * causes the handlers to fire | ||
238 | */ | ||
239 | void kretprobe_trampoline_holder(void) | ||
240 | { | ||
/* Defines the global label kretprobe_trampoline, followed by a single nop. */
241 | asm volatile(".global kretprobe_trampoline\n" | ||
242 | "kretprobe_trampoline:\n" | ||
243 | "nop\n"); | ||
244 | } | ||
245 | |||
246 | /* | ||
247 | * Called when the probe at kretprobe trampoline is hit | ||
248 | */ | ||
/*
 * pre_handler of trampoline_p.  Walks the current task's return-probe
 * instances under kretprobe_lock, runs their handlers, and sets regs->nip
 * to the first recorded return address that is not the trampoline itself.
 * BUGs if no such address is found.  Always returns 1.
 */
249 | int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) | ||
250 | { | ||
251 | struct kretprobe_instance *ri = NULL; | ||
252 | struct hlist_head *head; | ||
253 | struct hlist_node *node, *tmp; | ||
254 | unsigned long flags, orig_ret_address = 0; | ||
255 | unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; | ||
256 | |||
257 | spin_lock_irqsave(&kretprobe_lock, flags); | ||
258 | head = kretprobe_inst_table_head(current); | ||
259 | |||
260 | /* | ||
261 | * It is possible to have multiple instances associated with a given | ||
262 | * task either because multiple functions in the call path | ||
263 | * have a return probe installed on them, and/or more than one | ||
264 | * return probe was registered for a target function. | ||
265 | * | ||
266 | * We can handle this because: | ||
267 | * - instances are always inserted at the head of the list | ||
268 | * - when multiple return probes are registered for the same | ||
269 | * function, the first instance's ret_addr will point to the | ||
270 | * real return address, and all the rest will point to | ||
271 | * kretprobe_trampoline | ||
272 | */ | ||
273 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | ||
274 | if (ri->task != current) | ||
275 | /* another task is sharing our hash bucket */ | ||
276 | continue; | ||
277 | |||
278 | if (ri->rp && ri->rp->handler) | ||
279 | ri->rp->handler(ri, regs); | ||
280 | |||
/* Read ret_addr before the instance is recycled. */
281 | orig_ret_address = (unsigned long)ri->ret_addr; | ||
282 | recycle_rp_inst(ri); | ||
283 | |||
284 | if (orig_ret_address != trampoline_address) | ||
285 | /* | ||
286 | * This is the real return address. Any other | ||
287 | * instances associated with this task are for | ||
288 | * other calls deeper on the call stack | ||
289 | */ | ||
290 | break; | ||
291 | } | ||
292 | |||
293 | BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); | ||
294 | regs->nip = orig_ret_address; | ||
295 | |||
296 | reset_current_kprobe(); | ||
297 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
298 | preempt_enable_no_resched(); | ||
299 | |||
300 | /* | ||
301 | * By returning a non-zero value, we are telling | ||
302 | * kprobe_handler() that we don't want the post_handler | ||
303 | * to run (and have re-enabled preemption) | ||
304 | */ | ||
305 | return 1; | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * Called after single-stepping. p->addr is the address of the | ||
310 | * instruction whose first byte has been replaced by the "breakpoint" | ||
311 | * instruction. To avoid the SMP problems that can occur when we | ||
312 | * temporarily put back the original opcode to single-step, we | ||
313 | * single-stepped a copy of the instruction. The address of this | ||
314 | * copy is p->ainsn.insn. | ||
315 | */ | ||
316 | static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) | ||
317 | { | ||
318 | int ret; | ||
319 | unsigned int insn = *p->ainsn.insn; | ||
320 | |||
321 | regs->nip = (unsigned long)p->addr; | ||
322 | ret = emulate_step(regs, insn); | ||
323 | if (ret == 0) | ||
324 | regs->nip = (unsigned long)p->addr + 4; | ||
325 | } | ||
326 | |||
327 | static inline int post_kprobe_handler(struct pt_regs *regs) | ||
328 | { | ||
329 | struct kprobe *cur = kprobe_running(); | ||
330 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
331 | |||
332 | if (!cur) | ||
333 | return 0; | ||
334 | |||
335 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { | ||
336 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
337 | cur->post_handler(cur, regs, 0); | ||
338 | } | ||
339 | |||
340 | resume_execution(cur, regs); | ||
341 | regs->msr |= kcb->kprobe_saved_msr; | ||
342 | |||
343 | /*Restore back the original saved kprobes variables and continue. */ | ||
344 | if (kcb->kprobe_status == KPROBE_REENTER) { | ||
345 | restore_previous_kprobe(kcb); | ||
346 | goto out; | ||
347 | } | ||
348 | reset_current_kprobe(); | ||
349 | out: | ||
350 | preempt_enable_no_resched(); | ||
351 | |||
352 | /* | ||
353 | * if somebody else is singlestepping across a probe point, msr | ||
354 | * will have SE set, in which case, continue the remaining processing | ||
355 | * of do_debug, as if this is not a probe hit. | ||
356 | */ | ||
357 | if (regs->msr & MSR_SE) | ||
358 | return 0; | ||
359 | |||
360 | return 1; | ||
361 | } | ||
362 | |||
363 | static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) | ||
364 | { | ||
365 | struct kprobe *cur = kprobe_running(); | ||
366 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
367 | |||
368 | if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) | ||
369 | return 1; | ||
370 | |||
371 | if (kcb->kprobe_status & KPROBE_HIT_SS) { | ||
372 | resume_execution(cur, regs); | ||
373 | regs->msr &= ~MSR_SE; | ||
374 | regs->msr |= kcb->kprobe_saved_msr; | ||
375 | |||
376 | reset_current_kprobe(); | ||
377 | preempt_enable_no_resched(); | ||
378 | } | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * Wrapper routine to for handling exceptions. | ||
384 | */ | ||
385 | int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | ||
386 | unsigned long val, void *data) | ||
387 | { | ||
388 | struct die_args *args = (struct die_args *)data; | ||
389 | int ret = NOTIFY_DONE; | ||
390 | |||
391 | switch (val) { | ||
392 | case DIE_BPT: | ||
393 | if (kprobe_handler(args->regs)) | ||
394 | ret = NOTIFY_STOP; | ||
395 | break; | ||
396 | case DIE_SSTEP: | ||
397 | if (post_kprobe_handler(args->regs)) | ||
398 | ret = NOTIFY_STOP; | ||
399 | break; | ||
400 | case DIE_PAGE_FAULT: | ||
401 | /* kprobe_running() needs smp_processor_id() */ | ||
402 | preempt_disable(); | ||
403 | if (kprobe_running() && | ||
404 | kprobe_fault_handler(args->regs, args->trapnr)) | ||
405 | ret = NOTIFY_STOP; | ||
406 | preempt_enable(); | ||
407 | break; | ||
408 | default: | ||
409 | break; | ||
410 | } | ||
411 | return ret; | ||
412 | } | ||
413 | |||
414 | int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | ||
415 | { | ||
416 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
417 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
418 | |||
419 | memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); | ||
420 | |||
421 | /* setup return addr to the jprobe handler routine */ | ||
422 | regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry); | ||
423 | regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); | ||
424 | |||
425 | return 1; | ||
426 | } | ||
427 | |||
/* Executes a "trap" instruction; the asm also acts as a compiler memory barrier. */
428 | void __kprobes jprobe_return(void) | ||
429 | { | ||
430 | asm volatile("trap" ::: "memory"); | ||
431 | } | ||
432 | |||
433 | void __kprobes jprobe_return_end(void) | ||
434 | { | ||
435 | }; | ||
436 | |||
437 | int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | ||
438 | { | ||
439 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
440 | |||
441 | /* | ||
442 | * FIXME - we should ideally be validating that we got here 'cos | ||
443 | * of the "trap" in jprobe_return() above, before restoring the | ||
444 | * saved regs... | ||
445 | */ | ||
446 | memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); | ||
447 | preempt_enable_no_resched(); | ||
448 | return 1; | ||
449 | } | ||
450 | |||
/*
 * Probe placed on the kretprobe_trampoline label; its pre_handler is
 * trampoline_probe_handler().  Registered by arch_init_kprobes().
 */
451 | static struct kprobe trampoline_p = { | ||
452 | .addr = (kprobe_opcode_t *) &kretprobe_trampoline, | ||
453 | .pre_handler = trampoline_probe_handler | ||
454 | }; | ||
455 | |||
456 | int __init arch_init_kprobes(void) | ||
457 | { | ||
458 | return register_kprobe(&trampoline_p); | ||
459 | } | ||
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c new file mode 100644 index 000000000000..97c51e452be7 --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_64.c | |||
@@ -0,0 +1,358 @@ | |||
1 | /* | ||
2 | * machine_kexec.c - handle transition of Linux booting another kernel | ||
3 | * | ||
4 | * Copyright (C) 2004-2005, IBM Corp. | ||
5 | * | ||
6 | * Created by: Milton D Miller II | ||
7 | * | ||
8 | * This source code is licensed under the GNU General Public License, | ||
9 | * Version 2. See the file COPYING for more details. | ||
10 | */ | ||
11 | |||
12 | |||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/kexec.h> | ||
15 | #include <linux/smp.h> | ||
16 | #include <linux/thread_info.h> | ||
17 | #include <linux/errno.h> | ||
18 | |||
19 | #include <asm/page.h> | ||
20 | #include <asm/current.h> | ||
21 | #include <asm/machdep.h> | ||
22 | #include <asm/cacheflush.h> | ||
23 | #include <asm/paca.h> | ||
24 | #include <asm/mmu.h> | ||
25 | #include <asm/sections.h> /* _end */ | ||
26 | #include <asm/prom.h> | ||
27 | #include <asm/smp.h> | ||
28 | |||
/*
 * machine_kexec_prepare() multiplies this by (htab_hash_mask + 1) to get
 * the byte size of the hash table it must not overwrite.
 */
29 | #define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ | ||
30 | |||
31 | /* Have this around till we move it into crash specific file */ | ||
/* One note buffer per cpu slot (NR_CPUS entries). */
32 | note_buf_t crash_notes[NR_CPUS]; | ||
33 | |||
34 | /* Dummy for now. Not sure if we need to have a crash shutdown in here | ||
35 | * and, if so, what it will achieve. Leaving it as is for now so the code | ||
36 | * compiles in the generic kexec environment. | ||
37 | */ | ||
38 | void machine_crash_shutdown(struct pt_regs *regs) | ||
39 | { | ||
40 | /* do nothing right now */ | ||
41 | /* smp_release_cpus() if we want smp on panic kernel */ | ||
42 | /* cpu_irq_down to isolate us until we are ready */ | ||
43 | } | ||
44 | |||
45 | int machine_kexec_prepare(struct kimage *image) | ||
46 | { | ||
47 | int i; | ||
48 | unsigned long begin, end; /* limits of segment */ | ||
49 | unsigned long low, high; /* limits of blocked memory range */ | ||
50 | struct device_node *node; | ||
51 | unsigned long *basep; | ||
52 | unsigned int *sizep; | ||
53 | |||
54 | if (!ppc_md.hpte_clear_all) | ||
55 | return -ENOENT; | ||
56 | |||
57 | /* | ||
58 | * Since we use the kernel fault handlers and paging code to | ||
59 | * handle the virtual mode, we must make sure no destination | ||
60 | * overlaps kernel static data or bss. | ||
61 | */ | ||
62 | for (i = 0; i < image->nr_segments; i++) | ||
63 | if (image->segment[i].mem < __pa(_end)) | ||
64 | return -ETXTBSY; | ||
65 | |||
66 | /* | ||
67 | * For non-LPAR, we absolutely can not overwrite the mmu hash | ||
68 | * table, since we are still using the bolted entries in it to | ||
69 | * do the copy. Check that here. | ||
70 | * | ||
71 | * It is safe if the end is below the start of the blocked | ||
72 | * region (end <= low), or if the beginning is after the | ||
73 | * end of the blocked region (begin >= high). Use the | ||
74 | * boolean identity !(a || b) === (!a && !b). | ||
75 | */ | ||
76 | if (htab_address) { | ||
77 | low = __pa(htab_address); | ||
78 | high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE; | ||
79 | |||
80 | for (i = 0; i < image->nr_segments; i++) { | ||
81 | begin = image->segment[i].mem; | ||
82 | end = begin + image->segment[i].memsz; | ||
83 | |||
84 | if ((begin < high) && (end > low)) | ||
85 | return -ETXTBSY; | ||
86 | } | ||
87 | } | ||
88 | |||
89 | /* We also should not overwrite the tce tables */ | ||
90 | for (node = of_find_node_by_type(NULL, "pci"); node != NULL; | ||
91 | node = of_find_node_by_type(node, "pci")) { | ||
92 | basep = (unsigned long *)get_property(node, "linux,tce-base", | ||
93 | NULL); | ||
94 | sizep = (unsigned int *)get_property(node, "linux,tce-size", | ||
95 | NULL); | ||
96 | if (basep == NULL || sizep == NULL) | ||
97 | continue; | ||
98 | |||
99 | low = *basep; | ||
100 | high = low + (*sizep); | ||
101 | |||
102 | for (i = 0; i < image->nr_segments; i++) { | ||
103 | begin = image->segment[i].mem; | ||
104 | end = begin + image->segment[i].memsz; | ||
105 | |||
106 | if ((begin < high) && (end > low)) | ||
107 | return -ETXTBSY; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
/* Intentionally empty cleanup hook; @image is not touched. */
114 | void machine_kexec_cleanup(struct kimage *image) | ||
115 | { | ||
116 | /* we do nothing in prepare that needs to be undone */ | ||
117 | } | ||
118 | |||
119 | #define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE) | ||
120 | |||
121 | static void copy_segments(unsigned long ind) | ||
122 | { | ||
123 | unsigned long entry; | ||
124 | unsigned long *ptr; | ||
125 | void *dest; | ||
126 | void *addr; | ||
127 | |||
128 | /* | ||
129 | * We rely on kexec_load to create a lists that properly | ||
130 | * initializes these pointers before they are used. | ||
131 | * We will still crash if the list is wrong, but at least | ||
132 | * the compiler will be quiet. | ||
133 | */ | ||
134 | ptr = NULL; | ||
135 | dest = NULL; | ||
136 | |||
137 | for (entry = ind; !(entry & IND_DONE); entry = *ptr++) { | ||
138 | addr = __va(entry & PAGE_MASK); | ||
139 | |||
140 | switch (entry & IND_FLAGS) { | ||
141 | case IND_DESTINATION: | ||
142 | dest = addr; | ||
143 | break; | ||
144 | case IND_INDIRECTION: | ||
145 | ptr = addr; | ||
146 | break; | ||
147 | case IND_SOURCE: | ||
148 | copy_page(dest, addr); | ||
149 | dest += PAGE_SIZE; | ||
150 | } | ||
151 | } | ||
152 | } | ||
153 | |||
154 | void kexec_copy_flush(struct kimage *image) | ||
155 | { | ||
156 | long i, nr_segments = image->nr_segments; | ||
157 | struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; | ||
158 | |||
159 | /* save the ranges on the stack to efficiently flush the icache */ | ||
160 | memcpy(ranges, image->segment, sizeof(ranges)); | ||
161 | |||
162 | /* | ||
163 | * After this call we may not use anything allocated in dynamic | ||
164 | * memory, including *image. | ||
165 | * | ||
166 | * Only globals and the stack are allowed. | ||
167 | */ | ||
168 | copy_segments(image->head); | ||
169 | |||
170 | /* | ||
171 | * we need to clear the icache for all dest pages sometime, | ||
172 | * including ones that were in place on the original copy | ||
173 | */ | ||
174 | for (i = 0; i < nr_segments; i++) | ||
175 | flush_icache_range(ranges[i].mem + KERNELBASE, | ||
176 | ranges[i].mem + KERNELBASE + | ||
177 | ranges[i].memsz); | ||
178 | } | ||
179 | |||
180 | #ifdef CONFIG_SMP | ||
181 | |||
182 | /* FIXME: we should schedule this function to be called on all cpus based | ||
183 | * on calling the interrupts, but we would like to call it off irq level | ||
184 | * so that the interrupt controller is clean. | ||
185 | */ | ||
/*
 * Handed to smp_call_function() by kexec_prepare_cpus(); @arg is unused.
 * Calls the platform kexec_cpu_down hook (if any) with arguments (0, 1),
 * disables interrupts and enters kexec_smp_wait().
 */
186 | void kexec_smp_down(void *arg) | ||
187 | { | ||
188 | if (ppc_md.kexec_cpu_down) | ||
189 | ppc_md.kexec_cpu_down(0, 1); | ||
190 | |||
191 | local_irq_disable(); | ||
192 | kexec_smp_wait(); | ||
193 | /* NOTREACHED */ | ||
194 | } | ||
195 | |||
/*
 * SMP variant: ask every other cpu to run kexec_smp_down(), then poll each
 * paca slot until its hw_cpu_id reads -1.  Slots whose cpu is not possible
 * or not online are reported and skipped.  Finally runs the platform
 * kexec_cpu_down hook (if any) with (0, 0) and disables interrupts.
 */
196 | static void kexec_prepare_cpus(void) | ||
197 | { | ||
198 | int my_cpu, i, notified=-1; | ||
199 | |||
200 | smp_call_function(kexec_smp_down, NULL, 0, /* wait */0); | ||
201 | my_cpu = get_cpu(); | ||
202 | |||
203 | /* check the others cpus are now down (via paca hw cpu id == -1) */ | ||
204 | for (i=0; i < NR_CPUS; i++) { | ||
205 | if (i == my_cpu) | ||
206 | continue; | ||
207 | |||
208 | while (paca[i].hw_cpu_id != -1) { | ||
209 | barrier(); | ||
210 | if (!cpu_possible(i)) { | ||
211 | printk("kexec: cpu %d hw_cpu_id %d is not" | ||
212 | " possible, ignoring\n", | ||
213 | i, paca[i].hw_cpu_id); | ||
214 | break; | ||
215 | } | ||
216 | if (!cpu_online(i)) { | ||
217 | /* Fixme: this can be spinning in | ||
218 | * pSeries_secondary_wait with a paca | ||
219 | * waiting for it to go online. | ||
220 | */ | ||
221 | printk("kexec: cpu %d hw_cpu_id %d is not" | ||
222 | " online, ignoring\n", | ||
223 | i, paca[i].hw_cpu_id); | ||
224 | break; | ||
225 | } | ||
/* 'notified' limits the waiting message to once per cpu. */
226 | if (i != notified) { | ||
227 | printk( "kexec: waiting for cpu %d (physical" | ||
228 | " %d) to go down\n", | ||
229 | i, paca[i].hw_cpu_id); | ||
230 | notified = i; | ||
231 | } | ||
232 | } | ||
233 | } | ||
234 | |||
235 | /* after we tell the others to go down */ | ||
236 | if (ppc_md.kexec_cpu_down) | ||
237 | ppc_md.kexec_cpu_down(0, 0); | ||
238 | |||
239 | put_cpu(); | ||
240 | |||
241 | local_irq_disable(); | ||
242 | } | ||
243 | |||
244 | #else /* ! SMP */ | ||
245 | |||
/*
 * Uniprocessor variant: release the secondary cpus, run the platform
 * kexec_cpu_down hook (if any) with (0, 0), then disable interrupts.
 */
246 | static void kexec_prepare_cpus(void) | ||
247 | { | ||
248 | /* | ||
249 | * move the secondaries to us so that we can copy | ||
250 | * the new kernel 0-0x100 safely | ||
251 | * | ||
252 | * do this if kexec in setup.c ? | ||
253 | * | ||
254 | * We need to release the cpus if we are ever going from an | ||
255 | * UP to an SMP kernel. | ||
256 | */ | ||
257 | smp_release_cpus(); | ||
258 | if (ppc_md.kexec_cpu_down) | ||
259 | ppc_md.kexec_cpu_down(0, 0); | ||
260 | local_irq_disable(); | ||
261 | } | ||
262 | |||
263 | #endif /* SMP */ | ||
264 | |||
265 | /* | ||
266 | * kexec thread structure and stack. | ||
267 | * | ||
268 | * We need to make sure that this is 16384-byte aligned due to the | ||
269 | * way process stacks are handled. It also must be statically allocated | ||
270 | * or allocated as part of the kimage, because everything else may be | ||
271 | * overwritten when we copy the kexec image. We piggyback on the | ||
272 | * "init_task" linker section here to statically allocate a stack. | ||
273 | * | ||
274 | * We could use a smaller stack if we don't care about anything using | ||
275 | * current, but that audit has not been performed. | ||
276 | */ | ||
277 | union thread_union kexec_stack | ||
278 | __attribute__((__section__(".data.init_task"))) = { }; | ||
279 | |||
280 | /* Our assembly helper, in kexec_stub.S */ | ||
/*
 * As called from machine_kexec(): newstack is &kexec_stack, start is
 * image->start, image is the kimage, control is the address of the
 * image's control code page, and clear_all is ppc_md.hpte_clear_all.
 * Declared as not returning.
 */
281 | extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, | ||
282 | void *image, void *control, | ||
283 | void (*clear_all)(void)) ATTRIB_NORET; | ||
284 | |||
285 | /* too late to fail here */ | ||
/*
 * Final step of kexec: quiesce the cpus, fill in the thread_info of the
 * static kexec stack, and hand over to the assembly helper
 * kexec_sequence().
 */
286 | void machine_kexec(struct kimage *image) | ||
287 | { | ||
288 | |||
289 | /* prepare control code if any */ | ||
290 | |||
291 | /* shutdown other cpus into our wait loop and quiesce interrupts */ | ||
292 | kexec_prepare_cpus(); | ||
293 | |||
294 | /* switch to a statically allocated stack. Based on irq stack code. | ||
295 | * XXX: the task struct will likely be invalid once we do the copy! | ||
296 | */ | ||
297 | kexec_stack.thread_info.task = current_thread_info()->task; | ||
298 | kexec_stack.thread_info.flags = 0; | ||
299 | |||
300 | /* Some things are best done in assembly. Finding globals with | ||
301 | * a toc is easier in C, so pass in what we can. | ||
302 | */ | ||
303 | kexec_sequence(&kexec_stack, image->start, image, | ||
304 | page_address(image->control_code_page), | ||
305 | ppc_md.hpte_clear_all); | ||
306 | /* NOTREACHED */ | ||
307 | } | ||
308 | |||
309 | /* Values we need to export to the second kernel via the device tree. */ | ||
/*
 * The three variables are filled in by export_htab_values(), which also
 * attaches the properties below to the /chosen node.  Each property's
 * value points at the matching variable.
 */
310 | static unsigned long htab_base, htab_size, kernel_end; | ||
311 | |||
312 | static struct property htab_base_prop = { | ||
313 | .name = "linux,htab-base", | ||
314 | .length = sizeof(unsigned long), | ||
315 | .value = (unsigned char *)&htab_base, | ||
316 | }; | ||
317 | |||
318 | static struct property htab_size_prop = { | ||
319 | .name = "linux,htab-size", | ||
320 | .length = sizeof(unsigned long), | ||
321 | .value = (unsigned char *)&htab_size, | ||
322 | }; | ||
323 | |||
324 | static struct property kernel_end_prop = { | ||
325 | .name = "linux,kernel-end", | ||
326 | .length = sizeof(unsigned long), | ||
327 | .value = (unsigned char *)&kernel_end, | ||
328 | }; | ||
329 | |||
330 | static void __init export_htab_values(void) | ||
331 | { | ||
332 | struct device_node *node; | ||
333 | |||
334 | node = of_find_node_by_path("/chosen"); | ||
335 | if (!node) | ||
336 | return; | ||
337 | |||
338 | kernel_end = __pa(_end); | ||
339 | prom_add_property(node, &kernel_end_prop); | ||
340 | |||
341 | /* On machines with no htab htab_address is NULL */ | ||
342 | if (NULL == htab_address) | ||
343 | goto out; | ||
344 | |||
345 | htab_base = __pa(htab_address); | ||
346 | prom_add_property(node, &htab_base_prop); | ||
347 | |||
348 | htab_size = 1UL << ppc64_pft_size; | ||
349 | prom_add_property(node, &htab_size_prop); | ||
350 | |||
351 | out: | ||
352 | of_node_put(node); | ||
353 | } | ||
354 | |||
/* Init-time kexec setup; currently this only exports the htab values. */
355 | void __init kexec_setup(void) | ||
356 | { | ||
357 | export_htab_values(); | ||
358 | } | ||
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c new file mode 100644 index 000000000000..928b8581fcb0 --- /dev/null +++ b/arch/powerpc/kernel/module_64.c | |||
@@ -0,0 +1,455 @@ | |||
1 | /* Kernel module help for PPC64. | ||
2 | Copyright (C) 2001, 2003 Rusty Russell IBM Corporation. | ||
3 | |||
4 | This program is free software; you can redistribute it and/or modify | ||
5 | it under the terms of the GNU General Public License as published by | ||
6 | the Free Software Foundation; either version 2 of the License, or | ||
7 | (at your option) any later version. | ||
8 | |||
9 | This program is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | GNU General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU General Public License | ||
15 | along with this program; if not, write to the Free Software | ||
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/elf.h> | ||
20 | #include <linux/moduleloader.h> | ||
21 | #include <linux/err.h> | ||
22 | #include <linux/vmalloc.h> | ||
23 | #include <asm/module.h> | ||
24 | #include <asm/uaccess.h> | ||
25 | |||
26 | /* FIXME: We don't do .init separately. To do this, we'd need to have | ||
27 | a separate r2 value in the init and core section, and stub between | ||
28 | them, too. | ||
29 | |||
30 | Using a magic allocator which places modules within 32MB solves | ||
31 | this, and makes other things simpler. Anton? | ||
32 | --RR. */ | ||
/*
 * Debug tracing for the module loader.  Change the condition to 1 to
 * route DEBUGP() to printk; otherwise the calls (and their arguments)
 * expand to nothing.
 */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(fmt, ...)
#endif
38 | |||
/*
 * Function descriptor (OPD entry): entry address plus the TOC pointer
 * value to load into r2.  There's actually a third entry here, but
 * it's unused.
 */
struct ppc64_opd_entry {
	unsigned long funcaddr;
	unsigned long r2;
};

/*
 * Like PPC32, we need little trampolines to do > 24-bit jumps (into
 * the kernel itself).  But on PPC64, these need to be used for every
 * jump, actually, to reset r2 (TOC+0x8000).
 */
struct ppc64_stub_entry {
	/* 28 byte jump instruction sequence (7 instructions) */
	unsigned char jump[28];
	/* Padding between the code and the data it loads */
	unsigned char unused[4];
	/* Data for the above code */
	struct ppc64_opd_entry opd;
};

/*
 * Template stub.  We use a stub to fix up r2 (TOC ptr) and to jump to
 * the (external) function which may be more than 24-bits away.  We
 * could simply patch the new r2 value and function pointer into the
 * stub, but it's significantly shorter to put these values at the end
 * of the stub code, and patch the stub address (32-bits relative to
 * the TOC ptr, r2) into the stub.
 *
 * After the addis/addi pair r12 holds the stub's own address, so the
 * loads at 32(r12) and 40(r12) fetch the two opd words that follow
 * jump[28] + unused[4].
 */
static struct ppc64_stub_entry ppc64_stub = {
	.jump = {
		0x3d, 0x82, 0x00, 0x00,	/* addis   r12,r2, <high> */
		0x39, 0x8c, 0x00, 0x00,	/* addi    r12,r12, <low> */
		/* Save current r2 value in magic place on the stack. */
		0xf8, 0x41, 0x00, 0x28,	/* std     r2,40(r1) */
		0xe9, 0x6c, 0x00, 0x20,	/* ld      r11,32(r12) */
		0xe8, 0x4c, 0x00, 0x28,	/* ld      r2,40(r12) */
		0x7d, 0x69, 0x03, 0xa6,	/* mtctr   r11 */
		0x4e, 0x80, 0x04, 0x20	/* bctr */
	},
};
75 | |||
76 | /* Count how many different 24-bit relocations (different symbol, | ||
77 | different addend) */ | ||
78 | static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) | ||
79 | { | ||
80 | unsigned int i, j, ret = 0; | ||
81 | |||
82 | /* FIXME: Only count external ones --RR */ | ||
83 | /* Sure, this is order(n^2), but it's usually short, and not | ||
84 | time critical */ | ||
85 | for (i = 0; i < num; i++) { | ||
86 | /* Only count 24-bit relocs, others don't need stubs */ | ||
87 | if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24) | ||
88 | continue; | ||
89 | for (j = 0; j < i; j++) { | ||
90 | /* If this addend appeared before, it's | ||
91 | already been counted */ | ||
92 | if (rela[i].r_info == rela[j].r_info | ||
93 | && rela[i].r_addend == rela[j].r_addend) | ||
94 | break; | ||
95 | } | ||
96 | if (j == i) ret++; | ||
97 | } | ||
98 | return ret; | ||
99 | } | ||
100 | |||
101 | void *module_alloc(unsigned long size) | ||
102 | { | ||
103 | if (size == 0) | ||
104 | return NULL; | ||
105 | |||
106 | return vmalloc_exec(size); | ||
107 | } | ||
108 | |||
/*
 * Release memory obtained from module_alloc().  @mod is currently
 * unused.
 */
void module_free(struct module *mod, void *module_region)
{
	vfree(module_region);
	/* FIXME: If module_region == mod->init_region, trim exception
	   table entries. */
}
116 | |||
117 | /* Get size of potential trampolines required. */ | ||
118 | static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, | ||
119 | const Elf64_Shdr *sechdrs) | ||
120 | { | ||
121 | /* One extra reloc so it's always 0-funcaddr terminated */ | ||
122 | unsigned long relocs = 1; | ||
123 | unsigned i; | ||
124 | |||
125 | /* Every relocated section... */ | ||
126 | for (i = 1; i < hdr->e_shnum; i++) { | ||
127 | if (sechdrs[i].sh_type == SHT_RELA) { | ||
128 | DEBUGP("Found relocations in section %u\n", i); | ||
129 | DEBUGP("Ptr: %p. Number: %lu\n", | ||
130 | (void *)sechdrs[i].sh_addr, | ||
131 | sechdrs[i].sh_size / sizeof(Elf64_Rela)); | ||
132 | relocs += count_relocs((void *)sechdrs[i].sh_addr, | ||
133 | sechdrs[i].sh_size | ||
134 | / sizeof(Elf64_Rela)); | ||
135 | } | ||
136 | } | ||
137 | |||
138 | DEBUGP("Looks like a total of %lu stubs, max\n", relocs); | ||
139 | return relocs * sizeof(struct ppc64_stub_entry); | ||
140 | } | ||
141 | |||
142 | static void dedotify_versions(struct modversion_info *vers, | ||
143 | unsigned long size) | ||
144 | { | ||
145 | struct modversion_info *end; | ||
146 | |||
147 | for (end = (void *)vers + size; vers < end; vers++) | ||
148 | if (vers->name[0] == '.') | ||
149 | memmove(vers->name, vers->name+1, strlen(vers->name)); | ||
150 | } | ||
151 | |||
152 | /* Undefined symbols which refer to .funcname, hack to funcname */ | ||
153 | static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) | ||
154 | { | ||
155 | unsigned int i; | ||
156 | |||
157 | for (i = 1; i < numsyms; i++) { | ||
158 | if (syms[i].st_shndx == SHN_UNDEF) { | ||
159 | char *name = strtab + syms[i].st_name; | ||
160 | if (name[0] == '.') | ||
161 | memmove(name, name+1, strlen(name)); | ||
162 | } | ||
163 | } | ||
164 | } | ||
165 | |||
166 | int module_frob_arch_sections(Elf64_Ehdr *hdr, | ||
167 | Elf64_Shdr *sechdrs, | ||
168 | char *secstrings, | ||
169 | struct module *me) | ||
170 | { | ||
171 | unsigned int i; | ||
172 | |||
173 | /* Find .toc and .stubs sections, symtab and strtab */ | ||
174 | for (i = 1; i < hdr->e_shnum; i++) { | ||
175 | char *p; | ||
176 | if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) | ||
177 | me->arch.stubs_section = i; | ||
178 | else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) | ||
179 | me->arch.toc_section = i; | ||
180 | else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) | ||
181 | dedotify_versions((void *)hdr + sechdrs[i].sh_offset, | ||
182 | sechdrs[i].sh_size); | ||
183 | |||
184 | /* We don't handle .init for the moment: rename to _init */ | ||
185 | while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) | ||
186 | p[0] = '_'; | ||
187 | |||
188 | if (sechdrs[i].sh_type == SHT_SYMTAB) | ||
189 | dedotify((void *)hdr + sechdrs[i].sh_offset, | ||
190 | sechdrs[i].sh_size / sizeof(Elf64_Sym), | ||
191 | (void *)hdr | ||
192 | + sechdrs[sechdrs[i].sh_link].sh_offset); | ||
193 | } | ||
194 | if (!me->arch.stubs_section || !me->arch.toc_section) { | ||
195 | printk("%s: doesn't contain .toc or .stubs.\n", me->name); | ||
196 | return -ENOEXEC; | ||
197 | } | ||
198 | |||
199 | /* Override the stubs size */ | ||
200 | sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); | ||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | int apply_relocate(Elf64_Shdr *sechdrs, | ||
205 | const char *strtab, | ||
206 | unsigned int symindex, | ||
207 | unsigned int relsec, | ||
208 | struct module *me) | ||
209 | { | ||
210 | printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name); | ||
211 | return -ENOEXEC; | ||
212 | } | ||
213 | |||
214 | /* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this | ||
215 | gives the value maximum span in an instruction which uses a signed | ||
216 | offset) */ | ||
217 | static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) | ||
218 | { | ||
219 | return sechdrs[me->arch.toc_section].sh_addr + 0x8000; | ||
220 | } | ||
221 | |||
/*
 * Split a value into 16-bit halves for an addis/addi style pair.
 * Both halves are added as SIGNED quantities, so when bit 15 of the low
 * half is set the high half has to be bumped by one to compensate:
 * PPC_HA() is that adjusted high half (stolen from binutils).
 */
#define PPC_LO(v) ((v) & 0xffff)
#define PPC_HI(v) (((v) >> 16) & 0xffff)
#define PPC_HA(v) ((((v) + 0x8000) >> 16) & 0xffff)
228 | |||
229 | /* Patch stub to reference function and correct r2 value. */ | ||
230 | static inline int create_stub(Elf64_Shdr *sechdrs, | ||
231 | struct ppc64_stub_entry *entry, | ||
232 | struct ppc64_opd_entry *opd, | ||
233 | struct module *me) | ||
234 | { | ||
235 | Elf64_Half *loc1, *loc2; | ||
236 | long reladdr; | ||
237 | |||
238 | *entry = ppc64_stub; | ||
239 | |||
240 | loc1 = (Elf64_Half *)&entry->jump[2]; | ||
241 | loc2 = (Elf64_Half *)&entry->jump[6]; | ||
242 | |||
243 | /* Stub uses address relative to r2. */ | ||
244 | reladdr = (unsigned long)entry - my_r2(sechdrs, me); | ||
245 | if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { | ||
246 | printk("%s: Address %p of stub out of range of %p.\n", | ||
247 | me->name, (void *)reladdr, (void *)my_r2); | ||
248 | return 0; | ||
249 | } | ||
250 | DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); | ||
251 | |||
252 | *loc1 = PPC_HA(reladdr); | ||
253 | *loc2 = PPC_LO(reladdr); | ||
254 | entry->opd.funcaddr = opd->funcaddr; | ||
255 | entry->opd.r2 = opd->r2; | ||
256 | return 1; | ||
257 | } | ||
258 | |||
259 | /* Create stub to jump to function described in this OPD: we need the | ||
260 | stub to set up the TOC ptr (r2) for the function. */ | ||
261 | static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, | ||
262 | unsigned long opdaddr, | ||
263 | struct module *me) | ||
264 | { | ||
265 | struct ppc64_stub_entry *stubs; | ||
266 | struct ppc64_opd_entry *opd = (void *)opdaddr; | ||
267 | unsigned int i, num_stubs; | ||
268 | |||
269 | num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs); | ||
270 | |||
271 | /* Find this stub, or if that fails, the next avail. entry */ | ||
272 | stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr; | ||
273 | for (i = 0; stubs[i].opd.funcaddr; i++) { | ||
274 | BUG_ON(i >= num_stubs); | ||
275 | |||
276 | if (stubs[i].opd.funcaddr == opd->funcaddr) | ||
277 | return (unsigned long)&stubs[i]; | ||
278 | } | ||
279 | |||
280 | if (!create_stub(sechdrs, &stubs[i], opd, me)) | ||
281 | return 0; | ||
282 | |||
283 | return (unsigned long)&stubs[i]; | ||
284 | } | ||
285 | |||
286 | /* We expect a noop next: if it is, replace it with instruction to | ||
287 | restore r2. */ | ||
288 | static int restore_r2(u32 *instruction, struct module *me) | ||
289 | { | ||
290 | if (*instruction != 0x60000000) { | ||
291 | printk("%s: Expect noop after relocate, got %08x\n", | ||
292 | me->name, *instruction); | ||
293 | return 0; | ||
294 | } | ||
295 | *instruction = 0xe8410028; /* ld r2,40(r1) */ | ||
296 | return 1; | ||
297 | } | ||
298 | |||
299 | int apply_relocate_add(Elf64_Shdr *sechdrs, | ||
300 | const char *strtab, | ||
301 | unsigned int symindex, | ||
302 | unsigned int relsec, | ||
303 | struct module *me) | ||
304 | { | ||
305 | unsigned int i; | ||
306 | Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr; | ||
307 | Elf64_Sym *sym; | ||
308 | unsigned long *location; | ||
309 | unsigned long value; | ||
310 | |||
311 | DEBUGP("Applying ADD relocate section %u to %u\n", relsec, | ||
312 | sechdrs[relsec].sh_info); | ||
313 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { | ||
314 | /* This is where to make the change */ | ||
315 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | ||
316 | + rela[i].r_offset; | ||
317 | /* This is the symbol it is referring to */ | ||
318 | sym = (Elf64_Sym *)sechdrs[symindex].sh_addr | ||
319 | + ELF64_R_SYM(rela[i].r_info); | ||
320 | |||
321 | DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n", | ||
322 | location, (long)ELF64_R_TYPE(rela[i].r_info), | ||
323 | strtab + sym->st_name, (unsigned long)sym->st_value, | ||
324 | (long)rela[i].r_addend); | ||
325 | |||
326 | /* `Everything is relative'. */ | ||
327 | value = sym->st_value + rela[i].r_addend; | ||
328 | |||
329 | switch (ELF64_R_TYPE(rela[i].r_info)) { | ||
330 | case R_PPC64_ADDR32: | ||
331 | /* Simply set it */ | ||
332 | *(u32 *)location = value; | ||
333 | break; | ||
334 | |||
335 | case R_PPC64_ADDR64: | ||
336 | /* Simply set it */ | ||
337 | *(unsigned long *)location = value; | ||
338 | break; | ||
339 | |||
340 | case R_PPC64_TOC: | ||
341 | *(unsigned long *)location = my_r2(sechdrs, me); | ||
342 | break; | ||
343 | |||
344 | case R_PPC64_TOC16: | ||
345 | /* Subtact TOC pointer */ | ||
346 | value -= my_r2(sechdrs, me); | ||
347 | if (value + 0x8000 > 0xffff) { | ||
348 | printk("%s: bad TOC16 relocation (%lu)\n", | ||
349 | me->name, value); | ||
350 | return -ENOEXEC; | ||
351 | } | ||
352 | *((uint16_t *) location) | ||
353 | = (*((uint16_t *) location) & ~0xffff) | ||
354 | | (value & 0xffff); | ||
355 | break; | ||
356 | |||
357 | case R_PPC64_TOC16_DS: | ||
358 | /* Subtact TOC pointer */ | ||
359 | value -= my_r2(sechdrs, me); | ||
360 | if ((value & 3) != 0 || value + 0x8000 > 0xffff) { | ||
361 | printk("%s: bad TOC16_DS relocation (%lu)\n", | ||
362 | me->name, value); | ||
363 | return -ENOEXEC; | ||
364 | } | ||
365 | *((uint16_t *) location) | ||
366 | = (*((uint16_t *) location) & ~0xfffc) | ||
367 | | (value & 0xfffc); | ||
368 | break; | ||
369 | |||
370 | case R_PPC_REL24: | ||
371 | /* FIXME: Handle weak symbols here --RR */ | ||
372 | if (sym->st_shndx == SHN_UNDEF) { | ||
373 | /* External: go via stub */ | ||
374 | value = stub_for_addr(sechdrs, value, me); | ||
375 | if (!value) | ||
376 | return -ENOENT; | ||
377 | if (!restore_r2((u32 *)location + 1, me)) | ||
378 | return -ENOEXEC; | ||
379 | } | ||
380 | |||
381 | /* Convert value to relative */ | ||
382 | value -= (unsigned long)location; | ||
383 | if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){ | ||
384 | printk("%s: REL24 %li out of range!\n", | ||
385 | me->name, (long int)value); | ||
386 | return -ENOEXEC; | ||
387 | } | ||
388 | |||
389 | /* Only replace bits 2 through 26 */ | ||
390 | *(uint32_t *)location | ||
391 | = (*(uint32_t *)location & ~0x03fffffc) | ||
392 | | (value & 0x03fffffc); | ||
393 | break; | ||
394 | |||
395 | default: | ||
396 | printk("%s: Unknown ADD relocation: %lu\n", | ||
397 | me->name, | ||
398 | (unsigned long)ELF64_R_TYPE(rela[i].r_info)); | ||
399 | return -ENOEXEC; | ||
400 | } | ||
401 | } | ||
402 | |||
403 | return 0; | ||
404 | } | ||
405 | |||
406 | LIST_HEAD(module_bug_list); | ||
407 | |||
408 | int module_finalize(const Elf_Ehdr *hdr, | ||
409 | const Elf_Shdr *sechdrs, struct module *me) | ||
410 | { | ||
411 | char *secstrings; | ||
412 | unsigned int i; | ||
413 | |||
414 | me->arch.bug_table = NULL; | ||
415 | me->arch.num_bugs = 0; | ||
416 | |||
417 | /* Find the __bug_table section, if present */ | ||
418 | secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | ||
419 | for (i = 1; i < hdr->e_shnum; i++) { | ||
420 | if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table")) | ||
421 | continue; | ||
422 | me->arch.bug_table = (void *) sechdrs[i].sh_addr; | ||
423 | me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry); | ||
424 | break; | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * Strictly speaking this should have a spinlock to protect against | ||
429 | * traversals, but since we only traverse on BUG()s, a spinlock | ||
430 | * could potentially lead to deadlock and thus be counter-productive. | ||
431 | */ | ||
432 | list_add(&me->arch.bug_list, &module_bug_list); | ||
433 | |||
434 | return 0; | ||
435 | } | ||
436 | |||
437 | void module_arch_cleanup(struct module *mod) | ||
438 | { | ||
439 | list_del(&mod->arch.bug_list); | ||
440 | } | ||
441 | |||
442 | struct bug_entry *module_find_bug(unsigned long bugaddr) | ||
443 | { | ||
444 | struct mod_arch_specific *mod; | ||
445 | unsigned int i; | ||
446 | struct bug_entry *bug; | ||
447 | |||
448 | list_for_each_entry(mod, &module_bug_list, bug_list) { | ||
449 | bug = mod->bug_table; | ||
450 | for (i = 0; i < mod->num_bugs; ++i, ++bug) | ||
451 | if (bugaddr == bug->bug_addr) | ||
452 | return bug; | ||
453 | } | ||
454 | return NULL; | ||
455 | } | ||
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c new file mode 100644 index 000000000000..3cef1b8f57f0 --- /dev/null +++ b/arch/powerpc/kernel/pci_64.c | |||
@@ -0,0 +1,1319 @@ | |||
1 | /* | ||
2 | * Port for PPC64 David Engebretsen, IBM Corp. | ||
3 | * Contains common pci routines for ppc64 platform, pSeries and iSeries brands. | ||
4 | * | ||
5 | * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM | ||
6 | * Rework, based on alpha PCI code. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #undef DEBUG | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/pci.h> | ||
19 | #include <linux/string.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/bootmem.h> | ||
22 | #include <linux/mm.h> | ||
23 | #include <linux/list.h> | ||
24 | #include <linux/syscalls.h> | ||
25 | |||
26 | #include <asm/processor.h> | ||
27 | #include <asm/io.h> | ||
28 | #include <asm/prom.h> | ||
29 | #include <asm/pci-bridge.h> | ||
30 | #include <asm/byteorder.h> | ||
31 | #include <asm/irq.h> | ||
32 | #include <asm/machdep.h> | ||
33 | #include <asm/udbg.h> | ||
34 | #include <asm/ppc-pci.h> | ||
35 | |||
/* DBG() forwards to udbg_printf() when DEBUG is defined, else vanishes. */
#ifdef DEBUG
#define DBG(...) udbg_printf(__VA_ARGS__)
#else
#define DBG(...)
#endif
41 | |||
/* PCI probing policy knobs; pci_assign_all_buses is what
 * pcibios_assign_all_busses() returns. */
unsigned long pci_probe_only = 1;
unsigned long pci_assign_all_buses = 0;

/*
 * Legal IO pages under MAX_ISA_PORT.  This is to ensure we don't touch
 * devices we don't have access to.
 */
unsigned long io_page_mask;

EXPORT_SYMBOL(io_page_mask);
52 | |||
53 | #ifdef CONFIG_PPC_MULTIPLATFORM | ||
54 | static void fixup_resource(struct resource *res, struct pci_dev *dev); | ||
55 | static void do_bus_setup(struct pci_bus *bus); | ||
56 | #endif | ||
57 | |||
58 | unsigned int pcibios_assign_all_busses(void) | ||
59 | { | ||
60 | return pci_assign_all_buses; | ||
61 | } | ||
62 | |||
63 | /* pci_io_base -- the base address from which io bars are offsets. | ||
64 | * This is the lowest I/O base address (so bar values are always positive), | ||
65 | * and it *must* be the start of ISA space if an ISA bus exists because | ||
66 | * ISA drivers use hard coded offsets. If no ISA bus exists a dummy | ||
67 | * page is mapped and isa_io_limit prevents access to it. | ||
68 | */ | ||
69 | unsigned long isa_io_base; /* NULL if no ISA bus */ | ||
70 | EXPORT_SYMBOL(isa_io_base); | ||
71 | unsigned long pci_io_base; | ||
72 | EXPORT_SYMBOL(pci_io_base); | ||
73 | |||
74 | void iSeries_pcibios_init(void); | ||
75 | |||
76 | LIST_HEAD(hose_list); | ||
77 | |||
78 | struct dma_mapping_ops pci_dma_ops; | ||
79 | EXPORT_SYMBOL(pci_dma_ops); | ||
80 | |||
81 | int global_phb_number; /* Global phb counter */ | ||
82 | |||
83 | /* Cached ISA bridge dev. */ | ||
84 | struct pci_dev *ppc64_isabridge_dev = NULL; | ||
85 | |||
86 | static void fixup_broken_pcnet32(struct pci_dev* dev) | ||
87 | { | ||
88 | if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) { | ||
89 | dev->vendor = PCI_VENDOR_ID_AMD; | ||
90 | pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD); | ||
91 | } | ||
92 | } | ||
93 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32); | ||
94 | |||
95 | void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, | ||
96 | struct resource *res) | ||
97 | { | ||
98 | unsigned long offset = 0; | ||
99 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
100 | |||
101 | if (!hose) | ||
102 | return; | ||
103 | |||
104 | if (res->flags & IORESOURCE_IO) | ||
105 | offset = (unsigned long)hose->io_base_virt - pci_io_base; | ||
106 | |||
107 | if (res->flags & IORESOURCE_MEM) | ||
108 | offset = hose->pci_mem_offset; | ||
109 | |||
110 | region->start = res->start - offset; | ||
111 | region->end = res->end - offset; | ||
112 | } | ||
113 | |||
114 | void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, | ||
115 | struct pci_bus_region *region) | ||
116 | { | ||
117 | unsigned long offset = 0; | ||
118 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
119 | |||
120 | if (!hose) | ||
121 | return; | ||
122 | |||
123 | if (res->flags & IORESOURCE_IO) | ||
124 | offset = (unsigned long)hose->io_base_virt - pci_io_base; | ||
125 | |||
126 | if (res->flags & IORESOURCE_MEM) | ||
127 | offset = hose->pci_mem_offset; | ||
128 | |||
129 | res->start = region->start + offset; | ||
130 | res->end = region->end + offset; | ||
131 | } | ||
132 | |||
133 | #ifdef CONFIG_HOTPLUG | ||
134 | EXPORT_SYMBOL(pcibios_resource_to_bus); | ||
135 | EXPORT_SYMBOL(pcibios_bus_to_resource); | ||
136 | #endif | ||
137 | |||
138 | /* | ||
139 | * We need to avoid collisions with `mirrored' VGA ports | ||
140 | * and other strange ISA hardware, so we always want the | ||
141 | * addresses to be allocated in the 0x000-0x0ff region | ||
142 | * modulo 0x400. | ||
143 | * | ||
144 | * Why? Because some silly external IO cards only decode | ||
145 | * the low 10 bits of the IO address. The 0x00-0xff region | ||
146 | * is reserved for motherboard devices that decode all 16 | ||
147 | * bits, so it's ok to allocate at, say, 0x2800-0x28ff, | ||
148 | * but we want to try to avoid allocating at 0x2900-0x2bff | ||
149 | * which might have be mirrored at 0x0100-0x03ff.. | ||
150 | */ | ||
151 | void pcibios_align_resource(void *data, struct resource *res, | ||
152 | unsigned long size, unsigned long align) | ||
153 | { | ||
154 | struct pci_dev *dev = data; | ||
155 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
156 | unsigned long start = res->start; | ||
157 | unsigned long alignto; | ||
158 | |||
159 | if (res->flags & IORESOURCE_IO) { | ||
160 | unsigned long offset = (unsigned long)hose->io_base_virt - | ||
161 | pci_io_base; | ||
162 | /* Make sure we start at our min on all hoses */ | ||
163 | if (start - offset < PCIBIOS_MIN_IO) | ||
164 | start = PCIBIOS_MIN_IO + offset; | ||
165 | |||
166 | /* | ||
167 | * Put everything into 0x00-0xff region modulo 0x400 | ||
168 | */ | ||
169 | if (start & 0x300) | ||
170 | start = (start + 0x3ff) & ~0x3ff; | ||
171 | |||
172 | } else if (res->flags & IORESOURCE_MEM) { | ||
173 | /* Make sure we start at our min on all hoses */ | ||
174 | if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM) | ||
175 | start = PCIBIOS_MIN_MEM + hose->pci_mem_offset; | ||
176 | |||
177 | /* Align to multiple of size of minimum base. */ | ||
178 | alignto = max(0x1000UL, align); | ||
179 | start = ALIGN(start, alignto); | ||
180 | } | ||
181 | |||
182 | res->start = start; | ||
183 | } | ||
184 | |||
185 | static DEFINE_SPINLOCK(hose_spinlock); | ||
186 | |||
187 | /* | ||
188 | * pci_controller(phb) initialized common variables. | ||
189 | */ | ||
190 | void __devinit pci_setup_pci_controller(struct pci_controller *hose) | ||
191 | { | ||
192 | memset(hose, 0, sizeof(struct pci_controller)); | ||
193 | |||
194 | spin_lock(&hose_spinlock); | ||
195 | hose->global_number = global_phb_number++; | ||
196 | list_add_tail(&hose->list_node, &hose_list); | ||
197 | spin_unlock(&hose_spinlock); | ||
198 | } | ||
199 | |||
200 | static void __init pcibios_claim_one_bus(struct pci_bus *b) | ||
201 | { | ||
202 | struct pci_dev *dev; | ||
203 | struct pci_bus *child_bus; | ||
204 | |||
205 | list_for_each_entry(dev, &b->devices, bus_list) { | ||
206 | int i; | ||
207 | |||
208 | for (i = 0; i < PCI_NUM_RESOURCES; i++) { | ||
209 | struct resource *r = &dev->resource[i]; | ||
210 | |||
211 | if (r->parent || !r->start || !r->flags) | ||
212 | continue; | ||
213 | pci_claim_resource(dev, i); | ||
214 | } | ||
215 | } | ||
216 | |||
217 | list_for_each_entry(child_bus, &b->children, node) | ||
218 | pcibios_claim_one_bus(child_bus); | ||
219 | } | ||
220 | |||
221 | #ifndef CONFIG_PPC_ISERIES | ||
222 | static void __init pcibios_claim_of_setup(void) | ||
223 | { | ||
224 | struct pci_bus *b; | ||
225 | |||
226 | list_for_each_entry(b, &pci_root_buses, node) | ||
227 | pcibios_claim_one_bus(b); | ||
228 | } | ||
229 | #endif | ||
230 | |||
231 | #ifdef CONFIG_PPC_MULTIPLATFORM | ||
232 | static u32 get_int_prop(struct device_node *np, const char *name, u32 def) | ||
233 | { | ||
234 | u32 *prop; | ||
235 | int len; | ||
236 | |||
237 | prop = (u32 *) get_property(np, name, &len); | ||
238 | if (prop && len >= 4) | ||
239 | return *prop; | ||
240 | return def; | ||
241 | } | ||
242 | |||
243 | static unsigned int pci_parse_of_flags(u32 addr0) | ||
244 | { | ||
245 | unsigned int flags = 0; | ||
246 | |||
247 | if (addr0 & 0x02000000) { | ||
248 | flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY; | ||
249 | flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64; | ||
250 | flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M; | ||
251 | if (addr0 & 0x40000000) | ||
252 | flags |= IORESOURCE_PREFETCH | ||
253 | | PCI_BASE_ADDRESS_MEM_PREFETCH; | ||
254 | } else if (addr0 & 0x01000000) | ||
255 | flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO; | ||
256 | return flags; | ||
257 | } | ||
258 | |||
/* Combine cells prop[i] (high word) and prop[i+1] (low word) into a u64. */
#define GET_64BIT(prop, i)	\
	((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
260 | |||
261 | static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) | ||
262 | { | ||
263 | u64 base, size; | ||
264 | unsigned int flags; | ||
265 | struct resource *res; | ||
266 | u32 *addrs, i; | ||
267 | int proplen; | ||
268 | |||
269 | addrs = (u32 *) get_property(node, "assigned-addresses", &proplen); | ||
270 | if (!addrs) | ||
271 | return; | ||
272 | for (; proplen >= 20; proplen -= 20, addrs += 5) { | ||
273 | flags = pci_parse_of_flags(addrs[0]); | ||
274 | if (!flags) | ||
275 | continue; | ||
276 | base = GET_64BIT(addrs, 1); | ||
277 | size = GET_64BIT(addrs, 3); | ||
278 | if (!size) | ||
279 | continue; | ||
280 | i = addrs[0] & 0xff; | ||
281 | if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) { | ||
282 | res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; | ||
283 | } else if (i == dev->rom_base_reg) { | ||
284 | res = &dev->resource[PCI_ROM_RESOURCE]; | ||
285 | flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE; | ||
286 | } else { | ||
287 | printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); | ||
288 | continue; | ||
289 | } | ||
290 | res->start = base; | ||
291 | res->end = base + size - 1; | ||
292 | res->flags = flags; | ||
293 | res->name = pci_name(dev); | ||
294 | fixup_resource(res, dev); | ||
295 | } | ||
296 | } | ||
297 | |||
298 | struct pci_dev *of_create_pci_dev(struct device_node *node, | ||
299 | struct pci_bus *bus, int devfn) | ||
300 | { | ||
301 | struct pci_dev *dev; | ||
302 | const char *type; | ||
303 | |||
304 | dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL); | ||
305 | if (!dev) | ||
306 | return NULL; | ||
307 | type = get_property(node, "device_type", NULL); | ||
308 | if (type == NULL) | ||
309 | type = ""; | ||
310 | |||
311 | memset(dev, 0, sizeof(struct pci_dev)); | ||
312 | dev->bus = bus; | ||
313 | dev->sysdata = node; | ||
314 | dev->dev.parent = bus->bridge; | ||
315 | dev->dev.bus = &pci_bus_type; | ||
316 | dev->devfn = devfn; | ||
317 | dev->multifunction = 0; /* maybe a lie? */ | ||
318 | |||
319 | dev->vendor = get_int_prop(node, "vendor-id", 0xffff); | ||
320 | dev->device = get_int_prop(node, "device-id", 0xffff); | ||
321 | dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0); | ||
322 | dev->subsystem_device = get_int_prop(node, "subsystem-id", 0); | ||
323 | |||
324 | dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/ | ||
325 | |||
326 | sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus), | ||
327 | dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); | ||
328 | dev->class = get_int_prop(node, "class-code", 0); | ||
329 | |||
330 | dev->current_state = 4; /* unknown power state */ | ||
331 | |||
332 | if (!strcmp(type, "pci")) { | ||
333 | /* a PCI-PCI bridge */ | ||
334 | dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; | ||
335 | dev->rom_base_reg = PCI_ROM_ADDRESS1; | ||
336 | } else if (!strcmp(type, "cardbus")) { | ||
337 | dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; | ||
338 | } else { | ||
339 | dev->hdr_type = PCI_HEADER_TYPE_NORMAL; | ||
340 | dev->rom_base_reg = PCI_ROM_ADDRESS; | ||
341 | dev->irq = NO_IRQ; | ||
342 | if (node->n_intrs > 0) { | ||
343 | dev->irq = node->intrs[0].line; | ||
344 | pci_write_config_byte(dev, PCI_INTERRUPT_LINE, | ||
345 | dev->irq); | ||
346 | } | ||
347 | } | ||
348 | |||
349 | pci_parse_of_addrs(node, dev); | ||
350 | |||
351 | pci_device_add(dev, bus); | ||
352 | |||
353 | /* XXX pci_scan_msi_device(dev); */ | ||
354 | |||
355 | return dev; | ||
356 | } | ||
357 | EXPORT_SYMBOL(of_create_pci_dev); | ||
358 | |||
359 | void __devinit of_scan_bus(struct device_node *node, | ||
360 | struct pci_bus *bus) | ||
361 | { | ||
362 | struct device_node *child = NULL; | ||
363 | u32 *reg; | ||
364 | int reglen, devfn; | ||
365 | struct pci_dev *dev; | ||
366 | |||
367 | while ((child = of_get_next_child(node, child)) != NULL) { | ||
368 | reg = (u32 *) get_property(child, "reg", ®len); | ||
369 | if (reg == NULL || reglen < 20) | ||
370 | continue; | ||
371 | devfn = (reg[0] >> 8) & 0xff; | ||
372 | /* create a new pci_dev for this device */ | ||
373 | dev = of_create_pci_dev(child, bus, devfn); | ||
374 | if (!dev) | ||
375 | continue; | ||
376 | if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || | ||
377 | dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) | ||
378 | of_scan_pci_bridge(child, dev); | ||
379 | } | ||
380 | |||
381 | do_bus_setup(bus); | ||
382 | } | ||
383 | EXPORT_SYMBOL(of_scan_bus); | ||
384 | |||
385 | void __devinit of_scan_pci_bridge(struct device_node *node, | ||
386 | struct pci_dev *dev) | ||
387 | { | ||
388 | struct pci_bus *bus; | ||
389 | u32 *busrange, *ranges; | ||
390 | int len, i, mode; | ||
391 | struct resource *res; | ||
392 | unsigned int flags; | ||
393 | u64 size; | ||
394 | |||
395 | /* parse bus-range property */ | ||
396 | busrange = (u32 *) get_property(node, "bus-range", &len); | ||
397 | if (busrange == NULL || len != 8) { | ||
398 | printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n", | ||
399 | node->full_name); | ||
400 | return; | ||
401 | } | ||
402 | ranges = (u32 *) get_property(node, "ranges", &len); | ||
403 | if (ranges == NULL) { | ||
404 | printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n", | ||
405 | node->full_name); | ||
406 | return; | ||
407 | } | ||
408 | |||
409 | bus = pci_add_new_bus(dev->bus, dev, busrange[0]); | ||
410 | if (!bus) { | ||
411 | printk(KERN_ERR "Failed to create pci bus for %s\n", | ||
412 | node->full_name); | ||
413 | return; | ||
414 | } | ||
415 | |||
416 | bus->primary = dev->bus->number; | ||
417 | bus->subordinate = busrange[1]; | ||
418 | bus->bridge_ctl = 0; | ||
419 | bus->sysdata = node; | ||
420 | |||
421 | /* parse ranges property */ | ||
422 | /* PCI #address-cells == 3 and #size-cells == 2 always */ | ||
423 | res = &dev->resource[PCI_BRIDGE_RESOURCES]; | ||
424 | for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) { | ||
425 | res->flags = 0; | ||
426 | bus->resource[i] = res; | ||
427 | ++res; | ||
428 | } | ||
429 | i = 1; | ||
430 | for (; len >= 32; len -= 32, ranges += 8) { | ||
431 | flags = pci_parse_of_flags(ranges[0]); | ||
432 | size = GET_64BIT(ranges, 6); | ||
433 | if (flags == 0 || size == 0) | ||
434 | continue; | ||
435 | if (flags & IORESOURCE_IO) { | ||
436 | res = bus->resource[0]; | ||
437 | if (res->flags) { | ||
438 | printk(KERN_ERR "PCI: ignoring extra I/O range" | ||
439 | " for bridge %s\n", node->full_name); | ||
440 | continue; | ||
441 | } | ||
442 | } else { | ||
443 | if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) { | ||
444 | printk(KERN_ERR "PCI: too many memory ranges" | ||
445 | " for bridge %s\n", node->full_name); | ||
446 | continue; | ||
447 | } | ||
448 | res = bus->resource[i]; | ||
449 | ++i; | ||
450 | } | ||
451 | res->start = GET_64BIT(ranges, 1); | ||
452 | res->end = res->start + size - 1; | ||
453 | res->flags = flags; | ||
454 | fixup_resource(res, dev); | ||
455 | } | ||
456 | sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), | ||
457 | bus->number); | ||
458 | |||
459 | mode = PCI_PROBE_NORMAL; | ||
460 | if (ppc_md.pci_probe_mode) | ||
461 | mode = ppc_md.pci_probe_mode(bus); | ||
462 | if (mode == PCI_PROBE_DEVTREE) | ||
463 | of_scan_bus(node, bus); | ||
464 | else if (mode == PCI_PROBE_NORMAL) | ||
465 | pci_scan_child_bus(bus); | ||
466 | } | ||
467 | EXPORT_SYMBOL(of_scan_pci_bridge); | ||
468 | #endif /* CONFIG_PPC_MULTIPLATFORM */ | ||
469 | |||
470 | void __devinit scan_phb(struct pci_controller *hose) | ||
471 | { | ||
472 | struct pci_bus *bus; | ||
473 | struct device_node *node = hose->arch_data; | ||
474 | int i, mode; | ||
475 | struct resource *res; | ||
476 | |||
477 | bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node); | ||
478 | if (bus == NULL) { | ||
479 | printk(KERN_ERR "Failed to create bus for PCI domain %04x\n", | ||
480 | hose->global_number); | ||
481 | return; | ||
482 | } | ||
483 | bus->secondary = hose->first_busno; | ||
484 | hose->bus = bus; | ||
485 | |||
486 | bus->resource[0] = res = &hose->io_resource; | ||
487 | if (res->flags && request_resource(&ioport_resource, res)) | ||
488 | printk(KERN_ERR "Failed to request PCI IO region " | ||
489 | "on PCI domain %04x\n", hose->global_number); | ||
490 | |||
491 | for (i = 0; i < 3; ++i) { | ||
492 | res = &hose->mem_resources[i]; | ||
493 | bus->resource[i+1] = res; | ||
494 | if (res->flags && request_resource(&iomem_resource, res)) | ||
495 | printk(KERN_ERR "Failed to request PCI memory region " | ||
496 | "on PCI domain %04x\n", hose->global_number); | ||
497 | } | ||
498 | |||
499 | mode = PCI_PROBE_NORMAL; | ||
500 | #ifdef CONFIG_PPC_MULTIPLATFORM | ||
501 | if (ppc_md.pci_probe_mode) | ||
502 | mode = ppc_md.pci_probe_mode(bus); | ||
503 | if (mode == PCI_PROBE_DEVTREE) { | ||
504 | bus->subordinate = hose->last_busno; | ||
505 | of_scan_bus(node, bus); | ||
506 | } | ||
507 | #endif /* CONFIG_PPC_MULTIPLATFORM */ | ||
508 | if (mode == PCI_PROBE_NORMAL) | ||
509 | hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); | ||
510 | pci_bus_add_devices(bus); | ||
511 | } | ||
512 | |||
513 | static int __init pcibios_init(void) | ||
514 | { | ||
515 | struct pci_controller *hose, *tmp; | ||
516 | |||
517 | /* For now, override phys_mem_access_prot. If we need it, | ||
518 | * later, we may move that initialization to each ppc_md | ||
519 | */ | ||
520 | ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot; | ||
521 | |||
522 | #ifdef CONFIG_PPC_ISERIES | ||
523 | iSeries_pcibios_init(); | ||
524 | #endif | ||
525 | |||
526 | printk("PCI: Probing PCI hardware\n"); | ||
527 | |||
528 | /* Scan all of the recorded PCI controllers. */ | ||
529 | list_for_each_entry_safe(hose, tmp, &hose_list, list_node) | ||
530 | scan_phb(hose); | ||
531 | |||
532 | #ifndef CONFIG_PPC_ISERIES | ||
533 | if (pci_probe_only) | ||
534 | pcibios_claim_of_setup(); | ||
535 | else | ||
536 | /* FIXME: `else' will be removed when | ||
537 | pci_assign_unassigned_resources() is able to work | ||
538 | correctly with [partially] allocated PCI tree. */ | ||
539 | pci_assign_unassigned_resources(); | ||
540 | #endif /* !CONFIG_PPC_ISERIES */ | ||
541 | |||
542 | /* Call machine dependent final fixup */ | ||
543 | if (ppc_md.pcibios_fixup) | ||
544 | ppc_md.pcibios_fixup(); | ||
545 | |||
546 | /* Cache the location of the ISA bridge (if we have one) */ | ||
547 | ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); | ||
548 | if (ppc64_isabridge_dev != NULL) | ||
549 | printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev)); | ||
550 | |||
551 | #ifdef CONFIG_PPC_MULTIPLATFORM | ||
552 | /* map in PCI I/O space */ | ||
553 | phbs_remap_io(); | ||
554 | #endif | ||
555 | |||
556 | printk("PCI: Probing PCI hardware done\n"); | ||
557 | |||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | subsys_initcall(pcibios_init); | ||
562 | |||
563 | char __init *pcibios_setup(char *str) | ||
564 | { | ||
565 | return str; | ||
566 | } | ||
567 | |||
568 | int pcibios_enable_device(struct pci_dev *dev, int mask) | ||
569 | { | ||
570 | u16 cmd, oldcmd; | ||
571 | int i; | ||
572 | |||
573 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | ||
574 | oldcmd = cmd; | ||
575 | |||
576 | for (i = 0; i < PCI_NUM_RESOURCES; i++) { | ||
577 | struct resource *res = &dev->resource[i]; | ||
578 | |||
579 | /* Only set up the requested stuff */ | ||
580 | if (!(mask & (1<<i))) | ||
581 | continue; | ||
582 | |||
583 | if (res->flags & IORESOURCE_IO) | ||
584 | cmd |= PCI_COMMAND_IO; | ||
585 | if (res->flags & IORESOURCE_MEM) | ||
586 | cmd |= PCI_COMMAND_MEMORY; | ||
587 | } | ||
588 | |||
589 | if (cmd != oldcmd) { | ||
590 | printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n", | ||
591 | pci_name(dev), cmd); | ||
592 | /* Enable the appropriate bits in the PCI command register. */ | ||
593 | pci_write_config_word(dev, PCI_COMMAND, cmd); | ||
594 | } | ||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /* | ||
599 | * Return the domain number for this bus. | ||
600 | */ | ||
601 | int pci_domain_nr(struct pci_bus *bus) | ||
602 | { | ||
603 | #ifdef CONFIG_PPC_ISERIES | ||
604 | return 0; | ||
605 | #else | ||
606 | struct pci_controller *hose = pci_bus_to_host(bus); | ||
607 | |||
608 | return hose->global_number; | ||
609 | #endif | ||
610 | } | ||
611 | |||
612 | EXPORT_SYMBOL(pci_domain_nr); | ||
613 | |||
614 | /* Decide whether to display the domain number in /proc */ | ||
615 | int pci_proc_domain(struct pci_bus *bus) | ||
616 | { | ||
617 | #ifdef CONFIG_PPC_ISERIES | ||
618 | return 0; | ||
619 | #else | ||
620 | struct pci_controller *hose = pci_bus_to_host(bus); | ||
621 | return hose->buid; | ||
622 | #endif | ||
623 | } | ||
624 | |||
625 | /* | ||
626 | * Platform support for /proc/bus/pci/X/Y mmap()s, | ||
627 | * modelled on the sparc64 implementation by Dave Miller. | ||
628 | * -- paulus. | ||
629 | */ | ||
630 | |||
631 | /* | ||
632 | * Adjust vm_pgoff of VMA such that it is the physical page offset | ||
633 | * corresponding to the 32-bit pci bus offset for DEV requested by the user. | ||
634 | * | ||
635 | * Basically, the user finds the base address for his device which he wishes | ||
636 | * to mmap. They read the 32-bit value from the config space base register, | ||
637 | * add whatever PAGE_SIZE multiple offset they wish, and feed this into the | ||
638 | * offset parameter of mmap on /proc/bus/pci/XXX for that device. | ||
639 | * | ||
640 | * Returns negative error code on failure, zero on success. | ||
641 | */ | ||
642 | static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, | ||
643 | unsigned long *offset, | ||
644 | enum pci_mmap_state mmap_state) | ||
645 | { | ||
646 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
647 | unsigned long io_offset = 0; | ||
648 | int i, res_bit; | ||
649 | |||
650 | if (hose == 0) | ||
651 | return NULL; /* should never happen */ | ||
652 | |||
653 | /* If memory, add on the PCI bridge address offset */ | ||
654 | if (mmap_state == pci_mmap_mem) { | ||
655 | *offset += hose->pci_mem_offset; | ||
656 | res_bit = IORESOURCE_MEM; | ||
657 | } else { | ||
658 | io_offset = (unsigned long)hose->io_base_virt - pci_io_base; | ||
659 | *offset += io_offset; | ||
660 | res_bit = IORESOURCE_IO; | ||
661 | } | ||
662 | |||
663 | /* | ||
664 | * Check that the offset requested corresponds to one of the | ||
665 | * resources of the device. | ||
666 | */ | ||
667 | for (i = 0; i <= PCI_ROM_RESOURCE; i++) { | ||
668 | struct resource *rp = &dev->resource[i]; | ||
669 | int flags = rp->flags; | ||
670 | |||
671 | /* treat ROM as memory (should be already) */ | ||
672 | if (i == PCI_ROM_RESOURCE) | ||
673 | flags |= IORESOURCE_MEM; | ||
674 | |||
675 | /* Active and same type? */ | ||
676 | if ((flags & res_bit) == 0) | ||
677 | continue; | ||
678 | |||
679 | /* In the range of this resource? */ | ||
680 | if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) | ||
681 | continue; | ||
682 | |||
683 | /* found it! construct the final physical address */ | ||
684 | if (mmap_state == pci_mmap_io) | ||
685 | *offset += hose->io_base_phys - io_offset; | ||
686 | return rp; | ||
687 | } | ||
688 | |||
689 | return NULL; | ||
690 | } | ||
691 | |||
692 | /* | ||
693 | * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci | ||
694 | * device mapping. | ||
695 | */ | ||
696 | static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, | ||
697 | pgprot_t protection, | ||
698 | enum pci_mmap_state mmap_state, | ||
699 | int write_combine) | ||
700 | { | ||
701 | unsigned long prot = pgprot_val(protection); | ||
702 | |||
703 | /* Write combine is always 0 on non-memory space mappings. On | ||
704 | * memory space, if the user didn't pass 1, we check for a | ||
705 | * "prefetchable" resource. This is a bit hackish, but we use | ||
706 | * this to workaround the inability of /sysfs to provide a write | ||
707 | * combine bit | ||
708 | */ | ||
709 | if (mmap_state != pci_mmap_mem) | ||
710 | write_combine = 0; | ||
711 | else if (write_combine == 0) { | ||
712 | if (rp->flags & IORESOURCE_PREFETCH) | ||
713 | write_combine = 1; | ||
714 | } | ||
715 | |||
716 | /* XXX would be nice to have a way to ask for write-through */ | ||
717 | prot |= _PAGE_NO_CACHE; | ||
718 | if (write_combine) | ||
719 | prot &= ~_PAGE_GUARDED; | ||
720 | else | ||
721 | prot |= _PAGE_GUARDED; | ||
722 | |||
723 | printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start, | ||
724 | prot); | ||
725 | |||
726 | return __pgprot(prot); | ||
727 | } | ||
728 | |||
729 | /* | ||
730 | * This one is used by /dev/mem and fbdev who have no clue about the | ||
731 | * PCI device, it tries to find the PCI device first and calls the | ||
732 | * above routine | ||
733 | */ | ||
734 | pgprot_t pci_phys_mem_access_prot(struct file *file, | ||
735 | unsigned long pfn, | ||
736 | unsigned long size, | ||
737 | pgprot_t protection) | ||
738 | { | ||
739 | struct pci_dev *pdev = NULL; | ||
740 | struct resource *found = NULL; | ||
741 | unsigned long prot = pgprot_val(protection); | ||
742 | unsigned long offset = pfn << PAGE_SHIFT; | ||
743 | int i; | ||
744 | |||
745 | if (page_is_ram(pfn)) | ||
746 | return __pgprot(prot); | ||
747 | |||
748 | prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; | ||
749 | |||
750 | for_each_pci_dev(pdev) { | ||
751 | for (i = 0; i <= PCI_ROM_RESOURCE; i++) { | ||
752 | struct resource *rp = &pdev->resource[i]; | ||
753 | int flags = rp->flags; | ||
754 | |||
755 | /* Active and same type? */ | ||
756 | if ((flags & IORESOURCE_MEM) == 0) | ||
757 | continue; | ||
758 | /* In the range of this resource? */ | ||
759 | if (offset < (rp->start & PAGE_MASK) || | ||
760 | offset > rp->end) | ||
761 | continue; | ||
762 | found = rp; | ||
763 | break; | ||
764 | } | ||
765 | if (found) | ||
766 | break; | ||
767 | } | ||
768 | if (found) { | ||
769 | if (found->flags & IORESOURCE_PREFETCH) | ||
770 | prot &= ~_PAGE_GUARDED; | ||
771 | pci_dev_put(pdev); | ||
772 | } | ||
773 | |||
774 | DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); | ||
775 | |||
776 | return __pgprot(prot); | ||
777 | } | ||
778 | |||
779 | |||
780 | /* | ||
781 | * Perform the actual remap of the pages for a PCI device mapping, as | ||
782 | * appropriate for this architecture. The region in the process to map | ||
783 | * is described by vm_start and vm_end members of VMA, the base physical | ||
784 | * address is found in vm_pgoff. | ||
785 | * The pci device structure is provided so that architectures may make mapping | ||
786 | * decisions on a per-device or per-bus basis. | ||
787 | * | ||
788 | * Returns a negative error code on failure, zero on success. | ||
789 | */ | ||
790 | int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | ||
791 | enum pci_mmap_state mmap_state, | ||
792 | int write_combine) | ||
793 | { | ||
794 | unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; | ||
795 | struct resource *rp; | ||
796 | int ret; | ||
797 | |||
798 | rp = __pci_mmap_make_offset(dev, &offset, mmap_state); | ||
799 | if (rp == NULL) | ||
800 | return -EINVAL; | ||
801 | |||
802 | vma->vm_pgoff = offset >> PAGE_SHIFT; | ||
803 | vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO; | ||
804 | vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp, | ||
805 | vma->vm_page_prot, | ||
806 | mmap_state, write_combine); | ||
807 | |||
808 | ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, | ||
809 | vma->vm_end - vma->vm_start, vma->vm_page_prot); | ||
810 | |||
811 | return ret; | ||
812 | } | ||
813 | |||
814 | #ifdef CONFIG_PPC_MULTIPLATFORM | ||
815 | static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) | ||
816 | { | ||
817 | struct pci_dev *pdev; | ||
818 | struct device_node *np; | ||
819 | |||
820 | pdev = to_pci_dev (dev); | ||
821 | np = pci_device_to_OF_node(pdev); | ||
822 | if (np == NULL || np->full_name == NULL) | ||
823 | return 0; | ||
824 | return sprintf(buf, "%s", np->full_name); | ||
825 | } | ||
826 | static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL); | ||
827 | #endif /* CONFIG_PPC_MULTIPLATFORM */ | ||
828 | |||
/*
 * Add the platform specific sysfs files for @pdev: on multiplatform
 * builds this is the "devspec" attribute, otherwise nothing.
 */
void pcibios_add_platform_entries(struct pci_dev *pdev)
{
#ifdef CONFIG_PPC_MULTIPLATFORM
	device_create_file(&pdev->dev, &dev_attr_devspec);
#endif /* CONFIG_PPC_MULTIPLATFORM */
}
835 | |||
836 | #ifdef CONFIG_PPC_MULTIPLATFORM | ||
837 | |||
838 | #define ISA_SPACE_MASK 0x1 | ||
839 | #define ISA_SPACE_IO 0x1 | ||
840 | |||
841 | static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, | ||
842 | unsigned long phb_io_base_phys, | ||
843 | void __iomem * phb_io_base_virt) | ||
844 | { | ||
845 | struct isa_range *range; | ||
846 | unsigned long pci_addr; | ||
847 | unsigned int isa_addr; | ||
848 | unsigned int size; | ||
849 | int rlen = 0; | ||
850 | |||
851 | range = (struct isa_range *) get_property(isa_node, "ranges", &rlen); | ||
852 | if (range == NULL || (rlen < sizeof(struct isa_range))) { | ||
853 | printk(KERN_ERR "no ISA ranges or unexpected isa range size," | ||
854 | "mapping 64k\n"); | ||
855 | __ioremap_explicit(phb_io_base_phys, | ||
856 | (unsigned long)phb_io_base_virt, | ||
857 | 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED); | ||
858 | return; | ||
859 | } | ||
860 | |||
861 | /* From "ISA Binding to 1275" | ||
862 | * The ranges property is laid out as an array of elements, | ||
863 | * each of which comprises: | ||
864 | * cells 0 - 1: an ISA address | ||
865 | * cells 2 - 4: a PCI address | ||
866 | * (size depending on dev->n_addr_cells) | ||
867 | * cell 5: the size of the range | ||
868 | */ | ||
869 | if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) { | ||
870 | isa_addr = range->isa_addr.a_lo; | ||
871 | pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | | ||
872 | range->pci_addr.a_lo; | ||
873 | |||
874 | /* Assume these are both zero */ | ||
875 | if ((pci_addr != 0) || (isa_addr != 0)) { | ||
876 | printk(KERN_ERR "unexpected isa to pci mapping: %s\n", | ||
877 | __FUNCTION__); | ||
878 | return; | ||
879 | } | ||
880 | |||
881 | size = PAGE_ALIGN(range->size); | ||
882 | |||
883 | __ioremap_explicit(phb_io_base_phys, | ||
884 | (unsigned long) phb_io_base_virt, | ||
885 | size, _PAGE_NO_CACHE | _PAGE_GUARDED); | ||
886 | } | ||
887 | } | ||
888 | |||
889 | void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, | ||
890 | struct device_node *dev, int prim) | ||
891 | { | ||
892 | unsigned int *ranges, pci_space; | ||
893 | unsigned long size; | ||
894 | int rlen = 0; | ||
895 | int memno = 0; | ||
896 | struct resource *res; | ||
897 | int np, na = prom_n_addr_cells(dev); | ||
898 | unsigned long pci_addr, cpu_phys_addr; | ||
899 | |||
900 | np = na + 5; | ||
901 | |||
902 | /* From "PCI Binding to 1275" | ||
903 | * The ranges property is laid out as an array of elements, | ||
904 | * each of which comprises: | ||
905 | * cells 0 - 2: a PCI address | ||
906 | * cells 3 or 3+4: a CPU physical address | ||
907 | * (size depending on dev->n_addr_cells) | ||
908 | * cells 4+5 or 5+6: the size of the range | ||
909 | */ | ||
910 | rlen = 0; | ||
911 | hose->io_base_phys = 0; | ||
912 | ranges = (unsigned int *) get_property(dev, "ranges", &rlen); | ||
913 | while ((rlen -= np * sizeof(unsigned int)) >= 0) { | ||
914 | res = NULL; | ||
915 | pci_space = ranges[0]; | ||
916 | pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2]; | ||
917 | |||
918 | cpu_phys_addr = ranges[3]; | ||
919 | if (na >= 2) | ||
920 | cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4]; | ||
921 | |||
922 | size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4]; | ||
923 | ranges += np; | ||
924 | if (size == 0) | ||
925 | continue; | ||
926 | |||
927 | /* Now consume following elements while they are contiguous */ | ||
928 | while (rlen >= np * sizeof(unsigned int)) { | ||
929 | unsigned long addr, phys; | ||
930 | |||
931 | if (ranges[0] != pci_space) | ||
932 | break; | ||
933 | addr = ((unsigned long)ranges[1] << 32) | ranges[2]; | ||
934 | phys = ranges[3]; | ||
935 | if (na >= 2) | ||
936 | phys = (phys << 32) | ranges[4]; | ||
937 | if (addr != pci_addr + size || | ||
938 | phys != cpu_phys_addr + size) | ||
939 | break; | ||
940 | |||
941 | size += ((unsigned long)ranges[na+3] << 32) | ||
942 | | ranges[na+4]; | ||
943 | ranges += np; | ||
944 | rlen -= np * sizeof(unsigned int); | ||
945 | } | ||
946 | |||
947 | switch ((pci_space >> 24) & 0x3) { | ||
948 | case 1: /* I/O space */ | ||
949 | hose->io_base_phys = cpu_phys_addr; | ||
950 | hose->pci_io_size = size; | ||
951 | |||
952 | res = &hose->io_resource; | ||
953 | res->flags = IORESOURCE_IO; | ||
954 | res->start = pci_addr; | ||
955 | DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number, | ||
956 | res->start, res->start + size - 1); | ||
957 | break; | ||
958 | case 2: /* memory space */ | ||
959 | memno = 0; | ||
960 | while (memno < 3 && hose->mem_resources[memno].flags) | ||
961 | ++memno; | ||
962 | |||
963 | if (memno == 0) | ||
964 | hose->pci_mem_offset = cpu_phys_addr - pci_addr; | ||
965 | if (memno < 3) { | ||
966 | res = &hose->mem_resources[memno]; | ||
967 | res->flags = IORESOURCE_MEM; | ||
968 | res->start = cpu_phys_addr; | ||
969 | DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number, | ||
970 | res->start, res->start + size - 1); | ||
971 | } | ||
972 | break; | ||
973 | } | ||
974 | if (res != NULL) { | ||
975 | res->name = dev->full_name; | ||
976 | res->end = res->start + size - 1; | ||
977 | res->parent = NULL; | ||
978 | res->sibling = NULL; | ||
979 | res->child = NULL; | ||
980 | } | ||
981 | } | ||
982 | } | ||
983 | |||
984 | void __init pci_setup_phb_io(struct pci_controller *hose, int primary) | ||
985 | { | ||
986 | unsigned long size = hose->pci_io_size; | ||
987 | unsigned long io_virt_offset; | ||
988 | struct resource *res; | ||
989 | struct device_node *isa_dn; | ||
990 | |||
991 | hose->io_base_virt = reserve_phb_iospace(size); | ||
992 | DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", | ||
993 | hose->global_number, hose->io_base_phys, | ||
994 | (unsigned long) hose->io_base_virt); | ||
995 | |||
996 | if (primary) { | ||
997 | pci_io_base = (unsigned long)hose->io_base_virt; | ||
998 | isa_dn = of_find_node_by_type(NULL, "isa"); | ||
999 | if (isa_dn) { | ||
1000 | isa_io_base = pci_io_base; | ||
1001 | pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys, | ||
1002 | hose->io_base_virt); | ||
1003 | of_node_put(isa_dn); | ||
1004 | /* Allow all IO */ | ||
1005 | io_page_mask = -1; | ||
1006 | } | ||
1007 | } | ||
1008 | |||
1009 | io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; | ||
1010 | res = &hose->io_resource; | ||
1011 | res->start += io_virt_offset; | ||
1012 | res->end += io_virt_offset; | ||
1013 | } | ||
1014 | |||
1015 | void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose, | ||
1016 | int primary) | ||
1017 | { | ||
1018 | unsigned long size = hose->pci_io_size; | ||
1019 | unsigned long io_virt_offset; | ||
1020 | struct resource *res; | ||
1021 | |||
1022 | hose->io_base_virt = __ioremap(hose->io_base_phys, size, | ||
1023 | _PAGE_NO_CACHE | _PAGE_GUARDED); | ||
1024 | DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", | ||
1025 | hose->global_number, hose->io_base_phys, | ||
1026 | (unsigned long) hose->io_base_virt); | ||
1027 | |||
1028 | if (primary) | ||
1029 | pci_io_base = (unsigned long)hose->io_base_virt; | ||
1030 | |||
1031 | io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; | ||
1032 | res = &hose->io_resource; | ||
1033 | res->start += io_virt_offset; | ||
1034 | res->end += io_virt_offset; | ||
1035 | } | ||
1036 | |||
1037 | |||
1038 | static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys, | ||
1039 | unsigned long *start_virt, unsigned long *size) | ||
1040 | { | ||
1041 | struct pci_controller *hose = pci_bus_to_host(bus); | ||
1042 | struct pci_bus_region region; | ||
1043 | struct resource *res; | ||
1044 | |||
1045 | if (bus->self) { | ||
1046 | res = bus->resource[0]; | ||
1047 | pcibios_resource_to_bus(bus->self, ®ion, res); | ||
1048 | *start_phys = hose->io_base_phys + region.start; | ||
1049 | *start_virt = (unsigned long) hose->io_base_virt + | ||
1050 | region.start; | ||
1051 | if (region.end > region.start) | ||
1052 | *size = region.end - region.start + 1; | ||
1053 | else { | ||
1054 | printk("%s(): unexpected region 0x%lx->0x%lx\n", | ||
1055 | __FUNCTION__, region.start, region.end); | ||
1056 | return 1; | ||
1057 | } | ||
1058 | |||
1059 | } else { | ||
1060 | /* Root Bus */ | ||
1061 | res = &hose->io_resource; | ||
1062 | *start_phys = hose->io_base_phys; | ||
1063 | *start_virt = (unsigned long) hose->io_base_virt; | ||
1064 | if (res->end > res->start) | ||
1065 | *size = res->end - res->start + 1; | ||
1066 | else { | ||
1067 | printk("%s(): unexpected region 0x%lx->0x%lx\n", | ||
1068 | __FUNCTION__, res->start, res->end); | ||
1069 | return 1; | ||
1070 | } | ||
1071 | } | ||
1072 | |||
1073 | return 0; | ||
1074 | } | ||
1075 | |||
1076 | int unmap_bus_range(struct pci_bus *bus) | ||
1077 | { | ||
1078 | unsigned long start_phys; | ||
1079 | unsigned long start_virt; | ||
1080 | unsigned long size; | ||
1081 | |||
1082 | if (!bus) { | ||
1083 | printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); | ||
1084 | return 1; | ||
1085 | } | ||
1086 | |||
1087 | if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) | ||
1088 | return 1; | ||
1089 | if (iounmap_explicit((void __iomem *) start_virt, size)) | ||
1090 | return 1; | ||
1091 | |||
1092 | return 0; | ||
1093 | } | ||
1094 | EXPORT_SYMBOL(unmap_bus_range); | ||
1095 | |||
1096 | int remap_bus_range(struct pci_bus *bus) | ||
1097 | { | ||
1098 | unsigned long start_phys; | ||
1099 | unsigned long start_virt; | ||
1100 | unsigned long size; | ||
1101 | |||
1102 | if (!bus) { | ||
1103 | printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); | ||
1104 | return 1; | ||
1105 | } | ||
1106 | |||
1107 | |||
1108 | if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) | ||
1109 | return 1; | ||
1110 | printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size); | ||
1111 | if (__ioremap_explicit(start_phys, start_virt, size, | ||
1112 | _PAGE_NO_CACHE | _PAGE_GUARDED)) | ||
1113 | return 1; | ||
1114 | |||
1115 | return 0; | ||
1116 | } | ||
1117 | EXPORT_SYMBOL(remap_bus_range); | ||
1118 | |||
1119 | void phbs_remap_io(void) | ||
1120 | { | ||
1121 | struct pci_controller *hose, *tmp; | ||
1122 | |||
1123 | list_for_each_entry_safe(hose, tmp, &hose_list, list_node) | ||
1124 | remap_bus_range(hose->bus); | ||
1125 | } | ||
1126 | |||
/*
 * ppc64 can have multifunction devices that do not respond to function 0,
 * in which case all functions would have to be scanned.  This hook
 * nevertheless always answers 0, whatever @bus and @devfn are.
 * XXX this can go now, we use the OF device tree in all the
 * cases that caused problems. -- paulus
 */
int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
{
	return 0;
}
1137 | |||
1138 | static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) | ||
1139 | { | ||
1140 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
1141 | unsigned long start, end, mask, offset; | ||
1142 | |||
1143 | if (res->flags & IORESOURCE_IO) { | ||
1144 | offset = (unsigned long)hose->io_base_virt - pci_io_base; | ||
1145 | |||
1146 | start = res->start += offset; | ||
1147 | end = res->end += offset; | ||
1148 | |||
1149 | /* Need to allow IO access to pages that are in the | ||
1150 | ISA range */ | ||
1151 | if (start < MAX_ISA_PORT) { | ||
1152 | if (end > MAX_ISA_PORT) | ||
1153 | end = MAX_ISA_PORT; | ||
1154 | |||
1155 | start >>= PAGE_SHIFT; | ||
1156 | end >>= PAGE_SHIFT; | ||
1157 | |||
1158 | /* get the range of pages for the map */ | ||
1159 | mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1); | ||
1160 | io_page_mask |= mask; | ||
1161 | } | ||
1162 | } else if (res->flags & IORESOURCE_MEM) { | ||
1163 | res->start += hose->pci_mem_offset; | ||
1164 | res->end += hose->pci_mem_offset; | ||
1165 | } | ||
1166 | } | ||
1167 | |||
1168 | void __devinit pcibios_fixup_device_resources(struct pci_dev *dev, | ||
1169 | struct pci_bus *bus) | ||
1170 | { | ||
1171 | /* Update device resources. */ | ||
1172 | int i; | ||
1173 | |||
1174 | for (i = 0; i < PCI_NUM_RESOURCES; i++) | ||
1175 | if (dev->resource[i].flags) | ||
1176 | fixup_resource(&dev->resource[i], dev); | ||
1177 | } | ||
1178 | EXPORT_SYMBOL(pcibios_fixup_device_resources); | ||
1179 | |||
1180 | static void __devinit do_bus_setup(struct pci_bus *bus) | ||
1181 | { | ||
1182 | struct pci_dev *dev; | ||
1183 | |||
1184 | ppc_md.iommu_bus_setup(bus); | ||
1185 | |||
1186 | list_for_each_entry(dev, &bus->devices, bus_list) | ||
1187 | ppc_md.iommu_dev_setup(dev); | ||
1188 | |||
1189 | if (ppc_md.irq_bus_setup) | ||
1190 | ppc_md.irq_bus_setup(bus); | ||
1191 | } | ||
1192 | |||
1193 | void __devinit pcibios_fixup_bus(struct pci_bus *bus) | ||
1194 | { | ||
1195 | struct pci_dev *dev = bus->self; | ||
1196 | |||
1197 | if (dev && pci_probe_only && | ||
1198 | (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { | ||
1199 | /* This is a subordinate bridge */ | ||
1200 | |||
1201 | pci_read_bridge_bases(bus); | ||
1202 | pcibios_fixup_device_resources(dev, bus); | ||
1203 | } | ||
1204 | |||
1205 | do_bus_setup(bus); | ||
1206 | |||
1207 | if (!pci_probe_only) | ||
1208 | return; | ||
1209 | |||
1210 | list_for_each_entry(dev, &bus->devices, bus_list) | ||
1211 | if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) | ||
1212 | pcibios_fixup_device_resources(dev, bus); | ||
1213 | } | ||
1214 | EXPORT_SYMBOL(pcibios_fixup_bus); | ||
1215 | |||
1216 | /* | ||
1217 | * Reads the interrupt pin to determine if interrupt is use by card. | ||
1218 | * If the interrupt is used, then gets the interrupt line from the | ||
1219 | * openfirmware and sets it in the pci_dev and pci_config line. | ||
1220 | */ | ||
1221 | int pci_read_irq_line(struct pci_dev *pci_dev) | ||
1222 | { | ||
1223 | u8 intpin; | ||
1224 | struct device_node *node; | ||
1225 | |||
1226 | pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin); | ||
1227 | if (intpin == 0) | ||
1228 | return 0; | ||
1229 | |||
1230 | node = pci_device_to_OF_node(pci_dev); | ||
1231 | if (node == NULL) | ||
1232 | return -1; | ||
1233 | |||
1234 | if (node->n_intrs == 0) | ||
1235 | return -1; | ||
1236 | |||
1237 | pci_dev->irq = node->intrs[0].line; | ||
1238 | |||
1239 | pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq); | ||
1240 | |||
1241 | return 0; | ||
1242 | } | ||
1243 | EXPORT_SYMBOL(pci_read_irq_line); | ||
1244 | |||
1245 | void pci_resource_to_user(const struct pci_dev *dev, int bar, | ||
1246 | const struct resource *rsrc, | ||
1247 | u64 *start, u64 *end) | ||
1248 | { | ||
1249 | struct pci_controller *hose = pci_bus_to_host(dev->bus); | ||
1250 | unsigned long offset = 0; | ||
1251 | |||
1252 | if (hose == NULL) | ||
1253 | return; | ||
1254 | |||
1255 | if (rsrc->flags & IORESOURCE_IO) | ||
1256 | offset = pci_io_base - (unsigned long)hose->io_base_virt + | ||
1257 | hose->io_base_phys; | ||
1258 | |||
1259 | *start = rsrc->start + offset; | ||
1260 | *end = rsrc->end + offset; | ||
1261 | } | ||
1262 | |||
1263 | #endif /* CONFIG_PPC_MULTIPLATFORM */ | ||
1264 | |||
1265 | |||
1266 | #define IOBASE_BRIDGE_NUMBER 0 | ||
1267 | #define IOBASE_MEMORY 1 | ||
1268 | #define IOBASE_IO 2 | ||
1269 | #define IOBASE_ISA_IO 3 | ||
1270 | #define IOBASE_ISA_MEM 4 | ||
1271 | |||
1272 | long sys_pciconfig_iobase(long which, unsigned long in_bus, | ||
1273 | unsigned long in_devfn) | ||
1274 | { | ||
1275 | struct pci_controller* hose; | ||
1276 | struct list_head *ln; | ||
1277 | struct pci_bus *bus = NULL; | ||
1278 | struct device_node *hose_node; | ||
1279 | |||
1280 | /* Argh ! Please forgive me for that hack, but that's the | ||
1281 | * simplest way to get existing XFree to not lockup on some | ||
1282 | * G5 machines... So when something asks for bus 0 io base | ||
1283 | * (bus 0 is HT root), we return the AGP one instead. | ||
1284 | */ | ||
1285 | if (machine_is_compatible("MacRISC4")) | ||
1286 | if (in_bus == 0) | ||
1287 | in_bus = 0xf0; | ||
1288 | |||
1289 | /* That syscall isn't quite compatible with PCI domains, but it's | ||
1290 | * used on pre-domains setup. We return the first match | ||
1291 | */ | ||
1292 | |||
1293 | for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { | ||
1294 | bus = pci_bus_b(ln); | ||
1295 | if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate)) | ||
1296 | break; | ||
1297 | bus = NULL; | ||
1298 | } | ||
1299 | if (bus == NULL || bus->sysdata == NULL) | ||
1300 | return -ENODEV; | ||
1301 | |||
1302 | hose_node = (struct device_node *)bus->sysdata; | ||
1303 | hose = PCI_DN(hose_node)->phb; | ||
1304 | |||
1305 | switch (which) { | ||
1306 | case IOBASE_BRIDGE_NUMBER: | ||
1307 | return (long)hose->first_busno; | ||
1308 | case IOBASE_MEMORY: | ||
1309 | return (long)hose->pci_mem_offset; | ||
1310 | case IOBASE_IO: | ||
1311 | return (long)hose->io_base_phys; | ||
1312 | case IOBASE_ISA_IO: | ||
1313 | return (long)isa_io_base; | ||
1314 | case IOBASE_ISA_MEM: | ||
1315 | return -EINVAL; | ||
1316 | } | ||
1317 | |||
1318 | return -EOPNOTSUPP; | ||
1319 | } | ||
diff --git a/arch/powerpc/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c new file mode 100644 index 000000000000..e1a32f802c0b --- /dev/null +++ b/arch/powerpc/kernel/pci_direct_iommu.c | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * Support for DMA from PCI devices to main memory on | ||
3 | * machines without an iommu or with directly addressable | ||
4 | * RAM (typically a pmac with 2Gb of RAM or less) | ||
5 | * | ||
6 | * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/pci.h> | ||
16 | #include <linux/delay.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/bootmem.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/dma-mapping.h> | ||
22 | |||
23 | #include <asm/sections.h> | ||
24 | #include <asm/io.h> | ||
25 | #include <asm/prom.h> | ||
26 | #include <asm/pci-bridge.h> | ||
27 | #include <asm/machdep.h> | ||
28 | #include <asm/pmac_feature.h> | ||
29 | #include <asm/abs_addr.h> | ||
30 | #include <asm/ppc-pci.h> | ||
31 | |||
32 | static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size, | ||
33 | dma_addr_t *dma_handle, gfp_t flag) | ||
34 | { | ||
35 | void *ret; | ||
36 | |||
37 | ret = (void *)__get_free_pages(flag, get_order(size)); | ||
38 | if (ret != NULL) { | ||
39 | memset(ret, 0, size); | ||
40 | *dma_handle = virt_to_abs(ret); | ||
41 | } | ||
42 | return ret; | ||
43 | } | ||
44 | |||
45 | static void pci_direct_free_coherent(struct device *hwdev, size_t size, | ||
46 | void *vaddr, dma_addr_t dma_handle) | ||
47 | { | ||
48 | free_pages((unsigned long)vaddr, get_order(size)); | ||
49 | } | ||
50 | |||
51 | static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr, | ||
52 | size_t size, enum dma_data_direction direction) | ||
53 | { | ||
54 | return virt_to_abs(ptr); | ||
55 | } | ||
56 | |||
57 | static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr, | ||
58 | size_t size, enum dma_data_direction direction) | ||
59 | { | ||
60 | } | ||
61 | |||
62 | static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg, | ||
63 | int nents, enum dma_data_direction direction) | ||
64 | { | ||
65 | int i; | ||
66 | |||
67 | for (i = 0; i < nents; i++, sg++) { | ||
68 | sg->dma_address = page_to_phys(sg->page) + sg->offset; | ||
69 | sg->dma_length = sg->length; | ||
70 | } | ||
71 | |||
72 | return nents; | ||
73 | } | ||
74 | |||
75 | static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg, | ||
76 | int nents, enum dma_data_direction direction) | ||
77 | { | ||
78 | } | ||
79 | |||
80 | static int pci_direct_dma_supported(struct device *dev, u64 mask) | ||
81 | { | ||
82 | return mask < 0x100000000ull; | ||
83 | } | ||
84 | |||
85 | void __init pci_direct_iommu_init(void) | ||
86 | { | ||
87 | pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent; | ||
88 | pci_dma_ops.free_coherent = pci_direct_free_coherent; | ||
89 | pci_dma_ops.map_single = pci_direct_map_single; | ||
90 | pci_dma_ops.unmap_single = pci_direct_unmap_single; | ||
91 | pci_dma_ops.map_sg = pci_direct_map_sg; | ||
92 | pci_dma_ops.unmap_sg = pci_direct_unmap_sg; | ||
93 | pci_dma_ops.dma_supported = pci_direct_dma_supported; | ||
94 | } | ||
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c new file mode 100644 index 000000000000..12c4c9e9bbc7 --- /dev/null +++ b/arch/powerpc/kernel/pci_dn.c | |||
@@ -0,0 +1,230 @@ | |||
1 | /* | ||
2 | * pci_dn.c | ||
3 | * | ||
4 | * Copyright (C) 2001 Todd Inglett, IBM Corporation | ||
5 | * | ||
6 | * PCI manipulation via device_nodes. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 | */ | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/pci.h> | ||
24 | #include <linux/string.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/slab.h> | ||
27 | #include <linux/bootmem.h> | ||
28 | |||
29 | #include <asm/io.h> | ||
30 | #include <asm/prom.h> | ||
31 | #include <asm/pci-bridge.h> | ||
32 | #include <asm/pSeries_reconfig.h> | ||
33 | #include <asm/ppc-pci.h> | ||
34 | |||
35 | /* | ||
36 | * Traverse_func that inits the PCI fields of the device node. | ||
37 | * NOTE: this *must* be done before read/write config to the device. | ||
38 | */ | ||
39 | static void * __devinit update_dn_pci_info(struct device_node *dn, void *data) | ||
40 | { | ||
41 | struct pci_controller *phb = data; | ||
42 | int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL); | ||
43 | u32 *regs; | ||
44 | struct pci_dn *pdn; | ||
45 | |||
46 | if (mem_init_done) | ||
47 | pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); | ||
48 | else | ||
49 | pdn = alloc_bootmem(sizeof(*pdn)); | ||
50 | if (pdn == NULL) | ||
51 | return NULL; | ||
52 | memset(pdn, 0, sizeof(*pdn)); | ||
53 | dn->data = pdn; | ||
54 | pdn->node = dn; | ||
55 | pdn->phb = phb; | ||
56 | regs = (u32 *)get_property(dn, "reg", NULL); | ||
57 | if (regs) { | ||
58 | /* First register entry is addr (00BBSS00) */ | ||
59 | pdn->busno = (regs[0] >> 16) & 0xff; | ||
60 | pdn->devfn = (regs[0] >> 8) & 0xff; | ||
61 | } | ||
62 | |||
63 | pdn->pci_ext_config_space = (type && *type == 1); | ||
64 | return NULL; | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * Traverse a device tree stopping each PCI device in the tree. | ||
69 | * This is done depth first. As each node is processed, a "pre" | ||
70 | * function is called and the children are processed recursively. | ||
71 | * | ||
72 | * The "pre" func returns a value. If non-zero is returned from | ||
73 | * the "pre" func, the traversal stops and this value is returned. | ||
74 | * This return value is useful when using traverse as a method of | ||
75 | * finding a device. | ||
76 | * | ||
77 | * NOTE: we do not run the func for devices that do not appear to | ||
78 | * be PCI except for the start node which we assume (this is good | ||
79 | * because the start node is often a phb which may be missing PCI | ||
80 | * properties). | ||
81 | * We use the class-code as an indicator. If we run into | ||
82 | * one of these nodes we also assume its siblings are non-pci for | ||
83 | * performance. | ||
84 | */ | ||
85 | void *traverse_pci_devices(struct device_node *start, traverse_func pre, | ||
86 | void *data) | ||
87 | { | ||
88 | struct device_node *dn, *nextdn; | ||
89 | void *ret; | ||
90 | |||
91 | /* We started with a phb, iterate all childs */ | ||
92 | for (dn = start->child; dn; dn = nextdn) { | ||
93 | u32 *classp, class; | ||
94 | |||
95 | nextdn = NULL; | ||
96 | classp = (u32 *)get_property(dn, "class-code", NULL); | ||
97 | class = classp ? *classp : 0; | ||
98 | |||
99 | if (pre && ((ret = pre(dn, data)) != NULL)) | ||
100 | return ret; | ||
101 | |||
102 | /* If we are a PCI bridge, go down */ | ||
103 | if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI || | ||
104 | (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS)) | ||
105 | /* Depth first...do children */ | ||
106 | nextdn = dn->child; | ||
107 | else if (dn->sibling) | ||
108 | /* ok, try next sibling instead. */ | ||
109 | nextdn = dn->sibling; | ||
110 | if (!nextdn) { | ||
111 | /* Walk up to next valid sibling. */ | ||
112 | do { | ||
113 | dn = dn->parent; | ||
114 | if (dn == start) | ||
115 | return NULL; | ||
116 | } while (dn->sibling == NULL); | ||
117 | nextdn = dn->sibling; | ||
118 | } | ||
119 | } | ||
120 | return NULL; | ||
121 | } | ||
122 | |||
123 | /** | ||
124 | * pci_devs_phb_init_dynamic - setup pci devices under this PHB | ||
125 | * phb: pci-to-host bridge (top-level bridge connecting to cpu) | ||
126 | * | ||
127 | * This routine is called both during boot, (before the memory | ||
128 | * subsystem is set up, before kmalloc is valid) and during the | ||
129 | * dynamic lpar operation of adding a PHB to a running system. | ||
130 | */ | ||
131 | void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) | ||
132 | { | ||
133 | struct device_node * dn = (struct device_node *) phb->arch_data; | ||
134 | struct pci_dn *pdn; | ||
135 | |||
136 | /* PHB nodes themselves must not match */ | ||
137 | update_dn_pci_info(dn, phb); | ||
138 | pdn = dn->data; | ||
139 | if (pdn) { | ||
140 | pdn->devfn = pdn->busno = -1; | ||
141 | pdn->phb = phb; | ||
142 | } | ||
143 | |||
144 | /* Update dn->phb ptrs for new phb and children devices */ | ||
145 | traverse_pci_devices(dn, update_dn_pci_info, phb); | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * Traversal func that looks for a <busno,devfcn> value. | ||
150 | * If found, the pci_dn is returned (thus terminating the traversal). | ||
151 | */ | ||
152 | static void *is_devfn_node(struct device_node *dn, void *data) | ||
153 | { | ||
154 | int busno = ((unsigned long)data >> 8) & 0xff; | ||
155 | int devfn = ((unsigned long)data) & 0xff; | ||
156 | struct pci_dn *pci = dn->data; | ||
157 | |||
158 | if (pci && (devfn == pci->devfn) && (busno == pci->busno)) | ||
159 | return dn; | ||
160 | return NULL; | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * This is the "slow" path for looking up a device_node from a | ||
165 | * pci_dev. It will hunt for the device under its parent's | ||
166 | * phb and then update sysdata for a future fastpath. | ||
167 | * | ||
168 | * It may also do fixups on the actual device since this happens | ||
169 | * on the first read/write. | ||
170 | * | ||
171 | * Note that it also must deal with devices that don't exist. | ||
172 | * In this case it may probe for real hardware ("just in case") | ||
173 | * and add a device_node to the device tree if necessary. | ||
174 | * | ||
175 | */ | ||
176 | struct device_node *fetch_dev_dn(struct pci_dev *dev) | ||
177 | { | ||
178 | struct device_node *orig_dn = dev->sysdata; | ||
179 | struct device_node *dn; | ||
180 | unsigned long searchval = (dev->bus->number << 8) | dev->devfn; | ||
181 | |||
182 | dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval); | ||
183 | if (dn) | ||
184 | dev->sysdata = dn; | ||
185 | return dn; | ||
186 | } | ||
187 | EXPORT_SYMBOL(fetch_dev_dn); | ||
188 | |||
189 | static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) | ||
190 | { | ||
191 | struct device_node *np = node; | ||
192 | struct pci_dn *pci = NULL; | ||
193 | int err = NOTIFY_OK; | ||
194 | |||
195 | switch (action) { | ||
196 | case PSERIES_RECONFIG_ADD: | ||
197 | pci = np->parent->data; | ||
198 | if (pci) | ||
199 | update_dn_pci_info(np, pci->phb); | ||
200 | break; | ||
201 | default: | ||
202 | err = NOTIFY_DONE; | ||
203 | break; | ||
204 | } | ||
205 | return err; | ||
206 | } | ||
207 | |||
208 | static struct notifier_block pci_dn_reconfig_nb = { | ||
209 | .notifier_call = pci_dn_reconfig_notifier, | ||
210 | }; | ||
211 | |||
212 | /** | ||
213 | * pci_devs_phb_init - Initialize phbs and pci devs under them. | ||
214 | * | ||
215 | * This routine walks over all phb's (pci-host bridges) on the | ||
216 | * system, and sets up assorted pci-related structures | ||
217 | * (including pci info in the device node structs) for each | ||
218 | * pci device found underneath. This routine runs once, | ||
219 | * early in the boot sequence. | ||
220 | */ | ||
221 | void __init pci_devs_phb_init(void) | ||
222 | { | ||
223 | struct pci_controller *phb, *tmp; | ||
224 | |||
225 | /* This must be done first so the device nodes have valid pci info! */ | ||
226 | list_for_each_entry_safe(phb, tmp, &hose_list, list_node) | ||
227 | pci_devs_phb_init_dynamic(phb); | ||
228 | |||
229 | pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); | ||
230 | } | ||
diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c new file mode 100644 index 000000000000..bdf15dbbf4f0 --- /dev/null +++ b/arch/powerpc/kernel/pci_iommu.c | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | * arch/powerpc/kernel/pci_iommu.c | ||
3 | * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation | ||
4 | * | ||
5 | * Rewrite, cleanup, new allocation schemes: | ||
6 | * Copyright (C) 2004 Olof Johansson, IBM Corporation | ||
7 | * | ||
8 | * Dynamic DMA mapping support, platform-independent parts. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | |||
26 | #include <linux/init.h> | ||
27 | #include <linux/types.h> | ||
28 | #include <linux/slab.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <linux/spinlock.h> | ||
31 | #include <linux/string.h> | ||
32 | #include <linux/pci.h> | ||
33 | #include <linux/dma-mapping.h> | ||
34 | #include <asm/io.h> | ||
35 | #include <asm/prom.h> | ||
36 | #include <asm/iommu.h> | ||
37 | #include <asm/pci-bridge.h> | ||
38 | #include <asm/machdep.h> | ||
39 | #include <asm/ppc-pci.h> | ||
40 | |||
41 | /* | ||
42 | * We can use ->sysdata directly and avoid the extra work in | ||
43 | * pci_device_to_OF_node since ->sysdata will have been initialised | ||
44 | * in the iommu init code for all devices. | ||
45 | */ | ||
46 | #define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata)) | ||
47 | |||
48 | static inline struct iommu_table *devnode_table(struct device *dev) | ||
49 | { | ||
50 | struct pci_dev *pdev; | ||
51 | |||
52 | if (!dev) { | ||
53 | pdev = ppc64_isabridge_dev; | ||
54 | if (!pdev) | ||
55 | return NULL; | ||
56 | } else | ||
57 | pdev = to_pci_dev(dev); | ||
58 | |||
59 | return PCI_DN(PCI_GET_DN(pdev))->iommu_table; | ||
60 | } | ||
61 | |||
62 | |||
63 | /* Allocates a contiguous real buffer and creates mappings over it. | ||
64 | * Returns the virtual address of the buffer and sets dma_handle | ||
65 | * to the dma address (mapping) of the first page. | ||
66 | */ | ||
67 | static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size, | ||
68 | dma_addr_t *dma_handle, gfp_t flag) | ||
69 | { | ||
70 | return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle, | ||
71 | flag); | ||
72 | } | ||
73 | |||
74 | static void pci_iommu_free_coherent(struct device *hwdev, size_t size, | ||
75 | void *vaddr, dma_addr_t dma_handle) | ||
76 | { | ||
77 | iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle); | ||
78 | } | ||
79 | |||
80 | /* Creates TCEs for a user provided buffer. The user buffer must be | ||
81 | * contiguous real kernel storage (not vmalloc). The address of the buffer | ||
82 | * passed here is the kernel (virtual) address of the buffer. The buffer | ||
83 | * need not be page aligned, the dma_addr_t returned will point to the same | ||
84 | * byte within the page as vaddr. | ||
85 | */ | ||
86 | static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr, | ||
87 | size_t size, enum dma_data_direction direction) | ||
88 | { | ||
89 | return iommu_map_single(devnode_table(hwdev), vaddr, size, direction); | ||
90 | } | ||
91 | |||
92 | |||
93 | static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle, | ||
94 | size_t size, enum dma_data_direction direction) | ||
95 | { | ||
96 | iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction); | ||
97 | } | ||
98 | |||
99 | |||
100 | static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist, | ||
101 | int nelems, enum dma_data_direction direction) | ||
102 | { | ||
103 | return iommu_map_sg(pdev, devnode_table(pdev), sglist, | ||
104 | nelems, direction); | ||
105 | } | ||
106 | |||
107 | static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist, | ||
108 | int nelems, enum dma_data_direction direction) | ||
109 | { | ||
110 | iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction); | ||
111 | } | ||
112 | |||
113 | /* We support DMA to/from any memory page via the iommu */ | ||
114 | static int pci_iommu_dma_supported(struct device *dev, u64 mask) | ||
115 | { | ||
116 | return 1; | ||
117 | } | ||
118 | |||
119 | void pci_iommu_init(void) | ||
120 | { | ||
121 | pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent; | ||
122 | pci_dma_ops.free_coherent = pci_iommu_free_coherent; | ||
123 | pci_dma_ops.map_single = pci_iommu_map_single; | ||
124 | pci_dma_ops.unmap_single = pci_iommu_unmap_single; | ||
125 | pci_dma_ops.map_sg = pci_iommu_map_sg; | ||
126 | pci_dma_ops.unmap_sg = pci_iommu_unmap_sg; | ||
127 | pci_dma_ops.dma_supported = pci_iommu_dma_supported; | ||
128 | } | ||