Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--  arch/powerpc/kernel/Makefile                 |   27
-rw-r--r--  arch/powerpc/kernel/align.c                  |  530
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c            |    6
-rw-r--r--  arch/powerpc/kernel/dma_64.c                 |  151
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S         |    2
-rw-r--r--  arch/powerpc/kernel/idle_64.c                |  121
-rw-r--r--  arch/powerpc/kernel/ioctl32.c                |    4
-rw-r--r--  arch/powerpc/kernel/iomap.c                  |  146
-rw-r--r--  arch/powerpc/kernel/iommu.c                  |  572
-rw-r--r--  arch/powerpc/kernel/irq.c                    |    9
-rw-r--r--  arch/powerpc/kernel/kprobes.c                |  459
-rw-r--r--  arch/powerpc/kernel/lparcfg.c                |   51
-rw-r--r--  arch/powerpc/kernel/machine_kexec_64.c       |  358
-rw-r--r--  arch/powerpc/kernel/misc_32.S                |    8
-rw-r--r--  arch/powerpc/kernel/module_64.c              |  455
-rw-r--r--  arch/powerpc/kernel/nvram_64.c               |  742
-rw-r--r--  arch/powerpc/kernel/pci_64.c                 | 1381
-rw-r--r--  arch/powerpc/kernel/pci_direct_iommu.c       |   94
-rw-r--r--  arch/powerpc/kernel/pci_dn.c                 |  230
-rw-r--r--  arch/powerpc/kernel/pci_iommu.c              |  128
-rw-r--r--  arch/powerpc/kernel/ppc_ksyms.c              |    7
-rw-r--r--  arch/powerpc/kernel/prom.c                   |    2
-rw-r--r--  arch/powerpc/kernel/rtas-rtc.c               |  105
-rw-r--r--  arch/powerpc/kernel/rtas_pci.c               |   68
-rw-r--r--  arch/powerpc/kernel/setup-common.c           |    1
-rw-r--r--  arch/powerpc/kernel/setup_32.c               |    4
-rw-r--r--  arch/powerpc/kernel/setup_64.c               |    5
-rw-r--r--  arch/powerpc/kernel/signal_32.c              |    7
-rw-r--r--  arch/powerpc/kernel/signal_64.c              |    6
-rw-r--r--  arch/powerpc/kernel/smp.c                    |    7
-rw-r--r--  arch/powerpc/kernel/time.c                   |   28
-rw-r--r--  arch/powerpc/kernel/vdso32/cacheflush.S      |    2
-rw-r--r--  arch/powerpc/kernel/vdso32/datapage.S        |    6
-rw-r--r--  arch/powerpc/kernel/vdso32/gettimeofday.S    |   16
-rw-r--r--  arch/powerpc/kernel/vdso64/cacheflush.S      |    2
-rw-r--r--  arch/powerpc/kernel/vdso64/datapage.S        |    4
-rw-r--r--  arch/powerpc/kernel/vdso64/gettimeofday.S    |   35
37 files changed, 5632 insertions(+), 147 deletions(-)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 9a74b7ab03a4..9ed551b6c172 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,12 +12,12 @@ CFLAGS_btext.o += -fPIC
 endif
 
 obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
          irq.o signal_32.o pmc.o vdso.o
+obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
+         irq.o align.o signal_32.o pmc.o vdso.o
 obj-y += vdso32/
 obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
          signal_64.o ptrace32.o systbl.o \
          paca.o ioctl32.o cpu_setup_power4.o \
-         firmware.o sysfs.o udbg.o
+         firmware.o sysfs.o udbg.o idle_64.o
 obj-$(CONFIG_PPC64) += vdso64/
 obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
 obj-$(CONFIG_POWER4) += idle_power4.o
@@ -25,7 +25,7 @@ obj-$(CONFIG_PPC_OF) += of_device.o
 procfs-$(CONFIG_PPC64) := proc_ppc64.o
 obj-$(CONFIG_PROC_FS) += $(procfs-y)
 rtaspci-$(CONFIG_PPC64) := rtas_pci.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o $(rtaspci-y)
+obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y)
 obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
 obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
 obj-$(CONFIG_LPARCFG) += lparcfg.o
@@ -35,6 +35,7 @@ obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o
 obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o
 udbgscc-$(CONFIG_PPC64) := udbg_scc.o
 obj-$(CONFIG_PPC_PMAC) += $(udbgscc-y)
+obj64-$(CONFIG_PPC_MULTIPLATFORM) += nvram_64.o
 
 ifeq ($(CONFIG_PPC_MERGE),y)
 
@@ -49,12 +50,23 @@ extra-y += vmlinux.lds
 obj-y += process.o init_task.o time.o \
          prom.o traps.o setup-common.o
 obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o
-obj-$(CONFIG_PPC64) += misc_64.o
+obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o
 obj-$(CONFIG_PPC_OF) += prom_init.o
 obj-$(CONFIG_MODULES) += ppc_ksyms.o
 obj-$(CONFIG_BOOTX_TEXT) += btext.o
 obj-$(CONFIG_6xx) += idle_6xx.o
 obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+
+module-$(CONFIG_PPC64) += module_64.o
+obj-$(CONFIG_MODULES) += $(module-y)
+
+pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \
+         pci_direct_iommu.o iomap.o
+obj-$(CONFIG_PCI) += $(pci64-y)
+
+kexec64-$(CONFIG_PPC64) += machine_kexec_64.o
+obj-$(CONFIG_KEXEC) += $(kexec64-y)
 
 ifeq ($(CONFIG_PPC_ISERIES),y)
 $(obj)/head_64.o: $(obj)/lparmap.s
@@ -62,13 +74,12 @@ AFLAGS_head_64.o += -I$(obj)
 endif
 
 else
-# stuff used from here for ARCH=ppc or ARCH=ppc64
+# stuff used from here for ARCH=ppc
 smpobj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o \
-         setup-common.o $(smpobj-y)
-
 
 endif
 
+obj-$(CONFIG_PPC64) += $(obj64-y)
+
 extra-$(CONFIG_PPC_FPU) += fpu.o
 extra-$(CONFIG_PPC64) += entry_64.o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
new file mode 100644
index 000000000000..faaec9c6f78f
--- /dev/null
+++ b/arch/powerpc/kernel/align.c
@@ -0,0 +1,530 @@
1/* align.c - handle alignment exceptions for the Power PC.
2 *
3 * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
4 * Copyright (c) 1998-1999 TiVo, Inc.
5 * PowerPC 403GCX modifications.
6 * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
7 * PowerPC 403GCX/405GP modifications.
8 * Copyright (c) 2001-2002 PPC64 team, IBM Corp
9 * 64-bit and Power4 support
10 * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
11 * <benh@kernel.crashing.org>
12 * Merge ppc32 and ppc64 implementations
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 */
19
20#include <linux/kernel.h>
21#include <linux/mm.h>
22#include <asm/processor.h>
23#include <asm/uaccess.h>
24#include <asm/system.h>
25#include <asm/cache.h>
26#include <asm/cputable.h>
27
28struct aligninfo {
29 unsigned char len;
30 unsigned char flags;
31};
32
33#define IS_XFORM(inst) (((inst) >> 26) == 31)
34#define IS_DSFORM(inst) (((inst) >> 26) >= 56)
35
36#define INVALID { 0, 0 }
37
38#define LD 1 /* load */
39#define ST 2 /* store */
40#define SE 4 /* sign-extend value */
41#define F 8 /* to/from fp regs */
42#define U 0x10 /* update index register */
43#define M 0x20 /* multiple load/store */
44#define SW 0x40 /* byte swap int or ... */
45#define S 0x40 /* ... single-precision fp */
46#define SX 0x40 /* byte count in XER */
47#define HARD 0x80 /* string, stwcx. */
48
49#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
50
51#define SWAP(a, b) (t = (a), (a) = (b), (b) = t)
52
53/*
54 * The PowerPC stores certain bits of the instruction that caused the
55 * alignment exception in the DSISR register. This array maps those
56 * bits to information about the operand length and what the
57 * instruction would do.
58 */
59static struct aligninfo aligninfo[128] = {
60 { 4, LD }, /* 00 0 0000: lwz / lwarx */
61 INVALID, /* 00 0 0001 */
62 { 4, ST }, /* 00 0 0010: stw */
63 INVALID, /* 00 0 0011 */
64 { 2, LD }, /* 00 0 0100: lhz */
65 { 2, LD+SE }, /* 00 0 0101: lha */
66 { 2, ST }, /* 00 0 0110: sth */
67 { 4, LD+M }, /* 00 0 0111: lmw */
68 { 4, LD+F+S }, /* 00 0 1000: lfs */
69 { 8, LD+F }, /* 00 0 1001: lfd */
70 { 4, ST+F+S }, /* 00 0 1010: stfs */
71 { 8, ST+F }, /* 00 0 1011: stfd */
72 INVALID, /* 00 0 1100 */
73 { 8, LD }, /* 00 0 1101: ld/ldu/lwa */
74 INVALID, /* 00 0 1110 */
75 { 8, ST }, /* 00 0 1111: std/stdu */
76 { 4, LD+U }, /* 00 1 0000: lwzu */
77 INVALID, /* 00 1 0001 */
78 { 4, ST+U }, /* 00 1 0010: stwu */
79 INVALID, /* 00 1 0011 */
80 { 2, LD+U }, /* 00 1 0100: lhzu */
81 { 2, LD+SE+U }, /* 00 1 0101: lhau */
82 { 2, ST+U }, /* 00 1 0110: sthu */
83 { 4, ST+M }, /* 00 1 0111: stmw */
84 { 4, LD+F+S+U }, /* 00 1 1000: lfsu */
85 { 8, LD+F+U }, /* 00 1 1001: lfdu */
86 { 4, ST+F+S+U }, /* 00 1 1010: stfsu */
87 { 8, ST+F+U }, /* 00 1 1011: stfdu */
88 INVALID, /* 00 1 1100 */
89 INVALID, /* 00 1 1101 */
90 INVALID, /* 00 1 1110 */
91 INVALID, /* 00 1 1111 */
92 { 8, LD }, /* 01 0 0000: ldx */
93 INVALID, /* 01 0 0001 */
94 { 8, ST }, /* 01 0 0010: stdx */
95 INVALID, /* 01 0 0011 */
96 INVALID, /* 01 0 0100 */
97 { 4, LD+SE }, /* 01 0 0101: lwax */
98 INVALID, /* 01 0 0110 */
99 INVALID, /* 01 0 0111 */
100 { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */
101 { 4, LD+M+HARD }, /* 01 0 1001: lswi */
102 { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */
103 { 4, ST+M+HARD }, /* 01 0 1011: stswi */
104 INVALID, /* 01 0 1100 */
105 { 8, LD+U }, /* 01 0 1101: ldu */
106 INVALID, /* 01 0 1110 */
107 { 8, ST+U }, /* 01 0 1111: stdu */
108 { 8, LD+U }, /* 01 1 0000: ldux */
109 INVALID, /* 01 1 0001 */
110 { 8, ST+U }, /* 01 1 0010: stdux */
111 INVALID, /* 01 1 0011 */
112 INVALID, /* 01 1 0100 */
113 { 4, LD+SE+U }, /* 01 1 0101: lwaux */
114 INVALID, /* 01 1 0110 */
115 INVALID, /* 01 1 0111 */
116 INVALID, /* 01 1 1000 */
117 INVALID, /* 01 1 1001 */
118 INVALID, /* 01 1 1010 */
119 INVALID, /* 01 1 1011 */
120 INVALID, /* 01 1 1100 */
121 INVALID, /* 01 1 1101 */
122 INVALID, /* 01 1 1110 */
123 INVALID, /* 01 1 1111 */
124 INVALID, /* 10 0 0000 */
125 INVALID, /* 10 0 0001 */
126 INVALID, /* 10 0 0010: stwcx. */
127 INVALID, /* 10 0 0011 */
128 INVALID, /* 10 0 0100 */
129 INVALID, /* 10 0 0101 */
130 INVALID, /* 10 0 0110 */
131 INVALID, /* 10 0 0111 */
132 { 4, LD+SW }, /* 10 0 1000: lwbrx */
133 INVALID, /* 10 0 1001 */
134 { 4, ST+SW }, /* 10 0 1010: stwbrx */
135 INVALID, /* 10 0 1011 */
136 { 2, LD+SW }, /* 10 0 1100: lhbrx */
137 { 4, LD+SE }, /* 10 0 1101 lwa */
138 { 2, ST+SW }, /* 10 0 1110: sthbrx */
139 INVALID, /* 10 0 1111 */
140 INVALID, /* 10 1 0000 */
141 INVALID, /* 10 1 0001 */
142 INVALID, /* 10 1 0010 */
143 INVALID, /* 10 1 0011 */
144 INVALID, /* 10 1 0100 */
145 INVALID, /* 10 1 0101 */
146 INVALID, /* 10 1 0110 */
147 INVALID, /* 10 1 0111 */
148 INVALID, /* 10 1 1000 */
149 INVALID, /* 10 1 1001 */
150 INVALID, /* 10 1 1010 */
151 INVALID, /* 10 1 1011 */
152 INVALID, /* 10 1 1100 */
153 INVALID, /* 10 1 1101 */
154 INVALID, /* 10 1 1110 */
155 { 0, ST+HARD }, /* 10 1 1111: dcbz */
156 { 4, LD }, /* 11 0 0000: lwzx */
157 INVALID, /* 11 0 0001 */
158 { 4, ST }, /* 11 0 0010: stwx */
159 INVALID, /* 11 0 0011 */
160 { 2, LD }, /* 11 0 0100: lhzx */
161 { 2, LD+SE }, /* 11 0 0101: lhax */
162 { 2, ST }, /* 11 0 0110: sthx */
163 INVALID, /* 11 0 0111 */
164 { 4, LD+F+S }, /* 11 0 1000: lfsx */
165 { 8, LD+F }, /* 11 0 1001: lfdx */
166 { 4, ST+F+S }, /* 11 0 1010: stfsx */
167 { 8, ST+F }, /* 11 0 1011: stfdx */
168 INVALID, /* 11 0 1100 */
169 { 8, LD+M }, /* 11 0 1101: lmd */
170 INVALID, /* 11 0 1110 */
171 { 8, ST+M }, /* 11 0 1111: stmd */
172 { 4, LD+U }, /* 11 1 0000: lwzux */
173 INVALID, /* 11 1 0001 */
174 { 4, ST+U }, /* 11 1 0010: stwux */
175 INVALID, /* 11 1 0011 */
176 { 2, LD+U }, /* 11 1 0100: lhzux */
177 { 2, LD+SE+U }, /* 11 1 0101: lhaux */
178 { 2, ST+U }, /* 11 1 0110: sthux */
179 INVALID, /* 11 1 0111 */
180 { 4, LD+F+S+U }, /* 11 1 1000: lfsux */
181 { 8, LD+F+U }, /* 11 1 1001: lfdux */
182 { 4, ST+F+S+U }, /* 11 1 1010: stfsux */
183 { 8, ST+F+U }, /* 11 1 1011: stfdux */
184 INVALID, /* 11 1 1100 */
185 INVALID, /* 11 1 1101 */
186 INVALID, /* 11 1 1110 */
187 INVALID, /* 11 1 1111 */
188};
189
190/*
191 * Create a DSISR value from the instruction
192 */
193static inline unsigned make_dsisr(unsigned instr)
194{
195 unsigned dsisr;
196
197
198 /* bits 6:15 --> 22:31 */
199 dsisr = (instr & 0x03ff0000) >> 16;
200
201 if (IS_XFORM(instr)) {
202 /* bits 29:30 --> 15:16 */
203 dsisr |= (instr & 0x00000006) << 14;
204 /* bit 25 --> 17 */
205 dsisr |= (instr & 0x00000040) << 8;
206 /* bits 21:24 --> 18:21 */
207 dsisr |= (instr & 0x00000780) << 3;
208 } else {
209 /* bit 5 --> 17 */
210 dsisr |= (instr & 0x04000000) >> 12;
211 /* bits 1: 4 --> 18:21 */
212 dsisr |= (instr & 0x78000000) >> 17;
213 /* bits 30:31 --> 12:13 */
214 if (IS_DSFORM(instr))
215 dsisr |= (instr & 0x00000003) << 18;
216 }
217
218 return dsisr;
219}
220
221/*
222 * The dcbz (data cache block zero) instruction
223 * gives an alignment fault if used on non-cacheable
224 * memory. We handle the fault mainly for the
225 * case when we are running with the cache disabled
226 * for debugging.
227 */
228static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
229{
230 long __user *p;
231 int i, size;
232
233#ifdef __powerpc64__
234 size = ppc64_caches.dline_size;
235#else
236 size = L1_CACHE_BYTES;
237#endif
238 p = (long __user *) (regs->dar & -size);
239 if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
240 return -EFAULT;
241 for (i = 0; i < size / sizeof(long); ++i)
242 if (__put_user(0, p+i))
243 return -EFAULT;
244 return 1;
245}
246
247/*
248 * Emulate load & store multiple instructions
249 * On 64-bit machines, these instructions only affect/use the
250 * bottom 4 bytes of each register, and the loads clear the
251 * top 4 bytes of the affected register.
252 */
253#ifdef CONFIG_PPC64
254#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
255#else
256#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
257#endif
258
259static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
260 unsigned int reg, unsigned int nb,
261 unsigned int flags, unsigned int instr)
262{
263 unsigned long *rptr;
264 unsigned int nb0, i;
265
266 /*
267 * We do not try to emulate 8 bytes multiple as they aren't really
268 * available in our operating environments and we don't try to
269 * emulate multiples operations in kernel land as they should never
270 * be used/generated there at least not on unaligned boundaries
271 */
272 if (unlikely((nb > 4) || !user_mode(regs)))
273 return 0;
274
275 /* lmw, stmw, lswi/x, stswi/x */
276 nb0 = 0;
277 if (flags & HARD) {
278 if (flags & SX) {
279 nb = regs->xer & 127;
280 if (nb == 0)
281 return 1;
282 } else {
283 if (__get_user(instr,
284 (unsigned int __user *)regs->nip))
285 return -EFAULT;
286 nb = (instr >> 11) & 0x1f;
287 if (nb == 0)
288 nb = 32;
289 }
290 if (nb + reg * 4 > 128) {
291 nb0 = nb + reg * 4 - 128;
292 nb = 128 - reg * 4;
293 }
294 } else {
295 /* lwm, stmw */
296 nb = (32 - reg) * 4;
297 }
298
299 if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
300 return -EFAULT; /* bad address */
301
302 rptr = &regs->gpr[reg];
303 if (flags & LD) {
304 /*
305 * This zeroes the top 4 bytes of the affected registers
306 * in 64-bit mode, and also zeroes out any remaining
307 * bytes of the last register for lsw*.
308 */
309 memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
310 if (nb0 > 0)
311 memset(&regs->gpr[0], 0,
312 ((nb0 + 3) / 4) * sizeof(unsigned long));
313
314 for (i = 0; i < nb; ++i)
315 if (__get_user(REG_BYTE(rptr, i), addr + i))
316 return -EFAULT;
317 if (nb0 > 0) {
318 rptr = &regs->gpr[0];
319 addr += nb;
320 for (i = 0; i < nb0; ++i)
321 if (__get_user(REG_BYTE(rptr, i), addr + i))
322 return -EFAULT;
323 }
324
325 } else {
326 for (i = 0; i < nb; ++i)
327 if (__put_user(REG_BYTE(rptr, i), addr + i))
328 return -EFAULT;
329 if (nb0 > 0) {
330 rptr = &regs->gpr[0];
331 addr += nb;
332 for (i = 0; i < nb0; ++i)
333 if (__put_user(REG_BYTE(rptr, i), addr + i))
334 return -EFAULT;
335 }
336 }
337 return 1;
338}
339
340
341/*
342 * Called on alignment exception. Attempts to fixup
343 *
344 * Return 1 on success
345 * Return 0 if unable to handle the interrupt
346 * Return -EFAULT if data address is bad
347 */
348
349int fix_alignment(struct pt_regs *regs)
350{
351 unsigned int instr, nb, flags;
352 unsigned int reg, areg;
353 unsigned int dsisr;
354 unsigned char __user *addr;
355 unsigned char __user *p;
356 int ret, t;
357 union {
358 u64 ll;
359 double dd;
360 unsigned char v[8];
361 struct {
362 unsigned hi32;
363 int low32;
364 } x32;
365 struct {
366 unsigned char hi48[6];
367 short low16;
368 } x16;
369 } data;
370
371 /*
372 * We require a complete register set, if not, then our assembly
373 * is broken
374 */
375 CHECK_FULL_REGS(regs);
376
377 dsisr = regs->dsisr;
378
379 /* Some processors don't provide us with a DSISR we can use here,
380 * let's make one up from the instruction
381 */
382 if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
383 unsigned int real_instr;
384 if (unlikely(__get_user(real_instr,
385 (unsigned int __user *)regs->nip)))
386 return -EFAULT;
387 dsisr = make_dsisr(real_instr);
388 }
389
390 /* extract the operation and registers from the dsisr */
391 reg = (dsisr >> 5) & 0x1f; /* source/dest register */
392 areg = dsisr & 0x1f; /* register to update */
393 instr = (dsisr >> 10) & 0x7f;
394 instr |= (dsisr >> 13) & 0x60;
395
396 /* Lookup the operation in our table */
397 nb = aligninfo[instr].len;
398 flags = aligninfo[instr].flags;
399
400 /* DAR has the operand effective address */
401 addr = (unsigned char __user *)regs->dar;
402
403 /* A size of 0 indicates an instruction we don't support, with
404 * the exception of DCBZ which is handled as a special case here
405 */
406 if (instr == DCBZ)
407 return emulate_dcbz(regs, addr);
408 if (unlikely(nb == 0))
409 return 0;
410
411 /* Load/Store Multiple instructions are handled in their own
412 * function
413 */
414 if (flags & M)
415 return emulate_multiple(regs, addr, reg, nb, flags, instr);
416
417 /* Verify the address of the operand */
418 if (unlikely(user_mode(regs) &&
419 !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
420 addr, nb)))
421 return -EFAULT;
422
423 /* Force the fprs into the save area so we can reference them */
424 if (flags & F) {
425 /* userland only */
426 if (unlikely(!user_mode(regs)))
427 return 0;
428 flush_fp_to_thread(current);
429 }
430
431 /* If we are loading, get the data from user space, else
432 * get it from register values
433 */
434 if (flags & LD) {
435 data.ll = 0;
436 ret = 0;
437 p = addr;
438 switch (nb) {
439 case 8:
440 ret |= __get_user(data.v[0], p++);
441 ret |= __get_user(data.v[1], p++);
442 ret |= __get_user(data.v[2], p++);
443 ret |= __get_user(data.v[3], p++);
444 case 4:
445 ret |= __get_user(data.v[4], p++);
446 ret |= __get_user(data.v[5], p++);
447 case 2:
448 ret |= __get_user(data.v[6], p++);
449 ret |= __get_user(data.v[7], p++);
450 if (unlikely(ret))
451 return -EFAULT;
452 }
453 } else if (flags & F)
454 data.dd = current->thread.fpr[reg];
455 else
456 data.ll = regs->gpr[reg];
457
458 /* Perform other misc operations like sign extension, byteswap,
459 * or floating point single precision conversion
460 */
461 switch (flags & ~U) {
462 case LD+SE: /* sign extend */
463 if ( nb == 2 )
464 data.ll = data.x16.low16;
465 else /* nb must be 4 */
466 data.ll = data.x32.low32;
467 break;
468 case LD+S: /* byte-swap */
469 case ST+S:
470 if (nb == 2) {
471 SWAP(data.v[6], data.v[7]);
472 } else {
473 SWAP(data.v[4], data.v[7]);
474 SWAP(data.v[5], data.v[6]);
475 }
476 break;
477
478 /* Single-precision FP load and store require conversions... */
479 case LD+F+S:
480#ifdef CONFIG_PPC_FPU
481 preempt_disable();
482 enable_kernel_fp();
483 cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
484 preempt_enable();
485#else
486 return 0;
487#endif
488 break;
489 case ST+F+S:
490#ifdef CONFIG_PPC_FPU
491 preempt_disable();
492 enable_kernel_fp();
493 cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
494 preempt_enable();
495#else
496 return 0;
497#endif
498 break;
499 }
500
501 /* Store result to memory or update registers */
502 if (flags & ST) {
503 ret = 0;
504 p = addr;
505 switch (nb) {
506 case 8:
507 ret |= __put_user(data.v[0], p++);
508 ret |= __put_user(data.v[1], p++);
509 ret |= __put_user(data.v[2], p++);
510 ret |= __put_user(data.v[3], p++);
511 case 4:
512 ret |= __put_user(data.v[4], p++);
513 ret |= __put_user(data.v[5], p++);
514 case 2:
515 ret |= __put_user(data.v[6], p++);
516 ret |= __put_user(data.v[7], p++);
517 }
518 if (unlikely(ret))
519 return -EFAULT;
520 } else if (flags & F)
521 current->thread.fpr[reg] = data.dd;
522 else
523 regs->gpr[reg] = data.ll;
524
525 /* Update RA as needed */
526 if (flags & U)
527 regs->gpr[areg] = regs->dar;
528
529 return 1;
530}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4550eb4f4fbd..91538d2445bf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -270,13 +270,15 @@ int main(void)
 	DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
 	DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
 	DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
+	DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec));
+	DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec));
 	DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
 	DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
 #else
 	DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
 	DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
-	DEFINE(TSPEC32_TV_SEC, offsetof(struct timespec, tv_sec));
-	DEFINE(TSPEC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
+	DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec));
+	DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
 #endif
 	/* timeval/timezone offsets for use by vdso */
 	DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
new file mode 100644
index 000000000000..7c3419656ccc
--- /dev/null
+++ b/arch/powerpc/kernel/dma_64.c
@@ -0,0 +1,151 @@
1/*
2 * Copyright (C) 2004 IBM Corporation
3 *
4 * Implements the generic device dma API for ppc64. Handles
5 * the pci and vio busses
6 */
7
8#include <linux/device.h>
9#include <linux/dma-mapping.h>
10/* Include the busses we support */
11#include <linux/pci.h>
12#include <asm/vio.h>
13#include <asm/scatterlist.h>
14#include <asm/bug.h>
15
16static struct dma_mapping_ops *get_dma_ops(struct device *dev)
17{
18#ifdef CONFIG_PCI
19 if (dev->bus == &pci_bus_type)
20 return &pci_dma_ops;
21#endif
22#ifdef CONFIG_IBMVIO
23 if (dev->bus == &vio_bus_type)
24 return &vio_dma_ops;
25#endif
26 return NULL;
27}
28
29int dma_supported(struct device *dev, u64 mask)
30{
31 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
32
33 if (dma_ops)
34 return dma_ops->dma_supported(dev, mask);
35 BUG();
36 return 0;
37}
38EXPORT_SYMBOL(dma_supported);
39
40int dma_set_mask(struct device *dev, u64 dma_mask)
41{
42#ifdef CONFIG_PCI
43 if (dev->bus == &pci_bus_type)
44 return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
45#endif
46#ifdef CONFIG_IBMVIO
47 if (dev->bus == &vio_bus_type)
48 return -EIO;
49#endif /* CONFIG_IBMVIO */
50 BUG();
51 return 0;
52}
53EXPORT_SYMBOL(dma_set_mask);
54
55void *dma_alloc_coherent(struct device *dev, size_t size,
56 dma_addr_t *dma_handle, gfp_t flag)
57{
58 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
59
60 if (dma_ops)
61 return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
62 BUG();
63 return NULL;
64}
65EXPORT_SYMBOL(dma_alloc_coherent);
66
67void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
68 dma_addr_t dma_handle)
69{
70 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
71
72 if (dma_ops)
73 dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
74 else
75 BUG();
76}
77EXPORT_SYMBOL(dma_free_coherent);
78
79dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
80 enum dma_data_direction direction)
81{
82 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
83
84 if (dma_ops)
85 return dma_ops->map_single(dev, cpu_addr, size, direction);
86 BUG();
87 return (dma_addr_t)0;
88}
89EXPORT_SYMBOL(dma_map_single);
90
91void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
92 enum dma_data_direction direction)
93{
94 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
95
96 if (dma_ops)
97 dma_ops->unmap_single(dev, dma_addr, size, direction);
98 else
99 BUG();
100}
101EXPORT_SYMBOL(dma_unmap_single);
102
103dma_addr_t dma_map_page(struct device *dev, struct page *page,
104 unsigned long offset, size_t size,
105 enum dma_data_direction direction)
106{
107 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
108
109 if (dma_ops)
110 return dma_ops->map_single(dev,
111 (page_address(page) + offset), size, direction);
112 BUG();
113 return (dma_addr_t)0;
114}
115EXPORT_SYMBOL(dma_map_page);
116
117void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
118 enum dma_data_direction direction)
119{
120 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
121
122 if (dma_ops)
123 dma_ops->unmap_single(dev, dma_address, size, direction);
124 else
125 BUG();
126}
127EXPORT_SYMBOL(dma_unmap_page);
128
129int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
130 enum dma_data_direction direction)
131{
132 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
133
134 if (dma_ops)
135 return dma_ops->map_sg(dev, sg, nents, direction);
136 BUG();
137 return 0;
138}
139EXPORT_SYMBOL(dma_map_sg);
140
141void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
142 enum dma_data_direction direction)
143{
144 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
145
146 if (dma_ops)
147 dma_ops->unmap_sg(dev, sg, nhwentries, direction);
148 else
149 BUG();
150}
151EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 5063c603fad4..8d60fa99fc4b 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -24,7 +24,7 @@
  * Copyright 2002-2004 MontaVista Software, Inc.
  *	PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
  * Copyright 2004 Freescale Semiconductor, Inc
- *	PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com>
+ *	PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
diff --git a/arch/powerpc/kernel/idle_64.c b/arch/powerpc/kernel/idle_64.c
new file mode 100644
index 000000000000..b879d3057ef8
--- /dev/null
+++ b/arch/powerpc/kernel/idle_64.c
@@ -0,0 +1,121 @@
1/*
2 * Idle daemon for PowerPC. Idle daemon will handle any action
3 * that needs to be taken when the system becomes idle.
4 *
5 * Originally Written by Cort Dougan (cort@cs.nmt.edu)
6 *
7 * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com>
8 *
9 * Additional shared processor, SMT, and firmware support
10 * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/sched.h>
20#include <linux/kernel.h>
21#include <linux/smp.h>
22#include <linux/cpu.h>
23#include <linux/sysctl.h>
24
25#include <asm/system.h>
26#include <asm/processor.h>
27#include <asm/cputable.h>
28#include <asm/time.h>
29#include <asm/machdep.h>
30#include <asm/smp.h>
31
32extern void power4_idle(void);
33
34void default_idle(void)
35{
36 unsigned int cpu = smp_processor_id();
37 set_thread_flag(TIF_POLLING_NRFLAG);
38
39 while (1) {
40 if (!need_resched()) {
41 while (!need_resched() && !cpu_is_offline(cpu)) {
42 ppc64_runlatch_off();
43
44 /*
45 * Go into low thread priority and possibly
46 * low power mode.
47 */
48 HMT_low();
49 HMT_very_low();
50 }
51
52 HMT_medium();
53 }
54
55 ppc64_runlatch_on();
56 preempt_enable_no_resched();
57 schedule();
58 preempt_disable();
59 if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
60 cpu_die();
61 }
62}
63
64void native_idle(void)
65{
66 while (1) {
67 ppc64_runlatch_off();
68
69 if (!need_resched())
70 power4_idle();
71
72 if (need_resched()) {
73 ppc64_runlatch_on();
74 preempt_enable_no_resched();
75 schedule();
76 preempt_disable();
77 }
78
79 if (cpu_is_offline(smp_processor_id()) &&
80 system_state == SYSTEM_RUNNING)
81 cpu_die();
82 }
83}
84
85void cpu_idle(void)
86{
87 BUG_ON(NULL == ppc_md.idle_loop);
88 ppc_md.idle_loop();
89}
90
91int powersave_nap;
92
93#ifdef CONFIG_SYSCTL
94/*
95 * Register the sysctl to set/clear powersave_nap.
96 */
97static ctl_table powersave_nap_ctl_table[]={
98 {
99 .ctl_name = KERN_PPC_POWERSAVE_NAP,
100 .procname = "powersave-nap",
101 .data = &powersave_nap,
102 .maxlen = sizeof(int),
103 .mode = 0644,
104 .proc_handler = &proc_dointvec,
105 },
106 { 0, },
107};
108static ctl_table powersave_nap_sysctl_root[] = {
109 { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, },
110 { 0,},
111};
112
113static int __init
114register_powersave_nap_sysctl(void)
115{
116 register_sysctl_table(powersave_nap_sysctl_root, 0);
117
118 return 0;
119}
120__initcall(register_powersave_nap_sysctl);
121#endif
diff --git a/arch/powerpc/kernel/ioctl32.c b/arch/powerpc/kernel/ioctl32.c
index 3fa6a93adbd0..0fa3d27fef01 100644
--- a/arch/powerpc/kernel/ioctl32.c
+++ b/arch/powerpc/kernel/ioctl32.c
@@ -40,10 +40,6 @@ IOCTL_TABLE_START
 #define DECLARES
 #include "compat_ioctl.c"
 
-/* Little p (/dev/rtc, /dev/envctrl, etc.) */
-COMPATIBLE_IOCTL(_IOR('p', 20, int[7]))	/* RTCGET */
-COMPATIBLE_IOCTL(_IOW('p', 21, int[7]))	/* RTCSET */
-
 IOCTL_TABLE_END
 
 int ioctl_table_size = ARRAY_SIZE(ioctl_start);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
new file mode 100644
index 000000000000..6160c8dbb7c5
--- /dev/null
+++ b/arch/powerpc/kernel/iomap.c
@@ -0,0 +1,146 @@
1/*
2 * arch/ppc64/kernel/iomap.c
3 *
4 * ppc64 "iomap" interface implementation.
5 *
6 * (C) Copyright 2004 Linus Torvalds
7 */
8#include <linux/init.h>
9#include <linux/pci.h>
10#include <linux/mm.h>
11#include <asm/io.h>
12
13/*
14 * Here comes the ppc64 implementation of the IOMAP
15 * interfaces.
16 */
17unsigned int fastcall ioread8(void __iomem *addr)
18{
19 return readb(addr);
20}
21unsigned int fastcall ioread16(void __iomem *addr)
22{
23 return readw(addr);
24}
25unsigned int fastcall ioread16be(void __iomem *addr)
26{
27 return in_be16(addr);
28}
29unsigned int fastcall ioread32(void __iomem *addr)
30{
31 return readl(addr);
32}
33unsigned int fastcall ioread32be(void __iomem *addr)
34{
35 return in_be32(addr);
36}
37EXPORT_SYMBOL(ioread8);
38EXPORT_SYMBOL(ioread16);
39EXPORT_SYMBOL(ioread16be);
40EXPORT_SYMBOL(ioread32);
41EXPORT_SYMBOL(ioread32be);
42
43void fastcall iowrite8(u8 val, void __iomem *addr)
44{
45 writeb(val, addr);
46}
47void fastcall iowrite16(u16 val, void __iomem *addr)
48{
49 writew(val, addr);
50}
51void fastcall iowrite16be(u16 val, void __iomem *addr)
52{
53 out_be16(addr, val);
54}
55void fastcall iowrite32(u32 val, void __iomem *addr)
56{
57 writel(val, addr);
58}
59void fastcall iowrite32be(u32 val, void __iomem *addr)
60{
61 out_be32(addr, val);
62}
63EXPORT_SYMBOL(iowrite8);
64EXPORT_SYMBOL(iowrite16);
65EXPORT_SYMBOL(iowrite16be);
66EXPORT_SYMBOL(iowrite32);
67EXPORT_SYMBOL(iowrite32be);
68
69/*
70 * These are the "repeat read/write" functions. Note the
71 * non-CPU byte order. We do things in "IO byteorder"
72 * here.
73 *
74 * FIXME! We could make these do EEH handling if we really
75 * wanted. Not clear if we do.
76 */
77void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
78{
79 _insb((u8 __iomem *) addr, dst, count);
80}
81void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
82{
83 _insw_ns((u16 __iomem *) addr, dst, count);
84}
85void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
86{
87 _insl_ns((u32 __iomem *) addr, dst, count);
88}
89EXPORT_SYMBOL(ioread8_rep);
90EXPORT_SYMBOL(ioread16_rep);
91EXPORT_SYMBOL(ioread32_rep);
92
93void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
94{
95 _outsb((u8 __iomem *) addr, src, count);
96}
97void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
98{
99 _outsw_ns((u16 __iomem *) addr, src, count);
100}
101void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
102{
103 _outsl_ns((u32 __iomem *) addr, src, count);
104}
105EXPORT_SYMBOL(iowrite8_rep);
106EXPORT_SYMBOL(iowrite16_rep);
107EXPORT_SYMBOL(iowrite32_rep);
108
109void __iomem *ioport_map(unsigned long port, unsigned int len)
110{
111 if (!_IO_IS_VALID(port))
112 return NULL;
113 return (void __iomem *) (port+pci_io_base);
114}
115
116void ioport_unmap(void __iomem *addr)
117{
118 /* Nothing to do */
119}
120EXPORT_SYMBOL(ioport_map);
121EXPORT_SYMBOL(ioport_unmap);
122
123void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
124{
125 unsigned long start = pci_resource_start(dev, bar);
126 unsigned long len = pci_resource_len(dev, bar);
127 unsigned long flags = pci_resource_flags(dev, bar);
128
129 if (!len)
130 return NULL;
131 if (max && len > max)
132 len = max;
133 if (flags & IORESOURCE_IO)
134 return ioport_map(start, len);
135 if (flags & IORESOURCE_MEM)
136 return ioremap(start, len);
137 /* What? */
138 return NULL;
139}
140
141void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
142{
143 /* Nothing to do */
144}
145EXPORT_SYMBOL(pci_iomap);
146EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
new file mode 100644
index 000000000000..4d9b4388918b
--- /dev/null
+++ b/arch/powerpc/kernel/iommu.c
@@ -0,0 +1,572 @@
1/*
2 * arch/ppc64/kernel/iommu.c
3 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
4 *
5 * Rewrite, cleanup, new allocation schemes, virtual merging:
6 * Copyright (C) 2004 Olof Johansson, IBM Corporation
7 * and Ben. Herrenschmidt, IBM Corporation
8 *
9 * Dynamic DMA mapping support, bus-independent parts.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26
27#include <linux/config.h>
28#include <linux/init.h>
29#include <linux/types.h>
30#include <linux/slab.h>
31#include <linux/mm.h>
32#include <linux/spinlock.h>
33#include <linux/string.h>
34#include <linux/dma-mapping.h>
35#include <linux/init.h>
36#include <linux/bitops.h>
37#include <asm/io.h>
38#include <asm/prom.h>
39#include <asm/iommu.h>
40#include <asm/pci-bridge.h>
41#include <asm/machdep.h>
42
43#define DBG(...)
44
45#ifdef CONFIG_IOMMU_VMERGE
46static int novmerge = 0;
47#else
48static int novmerge = 1;
49#endif
50
51static int __init setup_iommu(char *str)
52{
53 if (!strcmp(str, "novmerge"))
54 novmerge = 1;
55 else if (!strcmp(str, "vmerge"))
56 novmerge = 0;
57 return 1;
58}
59
60__setup("iommu=", setup_iommu);
61
62static unsigned long iommu_range_alloc(struct iommu_table *tbl,
63 unsigned long npages,
64 unsigned long *handle,
65 unsigned int align_order)
66{
67 unsigned long n, end, i, start;
68 unsigned long limit;
69 int largealloc = npages > 15;
70 int pass = 0;
71 unsigned long align_mask;
72
73 align_mask = 0xffffffffffffffffl >> (64 - align_order);
74
75 /* This allocator was derived from x86_64's bit string search */
76
77 /* Sanity check */
78 if (unlikely(npages) == 0) {
79 if (printk_ratelimit())
80 WARN_ON(1);
81 return DMA_ERROR_CODE;
82 }
83
84 if (handle && *handle)
85 start = *handle;
86 else
87 start = largealloc ? tbl->it_largehint : tbl->it_hint;
88
89 /* Use only half of the table for small allocs (15 pages or less) */
90 limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
91
92 if (largealloc && start < tbl->it_halfpoint)
93 start = tbl->it_halfpoint;
94
95 /* The case below can happen if we have a small segment appended
96 * to a large, or when the previous alloc was at the very end of
97 * the available space. If so, go back to the initial start.
98 */
99 if (start >= limit)
100 start = largealloc ? tbl->it_largehint : tbl->it_hint;
101
102 again:
103
104 n = find_next_zero_bit(tbl->it_map, limit, start);
105
106 /* Align allocation */
107 n = (n + align_mask) & ~align_mask;
108
109 end = n + npages;
110
111 if (unlikely(end >= limit)) {
112 if (likely(pass < 2)) {
113 /* First failure, just rescan the half of the table.
114 * Second failure, rescan the other half of the table.
115 */
116 start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
117 limit = pass ? tbl->it_size : limit;
118 pass++;
119 goto again;
120 } else {
121 /* Third failure, give up */
122 return DMA_ERROR_CODE;
123 }
124 }
125
126 for (i = n; i < end; i++)
127 if (test_bit(i, tbl->it_map)) {
128 start = i+1;
129 goto again;
130 }
131
132 for (i = n; i < end; i++)
133 __set_bit(i, tbl->it_map);
134
135 /* Bump the hint to a new block for small allocs. */
136 if (largealloc) {
137 /* Don't bump to new block to avoid fragmentation */
138 tbl->it_largehint = end;
139 } else {
140 /* Overflow will be taken care of at the next allocation */
141 tbl->it_hint = (end + tbl->it_blocksize - 1) &
142 ~(tbl->it_blocksize - 1);
143 }
144
145 /* Update handle for SG allocations */
146 if (handle)
147 *handle = end;
148
149 return n;
150}
151
152static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
153 unsigned int npages, enum dma_data_direction direction,
154 unsigned int align_order)
155{
156 unsigned long entry, flags;
157 dma_addr_t ret = DMA_ERROR_CODE;
158
159 spin_lock_irqsave(&(tbl->it_lock), flags);
160
161 entry = iommu_range_alloc(tbl, npages, NULL, align_order);
162
163 if (unlikely(entry == DMA_ERROR_CODE)) {
164 spin_unlock_irqrestore(&(tbl->it_lock), flags);
165 return DMA_ERROR_CODE;
166 }
167
168 entry += tbl->it_offset; /* Offset into real TCE table */
169 ret = entry << PAGE_SHIFT; /* Set the return dma address */
170
171 /* Put the TCEs in the HW table */
172 ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
173 direction);
174
175
176 /* Flush/invalidate TLB caches if necessary */
177 if (ppc_md.tce_flush)
178 ppc_md.tce_flush(tbl);
179
180 spin_unlock_irqrestore(&(tbl->it_lock), flags);
181
182 /* Make sure updates are seen by hardware */
183 mb();
184
185 return ret;
186}
187
188static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
189 unsigned int npages)
190{
191 unsigned long entry, free_entry;
192 unsigned long i;
193
194 entry = dma_addr >> PAGE_SHIFT;
195 free_entry = entry - tbl->it_offset;
196
197 if (((free_entry + npages) > tbl->it_size) ||
198 (entry < tbl->it_offset)) {
199 if (printk_ratelimit()) {
200 printk(KERN_INFO "iommu_free: invalid entry\n");
201 printk(KERN_INFO "\tentry = 0x%lx\n", entry);
202 printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
203 printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
204 printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno);
205 printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size);
206 printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset);
207 printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index);
208 WARN_ON(1);
209 }
210 return;
211 }
212
213 ppc_md.tce_free(tbl, entry, npages);
214
215 for (i = 0; i < npages; i++)
216 __clear_bit(free_entry+i, tbl->it_map);
217}
218
219static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
220 unsigned int npages)
221{
222 unsigned long flags;
223
224 spin_lock_irqsave(&(tbl->it_lock), flags);
225
226 __iommu_free(tbl, dma_addr, npages);
227
228 /* Make sure TLB cache is flushed if the HW needs it. We do
229 * not do an mb() here on purpose, it is not needed on any of
230 * the current platforms.
231 */
232 if (ppc_md.tce_flush)
233 ppc_md.tce_flush(tbl);
234
235 spin_unlock_irqrestore(&(tbl->it_lock), flags);
236}
237
238int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
239 struct scatterlist *sglist, int nelems,
240 enum dma_data_direction direction)
241{
242 dma_addr_t dma_next = 0, dma_addr;
243 unsigned long flags;
244 struct scatterlist *s, *outs, *segstart;
245 int outcount, incount;
246 unsigned long handle;
247
248 BUG_ON(direction == DMA_NONE);
249
250 if ((nelems == 0) || !tbl)
251 return 0;
252
253 outs = s = segstart = &sglist[0];
254 outcount = 1;
255 incount = nelems;
256 handle = 0;
257
258 /* Init first segment length for backout at failure */
259 outs->dma_length = 0;
260
261 DBG("mapping %d elements:\n", nelems);
262
263 spin_lock_irqsave(&(tbl->it_lock), flags);
264
265 for (s = outs; nelems; nelems--, s++) {
266 unsigned long vaddr, npages, entry, slen;
267
268 slen = s->length;
269 /* Sanity check */
270 if (slen == 0) {
271 dma_next = 0;
272 continue;
273 }
274 /* Allocate iommu entries for that segment */
275 vaddr = (unsigned long)page_address(s->page) + s->offset;
276 npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
277 npages >>= PAGE_SHIFT;
278 entry = iommu_range_alloc(tbl, npages, &handle, 0);
279
280 DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
281
282 /* Handle failure */
283 if (unlikely(entry == DMA_ERROR_CODE)) {
284 if (printk_ratelimit())
285 printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
286 " npages %lx\n", tbl, vaddr, npages);
287 goto failure;
288 }
289
290 /* Convert entry to a dma_addr_t */
291 entry += tbl->it_offset;
292 dma_addr = entry << PAGE_SHIFT;
293 dma_addr |= s->offset;
294
295 DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n",
296 npages, entry, dma_addr);
297
298 /* Insert into HW table */
299 ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
300
301 /* If we are in an open segment, try merging */
302 if (segstart != s) {
303 DBG(" - trying merge...\n");
304 /* We cannot merge if:
305 * - allocated dma_addr isn't contiguous to previous allocation
306 */
307 if (novmerge || (dma_addr != dma_next)) {
308 /* Can't merge: create a new segment */
309 segstart = s;
310 outcount++; outs++;
311 DBG(" can't merge, new segment.\n");
312 } else {
313 outs->dma_length += s->length;
314 DBG(" merged, new len: %lx\n", outs->dma_length);
315 }
316 }
317
318 if (segstart == s) {
319 /* This is a new segment, fill entries */
320 DBG(" - filling new segment.\n");
321 outs->dma_address = dma_addr;
322 outs->dma_length = slen;
323 }
324
325 /* Calculate next page pointer for contiguous check */
326 dma_next = dma_addr + slen;
327
328 DBG(" - dma next is: %lx\n", dma_next);
329 }
330
331 /* Flush/invalidate TLB caches if necessary */
332 if (ppc_md.tce_flush)
333 ppc_md.tce_flush(tbl);
334
335 spin_unlock_irqrestore(&(tbl->it_lock), flags);
336
337 /* Make sure updates are seen by hardware */
338 mb();
339
340 DBG("mapped %d elements:\n", outcount);
341
342 /* For the sake of iommu_unmap_sg, we clear out the length in the
343 * next entry of the sglist if we didn't fill the list completely
344 */
345 if (outcount < incount) {
346 outs++;
347 outs->dma_address = DMA_ERROR_CODE;
348 outs->dma_length = 0;
349 }
350 return outcount;
351
352 failure:
353 for (s = &sglist[0]; s <= outs; s++) {
354 if (s->dma_length != 0) {
355 unsigned long vaddr, npages;
356
357 vaddr = s->dma_address & PAGE_MASK;
358 npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
359 >> PAGE_SHIFT;
360 __iommu_free(tbl, vaddr, npages);
361 }
362 }
363 spin_unlock_irqrestore(&(tbl->it_lock), flags);
364 return 0;
365}
366
367
368void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
369 int nelems, enum dma_data_direction direction)
370{
371 unsigned long flags;
372
373 BUG_ON(direction == DMA_NONE);
374
375 if (!tbl)
376 return;
377
378 spin_lock_irqsave(&(tbl->it_lock), flags);
379
380 while (nelems--) {
381 unsigned int npages;
382 dma_addr_t dma_handle = sglist->dma_address;
383
384 if (sglist->dma_length == 0)
385 break;
386 npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
387 - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
388 __iommu_free(tbl, dma_handle, npages);
389 sglist++;
390 }
391
392 /* Flush/invalidate TLBs if necessary. As for iommu_free(), we
393 * do not do an mb() here, the affected platforms do not need it
394 * when freeing.
395 */
396 if (ppc_md.tce_flush)
397 ppc_md.tce_flush(tbl);
398
399 spin_unlock_irqrestore(&(tbl->it_lock), flags);
400}
401
402/*
403 * Build a iommu_table structure. This contains a bit map which
404 * is used to manage allocation of the tce space.
405 */
406struct iommu_table *iommu_init_table(struct iommu_table *tbl)
407{
408 unsigned long sz;
409 static int welcomed = 0;
410
411 /* Set aside 1/4 of the table for large allocations. */
412 tbl->it_halfpoint = tbl->it_size * 3 / 4;
413
414 /* number of bytes needed for the bitmap */
415 sz = (tbl->it_size + 7) >> 3;
416
417 tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
418 if (!tbl->it_map)
419 panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
420
421 memset(tbl->it_map, 0, sz);
422
423 tbl->it_hint = 0;
424 tbl->it_largehint = tbl->it_halfpoint;
425 spin_lock_init(&tbl->it_lock);
426
427 /* Clear the hardware table in case firmware left allocations in it */
428 ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
429
430 if (!welcomed) {
431 printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
432 novmerge ? "disabled" : "enabled");
433 welcomed = 1;
434 }
435
436 return tbl;
437}
438
439void iommu_free_table(struct device_node *dn)
440{
441 struct pci_dn *pdn = dn->data;
442 struct iommu_table *tbl = pdn->iommu_table;
443 unsigned long bitmap_sz, i;
444 unsigned int order;
445
446 if (!tbl || !tbl->it_map) {
447 printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
448 dn->full_name);
449 return;
450 }
451
452 /* verify that table contains no entries */
453 /* it_size is in entries, and we're examining 64 at a time */
454 for (i = 0; i < (tbl->it_size/64); i++) {
455 if (tbl->it_map[i] != 0) {
456 printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
457 __FUNCTION__, dn->full_name);
458 break;
459 }
460 }
461
462 /* calculate bitmap size in bytes */
463 bitmap_sz = (tbl->it_size + 7) / 8;
464
465 /* free bitmap */
466 order = get_order(bitmap_sz);
467 free_pages((unsigned long) tbl->it_map, order);
468
469 /* free table */
470 kfree(tbl);
471}
472
473/* Creates TCEs for a user provided buffer. The user buffer must be
474 * contiguous real kernel storage (not vmalloc). The address of the buffer
475 * passed here is the kernel (virtual) address of the buffer. The buffer
476 * need not be page aligned, the dma_addr_t returned will point to the same
477 * byte within the page as vaddr.
478 */
479dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
480 size_t size, enum dma_data_direction direction)
481{
482 dma_addr_t dma_handle = DMA_ERROR_CODE;
483 unsigned long uaddr;
484 unsigned int npages;
485
486 BUG_ON(direction == DMA_NONE);
487
488 uaddr = (unsigned long)vaddr;
489 npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
490 npages >>= PAGE_SHIFT;
491
492 if (tbl) {
493 dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0);
494 if (dma_handle == DMA_ERROR_CODE) {
495 if (printk_ratelimit()) {
496 printk(KERN_INFO "iommu_alloc failed, "
497 "tbl %p vaddr %p npages %d\n",
498 tbl, vaddr, npages);
499 }
500 } else
501 dma_handle |= (uaddr & ~PAGE_MASK);
502 }
503
504 return dma_handle;
505}
506
507void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
508 size_t size, enum dma_data_direction direction)
509{
510 BUG_ON(direction == DMA_NONE);
511
512 if (tbl)
513 iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
514 (dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
515}
516
517/* Allocates a contiguous real buffer and creates mappings over it.
518 * Returns the virtual address of the buffer and sets dma_handle
519 * to the dma address (mapping) of the first page.
520 */
521void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
522 dma_addr_t *dma_handle, gfp_t flag)
523{
524 void *ret = NULL;
525 dma_addr_t mapping;
526 unsigned int npages, order;
527
528 size = PAGE_ALIGN(size);
529 npages = size >> PAGE_SHIFT;
530 order = get_order(size);
531
532 /*
533 * Client asked for way too much space. This is checked later
534 * anyway. It is easier to debug here for the drivers than in
535 * the tce tables.
536 */
537 if (order >= IOMAP_MAX_ORDER) {
538 printk("iommu_alloc_consistent size too large: 0x%lx\n", size);
539 return NULL;
540 }
541
542 if (!tbl)
543 return NULL;
544
545 /* Alloc enough pages (and possibly more) */
546 ret = (void *)__get_free_pages(flag, order);
547 if (!ret)
548 return NULL;
549 memset(ret, 0, size);
550
551 /* Set up tces to cover the allocated range */
552 mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order);
553 if (mapping == DMA_ERROR_CODE) {
554 free_pages((unsigned long)ret, order);
555 ret = NULL;
556 } else
557 *dma_handle = mapping;
558 return ret;
559}
560
561void iommu_free_coherent(struct iommu_table *tbl, size_t size,
562 void *vaddr, dma_addr_t dma_handle)
563{
564 unsigned int npages;
565
566 if (tbl) {
567 size = PAGE_ALIGN(size);
568 npages = size >> PAGE_SHIFT;
569 iommu_free(tbl, dma_handle, npages);
570 free_pages((unsigned long)vaddr, get_order(size));
571 }
572}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4b7940693f3d..5a71ed9612fe 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -71,6 +71,11 @@
 #include <asm/paca.h>
 #endif
 
+int __irq_offset_value;
+#ifdef CONFIG_PPC32
+EXPORT_SYMBOL(__irq_offset_value);
+#endif
+
 static int ppc_spurious_interrupts;
 
 #if defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP)
@@ -98,7 +103,6 @@ extern atomic_t ipi_sent
 EXPORT_SYMBOL(irq_desc);
 
 int distribute_irqs = 1;
-int __irq_offset_value;
 u64 ppc64_interrupt_controller;
 #endif /* CONFIG_PPC64 */
 
@@ -311,7 +315,6 @@ void __init init_IRQ(void)
 }
 
 #ifdef CONFIG_PPC64
-#ifndef CONFIG_PPC_ISERIES
 /*
  * Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
  */
@@ -420,8 +423,6 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
 
 }
 
-#endif /* CONFIG_PPC_ISERIES */
-
 #ifdef CONFIG_IRQSTACKS
 struct thread_info *softirq_ctx[NR_CPUS];
 struct thread_info *hardirq_ctx[NR_CPUS];
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
new file mode 100644
index 000000000000..511af54e6230
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes.c
@@ -0,0 +1,459 @@
1/*
2 * Kernel Probes (KProbes)
3 * arch/ppc64/kernel/kprobes.c
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * Copyright (C) IBM Corporation, 2002, 2004
20 *
21 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
22 * Probes initial implementation ( includes contributions from
23 * Rusty Russell).
24 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
25 * interface to access function arguments.
26 * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
27 * for PPC64
28 */
29
30#include <linux/config.h>
31#include <linux/kprobes.h>
32#include <linux/ptrace.h>
33#include <linux/preempt.h>
34#include <asm/cacheflush.h>
35#include <asm/kdebug.h>
36#include <asm/sstep.h>
37
38static DECLARE_MUTEX(kprobe_mutex);
39DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
40DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
41
42int __kprobes arch_prepare_kprobe(struct kprobe *p)
43{
44 int ret = 0;
45 kprobe_opcode_t insn = *p->addr;
46
47 if ((unsigned long)p->addr & 0x03) {
48 printk("Attempt to register kprobe at an unaligned address\n");
49 ret = -EINVAL;
50 } else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
51 printk("Cannot register a kprobe on rfid or mtmsrd\n");
52 ret = -EINVAL;
53 }
54
55 /* insn must be on a special executable page on ppc64 */
56 if (!ret) {
57 down(&kprobe_mutex);
58 p->ainsn.insn = get_insn_slot();
59 up(&kprobe_mutex);
60 if (!p->ainsn.insn)
61 ret = -ENOMEM;
62 }
63 return ret;
64}
65
66void __kprobes arch_copy_kprobe(struct kprobe *p)
67{
68 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
69 p->opcode = *p->addr;
70}
71
72void __kprobes arch_arm_kprobe(struct kprobe *p)
73{
74 *p->addr = BREAKPOINT_INSTRUCTION;
75 flush_icache_range((unsigned long) p->addr,
76 (unsigned long) p->addr + sizeof(kprobe_opcode_t));
77}
78
79void __kprobes arch_disarm_kprobe(struct kprobe *p)
80{
81 *p->addr = p->opcode;
82 flush_icache_range((unsigned long) p->addr,
83 (unsigned long) p->addr + sizeof(kprobe_opcode_t));
84}
85
86void __kprobes arch_remove_kprobe(struct kprobe *p)
87{
88 down(&kprobe_mutex);
89 free_insn_slot(p->ainsn.insn);
90 up(&kprobe_mutex);
91}
92
93static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
94{
95 kprobe_opcode_t insn = *p->ainsn.insn;
96
97 regs->msr |= MSR_SE;
98
99 /* single step inline if it is a trap variant */
100 if (is_trap(insn))
101 regs->nip = (unsigned long)p->addr;
102 else
103 regs->nip = (unsigned long)p->ainsn.insn;
104}
105
106static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
107{
108 kcb->prev_kprobe.kp = kprobe_running();
109 kcb->prev_kprobe.status = kcb->kprobe_status;
110 kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
111}
112
113static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
114{
115 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
116 kcb->kprobe_status = kcb->prev_kprobe.status;
117 kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
118}
119
120static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
121 struct kprobe_ctlblk *kcb)
122{
123 __get_cpu_var(current_kprobe) = p;
124 kcb->kprobe_saved_msr = regs->msr;
125}
126
127/* Called with kretprobe_lock held */
128void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
129 struct pt_regs *regs)
130{
131 struct kretprobe_instance *ri;
132
133 if ((ri = get_free_rp_inst(rp)) != NULL) {
134 ri->rp = rp;
135 ri->task = current;
136 ri->ret_addr = (kprobe_opcode_t *)regs->link;
137
138 /* Replace the return addr with trampoline addr */
139 regs->link = (unsigned long)kretprobe_trampoline;
140 add_rp_inst(ri);
141 } else {
142 rp->nmissed++;
143 }
144}
145
146static inline int kprobe_handler(struct pt_regs *regs)
147{
148 struct kprobe *p;
149 int ret = 0;
150 unsigned int *addr = (unsigned int *)regs->nip;
151 struct kprobe_ctlblk *kcb;
152
153 /*
154 * We don't want to be preempted for the entire
155 * duration of kprobe processing
156 */
157 preempt_disable();
158 kcb = get_kprobe_ctlblk();
159
160 /* Check we're not actually recursing */
161 if (kprobe_running()) {
162 p = get_kprobe(addr);
163 if (p) {
164 kprobe_opcode_t insn = *p->ainsn.insn;
165 if (kcb->kprobe_status == KPROBE_HIT_SS &&
166 is_trap(insn)) {
167 regs->msr &= ~MSR_SE;
168 regs->msr |= kcb->kprobe_saved_msr;
169 goto no_kprobe;
170 }
171 /* We have reentered the kprobe_handler(), since
172 * another probe was hit while within the handler.
 173			 * Here we save the original kprobe variables and just
 174			 * single-step the instruction of the new probe, without
 175			 * calling any user handlers.
176 */
177 save_previous_kprobe(kcb);
178 set_current_kprobe(p, regs, kcb);
179 kcb->kprobe_saved_msr = regs->msr;
180 p->nmissed++;
181 prepare_singlestep(p, regs);
182 kcb->kprobe_status = KPROBE_REENTER;
183 return 1;
184 } else {
185 p = __get_cpu_var(current_kprobe);
186 if (p->break_handler && p->break_handler(p, regs)) {
187 goto ss_probe;
188 }
189 }
190 goto no_kprobe;
191 }
192
193 p = get_kprobe(addr);
194 if (!p) {
195 if (*addr != BREAKPOINT_INSTRUCTION) {
196 /*
197 * PowerPC has multiple variants of the "trap"
198 * instruction. If the current instruction is a
199 * trap variant, it could belong to someone else
200 */
201 kprobe_opcode_t cur_insn = *addr;
202 if (is_trap(cur_insn))
203 goto no_kprobe;
204 /*
205 * The breakpoint instruction was removed right
206 * after we hit it. Another cpu has removed
207 * either a probepoint or a debugger breakpoint
208 * at this address. In either case, no further
209 * handling of this interrupt is appropriate.
210 */
211 ret = 1;
212 }
213 /* Not one of ours: let kernel handle it */
214 goto no_kprobe;
215 }
216
217 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
218 set_current_kprobe(p, regs, kcb);
219 if (p->pre_handler && p->pre_handler(p, regs))
220 /* handler has already set things up, so skip ss setup */
221 return 1;
222
223ss_probe:
224 prepare_singlestep(p, regs);
225 kcb->kprobe_status = KPROBE_HIT_SS;
226 return 1;
227
228no_kprobe:
229 preempt_enable_no_resched();
230 return ret;
231}
232
233/*
234 * Function return probe trampoline:
235 * - init_kprobes() establishes a probepoint here
236 * - When the probed function returns, this probe
237 * causes the handlers to fire
238 */
239void kretprobe_trampoline_holder(void)
240{
241 asm volatile(".global kretprobe_trampoline\n"
242 "kretprobe_trampoline:\n"
243 "nop\n");
244}
245
246/*
247 * Called when the probe at kretprobe trampoline is hit
248 */
249int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
250{
251 struct kretprobe_instance *ri = NULL;
252 struct hlist_head *head;
253 struct hlist_node *node, *tmp;
254 unsigned long flags, orig_ret_address = 0;
255 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
256
257 spin_lock_irqsave(&kretprobe_lock, flags);
258 head = kretprobe_inst_table_head(current);
259
260 /*
261 * It is possible to have multiple instances associated with a given
 262	 * task either because multiple functions in the call path
 263	 * have a return probe installed on them, and/or more than one
 264	 * return probe was registered for a target function.
265 *
266 * We can handle this because:
267 * - instances are always inserted at the head of the list
268 * - when multiple return probes are registered for the same
269 * function, the first instance's ret_addr will point to the
270 * real return address, and all the rest will point to
271 * kretprobe_trampoline
272 */
273 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
274 if (ri->task != current)
275 /* another task is sharing our hash bucket */
276 continue;
277
278 if (ri->rp && ri->rp->handler)
279 ri->rp->handler(ri, regs);
280
281 orig_ret_address = (unsigned long)ri->ret_addr;
282 recycle_rp_inst(ri);
283
284 if (orig_ret_address != trampoline_address)
285 /*
286 * This is the real return address. Any other
287 * instances associated with this task are for
288 * other calls deeper on the call stack
289 */
290 break;
291 }
292
293 BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
294 regs->nip = orig_ret_address;
295
296 reset_current_kprobe();
297 spin_unlock_irqrestore(&kretprobe_lock, flags);
298 preempt_enable_no_resched();
299
300 /*
301 * By returning a non-zero value, we are telling
302 * kprobe_handler() that we don't want the post_handler
303 * to run (and have re-enabled preemption)
304 */
305 return 1;
306}
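As a usage illustration only (not part of this patch), a minimal kretprobe client of the interface above might look like the following sketch; probed_addr is a placeholder that a real caller would have to resolve to a valid kernel text address:

/* Illustrative sketch: report returns from a probed function.
 * probed_addr is hypothetical and must point at a kernel function
 * entry on this architecture. */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	printk("probed function returned to %p\n", ri->ret_addr);
	return 0;
}

static struct kretprobe example_rp = {
	.handler = ret_handler,
	.maxactive = 16,
};

static int __init example_init(void)
{
	example_rp.kp.addr = (kprobe_opcode_t *)probed_addr;
	return register_kretprobe(&example_rp);
}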
307
308/*
309 * Called after single-stepping. p->addr is the address of the
310 * instruction whose first byte has been replaced by the "breakpoint"
311 * instruction. To avoid the SMP problems that can occur when we
312 * temporarily put back the original opcode to single-step, we
313 * single-stepped a copy of the instruction. The address of this
314 * copy is p->ainsn.insn.
315 */
316static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
317{
318 int ret;
319 unsigned int insn = *p->ainsn.insn;
320
321 regs->nip = (unsigned long)p->addr;
322 ret = emulate_step(regs, insn);
323 if (ret == 0)
324 regs->nip = (unsigned long)p->addr + 4;
325}
326
327static inline int post_kprobe_handler(struct pt_regs *regs)
328{
329 struct kprobe *cur = kprobe_running();
330 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
331
332 if (!cur)
333 return 0;
334
335 if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
336 kcb->kprobe_status = KPROBE_HIT_SSDONE;
337 cur->post_handler(cur, regs, 0);
338 }
339
340 resume_execution(cur, regs);
341 regs->msr |= kcb->kprobe_saved_msr;
342
 343	/* Restore the previously saved kprobe variables and continue. */
344 if (kcb->kprobe_status == KPROBE_REENTER) {
345 restore_previous_kprobe(kcb);
346 goto out;
347 }
348 reset_current_kprobe();
349out:
350 preempt_enable_no_resched();
351
352 /*
353 * if somebody else is singlestepping across a probe point, msr
354 * will have SE set, in which case, continue the remaining processing
355 * of do_debug, as if this is not a probe hit.
356 */
357 if (regs->msr & MSR_SE)
358 return 0;
359
360 return 1;
361}
362
363static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
364{
365 struct kprobe *cur = kprobe_running();
366 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
367
368 if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
369 return 1;
370
371 if (kcb->kprobe_status & KPROBE_HIT_SS) {
372 resume_execution(cur, regs);
373 regs->msr &= ~MSR_SE;
374 regs->msr |= kcb->kprobe_saved_msr;
375
376 reset_current_kprobe();
377 preempt_enable_no_resched();
378 }
379 return 0;
380}
381
382/*
 383 * Wrapper routine for handling exceptions.
384 */
385int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
386 unsigned long val, void *data)
387{
388 struct die_args *args = (struct die_args *)data;
389 int ret = NOTIFY_DONE;
390
391 switch (val) {
392 case DIE_BPT:
393 if (kprobe_handler(args->regs))
394 ret = NOTIFY_STOP;
395 break;
396 case DIE_SSTEP:
397 if (post_kprobe_handler(args->regs))
398 ret = NOTIFY_STOP;
399 break;
400 case DIE_PAGE_FAULT:
401 /* kprobe_running() needs smp_processor_id() */
402 preempt_disable();
403 if (kprobe_running() &&
404 kprobe_fault_handler(args->regs, args->trapnr))
405 ret = NOTIFY_STOP;
406 preempt_enable();
407 break;
408 default:
409 break;
410 }
411 return ret;
412}
413
414int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
415{
416 struct jprobe *jp = container_of(p, struct jprobe, kp);
417 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
418
419 memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
420
421 /* setup return addr to the jprobe handler routine */
422 regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
423 regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
424
425 return 1;
426}
427
428void __kprobes jprobe_return(void)
429{
430 asm volatile("trap" ::: "memory");
431}
432
433void __kprobes jprobe_return_end(void)
434{
435};
436
437int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
438{
439 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
440
441 /*
 442	 * FIXME - we should ideally be validating that we got here because
443 * of the "trap" in jprobe_return() above, before restoring the
444 * saved regs...
445 */
446 memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
447 preempt_enable_no_resched();
448 return 1;
449}
450
451static struct kprobe trampoline_p = {
452 .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
453 .pre_handler = trampoline_probe_handler
454};
455
456int __init arch_init_kprobes(void)
457{
458 return register_kprobe(&trampoline_p);
459}
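For completeness, a hedged sketch of how a client would attach an ordinary kprobe through this arch layer (illustrative only; probe_target is a placeholder to be resolved by the caller, and must be word-aligned and not an mtmsrd/rfid instruction, as enforced in arch_prepare_kprobe above):

/* Illustrative sketch: print the interrupted nip each time the probe fires.
 * probe_target is hypothetical; a real caller would resolve it to a
 * kernel text address. */
static int pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	printk("kprobe hit at nip %lx\n", regs->nip);
	return 0;
}

static struct kprobe example_kp = {
	.pre_handler = pre_handler,
};

static int __init example_register(void)
{
	example_kp.addr = (kprobe_opcode_t *)probe_target;
	return register_kprobe(&example_kp);
}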
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 1b3ba8a440a6..9dda16ccde78 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -42,32 +42,6 @@
42 42
43/* #define LPARCFG_DEBUG */ 43/* #define LPARCFG_DEBUG */
44 44
45/* find a better place for this function... */
46static void log_plpar_hcall_return(unsigned long rc, char *tag)
47{
48 if (rc == 0) /* success, return */
49 return;
50/* check for null tag ? */
51 if (rc == H_Hardware)
52 printk(KERN_INFO
53 "plpar-hcall (%s) failed with hardware fault\n", tag);
54 else if (rc == H_Function)
55 printk(KERN_INFO
56 "plpar-hcall (%s) failed; function not allowed\n", tag);
57 else if (rc == H_Authority)
58 printk(KERN_INFO
59 "plpar-hcall (%s) failed; not authorized to this function\n",
60 tag);
61 else if (rc == H_Parameter)
62 printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
63 tag);
64 else
65 printk(KERN_INFO
66 "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
67 tag, rc);
68
69}
70
71static struct proc_dir_entry *proc_ppc64_lparcfg; 45static struct proc_dir_entry *proc_ppc64_lparcfg;
72#define LPARCFG_BUFF_SIZE 4096 46#define LPARCFG_BUFF_SIZE 4096
73 47
@@ -172,6 +146,31 @@ static int lparcfg_data(struct seq_file *m, void *v)
172/* 146/*
173 * Methods used to fetch LPAR data when running on a pSeries platform. 147 * Methods used to fetch LPAR data when running on a pSeries platform.
174 */ 148 */
149/* find a better place for this function... */
150static void log_plpar_hcall_return(unsigned long rc, char *tag)
151{
152 if (rc == 0) /* success, return */
153 return;
154/* check for null tag ? */
155 if (rc == H_Hardware)
156 printk(KERN_INFO
157 "plpar-hcall (%s) failed with hardware fault\n", tag);
158 else if (rc == H_Function)
159 printk(KERN_INFO
160 "plpar-hcall (%s) failed; function not allowed\n", tag);
161 else if (rc == H_Authority)
162 printk(KERN_INFO
163 "plpar-hcall (%s) failed; not authorized to this function\n",
164 tag);
165 else if (rc == H_Parameter)
166 printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
167 tag);
168 else
169 printk(KERN_INFO
170 "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
171 tag, rc);
172
173}
175 174
176/* 175/*
177 * H_GET_PPP hcall returns info in 4 parms. 176 * H_GET_PPP hcall returns info in 4 parms.
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
new file mode 100644
index 000000000000..97c51e452be7
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -0,0 +1,358 @@
1/*
2 * machine_kexec.c - handle transition of Linux booting another kernel
3 *
4 * Copyright (C) 2004-2005, IBM Corp.
5 *
6 * Created by: Milton D Miller II
7 *
8 * This source code is licensed under the GNU General Public License,
9 * Version 2. See the file COPYING for more details.
10 */
11
12
13#include <linux/cpumask.h>
14#include <linux/kexec.h>
15#include <linux/smp.h>
16#include <linux/thread_info.h>
17#include <linux/errno.h>
18
19#include <asm/page.h>
20#include <asm/current.h>
21#include <asm/machdep.h>
22#include <asm/cacheflush.h>
23#include <asm/paca.h>
24#include <asm/mmu.h>
25#include <asm/sections.h> /* _end */
26#include <asm/prom.h>
27#include <asm/smp.h>
28
29#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */
30
31/* Have this around till we move it into crash specific file */
32note_buf_t crash_notes[NR_CPUS];
33
 34/* Dummy for now. It is not clear whether we need a crash shutdown here,
 35 * or what it would achieve. Keep it for now so the code compiles in the
 36 * generic kexec environment
37 */
38void machine_crash_shutdown(struct pt_regs *regs)
39{
40 /* do nothing right now */
41 /* smp_relase_cpus() if we want smp on panic kernel */
42 /* cpu_irq_down to isolate us until we are ready */
43}
44
45int machine_kexec_prepare(struct kimage *image)
46{
47 int i;
48 unsigned long begin, end; /* limits of segment */
49 unsigned long low, high; /* limits of blocked memory range */
50 struct device_node *node;
51 unsigned long *basep;
52 unsigned int *sizep;
53
54 if (!ppc_md.hpte_clear_all)
55 return -ENOENT;
56
57 /*
58 * Since we use the kernel fault handlers and paging code to
59 * handle the virtual mode, we must make sure no destination
60 * overlaps kernel static data or bss.
61 */
62 for (i = 0; i < image->nr_segments; i++)
63 if (image->segment[i].mem < __pa(_end))
64 return -ETXTBSY;
65
66 /*
67 * For non-LPAR, we absolutely can not overwrite the mmu hash
68 * table, since we are still using the bolted entries in it to
69 * do the copy. Check that here.
70 *
71 * It is safe if the end is below the start of the blocked
72 * region (end <= low), or if the beginning is after the
73 * end of the blocked region (begin >= high). Use the
74 * boolean identity !(a || b) === (!a && !b).
75 */
76 if (htab_address) {
77 low = __pa(htab_address);
78 high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE;
79
80 for (i = 0; i < image->nr_segments; i++) {
81 begin = image->segment[i].mem;
82 end = begin + image->segment[i].memsz;
83
84 if ((begin < high) && (end > low))
85 return -ETXTBSY;
86 }
87 }
88
89 /* We also should not overwrite the tce tables */
90 for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
91 node = of_find_node_by_type(node, "pci")) {
92 basep = (unsigned long *)get_property(node, "linux,tce-base",
93 NULL);
94 sizep = (unsigned int *)get_property(node, "linux,tce-size",
95 NULL);
96 if (basep == NULL || sizep == NULL)
97 continue;
98
99 low = *basep;
100 high = low + (*sizep);
101
102 for (i = 0; i < image->nr_segments; i++) {
103 begin = image->segment[i].mem;
104 end = begin + image->segment[i].memsz;
105
106 if ((begin < high) && (end > low))
107 return -ETXTBSY;
108 }
109 }
110
111 return 0;
112}
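Both overlap checks above use the predicate derived in the comment on the hash-table case; as a standalone sketch (illustrative only, not part of the patch):

/* Illustrative helper: a segment [begin, end) overlaps a blocked range
 * [low, high) exactly when it is neither entirely below nor entirely
 * above it, i.e. !(end <= low || begin >= high), which by De Morgan
 * reduces to (begin < high) && (end > low). */
static int ranges_overlap(unsigned long begin, unsigned long end,
			  unsigned long low, unsigned long high)
{
	return (begin < high) && (end > low);
}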
113
114void machine_kexec_cleanup(struct kimage *image)
115{
116 /* we do nothing in prepare that needs to be undone */
117}
118
119#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
120
121static void copy_segments(unsigned long ind)
122{
123 unsigned long entry;
124 unsigned long *ptr;
125 void *dest;
126 void *addr;
127
128 /*
 129	 * We rely on kexec_load to create a list that properly
130 * initializes these pointers before they are used.
131 * We will still crash if the list is wrong, but at least
132 * the compiler will be quiet.
133 */
134 ptr = NULL;
135 dest = NULL;
136
137 for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
138 addr = __va(entry & PAGE_MASK);
139
140 switch (entry & IND_FLAGS) {
141 case IND_DESTINATION:
142 dest = addr;
143 break;
144 case IND_INDIRECTION:
145 ptr = addr;
146 break;
147 case IND_SOURCE:
148 copy_page(dest, addr);
149 dest += PAGE_SIZE;
150 }
151 }
152}
153
154void kexec_copy_flush(struct kimage *image)
155{
156 long i, nr_segments = image->nr_segments;
157 struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
158
159 /* save the ranges on the stack to efficiently flush the icache */
160 memcpy(ranges, image->segment, sizeof(ranges));
161
162 /*
163 * After this call we may not use anything allocated in dynamic
164 * memory, including *image.
165 *
166 * Only globals and the stack are allowed.
167 */
168 copy_segments(image->head);
169
170 /*
171 * we need to clear the icache for all dest pages sometime,
172 * including ones that were in place on the original copy
173 */
174 for (i = 0; i < nr_segments; i++)
175 flush_icache_range(ranges[i].mem + KERNELBASE,
176 ranges[i].mem + KERNELBASE +
177 ranges[i].memsz);
178}
179
180#ifdef CONFIG_SMP
181
 182/* FIXME: we should schedule this function to be called on all cpus by
 183 * sending them an interrupt, but we would like to call it off irq level
 184 * so that the interrupt controller is left clean.
185 */
186void kexec_smp_down(void *arg)
187{
188 if (ppc_md.kexec_cpu_down)
189 ppc_md.kexec_cpu_down(0, 1);
190
191 local_irq_disable();
192 kexec_smp_wait();
193 /* NOTREACHED */
194}
195
196static void kexec_prepare_cpus(void)
197{
198 int my_cpu, i, notified=-1;
199
200 smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
201 my_cpu = get_cpu();
202
 203	/* check the other cpus are now down (via paca hw cpu id == -1) */
204 for (i=0; i < NR_CPUS; i++) {
205 if (i == my_cpu)
206 continue;
207
208 while (paca[i].hw_cpu_id != -1) {
209 barrier();
210 if (!cpu_possible(i)) {
211 printk("kexec: cpu %d hw_cpu_id %d is not"
212 " possible, ignoring\n",
213 i, paca[i].hw_cpu_id);
214 break;
215 }
216 if (!cpu_online(i)) {
 217				/* Fixme: the cpu may be spinning in
 218				 * pSeries_secondary_wait with a paca,
 219				 * waiting to be brought online.
220 */
221 printk("kexec: cpu %d hw_cpu_id %d is not"
222 " online, ignoring\n",
223 i, paca[i].hw_cpu_id);
224 break;
225 }
226 if (i != notified) {
227 printk( "kexec: waiting for cpu %d (physical"
228 " %d) to go down\n",
229 i, paca[i].hw_cpu_id);
230 notified = i;
231 }
232 }
233 }
234
235 /* after we tell the others to go down */
236 if (ppc_md.kexec_cpu_down)
237 ppc_md.kexec_cpu_down(0, 0);
238
239 put_cpu();
240
241 local_irq_disable();
242}
243
244#else /* ! SMP */
245
246static void kexec_prepare_cpus(void)
247{
248 /*
 249	 * move the secondaries to us so that we can copy
250 * the new kernel 0-0x100 safely
251 *
252 * do this if kexec in setup.c ?
253 *
254 * We need to release the cpus if we are ever going from an
255 * UP to an SMP kernel.
256 */
257 smp_release_cpus();
258 if (ppc_md.kexec_cpu_down)
259 ppc_md.kexec_cpu_down(0, 0);
260 local_irq_disable();
261}
262
263#endif /* SMP */
264
265/*
266 * kexec thread structure and stack.
267 *
268 * We need to make sure that this is 16384-byte aligned due to the
269 * way process stacks are handled. It also must be statically allocated
270 * or allocated as part of the kimage, because everything else may be
271 * overwritten when we copy the kexec image. We piggyback on the
272 * "init_task" linker section here to statically allocate a stack.
273 *
274 * We could use a smaller stack if we don't care about anything using
275 * current, but that audit has not been performed.
276 */
277union thread_union kexec_stack
278 __attribute__((__section__(".data.init_task"))) = { };
279
280/* Our assembly helper, in kexec_stub.S */
281extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
282 void *image, void *control,
283 void (*clear_all)(void)) ATTRIB_NORET;
284
285/* too late to fail here */
286void machine_kexec(struct kimage *image)
287{
288
289 /* prepare control code if any */
290
291 /* shutdown other cpus into our wait loop and quiesce interrupts */
292 kexec_prepare_cpus();
293
 294	/* switch to a statically allocated stack. Based on the irq stack code.
295 * XXX: the task struct will likely be invalid once we do the copy!
296 */
297 kexec_stack.thread_info.task = current_thread_info()->task;
298 kexec_stack.thread_info.flags = 0;
299
300 /* Some things are best done in assembly. Finding globals with
301 * a toc is easier in C, so pass in what we can.
302 */
303 kexec_sequence(&kexec_stack, image->start, image,
304 page_address(image->control_code_page),
305 ppc_md.hpte_clear_all);
306 /* NOTREACHED */
307}
308
309/* Values we need to export to the second kernel via the device tree. */
310static unsigned long htab_base, htab_size, kernel_end;
311
312static struct property htab_base_prop = {
313 .name = "linux,htab-base",
314 .length = sizeof(unsigned long),
315 .value = (unsigned char *)&htab_base,
316};
317
318static struct property htab_size_prop = {
319 .name = "linux,htab-size",
320 .length = sizeof(unsigned long),
321 .value = (unsigned char *)&htab_size,
322};
323
324static struct property kernel_end_prop = {
325 .name = "linux,kernel-end",
326 .length = sizeof(unsigned long),
327 .value = (unsigned char *)&kernel_end,
328};
329
330static void __init export_htab_values(void)
331{
332 struct device_node *node;
333
334 node = of_find_node_by_path("/chosen");
335 if (!node)
336 return;
337
338 kernel_end = __pa(_end);
339 prom_add_property(node, &kernel_end_prop);
340
341 /* On machines with no htab htab_address is NULL */
342 if (NULL == htab_address)
343 goto out;
344
345 htab_base = __pa(htab_address);
346 prom_add_property(node, &htab_base_prop);
347
348 htab_size = 1UL << ppc64_pft_size;
349 prom_add_property(node, &htab_size_prop);
350
351 out:
352 of_node_put(node);
353}
354
355void __init kexec_setup(void)
356{
357 export_htab_values();
358}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index f6d84a75ed26..624a983a9676 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -27,14 +27,6 @@
27 27
28 .text 28 .text
29 29
30 .align 5
31_GLOBAL(__delay)
32 cmpwi 0,r3,0
33 mtctr r3
34 beqlr
351: bdnz 1b
36 blr
37
38/* 30/*
39 * This returns the high 64 bits of the product of two 64-bit numbers. 31 * This returns the high 64 bits of the product of two 64-bit numbers.
40 */ 32 */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
new file mode 100644
index 000000000000..928b8581fcb0
--- /dev/null
+++ b/arch/powerpc/kernel/module_64.c
@@ -0,0 +1,455 @@
1/* Kernel module help for PPC64.
2 Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18#include <linux/module.h>
19#include <linux/elf.h>
20#include <linux/moduleloader.h>
21#include <linux/err.h>
22#include <linux/vmalloc.h>
23#include <asm/module.h>
24#include <asm/uaccess.h>
25
26/* FIXME: We don't do .init separately. To do this, we'd need to have
27 a separate r2 value in the init and core section, and stub between
28 them, too.
29
30 Using a magic allocator which places modules within 32MB solves
31 this, and makes other things simpler. Anton?
32 --RR. */
33#if 0
34#define DEBUGP printk
35#else
36#define DEBUGP(fmt , ...)
37#endif
38
39/* There's actually a third entry here, but it's unused */
40struct ppc64_opd_entry
41{
42 unsigned long funcaddr;
43 unsigned long r2;
44};
45
46/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
47 the kernel itself). But on PPC64, these need to be used for every
48 jump, actually, to reset r2 (TOC+0x8000). */
49struct ppc64_stub_entry
50{
51 /* 28 byte jump instruction sequence (7 instructions) */
52 unsigned char jump[28];
53 unsigned char unused[4];
54 /* Data for the above code */
55 struct ppc64_opd_entry opd;
56};
57
58/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
59 function which may be more than 24-bits away. We could simply
60 patch the new r2 value and function pointer into the stub, but it's
61 significantly shorter to put these values at the end of the stub
62 code, and patch the stub address (32-bits relative to the TOC ptr,
63 r2) into the stub. */
64static struct ppc64_stub_entry ppc64_stub =
65{ .jump = {
66 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */
67 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */
68 /* Save current r2 value in magic place on the stack. */
69 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */
70 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */
71 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */
72 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */
73 0x4e, 0x80, 0x04, 0x20 /* bctr */
74} };
75
76/* Count how many different 24-bit relocations (different symbol,
77 different addend) */
78static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
79{
80 unsigned int i, j, ret = 0;
81
82 /* FIXME: Only count external ones --RR */
83 /* Sure, this is order(n^2), but it's usually short, and not
84 time critical */
85 for (i = 0; i < num; i++) {
86 /* Only count 24-bit relocs, others don't need stubs */
87 if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
88 continue;
89 for (j = 0; j < i; j++) {
90 /* If this addend appeared before, it's
91 already been counted */
92 if (rela[i].r_info == rela[j].r_info
93 && rela[i].r_addend == rela[j].r_addend)
94 break;
95 }
96 if (j == i) ret++;
97 }
98 return ret;
99}
100
101void *module_alloc(unsigned long size)
102{
103 if (size == 0)
104 return NULL;
105
106 return vmalloc_exec(size);
107}
108
109/* Free memory returned from module_alloc */
110void module_free(struct module *mod, void *module_region)
111{
112 vfree(module_region);
113 /* FIXME: If module_region == mod->init_region, trim exception
114 table entries. */
115}
116
117/* Get size of potential trampolines required. */
118static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
119 const Elf64_Shdr *sechdrs)
120{
121 /* One extra reloc so it's always 0-funcaddr terminated */
122 unsigned long relocs = 1;
123 unsigned i;
124
125 /* Every relocated section... */
126 for (i = 1; i < hdr->e_shnum; i++) {
127 if (sechdrs[i].sh_type == SHT_RELA) {
128 DEBUGP("Found relocations in section %u\n", i);
129 DEBUGP("Ptr: %p. Number: %lu\n",
130 (void *)sechdrs[i].sh_addr,
131 sechdrs[i].sh_size / sizeof(Elf64_Rela));
132 relocs += count_relocs((void *)sechdrs[i].sh_addr,
133 sechdrs[i].sh_size
134 / sizeof(Elf64_Rela));
135 }
136 }
137
138 DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
139 return relocs * sizeof(struct ppc64_stub_entry);
140}
141
142static void dedotify_versions(struct modversion_info *vers,
143 unsigned long size)
144{
145 struct modversion_info *end;
146
147 for (end = (void *)vers + size; vers < end; vers++)
148 if (vers->name[0] == '.')
149 memmove(vers->name, vers->name+1, strlen(vers->name));
150}
151
152/* Undefined symbols which refer to .funcname, hack to funcname */
153static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
154{
155 unsigned int i;
156
157 for (i = 1; i < numsyms; i++) {
158 if (syms[i].st_shndx == SHN_UNDEF) {
159 char *name = strtab + syms[i].st_name;
160 if (name[0] == '.')
161 memmove(name, name+1, strlen(name));
162 }
163 }
164}
165
166int module_frob_arch_sections(Elf64_Ehdr *hdr,
167 Elf64_Shdr *sechdrs,
168 char *secstrings,
169 struct module *me)
170{
171 unsigned int i;
172
173 /* Find .toc and .stubs sections, symtab and strtab */
174 for (i = 1; i < hdr->e_shnum; i++) {
175 char *p;
176 if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
177 me->arch.stubs_section = i;
178 else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
179 me->arch.toc_section = i;
180 else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
181 dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
182 sechdrs[i].sh_size);
183
184 /* We don't handle .init for the moment: rename to _init */
185 while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
186 p[0] = '_';
187
188 if (sechdrs[i].sh_type == SHT_SYMTAB)
189 dedotify((void *)hdr + sechdrs[i].sh_offset,
190 sechdrs[i].sh_size / sizeof(Elf64_Sym),
191 (void *)hdr
192 + sechdrs[sechdrs[i].sh_link].sh_offset);
193 }
194 if (!me->arch.stubs_section || !me->arch.toc_section) {
195 printk("%s: doesn't contain .toc or .stubs.\n", me->name);
196 return -ENOEXEC;
197 }
198
199 /* Override the stubs size */
200 sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
201 return 0;
202}
203
204int apply_relocate(Elf64_Shdr *sechdrs,
205 const char *strtab,
206 unsigned int symindex,
207 unsigned int relsec,
208 struct module *me)
209{
210 printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name);
211 return -ENOEXEC;
212}
213
214/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
 215 gives the maximum span for an instruction which uses a signed
216 offset) */
217static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
218{
219 return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
220}
221
222/* Both low and high 16 bits are added as SIGNED additions, so if low
223 16 bits has high bit set, high 16 bits must be adjusted. These
224 macros do that (stolen from binutils). */
225#define PPC_LO(v) ((v) & 0xffff)
226#define PPC_HI(v) (((v) >> 16) & 0xffff)
227#define PPC_HA(v) PPC_HI ((v) + 0x8000)
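As a worked check of the high-adjusted split (illustrative only, not part of the patch): addis/addi treat both immediates as signed, so an offset within the 32-bit range enforced in create_stub() below is reassembled exactly from its two halves:

/* Illustrative sketch: mirrors what the addis/addi pair in the stub
 * computes.  Both halves are sign-extended, which is why PPC_HA adds
 * 0x8000 before taking the high half. */
static inline long reassemble(long v)
{
	return ((long)(short)PPC_HA(v) << 16) + (short)PPC_LO(v);
}
/* e.g. v = 0x12348000: PPC_HA(v) = 0x1235, PPC_LO(v) = 0x8000, and
 * 0x12350000 + (-0x8000) = 0x12348000. */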
228
229/* Patch stub to reference function and correct r2 value. */
230static inline int create_stub(Elf64_Shdr *sechdrs,
231 struct ppc64_stub_entry *entry,
232 struct ppc64_opd_entry *opd,
233 struct module *me)
234{
235 Elf64_Half *loc1, *loc2;
236 long reladdr;
237
238 *entry = ppc64_stub;
239
240 loc1 = (Elf64_Half *)&entry->jump[2];
241 loc2 = (Elf64_Half *)&entry->jump[6];
242
243 /* Stub uses address relative to r2. */
244 reladdr = (unsigned long)entry - my_r2(sechdrs, me);
245 if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
246 printk("%s: Address %p of stub out of range of %p.\n",
 247		       me->name, (void *)reladdr, (void *)my_r2(sechdrs, me));
248 return 0;
249 }
250 DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
251
252 *loc1 = PPC_HA(reladdr);
253 *loc2 = PPC_LO(reladdr);
254 entry->opd.funcaddr = opd->funcaddr;
255 entry->opd.r2 = opd->r2;
256 return 1;
257}
258
259/* Create stub to jump to function described in this OPD: we need the
260 stub to set up the TOC ptr (r2) for the function. */
261static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
262 unsigned long opdaddr,
263 struct module *me)
264{
265 struct ppc64_stub_entry *stubs;
266 struct ppc64_opd_entry *opd = (void *)opdaddr;
267 unsigned int i, num_stubs;
268
269 num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
270
271 /* Find this stub, or if that fails, the next avail. entry */
272 stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
273 for (i = 0; stubs[i].opd.funcaddr; i++) {
274 BUG_ON(i >= num_stubs);
275
276 if (stubs[i].opd.funcaddr == opd->funcaddr)
277 return (unsigned long)&stubs[i];
278 }
279
280 if (!create_stub(sechdrs, &stubs[i], opd, me))
281 return 0;
282
283 return (unsigned long)&stubs[i];
284}
285
286/* We expect a noop next: if it is, replace it with instruction to
287 restore r2. */
288static int restore_r2(u32 *instruction, struct module *me)
289{
290 if (*instruction != 0x60000000) {
291 printk("%s: Expect noop after relocate, got %08x\n",
292 me->name, *instruction);
293 return 0;
294 }
295 *instruction = 0xe8410028; /* ld r2,40(r1) */
296 return 1;
297}
298
299int apply_relocate_add(Elf64_Shdr *sechdrs,
300 const char *strtab,
301 unsigned int symindex,
302 unsigned int relsec,
303 struct module *me)
304{
305 unsigned int i;
306 Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
307 Elf64_Sym *sym;
308 unsigned long *location;
309 unsigned long value;
310
311 DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
312 sechdrs[relsec].sh_info);
313 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
314 /* This is where to make the change */
315 location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
316 + rela[i].r_offset;
317 /* This is the symbol it is referring to */
318 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
319 + ELF64_R_SYM(rela[i].r_info);
320
321 DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
322 location, (long)ELF64_R_TYPE(rela[i].r_info),
323 strtab + sym->st_name, (unsigned long)sym->st_value,
324 (long)rela[i].r_addend);
325
326 /* `Everything is relative'. */
327 value = sym->st_value + rela[i].r_addend;
328
329 switch (ELF64_R_TYPE(rela[i].r_info)) {
330 case R_PPC64_ADDR32:
331 /* Simply set it */
332 *(u32 *)location = value;
333 break;
334
335 case R_PPC64_ADDR64:
336 /* Simply set it */
337 *(unsigned long *)location = value;
338 break;
339
340 case R_PPC64_TOC:
341 *(unsigned long *)location = my_r2(sechdrs, me);
342 break;
343
344 case R_PPC64_TOC16:
 345			/* Subtract TOC pointer */
346 value -= my_r2(sechdrs, me);
347 if (value + 0x8000 > 0xffff) {
348 printk("%s: bad TOC16 relocation (%lu)\n",
349 me->name, value);
350 return -ENOEXEC;
351 }
352 *((uint16_t *) location)
353 = (*((uint16_t *) location) & ~0xffff)
354 | (value & 0xffff);
355 break;
356
357 case R_PPC64_TOC16_DS:
 358			/* Subtract TOC pointer */
359 value -= my_r2(sechdrs, me);
360 if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
361 printk("%s: bad TOC16_DS relocation (%lu)\n",
362 me->name, value);
363 return -ENOEXEC;
364 }
365 *((uint16_t *) location)
366 = (*((uint16_t *) location) & ~0xfffc)
367 | (value & 0xfffc);
368 break;
369
370 case R_PPC_REL24:
371 /* FIXME: Handle weak symbols here --RR */
372 if (sym->st_shndx == SHN_UNDEF) {
373 /* External: go via stub */
374 value = stub_for_addr(sechdrs, value, me);
375 if (!value)
376 return -ENOENT;
377 if (!restore_r2((u32 *)location + 1, me))
378 return -ENOEXEC;
379 }
380
381 /* Convert value to relative */
382 value -= (unsigned long)location;
383 if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
384 printk("%s: REL24 %li out of range!\n",
385 me->name, (long int)value);
386 return -ENOEXEC;
387 }
388
389 /* Only replace bits 2 through 26 */
390 *(uint32_t *)location
391 = (*(uint32_t *)location & ~0x03fffffc)
392 | (value & 0x03fffffc);
393 break;
394
395 default:
396 printk("%s: Unknown ADD relocation: %lu\n",
397 me->name,
398 (unsigned long)ELF64_R_TYPE(rela[i].r_info));
399 return -ENOEXEC;
400 }
401 }
402
403 return 0;
404}
405
406LIST_HEAD(module_bug_list);
407
408int module_finalize(const Elf_Ehdr *hdr,
409 const Elf_Shdr *sechdrs, struct module *me)
410{
411 char *secstrings;
412 unsigned int i;
413
414 me->arch.bug_table = NULL;
415 me->arch.num_bugs = 0;
416
417 /* Find the __bug_table section, if present */
418 secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
419 for (i = 1; i < hdr->e_shnum; i++) {
420 if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
421 continue;
422 me->arch.bug_table = (void *) sechdrs[i].sh_addr;
423 me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
424 break;
425 }
426
427 /*
428 * Strictly speaking this should have a spinlock to protect against
429 * traversals, but since we only traverse on BUG()s, a spinlock
430 * could potentially lead to deadlock and thus be counter-productive.
431 */
432 list_add(&me->arch.bug_list, &module_bug_list);
433
434 return 0;
435}
436
437void module_arch_cleanup(struct module *mod)
438{
439 list_del(&mod->arch.bug_list);
440}
441
442struct bug_entry *module_find_bug(unsigned long bugaddr)
443{
444 struct mod_arch_specific *mod;
445 unsigned int i;
446 struct bug_entry *bug;
447
448 list_for_each_entry(mod, &module_bug_list, bug_list) {
449 bug = mod->bug_table;
450 for (i = 0; i < mod->num_bugs; ++i, ++bug)
451 if (bugaddr == bug->bug_addr)
452 return bug;
453 }
454 return NULL;
455}
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
new file mode 100644
index 000000000000..c0fcd29918ce
--- /dev/null
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -0,0 +1,742 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * /dev/nvram driver for PPC64
10 *
11 * This perhaps should live in drivers/char
12 *
13 * TODO: Split the /dev/nvram part (that one can use
14 * drivers/char/generic_nvram.c) from the arch & partition
15 * parsing code.
16 */
17
18#include <linux/module.h>
19
20#include <linux/types.h>
21#include <linux/errno.h>
22#include <linux/fs.h>
23#include <linux/miscdevice.h>
24#include <linux/fcntl.h>
25#include <linux/nvram.h>
26#include <linux/init.h>
27#include <linux/slab.h>
28#include <linux/spinlock.h>
29#include <asm/uaccess.h>
30#include <asm/nvram.h>
31#include <asm/rtas.h>
32#include <asm/prom.h>
33#include <asm/machdep.h>
34
35#undef DEBUG_NVRAM
36
37static int nvram_scan_partitions(void);
38static int nvram_setup_partition(void);
39static int nvram_create_os_partition(void);
40static int nvram_remove_os_partition(void);
41
42static struct nvram_partition * nvram_part;
43static long nvram_error_log_index = -1;
44static long nvram_error_log_size = 0;
45
46int no_logging = 1; /* Until we initialize everything,
47 * make sure we don't try logging
48 * anything */
49
50extern volatile int error_log_cnt;
51
52struct err_log_info {
53 int error_type;
54 unsigned int seq_num;
55};
56
57static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
58{
59 int size;
60
61 if (ppc_md.nvram_size == NULL)
62 return -ENODEV;
63 size = ppc_md.nvram_size();
64
65 switch (origin) {
66 case 1:
67 offset += file->f_pos;
68 break;
69 case 2:
70 offset += size;
71 break;
72 }
73 if (offset < 0)
74 return -EINVAL;
75 file->f_pos = offset;
76 return file->f_pos;
77}
78
79
80static ssize_t dev_nvram_read(struct file *file, char __user *buf,
81 size_t count, loff_t *ppos)
82{
83 ssize_t len;
84 char *tmp_buffer;
85 int size;
86
87 if (ppc_md.nvram_size == NULL)
88 return -ENODEV;
89 size = ppc_md.nvram_size();
90
91 if (!access_ok(VERIFY_WRITE, buf, count))
92 return -EFAULT;
93 if (*ppos >= size)
94 return 0;
95 if (count > size)
96 count = size;
97
98 tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
99 if (!tmp_buffer) {
 100		printk(KERN_ERR "dev_nvram_read: kmalloc failed\n");
101 return -ENOMEM;
102 }
103
104 len = ppc_md.nvram_read(tmp_buffer, count, ppos);
105 if ((long)len <= 0) {
106 kfree(tmp_buffer);
107 return len;
108 }
109
110 if (copy_to_user(buf, tmp_buffer, len)) {
111 kfree(tmp_buffer);
112 return -EFAULT;
113 }
114
115 kfree(tmp_buffer);
116 return len;
117
118}
119
120static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
121 size_t count, loff_t *ppos)
122{
123 ssize_t len;
124 char * tmp_buffer;
125 int size;
126
127 if (ppc_md.nvram_size == NULL)
128 return -ENODEV;
129 size = ppc_md.nvram_size();
130
131 if (!access_ok(VERIFY_READ, buf, count))
132 return -EFAULT;
133 if (*ppos >= size)
134 return 0;
135 if (count > size)
136 count = size;
137
138 tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
139 if (!tmp_buffer) {
140 printk(KERN_ERR "dev_nvram_write: kmalloc failed\n");
141 return -ENOMEM;
142 }
143
144 if (copy_from_user(tmp_buffer, buf, count)) {
145 kfree(tmp_buffer);
146 return -EFAULT;
147 }
148
149 len = ppc_md.nvram_write(tmp_buffer, count, ppos);
150 if ((long)len <= 0) {
151 kfree(tmp_buffer);
152 return len;
153 }
154
155 kfree(tmp_buffer);
156 return len;
157}
158
159static int dev_nvram_ioctl(struct inode *inode, struct file *file,
160 unsigned int cmd, unsigned long arg)
161{
162 switch(cmd) {
163#ifdef CONFIG_PPC_PMAC
164 case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
165 printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
166 case IOC_NVRAM_GET_OFFSET: {
167 int part, offset;
168
169 if (_machine != PLATFORM_POWERMAC)
170 return -EINVAL;
171 if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
172 return -EFAULT;
173 if (part < pmac_nvram_OF || part > pmac_nvram_NR)
174 return -EINVAL;
175 offset = pmac_get_partition(part);
176 if (offset < 0)
177 return offset;
178 if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
179 return -EFAULT;
180 return 0;
181 }
182#endif /* CONFIG_PPC_PMAC */
183 }
184 return -EINVAL;
185}
186
187struct file_operations nvram_fops = {
188 .owner = THIS_MODULE,
189 .llseek = dev_nvram_llseek,
190 .read = dev_nvram_read,
191 .write = dev_nvram_write,
192 .ioctl = dev_nvram_ioctl,
193};
194
195static struct miscdevice nvram_dev = {
196 NVRAM_MINOR,
197 "nvram",
198 &nvram_fops
199};
200
201
202#ifdef DEBUG_NVRAM
203static void nvram_print_partitions(char * label)
204{
205 struct list_head * p;
206 struct nvram_partition * tmp_part;
207
208 printk(KERN_WARNING "--------%s---------\n", label);
209 printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
210 list_for_each(p, &nvram_part->partition) {
211 tmp_part = list_entry(p, struct nvram_partition, partition);
212 printk(KERN_WARNING "%d \t%02x\t%02x\t%d\t%s\n",
213 tmp_part->index, tmp_part->header.signature,
214 tmp_part->header.checksum, tmp_part->header.length,
215 tmp_part->header.name);
216 }
217}
218#endif
219
220
221static int nvram_write_header(struct nvram_partition * part)
222{
223 loff_t tmp_index;
224 int rc;
225
226 tmp_index = part->index;
227 rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index);
228
229 return rc;
230}
231
232
233static unsigned char nvram_checksum(struct nvram_header *p)
234{
235 unsigned int c_sum, c_sum2;
236 unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */
237 c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5];
238
239 /* The sum may have spilled into the 3rd byte. Fold it back. */
240 c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff;
241 /* The sum cannot exceed 2 bytes. Fold it into a checksum */
242 c_sum2 = (c_sum >> 8) + (c_sum << 8);
243 c_sum = ((c_sum + c_sum2) >> 8) & 0xff;
244 return c_sum;
245}
246
247
248/*
249 * Find an nvram partition, sig can be 0 for any
250 * partition or name can be NULL for any name, else
251 * tries to match both
252 */
253struct nvram_partition *nvram_find_partition(int sig, const char *name)
254{
255 struct nvram_partition * part;
256 struct list_head * p;
257
258 list_for_each(p, &nvram_part->partition) {
259 part = list_entry(p, struct nvram_partition, partition);
260
261 if (sig && part->header.signature != sig)
262 continue;
263 if (name && 0 != strncmp(name, part->header.name, 12))
264 continue;
265 return part;
266 }
267 return NULL;
268}
269EXPORT_SYMBOL(nvram_find_partition);
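A caller might use this lookup as in the sketch below (illustrative only; NVRAM_SIG_OS and the "ppc64,linux" name match the OS partition created later in this file):

/* Illustrative sketch: locate the Linux error-log partition, if present. */
static void example_lookup(void)
{
	struct nvram_partition *os_part;

	os_part = nvram_find_partition(NVRAM_SIG_OS, "ppc64,linux");
	if (os_part)
		printk(KERN_DEBUG "nvram: os partition at index %d\n",
		       (int)os_part->index);
}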
270
271
272static int nvram_remove_os_partition(void)
273{
274 struct list_head *i;
275 struct list_head *j;
276 struct nvram_partition * part;
277 struct nvram_partition * cur_part;
278 int rc;
279
280 list_for_each(i, &nvram_part->partition) {
281 part = list_entry(i, struct nvram_partition, partition);
282 if (part->header.signature != NVRAM_SIG_OS)
283 continue;
284
285 /* Make os partition a free partition */
286 part->header.signature = NVRAM_SIG_FREE;
 287		strncpy(part->header.name, "wwwwwwwwwwww", 12);
288 part->header.checksum = nvram_checksum(&part->header);
289
290 /* Merge contiguous free partitions backwards */
291 list_for_each_prev(j, &part->partition) {
292 cur_part = list_entry(j, struct nvram_partition, partition);
293 if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
294 break;
295 }
296
297 part->header.length += cur_part->header.length;
298 part->header.checksum = nvram_checksum(&part->header);
299 part->index = cur_part->index;
300
301 list_del(&cur_part->partition);
302 kfree(cur_part);
303 j = &part->partition; /* fixup our loop */
304 }
305
306 /* Merge contiguous free partitions forwards */
307 list_for_each(j, &part->partition) {
308 cur_part = list_entry(j, struct nvram_partition, partition);
309 if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
310 break;
311 }
312
313 part->header.length += cur_part->header.length;
314 part->header.checksum = nvram_checksum(&part->header);
315
316 list_del(&cur_part->partition);
317 kfree(cur_part);
318 j = &part->partition; /* fixup our loop */
319 }
320
321 rc = nvram_write_header(part);
322 if (rc <= 0) {
323 printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc);
324 return rc;
325 }
326
327 }
328
329 return 0;
330}
331
332/* nvram_create_os_partition
333 *
 334 * Create an OS (Linux) partition to buffer error logs.
 335 * The partition is created at the first free space found
 336 * that is large enough.
337 */
338static int nvram_create_os_partition(void)
339{
340 struct nvram_partition *part;
341 struct nvram_partition *new_part;
342 struct nvram_partition *free_part = NULL;
343 int seq_init[2] = { 0, 0 };
344 loff_t tmp_index;
345 long size = 0;
346 int rc;
347
 348	/* Find a free partition that will give us the maximum needed size;
 349	   if we can't find one, fall back to the minimum size needed */
350 list_for_each_entry(part, &nvram_part->partition, partition) {
351 if (part->header.signature != NVRAM_SIG_FREE)
352 continue;
353
354 if (part->header.length >= NVRAM_MAX_REQ) {
355 size = NVRAM_MAX_REQ;
356 free_part = part;
357 break;
358 }
359 if (!size && part->header.length >= NVRAM_MIN_REQ) {
360 size = NVRAM_MIN_REQ;
361 free_part = part;
362 }
363 }
364 if (!size)
365 return -ENOSPC;
366
367 /* Create our OS partition */
368 new_part = kmalloc(sizeof(*new_part), GFP_KERNEL);
369 if (!new_part) {
370 printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n");
371 return -ENOMEM;
372 }
373
374 new_part->index = free_part->index;
375 new_part->header.signature = NVRAM_SIG_OS;
376 new_part->header.length = size;
377 strcpy(new_part->header.name, "ppc64,linux");
378 new_part->header.checksum = nvram_checksum(&new_part->header);
379
380 rc = nvram_write_header(new_part);
381 if (rc <= 0) {
 382		printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
 383		       "failed (%d)\n", rc);
384 return rc;
385 }
386
 387	/* make sure the sequence number and the logged error type are
 388	   initialized to zero */
389 tmp_index = new_part->index + NVRAM_HEADER_LEN;
390 rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index);
391 if (rc <= 0) {
392 printk(KERN_ERR "nvram_create_os_partition: nvram_write "
393 "failed (%d)\n", rc);
394 return rc;
395 }
396
397 nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN;
 398	nvram_error_log_size = ((new_part->header.length - 1) *
399 NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
400
401 list_add_tail(&new_part->partition, &free_part->partition);
402
403 if (free_part->header.length <= size) {
404 list_del(&free_part->partition);
405 kfree(free_part);
406 return 0;
407 }
408
409 /* Adjust the partition we stole the space from */
410 free_part->index += size * NVRAM_BLOCK_LEN;
411 free_part->header.length -= size;
412 free_part->header.checksum = nvram_checksum(&free_part->header);
413
414 rc = nvram_write_header(free_part);
415 if (rc <= 0) {
416 printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
417 "failed (%d)\n", rc);
418 return rc;
419 }
420
421 return 0;
422}
423
424
425/* nvram_setup_partition
426 *
427 * This will setup the partition we need for buffering the
428 * error logs and cleanup partitions if needed.
429 *
430 * The general strategy is the following:
 431 * 1.) If there is a ppc64,linux partition large enough then use it.
432 * 2.) If there is not a ppc64,linux partition large enough, search
433 * for a free partition that is large enough.
434 * 3.) If there is not a free partition large enough remove
435 * _all_ OS partitions and consolidate the space.
436 * 4.) Will first try getting a chunk that will satisfy the maximum
437 * error log size (NVRAM_MAX_REQ).
438 * 5.) If the max chunk cannot be allocated then try finding a chunk
 439 * that will satisfy the minimum needed (NVRAM_MIN_REQ).
440 */
441static int nvram_setup_partition(void)
442{
443 struct list_head * p;
444 struct nvram_partition * part;
445 int rc;
446
447 /* For now, we don't do any of this on pmac, until I
 448	 * have figured out if it's worth killing some unused stuff
449 * in our nvram, as Apple defined partitions use pretty much
450 * all of the space
451 */
452 if (_machine == PLATFORM_POWERMAC)
453 return -ENOSPC;
454
455 /* see if we have an OS partition that meets our needs.
456 will try getting the max we need. If not we'll delete
457 partitions and try again. */
458 list_for_each(p, &nvram_part->partition) {
459 part = list_entry(p, struct nvram_partition, partition);
460 if (part->header.signature != NVRAM_SIG_OS)
461 continue;
462
463 if (strcmp(part->header.name, "ppc64,linux"))
464 continue;
465
466 if (part->header.length >= NVRAM_MIN_REQ) {
467 /* found our partition */
468 nvram_error_log_index = part->index + NVRAM_HEADER_LEN;
469 nvram_error_log_size = ((part->header.length - 1) *
470 NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
471 return 0;
472 }
473 }
474
475 /* try creating a partition with the free space we have */
476 rc = nvram_create_os_partition();
477 if (!rc) {
478 return 0;
479 }
480
481 /* need to free up some space */
482 rc = nvram_remove_os_partition();
483 if (rc) {
484 return rc;
485 }
486
487 /* create a partition in this new space */
488 rc = nvram_create_os_partition();
489 if (rc) {
490 printk(KERN_ERR "nvram_create_os_partition: Could not find a "
491 "NVRAM partition large enough\n");
492 return rc;
493 }
494
495 return 0;
496}
497
498
499static int nvram_scan_partitions(void)
500{
501 loff_t cur_index = 0;
502 struct nvram_header phead;
503 struct nvram_partition * tmp_part;
504 unsigned char c_sum;
505 char * header;
506 int total_size;
507 int err;
508
509 if (ppc_md.nvram_size == NULL)
510 return -ENODEV;
511 total_size = ppc_md.nvram_size();
512
513 header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL);
514 if (!header) {
515 printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n");
516 return -ENOMEM;
517 }
518
519 while (cur_index < total_size) {
520
521 err = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index);
522 if (err != NVRAM_HEADER_LEN) {
523 printk(KERN_ERR "nvram_scan_partitions: Error parsing "
524 "nvram partitions\n");
525 goto out;
526 }
527
528 cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */
529
530 memcpy(&phead, header, NVRAM_HEADER_LEN);
531
532 err = 0;
533 c_sum = nvram_checksum(&phead);
534 if (c_sum != phead.checksum) {
535 printk(KERN_WARNING "WARNING: nvram partition checksum"
536 " was %02x, should be %02x!\n",
537 phead.checksum, c_sum);
538 printk(KERN_WARNING "Terminating nvram partition scan\n");
539 goto out;
540 }
541 if (!phead.length) {
542 printk(KERN_WARNING "WARNING: nvram corruption "
543 "detected: 0-length partition\n");
544 goto out;
545 }
546 tmp_part = (struct nvram_partition *)
547 kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
548 err = -ENOMEM;
549 if (!tmp_part) {
550 printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
551 goto out;
552 }
553
554 memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN);
555 tmp_part->index = cur_index;
556 list_add_tail(&tmp_part->partition, &nvram_part->partition);
557
558 cur_index += phead.length * NVRAM_BLOCK_LEN;
559 }
560 err = 0;
561
562 out:
563 kfree(header);
564 return err;
565}
566
567static int __init nvram_init(void)
568{
569 int error;
570 int rc;
571
572 if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
573 return -ENODEV;
574
575 rc = misc_register(&nvram_dev);
576 if (rc != 0) {
577 printk(KERN_ERR "nvram_init: failed to register device\n");
578 return rc;
579 }
580
581 /* initialize our anchor for the nvram partition list */
582 nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
583 if (!nvram_part) {
584 printk(KERN_ERR "nvram_init: Failed kmalloc\n");
585 return -ENOMEM;
586 }
587 INIT_LIST_HEAD(&nvram_part->partition);
588
589 /* Get all the NVRAM partitions */
590 error = nvram_scan_partitions();
591 if (error) {
592 printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n");
593 return error;
594 }
595
596 if(nvram_setup_partition())
597 printk(KERN_WARNING "nvram_init: Could not find nvram partition"
598 " for nvram buffered error logging.\n");
599
600#ifdef DEBUG_NVRAM
601 nvram_print_partitions("NVRAM Partitions");
602#endif
603
604 return rc;
605}
606
607void __exit nvram_cleanup(void)
608{
609 misc_deregister( &nvram_dev );
610}
611
612
613#ifdef CONFIG_PPC_PSERIES
614
615/* nvram_write_error_log
616 *
617 * We need to buffer the error logs into nvram to ensure that we have
618 * the failure information to decode. If we have a severe error there
619 * is no way to guarantee that the OS or the machine is in a state to
620 * get back to user land and write the error to disk. For example if
621 * the SCSI device driver causes a Machine Check by writing to a bad
622 * IO address, there is no way of guaranteeing that the device driver
 623 * is in a state in which it would also be able to write the error data
 624 * captured to disk, thus we buffer it in NVRAM for analysis on the
625 * next boot.
626 *
 627 * In NVRAM the partition containing the error log buffer will look like:
628 * Header (in bytes):
629 * +-----------+----------+--------+------------+------------------+
630 * | signature | checksum | length | name | data |
631 * |0 |1 |2 3|4 15|16 length-1|
632 * +-----------+----------+--------+------------+------------------+
633 *
634 * The 'data' section would look like (in bytes):
635 * +--------------+------------+-----------------------------------+
636 * | event_logged | sequence # | error log |
637 * |0 3|4 7|8 nvram_error_log_size-1|
638 * +--------------+------------+-----------------------------------+
639 *
640 * event_logged: 0 if event has not been logged to syslog, 1 if it has
641 * sequence #: The unique sequence # for each event. (until it wraps)
642 * error log: The error log from event_scan
643 */
644int nvram_write_error_log(char * buff, int length, unsigned int err_type)
645{
646 int rc;
647 loff_t tmp_index;
648 struct err_log_info info;
649
650 if (no_logging) {
651 return -EPERM;
652 }
653
654 if (nvram_error_log_index == -1) {
655 return -ESPIPE;
656 }
657
658 if (length > nvram_error_log_size) {
659 length = nvram_error_log_size;
660 }
661
662 info.error_type = err_type;
663 info.seq_num = error_log_cnt;
664
665 tmp_index = nvram_error_log_index;
666
667 rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
668 if (rc <= 0) {
669 printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
670 return rc;
671 }
672
673 rc = ppc_md.nvram_write(buff, length, &tmp_index);
674 if (rc <= 0) {
675 printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
676 return rc;
677 }
678
679 return 0;
680}
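The data area written above can be pictured as the following struct (a sketch for illustration only; the driver writes the header word and the log separately rather than defining such a type):

/* Illustrative sketch of the OS partition's data area, as laid out by
 * nvram_write_error_log().  The first word doubles as the error type
 * and the event_logged flag cleared by nvram_clear_error_log(). */
struct nvram_os_partition_data {
	struct err_log_info info;	/* event_logged/error_type + sequence # */
	char error_log[0];		/* up to nvram_error_log_size bytes */
};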
681
682/* nvram_read_error_log
683 *
684 * Reads nvram for error log for at most 'length'
685 */
686int nvram_read_error_log(char * buff, int length, unsigned int * err_type)
687{
688 int rc;
689 loff_t tmp_index;
690 struct err_log_info info;
691
692 if (nvram_error_log_index == -1)
693 return -1;
694
695 if (length > nvram_error_log_size)
696 length = nvram_error_log_size;
697
698 tmp_index = nvram_error_log_index;
699
700 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
701 if (rc <= 0) {
702 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
703 return rc;
704 }
705
706 rc = ppc_md.nvram_read(buff, length, &tmp_index);
707 if (rc <= 0) {
708 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
709 return rc;
710 }
711
712 error_log_cnt = info.seq_num;
713 *err_type = info.error_type;
714
715 return 0;
716}
717
718/* This doesn't actually zero anything, but it sets the event_logged
719 * word to indicate that this event is safely in syslog.
720 */
721int nvram_clear_error_log(void)
722{
723 loff_t tmp_index;
724 int clear_word = ERR_FLAG_ALREADY_LOGGED;
725 int rc;
726
727 tmp_index = nvram_error_log_index;
728
729 rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
730 if (rc <= 0) {
731 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
732 return rc;
733 }
734
735 return 0;
736}
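/*
 * Illustrative usage sketch, not part of this patch: how a boot-time
 * consumer might drain one buffered error log and mark it as reported.
 * The buffer size and the example_report() helper are assumptions made
 * for the example only.
 */
#if 0
static char example_buf[1024];

static void example_drain_nvram_log(void)
{
	unsigned int err_type;

	if (nvram_read_error_log(example_buf, sizeof(example_buf),
				 &err_type) == 0) {
		/* hand the record to whatever writes it to syslog/disk */
		example_report(example_buf, err_type);
		/* set the event_logged word so it is not reported twice */
		nvram_clear_error_log();
	}
}
#endif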
737
738#endif /* CONFIG_PPC_PSERIES */
739
740module_init(nvram_init);
741module_exit(nvram_cleanup);
742MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
new file mode 100644
index 000000000000..5a5b24685081
--- /dev/null
+++ b/arch/powerpc/kernel/pci_64.c
@@ -0,0 +1,1381 @@
1/*
2 * Port for PPC64 David Engebretsen, IBM Corp.
3 * Contains common pci routines for the ppc64 platform, both pSeries and iSeries.
4 *
5 * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
6 * Rework, based on alpha PCI code.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#undef DEBUG
15
16#include <linux/config.h>
17#include <linux/kernel.h>
18#include <linux/pci.h>
19#include <linux/string.h>
20#include <linux/init.h>
21#include <linux/bootmem.h>
22#include <linux/mm.h>
23#include <linux/list.h>
24#include <linux/syscalls.h>
25
26#include <asm/processor.h>
27#include <asm/io.h>
28#include <asm/prom.h>
29#include <asm/pci-bridge.h>
30#include <asm/byteorder.h>
31#include <asm/irq.h>
32#include <asm/machdep.h>
33#include <asm/ppc-pci.h>
34
35#ifdef DEBUG
36#include <asm/udbg.h>
37#define DBG(fmt...) udbg_printf(fmt)
38#else
39#define DBG(fmt...)
40#endif
41
42unsigned long pci_probe_only = 1;
43unsigned long pci_assign_all_buses = 0;
44
45/*
46 * legal IO pages under MAX_ISA_PORT. This is to ensure we don't touch
47 * devices we don't have access to.
48 */
49unsigned long io_page_mask;
50
51EXPORT_SYMBOL(io_page_mask);
52
53#ifdef CONFIG_PPC_MULTIPLATFORM
54static void fixup_resource(struct resource *res, struct pci_dev *dev);
55static void do_bus_setup(struct pci_bus *bus);
56#endif
57
58unsigned int pcibios_assign_all_busses(void)
59{
60 return pci_assign_all_buses;
61}
62
63/* pci_io_base -- the base address from which io bars are offsets.
64 * This is the lowest I/O base address (so bar values are always positive),
65 * and it *must* be the start of ISA space if an ISA bus exists because
66 * ISA drivers use hard coded offsets. If no ISA bus exists a dummy
67 * page is mapped and isa_io_limit prevents access to it.
68 */
69unsigned long isa_io_base; /* NULL if no ISA bus */
70EXPORT_SYMBOL(isa_io_base);
71unsigned long pci_io_base;
72EXPORT_SYMBOL(pci_io_base);
73
74void iSeries_pcibios_init(void);
75
76LIST_HEAD(hose_list);
77
78struct dma_mapping_ops pci_dma_ops;
79EXPORT_SYMBOL(pci_dma_ops);
80
81int global_phb_number; /* Global phb counter */
82
83/* Cached ISA bridge dev. */
84struct pci_dev *ppc64_isabridge_dev = NULL;
85
86static void fixup_broken_pcnet32(struct pci_dev* dev)
87{
88 if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
89 dev->vendor = PCI_VENDOR_ID_AMD;
90 pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
91 }
92}
93DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
94
95void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
96 struct resource *res)
97{
98 unsigned long offset = 0;
99 struct pci_controller *hose = pci_bus_to_host(dev->bus);
100
101 if (!hose)
102 return;
103
104 if (res->flags & IORESOURCE_IO)
105 offset = (unsigned long)hose->io_base_virt - pci_io_base;
106
107 if (res->flags & IORESOURCE_MEM)
108 offset = hose->pci_mem_offset;
109
110 region->start = res->start - offset;
111 region->end = res->end - offset;
112}
113
114void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
115 struct pci_bus_region *region)
116{
117 unsigned long offset = 0;
118 struct pci_controller *hose = pci_bus_to_host(dev->bus);
119
120 if (!hose)
121 return;
122
123 if (res->flags & IORESOURCE_IO)
124 offset = (unsigned long)hose->io_base_virt - pci_io_base;
125
126 if (res->flags & IORESOURCE_MEM)
127 offset = hose->pci_mem_offset;
128
129 res->start = region->start + offset;
130 res->end = region->end + offset;
131}
132
133#ifdef CONFIG_HOTPLUG
134EXPORT_SYMBOL(pcibios_resource_to_bus);
135EXPORT_SYMBOL(pcibios_bus_to_resource);
136#endif
137
138/*
139 * We need to avoid collisions with `mirrored' VGA ports
140 * and other strange ISA hardware, so we always want the
141 * addresses to be allocated in the 0x000-0x0ff region
142 * modulo 0x400.
143 *
144 * Why? Because some silly external IO cards only decode
145 * the low 10 bits of the IO address. The 0x00-0xff region
146 * is reserved for motherboard devices that decode all 16
147 * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
148 * but we want to try to avoid allocating at 0x2900-0x2bff
149 * which might be mirrored at 0x0100-0x03ff.
150 */
151void pcibios_align_resource(void *data, struct resource *res,
152 unsigned long size, unsigned long align)
153{
154 struct pci_dev *dev = data;
155 struct pci_controller *hose = pci_bus_to_host(dev->bus);
156 unsigned long start = res->start;
157 unsigned long alignto;
158
159 if (res->flags & IORESOURCE_IO) {
160 unsigned long offset = (unsigned long)hose->io_base_virt -
161 pci_io_base;
162 /* Make sure we start at our min on all hoses */
163 if (start - offset < PCIBIOS_MIN_IO)
164 start = PCIBIOS_MIN_IO + offset;
165
166 /*
167 * Put everything into 0x00-0xff region modulo 0x400
168 */
169 if (start & 0x300)
170 start = (start + 0x3ff) & ~0x3ff;
171
172 } else if (res->flags & IORESOURCE_MEM) {
173 /* Make sure we start at our min on all hoses */
174 if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
175 start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
176
177 /* Align to multiple of size of minimum base. */
178 alignto = max(0x1000UL, align);
179 start = ALIGN(start, alignto);
180 }
181
182 res->start = start;
183}
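/*
 * Worked example (illustrative, addresses are assumptions): an I/O
 * request that would start at 0x2940 has (0x2940 & 0x300) != 0, so it
 * is rounded up to (0x2940 + 0x3ff) & ~0x3ff = 0x2c00, keeping it out
 * of the potentially mirrored 0x2900-0x2bff window described above.
 */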
184
185static DEFINE_SPINLOCK(hose_spinlock);
186
187/*
188 * Initialize the common fields of a pci_controller (phb).
189 */
190static void __devinit pci_setup_pci_controller(struct pci_controller *hose)
191{
192 memset(hose, 0, sizeof(struct pci_controller));
193
194 spin_lock(&hose_spinlock);
195 hose->global_number = global_phb_number++;
196 list_add_tail(&hose->list_node, &hose_list);
197 spin_unlock(&hose_spinlock);
198}
199
200static void add_linux_pci_domain(struct device_node *dev,
201 struct pci_controller *phb)
202{
203 struct property *of_prop;
204 unsigned int size;
205
206 of_prop = (struct property *)
207 get_property(dev, "linux,pci-domain", &size);
208 if (of_prop != NULL)
209 return;
210 WARN_ON(of_prop && size < sizeof(int));
211 if (of_prop && size < sizeof(int))
212 of_prop = NULL;
213 size = sizeof(struct property) + sizeof(int);
214 if (of_prop == NULL) {
215 if (mem_init_done)
216 of_prop = kmalloc(size, GFP_KERNEL);
217 else
218 of_prop = alloc_bootmem(size);
219 }
220 memset(of_prop, 0, sizeof(struct property));
221 of_prop->name = "linux,pci-domain";
222 of_prop->length = sizeof(int);
223 of_prop->value = (unsigned char *)&of_prop[1];
224 *((int *)of_prop->value) = phb->global_number;
225 prom_add_property(dev, of_prop);
226}
227
228struct pci_controller * pcibios_alloc_controller(struct device_node *dev)
229{
230 struct pci_controller *phb;
231
232 if (mem_init_done)
233 phb = kmalloc(sizeof(struct pci_controller), GFP_KERNEL);
234 else
235 phb = alloc_bootmem(sizeof (struct pci_controller));
236 if (phb == NULL)
237 return NULL;
238 pci_setup_pci_controller(phb);
239 phb->arch_data = dev;
240 phb->is_dynamic = mem_init_done;
241 if (dev)
242 add_linux_pci_domain(dev, phb);
243 return phb;
244}
245
246void pcibios_free_controller(struct pci_controller *phb)
247{
248 if (phb->arch_data) {
249 struct device_node *np = phb->arch_data;
250 int *domain = (int *)get_property(np,
251 "linux,pci-domain", NULL);
252 if (domain)
253 *domain = -1;
254 }
255 if (phb->is_dynamic)
256 kfree(phb);
257}
258
259static void __init pcibios_claim_one_bus(struct pci_bus *b)
260{
261 struct pci_dev *dev;
262 struct pci_bus *child_bus;
263
264 list_for_each_entry(dev, &b->devices, bus_list) {
265 int i;
266
267 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
268 struct resource *r = &dev->resource[i];
269
270 if (r->parent || !r->start || !r->flags)
271 continue;
272 pci_claim_resource(dev, i);
273 }
274 }
275
276 list_for_each_entry(child_bus, &b->children, node)
277 pcibios_claim_one_bus(child_bus);
278}
279
280#ifndef CONFIG_PPC_ISERIES
281static void __init pcibios_claim_of_setup(void)
282{
283 struct pci_bus *b;
284
285 list_for_each_entry(b, &pci_root_buses, node)
286 pcibios_claim_one_bus(b);
287}
288#endif
289
290#ifdef CONFIG_PPC_MULTIPLATFORM
291static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
292{
293 u32 *prop;
294 int len;
295
296 prop = (u32 *) get_property(np, name, &len);
297 if (prop && len >= 4)
298 return *prop;
299 return def;
300}
301
302static unsigned int pci_parse_of_flags(u32 addr0)
303{
304 unsigned int flags = 0;
305
306 if (addr0 & 0x02000000) {
307 flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
308 flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
309 flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
310 if (addr0 & 0x40000000)
311 flags |= IORESOURCE_PREFETCH
312 | PCI_BASE_ADDRESS_MEM_PREFETCH;
313 } else if (addr0 & 0x01000000)
314 flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
315 return flags;
316}
317
318#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
319
320static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
321{
322 u64 base, size;
323 unsigned int flags;
324 struct resource *res;
325 u32 *addrs, i;
326 int proplen;
327
328 addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
329 if (!addrs)
330 return;
331 for (; proplen >= 20; proplen -= 20, addrs += 5) {
332 flags = pci_parse_of_flags(addrs[0]);
333 if (!flags)
334 continue;
335 base = GET_64BIT(addrs, 1);
336 size = GET_64BIT(addrs, 3);
337 if (!size)
338 continue;
339 i = addrs[0] & 0xff;
340 if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
341 res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
342 } else if (i == dev->rom_base_reg) {
343 res = &dev->resource[PCI_ROM_RESOURCE];
344 flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
345 } else {
346 printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
347 continue;
348 }
349 res->start = base;
350 res->end = base + size - 1;
351 res->flags = flags;
352 res->name = pci_name(dev);
353 fixup_resource(res, dev);
354 }
355}
356
357struct pci_dev *of_create_pci_dev(struct device_node *node,
358 struct pci_bus *bus, int devfn)
359{
360 struct pci_dev *dev;
361 const char *type;
362
363 dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL);
364 if (!dev)
365 return NULL;
366 type = get_property(node, "device_type", NULL);
367 if (type == NULL)
368 type = "";
369
370 memset(dev, 0, sizeof(struct pci_dev));
371 dev->bus = bus;
372 dev->sysdata = node;
373 dev->dev.parent = bus->bridge;
374 dev->dev.bus = &pci_bus_type;
375 dev->devfn = devfn;
376 dev->multifunction = 0; /* maybe a lie? */
377
378 dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
379 dev->device = get_int_prop(node, "device-id", 0xffff);
380 dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
381 dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
382
383 dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/
384
385 sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
386 dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
387 dev->class = get_int_prop(node, "class-code", 0);
388
389 dev->current_state = 4; /* unknown power state */
390
391 if (!strcmp(type, "pci")) {
392 /* a PCI-PCI bridge */
393 dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
394 dev->rom_base_reg = PCI_ROM_ADDRESS1;
395 } else if (!strcmp(type, "cardbus")) {
396 dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
397 } else {
398 dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
399 dev->rom_base_reg = PCI_ROM_ADDRESS;
400 dev->irq = NO_IRQ;
401 if (node->n_intrs > 0) {
402 dev->irq = node->intrs[0].line;
403 pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
404 dev->irq);
405 }
406 }
407
408 pci_parse_of_addrs(node, dev);
409
410 pci_device_add(dev, bus);
411
412 /* XXX pci_scan_msi_device(dev); */
413
414 return dev;
415}
416EXPORT_SYMBOL(of_create_pci_dev);
417
418void __devinit of_scan_bus(struct device_node *node,
419 struct pci_bus *bus)
420{
421 struct device_node *child = NULL;
422 u32 *reg;
423 int reglen, devfn;
424 struct pci_dev *dev;
425
426 while ((child = of_get_next_child(node, child)) != NULL) {
427 reg = (u32 *) get_property(child, "reg", &reglen);
428 if (reg == NULL || reglen < 20)
429 continue;
430 devfn = (reg[0] >> 8) & 0xff;
431 /* create a new pci_dev for this device */
432 dev = of_create_pci_dev(child, bus, devfn);
433 if (!dev)
434 continue;
435 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
436 dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
437 of_scan_pci_bridge(child, dev);
438 }
439
440 do_bus_setup(bus);
441}
442EXPORT_SYMBOL(of_scan_bus);
443
444void __devinit of_scan_pci_bridge(struct device_node *node,
445 struct pci_dev *dev)
446{
447 struct pci_bus *bus;
448 u32 *busrange, *ranges;
449 int len, i, mode;
450 struct resource *res;
451 unsigned int flags;
452 u64 size;
453
454 /* parse bus-range property */
455 busrange = (u32 *) get_property(node, "bus-range", &len);
456 if (busrange == NULL || len != 8) {
457 printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n",
458 node->full_name);
459 return;
460 }
461 ranges = (u32 *) get_property(node, "ranges", &len);
462 if (ranges == NULL) {
463 printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n",
464 node->full_name);
465 return;
466 }
467
468 bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
469 if (!bus) {
470 printk(KERN_ERR "Failed to create pci bus for %s\n",
471 node->full_name);
472 return;
473 }
474
475 bus->primary = dev->bus->number;
476 bus->subordinate = busrange[1];
477 bus->bridge_ctl = 0;
478 bus->sysdata = node;
479
480 /* parse ranges property */
481 /* PCI #address-cells == 3 and #size-cells == 2 always */
482 res = &dev->resource[PCI_BRIDGE_RESOURCES];
483 for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
484 res->flags = 0;
485 bus->resource[i] = res;
486 ++res;
487 }
488 i = 1;
489 for (; len >= 32; len -= 32, ranges += 8) {
490 flags = pci_parse_of_flags(ranges[0]);
491 size = GET_64BIT(ranges, 6);
492 if (flags == 0 || size == 0)
493 continue;
494 if (flags & IORESOURCE_IO) {
495 res = bus->resource[0];
496 if (res->flags) {
497 printk(KERN_ERR "PCI: ignoring extra I/O range"
498 " for bridge %s\n", node->full_name);
499 continue;
500 }
501 } else {
502 if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
503 printk(KERN_ERR "PCI: too many memory ranges"
504 " for bridge %s\n", node->full_name);
505 continue;
506 }
507 res = bus->resource[i];
508 ++i;
509 }
510 res->start = GET_64BIT(ranges, 1);
511 res->end = res->start + size - 1;
512 res->flags = flags;
513 fixup_resource(res, dev);
514 }
515 sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
516 bus->number);
517
518 mode = PCI_PROBE_NORMAL;
519 if (ppc_md.pci_probe_mode)
520 mode = ppc_md.pci_probe_mode(bus);
521 if (mode == PCI_PROBE_DEVTREE)
522 of_scan_bus(node, bus);
523 else if (mode == PCI_PROBE_NORMAL)
524 pci_scan_child_bus(bus);
525}
526EXPORT_SYMBOL(of_scan_pci_bridge);
527#endif /* CONFIG_PPC_MULTIPLATFORM */
528
529void __devinit scan_phb(struct pci_controller *hose)
530{
531 struct pci_bus *bus;
532 struct device_node *node = hose->arch_data;
533 int i, mode;
534 struct resource *res;
535
536 bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node);
537 if (bus == NULL) {
538 printk(KERN_ERR "Failed to create bus for PCI domain %04x\n",
539 hose->global_number);
540 return;
541 }
542 bus->secondary = hose->first_busno;
543 hose->bus = bus;
544
545 bus->resource[0] = res = &hose->io_resource;
546 if (res->flags && request_resource(&ioport_resource, res))
547 printk(KERN_ERR "Failed to request PCI IO region "
548 "on PCI domain %04x\n", hose->global_number);
549
550 for (i = 0; i < 3; ++i) {
551 res = &hose->mem_resources[i];
552 bus->resource[i+1] = res;
553 if (res->flags && request_resource(&iomem_resource, res))
554 printk(KERN_ERR "Failed to request PCI memory region "
555 "on PCI domain %04x\n", hose->global_number);
556 }
557
558 mode = PCI_PROBE_NORMAL;
559#ifdef CONFIG_PPC_MULTIPLATFORM
560 if (ppc_md.pci_probe_mode)
561 mode = ppc_md.pci_probe_mode(bus);
562 if (mode == PCI_PROBE_DEVTREE) {
563 bus->subordinate = hose->last_busno;
564 of_scan_bus(node, bus);
565 }
566#endif /* CONFIG_PPC_MULTIPLATFORM */
567 if (mode == PCI_PROBE_NORMAL)
568 hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
569 pci_bus_add_devices(bus);
570}
571
572static int __init pcibios_init(void)
573{
574 struct pci_controller *hose, *tmp;
575
576 /* For now, override phys_mem_access_prot. If needed, we may later
577 * move that initialization into each ppc_md.
578 */
579 ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
580
581#ifdef CONFIG_PPC_ISERIES
582 iSeries_pcibios_init();
583#endif
584
585 printk("PCI: Probing PCI hardware\n");
586
587 /* Scan all of the recorded PCI controllers. */
588 list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
589 scan_phb(hose);
590
591#ifndef CONFIG_PPC_ISERIES
592 if (pci_probe_only)
593 pcibios_claim_of_setup();
594 else
595 /* FIXME: `else' will be removed when
596 pci_assign_unassigned_resources() is able to work
597 correctly with [partially] allocated PCI tree. */
598 pci_assign_unassigned_resources();
599#endif /* !CONFIG_PPC_ISERIES */
600
601 /* Call machine dependent final fixup */
602 if (ppc_md.pcibios_fixup)
603 ppc_md.pcibios_fixup();
604
605 /* Cache the location of the ISA bridge (if we have one) */
606 ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
607 if (ppc64_isabridge_dev != NULL)
608 printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
609
610#ifdef CONFIG_PPC_MULTIPLATFORM
611 /* map in PCI I/O space */
612 phbs_remap_io();
613#endif
614
615 printk("PCI: Probing PCI hardware done\n");
616
617 return 0;
618}
619
620subsys_initcall(pcibios_init);
621
622char __init *pcibios_setup(char *str)
623{
624 return str;
625}
626
627int pcibios_enable_device(struct pci_dev *dev, int mask)
628{
629 u16 cmd, oldcmd;
630 int i;
631
632 pci_read_config_word(dev, PCI_COMMAND, &cmd);
633 oldcmd = cmd;
634
635 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
636 struct resource *res = &dev->resource[i];
637
638 /* Only set up the requested stuff */
639 if (!(mask & (1<<i)))
640 continue;
641
642 if (res->flags & IORESOURCE_IO)
643 cmd |= PCI_COMMAND_IO;
644 if (res->flags & IORESOURCE_MEM)
645 cmd |= PCI_COMMAND_MEMORY;
646 }
647
648 if (cmd != oldcmd) {
649 printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
650 pci_name(dev), cmd);
651 /* Enable the appropriate bits in the PCI command register. */
652 pci_write_config_word(dev, PCI_COMMAND, cmd);
653 }
654 return 0;
655}
656
657/*
658 * Return the domain number for this bus.
659 */
660int pci_domain_nr(struct pci_bus *bus)
661{
662#ifdef CONFIG_PPC_ISERIES
663 return 0;
664#else
665 struct pci_controller *hose = pci_bus_to_host(bus);
666
667 return hose->global_number;
668#endif
669}
670
671EXPORT_SYMBOL(pci_domain_nr);
672
673/* Decide whether to display the domain number in /proc */
674int pci_proc_domain(struct pci_bus *bus)
675{
676#ifdef CONFIG_PPC_ISERIES
677 return 0;
678#else
679 struct pci_controller *hose = pci_bus_to_host(bus);
680 return hose->buid;
681#endif
682}
683
684/*
685 * Platform support for /proc/bus/pci/X/Y mmap()s,
686 * modelled on the sparc64 implementation by Dave Miller.
687 * -- paulus.
688 */
689
690/*
691 * Adjust vm_pgoff of VMA such that it is the physical page offset
692 * corresponding to the 32-bit pci bus offset for DEV requested by the user.
693 *
694 * Basically, the user finds the base address for the device they wish
695 * to mmap. They read the 32-bit value from the config space base register,
696 * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
697 * offset parameter of mmap on /proc/bus/pci/XXX for that device.
698 *
699 * Returns negative error code on failure, zero on success.
700 */
701static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
702 unsigned long *offset,
703 enum pci_mmap_state mmap_state)
704{
705 struct pci_controller *hose = pci_bus_to_host(dev->bus);
706 unsigned long io_offset = 0;
707 int i, res_bit;
708
709 if (hose == 0)
710 return NULL; /* should never happen */
711
712 /* If memory, add on the PCI bridge address offset */
713 if (mmap_state == pci_mmap_mem) {
714 *offset += hose->pci_mem_offset;
715 res_bit = IORESOURCE_MEM;
716 } else {
717 io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
718 *offset += io_offset;
719 res_bit = IORESOURCE_IO;
720 }
721
722 /*
723 * Check that the offset requested corresponds to one of the
724 * resources of the device.
725 */
726 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
727 struct resource *rp = &dev->resource[i];
728 int flags = rp->flags;
729
730 /* treat ROM as memory (should be already) */
731 if (i == PCI_ROM_RESOURCE)
732 flags |= IORESOURCE_MEM;
733
734 /* Active and same type? */
735 if ((flags & res_bit) == 0)
736 continue;
737
738 /* In the range of this resource? */
739 if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
740 continue;
741
742 /* found it! construct the final physical address */
743 if (mmap_state == pci_mmap_io)
744 *offset += hose->io_base_phys - io_offset;
745 return rp;
746 }
747
748 return NULL;
749}
750
751/*
752 * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
753 * device mapping.
754 */
755static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
756 pgprot_t protection,
757 enum pci_mmap_state mmap_state,
758 int write_combine)
759{
760 unsigned long prot = pgprot_val(protection);
761
762 /* Write combine is always 0 on non-memory space mappings. On
763 * memory space, if the user didn't pass 1, we check for a
764 * "prefetchable" resource. This is a bit hackish, but we use
765 * this to work around the inability of /sysfs to provide a write
766 * combine bit.
767 */
768 if (mmap_state != pci_mmap_mem)
769 write_combine = 0;
770 else if (write_combine == 0) {
771 if (rp->flags & IORESOURCE_PREFETCH)
772 write_combine = 1;
773 }
774
775 /* XXX would be nice to have a way to ask for write-through */
776 prot |= _PAGE_NO_CACHE;
777 if (write_combine)
778 prot &= ~_PAGE_GUARDED;
779 else
780 prot |= _PAGE_GUARDED;
781
782 printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start,
783 prot);
784
785 return __pgprot(prot);
786}
787
788/*
789 * This one is used by /dev/mem and fbdev who have no clue about the
790 * PCI device, it tries to find the PCI device first and calls the
791 * above routine
792 */
793pgprot_t pci_phys_mem_access_prot(struct file *file,
794 unsigned long pfn,
795 unsigned long size,
796 pgprot_t protection)
797{
798 struct pci_dev *pdev = NULL;
799 struct resource *found = NULL;
800 unsigned long prot = pgprot_val(protection);
801 unsigned long offset = pfn << PAGE_SHIFT;
802 int i;
803
804 if (page_is_ram(pfn))
805 return __pgprot(prot);
806
807 prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
808
809 for_each_pci_dev(pdev) {
810 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
811 struct resource *rp = &pdev->resource[i];
812 int flags = rp->flags;
813
814 /* Active and same type? */
815 if ((flags & IORESOURCE_MEM) == 0)
816 continue;
817 /* In the range of this resource? */
818 if (offset < (rp->start & PAGE_MASK) ||
819 offset > rp->end)
820 continue;
821 found = rp;
822 break;
823 }
824 if (found)
825 break;
826 }
827 if (found) {
828 if (found->flags & IORESOURCE_PREFETCH)
829 prot &= ~_PAGE_GUARDED;
830 pci_dev_put(pdev);
831 }
832
833 DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
834
835 return __pgprot(prot);
836}
837
838
839/*
840 * Perform the actual remap of the pages for a PCI device mapping, as
841 * appropriate for this architecture. The region in the process to map
842 * is described by vm_start and vm_end members of VMA, the base physical
843 * address is found in vm_pgoff.
844 * The pci device structure is provided so that architectures may make mapping
845 * decisions on a per-device or per-bus basis.
846 *
847 * Returns a negative error code on failure, zero on success.
848 */
849int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
850 enum pci_mmap_state mmap_state,
851 int write_combine)
852{
853 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
854 struct resource *rp;
855 int ret;
856
857 rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
858 if (rp == NULL)
859 return -EINVAL;
860
861 vma->vm_pgoff = offset >> PAGE_SHIFT;
862 vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO;
863 vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
864 vma->vm_page_prot,
865 mmap_state, write_combine);
866
867 ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
868 vma->vm_end - vma->vm_start, vma->vm_page_prot);
869
870 return ret;
871}
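/*
 * Illustrative user-space sketch, not part of this patch, of the
 * procedure described in the comment above __pci_mmap_make_offset():
 * read the 32-bit BAR from the device's config space, mask off the
 * flag bits and pass the page-aligned value as the mmap offset on the
 * /proc/bus/pci entry.  The path and BAR offset are assumptions made
 * for the example only:
 *
 *	fd = open("/proc/bus/pci/01/02.0", O_RDWR);
 *	pread(fd, &bar, 4, 0x10);		   read BAR0
 *	bar &= ~0xfUL;				   strip the flag bits
 *	ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, bar & PAGE_MASK);
 */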
872
873#ifdef CONFIG_PPC_MULTIPLATFORM
874static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
875{
876 struct pci_dev *pdev;
877 struct device_node *np;
878
879 pdev = to_pci_dev (dev);
880 np = pci_device_to_OF_node(pdev);
881 if (np == NULL || np->full_name == NULL)
882 return 0;
883 return sprintf(buf, "%s", np->full_name);
884}
885static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
886#endif /* CONFIG_PPC_MULTIPLATFORM */
887
888void pcibios_add_platform_entries(struct pci_dev *pdev)
889{
890#ifdef CONFIG_PPC_MULTIPLATFORM
891 device_create_file(&pdev->dev, &dev_attr_devspec);
892#endif /* CONFIG_PPC_MULTIPLATFORM */
893}
894
895#ifdef CONFIG_PPC_MULTIPLATFORM
896
897#define ISA_SPACE_MASK 0x1
898#define ISA_SPACE_IO 0x1
899
900static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
901 unsigned long phb_io_base_phys,
902 void __iomem * phb_io_base_virt)
903{
904 struct isa_range *range;
905 unsigned long pci_addr;
906 unsigned int isa_addr;
907 unsigned int size;
908 int rlen = 0;
909
910 range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
911 if (range == NULL || (rlen < sizeof(struct isa_range))) {
912 printk(KERN_ERR "no ISA ranges or unexpected isa range size, "
913 "mapping 64k\n");
914 __ioremap_explicit(phb_io_base_phys,
915 (unsigned long)phb_io_base_virt,
916 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
917 return;
918 }
919
920 /* From "ISA Binding to 1275"
921 * The ranges property is laid out as an array of elements,
922 * each of which comprises:
923 * cells 0 - 1: an ISA address
924 * cells 2 - 4: a PCI address
925 * (size depending on dev->n_addr_cells)
926 * cell 5: the size of the range
927 */
928 if ((range->isa_addr.a_hi & ISA_SPACE_MASK) == ISA_SPACE_IO) {
929 isa_addr = range->isa_addr.a_lo;
930 pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
931 range->pci_addr.a_lo;
932
933 /* Assume these are both zero */
934 if ((pci_addr != 0) || (isa_addr != 0)) {
935 printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
936 __FUNCTION__);
937 return;
938 }
939
940 size = PAGE_ALIGN(range->size);
941
942 __ioremap_explicit(phb_io_base_phys,
943 (unsigned long) phb_io_base_virt,
944 size, _PAGE_NO_CACHE | _PAGE_GUARDED);
945 }
946}
947
948void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
949 struct device_node *dev, int prim)
950{
951 unsigned int *ranges, pci_space;
952 unsigned long size;
953 int rlen = 0;
954 int memno = 0;
955 struct resource *res;
956 int np, na = prom_n_addr_cells(dev);
957 unsigned long pci_addr, cpu_phys_addr;
958
959 np = na + 5;
960
961 /* From "PCI Binding to 1275"
962 * The ranges property is laid out as an array of elements,
963 * each of which comprises:
964 * cells 0 - 2: a PCI address
965 * cells 3 or 3+4: a CPU physical address
966 * (size depending on dev->n_addr_cells)
967 * cells 4+5 or 5+6: the size of the range
968 */
969 ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
970 if (ranges == NULL)
971 return;
972 hose->io_base_phys = 0;
973 while ((rlen -= np * sizeof(unsigned int)) >= 0) {
974 res = NULL;
975 pci_space = ranges[0];
976 pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2];
977
978 cpu_phys_addr = ranges[3];
979 if (na >= 2)
980 cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4];
981
982 size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4];
983 ranges += np;
984 if (size == 0)
985 continue;
986
987 /* Now consume following elements while they are contiguous */
988 while (rlen >= np * sizeof(unsigned int)) {
989 unsigned long addr, phys;
990
991 if (ranges[0] != pci_space)
992 break;
993 addr = ((unsigned long)ranges[1] << 32) | ranges[2];
994 phys = ranges[3];
995 if (na >= 2)
996 phys = (phys << 32) | ranges[4];
997 if (addr != pci_addr + size ||
998 phys != cpu_phys_addr + size)
999 break;
1000
1001 size += ((unsigned long)ranges[na+3] << 32)
1002 | ranges[na+4];
1003 ranges += np;
1004 rlen -= np * sizeof(unsigned int);
1005 }
1006
1007 switch ((pci_space >> 24) & 0x3) {
1008 case 1: /* I/O space */
1009 hose->io_base_phys = cpu_phys_addr;
1010 hose->pci_io_size = size;
1011
1012 res = &hose->io_resource;
1013 res->flags = IORESOURCE_IO;
1014 res->start = pci_addr;
1015 DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number,
1016 res->start, res->start + size - 1);
1017 break;
1018 case 2: /* memory space */
1019 memno = 0;
1020 while (memno < 3 && hose->mem_resources[memno].flags)
1021 ++memno;
1022
1023 if (memno == 0)
1024 hose->pci_mem_offset = cpu_phys_addr - pci_addr;
1025 if (memno < 3) {
1026 res = &hose->mem_resources[memno];
1027 res->flags = IORESOURCE_MEM;
1028 res->start = cpu_phys_addr;
1029 DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number,
1030 res->start, res->start + size - 1);
1031 }
1032 break;
1033 }
1034 if (res != NULL) {
1035 res->name = dev->full_name;
1036 res->end = res->start + size - 1;
1037 res->parent = NULL;
1038 res->sibling = NULL;
1039 res->child = NULL;
1040 }
1041 }
1042}
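/*
 * Worked example (illustrative, values are assumptions): with na == 2,
 * entries are read np == 7 cells at a time.  The entry
 *   0x02000000 0x0 0x80000000  0x0 0xc0000000  0x0 0x20000000
 * decodes as PCI memory space ((pci_space >> 24) & 0x3 == 2) with
 * pci_addr 0x80000000, cpu_phys_addr 0xc0000000 and a 512MB size, so
 * the first such range yields hose->pci_mem_offset = 0x40000000.
 */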
1043
1044void __init pci_setup_phb_io(struct pci_controller *hose, int primary)
1045{
1046 unsigned long size = hose->pci_io_size;
1047 unsigned long io_virt_offset;
1048 struct resource *res;
1049 struct device_node *isa_dn;
1050
1051 hose->io_base_virt = reserve_phb_iospace(size);
1052 DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
1053 hose->global_number, hose->io_base_phys,
1054 (unsigned long) hose->io_base_virt);
1055
1056 if (primary) {
1057 pci_io_base = (unsigned long)hose->io_base_virt;
1058 isa_dn = of_find_node_by_type(NULL, "isa");
1059 if (isa_dn) {
1060 isa_io_base = pci_io_base;
1061 pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
1062 hose->io_base_virt);
1063 of_node_put(isa_dn);
1064 /* Allow all IO */
1065 io_page_mask = -1;
1066 }
1067 }
1068
1069 io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
1070 res = &hose->io_resource;
1071 res->start += io_virt_offset;
1072 res->end += io_virt_offset;
1073}
1074
1075void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
1076 int primary)
1077{
1078 unsigned long size = hose->pci_io_size;
1079 unsigned long io_virt_offset;
1080 struct resource *res;
1081
1082 hose->io_base_virt = __ioremap(hose->io_base_phys, size,
1083 _PAGE_NO_CACHE | _PAGE_GUARDED);
1084 DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
1085 hose->global_number, hose->io_base_phys,
1086 (unsigned long) hose->io_base_virt);
1087
1088 if (primary)
1089 pci_io_base = (unsigned long)hose->io_base_virt;
1090
1091 io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
1092 res = &hose->io_resource;
1093 res->start += io_virt_offset;
1094 res->end += io_virt_offset;
1095}
1096
1097
1098static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
1099 unsigned long *start_virt, unsigned long *size)
1100{
1101 struct pci_controller *hose = pci_bus_to_host(bus);
1102 struct pci_bus_region region;
1103 struct resource *res;
1104
1105 if (bus->self) {
1106 res = bus->resource[0];
1107 pcibios_resource_to_bus(bus->self, &region, res);
1108 *start_phys = hose->io_base_phys + region.start;
1109 *start_virt = (unsigned long) hose->io_base_virt +
1110 region.start;
1111 if (region.end > region.start)
1112 *size = region.end - region.start + 1;
1113 else {
1114 printk("%s(): unexpected region 0x%lx->0x%lx\n",
1115 __FUNCTION__, region.start, region.end);
1116 return 1;
1117 }
1118
1119 } else {
1120 /* Root Bus */
1121 res = &hose->io_resource;
1122 *start_phys = hose->io_base_phys;
1123 *start_virt = (unsigned long) hose->io_base_virt;
1124 if (res->end > res->start)
1125 *size = res->end - res->start + 1;
1126 else {
1127 printk("%s(): unexpected region 0x%lx->0x%lx\n",
1128 __FUNCTION__, res->start, res->end);
1129 return 1;
1130 }
1131 }
1132
1133 return 0;
1134}
1135
1136int unmap_bus_range(struct pci_bus *bus)
1137{
1138 unsigned long start_phys;
1139 unsigned long start_virt;
1140 unsigned long size;
1141
1142 if (!bus) {
1143 printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
1144 return 1;
1145 }
1146
1147 if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
1148 return 1;
1149 if (iounmap_explicit((void __iomem *) start_virt, size))
1150 return 1;
1151
1152 return 0;
1153}
1154EXPORT_SYMBOL(unmap_bus_range);
1155
1156int remap_bus_range(struct pci_bus *bus)
1157{
1158 unsigned long start_phys;
1159 unsigned long start_virt;
1160 unsigned long size;
1161
1162 if (!bus) {
1163 printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
1164 return 1;
1165 }
1166
1167
1168 if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
1169 return 1;
1170 if (start_phys == 0)
1171 return 1;
1172 printk("mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
1173 if (__ioremap_explicit(start_phys, start_virt, size,
1174 _PAGE_NO_CACHE | _PAGE_GUARDED))
1175 return 1;
1176
1177 return 0;
1178}
1179EXPORT_SYMBOL(remap_bus_range);
1180
1181void phbs_remap_io(void)
1182{
1183 struct pci_controller *hose, *tmp;
1184
1185 list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
1186 remap_bus_range(hose->bus);
1187}
1188
1189/*
1190 * ppc64 can have multifunction devices that do not respond to function 0.
1191 * In this case we must scan all functions.
1192 * XXX this can go now, we use the OF device tree in all the
1193 * cases that caused problems. -- paulus
1194 */
1195int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
1196{
1197 return 0;
1198}
1199
1200static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
1201{
1202 struct pci_controller *hose = pci_bus_to_host(dev->bus);
1203 unsigned long start, end, mask, offset;
1204
1205 if (res->flags & IORESOURCE_IO) {
1206 offset = (unsigned long)hose->io_base_virt - pci_io_base;
1207
1208 start = res->start += offset;
1209 end = res->end += offset;
1210
1211 /* Need to allow IO access to pages that are in the
1212 ISA range */
1213 if (start < MAX_ISA_PORT) {
1214 if (end > MAX_ISA_PORT)
1215 end = MAX_ISA_PORT;
1216
1217 start >>= PAGE_SHIFT;
1218 end >>= PAGE_SHIFT;
1219
1220 /* get the range of pages for the map */
1221 mask = ((1 << (end+1)) - 1) ^ ((1 << start) - 1);
1222 io_page_mask |= mask;
1223 }
1224 } else if (res->flags & IORESOURCE_MEM) {
1225 res->start += hose->pci_mem_offset;
1226 res->end += hose->pci_mem_offset;
1227 }
1228}
1229
1230void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
1231 struct pci_bus *bus)
1232{
1233 /* Update device resources. */
1234 int i;
1235
1236 for (i = 0; i < PCI_NUM_RESOURCES; i++)
1237 if (dev->resource[i].flags)
1238 fixup_resource(&dev->resource[i], dev);
1239}
1240EXPORT_SYMBOL(pcibios_fixup_device_resources);
1241
1242static void __devinit do_bus_setup(struct pci_bus *bus)
1243{
1244 struct pci_dev *dev;
1245
1246 ppc_md.iommu_bus_setup(bus);
1247
1248 list_for_each_entry(dev, &bus->devices, bus_list)
1249 ppc_md.iommu_dev_setup(dev);
1250
1251 if (ppc_md.irq_bus_setup)
1252 ppc_md.irq_bus_setup(bus);
1253}
1254
1255void __devinit pcibios_fixup_bus(struct pci_bus *bus)
1256{
1257 struct pci_dev *dev = bus->self;
1258
1259 if (dev && pci_probe_only &&
1260 (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
1261 /* This is a subordinate bridge */
1262
1263 pci_read_bridge_bases(bus);
1264 pcibios_fixup_device_resources(dev, bus);
1265 }
1266
1267 do_bus_setup(bus);
1268
1269 if (!pci_probe_only)
1270 return;
1271
1272 list_for_each_entry(dev, &bus->devices, bus_list)
1273 if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
1274 pcibios_fixup_device_resources(dev, bus);
1275}
1276EXPORT_SYMBOL(pcibios_fixup_bus);
1277
1278/*
1279 * Reads the interrupt pin to determine if the interrupt is used by the card.
1280 * If the interrupt is used, then gets the interrupt line from
1281 * Open Firmware and sets it in the pci_dev and the PCI config space.
1282 */
1283int pci_read_irq_line(struct pci_dev *pci_dev)
1284{
1285 u8 intpin;
1286 struct device_node *node;
1287
1288 pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
1289 if (intpin == 0)
1290 return 0;
1291
1292 node = pci_device_to_OF_node(pci_dev);
1293 if (node == NULL)
1294 return -1;
1295
1296 if (node->n_intrs == 0)
1297 return -1;
1298
1299 pci_dev->irq = node->intrs[0].line;
1300
1301 pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
1302
1303 return 0;
1304}
1305EXPORT_SYMBOL(pci_read_irq_line);
1306
1307void pci_resource_to_user(const struct pci_dev *dev, int bar,
1308 const struct resource *rsrc,
1309 u64 *start, u64 *end)
1310{
1311 struct pci_controller *hose = pci_bus_to_host(dev->bus);
1312 unsigned long offset = 0;
1313
1314 if (hose == NULL)
1315 return;
1316
1317 if (rsrc->flags & IORESOURCE_IO)
1318 offset = pci_io_base - (unsigned long)hose->io_base_virt +
1319 hose->io_base_phys;
1320
1321 *start = rsrc->start + offset;
1322 *end = rsrc->end + offset;
1323}
1324
1325#endif /* CONFIG_PPC_MULTIPLATFORM */
1326
1327
1328#define IOBASE_BRIDGE_NUMBER 0
1329#define IOBASE_MEMORY 1
1330#define IOBASE_IO 2
1331#define IOBASE_ISA_IO 3
1332#define IOBASE_ISA_MEM 4
1333
1334long sys_pciconfig_iobase(long which, unsigned long in_bus,
1335 unsigned long in_devfn)
1336{
1337 struct pci_controller* hose;
1338 struct list_head *ln;
1339 struct pci_bus *bus = NULL;
1340 struct device_node *hose_node;
1341
1342 /* Argh ! Please forgive me for that hack, but that's the
1343 * simplest way to get existing XFree to not lockup on some
1344 * G5 machines... So when something asks for bus 0 io base
1345 * (bus 0 is HT root), we return the AGP one instead.
1346 */
1347 if (machine_is_compatible("MacRISC4"))
1348 if (in_bus == 0)
1349 in_bus = 0xf0;
1350
1351 /* That syscall isn't quite compatible with PCI domains, but it's
1352 * used on pre-domain setups. We return the first match.
1353 */
1354
1355 for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
1356 bus = pci_bus_b(ln);
1357 if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate))
1358 break;
1359 bus = NULL;
1360 }
1361 if (bus == NULL || bus->sysdata == NULL)
1362 return -ENODEV;
1363
1364 hose_node = (struct device_node *)bus->sysdata;
1365 hose = PCI_DN(hose_node)->phb;
1366
1367 switch (which) {
1368 case IOBASE_BRIDGE_NUMBER:
1369 return (long)hose->first_busno;
1370 case IOBASE_MEMORY:
1371 return (long)hose->pci_mem_offset;
1372 case IOBASE_IO:
1373 return (long)hose->io_base_phys;
1374 case IOBASE_ISA_IO:
1375 return (long)isa_io_base;
1376 case IOBASE_ISA_MEM:
1377 return -EINVAL;
1378 }
1379
1380 return -EOPNOTSUPP;
1381}
diff --git a/arch/powerpc/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c
new file mode 100644
index 000000000000..e1a32f802c0b
--- /dev/null
+++ b/arch/powerpc/kernel/pci_direct_iommu.c
@@ -0,0 +1,94 @@
1/*
2 * Support for DMA from PCI devices to main memory on
3 * machines without an iommu or with directly addressable
4 * RAM (typically a pmac with 2GB of RAM or less)
5 *
6 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/delay.h>
17#include <linux/string.h>
18#include <linux/init.h>
19#include <linux/bootmem.h>
20#include <linux/mm.h>
21#include <linux/dma-mapping.h>
22
23#include <asm/sections.h>
24#include <asm/io.h>
25#include <asm/prom.h>
26#include <asm/pci-bridge.h>
27#include <asm/machdep.h>
28#include <asm/pmac_feature.h>
29#include <asm/abs_addr.h>
30#include <asm/ppc-pci.h>
31
32static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
33 dma_addr_t *dma_handle, gfp_t flag)
34{
35 void *ret;
36
37 ret = (void *)__get_free_pages(flag, get_order(size));
38 if (ret != NULL) {
39 memset(ret, 0, size);
40 *dma_handle = virt_to_abs(ret);
41 }
42 return ret;
43}
44
45static void pci_direct_free_coherent(struct device *hwdev, size_t size,
46 void *vaddr, dma_addr_t dma_handle)
47{
48 free_pages((unsigned long)vaddr, get_order(size));
49}
50
51static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr,
52 size_t size, enum dma_data_direction direction)
53{
54 return virt_to_abs(ptr);
55}
56
57static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
58 size_t size, enum dma_data_direction direction)
59{
60}
61
62static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg,
63 int nents, enum dma_data_direction direction)
64{
65 int i;
66
67 for (i = 0; i < nents; i++, sg++) {
68 sg->dma_address = page_to_phys(sg->page) + sg->offset;
69 sg->dma_length = sg->length;
70 }
71
72 return nents;
73}
74
75static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg,
76 int nents, enum dma_data_direction direction)
77{
78}
79
80static int pci_direct_dma_supported(struct device *dev, u64 mask)
81{
82 return mask < 0x100000000ull;
83}
84
85void __init pci_direct_iommu_init(void)
86{
87 pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent;
88 pci_dma_ops.free_coherent = pci_direct_free_coherent;
89 pci_dma_ops.map_single = pci_direct_map_single;
90 pci_dma_ops.unmap_single = pci_direct_unmap_single;
91 pci_dma_ops.map_sg = pci_direct_map_sg;
92 pci_dma_ops.unmap_sg = pci_direct_unmap_sg;
93 pci_dma_ops.dma_supported = pci_direct_dma_supported;
94}
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
new file mode 100644
index 000000000000..12c4c9e9bbc7
--- /dev/null
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -0,0 +1,230 @@
1/*
2 * pci_dn.c
3 *
4 * Copyright (C) 2001 Todd Inglett, IBM Corporation
5 *
6 * PCI manipulation via device_nodes.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22#include <linux/kernel.h>
23#include <linux/pci.h>
24#include <linux/string.h>
25#include <linux/init.h>
26#include <linux/slab.h>
27#include <linux/bootmem.h>
28
29#include <asm/io.h>
30#include <asm/prom.h>
31#include <asm/pci-bridge.h>
32#include <asm/pSeries_reconfig.h>
33#include <asm/ppc-pci.h>
34
35/*
36 * Traverse_func that inits the PCI fields of the device node.
37 * NOTE: this *must* be done before reading/writing config space on the device.
38 */
39static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
40{
41 struct pci_controller *phb = data;
42 int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
43 u32 *regs;
44 struct pci_dn *pdn;
45
46 if (mem_init_done)
47 pdn = kmalloc(sizeof(*pdn), GFP_KERNEL);
48 else
49 pdn = alloc_bootmem(sizeof(*pdn));
50 if (pdn == NULL)
51 return NULL;
52 memset(pdn, 0, sizeof(*pdn));
53 dn->data = pdn;
54 pdn->node = dn;
55 pdn->phb = phb;
56 regs = (u32 *)get_property(dn, "reg", NULL);
57 if (regs) {
58 /* First register entry is addr (00BBSS00) */
59 pdn->busno = (regs[0] >> 16) & 0xff;
60 pdn->devfn = (regs[0] >> 8) & 0xff;
61 }
62
63 pdn->pci_ext_config_space = (type && *type == 1);
64 return NULL;
65}
66
67/*
68 * Traverse a device tree, stopping at each PCI device in the tree.
69 * This is done depth first. As each node is processed, a "pre"
70 * function is called and the children are processed recursively.
71 *
72 * The "pre" func returns a value. If non-zero is returned from
73 * the "pre" func, the traversal stops and this value is returned.
74 * This return value is useful when using traverse as a method of
75 * finding a device.
76 *
77 * NOTE: we do not run the func for devices that do not appear to
78 * be PCI, except for the start node, which we assume to be PCI (this
79 * is good because the start node is often a phb that may be missing
80 * PCI properties).
81 * We use the class-code as an indicator. If we run into
82 * one of these nodes we also assume its siblings are non-pci for
83 * performance.
84 */
85void *traverse_pci_devices(struct device_node *start, traverse_func pre,
86 void *data)
87{
88 struct device_node *dn, *nextdn;
89 void *ret;
90
91 /* We started with a phb, iterate over all its children */
92 for (dn = start->child; dn; dn = nextdn) {
93 u32 *classp, class;
94
95 nextdn = NULL;
96 classp = (u32 *)get_property(dn, "class-code", NULL);
97 class = classp ? *classp : 0;
98
99 if (pre && ((ret = pre(dn, data)) != NULL))
100 return ret;
101
102 /* If we are a PCI bridge, go down */
103 if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
104 (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
105 /* Depth first...do children */
106 nextdn = dn->child;
107 else if (dn->sibling)
108 /* ok, try next sibling instead. */
109 nextdn = dn->sibling;
110 if (!nextdn) {
111 /* Walk up to next valid sibling. */
112 do {
113 dn = dn->parent;
114 if (dn == start)
115 return NULL;
116 } while (dn->sibling == NULL);
117 nextdn = dn->sibling;
118 }
119 }
120 return NULL;
121}
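/*
 * Illustrative sketch, not part of this patch: using
 * traverse_pci_devices() with a "pre" callback.  Returning non-NULL
 * from the callback stops the walk; this hypothetical counter returns
 * NULL so every PCI node under the phb is visited.
 */
#if 0
static void *example_count_cb(struct device_node *dn, void *data)
{
	(*(int *)data)++;
	return NULL;		/* keep walking */
}

static int example_count_pci_devs(struct pci_controller *phb)
{
	int count = 0;

	traverse_pci_devices((struct device_node *)phb->arch_data,
			     example_count_cb, &count);
	return count;
}
#endif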
122
123/**
124 * pci_devs_phb_init_dynamic - setup pci devices under this PHB
125 * @phb: pci-to-host bridge (top-level bridge connecting to the cpu)
126 *
127 * This routine is called both during boot, (before the memory
128 * subsystem is set up, before kmalloc is valid) and during the
129 * dynamic lpar operation of adding a PHB to a running system.
130 */
131void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
132{
133 struct device_node * dn = (struct device_node *) phb->arch_data;
134 struct pci_dn *pdn;
135
136 /* PHB nodes themselves must not match */
137 update_dn_pci_info(dn, phb);
138 pdn = dn->data;
139 if (pdn) {
140 pdn->devfn = pdn->busno = -1;
141 pdn->phb = phb;
142 }
143
144 /* Update dn->phb ptrs for new phb and children devices */
145 traverse_pci_devices(dn, update_dn_pci_info, phb);
146}
147
148/*
149 * Traversal func that looks for a <busno,devfn> value.
150 * If found, the pci_dn is returned (thus terminating the traversal).
151 */
152static void *is_devfn_node(struct device_node *dn, void *data)
153{
154 int busno = ((unsigned long)data >> 8) & 0xff;
155 int devfn = ((unsigned long)data) & 0xff;
156 struct pci_dn *pci = dn->data;
157
158 if (pci && (devfn == pci->devfn) && (busno == pci->busno))
159 return dn;
160 return NULL;
161}
162
163/*
164 * This is the "slow" path for looking up a device_node from a
165 * pci_dev. It will hunt for the device under its parent's
166 * phb and then update sysdata for a future fastpath.
167 *
168 * It may also do fixups on the actual device since this happens
169 * on the first read/write.
170 *
171 * Note that it also must deal with devices that don't exist.
172 * In this case it may probe for real hardware ("just in case")
173 * and add a device_node to the device tree if necessary.
174 *
175 */
176struct device_node *fetch_dev_dn(struct pci_dev *dev)
177{
178 struct device_node *orig_dn = dev->sysdata;
179 struct device_node *dn;
180 unsigned long searchval = (dev->bus->number << 8) | dev->devfn;
181
182 dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval);
183 if (dn)
184 dev->sysdata = dn;
185 return dn;
186}
187EXPORT_SYMBOL(fetch_dev_dn);
188
189static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
190{
191 struct device_node *np = node;
192 struct pci_dn *pci = NULL;
193 int err = NOTIFY_OK;
194
195 switch (action) {
196 case PSERIES_RECONFIG_ADD:
197 pci = np->parent->data;
198 if (pci)
199 update_dn_pci_info(np, pci->phb);
200 break;
201 default:
202 err = NOTIFY_DONE;
203 break;
204 }
205 return err;
206}
207
208static struct notifier_block pci_dn_reconfig_nb = {
209 .notifier_call = pci_dn_reconfig_notifier,
210};
211
212/**
213 * pci_devs_phb_init - Initialize phbs and pci devs under them.
214 *
215 * This routine walks over all phb's (pci-host bridges) on the
216 * system, and sets up assorted pci-related structures
217 * (including pci info in the device node structs) for each
218 * pci device found underneath. This routine runs once,
219 * early in the boot sequence.
220 */
221void __init pci_devs_phb_init(void)
222{
223 struct pci_controller *phb, *tmp;
224
225 /* This must be done first so the device nodes have valid pci info! */
226 list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
227 pci_devs_phb_init_dynamic(phb);
228
229 pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
230}
diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c
new file mode 100644
index 000000000000..bdf15dbbf4f0
--- /dev/null
+++ b/arch/powerpc/kernel/pci_iommu.c
@@ -0,0 +1,128 @@
1/*
2 * arch/ppc64/kernel/pci_iommu.c
3 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
4 *
5 * Rewrite, cleanup, new allocation schemes:
6 * Copyright (C) 2004 Olof Johansson, IBM Corporation
7 *
8 * Dynamic DMA mapping support, platform-independent parts.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25
26#include <linux/init.h>
27#include <linux/types.h>
28#include <linux/slab.h>
29#include <linux/mm.h>
30#include <linux/spinlock.h>
31#include <linux/string.h>
32#include <linux/pci.h>
33#include <linux/dma-mapping.h>
34#include <asm/io.h>
35#include <asm/prom.h>
36#include <asm/iommu.h>
37#include <asm/pci-bridge.h>
38#include <asm/machdep.h>
39#include <asm/ppc-pci.h>
40
41/*
42 * We can use ->sysdata directly and avoid the extra work in
43 * pci_device_to_OF_node since ->sysdata will have been initialised
44 * in the iommu init code for all devices.
45 */
46#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata))
47
48static inline struct iommu_table *devnode_table(struct device *dev)
49{
50 struct pci_dev *pdev;
51
52 if (!dev) {
53 pdev = ppc64_isabridge_dev;
54 if (!pdev)
55 return NULL;
56 } else
57 pdev = to_pci_dev(dev);
58
59 return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
60}
61
62
63/* Allocates a contiguous real buffer and creates mappings over it.
64 * Returns the virtual address of the buffer and sets dma_handle
65 * to the dma address (mapping) of the first page.
66 */
67static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size,
68 dma_addr_t *dma_handle, gfp_t flag)
69{
70 return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle,
71 flag);
72}
73
74static void pci_iommu_free_coherent(struct device *hwdev, size_t size,
75 void *vaddr, dma_addr_t dma_handle)
76{
77 iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle);
78}
79
80/* Creates TCEs for a user provided buffer. The user buffer must be
81 * contiguous real kernel storage (not vmalloc). The address of the buffer
82 * passed here is the kernel (virtual) address of the buffer. The buffer
83 * need not be page aligned, the dma_addr_t returned will point to the same
84 * byte within the page as vaddr.
85 */
86static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr,
87 size_t size, enum dma_data_direction direction)
88{
89 return iommu_map_single(devnode_table(hwdev), vaddr, size, direction);
90}
91
92
93static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle,
94 size_t size, enum dma_data_direction direction)
95{
96 iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction);
97}
98
99
100static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist,
101 int nelems, enum dma_data_direction direction)
102{
103 return iommu_map_sg(pdev, devnode_table(pdev), sglist,
104 nelems, direction);
105}
106
107static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist,
108 int nelems, enum dma_data_direction direction)
109{
110 iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction);
111}
112
113/* We support DMA to/from any memory page via the iommu */
114static int pci_iommu_dma_supported(struct device *dev, u64 mask)
115{
116 return 1;
117}
118
119void pci_iommu_init(void)
120{
121 pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent;
122 pci_dma_ops.free_coherent = pci_iommu_free_coherent;
123 pci_dma_ops.map_single = pci_iommu_map_single;
124 pci_dma_ops.unmap_single = pci_iommu_unmap_single;
125 pci_dma_ops.map_sg = pci_iommu_map_sg;
126 pci_dma_ops.unmap_sg = pci_iommu_unmap_sg;
127 pci_dma_ops.dma_supported = pci_iommu_dma_supported;
128}
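/*
 * Illustrative note, not part of this patch: once pci_iommu_init() has
 * filled in pci_dma_ops, an ordinary driver DMA call such as
 *
 *	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *
 * is dispatched through these hooks and ends up in
 * pci_iommu_map_single(), which builds TCEs in the device's
 * iommu_table.  The device and buffer names are assumptions.
 */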
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 5dcf4ba05ee8..59846b40d521 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -105,6 +105,13 @@ EXPORT_SYMBOL(__clear_user);
105EXPORT_SYMBOL(__strncpy_from_user); 105EXPORT_SYMBOL(__strncpy_from_user);
106EXPORT_SYMBOL(__strnlen_user); 106EXPORT_SYMBOL(__strnlen_user);
107 107
108#ifndef __powerpc64__
109EXPORT_SYMBOL(__ide_mm_insl);
110EXPORT_SYMBOL(__ide_mm_outsw);
111EXPORT_SYMBOL(__ide_mm_insw);
112EXPORT_SYMBOL(__ide_mm_outsl);
113#endif
114
108EXPORT_SYMBOL(_insb); 115EXPORT_SYMBOL(_insb);
109EXPORT_SYMBOL(_outsb); 116EXPORT_SYMBOL(_outsb);
110EXPORT_SYMBOL(_insw); 117EXPORT_SYMBOL(_insw);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 6a5b468edb4d..3bf968e74095 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -1368,6 +1368,7 @@ prom_n_addr_cells(struct device_node* np)
1368 /* No #address-cells property for the root node, default to 1 */ 1368 /* No #address-cells property for the root node, default to 1 */
1369 return 1; 1369 return 1;
1370} 1370}
1371EXPORT_SYMBOL(prom_n_addr_cells);
1371 1372
1372int 1373int
1373prom_n_size_cells(struct device_node* np) 1374prom_n_size_cells(struct device_node* np)
@@ -1383,6 +1384,7 @@ prom_n_size_cells(struct device_node* np)
1383 /* No #size-cells property for the root node, default to 1 */ 1384 /* No #size-cells property for the root node, default to 1 */
1384 return 1; 1385 return 1;
1385} 1386}
1387EXPORT_SYMBOL(prom_n_size_cells);
1386 1388
1387/** 1389/**
1388 * Work out the sense (active-low level / active-high edge) 1390 * Work out the sense (active-low level / active-high edge)
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
new file mode 100644
index 000000000000..635d3b9a8811
--- /dev/null
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -0,0 +1,105 @@
1#include <linux/kernel.h>
2#include <linux/time.h>
3#include <linux/timer.h>
4#include <linux/init.h>
5#include <linux/rtc.h>
6#include <linux/delay.h>
7#include <asm/prom.h>
8#include <asm/rtas.h>
9#include <asm/time.h>
10
11
12#define MAX_RTC_WAIT 5000 /* 5 sec */
13#define RTAS_CLOCK_BUSY (-2)
14unsigned long __init rtas_get_boot_time(void)
15{
16 int ret[8];
17 int error, wait_time;
18 u64 max_wait_tb;
19
20 max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
21 do {
22 error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
23 if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
24 wait_time = rtas_extended_busy_delay_time(error);
25 /* This is boot time so we spin. */
26 udelay(wait_time*1000);
27 error = RTAS_CLOCK_BUSY;
28 }
29 } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
30
31 if (error != 0 && printk_ratelimit()) {
32 printk(KERN_WARNING "error: reading the clock failed (%d)\n",
33 error);
34 return 0;
35 }
36
37 return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]);
38}
39
40/* NOTE: rtas_get_rtc_time will get an error if executed in interrupt
41 * context and if a delay is needed to read the clock.  In that case we
42 * warn (rate-limited), zero out *rtc_tm and return without reading it.
43 */
44void rtas_get_rtc_time(struct rtc_time *rtc_tm)
45{
46 int ret[8];
47 int error, wait_time;
48 u64 max_wait_tb;
49
50 max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
51 do {
52 error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
53 if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
54 if (in_interrupt() && printk_ratelimit()) {
55 memset(rtc_tm, 0, sizeof(struct rtc_time));
56 printk(KERN_WARNING "error: reading clock"
57 " would delay interrupt\n");
58 return; /* delay not allowed */
59 }
60 wait_time = rtas_extended_busy_delay_time(error);
61 msleep(wait_time);
62 error = RTAS_CLOCK_BUSY;
63 }
64 } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
65
66 if (error != 0 && printk_ratelimit()) {
67 printk(KERN_WARNING "error: reading the clock failed (%d)\n",
68 error);
69 return;
70 }
71
72 rtc_tm->tm_sec = ret[5];
73 rtc_tm->tm_min = ret[4];
74 rtc_tm->tm_hour = ret[3];
75 rtc_tm->tm_mday = ret[2];
76 rtc_tm->tm_mon = ret[1] - 1;
77 rtc_tm->tm_year = ret[0] - 1900;
78}
79
80int rtas_set_rtc_time(struct rtc_time *tm)
81{
82 int error, wait_time;
83 u64 max_wait_tb;
84
85 max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
86 do {
87 error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
88 tm->tm_year + 1900, tm->tm_mon + 1,
89 tm->tm_mday, tm->tm_hour, tm->tm_min,
90 tm->tm_sec, 0);
91 if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
92 if (in_interrupt())
93 return 1; /* probably decrementer */
94 wait_time = rtas_extended_busy_delay_time(error);
95 msleep(wait_time);
96 error = RTAS_CLOCK_BUSY;
97 }
98 } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb));
99
100 if (error != 0 && printk_ratelimit())
101 printk(KERN_WARNING "error: setting the clock failed (%d)\n",
102 error);
103
104 return 0;
105}
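
Note: all three routines above use the same retry loop: issue the RTAS call, and while the firmware reports the clock busy (or an extended-busy status), wait the suggested delay (spinning at boot, sleeping otherwise) until a deadline roughly MAX_RTC_WAIT milliseconds away. A self-contained user-space sketch of that pattern follows; fake_clock_call(), RTC_BUSY and the 10 ms back-off are invented for illustration.

#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define RTC_BUSY	(-2)	/* stand-in for RTAS_CLOCK_BUSY */
#define MAX_WAIT_MS	5000	/* same 5 second cap as MAX_RTC_WAIT */

static int attempts;

/* Pretend firmware call: busy for the first two attempts, then OK. */
static int fake_clock_call(void)
{
	return (++attempts < 3) ? RTC_BUSY : 0;
}

static long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

int main(void)
{
	long deadline = now_ms() + MAX_WAIT_MS;
	int error;

	do {
		error = fake_clock_call();
		if (error == RTC_BUSY)
			usleep(10 * 1000);	/* back off, then retry */
	} while (error == RTC_BUSY && now_ms() < deadline);

	printf("status %d after %d attempts\n", error, attempts);
	return 0;
}
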
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 0e5a8e116653..60dec2401c26 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -304,75 +304,18 @@ static int __devinit setup_phb(struct device_node *dev,
304 struct pci_controller *phb, 304 struct pci_controller *phb,
305 unsigned int addr_size_words) 305 unsigned int addr_size_words)
306{ 306{
307 pci_setup_pci_controller(phb);
308
309 if (is_python(dev)) 307 if (is_python(dev))
310 python_countermeasures(dev, addr_size_words); 308 python_countermeasures(dev, addr_size_words);
311 309
312 if (phb_set_bus_ranges(dev, phb)) 310 if (phb_set_bus_ranges(dev, phb))
313 return 1; 311 return 1;
314 312
315 phb->arch_data = dev;
316 phb->ops = &rtas_pci_ops; 313 phb->ops = &rtas_pci_ops;
317 phb->buid = get_phb_buid(dev); 314 phb->buid = get_phb_buid(dev);
318 315
319 return 0; 316 return 0;
320} 317}
321 318
322static void __devinit add_linux_pci_domain(struct device_node *dev,
323 struct pci_controller *phb,
324 struct property *of_prop)
325{
326 memset(of_prop, 0, sizeof(struct property));
327 of_prop->name = "linux,pci-domain";
328 of_prop->length = sizeof(phb->global_number);
329 of_prop->value = (unsigned char *)&of_prop[1];
330 memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
331 prom_add_property(dev, of_prop);
332}
333
334static struct pci_controller * __init alloc_phb(struct device_node *dev,
335 unsigned int addr_size_words)
336{
337 struct pci_controller *phb;
338 struct property *of_prop;
339
340 phb = alloc_bootmem(sizeof(struct pci_controller));
341 if (phb == NULL)
342 return NULL;
343
344 of_prop = alloc_bootmem(sizeof(struct property) +
345 sizeof(phb->global_number));
346 if (!of_prop)
347 return NULL;
348
349 if (setup_phb(dev, phb, addr_size_words))
350 return NULL;
351
352 add_linux_pci_domain(dev, phb, of_prop);
353
354 return phb;
355}
356
357static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
358{
359 struct pci_controller *phb;
360
361 phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
362 GFP_KERNEL);
363 if (phb == NULL)
364 return NULL;
365
366 if (setup_phb(dev, phb, addr_size_words))
367 return NULL;
368
369 phb->is_dynamic = 1;
370
371 /* TODO: linux,pci-domain? */
372
373 return phb;
374}
375
376unsigned long __init find_and_init_phbs(void) 319unsigned long __init find_and_init_phbs(void)
377{ 320{
378 struct device_node *node; 321 struct device_node *node;
@@ -397,10 +340,10 @@ unsigned long __init find_and_init_phbs(void)
397 if (node->type == NULL || strcmp(node->type, "pci") != 0) 340 if (node->type == NULL || strcmp(node->type, "pci") != 0)
398 continue; 341 continue;
399 342
400 phb = alloc_phb(node, root_size_cells); 343 phb = pcibios_alloc_controller(node);
401 if (!phb) 344 if (!phb)
402 continue; 345 continue;
403 346 setup_phb(node, phb, root_size_cells);
404 pci_process_bridge_OF_ranges(phb, node, 0); 347 pci_process_bridge_OF_ranges(phb, node, 0);
405 pci_setup_phb_io(phb, index == 0); 348 pci_setup_phb_io(phb, index == 0);
406#ifdef CONFIG_PPC_PSERIES 349#ifdef CONFIG_PPC_PSERIES
@@ -446,10 +389,10 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
446 root_size_cells = prom_n_size_cells(root); 389 root_size_cells = prom_n_size_cells(root);
447 390
448 primary = list_empty(&hose_list); 391 primary = list_empty(&hose_list);
449 phb = alloc_phb_dynamic(dn, root_size_cells); 392 phb = pcibios_alloc_controller(dn);
450 if (!phb) 393 if (!phb)
451 return NULL; 394 return NULL;
452 395 setup_phb(dn, phb, root_size_cells);
453 pci_process_bridge_OF_ranges(phb, dn, primary); 396 pci_process_bridge_OF_ranges(phb, dn, primary);
454 397
455 pci_setup_phb_io_dynamic(phb, primary); 398 pci_setup_phb_io_dynamic(phb, primary);
@@ -505,8 +448,7 @@ int pcibios_remove_root_bus(struct pci_controller *phb)
505 } 448 }
506 449
507 list_del(&phb->list_node); 450 list_del(&phb->list_node);
508 if (phb->is_dynamic) 451 pcibios_free_controller(phb);
509 kfree(phb);
510 452
511 return 0; 453 return 0;
512} 454}
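
Note: with this change the bridge structure is always obtained from pcibios_alloc_controller() and released with pcibios_free_controller(), so the caller in pcibios_remove_root_bus() no longer needs its own is_dynamic/kfree test, and setup_phb() is left with only the per-bridge initialisation. The following user-space sketch shows that centralised alloc/free split; the names are placeholders, not the kernel API.

#include <stdio.h>
#include <stdlib.h>

struct controller_sketch {
	int is_dynamic;		/* set only for run-time (hotplug) allocations */
};

/* Stand-in for pcibios_alloc_controller(): records how the memory was
 * obtained so the matching free routine can do the right thing. */
static struct controller_sketch *alloc_controller_sketch(int booting)
{
	struct controller_sketch *phb = calloc(1, sizeof(*phb));

	if (phb && !booting)
		phb->is_dynamic = 1;
	return phb;
}

/* Stand-in for pcibios_free_controller(): boot-time allocations are
 * never returned, dynamic ones are freed. */
static void free_controller_sketch(struct controller_sketch *phb)
{
	if (phb && phb->is_dynamic)
		free(phb);
}

int main(void)
{
	struct controller_sketch *phb = alloc_controller_sketch(0);

	printf("dynamic = %d\n", phb ? phb->is_dynamic : -1);
	free_controller_sketch(phb);
	return 0;
}
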
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 33e7f2c7f194..bd3eb4292b53 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -59,6 +59,7 @@
59#undef DEBUG 59#undef DEBUG
60 60
61#ifdef DEBUG 61#ifdef DEBUG
62#include <asm/udbg.h>
62#define DBG(fmt...) udbg_printf(fmt) 63#define DBG(fmt...) udbg_printf(fmt)
63#else 64#else
64#define DBG(fmt...) 65#define DBG(fmt...)
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index c98cfcc9cd9a..e5694335bf10 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -57,10 +57,6 @@ extern void power4_idle(void);
57boot_infos_t *boot_infos; 57boot_infos_t *boot_infos;
58struct ide_machdep_calls ppc_ide_md; 58struct ide_machdep_calls ppc_ide_md;
59 59
60/* XXX should go elsewhere */
61int __irq_offset_value;
62EXPORT_SYMBOL(__irq_offset_value);
63
64int boot_cpuid; 60int boot_cpuid;
65EXPORT_SYMBOL_GPL(boot_cpuid); 61EXPORT_SYMBOL_GPL(boot_cpuid);
66int boot_cpuid_phys; 62int boot_cpuid_phys;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index fdbd9f9122f2..608fee7c7e20 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -59,6 +59,7 @@
59#include <asm/firmware.h> 59#include <asm/firmware.h>
60#include <asm/xmon.h> 60#include <asm/xmon.h>
61#include <asm/udbg.h> 61#include <asm/udbg.h>
62#include <asm/kexec.h>
62 63
63#include "setup.h" 64#include "setup.h"
64 65
@@ -415,6 +416,10 @@ void __init setup_system(void)
415 */ 416 */
416 unflatten_device_tree(); 417 unflatten_device_tree();
417 418
419#ifdef CONFIG_KEXEC
420 kexec_setup(); /* requires unflattened device tree. */
421#endif
422
418 /* 423 /*
419 * Fill the ppc64_caches & systemcfg structures with informations 424 * Fill the ppc64_caches & systemcfg structures with informations
420 * retreived from the device-tree. Need to be called before 425 * retreived from the device-tree. Need to be called before
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 8bdf95b7e420..5a2eba60dd39 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -403,8 +403,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
403 ELF_NFPREG * sizeof(double))) 403 ELF_NFPREG * sizeof(double)))
404 return 1; 404 return 1;
405 405
406 current->thread.fpscr.val = 0; /* turn off all fp exceptions */
407
408#ifdef CONFIG_ALTIVEC 406#ifdef CONFIG_ALTIVEC
409 /* save altivec registers */ 407 /* save altivec registers */
410 if (current->thread.used_vr) { 408 if (current->thread.used_vr) {
@@ -818,6 +816,9 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka,
818 goto badframe; 816 goto badframe;
819 regs->link = (unsigned long) frame->tramp; 817 regs->link = (unsigned long) frame->tramp;
820 } 818 }
819
820 current->thread.fpscr.val = 0; /* turn off all fp exceptions */
821
821 if (put_user(regs->gpr[1], (u32 __user *)newsp)) 822 if (put_user(regs->gpr[1], (u32 __user *)newsp))
822 goto badframe; 823 goto badframe;
823 regs->gpr[1] = newsp; 824 regs->gpr[1] = newsp;
@@ -1097,6 +1098,8 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
1097 regs->link = (unsigned long) frame->mctx.tramp; 1098 regs->link = (unsigned long) frame->mctx.tramp;
1098 } 1099 }
1099 1100
1101 current->thread.fpscr.val = 0; /* turn off all fp exceptions */
1102
1100 if (put_user(regs->gpr[1], (u32 __user *)newsp)) 1103 if (put_user(regs->gpr[1], (u32 __user *)newsp))
1101 goto badframe; 1104 goto badframe;
1102 regs->gpr[1] = newsp; 1105 regs->gpr[1] = newsp;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 58194e150711..1decf2785530 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -131,9 +131,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
131 131
132 flush_fp_to_thread(current); 132 flush_fp_to_thread(current);
133 133
134 /* Make sure signal doesn't get spurrious FP exceptions */
135 current->thread.fpscr.val = 0;
136
137#ifdef CONFIG_ALTIVEC 134#ifdef CONFIG_ALTIVEC
138 err |= __put_user(v_regs, &sc->v_regs); 135 err |= __put_user(v_regs, &sc->v_regs);
139 136
@@ -423,6 +420,9 @@ static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
423 if (err) 420 if (err)
424 goto badframe; 421 goto badframe;
425 422
423 /* Make sure signal handler doesn't get spurious FP exceptions */
424 current->thread.fpscr.val = 0;
425
426 /* Set up to return from userspace. */ 426 /* Set up to return from userspace. */
427 if (vdso64_rt_sigtramp && current->thread.vdso_base) { 427 if (vdso64_rt_sigtramp && current->thread.vdso_base) {
428 regs->link = current->thread.vdso_base + vdso64_rt_sigtramp; 428 regs->link = current->thread.vdso_base + vdso64_rt_sigtramp;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 62dfc5b8d765..30374d2f88e5 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -49,15 +49,16 @@
49#include <asm/paca.h> 49#include <asm/paca.h>
50#endif 50#endif
51 51
52int smp_hw_index[NR_CPUS];
53struct thread_info *secondary_ti;
54
55#ifdef DEBUG 52#ifdef DEBUG
53#include <asm/udbg.h>
56#define DBG(fmt...) udbg_printf(fmt) 54#define DBG(fmt...) udbg_printf(fmt)
57#else 55#else
58#define DBG(fmt...) 56#define DBG(fmt...)
59#endif 57#endif
60 58
59int smp_hw_index[NR_CPUS];
60struct thread_info *secondary_ti;
61
61cpumask_t cpu_possible_map = CPU_MASK_NONE; 62cpumask_t cpu_possible_map = CPU_MASK_NONE;
62cpumask_t cpu_online_map = CPU_MASK_NONE; 63cpumask_t cpu_online_map = CPU_MASK_NONE;
63cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 64cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 070b4b458aaf..de8479769bb7 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -130,6 +130,34 @@ unsigned long tb_last_stamp;
130 */ 130 */
131DEFINE_PER_CPU(unsigned long, last_jiffy); 131DEFINE_PER_CPU(unsigned long, last_jiffy);
132 132
133void __delay(unsigned long loops)
134{
135 unsigned long start;
136 int diff;
137
138 if (__USE_RTC()) {
139 start = get_rtcl();
140 do {
141 /* the RTCL register wraps at 1000000000 */
142 diff = get_rtcl() - start;
143 if (diff < 0)
144 diff += 1000000000;
145 } while (diff < loops);
146 } else {
147 start = get_tbl();
148 while (get_tbl() - start < loops)
149 HMT_low();
150 HMT_medium();
151 }
152}
153EXPORT_SYMBOL(__delay);
154
155void udelay(unsigned long usecs)
156{
157 __delay(tb_ticks_per_usec * usecs);
158}
159EXPORT_SYMBOL(udelay);
160
133static __inline__ void timer_check_rtc(void) 161static __inline__ void timer_check_rtc(void)
134{ 162{
135 /* 163 /*
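
Note: the new __delay() spins on the timebase (or, on the oldest CPUs, on the RTC lower register, which wraps at 10^9), and udelay() simply converts microseconds to timebase ticks via tb_ticks_per_usec. A small user-space illustration of that conversion, using an assumed timebase frequency rather than the real device-tree value:

#include <stdio.h>

int main(void)
{
	/* Assumed 512 MHz timebase; the kernel derives tb_ticks_per_usec
	 * from the actual frequency found in the device tree. */
	unsigned long tb_freq_hz = 512000000UL;
	unsigned long tb_ticks_per_usec = tb_freq_hz / 1000000UL;
	unsigned long usecs = 250;

	/* udelay(usecs) boils down to __delay(tb_ticks_per_usec * usecs),
	 * i.e. spinning until the timebase has advanced this many ticks. */
	printf("udelay(%lu) spins for %lu timebase ticks\n",
	       usecs, tb_ticks_per_usec * usecs);
	return 0;
}
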
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
index c8db993574ee..09629aea3e47 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso32/cacheflush.S
@@ -35,6 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
35 subf r8,r6,r4 /* compute length */ 35 subf r8,r6,r4 /* compute length */
36 add r8,r8,r5 /* ensure we get enough */ 36 add r8,r8,r5 /* ensure we get enough */
37 srwi. r8,r8,7 /* compute line count */ 37 srwi. r8,r8,7 /* compute line count */
38 crclr cr0*4+so
38 beqlr /* nothing to do? */ 39 beqlr /* nothing to do? */
39 mtctr r8 40 mtctr r8
40 mr r3,r6 41 mr r3,r6
@@ -58,6 +59,7 @@ V_FUNCTION_END(__kernel_sync_dicache)
58 */ 59 */
59V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) 60V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
60 .cfi_startproc 61 .cfi_startproc
62 crclr cr0*4+so
61 sync 63 sync
62 isync 64 isync
63 li r3,0 65 li r3,0
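
Note: several of the vDSO fixes in this series are condition-register bit arithmetic. crclr cr0*4+so clears the summary-overflow bit that the syscall-style vDSO calling convention uses to report errors, and the cror fixes further down replace whole-field operands (which assemble to CR bit numbers 0 and 1, i.e. cr0's LT and GT bits) with the intended EQ bits. A tiny self-contained check of the bit numbers involved; the enum names are illustrative only.

#include <stdio.h>

/* Bit offsets within one 4-bit condition-register field. */
enum { CR_LT, CR_GT, CR_EQ, CR_SO };

int main(void)
{
	printf("cr0*4+so -> CR bit %d\n", 0 * 4 + CR_SO);	/* 3 */
	printf("cr0*4+eq -> CR bit %d\n", 0 * 4 + CR_EQ);	/* 2 */
	printf("cr1*4+eq -> CR bit %d\n", 1 * 4 + CR_EQ);	/* 6 */
	printf("plain cr0, cr1 -> CR bits %d, %d\n", 0, 1);	/* cr0 LT, GT */
	return 0;
}
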
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index a08c26e87835..4709f1d9542c 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -54,7 +54,6 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
54 .cfi_startproc 54 .cfi_startproc
55 mflr r12 55 mflr r12
56 .cfi_register lr,r12 56 .cfi_register lr,r12
57
58 mr r4,r3 57 mr r4,r3
59 bl __get_datapage@local 58 bl __get_datapage@local
60 mtlr r12 59 mtlr r12
@@ -63,6 +62,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
63 beqlr 62 beqlr
64 li r0,__NR_syscalls 63 li r0,__NR_syscalls
65 stw r0,0(r4) 64 stw r0,0(r4)
65 crclr cr0*4+so
66 blr 66 blr
67 .cfi_endproc 67 .cfi_endproc
68V_FUNCTION_END(__kernel_get_syscall_map) 68V_FUNCTION_END(__kernel_get_syscall_map)
@@ -77,8 +77,10 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
77 mflr r12 77 mflr r12
78 .cfi_register lr,r12 78 .cfi_register lr,r12
79 bl __get_datapage@local 79 bl __get_datapage@local
80 lwz r3,CFG_TB_TICKS_PER_SEC(r3)
81 lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) 80 lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
81 lwz r3,CFG_TB_TICKS_PER_SEC(r3)
82 mtlr r12 82 mtlr r12
83 crclr cr0*4+so
84 blr
83 .cfi_endproc 85 .cfi_endproc
84V_FUNCTION_END(__kernel_get_tbfreq) 86V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index aeb5fc9b87b3..7eebff03a041 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -59,6 +59,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
59 stw r5,TZONE_TZ_DSTTIME(r11) 59 stw r5,TZONE_TZ_DSTTIME(r11)
60 60
611: mtlr r12 611: mtlr r12
62 crclr cr0*4+so
62 li r3,0 63 li r3,0
63 blr 64 blr
64 65
@@ -83,7 +84,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
83 /* Check for supported clock IDs */ 84 /* Check for supported clock IDs */
84 cmpli cr0,r3,CLOCK_REALTIME 85 cmpli cr0,r3,CLOCK_REALTIME
85 cmpli cr1,r3,CLOCK_MONOTONIC 86 cmpli cr1,r3,CLOCK_MONOTONIC
86 cror cr0,cr0,cr1 87 cror cr0*4+eq,cr0*4+eq,cr1*4+eq
87 bne cr0,99f 88 bne cr0,99f
88 89
89 mflr r12 /* r12 saves lr */ 90 mflr r12 /* r12 saves lr */
@@ -91,7 +92,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
91 mr r10,r3 /* r10 saves id */ 92 mr r10,r3 /* r10 saves id */
92 mr r11,r4 /* r11 saves tp */ 93 mr r11,r4 /* r11 saves tp */
93 bl __get_datapage@local /* get data page */ 94 bl __get_datapage@local /* get data page */
94 mr r9, r3 /* datapage ptr in r9 */ 95 mr r9,r3 /* datapage ptr in r9 */
95 beq cr1,50f /* if monotonic -> jump there */ 96 beq cr1,50f /* if monotonic -> jump there */
96 97
97 /* 98 /*
@@ -117,6 +118,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
117 mulli r5,r5,1000 118 mulli r5,r5,1000
118 stw r5,TSPC32_TV_NSEC(r11) 119 stw r5,TSPC32_TV_NSEC(r11)
119 mtlr r12 120 mtlr r12
121 crclr cr0*4+so
120 li r3,0 122 li r3,0
121 blr 123 blr
122 124
@@ -173,14 +175,19 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
173 add r4,r4,r7 175 add r4,r4,r7
174 lis r5,NSEC_PER_SEC@h 176 lis r5,NSEC_PER_SEC@h
175 ori r5,r5,NSEC_PER_SEC@l 177 ori r5,r5,NSEC_PER_SEC@l
176 cmpli cr0,r4,r5 178 cmpl cr0,r4,r5
179 cmpli cr1,r4,0
177 blt 1f 180 blt 1f
178 subf r4,r5,r4 181 subf r4,r5,r4
179 addi r3,r3,1 182 addi r3,r3,1
1831: bge cr1,1f
184 addi r3,r3,-1
185 add r4,r4,r5
1801: stw r3,TSPC32_TV_SEC(r11) 1861: stw r3,TSPC32_TV_SEC(r11)
181 stw r4,TSPC32_TV_NSEC(r11) 187 stw r4,TSPC32_TV_NSEC(r11)
182 188
183 mtlr r12 189 mtlr r12
190 crclr cr0*4+so
184 li r3,0 191 li r3,0
185 blr 192 blr
186 193
@@ -210,11 +217,12 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
210 /* Check for supported clock IDs */ 217 /* Check for supported clock IDs */
211 cmpwi cr0,r3,CLOCK_REALTIME 218 cmpwi cr0,r3,CLOCK_REALTIME
212 cmpwi cr1,r3,CLOCK_MONOTONIC 219 cmpwi cr1,r3,CLOCK_MONOTONIC
213 cror cr0,cr0,cr1 220 cror cr0*4+eq,cr0*4+eq,cr1*4+eq
214 bne cr0,99f 221 bne cr0,99f
215 222
216 li r3,0 223 li r3,0
217 cmpli cr0,r4,0 224 cmpli cr0,r4,0
225 crclr cr0*4+so
218 beqlr 226 beqlr
219 lis r5,CLOCK_REALTIME_RES@h 227 lis r5,CLOCK_REALTIME_RES@h
220 ori r5,r5,CLOCK_REALTIME_RES@l 228 ori r5,r5,CLOCK_REALTIME_RES@l
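
Note: the extra compare and branch added to the monotonic-clock path above implement nanosecond normalisation: after the wall-to-monotonic offset is added, the nanosecond value can fall outside [0, NSEC_PER_SEC) in either direction, and the seconds field has to be adjusted to compensate. A user-space C sketch of that intended adjustment:

#include <stdio.h>

#define NSEC_PER_SEC 1000000000L

static void normalize(long *sec, long *nsec)
{
	if (*nsec >= NSEC_PER_SEC) {
		*nsec -= NSEC_PER_SEC;
		*sec += 1;
	} else if (*nsec < 0) {
		*nsec += NSEC_PER_SEC;
		*sec -= 1;
	}
}

int main(void)
{
	long sec = 10, nsec = -300000000L;

	normalize(&sec, &nsec);
	printf("%ld.%09ld\n", sec, nsec);	/* prints 9.700000000 */
	return 0;
}
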
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
index d4a0ad28d534..cb4ae0a5edd0 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -35,6 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
35 subf r8,r6,r4 /* compute length */ 35 subf r8,r6,r4 /* compute length */
36 add r8,r8,r5 /* ensure we get enough */ 36 add r8,r8,r5 /* ensure we get enough */
37 srwi. r8,r8,7 /* compute line count */ 37 srwi. r8,r8,7 /* compute line count */
38 crclr cr0*4+so
38 beqlr /* nothing to do? */ 39 beqlr /* nothing to do? */
39 mtctr r8 40 mtctr r8
40 mr r3,r6 41 mr r3,r6
@@ -58,6 +59,7 @@ V_FUNCTION_END(__kernel_sync_dicache)
58 */ 59 */
59V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) 60V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
60 .cfi_startproc 61 .cfi_startproc
62 crclr cr0*4+so
61 sync 63 sync
62 isync 64 isync
63 li r3,0 65 li r3,0
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index e67eda0f8cda..3b2dd7d0c1eb 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -54,12 +54,12 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
54 .cfi_startproc 54 .cfi_startproc
55 mflr r12 55 mflr r12
56 .cfi_register lr,r12 56 .cfi_register lr,r12
57
58 mr r4,r3 57 mr r4,r3
59 bl V_LOCAL_FUNC(__get_datapage) 58 bl V_LOCAL_FUNC(__get_datapage)
60 mtlr r12 59 mtlr r12
61 addi r3,r3,CFG_SYSCALL_MAP64 60 addi r3,r3,CFG_SYSCALL_MAP64
62 cmpli cr0,r4,0 61 cmpli cr0,r4,0
62 crclr cr0*4+so
63 beqlr 63 beqlr
64 li r0,__NR_syscalls 64 li r0,__NR_syscalls
65 stw r0,0(r4) 65 stw r0,0(r4)
@@ -80,5 +80,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
80 bl V_LOCAL_FUNC(__get_datapage) 80 bl V_LOCAL_FUNC(__get_datapage)
81 ld r3,CFG_TB_TICKS_PER_SEC(r3) 81 ld r3,CFG_TB_TICKS_PER_SEC(r3)
82 mtlr r12 82 mtlr r12
83 crclr cr0*4+so
84 blr
83 .cfi_endproc 85 .cfi_endproc
84V_FUNCTION_END(__kernel_get_tbfreq) 86V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index d371c02a8c0e..ccaeda5136d1 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -1,4 +1,5 @@
1/* 1
2 /*
2 * Userland implementation of gettimeofday() for 64 bits processes in a 3 * Userland implementation of gettimeofday() for 64 bits processes in a
3 * ppc64 kernel for use in the vDSO 4 * ppc64 kernel for use in the vDSO
4 * 5 *
@@ -51,6 +52,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
51 stw r4,TZONE_TZ_MINWEST(r10) 52 stw r4,TZONE_TZ_MINWEST(r10)
52 stw r5,TZONE_TZ_DSTTIME(r10) 53 stw r5,TZONE_TZ_DSTTIME(r10)
531: mtlr r12 541: mtlr r12
55 crclr cr0*4+so
54 li r3,0 /* always success */ 56 li r3,0 /* always success */
55 blr 57 blr
56 .cfi_endproc 58 .cfi_endproc
@@ -68,7 +70,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
68 /* Check for supported clock IDs */ 70 /* Check for supported clock IDs */
69 cmpwi cr0,r3,CLOCK_REALTIME 71 cmpwi cr0,r3,CLOCK_REALTIME
70 cmpwi cr1,r3,CLOCK_MONOTONIC 72 cmpwi cr1,r3,CLOCK_MONOTONIC
71 cror cr0,cr0,cr1 73 cror cr0*4+eq,cr0*4+eq,cr1*4+eq
72 bne cr0,99f 74 bne cr0,99f
73 75
74 mflr r12 /* r12 saves lr */ 76 mflr r12 /* r12 saves lr */
@@ -84,19 +86,21 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
84 86
85 bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ 87 bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
86 88
87 lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */ 89 lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
88 ori r7,r7,0xca00 90 ori r7,r7,16960
89 rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ 91 rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
90 rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ 92 rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
91 std r5,TSPC64_TV_SEC(r11) /* store sec in tv */ 93 std r5,TSPC64_TV_SEC(r11) /* store sec in tv */
92 subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ 94 subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
93 mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) / 95 mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
94 * XSEC_PER_SEC 96 * XSEC_PER_SEC
95 */ 97 */
96 rldicl r0,r0,44,20 98 rldicl r0,r0,44,20
99 mulli r0,r0,1000 /* nsec = usec * 1000 */
97 std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */ 100 std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */
98 101
99 mtlr r12 102 mtlr r12
103 crclr cr0*4+so
100 li r3,0 104 li r3,0
101 blr 105 blr
102 106
@@ -106,15 +110,16 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
106 110
10750: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ 11150: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
108 112
109 lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */ 113 lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
110 ori r7,r7,0xca00 114 ori r7,r7,16960
111 rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ 115 rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
112 rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ 116 rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
113 subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ 117 subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
114 mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) / 118 mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
115 * XSEC_PER_SEC 119 * XSEC_PER_SEC
116 */ 120 */
117 rldicl r6,r0,44,20 121 rldicl r6,r0,44,20
122 mulli r6,r6,1000 /* nsec = usec * 1000 */
118 123
119 /* now we must fixup using wall to monotonic. We need to snapshot 124 /* now we must fixup using wall to monotonic. We need to snapshot
120 * that value and do the counter trick again. Fortunately, we still 125 * that value and do the counter trick again. Fortunately, we still
@@ -123,8 +128,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
123 * can be used 128 * can be used
124 */ 129 */
125 130
126 lwz r4,WTOM_CLOCK_SEC(r9) 131 lwa r4,WTOM_CLOCK_SEC(r3)
127 lwz r7,WTOM_CLOCK_NSEC(r9) 132 lwa r7,WTOM_CLOCK_NSEC(r3)
128 133
129 /* We now have our result in r4,r7. We create a fake dependency 134 /* We now have our result in r4,r7. We create a fake dependency
130 * on that result and re-check the counter 135 * on that result and re-check the counter
@@ -144,14 +149,19 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
144 add r7,r7,r6 149 add r7,r7,r6
145 lis r9,NSEC_PER_SEC@h 150 lis r9,NSEC_PER_SEC@h
146 ori r9,r9,NSEC_PER_SEC@l 151 ori r9,r9,NSEC_PER_SEC@l
147 cmpli cr0,r7,r9 152 cmpl cr0,r7,r9
153 cmpli cr1,r7,0
148 blt 1f 154 blt 1f
149 subf r7,r9,r7 155 subf r7,r9,r7
150 addi r4,r4,1 156 addi r4,r4,1
1571: bge cr1,1f
158 addi r4,r4,-1
159 add r7,r7,r9
1511: std r4,TSPC64_TV_SEC(r11) 1601: std r4,TSPC64_TV_SEC(r11)
152 std r7,TSPC64_TV_NSEC(r11) 161 std r7,TSPC64_TV_NSEC(r11)
153 162
154 mtlr r12 163 mtlr r12
164 crclr cr0*4+so
155 li r3,0 165 li r3,0
156 blr 166 blr
157 167
@@ -181,11 +191,12 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
181 /* Check for supported clock IDs */ 191 /* Check for supported clock IDs */
182 cmpwi cr0,r3,CLOCK_REALTIME 192 cmpwi cr0,r3,CLOCK_REALTIME
183 cmpwi cr1,r3,CLOCK_MONOTONIC 193 cmpwi cr1,r3,CLOCK_MONOTONIC
184 cror cr0,cr0,cr1 194 cror cr0*4+eq,cr0*4+eq,cr1*4+eq
185 bne cr0,99f 195 bne cr0,99f
186 196
187 li r3,0 197 li r3,0
188 cmpli cr0,r4,0 198 cmpli cr0,r4,0
199 crclr cr0*4+so
189 beqlr 200 beqlr
190 lis r5,CLOCK_REALTIME_RES@h 201 lis r5,CLOCK_REALTIME_RES@h
191 ori r5,r5,CLOCK_REALTIME_RES@l 202 ori r5,r5,CLOCK_REALTIME_RES@l
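
Note: the changed constants in both clock_gettime paths convert the fixed-point "xsec" value to microseconds first (the lis r7,15 / ori r7,r7,16960 pair builds 15*65536 + 16960 = 1000000) and only then scale to nanoseconds with the trailing mulli by 1000, instead of multiplying by NSEC_PER_SEC directly. A worked user-space example of that arithmetic, assuming the kernel's XSEC_PER_SEC of 2^20:

#include <stdio.h>
#include <stdint.h>

#define XSEC_SHIFT 20			/* XSEC_PER_SEC == 1 << 20 */

int main(void)
{
	uint64_t xsec = ((uint64_t)1234 << XSEC_SHIFT) + 524288; /* 1234.5 s */
	uint64_t sec  = xsec >> XSEC_SHIFT;			 /* rldicl    */
	uint64_t frac = xsec - (sec << XSEC_SHIFT);		 /* subf      */
	uint64_t usec = (frac * 1000000ULL) >> XSEC_SHIFT;	 /* mulld+shift */
	uint64_t nsec = usec * 1000;				 /* mulli 1000 */

	printf("sec=%llu usec=%llu nsec=%llu\n",
	       (unsigned long long)sec,
	       (unsigned long long)usec,
	       (unsigned long long)nsec);
	return 0;
}
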