Diffstat (limited to 'arch/ppc64')
-rw-r--r--  arch/ppc64/Kconfig | 6
-rw-r--r--  arch/ppc64/Makefile | 7
-rw-r--r--  arch/ppc64/kernel/HvCall.c | 36
-rw-r--r--  arch/ppc64/kernel/HvLpConfig.c | 27
-rw-r--r--  arch/ppc64/kernel/HvLpEvent.c | 88
-rw-r--r--  arch/ppc64/kernel/ItLpQueue.c | 262
-rw-r--r--  arch/ppc64/kernel/LparData.c | 227
-rw-r--r--  arch/ppc64/kernel/Makefile | 93
-rw-r--r--  arch/ppc64/kernel/asm-offsets.c | 1
-rw-r--r--  arch/ppc64/kernel/bpa_iommu.c | 2
-rw-r--r--  arch/ppc64/kernel/bpa_setup.c | 3
-rw-r--r--  arch/ppc64/kernel/cputable.c | 82
-rw-r--r--  arch/ppc64/kernel/eeh.c | 2
-rw-r--r--  arch/ppc64/kernel/entry.S | 30
-rw-r--r--  arch/ppc64/kernel/head.S | 205
-rw-r--r--  arch/ppc64/kernel/hvCall.S | 98
-rw-r--r--  arch/ppc64/kernel/iSeries_VpdInfo.c | 268
-rw-r--r--  arch/ppc64/kernel/iSeries_htab.c | 236
-rw-r--r--  arch/ppc64/kernel/iSeries_iommu.c | 176
-rw-r--r--  arch/ppc64/kernel/iSeries_irq.c | 353
-rw-r--r--  arch/ppc64/kernel/iSeries_pci.c | 905
-rw-r--r--  arch/ppc64/kernel/iSeries_proc.c | 113
-rw-r--r--  arch/ppc64/kernel/iSeries_setup.c | 977
-rw-r--r--  arch/ppc64/kernel/iSeries_setup.h | 26
-rw-r--r--  arch/ppc64/kernel/iSeries_smp.c | 149
-rw-r--r--  arch/ppc64/kernel/iSeries_vio.c | 155
-rw-r--r--  arch/ppc64/kernel/idle_power4.S | 79
-rw-r--r--  arch/ppc64/kernel/init_task.c | 36
-rw-r--r--  arch/ppc64/kernel/kprobes.c | 1
-rw-r--r--  arch/ppc64/kernel/lmb.c | 299
-rw-r--r--  arch/ppc64/kernel/lparmap.c | 31
-rw-r--r--  arch/ppc64/kernel/maple_pci.c | 3
-rw-r--r--  arch/ppc64/kernel/maple_setup.c | 4
-rw-r--r--  arch/ppc64/kernel/mf.c | 1281
-rw-r--r--  arch/ppc64/kernel/misc.S | 47
-rw-r--r--  arch/ppc64/kernel/mpic.c | 3
-rw-r--r--  arch/ppc64/kernel/mpic.h | 273
-rw-r--r--  arch/ppc64/kernel/pSeries_hvCall.S | 131
-rw-r--r--  arch/ppc64/kernel/pSeries_iommu.c | 590
-rw-r--r--  arch/ppc64/kernel/pSeries_lpar.c | 518
-rw-r--r--  arch/ppc64/kernel/pSeries_nvram.c | 148
-rw-r--r--  arch/ppc64/kernel/pSeries_pci.c | 143
-rw-r--r--  arch/ppc64/kernel/pSeries_reconfig.c | 426
-rw-r--r--  arch/ppc64/kernel/pSeries_setup.c | 622
-rw-r--r--  arch/ppc64/kernel/pSeries_smp.c | 517
-rw-r--r--  arch/ppc64/kernel/pSeries_vio.c | 273
-rw-r--r--  arch/ppc64/kernel/pci.c | 3
-rw-r--r--  arch/ppc64/kernel/pci.h | 54
-rw-r--r--  arch/ppc64/kernel/pci_direct_iommu.c | 3
-rw-r--r--  arch/ppc64/kernel/pci_dn.c | 3
-rw-r--r--  arch/ppc64/kernel/pci_iommu.c | 8
-rw-r--r--  arch/ppc64/kernel/pmac_feature.c | 60
-rw-r--r--  arch/ppc64/kernel/pmac_nvram.c | 30
-rw-r--r--  arch/ppc64/kernel/pmac_pci.c | 14
-rw-r--r--  arch/ppc64/kernel/pmac_setup.c | 13
-rw-r--r--  arch/ppc64/kernel/pmac_smp.c | 5
-rw-r--r--  arch/ppc64/kernel/pmac_time.c | 4
-rw-r--r--  arch/ppc64/kernel/pmc.c | 4
-rw-r--r--  arch/ppc64/kernel/ppc_ksyms.c | 20
-rw-r--r--  arch/ppc64/kernel/process.c | 713
-rw-r--r--  arch/ppc64/kernel/prom.c | 5
-rw-r--r--  arch/ppc64/kernel/rtas.c | 1
-rw-r--r--  arch/ppc64/kernel/rtas_pci.c | 5
-rw-r--r--  arch/ppc64/kernel/rtc.c | 37
-rw-r--r--  arch/ppc64/kernel/setup.c | 30
-rw-r--r--  arch/ppc64/kernel/smp.c | 3
-rw-r--r--  arch/ppc64/kernel/sys_ppc32.c | 3
-rw-r--r--  arch/ppc64/kernel/time.c | 6
-rw-r--r--  arch/ppc64/kernel/traps.c | 568
-rw-r--r--  arch/ppc64/kernel/u3_iommu.c | 50
-rw-r--r--  arch/ppc64/kernel/vdso64/sigtramp.S | 1
-rw-r--r--  arch/ppc64/kernel/vecemu.c | 346
-rw-r--r--  arch/ppc64/kernel/vector.S | 172
-rw-r--r--  arch/ppc64/kernel/viopath.c | 673
-rw-r--r--  arch/ppc64/kernel/vmlinux.lds.S | 17
-rw-r--r--  arch/ppc64/mm/Makefile | 11
-rw-r--r--  arch/ppc64/mm/fault.c | 333
-rw-r--r--  arch/ppc64/mm/hash_low.S | 288
-rw-r--r--  arch/ppc64/mm/hash_native.c | 453
-rw-r--r--  arch/ppc64/mm/hash_utils.c | 438
-rw-r--r--  arch/ppc64/mm/hugetlbpage.c | 745
-rw-r--r--  arch/ppc64/mm/imalloc.c | 317
-rw-r--r--  arch/ppc64/mm/init.c | 870
-rw-r--r--  arch/ppc64/mm/mmap.c | 86
-rw-r--r--  arch/ppc64/mm/numa.c | 779
-rw-r--r--  arch/ppc64/mm/slb.c | 158
-rw-r--r--  arch/ppc64/mm/slb_low.S | 151
-rw-r--r--  arch/ppc64/mm/stab.c | 279
-rw-r--r--  arch/ppc64/mm/tlb.c | 197
-rw-r--r--  arch/ppc64/oprofile/Kconfig | 23
-rw-r--r--  arch/ppc64/oprofile/Makefile | 9
-rw-r--r--  arch/ppc64/oprofile/common.c | 145
-rw-r--r--  arch/ppc64/oprofile/op_model_power4.c | 309
-rw-r--r--  arch/ppc64/oprofile/op_model_rs64.c | 218
94 files changed, 344 insertions, 18268 deletions
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig
index c658650af429..e656e02d9dd1 100644
--- a/arch/ppc64/Kconfig
+++ b/arch/ppc64/Kconfig
@@ -186,6 +186,9 @@ config BOOTX_TEXT
 	  Say Y here to see progress messages from the boot firmware in text
 	  mode. Requires an Open Firmware compatible video card.
 
+config POWER4
+	def_bool y
+
 config POWER4_ONLY
 	bool "Optimize for POWER4"
 	default n
@@ -357,7 +360,6 @@ config HOTPLUG_CPU
 
 config PROC_DEVICETREE
 	bool "Support for Open Firmware device tree in /proc"
-	depends on !PPC_ISERIES
 	help
 	  This option adds a device-tree directory under /proc which contains
 	  an image of the device tree that the kernel copies from Open
@@ -461,7 +463,7 @@ config VIOPATH
 	depends on VIOCONS || VIODASD || VIOCD || VIOTAPE || VETH
 	default y
 
-source "arch/ppc64/oprofile/Kconfig"
+source "arch/powerpc/oprofile/Kconfig"
 
 source "arch/ppc64/Kconfig.debug"
 
diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile
index 521c2a5a2862..4a9928ef3032 100644
--- a/arch/ppc64/Makefile
+++ b/arch/ppc64/Makefile
@@ -82,10 +82,11 @@ CFLAGS += $(call cc-option,-funit-at-a-time)
 head-y := arch/ppc64/kernel/head.o
 
 libs-y += arch/ppc64/lib/
-core-y += arch/ppc64/kernel/
-core-y += arch/ppc64/mm/
+core-y += arch/ppc64/kernel/ arch/powerpc/kernel/
+core-y += arch/powerpc/mm/
+core-y += arch/powerpc/platforms/
 core-$(CONFIG_XMON) += arch/ppc64/xmon/
-drivers-$(CONFIG_OPROFILE) += arch/ppc64/oprofile/
+drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
 
 boot := arch/ppc64/boot
 
diff --git a/arch/ppc64/kernel/HvCall.c b/arch/ppc64/kernel/HvCall.c
deleted file mode 100644
index b772e65b57a2..000000000000
--- a/arch/ppc64/kernel/HvCall.c
+++ /dev/null
@@ -1,36 +0,0 @@
/*
 * HvCall.c
 * Copyright (C) 2001 Mike Corrigan IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <asm/page.h>
#include <asm/abs_addr.h>
#include <asm/iSeries/HvCall.h>
#include <asm/iSeries/HvCallSc.h>
#include <asm/iSeries/HvTypes.h>


void HvCall_writeLogBuffer(const void *buffer, u64 len)
{
	struct HvLpBufferList hv_buf;
	u64 left_this_page;
	u64 cur = virt_to_abs(buffer);

	while (len) {
		hv_buf.addr = cur;
		left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur;
		if (left_this_page > len)
			left_this_page = len;
		hv_buf.len = left_this_page;
		len -= left_this_page;
		HvCall2(HvCallBaseWriteLogBuffer,
				virt_to_abs(&hv_buf),
				left_this_page);
		cur = (cur & PAGE_MASK) + PAGE_SIZE;
	}
}
diff --git a/arch/ppc64/kernel/HvLpConfig.c b/arch/ppc64/kernel/HvLpConfig.c
deleted file mode 100644
index cb1d6473203c..000000000000
--- a/arch/ppc64/kernel/HvLpConfig.c
+++ /dev/null
@@ -1,27 +0,0 @@
/*
 * HvLpConfig.c
 * Copyright (C) 2001 Kyle A. Lucke, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/module.h>
#include <asm/iSeries/HvLpConfig.h>

HvLpIndex HvLpConfig_getLpIndex_outline(void)
{
	return HvLpConfig_getLpIndex();
}
EXPORT_SYMBOL(HvLpConfig_getLpIndex_outline);
diff --git a/arch/ppc64/kernel/HvLpEvent.c b/arch/ppc64/kernel/HvLpEvent.c
deleted file mode 100644
index 90032b138902..000000000000
--- a/arch/ppc64/kernel/HvLpEvent.c
+++ /dev/null
@@ -1,88 +0,0 @@
/*
 * Copyright 2001 Mike Corrigan IBM Corp
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/system.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/iSeries/HvCallEvent.h>
#include <asm/iSeries/ItLpNaca.h>

/* Array of LpEvent handler functions */
LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes];
unsigned lpEventHandlerPaths[HvLpEvent_Type_NumTypes];

/* Register a handler for an LpEvent type */

int HvLpEvent_registerHandler( HvLpEvent_Type eventType, LpEventHandler handler )
{
	int rc = 1;
	if ( eventType < HvLpEvent_Type_NumTypes ) {
		lpEventHandler[eventType] = handler;
		rc = 0;
	}
	return rc;

}

int HvLpEvent_unregisterHandler( HvLpEvent_Type eventType )
{
	int rc = 1;

	might_sleep();

	if ( eventType < HvLpEvent_Type_NumTypes ) {
		if ( !lpEventHandlerPaths[eventType] ) {
			lpEventHandler[eventType] = NULL;
			rc = 0;

			/* We now sleep until all other CPUs have scheduled. This ensures that
			 * the deletion is seen by all other CPUs, and that the deleted handler
			 * isn't still running on another CPU when we return. */
			synchronize_rcu();
		}
	}
	return rc;
}
EXPORT_SYMBOL(HvLpEvent_registerHandler);
EXPORT_SYMBOL(HvLpEvent_unregisterHandler);

/* (lpIndex is the partition index of the target partition.
 * needed only for VirtualIo, VirtualLan and SessionMgr.  Zero
 * indicates to use our partition index - for the other types)
 */
int HvLpEvent_openPath( HvLpEvent_Type eventType, HvLpIndex lpIndex )
{
	int rc = 1;
	if ( eventType < HvLpEvent_Type_NumTypes &&
			lpEventHandler[eventType] ) {
		if ( lpIndex == 0 )
			lpIndex = itLpNaca.xLpIndex;
		HvCallEvent_openLpEventPath( lpIndex, eventType );
		++lpEventHandlerPaths[eventType];
		rc = 0;
	}
	return rc;
}

int HvLpEvent_closePath( HvLpEvent_Type eventType, HvLpIndex lpIndex )
{
	int rc = 1;
	if ( eventType < HvLpEvent_Type_NumTypes &&
			lpEventHandler[eventType] &&
			lpEventHandlerPaths[eventType] ) {
		if ( lpIndex == 0 )
			lpIndex = itLpNaca.xLpIndex;
		HvCallEvent_closeLpEventPath( lpIndex, eventType );
		--lpEventHandlerPaths[eventType];
		rc = 0;
	}
	return rc;
}

diff --git a/arch/ppc64/kernel/ItLpQueue.c b/arch/ppc64/kernel/ItLpQueue.c
deleted file mode 100644
index 4231861288a3..000000000000
--- a/arch/ppc64/kernel/ItLpQueue.c
+++ /dev/null
@@ -1,262 +0,0 @@
1/*
2 * ItLpQueue.c
3 * Copyright (C) 2001 Mike Corrigan IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#include <linux/stddef.h>
12#include <linux/kernel.h>
13#include <linux/sched.h>
14#include <linux/bootmem.h>
15#include <linux/seq_file.h>
16#include <linux/proc_fs.h>
17#include <asm/system.h>
18#include <asm/paca.h>
19#include <asm/iSeries/ItLpQueue.h>
20#include <asm/iSeries/HvLpEvent.h>
21#include <asm/iSeries/HvCallEvent.h>
22
23/*
24 * The LpQueue is used to pass event data from the hypervisor to
25 * the partition. This is where I/O interrupt events are communicated.
26 *
27 * It is written to by the hypervisor so cannot end up in the BSS.
28 */
29struct hvlpevent_queue hvlpevent_queue __attribute__((__section__(".data")));
30
31DEFINE_PER_CPU(unsigned long[HvLpEvent_Type_NumTypes], hvlpevent_counts);
32
33static char *event_types[HvLpEvent_Type_NumTypes] = {
34 "Hypervisor",
35 "Machine Facilities",
36 "Session Manager",
37 "SPD I/O",
38 "Virtual Bus",
39 "PCI I/O",
40 "RIO I/O",
41 "Virtual Lan",
42 "Virtual I/O"
43};
44
45/* Array of LpEvent handler functions */
46extern LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes];
47
48static struct HvLpEvent * get_next_hvlpevent(void)
49{
50 struct HvLpEvent * event;
51 event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr;
52
53 if (event->xFlags.xValid) {
54 /* rmb() needed only for weakly consistent machines (regatta) */
55 rmb();
56 /* Set pointer to next potential event */
57 hvlpevent_queue.xSlicCurEventPtr += ((event->xSizeMinus1 +
58 LpEventAlign) / LpEventAlign) * LpEventAlign;
59
60 /* Wrap to beginning if no room at end */
61 if (hvlpevent_queue.xSlicCurEventPtr >
62 hvlpevent_queue.xSlicLastValidEventPtr) {
63 hvlpevent_queue.xSlicCurEventPtr =
64 hvlpevent_queue.xSlicEventStackPtr;
65 }
66 } else {
67 event = NULL;
68 }
69
70 return event;
71}
72
73static unsigned long spread_lpevents = NR_CPUS;
74
75int hvlpevent_is_pending(void)
76{
77 struct HvLpEvent *next_event;
78
79 if (smp_processor_id() >= spread_lpevents)
80 return 0;
81
82 next_event = (struct HvLpEvent *)hvlpevent_queue.xSlicCurEventPtr;
83
84 return next_event->xFlags.xValid |
85 hvlpevent_queue.xPlicOverflowIntPending;
86}
87
88static void hvlpevent_clear_valid(struct HvLpEvent * event)
89{
90 /* Tell the Hypervisor that we're done with this event.
91 * Also clear bits within this event that might look like valid bits.
92 * ie. on 64-byte boundaries.
93 */
94 struct HvLpEvent *tmp;
95 unsigned extra = ((event->xSizeMinus1 + LpEventAlign) /
96 LpEventAlign) - 1;
97
98 switch (extra) {
99 case 3:
100 tmp = (struct HvLpEvent*)((char*)event + 3 * LpEventAlign);
101 tmp->xFlags.xValid = 0;
102 case 2:
103 tmp = (struct HvLpEvent*)((char*)event + 2 * LpEventAlign);
104 tmp->xFlags.xValid = 0;
105 case 1:
106 tmp = (struct HvLpEvent*)((char*)event + 1 * LpEventAlign);
107 tmp->xFlags.xValid = 0;
108 }
109
110 mb();
111
112 event->xFlags.xValid = 0;
113}
114
115void process_hvlpevents(struct pt_regs *regs)
116{
117 struct HvLpEvent * event;
118
119 /* If we have recursed, just return */
120 if (!spin_trylock(&hvlpevent_queue.lock))
121 return;
122
123 for (;;) {
124 event = get_next_hvlpevent();
125 if (event) {
126 /* Call appropriate handler here, passing
127 * a pointer to the LpEvent. The handler
128 * must make a copy of the LpEvent if it
129 * needs it in a bottom half. (perhaps for
130 * an ACK)
131 *
132 * Handlers are responsible for ACK processing
133 *
134 * The Hypervisor guarantees that LpEvents will
135 * only be delivered with types that we have
136 * registered for, so no type check is necessary
137 * here!
138 */
139 if (event->xType < HvLpEvent_Type_NumTypes)
140 __get_cpu_var(hvlpevent_counts)[event->xType]++;
141 if (event->xType < HvLpEvent_Type_NumTypes &&
142 lpEventHandler[event->xType])
143 lpEventHandler[event->xType](event, regs);
144 else
145 printk(KERN_INFO "Unexpected Lp Event type=%d\n", event->xType );
146
147 hvlpevent_clear_valid(event);
148 } else if (hvlpevent_queue.xPlicOverflowIntPending)
149 /*
150 * No more valid events. If overflow events are
151 * pending process them
152 */
153 HvCallEvent_getOverflowLpEvents(hvlpevent_queue.xIndex);
154 else
155 break;
156 }
157
158 spin_unlock(&hvlpevent_queue.lock);
159}
160
161static int set_spread_lpevents(char *str)
162{
163 unsigned long val = simple_strtoul(str, NULL, 0);
164
165 /*
166 * The parameter is the number of processors to share in processing
167 * lp events.
168 */
169 if (( val > 0) && (val <= NR_CPUS)) {
170 spread_lpevents = val;
171 printk("lpevent processing spread over %ld processors\n", val);
172 } else {
173 printk("invalid spread_lpevents %ld\n", val);
174 }
175
176 return 1;
177}
178__setup("spread_lpevents=", set_spread_lpevents);
179
180void setup_hvlpevent_queue(void)
181{
182 void *eventStack;
183
184 /*
185 * Allocate a page for the Event Stack. The Hypervisor needs the
186 * absolute real address, so we subtract out the KERNELBASE and add
187 * in the absolute real address of the kernel load area.
188 */
189 eventStack = alloc_bootmem_pages(LpEventStackSize);
190 memset(eventStack, 0, LpEventStackSize);
191
192 /* Invoke the hypervisor to initialize the event stack */
193 HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize);
194
195 hvlpevent_queue.xSlicEventStackPtr = (char *)eventStack;
196 hvlpevent_queue.xSlicCurEventPtr = (char *)eventStack;
197 hvlpevent_queue.xSlicLastValidEventPtr = (char *)eventStack +
198 (LpEventStackSize - LpEventMaxSize);
199 hvlpevent_queue.xIndex = 0;
200}
201
202static int proc_lpevents_show(struct seq_file *m, void *v)
203{
204 int cpu, i;
205 unsigned long sum;
206 static unsigned long cpu_totals[NR_CPUS];
207
208 /* FIXME: do we care that there's no locking here? */
209 sum = 0;
210 for_each_online_cpu(cpu) {
211 cpu_totals[cpu] = 0;
212 for (i = 0; i < HvLpEvent_Type_NumTypes; i++) {
213 cpu_totals[cpu] += per_cpu(hvlpevent_counts, cpu)[i];
214 }
215 sum += cpu_totals[cpu];
216 }
217
218 seq_printf(m, "LpEventQueue 0\n");
219 seq_printf(m, " events processed:\t%lu\n", sum);
220
221 for (i = 0; i < HvLpEvent_Type_NumTypes; ++i) {
222 sum = 0;
223 for_each_online_cpu(cpu) {
224 sum += per_cpu(hvlpevent_counts, cpu)[i];
225 }
226
227 seq_printf(m, " %-20s %10lu\n", event_types[i], sum);
228 }
229
230 seq_printf(m, "\n events processed by processor:\n");
231
232 for_each_online_cpu(cpu) {
233 seq_printf(m, " CPU%02d %10lu\n", cpu, cpu_totals[cpu]);
234 }
235
236 return 0;
237}
238
239static int proc_lpevents_open(struct inode *inode, struct file *file)
240{
241 return single_open(file, proc_lpevents_show, NULL);
242}
243
244static struct file_operations proc_lpevents_operations = {
245 .open = proc_lpevents_open,
246 .read = seq_read,
247 .llseek = seq_lseek,
248 .release = single_release,
249};
250
251static int __init proc_lpevents_init(void)
252{
253 struct proc_dir_entry *e;
254
255 e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL);
256 if (e)
257 e->proc_fops = &proc_lpevents_operations;
258
259 return 0;
260}
261__initcall(proc_lpevents_init);
262
diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c
deleted file mode 100644
index 0a9c23ca2f0c..000000000000
--- a/arch/ppc64/kernel/LparData.c
+++ /dev/null
@@ -1,227 +0,0 @@
1/*
2 * Copyright 2001 Mike Corrigan, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#include <linux/config.h>
10#include <linux/types.h>
11#include <linux/threads.h>
12#include <linux/module.h>
13#include <linux/bitops.h>
14#include <asm/processor.h>
15#include <asm/ptrace.h>
16#include <asm/naca.h>
17#include <asm/abs_addr.h>
18#include <asm/iSeries/ItLpNaca.h>
19#include <asm/lppaca.h>
20#include <asm/iSeries/ItLpRegSave.h>
21#include <asm/paca.h>
22#include <asm/iSeries/HvReleaseData.h>
23#include <asm/iSeries/LparMap.h>
24#include <asm/iSeries/ItVpdAreas.h>
25#include <asm/iSeries/ItIplParmsReal.h>
26#include <asm/iSeries/ItExtVpdPanel.h>
27#include <asm/iSeries/ItLpQueue.h>
28#include <asm/iSeries/IoHriProcessorVpd.h>
29#include <asm/iSeries/ItSpCommArea.h>
30
31
32/* The HvReleaseData is the root of the information shared between
33 * the hypervisor and Linux.
34 */
35struct HvReleaseData hvReleaseData = {
36 .xDesc = 0xc8a5d9c4, /* "HvRD" ebcdic */
37 .xSize = sizeof(struct HvReleaseData),
38 .xVpdAreasPtrOffset = offsetof(struct naca_struct, xItVpdAreas),
39 .xSlicNacaAddr = &naca, /* 64-bit Naca address */
40 .xMsNucDataOffset = LPARMAP_PHYS,
41 .xFlags = HVREL_TAGSINACTIVE /* tags inactive */
42 /* 64 bit */
43 /* shared processors */
44 /* HMT allowed */
45 | 6, /* TEMP: This allows non-GA driver */
46 .xVrmIndex = 4, /* We are v5r2m0 */
47 .xMinSupportedPlicVrmIndex = 3, /* v5r1m0 */
48 .xMinCompatablePlicVrmIndex = 3, /* v5r1m0 */
49 .xVrmName = { 0xd3, 0x89, 0x95, 0xa4, /* "Linux 2.4.64" ebcdic */
50 0xa7, 0x40, 0xf2, 0x4b,
51 0xf4, 0x4b, 0xf6, 0xf4 },
52};
53
54/*
55 * The NACA. The first dword of the naca is required by the iSeries
56 * hypervisor to point to itVpdAreas. The hypervisor finds the NACA
57 * through the pointer in hvReleaseData.
58 */
59struct naca_struct naca = {
60 .xItVpdAreas = &itVpdAreas,
61 .xRamDisk = 0,
62 .xRamDiskSize = 0,
63};
64
65extern void system_reset_iSeries(void);
66extern void machine_check_iSeries(void);
67extern void data_access_iSeries(void);
68extern void instruction_access_iSeries(void);
69extern void hardware_interrupt_iSeries(void);
70extern void alignment_iSeries(void);
71extern void program_check_iSeries(void);
72extern void fp_unavailable_iSeries(void);
73extern void decrementer_iSeries(void);
74extern void trap_0a_iSeries(void);
75extern void trap_0b_iSeries(void);
76extern void system_call_iSeries(void);
77extern void single_step_iSeries(void);
78extern void trap_0e_iSeries(void);
79extern void performance_monitor_iSeries(void);
80extern void data_access_slb_iSeries(void);
81extern void instruction_access_slb_iSeries(void);
82
83struct ItLpNaca itLpNaca = {
84 .xDesc = 0xd397d581, /* "LpNa" ebcdic */
85 .xSize = 0x0400, /* size of ItLpNaca */
86 .xIntHdlrOffset = 0x0300, /* offset to int array */
87 .xMaxIntHdlrEntries = 19, /* # ents */
88 .xPrimaryLpIndex = 0, /* Part # of primary */
89 .xServiceLpIndex = 0, /* Part # of serv */
90 .xLpIndex = 0, /* Part # of me */
91 .xMaxLpQueues = 0, /* # of LP queues */
92 .xLpQueueOffset = 0x100, /* offset of start of LP queues */
93 .xPirEnvironMode = 0, /* Piranha stuff */
94 .xPirConsoleMode = 0,
95 .xPirDasdMode = 0,
96 .xLparInstalled = 0,
97 .xSysPartitioned = 0,
98 .xHwSyncedTBs = 0,
99 .xIntProcUtilHmt = 0,
100 .xSpVpdFormat = 0,
101 .xIntProcRatio = 0,
102 .xPlicVrmIndex = 0, /* VRM index of PLIC */
103 .xMinSupportedSlicVrmInd = 0, /* min supported SLIC */
104 .xMinCompatableSlicVrmInd = 0, /* min compat SLIC */
105 .xLoadAreaAddr = 0, /* 64-bit addr of load area */
106 .xLoadAreaChunks = 0, /* chunks for load area */
107 .xPaseSysCallCRMask = 0, /* PASE mask */
108 .xSlicSegmentTablePtr = 0, /* seg table */
109 .xOldLpQueue = { 0 }, /* Old LP Queue */
110 .xInterruptHdlr = {
111 (u64)system_reset_iSeries, /* 0x100 System Reset */
112 (u64)machine_check_iSeries, /* 0x200 Machine Check */
113 (u64)data_access_iSeries, /* 0x300 Data Access */
114 (u64)instruction_access_iSeries, /* 0x400 Instruction Access */
115 (u64)hardware_interrupt_iSeries, /* 0x500 External */
116 (u64)alignment_iSeries, /* 0x600 Alignment */
117 (u64)program_check_iSeries, /* 0x700 Program Check */
118 (u64)fp_unavailable_iSeries, /* 0x800 FP Unavailable */
119 (u64)decrementer_iSeries, /* 0x900 Decrementer */
120 (u64)trap_0a_iSeries, /* 0xa00 Trap 0A */
121 (u64)trap_0b_iSeries, /* 0xb00 Trap 0B */
122 (u64)system_call_iSeries, /* 0xc00 System Call */
123 (u64)single_step_iSeries, /* 0xd00 Single Step */
124 (u64)trap_0e_iSeries, /* 0xe00 Trap 0E */
125 (u64)performance_monitor_iSeries,/* 0xf00 Performance Monitor */
126 0, /* int 0x1000 */
127 0, /* int 0x1010 */
128 0, /* int 0x1020 CPU ctls */
129 (u64)hardware_interrupt_iSeries, /* SC Ret Hdlr */
130 (u64)data_access_slb_iSeries, /* 0x380 D-SLB */
131 (u64)instruction_access_slb_iSeries /* 0x480 I-SLB */
132 }
133};
134EXPORT_SYMBOL(itLpNaca);
135
136/* May be filled in by the hypervisor so cannot end up in the BSS */
137struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data")));
138
139/* May be filled in by the hypervisor so cannot end up in the BSS */
140struct ItExtVpdPanel xItExtVpdPanel __attribute__((__section__(".data")));
141EXPORT_SYMBOL(xItExtVpdPanel);
142
143#define maxPhysicalProcessors 32
144
145struct IoHriProcessorVpd xIoHriProcessorVpd[maxPhysicalProcessors] = {
146 {
147 .xInstCacheOperandSize = 32,
148 .xDataCacheOperandSize = 32,
149 .xProcFreq = 50000000,
150 .xTimeBaseFreq = 50000000,
151 .xPVR = 0x3600
152 }
153};
154
155/* Space for Main Store Vpd 27,200 bytes */
156/* May be filled in by the hypervisor so cannot end up in the BSS */
157u64 xMsVpd[3400] __attribute__((__section__(".data")));
158
159/* Space for Recovery Log Buffer */
160/* May be filled in by the hypervisor so cannot end up in the BSS */
161u64 xRecoveryLogBuffer[32] __attribute__((__section__(".data")));
162
163struct SpCommArea xSpCommArea = {
164 .xDesc = 0xE2D7C3C2,
165 .xFormat = 1,
166};
167
168/* The LparMap data is now located at offset 0x6000 in head.S
169 * It was put there so that the HvReleaseData could address it
170 * with a 32-bit offset as required by the iSeries hypervisor
171 *
172 * The Naca has a pointer to the ItVpdAreas. The hypervisor finds
173 * the Naca via the HvReleaseData area. The HvReleaseData has the
174 * offset into the Naca of the pointer to the ItVpdAreas.
175 */
176struct ItVpdAreas itVpdAreas = {
177 .xSlicDesc = 0xc9a3e5c1, /* "ItVA" */
178 .xSlicSize = sizeof(struct ItVpdAreas),
179 .xSlicVpdEntries = ItVpdMaxEntries, /* # VPD array entries */
180 .xSlicDmaEntries = ItDmaMaxEntries, /* # DMA array entries */
181 .xSlicMaxLogicalProcs = NR_CPUS * 2, /* Max logical procs */
182 .xSlicMaxPhysicalProcs = maxPhysicalProcessors, /* Max physical procs */
183 .xSlicDmaToksOffset = offsetof(struct ItVpdAreas, xPlicDmaToks),
184 .xSlicVpdAdrsOffset = offsetof(struct ItVpdAreas, xSlicVpdAdrs),
185 .xSlicDmaLensOffset = offsetof(struct ItVpdAreas, xPlicDmaLens),
186 .xSlicVpdLensOffset = offsetof(struct ItVpdAreas, xSlicVpdLens),
187 .xSlicMaxSlotLabels = 0, /* max slot labels */
188 .xSlicMaxLpQueues = 1, /* max LP queues */
189 .xPlicDmaLens = { 0 }, /* DMA lengths */
190 .xPlicDmaToks = { 0 }, /* DMA tokens */
191 .xSlicVpdLens = { /* VPD lengths */
192 0,0,0, /* 0 - 2 */
193 sizeof(xItExtVpdPanel), /* 3 Extended VPD */
194 sizeof(struct paca_struct), /* 4 length of Paca */
195 0, /* 5 */
196 sizeof(struct ItIplParmsReal),/* 6 length of IPL parms */
197 26992, /* 7 length of MS VPD */
198 0, /* 8 */
199 sizeof(struct ItLpNaca),/* 9 length of LP Naca */
200 0, /* 10 */
201 256, /* 11 length of Recovery Log Buf */
202 sizeof(struct SpCommArea), /* 12 length of SP Comm Area */
203 0,0,0, /* 13 - 15 */
204 sizeof(struct IoHriProcessorVpd),/* 16 length of Proc Vpd */
205 0,0,0,0,0,0, /* 17 - 22 */
206 sizeof(struct hvlpevent_queue), /* 23 length of Lp Queue */
207 0,0 /* 24 - 25 */
208 },
209 .xSlicVpdAdrs = { /* VPD addresses */
210 0,0,0, /* 0 - 2 */
211 &xItExtVpdPanel, /* 3 Extended VPD */
212 &paca[0], /* 4 first Paca */
213 0, /* 5 */
214 &xItIplParmsReal, /* 6 IPL parms */
215 &xMsVpd, /* 7 MS Vpd */
216 0, /* 8 */
217 &itLpNaca, /* 9 LpNaca */
218 0, /* 10 */
219 &xRecoveryLogBuffer, /* 11 Recovery Log Buffer */
220 &xSpCommArea, /* 12 SP Comm Area */
221 0,0,0, /* 13 - 15 */
222 &xIoHriProcessorVpd, /* 16 Proc Vpd */
223 0,0,0,0,0,0, /* 17 - 22 */
224 &hvlpevent_queue, /* 23 Lp Queue */
225 0,0
226 }
227};
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile
index ae60eb1193c6..5569ea7e6830 100644
--- a/arch/ppc64/kernel/Makefile
+++ b/arch/ppc64/kernel/Makefile
@@ -2,36 +2,29 @@
 # Makefile for the linux ppc64 kernel.
 #
 
+ifneq ($(CONFIG_PPC_MERGE),y)
+
 EXTRA_CFLAGS += -mno-minimal-toc
 extra-y := head.o vmlinux.lds
 
-obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \
-	time.o process.o signal.o syscalls.o misc.o ptrace.o \
-	align.o semaphore.o bitops.o pacaData.o \
+obj-y := setup.o entry.o irq.o idle.o dma.o \
+	time.o signal.o syscalls.o misc.o ptrace.o \
+	align.o bitops.o pacaData.o \
 	udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
-	ptrace32.o signal32.o rtc.o init_task.o \
-	lmb.o cputable.o cpu_setup_power4.o idle_power4.o \
-	iommu.o sysfs.o vdso.o pmc.o firmware.o
+	ptrace32.o signal32.o rtc.o \
+	cputable.o cpu_setup_power4.o \
+	iommu.o sysfs.o vdso.o pmc.o firmware.o prom.o
 obj-y += vdso32/ vdso64/
 
 obj-$(CONFIG_PPC_OF) += of_device.o
 
-pci-obj-$(CONFIG_PPC_ISERIES) += iSeries_pci.o iSeries_irq.o \
-	iSeries_VpdInfo.o
 pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o
 
 obj-$(CONFIG_PCI) += pci.o pci_iommu.o iomap.o $(pci-obj-y)
 
-obj-$(CONFIG_PPC_ISERIES) += HvCall.o HvLpConfig.o LparData.o \
-	iSeries_setup.o ItLpQueue.o hvCall.o \
-	mf.o HvLpEvent.o iSeries_proc.o iSeries_htab.o \
-	iSeries_iommu.o
-
-obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o prom_init.o prom.o
+obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o prom_init.o
 
-obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \
-	pSeries_nvram.o rtasd.o ras.o pSeries_reconfig.o \
-	pSeries_setup.o pSeries_iommu.o udbg_16550.o
+obj-$(CONFIG_PPC_PSERIES) += rtasd.o ras.o udbg_16550.o
 
 obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \
 	bpa_iic.o spider-pic.o
@@ -45,14 +38,11 @@ obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o
 obj-$(CONFIG_PPC_RTAS) += rtas.o rtas_pci.o
 obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
 obj-$(CONFIG_SCANLOG) += scanlog.o
-obj-$(CONFIG_VIOPATH) += viopath.o
 obj-$(CONFIG_LPARCFG) += lparcfg.o
 obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
 obj-$(CONFIG_BOOTX_TEXT) += btext.o
 obj-$(CONFIG_HVCS) += hvcserver.o
 
-vio-obj-$(CONFIG_PPC_PSERIES) += pSeries_vio.o
-vio-obj-$(CONFIG_PPC_ISERIES) += iSeries_vio.o
 obj-$(CONFIG_IBMVIO) += vio.o $(vio-obj-y)
 obj-$(CONFIG_XICS) += xics.o
 obj-$(CONFIG_MPIC) += mpic.o
@@ -68,18 +58,69 @@ obj-$(CONFIG_U3_DART) += u3_iommu.o
 
 ifdef CONFIG_SMP
 obj-$(CONFIG_PPC_PMAC) += pmac_smp.o smp-tbsync.o
-obj-$(CONFIG_PPC_ISERIES) += iSeries_smp.o
-obj-$(CONFIG_PPC_PSERIES) += pSeries_smp.o
-obj-$(CONFIG_PPC_BPA) += pSeries_smp.o
 obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o
 endif
 
-obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 
 CFLAGS_ioctl32.o += -Ifs/
 
 ifeq ($(CONFIG_PPC_ISERIES),y)
-arch/ppc64/kernel/head.o: arch/ppc64/kernel/lparmap.s
-AFLAGS_head.o += -Iarch/ppc64/kernel
+arch/ppc64/kernel/head.o: arch/powerpc/kernel/lparmap.s
+AFLAGS_head.o += -Iarch/powerpc/kernel
+endif
+
+else
+
+# Things still needed from here by the merged ppc code
+
+obj-y := irq.o idle.o dma.o \
+	time.o signal.o syscalls.o ptrace.o \
+	align.o bitops.o pacaData.o \
+	udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
+	ptrace32.o signal32.o rtc.o \
+	cputable.o cpu_setup_power4.o \
+	iommu.o sysfs.o vdso.o pmc.o firmware.o
+obj-y += vdso32/ vdso64/
+
+pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o
+
+obj-$(CONFIG_PCI) += pci.o pci_iommu.o iomap.o $(pci-obj-y)
+
+obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o
+
+obj-$(CONFIG_PPC_PSERIES) += rtasd.o ras.o udbg_16550.o
+
+obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \
+	bpa_iic.o spider-pic.o
+
+obj-$(CONFIG_KEXEC) += machine_kexec.o
+obj-$(CONFIG_EEH) += eeh.o
+obj-$(CONFIG_PROC_FS) += proc_ppc64.o
+obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_PPC_RTAS) += rtas.o rtas_pci.o
+obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
+obj-$(CONFIG_SCANLOG) += scanlog.o
+obj-$(CONFIG_LPARCFG) += lparcfg.o
+obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
+obj-$(CONFIG_HVCS) += hvcserver.o
+
+obj-$(CONFIG_IBMVIO) += vio.o
+obj-$(CONFIG_XICS) += xics.o
+
+obj-$(CONFIG_PPC_MAPLE) += maple_setup.o maple_pci.o maple_time.o \
+	udbg_16550.o
+
+obj-$(CONFIG_U3_DART) += u3_iommu.o
+
+ifdef CONFIG_SMP
+obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o
+endif
+
+obj-$(CONFIG_KPROBES) += kprobes.o
+
+CFLAGS_ioctl32.o += -Ifs/
+
 endif
diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c
index 1ff4fa05a973..1378fbbe1e57 100644
--- a/arch/ppc64/kernel/asm-offsets.c
+++ b/arch/ppc64/kernel/asm-offsets.c
@@ -77,6 +77,7 @@ int main(void)
 	DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
 	DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
 	DEFINE(PLATFORM, offsetof(struct systemcfg, platform));
+	DEFINE(PLATFORM_LPAR, PLATFORM_LPAR);
 
 	/* paca */
 	DEFINE(PACA_SIZE, sizeof(struct paca_struct));
diff --git a/arch/ppc64/kernel/bpa_iommu.c b/arch/ppc64/kernel/bpa_iommu.c
index 5f2460090e03..da1b4b7a3269 100644
--- a/arch/ppc64/kernel/bpa_iommu.c
+++ b/arch/ppc64/kernel/bpa_iommu.c
@@ -39,8 +39,8 @@
 #include <asm/pmac_feature.h>
 #include <asm/abs_addr.h>
 #include <asm/system.h>
+#include <asm/ppc-pci.h>
 
-#include "pci.h"
 #include "bpa_iommu.h"
 
 static inline unsigned long
diff --git a/arch/ppc64/kernel/bpa_setup.c b/arch/ppc64/kernel/bpa_setup.c
index 57b3db66f458..017cf23e91fa 100644
--- a/arch/ppc64/kernel/bpa_setup.c
+++ b/arch/ppc64/kernel/bpa_setup.c
@@ -43,8 +43,9 @@
 #include <asm/time.h>
 #include <asm/nvram.h>
 #include <asm/cputable.h>
+#include <asm/ppc-pci.h>
+#include <asm/irq.h>
 
-#include "pci.h"
 #include "bpa_iic.h"
 #include "bpa_iommu.h"
 
diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c
index 8831a28c3c4e..5134c53d536d 100644
--- a/arch/ppc64/kernel/cputable.c
+++ b/arch/ppc64/kernel/cputable.c
@@ -37,26 +37,13 @@ extern void __setup_cpu_power4(unsigned long offset, struct cpu_spec* spec);
37extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); 37extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
38extern void __setup_cpu_be(unsigned long offset, struct cpu_spec* spec); 38extern void __setup_cpu_be(unsigned long offset, struct cpu_spec* spec);
39 39
40
41/* We only set the altivec features if the kernel was compiled with altivec
42 * support
43 */
44#ifdef CONFIG_ALTIVEC
45#define CPU_FTR_ALTIVEC_COMP CPU_FTR_ALTIVEC
46#define PPC_FEATURE_HAS_ALTIVEC_COMP PPC_FEATURE_HAS_ALTIVEC
47#else
48#define CPU_FTR_ALTIVEC_COMP 0
49#define PPC_FEATURE_HAS_ALTIVEC_COMP 0
50#endif
51
52struct cpu_spec cpu_specs[] = { 40struct cpu_spec cpu_specs[] = {
53 { /* Power3 */ 41 { /* Power3 */
54 .pvr_mask = 0xffff0000, 42 .pvr_mask = 0xffff0000,
55 .pvr_value = 0x00400000, 43 .pvr_value = 0x00400000,
56 .cpu_name = "POWER3 (630)", 44 .cpu_name = "POWER3 (630)",
57 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 45 .cpu_features = CPU_FTRS_POWER3,
58 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR, 46 .cpu_user_features = COMMON_USER_PPC64,
59 .cpu_user_features = COMMON_USER_PPC64,
60 .icache_bsize = 128, 47 .icache_bsize = 128,
61 .dcache_bsize = 128, 48 .dcache_bsize = 128,
62 .num_pmcs = 8, 49 .num_pmcs = 8,
@@ -70,8 +57,7 @@ struct cpu_spec cpu_specs[] = {
70 .pvr_mask = 0xffff0000, 57 .pvr_mask = 0xffff0000,
71 .pvr_value = 0x00410000, 58 .pvr_value = 0x00410000,
72 .cpu_name = "POWER3 (630+)", 59 .cpu_name = "POWER3 (630+)",
73 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 60 .cpu_features = CPU_FTRS_POWER3,
74 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR,
75 .cpu_user_features = COMMON_USER_PPC64, 61 .cpu_user_features = COMMON_USER_PPC64,
76 .icache_bsize = 128, 62 .icache_bsize = 128,
77 .dcache_bsize = 128, 63 .dcache_bsize = 128,
@@ -86,9 +72,7 @@ struct cpu_spec cpu_specs[] = {
86 .pvr_mask = 0xffff0000, 72 .pvr_mask = 0xffff0000,
87 .pvr_value = 0x00330000, 73 .pvr_value = 0x00330000,
88 .cpu_name = "RS64-II (northstar)", 74 .cpu_name = "RS64-II (northstar)",
89 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 75 .cpu_features = CPU_FTRS_RS64,
90 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
91 CPU_FTR_MMCRA | CPU_FTR_CTRL,
92 .cpu_user_features = COMMON_USER_PPC64, 76 .cpu_user_features = COMMON_USER_PPC64,
93 .icache_bsize = 128, 77 .icache_bsize = 128,
94 .dcache_bsize = 128, 78 .dcache_bsize = 128,
@@ -103,9 +87,7 @@ struct cpu_spec cpu_specs[] = {
103 .pvr_mask = 0xffff0000, 87 .pvr_mask = 0xffff0000,
104 .pvr_value = 0x00340000, 88 .pvr_value = 0x00340000,
105 .cpu_name = "RS64-III (pulsar)", 89 .cpu_name = "RS64-III (pulsar)",
106 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 90 .cpu_features = CPU_FTRS_RS64,
107 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
108 CPU_FTR_MMCRA | CPU_FTR_CTRL,
109 .cpu_user_features = COMMON_USER_PPC64, 91 .cpu_user_features = COMMON_USER_PPC64,
110 .icache_bsize = 128, 92 .icache_bsize = 128,
111 .dcache_bsize = 128, 93 .dcache_bsize = 128,
@@ -120,9 +102,7 @@ struct cpu_spec cpu_specs[] = {
120 .pvr_mask = 0xffff0000, 102 .pvr_mask = 0xffff0000,
121 .pvr_value = 0x00360000, 103 .pvr_value = 0x00360000,
122 .cpu_name = "RS64-III (icestar)", 104 .cpu_name = "RS64-III (icestar)",
123 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 105 .cpu_features = CPU_FTRS_RS64,
124 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
125 CPU_FTR_MMCRA | CPU_FTR_CTRL,
126 .cpu_user_features = COMMON_USER_PPC64, 106 .cpu_user_features = COMMON_USER_PPC64,
127 .icache_bsize = 128, 107 .icache_bsize = 128,
128 .dcache_bsize = 128, 108 .dcache_bsize = 128,
@@ -137,9 +117,7 @@ struct cpu_spec cpu_specs[] = {
137 .pvr_mask = 0xffff0000, 117 .pvr_mask = 0xffff0000,
138 .pvr_value = 0x00370000, 118 .pvr_value = 0x00370000,
139 .cpu_name = "RS64-IV (sstar)", 119 .cpu_name = "RS64-IV (sstar)",
140 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 120 .cpu_features = CPU_FTRS_RS64,
141 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR |
142 CPU_FTR_MMCRA | CPU_FTR_CTRL,
143 .cpu_user_features = COMMON_USER_PPC64, 121 .cpu_user_features = COMMON_USER_PPC64,
144 .icache_bsize = 128, 122 .icache_bsize = 128,
145 .dcache_bsize = 128, 123 .dcache_bsize = 128,
@@ -154,9 +132,7 @@ struct cpu_spec cpu_specs[] = {
154 .pvr_mask = 0xffff0000, 132 .pvr_mask = 0xffff0000,
155 .pvr_value = 0x00350000, 133 .pvr_value = 0x00350000,
156 .cpu_name = "POWER4 (gp)", 134 .cpu_name = "POWER4 (gp)",
157 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 135 .cpu_features = CPU_FTRS_POWER4,
158 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
159 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA,
160 .cpu_user_features = COMMON_USER_PPC64, 136 .cpu_user_features = COMMON_USER_PPC64,
161 .icache_bsize = 128, 137 .icache_bsize = 128,
162 .dcache_bsize = 128, 138 .dcache_bsize = 128,
@@ -171,9 +147,7 @@ struct cpu_spec cpu_specs[] = {
171 .pvr_mask = 0xffff0000, 147 .pvr_mask = 0xffff0000,
172 .pvr_value = 0x00380000, 148 .pvr_value = 0x00380000,
173 .cpu_name = "POWER4+ (gq)", 149 .cpu_name = "POWER4+ (gq)",
174 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 150 .cpu_features = CPU_FTRS_POWER4,
175 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
176 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA,
177 .cpu_user_features = COMMON_USER_PPC64, 151 .cpu_user_features = COMMON_USER_PPC64,
178 .icache_bsize = 128, 152 .icache_bsize = 128,
179 .dcache_bsize = 128, 153 .dcache_bsize = 128,
@@ -188,10 +162,7 @@ struct cpu_spec cpu_specs[] = {
188 .pvr_mask = 0xffff0000, 162 .pvr_mask = 0xffff0000,
189 .pvr_value = 0x00390000, 163 .pvr_value = 0x00390000,
190 .cpu_name = "PPC970", 164 .cpu_name = "PPC970",
191 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 165 .cpu_features = CPU_FTRS_PPC970,
192 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
193 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
194 CPU_FTR_CAN_NAP | CPU_FTR_MMCRA,
195 .cpu_user_features = COMMON_USER_PPC64 | 166 .cpu_user_features = COMMON_USER_PPC64 |
196 PPC_FEATURE_HAS_ALTIVEC_COMP, 167 PPC_FEATURE_HAS_ALTIVEC_COMP,
197 .icache_bsize = 128, 168 .icache_bsize = 128,
@@ -207,10 +178,7 @@ struct cpu_spec cpu_specs[] = {
207 .pvr_mask = 0xffff0000, 178 .pvr_mask = 0xffff0000,
208 .pvr_value = 0x003c0000, 179 .pvr_value = 0x003c0000,
209 .cpu_name = "PPC970FX", 180 .cpu_name = "PPC970FX",
210 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 181 .cpu_features = CPU_FTRS_PPC970,
211 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
212 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
213 CPU_FTR_CAN_NAP | CPU_FTR_MMCRA,
214 .cpu_user_features = COMMON_USER_PPC64 | 182 .cpu_user_features = COMMON_USER_PPC64 |
215 PPC_FEATURE_HAS_ALTIVEC_COMP, 183 PPC_FEATURE_HAS_ALTIVEC_COMP,
216 .icache_bsize = 128, 184 .icache_bsize = 128,
@@ -226,10 +194,7 @@ struct cpu_spec cpu_specs[] = {
226 .pvr_mask = 0xffff0000, 194 .pvr_mask = 0xffff0000,
227 .pvr_value = 0x00440000, 195 .pvr_value = 0x00440000,
228 .cpu_name = "PPC970MP", 196 .cpu_name = "PPC970MP",
229 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 197 .cpu_features = CPU_FTRS_PPC970,
230 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
231 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
232 CPU_FTR_CAN_NAP | CPU_FTR_MMCRA,
233 .cpu_user_features = COMMON_USER_PPC64 | 198 .cpu_user_features = COMMON_USER_PPC64 |
234 PPC_FEATURE_HAS_ALTIVEC_COMP, 199 PPC_FEATURE_HAS_ALTIVEC_COMP,
235 .icache_bsize = 128, 200 .icache_bsize = 128,
@@ -244,11 +209,7 @@ struct cpu_spec cpu_specs[] = {
244 .pvr_mask = 0xffff0000, 209 .pvr_mask = 0xffff0000,
245 .pvr_value = 0x003a0000, 210 .pvr_value = 0x003a0000,
246 .cpu_name = "POWER5 (gr)", 211 .cpu_name = "POWER5 (gr)",
247 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 212 .cpu_features = CPU_FTRS_POWER5,
248 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
249 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
250 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
251 CPU_FTR_MMCRA_SIHV,
252 .cpu_user_features = COMMON_USER_PPC64, 213 .cpu_user_features = COMMON_USER_PPC64,
253 .icache_bsize = 128, 214 .icache_bsize = 128,
254 .dcache_bsize = 128, 215 .dcache_bsize = 128,
@@ -263,11 +224,7 @@ struct cpu_spec cpu_specs[] = {
263 .pvr_mask = 0xffff0000, 224 .pvr_mask = 0xffff0000,
264 .pvr_value = 0x003b0000, 225 .pvr_value = 0x003b0000,
265 .cpu_name = "POWER5 (gs)", 226 .cpu_name = "POWER5 (gs)",
266 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 227 .cpu_features = CPU_FTRS_POWER5,
267 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
268 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
269 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
270 CPU_FTR_MMCRA_SIHV,
271 .cpu_user_features = COMMON_USER_PPC64, 228 .cpu_user_features = COMMON_USER_PPC64,
272 .icache_bsize = 128, 229 .icache_bsize = 128,
273 .dcache_bsize = 128, 230 .dcache_bsize = 128,
@@ -281,11 +238,8 @@ struct cpu_spec cpu_specs[] = {
281 { /* BE DD1.x */ 238 { /* BE DD1.x */
282 .pvr_mask = 0xffff0000, 239 .pvr_mask = 0xffff0000,
283 .pvr_value = 0x00700000, 240 .pvr_value = 0x00700000,
284 .cpu_name = "Broadband Engine", 241 .cpu_name = "Cell Broadband Engine",
285 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 242 .cpu_features = CPU_FTRS_CELL,
286 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
287 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
288 CPU_FTR_SMT,
289 .cpu_user_features = COMMON_USER_PPC64 | 243 .cpu_user_features = COMMON_USER_PPC64 |
290 PPC_FEATURE_HAS_ALTIVEC_COMP, 244 PPC_FEATURE_HAS_ALTIVEC_COMP,
291 .icache_bsize = 128, 245 .icache_bsize = 128,
@@ -296,9 +250,7 @@ struct cpu_spec cpu_specs[] = {
296 .pvr_mask = 0x00000000, 250 .pvr_mask = 0x00000000,
297 .pvr_value = 0x00000000, 251 .pvr_value = 0x00000000,
298 .cpu_name = "POWER4 (compatible)", 252 .cpu_name = "POWER4 (compatible)",
299 .cpu_features = CPU_FTR_SPLIT_ID_CACHE | 253 .cpu_features = CPU_FTRS_COMPATIBLE,
300 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
301 CPU_FTR_PPCAS_ARCH_V2,
302 .cpu_user_features = COMMON_USER_PPC64, 254 .cpu_user_features = COMMON_USER_PPC64,
303 .icache_bsize = 128, 255 .icache_bsize = 128,
304 .dcache_bsize = 128, 256 .dcache_bsize = 128,
diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c
index ba93fd731222..035d1b14a207 100644
--- a/arch/ppc64/kernel/eeh.c
+++ b/arch/ppc64/kernel/eeh.c
@@ -33,7 +33,7 @@
 #include <asm/rtas.h>
 #include <asm/atomic.h>
 #include <asm/systemcfg.h>
-#include "pci.h"
+#include <asm/ppc-pci.h>
 
 #undef DEBUG
 
diff --git a/arch/ppc64/kernel/entry.S b/arch/ppc64/kernel/entry.S
index e8c0bbf4d000..ea30af810e0c 100644
--- a/arch/ppc64/kernel/entry.S
+++ b/arch/ppc64/kernel/entry.S
@@ -191,8 +191,8 @@ syscall_exit_trace_cont:
191 ld r1,GPR1(r1) 191 ld r1,GPR1(r1)
192 mtlr r4 192 mtlr r4
193 mtcr r5 193 mtcr r5
194 mtspr SRR0,r7 194 mtspr SPRN_SRR0,r7
195 mtspr SRR1,r8 195 mtspr SPRN_SRR1,r8
196 rfid 196 rfid
197 b . /* prevent speculative execution */ 197 b . /* prevent speculative execution */
198 198
@@ -531,7 +531,7 @@ restore:
531 mtctr r3 531 mtctr r3
532 mtlr r0 532 mtlr r0
533 ld r3,_XER(r1) 533 ld r3,_XER(r1)
534 mtspr XER,r3 534 mtspr SPRN_XER,r3
535 535
536 REST_8GPRS(5, r1) 536 REST_8GPRS(5, r1)
537 537
@@ -543,12 +543,12 @@ restore:
543 mtmsrd r0,1 543 mtmsrd r0,1
544 544
545 ld r0,_MSR(r1) 545 ld r0,_MSR(r1)
546 mtspr SRR1,r0 546 mtspr SPRN_SRR1,r0
547 547
548 ld r2,_CCR(r1) 548 ld r2,_CCR(r1)
549 mtcrf 0xFF,r2 549 mtcrf 0xFF,r2
550 ld r2,_NIP(r1) 550 ld r2,_NIP(r1)
551 mtspr SRR0,r2 551 mtspr SPRN_SRR0,r2
552 552
553 ld r0,GPR0(r1) 553 ld r0,GPR0(r1)
554 ld r2,GPR2(r1) 554 ld r2,GPR2(r1)
@@ -643,7 +643,7 @@ _GLOBAL(enter_rtas)
643 std r4,_CCR(r1) 643 std r4,_CCR(r1)
644 mfctr r5 644 mfctr r5
645 std r5,_CTR(r1) 645 std r5,_CTR(r1)
646 mfspr r6,XER 646 mfspr r6,SPRN_XER
647 std r6,_XER(r1) 647 std r6,_XER(r1)
648 mfdar r7 648 mfdar r7
649 std r7,_DAR(r1) 649 std r7,_DAR(r1)
@@ -697,14 +697,14 @@ _GLOBAL(enter_rtas)
697 ld r5,RTASENTRY(r4) /* get the rtas->entry value */ 697 ld r5,RTASENTRY(r4) /* get the rtas->entry value */
698 ld r4,RTASBASE(r4) /* get the rtas->base value */ 698 ld r4,RTASBASE(r4) /* get the rtas->base value */
699 699
700 mtspr SRR0,r5 700 mtspr SPRN_SRR0,r5
701 mtspr SRR1,r6 701 mtspr SPRN_SRR1,r6
702 rfid 702 rfid
703 b . /* prevent speculative execution */ 703 b . /* prevent speculative execution */
704 704
705_STATIC(rtas_return_loc) 705_STATIC(rtas_return_loc)
706 /* relocation is off at this point */ 706 /* relocation is off at this point */
707 mfspr r4,SPRG3 /* Get PACA */ 707 mfspr r4,SPRN_SPRG3 /* Get PACA */
708 SET_REG_TO_CONST(r5, KERNELBASE) 708 SET_REG_TO_CONST(r5, KERNELBASE)
709 sub r4,r4,r5 /* RELOC the PACA base pointer */ 709 sub r4,r4,r5 /* RELOC the PACA base pointer */
710 710
@@ -718,8 +718,8 @@ _STATIC(rtas_return_loc)
718 LOADADDR(r3,.rtas_restore_regs) 718 LOADADDR(r3,.rtas_restore_regs)
719 ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ 719 ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
720 720
721 mtspr SRR0,r3 721 mtspr SPRN_SRR0,r3
722 mtspr SRR1,r4 722 mtspr SPRN_SRR1,r4
723 rfid 723 rfid
724 b . /* prevent speculative execution */ 724 b . /* prevent speculative execution */
725 725
@@ -730,14 +730,14 @@ _STATIC(rtas_restore_regs)
730 REST_8GPRS(14, r1) /* Restore the non-volatiles */ 730 REST_8GPRS(14, r1) /* Restore the non-volatiles */
731 REST_10GPRS(22, r1) /* ditto */ 731 REST_10GPRS(22, r1) /* ditto */
732 732
733 mfspr r13,SPRG3 733 mfspr r13,SPRN_SPRG3
734 734
735 ld r4,_CCR(r1) 735 ld r4,_CCR(r1)
736 mtcr r4 736 mtcr r4
737 ld r5,_CTR(r1) 737 ld r5,_CTR(r1)
738 mtctr r5 738 mtctr r5
739 ld r6,_XER(r1) 739 ld r6,_XER(r1)
740 mtspr XER,r6 740 mtspr SPRN_XER,r6
741 ld r7,_DAR(r1) 741 ld r7,_DAR(r1)
742 mtdar r7 742 mtdar r7
743 ld r8,_DSISR(r1) 743 ld r8,_DSISR(r1)
@@ -774,7 +774,7 @@ _GLOBAL(enter_prom)
774 std r4,_CCR(r1) 774 std r4,_CCR(r1)
775 mfctr r5 775 mfctr r5
776 std r5,_CTR(r1) 776 std r5,_CTR(r1)
777 mfspr r6,XER 777 mfspr r6,SPRN_XER
778 std r6,_XER(r1) 778 std r6,_XER(r1)
779 mfdar r7 779 mfdar r7
780 std r7,_DAR(r1) 780 std r7,_DAR(r1)
@@ -827,7 +827,7 @@ _GLOBAL(enter_prom)
827 ld r5,_CTR(r1) 827 ld r5,_CTR(r1)
828 mtctr r5 828 mtctr r5
829 ld r6,_XER(r1) 829 ld r6,_XER(r1)
830 mtspr XER,r6 830 mtspr SPRN_XER,r6
831 ld r7,_DAR(r1) 831 ld r7,_DAR(r1)
832 mtdar r7 832 mtdar r7
833 ld r8,_DSISR(r1) 833 ld r8,_DSISR(r1)
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index 72c61041151a..d5e6be200764 100644
--- a/arch/ppc64/kernel/head.S
+++ b/arch/ppc64/kernel/head.S
@@ -201,22 +201,22 @@ exception_marker:
201#define EX_CCR 60 201#define EX_CCR 60
202 202
203#define EXCEPTION_PROLOG_PSERIES(area, label) \ 203#define EXCEPTION_PROLOG_PSERIES(area, label) \
204 mfspr r13,SPRG3; /* get paca address into r13 */ \ 204 mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
205 std r9,area+EX_R9(r13); /* save r9 - r12 */ \ 205 std r9,area+EX_R9(r13); /* save r9 - r12 */ \
206 std r10,area+EX_R10(r13); \ 206 std r10,area+EX_R10(r13); \
207 std r11,area+EX_R11(r13); \ 207 std r11,area+EX_R11(r13); \
208 std r12,area+EX_R12(r13); \ 208 std r12,area+EX_R12(r13); \
209 mfspr r9,SPRG1; \ 209 mfspr r9,SPRN_SPRG1; \
210 std r9,area+EX_R13(r13); \ 210 std r9,area+EX_R13(r13); \
211 mfcr r9; \ 211 mfcr r9; \
212 clrrdi r12,r13,32; /* get high part of &label */ \ 212 clrrdi r12,r13,32; /* get high part of &label */ \
213 mfmsr r10; \ 213 mfmsr r10; \
214 mfspr r11,SRR0; /* save SRR0 */ \ 214 mfspr r11,SPRN_SRR0; /* save SRR0 */ \
215 ori r12,r12,(label)@l; /* virt addr of handler */ \ 215 ori r12,r12,(label)@l; /* virt addr of handler */ \
216 ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \ 216 ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \
217 mtspr SRR0,r12; \ 217 mtspr SPRN_SRR0,r12; \
218 mfspr r12,SRR1; /* and SRR1 */ \ 218 mfspr r12,SPRN_SRR1; /* and SRR1 */ \
219 mtspr SRR1,r10; \ 219 mtspr SPRN_SRR1,r10; \
220 rfid; \ 220 rfid; \
221 b . /* prevent speculative execution */ 221 b . /* prevent speculative execution */
222 222
@@ -225,12 +225,12 @@ exception_marker:
225 * This code runs with relocation on. 225 * This code runs with relocation on.
226 */ 226 */
227#define EXCEPTION_PROLOG_ISERIES_1(area) \ 227#define EXCEPTION_PROLOG_ISERIES_1(area) \
228 mfspr r13,SPRG3; /* get paca address into r13 */ \ 228 mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
229 std r9,area+EX_R9(r13); /* save r9 - r12 */ \ 229 std r9,area+EX_R9(r13); /* save r9 - r12 */ \
230 std r10,area+EX_R10(r13); \ 230 std r10,area+EX_R10(r13); \
231 std r11,area+EX_R11(r13); \ 231 std r11,area+EX_R11(r13); \
232 std r12,area+EX_R12(r13); \ 232 std r12,area+EX_R12(r13); \
233 mfspr r9,SPRG1; \ 233 mfspr r9,SPRN_SPRG1; \
234 std r9,area+EX_R13(r13); \ 234 std r9,area+EX_R13(r13); \
235 mfcr r9 235 mfcr r9
236 236
@@ -283,7 +283,7 @@ exception_marker:
283 std r9,_LINK(r1); \ 283 std r9,_LINK(r1); \
284 mfctr r10; /* save CTR in stackframe */ \ 284 mfctr r10; /* save CTR in stackframe */ \
285 std r10,_CTR(r1); \ 285 std r10,_CTR(r1); \
286 mfspr r11,XER; /* save XER in stackframe */ \ 286 mfspr r11,SPRN_XER; /* save XER in stackframe */ \
287 std r11,_XER(r1); \ 287 std r11,_XER(r1); \
288 li r9,(n)+1; \ 288 li r9,(n)+1; \
289 std r9,_TRAP(r1); /* set trap number */ \ 289 std r9,_TRAP(r1); /* set trap number */ \
@@ -300,7 +300,7 @@ exception_marker:
300 .globl label##_pSeries; \ 300 .globl label##_pSeries; \
301label##_pSeries: \ 301label##_pSeries: \
302 HMT_MEDIUM; \ 302 HMT_MEDIUM; \
303 mtspr SPRG1,r13; /* save r13 */ \ 303 mtspr SPRN_SPRG1,r13; /* save r13 */ \
304 RUNLATCH_ON(r13); \ 304 RUNLATCH_ON(r13); \
305 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) 305 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
306 306
@@ -308,7 +308,7 @@ label##_pSeries: \
308 .globl label##_iSeries; \ 308 .globl label##_iSeries; \
309label##_iSeries: \ 309label##_iSeries: \
310 HMT_MEDIUM; \ 310 HMT_MEDIUM; \
311 mtspr SPRG1,r13; /* save r13 */ \ 311 mtspr SPRN_SPRG1,r13; /* save r13 */ \
312 RUNLATCH_ON(r13); \ 312 RUNLATCH_ON(r13); \
313 EXCEPTION_PROLOG_ISERIES_1(area); \ 313 EXCEPTION_PROLOG_ISERIES_1(area); \
314 EXCEPTION_PROLOG_ISERIES_2; \ 314 EXCEPTION_PROLOG_ISERIES_2; \
@@ -318,7 +318,7 @@ label##_iSeries: \
318 .globl label##_iSeries; \ 318 .globl label##_iSeries; \
319label##_iSeries: \ 319label##_iSeries: \
320 HMT_MEDIUM; \ 320 HMT_MEDIUM; \
321 mtspr SPRG1,r13; /* save r13 */ \ 321 mtspr SPRN_SPRG1,r13; /* save r13 */ \
322 RUNLATCH_ON(r13); \ 322 RUNLATCH_ON(r13); \
323 EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \ 323 EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \
324 lbz r10,PACAPROCENABLED(r13); \ 324 lbz r10,PACAPROCENABLED(r13); \
@@ -388,7 +388,7 @@ __start_interrupts:
388 . = 0x200 388 . = 0x200
389_machine_check_pSeries: 389_machine_check_pSeries:
390 HMT_MEDIUM 390 HMT_MEDIUM
391 mtspr SPRG1,r13 /* save r13 */ 391 mtspr SPRN_SPRG1,r13 /* save r13 */
392 RUNLATCH_ON(r13) 392 RUNLATCH_ON(r13)
393 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) 393 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
394 394
@@ -396,18 +396,18 @@ _machine_check_pSeries:
396 .globl data_access_pSeries 396 .globl data_access_pSeries
397data_access_pSeries: 397data_access_pSeries:
398 HMT_MEDIUM 398 HMT_MEDIUM
399 mtspr SPRG1,r13 399 mtspr SPRN_SPRG1,r13
400BEGIN_FTR_SECTION 400BEGIN_FTR_SECTION
401 mtspr SPRG2,r12 401 mtspr SPRN_SPRG2,r12
402 mfspr r13,DAR 402 mfspr r13,SPRN_DAR
403 mfspr r12,DSISR 403 mfspr r12,SPRN_DSISR
404 srdi r13,r13,60 404 srdi r13,r13,60
405 rlwimi r13,r12,16,0x20 405 rlwimi r13,r12,16,0x20
406 mfcr r12 406 mfcr r12
407 cmpwi r13,0x2c 407 cmpwi r13,0x2c
408 beq .do_stab_bolted_pSeries 408 beq .do_stab_bolted_pSeries
409 mtcrf 0x80,r12 409 mtcrf 0x80,r12
410 mfspr r12,SPRG2 410 mfspr r12,SPRN_SPRG2
411END_FTR_SECTION_IFCLR(CPU_FTR_SLB) 411END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
412 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) 412 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
413 413
@@ -415,19 +415,19 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
415 .globl data_access_slb_pSeries 415 .globl data_access_slb_pSeries
416data_access_slb_pSeries: 416data_access_slb_pSeries:
417 HMT_MEDIUM 417 HMT_MEDIUM
418 mtspr SPRG1,r13 418 mtspr SPRN_SPRG1,r13
419 RUNLATCH_ON(r13) 419 RUNLATCH_ON(r13)
420 mfspr r13,SPRG3 /* get paca address into r13 */ 420 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
421 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ 421 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
422 std r10,PACA_EXSLB+EX_R10(r13) 422 std r10,PACA_EXSLB+EX_R10(r13)
423 std r11,PACA_EXSLB+EX_R11(r13) 423 std r11,PACA_EXSLB+EX_R11(r13)
424 std r12,PACA_EXSLB+EX_R12(r13) 424 std r12,PACA_EXSLB+EX_R12(r13)
425 std r3,PACA_EXSLB+EX_R3(r13) 425 std r3,PACA_EXSLB+EX_R3(r13)
426 mfspr r9,SPRG1 426 mfspr r9,SPRN_SPRG1
427 std r9,PACA_EXSLB+EX_R13(r13) 427 std r9,PACA_EXSLB+EX_R13(r13)
428 mfcr r9 428 mfcr r9
429 mfspr r12,SRR1 /* and SRR1 */ 429 mfspr r12,SPRN_SRR1 /* and SRR1 */
430 mfspr r3,DAR 430 mfspr r3,SPRN_DAR
431 b .do_slb_miss /* Rel. branch works in real mode */ 431 b .do_slb_miss /* Rel. branch works in real mode */
432 432
433 STD_EXCEPTION_PSERIES(0x400, instruction_access) 433 STD_EXCEPTION_PSERIES(0x400, instruction_access)
@@ -436,19 +436,19 @@ data_access_slb_pSeries:
436 .globl instruction_access_slb_pSeries 436 .globl instruction_access_slb_pSeries
437instruction_access_slb_pSeries: 437instruction_access_slb_pSeries:
438 HMT_MEDIUM 438 HMT_MEDIUM
439 mtspr SPRG1,r13 439 mtspr SPRN_SPRG1,r13
440 RUNLATCH_ON(r13) 440 RUNLATCH_ON(r13)
441 mfspr r13,SPRG3 /* get paca address into r13 */ 441 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
442 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ 442 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
443 std r10,PACA_EXSLB+EX_R10(r13) 443 std r10,PACA_EXSLB+EX_R10(r13)
444 std r11,PACA_EXSLB+EX_R11(r13) 444 std r11,PACA_EXSLB+EX_R11(r13)
445 std r12,PACA_EXSLB+EX_R12(r13) 445 std r12,PACA_EXSLB+EX_R12(r13)
446 std r3,PACA_EXSLB+EX_R3(r13) 446 std r3,PACA_EXSLB+EX_R3(r13)
447 mfspr r9,SPRG1 447 mfspr r9,SPRN_SPRG1
448 std r9,PACA_EXSLB+EX_R13(r13) 448 std r9,PACA_EXSLB+EX_R13(r13)
449 mfcr r9 449 mfcr r9
450 mfspr r12,SRR1 /* and SRR1 */ 450 mfspr r12,SPRN_SRR1 /* and SRR1 */
451 mfspr r3,SRR0 /* SRR0 is faulting address */ 451 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
452 b .do_slb_miss /* Rel. branch works in real mode */ 452 b .do_slb_miss /* Rel. branch works in real mode */
453 453
454 STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) 454 STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
@@ -466,15 +466,15 @@ system_call_pSeries:
466 RUNLATCH_ON(r9) 466 RUNLATCH_ON(r9)
467 mr r9,r13 467 mr r9,r13
468 mfmsr r10 468 mfmsr r10
469 mfspr r13,SPRG3 469 mfspr r13,SPRN_SPRG3
470 mfspr r11,SRR0 470 mfspr r11,SPRN_SRR0
471 clrrdi r12,r13,32 471 clrrdi r12,r13,32
472 oris r12,r12,system_call_common@h 472 oris r12,r12,system_call_common@h
473 ori r12,r12,system_call_common@l 473 ori r12,r12,system_call_common@l
474 mtspr SRR0,r12 474 mtspr SPRN_SRR0,r12
475 ori r10,r10,MSR_IR|MSR_DR|MSR_RI 475 ori r10,r10,MSR_IR|MSR_DR|MSR_RI
476 mfspr r12,SRR1 476 mfspr r12,SPRN_SRR1
477 mtspr SRR1,r10 477 mtspr SPRN_SRR1,r10
478 rfid 478 rfid
479 b . /* prevent speculative execution */ 479 b . /* prevent speculative execution */
480 480
@@ -504,25 +504,25 @@ system_call_pSeries:
504 .align 7 504 .align 7
505_GLOBAL(do_stab_bolted_pSeries) 505_GLOBAL(do_stab_bolted_pSeries)
506 mtcrf 0x80,r12 506 mtcrf 0x80,r12
507 mfspr r12,SPRG2 507 mfspr r12,SPRN_SPRG2
508 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) 508 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
509 509
510/* 510/*
511 * Vectors for the FWNMI option. Share common code. 511 * Vectors for the FWNMI option. Share common code.
512 */ 512 */
513 .globl system_reset_fwnmi 513 .globl system_reset_fwnmi
514system_reset_fwnmi: 514system_reset_fwnmi:
515 HMT_MEDIUM 515 HMT_MEDIUM
516 mtspr SPRG1,r13 /* save r13 */ 516 mtspr SPRN_SPRG1,r13 /* save r13 */
517 RUNLATCH_ON(r13) 517 RUNLATCH_ON(r13)
518 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) 518 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
519 519
520 .globl machine_check_fwnmi 520 .globl machine_check_fwnmi
521machine_check_fwnmi: 521machine_check_fwnmi:
522 HMT_MEDIUM 522 HMT_MEDIUM
523 mtspr SPRG1,r13 /* save r13 */ 523 mtspr SPRN_SPRG1,r13 /* save r13 */
524 RUNLATCH_ON(r13) 524 RUNLATCH_ON(r13)
525 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) 525 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
526 526
527#ifdef CONFIG_PPC_ISERIES 527#ifdef CONFIG_PPC_ISERIES
528/*** ISeries-LPAR interrupt handlers ***/ 528/*** ISeries-LPAR interrupt handlers ***/
@@ -531,18 +531,18 @@ machine_check_fwnmi:
531 531
532 .globl data_access_iSeries 532 .globl data_access_iSeries
533data_access_iSeries: 533data_access_iSeries:
534 mtspr SPRG1,r13 534 mtspr SPRN_SPRG1,r13
535BEGIN_FTR_SECTION 535BEGIN_FTR_SECTION
536 mtspr SPRG2,r12 536 mtspr SPRN_SPRG2,r12
537 mfspr r13,DAR 537 mfspr r13,SPRN_DAR
538 mfspr r12,DSISR 538 mfspr r12,SPRN_DSISR
539 srdi r13,r13,60 539 srdi r13,r13,60
540 rlwimi r13,r12,16,0x20 540 rlwimi r13,r12,16,0x20
541 mfcr r12 541 mfcr r12
542 cmpwi r13,0x2c 542 cmpwi r13,0x2c
543 beq .do_stab_bolted_iSeries 543 beq .do_stab_bolted_iSeries
544 mtcrf 0x80,r12 544 mtcrf 0x80,r12
545 mfspr r12,SPRG2 545 mfspr r12,SPRN_SPRG2
546END_FTR_SECTION_IFCLR(CPU_FTR_SLB) 546END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
547 EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN) 547 EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN)
548 EXCEPTION_PROLOG_ISERIES_2 548 EXCEPTION_PROLOG_ISERIES_2
@@ -550,25 +550,25 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
550 550
551.do_stab_bolted_iSeries: 551.do_stab_bolted_iSeries:
552 mtcrf 0x80,r12 552 mtcrf 0x80,r12
553 mfspr r12,SPRG2 553 mfspr r12,SPRN_SPRG2
554 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) 554 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
555 EXCEPTION_PROLOG_ISERIES_2 555 EXCEPTION_PROLOG_ISERIES_2
556 b .do_stab_bolted 556 b .do_stab_bolted
557 557
558 .globl data_access_slb_iSeries 558 .globl data_access_slb_iSeries
559data_access_slb_iSeries: 559data_access_slb_iSeries:
560 mtspr SPRG1,r13 /* save r13 */ 560 mtspr SPRN_SPRG1,r13 /* save r13 */
561 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) 561 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
562 std r3,PACA_EXSLB+EX_R3(r13) 562 std r3,PACA_EXSLB+EX_R3(r13)
563 ld r12,PACALPPACA+LPPACASRR1(r13) 563 ld r12,PACALPPACA+LPPACASRR1(r13)
564 mfspr r3,DAR 564 mfspr r3,SPRN_DAR
565 b .do_slb_miss 565 b .do_slb_miss
566 566
567 STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) 567 STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
568 568
569 .globl instruction_access_slb_iSeries 569 .globl instruction_access_slb_iSeries
570instruction_access_slb_iSeries: 570instruction_access_slb_iSeries:
571 mtspr SPRG1,r13 /* save r13 */ 571 mtspr SPRN_SPRG1,r13 /* save r13 */
572 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) 572 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
573 std r3,PACA_EXSLB+EX_R3(r13) 573 std r3,PACA_EXSLB+EX_R3(r13)
574 ld r12,PACALPPACA+LPPACASRR1(r13) 574 ld r12,PACALPPACA+LPPACASRR1(r13)
@@ -586,7 +586,7 @@ instruction_access_slb_iSeries:
586 .globl system_call_iSeries 586 .globl system_call_iSeries
587system_call_iSeries: 587system_call_iSeries:
588 mr r9,r13 588 mr r9,r13
589 mfspr r13,SPRG3 589 mfspr r13,SPRN_SPRG3
590 EXCEPTION_PROLOG_ISERIES_2 590 EXCEPTION_PROLOG_ISERIES_2
591 b system_call_common 591 b system_call_common
592 592
@@ -596,7 +596,7 @@ system_call_iSeries:
596 596
597 .globl system_reset_iSeries 597 .globl system_reset_iSeries
598system_reset_iSeries: 598system_reset_iSeries:
599 mfspr r13,SPRG3 /* Get paca address */ 599 mfspr r13,SPRN_SPRG3 /* Get paca address */
600 mfmsr r24 600 mfmsr r24
601 ori r24,r24,MSR_RI 601 ori r24,r24,MSR_RI
602 mtmsrd r24 /* RI on */ 602 mtmsrd r24 /* RI on */
@@ -639,7 +639,7 @@ iSeries_secondary_smp_loop:
639#endif /* CONFIG_SMP */ 639#endif /* CONFIG_SMP */
640 li r0,-1 /* r0=-1 indicates a Hypervisor call */ 640 li r0,-1 /* r0=-1 indicates a Hypervisor call */
641 sc /* Invoke the hypervisor via a system call */ 641 sc /* Invoke the hypervisor via a system call */
642 mfspr r13,SPRG3 /* Put r13 back ???? */ 642 mfspr r13,SPRN_SPRG3 /* Put r13 back ???? */
643 b 1b /* If SMP not configured, secondaries 643 b 1b /* If SMP not configured, secondaries
644 * loop forever */ 644 * loop forever */
645 645
@@ -656,8 +656,8 @@ hardware_interrupt_iSeries_masked:
656 mtcrf 0x80,r9 /* Restore regs */ 656 mtcrf 0x80,r9 /* Restore regs */
657 ld r11,PACALPPACA+LPPACASRR0(r13) 657 ld r11,PACALPPACA+LPPACASRR0(r13)
658 ld r12,PACALPPACA+LPPACASRR1(r13) 658 ld r12,PACALPPACA+LPPACASRR1(r13)
659 mtspr SRR0,r11 659 mtspr SPRN_SRR0,r11
660 mtspr SRR1,r12 660 mtspr SPRN_SRR1,r12
661 ld r9,PACA_EXGEN+EX_R9(r13) 661 ld r9,PACA_EXGEN+EX_R9(r13)
662 ld r10,PACA_EXGEN+EX_R10(r13) 662 ld r10,PACA_EXGEN+EX_R10(r13)
663 ld r11,PACA_EXGEN+EX_R11(r13) 663 ld r11,PACA_EXGEN+EX_R11(r13)
@@ -713,8 +713,8 @@ bad_stack:
713 std r10,GPR1(r1) 713 std r10,GPR1(r1)
714 std r11,_NIP(r1) 714 std r11,_NIP(r1)
715 std r12,_MSR(r1) 715 std r12,_MSR(r1)
716 mfspr r11,DAR 716 mfspr r11,SPRN_DAR
717 mfspr r12,DSISR 717 mfspr r12,SPRN_DSISR
718 std r11,_DAR(r1) 718 std r11,_DAR(r1)
719 std r12,_DSISR(r1) 719 std r12,_DSISR(r1)
720 mflr r10 720 mflr r10
@@ -766,8 +766,8 @@ fast_exception_return:
766 clrrdi r10,r10,2 /* clear RI (LE is 0 already) */ 766 clrrdi r10,r10,2 /* clear RI (LE is 0 already) */
767 mtmsrd r10,1 767 mtmsrd r10,1
768 768
769 mtspr SRR1,r12 769 mtspr SPRN_SRR1,r12
770 mtspr SRR0,r11 770 mtspr SPRN_SRR0,r11
771 REST_4GPRS(10, r1) 771 REST_4GPRS(10, r1)
772 ld r1,GPR1(r1) 772 ld r1,GPR1(r1)
773 rfid 773 rfid
@@ -788,9 +788,9 @@ unrecov_fer:
788 .globl data_access_common 788 .globl data_access_common
789data_access_common: 789data_access_common:
 790	RUNLATCH_ON(r10)		/* It won't fit in the 0x300 handler */	 790	RUNLATCH_ON(r10)		/* It won't fit in the 0x300 handler */
791 mfspr r10,DAR 791 mfspr r10,SPRN_DAR
792 std r10,PACA_EXGEN+EX_DAR(r13) 792 std r10,PACA_EXGEN+EX_DAR(r13)
793 mfspr r10,DSISR 793 mfspr r10,SPRN_DSISR
794 stw r10,PACA_EXGEN+EX_DSISR(r13) 794 stw r10,PACA_EXGEN+EX_DSISR(r13)
795 EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) 795 EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
796 ld r3,PACA_EXGEN+EX_DAR(r13) 796 ld r3,PACA_EXGEN+EX_DAR(r13)
@@ -821,9 +821,9 @@ hardware_interrupt_entry:
821 .align 7 821 .align 7
822 .globl alignment_common 822 .globl alignment_common
823alignment_common: 823alignment_common:
824 mfspr r10,DAR 824 mfspr r10,SPRN_DAR
825 std r10,PACA_EXGEN+EX_DAR(r13) 825 std r10,PACA_EXGEN+EX_DAR(r13)
826 mfspr r10,DSISR 826 mfspr r10,SPRN_DSISR
827 stw r10,PACA_EXGEN+EX_DSISR(r13) 827 stw r10,PACA_EXGEN+EX_DSISR(r13)
828 EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN) 828 EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
829 ld r3,PACA_EXGEN+EX_DAR(r13) 829 ld r3,PACA_EXGEN+EX_DAR(r13)
@@ -1120,7 +1120,7 @@ _GLOBAL(do_stab_bolted)
1120 1120
1121 /* Hash to the primary group */ 1121 /* Hash to the primary group */
1122 ld r10,PACASTABVIRT(r13) 1122 ld r10,PACASTABVIRT(r13)
1123 mfspr r11,DAR 1123 mfspr r11,SPRN_DAR
1124 srdi r11,r11,28 1124 srdi r11,r11,28
1125 rldimi r10,r11,7,52 /* r10 = first ste of the group */ 1125 rldimi r10,r11,7,52 /* r10 = first ste of the group */
1126 1126
@@ -1162,7 +1162,7 @@ _GLOBAL(do_stab_bolted)
11622: std r9,8(r10) /* Store the vsid part of the ste */ 11622: std r9,8(r10) /* Store the vsid part of the ste */
1163 eieio 1163 eieio
1164 1164
1165 mfspr r11,DAR /* Get the new esid */ 1165 mfspr r11,SPRN_DAR /* Get the new esid */
1166 clrrdi r11,r11,28 /* Permits a full 32b of ESID */ 1166 clrrdi r11,r11,28 /* Permits a full 32b of ESID */
1167 ori r11,r11,0x90 /* Turn on valid and kp */ 1167 ori r11,r11,0x90 /* Turn on valid and kp */
1168 std r11,0(r10) /* Put new entry back into the stab */ 1168 std r11,0(r10) /* Put new entry back into the stab */
@@ -1182,8 +1182,8 @@ _GLOBAL(do_stab_bolted)
1182 clrrdi r10,r10,2 1182 clrrdi r10,r10,2
1183 mtmsrd r10,1 1183 mtmsrd r10,1
1184 1184
1185 mtspr SRR0,r11 1185 mtspr SPRN_SRR0,r11
1186 mtspr SRR1,r12 1186 mtspr SPRN_SRR1,r12
1187 ld r9,PACA_EXSLB+EX_R9(r13) 1187 ld r9,PACA_EXSLB+EX_R9(r13)
1188 ld r10,PACA_EXSLB+EX_R10(r13) 1188 ld r10,PACA_EXSLB+EX_R10(r13)
1189 ld r11,PACA_EXSLB+EX_R11(r13) 1189 ld r11,PACA_EXSLB+EX_R11(r13)
@@ -1229,8 +1229,8 @@ _GLOBAL(do_slb_miss)
1229.machine pop 1229.machine pop
1230 1230
1231#ifdef CONFIG_PPC_ISERIES 1231#ifdef CONFIG_PPC_ISERIES
1232 mtspr SRR0,r11 1232 mtspr SPRN_SRR0,r11
1233 mtspr SRR1,r12 1233 mtspr SPRN_SRR1,r12
1234#endif /* CONFIG_PPC_ISERIES */ 1234#endif /* CONFIG_PPC_ISERIES */
1235 ld r9,PACA_EXSLB+EX_R9(r13) 1235 ld r9,PACA_EXSLB+EX_R9(r13)
1236 ld r10,PACA_EXSLB+EX_R10(r13) 1236 ld r10,PACA_EXSLB+EX_R10(r13)
@@ -1253,7 +1253,7 @@ unrecov_slb:
1253 * 1253 *
1254 * On iSeries, the hypervisor must fill in at least one entry before 1254 * On iSeries, the hypervisor must fill in at least one entry before
1255 * we get control (with relocate on). The address is given to the hv	1255 * we get control (with relocate on). The address is given to the hv
1256 * as a page number (see xLparMap in LparData.c), so this must be at a 1256 * as a page number (see xLparMap in lpardata.c), so this must be at a
1257 * fixed address (the linker can't compute (u64)&initial_stab >> 1257 * fixed address (the linker can't compute (u64)&initial_stab >>
1258 * PAGE_SHIFT). 1258 * PAGE_SHIFT).
1259 */ 1259 */
@@ -1316,7 +1316,7 @@ _GLOBAL(pSeries_secondary_smp_init)
1316 mr r3,r24 /* not found, copy phys to r3 */ 1316 mr r3,r24 /* not found, copy phys to r3 */
1317 b .kexec_wait /* next kernel might do better */ 1317 b .kexec_wait /* next kernel might do better */
1318 1318
13192: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ 13192: mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
1320 /* From now on, r24 is expected to be logical cpuid */ 1320 /* From now on, r24 is expected to be logical cpuid */
1321 mr r24,r5 1321 mr r24,r5
13223: HMT_LOW 13223: HMT_LOW
@@ -1364,6 +1364,7 @@ _STATIC(__start_initialization_iSeries)
1364 addi r2,r2,0x4000 1364 addi r2,r2,0x4000
1365 1365
1366 bl .iSeries_early_setup 1366 bl .iSeries_early_setup
1367 bl .early_setup
1367 1368
1368 /* relocation is on at this point */ 1369 /* relocation is on at this point */
1369 1370
@@ -1586,7 +1587,7 @@ _GLOBAL(pmac_secondary_start)
1586 LOADADDR(r4, paca) /* Get base vaddr of paca array */ 1587 LOADADDR(r4, paca) /* Get base vaddr of paca array */
1587 mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ 1588 mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
1588 add r13,r13,r4 /* for this processor. */ 1589 add r13,r13,r4 /* for this processor. */
1589 mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ 1590 mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
1590 1591
1591 /* Create a temp kernel stack for use before relocation is on. */ 1592 /* Create a temp kernel stack for use before relocation is on. */
1592 ld r1,PACAEMERGSP(r13) 1593 ld r1,PACAEMERGSP(r13)
@@ -1621,7 +1622,7 @@ _GLOBAL(__secondary_start)
1621 /* Initialize the page table pointer register. */ 1622 /* Initialize the page table pointer register. */
1622 LOADADDR(r6,_SDR1) 1623 LOADADDR(r6,_SDR1)
1623 ld r6,0(r6) /* get the value of _SDR1 */ 1624 ld r6,0(r6) /* get the value of _SDR1 */
1624 mtspr SDR1,r6 /* set the htab location */ 1625 mtspr SPRN_SDR1,r6 /* set the htab location */
1625#endif 1626#endif
1626 /* Initialize the first segment table (or SLB) entry */ 1627 /* Initialize the first segment table (or SLB) entry */
1627 ld r3,PACASTABVIRT(r13) /* get addr of segment table */ 1628 ld r3,PACASTABVIRT(r13) /* get addr of segment table */
@@ -1650,7 +1651,7 @@ _GLOBAL(__secondary_start)
1650 lwz r3,PLATFORM(r3) /* r3 = platform flags */ 1651 lwz r3,PLATFORM(r3) /* r3 = platform flags */
1651 andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ 1652 andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
1652 beq 98f /* branch if result is 0 */ 1653 beq 98f /* branch if result is 0 */
1653 mfspr r3,PVR 1654 mfspr r3,SPRN_PVR
1654 srwi r3,r3,16 1655 srwi r3,r3,16
1655 cmpwi r3,0x37 /* SStar */ 1656 cmpwi r3,0x37 /* SStar */
1656 beq 97f 1657 beq 97f
@@ -1674,8 +1675,8 @@ _GLOBAL(__secondary_start)
1674#ifdef DO_SOFT_DISABLE 1675#ifdef DO_SOFT_DISABLE
1675 ori r4,r4,MSR_EE 1676 ori r4,r4,MSR_EE
1676#endif 1677#endif
1677 mtspr SRR0,r3 1678 mtspr SPRN_SRR0,r3
1678 mtspr SRR1,r4 1679 mtspr SPRN_SRR1,r4
1679 rfid 1680 rfid
1680 b . /* prevent speculative execution */ 1681 b . /* prevent speculative execution */
1681 1682
@@ -1737,7 +1738,7 @@ _STATIC(start_here_multiplatform)
1737 1738
1738#ifdef CONFIG_HMT 1739#ifdef CONFIG_HMT
1739 /* Start up the second thread on cpu 0 */ 1740 /* Start up the second thread on cpu 0 */
1740 mfspr r3,PVR 1741 mfspr r3,SPRN_PVR
1741 srwi r3,r3,16 1742 srwi r3,r3,16
1742 cmpwi r3,0x34 /* Pulsar */ 1743 cmpwi r3,0x34 /* Pulsar */
1743 beq 90f 1744 beq 90f
@@ -1797,7 +1798,7 @@ _STATIC(start_here_multiplatform)
1797 mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */ 1798 mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */
1798 add r13,r13,r24 /* for this processor. */ 1799 add r13,r13,r24 /* for this processor. */
1799 sub r13,r13,r26 /* convert to physical addr */ 1800 sub r13,r13,r26 /* convert to physical addr */
1800 mtspr SPRG3,r13 /* PPPBBB: Temp... -Peter */ 1801 mtspr SPRN_SPRG3,r13 /* PPPBBB: Temp... -Peter */
1801 1802
1802 /* Do very early kernel initializations, including initial hash table, 1803 /* Do very early kernel initializations, including initial hash table,
1803 * stab and slb setup before we turn on relocation. */ 1804 * stab and slb setup before we turn on relocation. */
@@ -1814,7 +1815,7 @@ _STATIC(start_here_multiplatform)
1814 lwz r3,PLATFORM(r3) /* r3 = platform flags */ 1815 lwz r3,PLATFORM(r3) /* r3 = platform flags */
1815 andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ 1816 andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */
1816 beq 98f /* branch if result is 0 */ 1817 beq 98f /* branch if result is 0 */
1817 mfspr r3,PVR 1818 mfspr r3,SPRN_PVR
1818 srwi r3,r3,16 1819 srwi r3,r3,16
1819 cmpwi r3,0x37 /* SStar */ 1820 cmpwi r3,0x37 /* SStar */
1820 beq 97f 1821 beq 97f
@@ -1838,12 +1839,12 @@ _STATIC(start_here_multiplatform)
1838 LOADADDR(r6,_SDR1) /* Only if NOT LPAR */ 1839 LOADADDR(r6,_SDR1) /* Only if NOT LPAR */
1839 sub r6,r6,r26 1840 sub r6,r6,r26
1840 ld r6,0(r6) /* get the value of _SDR1 */ 1841 ld r6,0(r6) /* get the value of _SDR1 */
1841 mtspr SDR1,r6 /* set the htab location */ 1842 mtspr SPRN_SDR1,r6 /* set the htab location */
184298: 184398:
1843 LOADADDR(r3,.start_here_common) 1844 LOADADDR(r3,.start_here_common)
1844 SET_REG_TO_CONST(r4, MSR_KERNEL) 1845 SET_REG_TO_CONST(r4, MSR_KERNEL)
1845 mtspr SRR0,r3 1846 mtspr SPRN_SRR0,r3
1846 mtspr SRR1,r4 1847 mtspr SPRN_SRR1,r4
1847 rfid 1848 rfid
1848 b . /* prevent speculative execution */ 1849 b . /* prevent speculative execution */
1849#endif /* CONFIG_PPC_MULTIPLATFORM */ 1850#endif /* CONFIG_PPC_MULTIPLATFORM */
@@ -1874,7 +1875,7 @@ _STATIC(start_here_common)
1874 LOADADDR(r24, paca) /* Get base vaddr of paca array */ 1875 LOADADDR(r24, paca) /* Get base vaddr of paca array */
1875 mulli r13,r26,PACA_SIZE /* Calculate vaddr of right paca */ 1876 mulli r13,r26,PACA_SIZE /* Calculate vaddr of right paca */
1876 add r13,r13,r24 /* for this processor. */ 1877 add r13,r13,r24 /* for this processor. */
1877 mtspr SPRG3,r13 1878 mtspr SPRN_SPRG3,r13
1878 1879
1879 /* ptr to current */ 1880 /* ptr to current */
1880 LOADADDR(r4,init_task) 1881 LOADADDR(r4,init_task)
@@ -1901,7 +1902,7 @@ _STATIC(start_here_common)
1901_GLOBAL(hmt_init) 1902_GLOBAL(hmt_init)
1902#ifdef CONFIG_HMT 1903#ifdef CONFIG_HMT
1903 LOADADDR(r5, hmt_thread_data) 1904 LOADADDR(r5, hmt_thread_data)
1904 mfspr r7,PVR 1905 mfspr r7,SPRN_PVR
1905 srwi r7,r7,16 1906 srwi r7,r7,16
1906 cmpwi r7,0x34 /* Pulsar */ 1907 cmpwi r7,0x34 /* Pulsar */
1907 beq 90f 1908 beq 90f
@@ -1910,10 +1911,10 @@ _GLOBAL(hmt_init)
1910 cmpwi r7,0x37 /* SStar */ 1911 cmpwi r7,0x37 /* SStar */
1911 beq 91f 1912 beq 91f
1912 b 101f 1913 b 101f
191390: mfspr r6,PIR 191490: mfspr r6,SPRN_PIR
1914 andi. r6,r6,0x1f 1915 andi. r6,r6,0x1f
1915 b 92f 1916 b 92f
191691: mfspr r6,PIR 191791: mfspr r6,SPRN_PIR
1917 andi. r6,r6,0x3ff 1918 andi. r6,r6,0x3ff
191892: sldi r4,r24,3 191992: sldi r4,r24,3
1919 stwx r6,r5,r4 1920 stwx r6,r5,r4
@@ -1924,8 +1925,8 @@ __hmt_secondary_hold:
1924 LOADADDR(r5, hmt_thread_data) 1925 LOADADDR(r5, hmt_thread_data)
1925 clrldi r5,r5,4 1926 clrldi r5,r5,4
1926 li r7,0 1927 li r7,0
1927 mfspr r6,PIR 1928 mfspr r6,SPRN_PIR
1928 mfspr r8,PVR 1929 mfspr r8,SPRN_PVR
1929 srwi r8,r8,16 1930 srwi r8,r8,16
1930 cmpwi r8,0x34 1931 cmpwi r8,0x34
1931 bne 93f 1932 bne 93f
@@ -1951,39 +1952,41 @@ __hmt_secondary_hold:
1951_GLOBAL(hmt_start_secondary) 1952_GLOBAL(hmt_start_secondary)
1952 LOADADDR(r4,__hmt_secondary_hold) 1953 LOADADDR(r4,__hmt_secondary_hold)
1953 clrldi r4,r4,4 1954 clrldi r4,r4,4
1954 mtspr NIADORM, r4 1955 mtspr SPRN_NIADORM, r4
1955 mfspr r4, MSRDORM 1956 mfspr r4, SPRN_MSRDORM
1956 li r5, -65 1957 li r5, -65
1957 and r4, r4, r5 1958 and r4, r4, r5
1958 mtspr MSRDORM, r4 1959 mtspr SPRN_MSRDORM, r4
1959 lis r4,0xffef 1960 lis r4,0xffef
1960 ori r4,r4,0x7403 1961 ori r4,r4,0x7403
1961 mtspr TSC, r4 1962 mtspr SPRN_TSC, r4
1962 li r4,0x1f4 1963 li r4,0x1f4
1963 mtspr TST, r4 1964 mtspr SPRN_TST, r4
1964 mfspr r4, HID0 1965 mfspr r4, SPRN_HID0
1965 ori r4, r4, 0x1 1966 ori r4, r4, 0x1
1966 mtspr HID0, r4 1967 mtspr SPRN_HID0, r4
1967 mfspr r4, SPRN_CTRLF 1968 mfspr r4, SPRN_CTRLF
1968 oris r4, r4, 0x40 1969 oris r4, r4, 0x40
1969 mtspr SPRN_CTRLT, r4 1970 mtspr SPRN_CTRLT, r4
1970 blr 1971 blr
1971#endif 1972#endif
1972 1973
1973#if defined(CONFIG_KEXEC) || (defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES)) 1974#if defined(CONFIG_KEXEC) || defined(CONFIG_SMP)
1974_GLOBAL(smp_release_cpus) 1975_GLOBAL(smp_release_cpus)
1975 /* All secondary cpus are spinning on a common 1976 /* All secondary cpus are spinning on a common
1976 * spinloop, release them all now so they can start 1977 * spinloop, release them all now so they can start
1977 * to spin on their individual paca spinloops. 1978 * to spin on their individual paca spinloops.
1978 * For non SMP kernels, the secondary cpus never 1979 * For non SMP kernels, the secondary cpus never
1979 * get out of the common spinloop. 1980 * get out of the common spinloop.
1981 * XXX This does nothing useful on iSeries, secondaries are
1982 * already waiting on their paca.
1980 */ 1983 */
1981 li r3,1 1984 li r3,1
1982 LOADADDR(r5,__secondary_hold_spinloop) 1985 LOADADDR(r5,__secondary_hold_spinloop)
1983 std r3,0(r5) 1986 std r3,0(r5)
1984 sync 1987 sync
1985 blr 1988 blr
1986#endif /* CONFIG_SMP && !CONFIG_PPC_ISERIES */ 1989#endif /* CONFIG_SMP */
1987 1990
1988 1991
1989/* 1992/*
@@ -1992,7 +1995,7 @@ _GLOBAL(smp_release_cpus)
1992 */ 1995 */
1993 .section ".bss" 1996 .section ".bss"
1994 1997
1995 .align 12 1998 .align PAGE_SHIFT
1996 1999
1997 .globl empty_zero_page 2000 .globl empty_zero_page
1998empty_zero_page: 2001empty_zero_page:
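
For context on the head.S hunks above, which replace bare SPR mnemonics (SPRG1, DAR, SRR0, ...) with SPRN_-prefixed constants: the SPRN_* names are plain numeric special-purpose-register ids, so assembly and C can share one set of definitions. A minimal self-contained sketch, with the SPRG3 number (275 = 0x113) taken from the PowerPC architecture definition rather than from this patch:

	/* SPRN_* constants are just SPR numbers; SPRG3 is SPR 275 (0x113). */
	#define SPRN_SPRG3	0x113

	static inline unsigned long read_sprg3(void)
	{
		unsigned long val;
		/* C-side equivalent of the "mfspr rX,SPRN_SPRG3" used in head.S */
		__asm__ volatile("mfspr %0,%1" : "=r" (val) : "i" (SPRN_SPRG3));
		return val;
	}
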
diff --git a/arch/ppc64/kernel/hvCall.S b/arch/ppc64/kernel/hvCall.S
deleted file mode 100644
index 4c699eab1b95..000000000000
--- a/arch/ppc64/kernel/hvCall.S
+++ /dev/null
@@ -1,98 +0,0 @@
1/*
2 * arch/ppc64/kernel/hvCall.S
3 *
4 *
5 * This file contains the code to perform calls to the
6 * iSeries LPAR hypervisor
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <asm/ppc_asm.h>
15#include <asm/processor.h>
16
17 .text
18
19/*
20 * Hypervisor call
21 *
22 * Invoke the iSeries hypervisor via the System Call instruction
23 * Parameters are passed to this routine in registers r3 - r10
24 *
25 * r3 contains the HV function to be called
26 * r4-r10 contain the operands to the hypervisor function
27 *
28 */
29
30_GLOBAL(HvCall)
31_GLOBAL(HvCall0)
32_GLOBAL(HvCall1)
33_GLOBAL(HvCall2)
34_GLOBAL(HvCall3)
35_GLOBAL(HvCall4)
36_GLOBAL(HvCall5)
37_GLOBAL(HvCall6)
38_GLOBAL(HvCall7)
39
40
41 mfcr r0
42 std r0,-8(r1)
43 stdu r1,-(STACK_FRAME_OVERHEAD+16)(r1)
44
45 /* r0 = 0xffffffffffffffff indicates a hypervisor call */
46
47 li r0,-1
48
49 /* Invoke the hypervisor */
50
51 sc
52
53 ld r1,0(r1)
54 ld r0,-8(r1)
55 mtcrf 0xff,r0
56
57 /* return to caller, return value in r3 */
58
59 blr
60
61_GLOBAL(HvCall0Ret16)
62_GLOBAL(HvCall1Ret16)
63_GLOBAL(HvCall2Ret16)
64_GLOBAL(HvCall3Ret16)
65_GLOBAL(HvCall4Ret16)
66_GLOBAL(HvCall5Ret16)
67_GLOBAL(HvCall6Ret16)
68_GLOBAL(HvCall7Ret16)
69
70 mfcr r0
71 std r0,-8(r1)
72 std r31,-16(r1)
73 stdu r1,-(STACK_FRAME_OVERHEAD+32)(r1)
74
75 mr r31,r4
76 li r0,-1
77 mr r4,r5
78 mr r5,r6
79 mr r6,r7
80 mr r7,r8
81 mr r8,r9
82 mr r9,r10
83
84 sc
85
86 std r3,0(r31)
87 std r4,8(r31)
88
89 mr r3,r5
90
91 ld r1,0(r1)
92 ld r0,-8(r1)
93 mtcrf 0xff,r0
94 ld r31,-16(r1)
95
96 blr
97
98
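
As a usage sketch only (not part of the deleted file): the header comment above describes the convention, with the HV function number in r3 and the operands in r4-r10. The prototype below is an illustrative assumption, the real declarations live in the iSeries HvCall headers, and the function number 0x8001 is made up.

	/* Hypothetical C-side view of the convention implemented above. */
	extern unsigned long HvCall1(unsigned long functionId, unsigned long arg1);

	static unsigned long example_hv_call(unsigned long token)
	{
		/* functionId travels in r3, token in r4; the result comes back in r3 */
		return HvCall1(0x8001, token);
	}
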
diff --git a/arch/ppc64/kernel/iSeries_VpdInfo.c b/arch/ppc64/kernel/iSeries_VpdInfo.c
deleted file mode 100644
index 5d921792571f..000000000000
--- a/arch/ppc64/kernel/iSeries_VpdInfo.c
+++ /dev/null
@@ -1,268 +0,0 @@
1/*
2 * File iSeries_vpdInfo.c created by Allan Trautman on Fri Feb 2 2001.
3 *
4 * This code gets the card location of the hardware
5 * Copyright (C) 2001 <Allan H Trautman> <IBM Corp>
6 * Copyright (C) 2005 Stephen Rothwel, IBM Corp
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the:
20 * Free Software Foundation, Inc.,
21 * 59 Temple Place, Suite 330,
22 * Boston, MA 02111-1307 USA
23 *
24 * Change Activity:
25 * Created, Feb 2, 2001
26 * Ported to ppc64, August 20, 2001
27 * End Change Activity
28 */
29#include <linux/init.h>
30#include <linux/module.h>
31#include <linux/pci.h>
32#include <asm/types.h>
33#include <asm/resource.h>
34
35#include <asm/iSeries/HvCallPci.h>
36#include <asm/iSeries/HvTypes.h>
37#include <asm/iSeries/iSeries_pci.h>
38
39/*
40 * Size of Bus VPD data
41 */
42#define BUS_VPDSIZE 1024
43
44/*
45 * Bus Vpd Tags
46 */
47#define VpdEndOfAreaTag 0x79
48#define VpdIdStringTag 0x82
49#define VpdVendorAreaTag 0x84
50
51/*
52 * Mfg Area Tags
53 */
54#define VpdFruFrameId 0x4649 // "FI"
55#define VpdSlotMapFormat 0x4D46 // "MF"
56#define VpdSlotMap 0x534D // "SM"
57
58/*
59 * Structures of the areas
60 */
61struct MfgVpdAreaStruct {
62 u16 Tag;
63 u8 TagLength;
64 u8 AreaData1;
65 u8 AreaData2;
66};
67typedef struct MfgVpdAreaStruct MfgArea;
68#define MFG_ENTRY_SIZE 3
69
70struct SlotMapStruct {
71 u8 AgentId;
72 u8 SecondaryAgentId;
73 u8 PhbId;
74 char CardLocation[3];
75 char Parms[8];
76 char Reserved[2];
77};
78typedef struct SlotMapStruct SlotMap;
79#define SLOT_ENTRY_SIZE 16
80
81/*
82 * Parse the Slot Area
83 */
84static void __init iSeries_Parse_SlotArea(SlotMap *MapPtr, int MapLen,
85 HvAgentId agent, u8 *PhbId, char card[4])
86{
87 int SlotMapLen = MapLen;
88 SlotMap *SlotMapPtr = MapPtr;
89
90 /*
91 * Parse Slot label until we find the one requested
92 */
93 while (SlotMapLen > 0) {
94 if (SlotMapPtr->AgentId == agent) {
95 /*
  96	 * If the PHB wasn't found yet, take the first one found.
97 */
98 if (*PhbId == 0xff)
99 *PhbId = SlotMapPtr->PhbId;
100 /* Found it, extract the data. */
101 if (SlotMapPtr->PhbId == *PhbId) {
102 memcpy(card, &SlotMapPtr->CardLocation, 3);
103 card[3] = 0;
104 break;
105 }
106 }
107 /* Point to the next Slot */
108 SlotMapPtr = (SlotMap *)((char *)SlotMapPtr + SLOT_ENTRY_SIZE);
109 SlotMapLen -= SLOT_ENTRY_SIZE;
110 }
111}
112
113/*
114 * Parse the Mfg Area
115 */
116static void __init iSeries_Parse_MfgArea(u8 *AreaData, int AreaLen,
117 HvAgentId agent, u8 *PhbId,
118 u8 *frame, char card[4])
119{
120 MfgArea *MfgAreaPtr = (MfgArea *)AreaData;
121 int MfgAreaLen = AreaLen;
122 u16 SlotMapFmt = 0;
123
124 /* Parse Mfg Data */
125 while (MfgAreaLen > 0) {
126 int MfgTagLen = MfgAreaPtr->TagLength;
127 /* Frame ID (FI 4649020310 ) */
128 if (MfgAreaPtr->Tag == VpdFruFrameId) /* FI */
129 *frame = MfgAreaPtr->AreaData1;
130 /* Slot Map Format (MF 4D46020004 ) */
131 else if (MfgAreaPtr->Tag == VpdSlotMapFormat) /* MF */
132 SlotMapFmt = (MfgAreaPtr->AreaData1 * 256)
133 + MfgAreaPtr->AreaData2;
 134		/* Slot Map (SM 534D90) */
135 else if (MfgAreaPtr->Tag == VpdSlotMap) { /* SM */
136 SlotMap *SlotMapPtr;
137
138 if (SlotMapFmt == 0x1004)
139 SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr
140 + MFG_ENTRY_SIZE + 1);
141 else
142 SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr
143 + MFG_ENTRY_SIZE);
144 iSeries_Parse_SlotArea(SlotMapPtr, MfgTagLen,
145 agent, PhbId, card);
146 }
147 /*
148 * Point to the next Mfg Area
 149		 * Use the defined size; sizeof gives the wrong answer
150 */
151 MfgAreaPtr = (MfgArea *)((char *)MfgAreaPtr + MfgTagLen
152 + MFG_ENTRY_SIZE);
153 MfgAreaLen -= (MfgTagLen + MFG_ENTRY_SIZE);
154 }
155}
156
157/*
 158 * Look for "BUS". The data is not null terminated.
159 * PHBID of 0xFF indicates PHB was not found in VPD Data.
160 */
161static int __init iSeries_Parse_PhbId(u8 *AreaPtr, int AreaLength)
162{
163 u8 *PhbPtr = AreaPtr;
164 int DataLen = AreaLength;
165 char PhbId = 0xFF;
166
167 while (DataLen > 0) {
168 if ((*PhbPtr == 'B') && (*(PhbPtr + 1) == 'U')
169 && (*(PhbPtr + 2) == 'S')) {
170 PhbPtr += 3;
171 while (*PhbPtr == ' ')
172 ++PhbPtr;
173 PhbId = (*PhbPtr & 0x0F);
174 break;
175 }
176 ++PhbPtr;
177 --DataLen;
178 }
179 return PhbId;
180}
181
182/*
183 * Parse out the VPD Areas
184 */
185static void __init iSeries_Parse_Vpd(u8 *VpdData, int VpdDataLen,
186 HvAgentId agent, u8 *frame, char card[4])
187{
188 u8 *TagPtr = VpdData;
189 int DataLen = VpdDataLen - 3;
190 u8 PhbId;
191
192 while ((*TagPtr != VpdEndOfAreaTag) && (DataLen > 0)) {
193 int AreaLen = *(TagPtr + 1) + (*(TagPtr + 2) * 256);
194 u8 *AreaData = TagPtr + 3;
195
196 if (*TagPtr == VpdIdStringTag)
197 PhbId = iSeries_Parse_PhbId(AreaData, AreaLen);
198 else if (*TagPtr == VpdVendorAreaTag)
199 iSeries_Parse_MfgArea(AreaData, AreaLen,
200 agent, &PhbId, frame, card);
201 /* Point to next Area. */
202 TagPtr = AreaData + AreaLen;
203 DataLen -= AreaLen;
204 }
205}
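
Reader's aid, not from the original file: each VPD area starts with a one-byte tag followed by a two-byte little-endian length, which is what the *(TagPtr + 1) + (*(TagPtr + 2) * 256) expression above decodes. A self-contained sketch with made-up header bytes:

	#include <stdio.h>

	int main(void)
	{
		/* hypothetical vendor-area header: tag 0x84, length bytes 0x10 0x01 */
		unsigned char hdr[3] = { 0x84, 0x10, 0x01 };
		int area_len = hdr[1] + (hdr[2] * 256);	/* 0x10 + 0x01*256 = 272 */

		printf("tag 0x%02x, area length %d\n", hdr[0], area_len);
		return 0;
	}
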
206
207static void __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent,
208 u8 *frame, char card[4])
209{
210 int BusVpdLen = 0;
211 u8 *BusVpdPtr = kmalloc(BUS_VPDSIZE, GFP_KERNEL);
212
213 if (BusVpdPtr == NULL) {
214 printk("PCI: Bus VPD Buffer allocation failure.\n");
215 return;
216 }
217 BusVpdLen = HvCallPci_getBusVpd(bus, ISERIES_HV_ADDR(BusVpdPtr),
218 BUS_VPDSIZE);
219 if (BusVpdLen == 0) {
220 printk("PCI: Bus VPD Buffer zero length.\n");
221 goto out_free;
222 }
223 /* printk("PCI: BusVpdPtr: %p, %d\n",BusVpdPtr, BusVpdLen); */
224 /* Make sure this is what I think it is */
225 if (*BusVpdPtr != VpdIdStringTag) { /* 0x82 */
226 printk("PCI: Bus VPD Buffer missing starting tag.\n");
227 goto out_free;
228 }
229 iSeries_Parse_Vpd(BusVpdPtr, BusVpdLen, agent, frame, card);
230out_free:
231 kfree(BusVpdPtr);
232}
233
234/*
235 * Prints the device information.
236 * - Pass in pci_dev* pointer to the device.
237 * - Pass in the device count
238 *
239 * Format:
240 * PCI: Bus 0, Device 26, Vendor 0x12AE Frame 1, Card C10 Ethernet
241 * controller
242 */
243void __init iSeries_Device_Information(struct pci_dev *PciDev, int count)
244{
245 struct iSeries_Device_Node *DevNode = PciDev->sysdata;
246 u16 bus;
247 u8 frame;
248 char card[4];
249 HvSubBusNumber subbus;
250 HvAgentId agent;
251
252 if (DevNode == NULL) {
253 printk("%d. PCI: iSeries_Device_Information DevNode is NULL\n",
254 count);
255 return;
256 }
257
258 bus = ISERIES_BUS(DevNode);
259 subbus = ISERIES_SUBBUS(DevNode);
260 agent = ISERIES_PCI_AGENTID(ISERIES_GET_DEVICE_FROM_SUBBUS(subbus),
261 ISERIES_GET_FUNCTION_FROM_SUBBUS(subbus));
262 iSeries_Get_Location_Code(bus, agent, &frame, card);
263
264 printk("%d. PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, Card %4s ",
265 count, bus, PCI_SLOT(PciDev->devfn), PciDev->vendor,
266 frame, card);
267 printk("0x%04X\n", (int)(PciDev->class >> 8));
268}
diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c
deleted file mode 100644
index 2192055a90a0..000000000000
--- a/arch/ppc64/kernel/iSeries_htab.c
+++ /dev/null
@@ -1,236 +0,0 @@
1/*
2 * iSeries hashtable management.
3 * Derived from pSeries_htab.c
4 *
5 * SMP scalability work:
6 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <asm/machdep.h>
14#include <asm/pgtable.h>
15#include <asm/mmu.h>
16#include <asm/mmu_context.h>
17#include <asm/iSeries/HvCallHpt.h>
18#include <asm/abs_addr.h>
19#include <linux/spinlock.h>
20
21static spinlock_t iSeries_hlocks[64] __cacheline_aligned_in_smp = { [0 ... 63] = SPIN_LOCK_UNLOCKED};
22
23/*
24 * Very primitive algorithm for picking up a lock
25 */
26static inline void iSeries_hlock(unsigned long slot)
27{
28 if (slot & 0x8)
29 slot = ~slot;
30 spin_lock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
31}
32
33static inline void iSeries_hunlock(unsigned long slot)
34{
35 if (slot & 0x8)
36 slot = ~slot;
37 spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
38}
39
40static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
41 unsigned long prpn, unsigned long vflags,
42 unsigned long rflags)
43{
44 unsigned long arpn;
45 long slot;
46 hpte_t lhpte;
47 int secondary = 0;
48
49 /*
50 * The hypervisor tries both primary and secondary.
51 * If we are being called to insert in the secondary,
52 * it means we have already tried both primary and secondary,
53 * so we return failure immediately.
54 */
55 if (vflags & HPTE_V_SECONDARY)
56 return -1;
57
58 iSeries_hlock(hpte_group);
59
60 slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
61 BUG_ON(lhpte.v & HPTE_V_VALID);
62
63 if (slot == -1) { /* No available entry found in either group */
64 iSeries_hunlock(hpte_group);
65 return -1;
66 }
67
68 if (slot < 0) { /* MSB set means secondary group */
69 vflags |= HPTE_V_VALID;
70 secondary = 1;
71 slot &= 0x7fffffffffffffff;
72 }
73
74 arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
75
76 lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
77 lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
78
79 /* Now fill in the actual HPTE */
80 HvCallHpt_addValidate(slot, secondary, &lhpte);
81
82 iSeries_hunlock(hpte_group);
83
84 return (secondary << 3) | (slot & 7);
85}
86
87static unsigned long iSeries_hpte_getword0(unsigned long slot)
88{
89 hpte_t hpte;
90
91 HvCallHpt_get(&hpte, slot);
92 return hpte.v;
93}
94
95static long iSeries_hpte_remove(unsigned long hpte_group)
96{
97 unsigned long slot_offset;
98 int i;
99 unsigned long hpte_v;
100
101 /* Pick a random slot to start at */
102 slot_offset = mftb() & 0x7;
103
104 iSeries_hlock(hpte_group);
105
106 for (i = 0; i < HPTES_PER_GROUP; i++) {
107 hpte_v = iSeries_hpte_getword0(hpte_group + slot_offset);
108
109 if (! (hpte_v & HPTE_V_BOLTED)) {
110 HvCallHpt_invalidateSetSwBitsGet(hpte_group +
111 slot_offset, 0, 0);
112 iSeries_hunlock(hpte_group);
113 return i;
114 }
115
116 slot_offset++;
117 slot_offset &= 0x7;
118 }
119
120 iSeries_hunlock(hpte_group);
121
122 return -1;
123}
124
125/*
126 * The HyperVisor expects the "flags" argument in this form:
127 * bits 0..59 : reserved
128 * bit 60 : N
129 * bits 61..63 : PP2,PP1,PP0
130 */
131static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
132 unsigned long va, int large, int local)
133{
134 hpte_t hpte;
135 unsigned long avpn = va >> 23;
136
137 iSeries_hlock(slot);
138
139 HvCallHpt_get(&hpte, slot);
140 if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) {
141 /*
142 * Hypervisor expects bits as NPPP, which is
143 * different from how they are mapped in our PP.
144 */
145 HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1));
146 iSeries_hunlock(slot);
147 return 0;
148 }
149 iSeries_hunlock(slot);
150
151 return -1;
152}
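
Aside (not in the original file): the value passed to HvCallHpt_setPp() above keeps bits 0-1 of newpp and moves bit 2 up to bit 3, producing the NPPP layout the preceding comment describes (bit 3 = N, low bits = PP). A tiny self-contained restatement with one worked value:

	/* e.g. newpp = 0x6 (binary 110) -> 0xA (binary 1010): bit 2 lands in the N position */
	static unsigned long pp_to_hv_nppp(unsigned long newpp)
	{
		return (newpp & 0x3) | ((newpp & 0x4) << 1);
	}
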
153
154/*
155 * Functions used to find the PTE for a particular virtual address.
156 * Only used during boot when bolting pages.
157 *
158 * Input : vpn : virtual page number
159 * Output: PTE index within the page table of the entry
160 * -1 on failure
161 */
162static long iSeries_hpte_find(unsigned long vpn)
163{
164 hpte_t hpte;
165 long slot;
166
167 /*
168 * The HvCallHpt_findValid interface is as follows:
169 * 0xffffffffffffffff : No entry found.
170 * 0x00000000xxxxxxxx : Entry found in primary group, slot x
171 * 0x80000000xxxxxxxx : Entry found in secondary group, slot x
172 */
173 slot = HvCallHpt_findValid(&hpte, vpn);
174 if (hpte.v & HPTE_V_VALID) {
175 if (slot < 0) {
176 slot &= 0x7fffffffffffffff;
177 slot = -slot;
178 }
179 } else
180 slot = -1;
181 return slot;
182}
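
Sketch only, not part of the file: the return encoding described in the comment above (all ones = not found, MSB set = secondary group) can be decoded as below; this mirrors what iSeries_hpte_insert() and iSeries_hpte_find() do inline.

	#include <stdint.h>

	/* Returns the slot, sets *secondary, or returns -1 when nothing was found. */
	static int64_t decode_findValid(int64_t raw, int *secondary)
	{
		if (raw == -1)				/* 0xffffffffffffffff */
			return -1;
		*secondary = (raw < 0);			/* MSB set: secondary group */
		return raw & 0x7fffffffffffffff;
	}
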
183
184/*
185 * Update the page protection bits. Intended to be used to create
186 * guard pages for kernel data structures on pages which are bolted
187 * in the HPT. Assumes pages being operated on will not be stolen.
188 * Does not work on large pages.
189 *
190 * No need to lock here because we should be the only user.
191 */
192static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
193{
194 unsigned long vsid,va,vpn;
195 long slot;
196
197 vsid = get_kernel_vsid(ea);
198 va = (vsid << 28) | (ea & 0x0fffffff);
199 vpn = va >> PAGE_SHIFT;
200 slot = iSeries_hpte_find(vpn);
201 if (slot == -1)
202 panic("updateboltedpp: Could not find page to bolt\n");
203 HvCallHpt_setPp(slot, newpp);
204}
205
206static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
207 int large, int local)
208{
209 unsigned long hpte_v;
210 unsigned long avpn = va >> 23;
211 unsigned long flags;
212
213 local_irq_save(flags);
214
215 iSeries_hlock(slot);
216
217 hpte_v = iSeries_hpte_getword0(slot);
218
219 if ((HPTE_V_AVPN_VAL(hpte_v) == avpn) && (hpte_v & HPTE_V_VALID))
220 HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0);
221
222 iSeries_hunlock(slot);
223
224 local_irq_restore(flags);
225}
226
227void hpte_init_iSeries(void)
228{
229 ppc_md.hpte_invalidate = iSeries_hpte_invalidate;
230 ppc_md.hpte_updatepp = iSeries_hpte_updatepp;
231 ppc_md.hpte_updateboltedpp = iSeries_hpte_updateboltedpp;
232 ppc_md.hpte_insert = iSeries_hpte_insert;
233 ppc_md.hpte_remove = iSeries_hpte_remove;
234
235 htab_finish_init();
236}
diff --git a/arch/ppc64/kernel/iSeries_iommu.c b/arch/ppc64/kernel/iSeries_iommu.c
deleted file mode 100644
index f8ff1bb054dc..000000000000
--- a/arch/ppc64/kernel/iSeries_iommu.c
+++ /dev/null
@@ -1,176 +0,0 @@
1/*
2 * arch/ppc64/kernel/iSeries_iommu.c
3 *
4 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
5 *
6 * Rewrite, cleanup:
7 *
8 * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
9 *
10 * Dynamic DMA mapping support, iSeries-specific parts.
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/types.h>
29#include <linux/dma-mapping.h>
30#include <linux/list.h>
31
32#include <asm/iommu.h>
33#include <asm/machdep.h>
34#include <asm/iSeries/HvCallXm.h>
35#include <asm/iSeries/iSeries_pci.h>
36
37extern struct list_head iSeries_Global_Device_List;
38
39
40static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
41 unsigned long uaddr, enum dma_data_direction direction)
42{
43 u64 rc;
44 union tce_entry tce;
45
46 while (npages--) {
47 tce.te_word = 0;
48 tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT;
49
50 if (tbl->it_type == TCE_VB) {
51 /* Virtual Bus */
52 tce.te_bits.tb_valid = 1;
53 tce.te_bits.tb_allio = 1;
54 if (direction != DMA_TO_DEVICE)
55 tce.te_bits.tb_rdwr = 1;
56 } else {
57 /* PCI Bus */
58 tce.te_bits.tb_rdwr = 1; /* Read allowed */
59 if (direction != DMA_TO_DEVICE)
60 tce.te_bits.tb_pciwr = 1;
61 }
62
63 rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index,
64 tce.te_word);
65 if (rc)
66 panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
67 rc);
68 index++;
69 uaddr += PAGE_SIZE;
70 }
71}
72
73static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
74{
75 u64 rc;
76
77 while (npages--) {
78 rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0);
79 if (rc)
80 panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
81 rc);
82 index++;
83 }
84}
85
86#ifdef CONFIG_PCI
87/*
88 * This function compares the known tables to find an iommu_table
89 * that has already been built for hardware TCEs.
90 */
91static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
92{
93 struct iSeries_Device_Node *dp;
94
95 list_for_each_entry(dp, &iSeries_Global_Device_List, Device_List) {
96 if ((dp->iommu_table != NULL) &&
97 (dp->iommu_table->it_type == TCE_PCI) &&
98 (dp->iommu_table->it_offset == tbl->it_offset) &&
99 (dp->iommu_table->it_index == tbl->it_index) &&
100 (dp->iommu_table->it_size == tbl->it_size))
101 return dp->iommu_table;
102 }
103 return NULL;
104}
105
106/*
 107 * Call Hv with the architected data structure to get TCE table
108 * info. Put the returned data into the Linux representation of the
109 * TCE table data.
110 * The Hardware Tce table comes in three flavors.
111 * 1. TCE table shared between Buses.
112 * 2. TCE table per Bus.
113 * 3. TCE Table per IOA.
114 */
115static void iommu_table_getparms(struct iSeries_Device_Node* dn,
116 struct iommu_table* tbl)
117{
118 struct iommu_table_cb *parms;
119
120 parms = kmalloc(sizeof(*parms), GFP_KERNEL);
121 if (parms == NULL)
122 panic("PCI_DMA: TCE Table Allocation failed.");
123
124 memset(parms, 0, sizeof(*parms));
125
126 parms->itc_busno = ISERIES_BUS(dn);
127 parms->itc_slotno = dn->LogicalSlot;
128 parms->itc_virtbus = 0;
129
130 HvCallXm_getTceTableParms(ISERIES_HV_ADDR(parms));
131
132 if (parms->itc_size == 0)
133 panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
134
135 /* itc_size is in pages worth of table, it_size is in # of entries */
136 tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry);
137 tbl->it_busno = parms->itc_busno;
138 tbl->it_offset = parms->itc_offset;
139 tbl->it_index = parms->itc_index;
140 tbl->it_blocksize = 1;
141 tbl->it_type = TCE_PCI;
142
143 kfree(parms);
144}
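
Reader's note (not from the file): the it_size computation above converts a table size given in pages into an entry count. Assuming 4 KB pages and an 8-byte union tce_entry (both assumptions for illustration), one page of table describes 512 TCEs:

	#define EXAMPLE_PAGE_SIZE	4096UL	/* assumed */
	#define EXAMPLE_TCE_SIZE	8UL	/* sizeof(union tce_entry), assumed */

	static unsigned long tce_pages_to_entries(unsigned long itc_size_pages)
	{
		/* e.g. itc_size_pages = 1  ->  4096 / 8 = 512 entries */
		return (itc_size_pages * EXAMPLE_PAGE_SIZE) / EXAMPLE_TCE_SIZE;
	}
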
145
146
147void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn)
148{
149 struct iommu_table *tbl;
150
151 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
152
153 iommu_table_getparms(dn, tbl);
154
155 /* Look for existing tce table */
156 dn->iommu_table = iommu_table_find(tbl);
157 if (dn->iommu_table == NULL)
158 dn->iommu_table = iommu_init_table(tbl);
159 else
160 kfree(tbl);
161}
162#endif
163
164static void iommu_dev_setup_iSeries(struct pci_dev *dev) { }
165static void iommu_bus_setup_iSeries(struct pci_bus *bus) { }
166
167void iommu_init_early_iSeries(void)
168{
169 ppc_md.tce_build = tce_build_iSeries;
170 ppc_md.tce_free = tce_free_iSeries;
171
172 ppc_md.iommu_dev_setup = iommu_dev_setup_iSeries;
173 ppc_md.iommu_bus_setup = iommu_bus_setup_iSeries;
174
175 pci_iommu_init();
176}
diff --git a/arch/ppc64/kernel/iSeries_irq.c b/arch/ppc64/kernel/iSeries_irq.c
deleted file mode 100644
index 77376c1bd611..000000000000
--- a/arch/ppc64/kernel/iSeries_irq.c
+++ /dev/null
@@ -1,353 +0,0 @@
1/*
2 * This module supports the iSeries PCI bus interrupt handling
3 * Copyright (C) 20yy <Robert L Holtorf> <IBM Corp>
4 * Copyright (C) 2004-2005 IBM Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the:
18 * Free Software Foundation, Inc.,
19 * 59 Temple Place, Suite 330,
20 * Boston, MA 02111-1307 USA
21 *
22 * Change Activity:
23 * Created, December 13, 2000 by Wayne Holm
24 * End Change Activity
25 */
26#include <linux/config.h>
27#include <linux/pci.h>
28#include <linux/init.h>
29#include <linux/threads.h>
30#include <linux/smp.h>
31#include <linux/param.h>
32#include <linux/string.h>
33#include <linux/bootmem.h>
34#include <linux/ide.h>
35#include <linux/irq.h>
36#include <linux/spinlock.h>
37
38#include <asm/ppcdebug.h>
39#include <asm/iSeries/HvTypes.h>
40#include <asm/iSeries/HvLpEvent.h>
41#include <asm/iSeries/HvCallPci.h>
42#include <asm/iSeries/HvCallXm.h>
43#include <asm/iSeries/iSeries_irq.h>
44
45/* This maps virtual irq numbers to real irqs */
46unsigned int virt_irq_to_real_map[NR_IRQS];
47
48/* The next available virtual irq number */
49/* Note: the pcnet32 driver assumes irq numbers < 2 aren't valid. :( */
50static int next_virtual_irq = 2;
51
52static long Pci_Interrupt_Count;
53static long Pci_Event_Count;
54
55enum XmPciLpEvent_Subtype {
56 XmPciLpEvent_BusCreated = 0, // PHB has been created
57 XmPciLpEvent_BusError = 1, // PHB has failed
58 XmPciLpEvent_BusFailed = 2, // Msg to Secondary, Primary failed bus
59 XmPciLpEvent_NodeFailed = 4, // Multi-adapter bridge has failed
60 XmPciLpEvent_NodeRecovered = 5, // Multi-adapter bridge has recovered
61 XmPciLpEvent_BusRecovered = 12, // PHB has been recovered
 62	XmPciLpEvent_UnQuiesceBus	= 18,	// Secondary bus unquiescing
63 XmPciLpEvent_BridgeError = 21, // Bridge Error
64 XmPciLpEvent_SlotInterrupt = 22 // Slot interrupt
65};
66
67struct XmPciLpEvent_BusInterrupt {
68 HvBusNumber busNumber;
69 HvSubBusNumber subBusNumber;
70};
71
72struct XmPciLpEvent_NodeInterrupt {
73 HvBusNumber busNumber;
74 HvSubBusNumber subBusNumber;
75 HvAgentId deviceId;
76};
77
78struct XmPciLpEvent {
79 struct HvLpEvent hvLpEvent;
80
81 union {
82 u64 alignData; // Align on an 8-byte boundary
83
84 struct {
85 u32 fisr;
86 HvBusNumber busNumber;
87 HvSubBusNumber subBusNumber;
88 HvAgentId deviceId;
89 } slotInterrupt;
90
91 struct XmPciLpEvent_BusInterrupt busFailed;
92 struct XmPciLpEvent_BusInterrupt busRecovered;
93 struct XmPciLpEvent_BusInterrupt busCreated;
94
95 struct XmPciLpEvent_NodeInterrupt nodeFailed;
96 struct XmPciLpEvent_NodeInterrupt nodeRecovered;
97
98 } eventData;
99
100};
101
102static void intReceived(struct XmPciLpEvent *eventParm,
103 struct pt_regs *regsParm)
104{
105 int irq;
106
107 ++Pci_Interrupt_Count;
108
109 switch (eventParm->hvLpEvent.xSubtype) {
110 case XmPciLpEvent_SlotInterrupt:
111 irq = eventParm->hvLpEvent.xCorrelationToken;
112 /* Dispatch the interrupt handlers for this irq */
113 ppc_irq_dispatch_handler(regsParm, irq);
114 HvCallPci_eoi(eventParm->eventData.slotInterrupt.busNumber,
115 eventParm->eventData.slotInterrupt.subBusNumber,
116 eventParm->eventData.slotInterrupt.deviceId);
117 break;
118 /* Ignore error recovery events for now */
119 case XmPciLpEvent_BusCreated:
120 printk(KERN_INFO "intReceived: system bus %d created\n",
121 eventParm->eventData.busCreated.busNumber);
122 break;
123 case XmPciLpEvent_BusError:
124 case XmPciLpEvent_BusFailed:
125 printk(KERN_INFO "intReceived: system bus %d failed\n",
126 eventParm->eventData.busFailed.busNumber);
127 break;
128 case XmPciLpEvent_BusRecovered:
129 case XmPciLpEvent_UnQuiesceBus:
130 printk(KERN_INFO "intReceived: system bus %d recovered\n",
131 eventParm->eventData.busRecovered.busNumber);
132 break;
133 case XmPciLpEvent_NodeFailed:
134 case XmPciLpEvent_BridgeError:
135 printk(KERN_INFO
136 "intReceived: multi-adapter bridge %d/%d/%d failed\n",
137 eventParm->eventData.nodeFailed.busNumber,
138 eventParm->eventData.nodeFailed.subBusNumber,
139 eventParm->eventData.nodeFailed.deviceId);
140 break;
141 case XmPciLpEvent_NodeRecovered:
142 printk(KERN_INFO
143 "intReceived: multi-adapter bridge %d/%d/%d recovered\n",
144 eventParm->eventData.nodeRecovered.busNumber,
145 eventParm->eventData.nodeRecovered.subBusNumber,
146 eventParm->eventData.nodeRecovered.deviceId);
147 break;
148 default:
149 printk(KERN_ERR
150 "intReceived: unrecognized event subtype 0x%x\n",
151 eventParm->hvLpEvent.xSubtype);
152 break;
153 }
154}
155
156static void XmPciLpEvent_handler(struct HvLpEvent *eventParm,
157 struct pt_regs *regsParm)
158{
159#ifdef CONFIG_PCI
160 ++Pci_Event_Count;
161
162 if (eventParm && (eventParm->xType == HvLpEvent_Type_PciIo)) {
163 switch (eventParm->xFlags.xFunction) {
164 case HvLpEvent_Function_Int:
165 intReceived((struct XmPciLpEvent *)eventParm, regsParm);
166 break;
167 case HvLpEvent_Function_Ack:
168 printk(KERN_ERR
169 "XmPciLpEvent_handler: unexpected ack received\n");
170 break;
171 default:
172 printk(KERN_ERR
173 "XmPciLpEvent_handler: unexpected event function %d\n",
174 (int)eventParm->xFlags.xFunction);
175 break;
176 }
177 } else if (eventParm)
178 printk(KERN_ERR
179 "XmPciLpEvent_handler: Unrecognized PCI event type 0x%x\n",
180 (int)eventParm->xType);
181 else
182 printk(KERN_ERR "XmPciLpEvent_handler: NULL event received\n");
183#endif
184}
185
186/*
 187 * This is called by init_IRQ; it is set in ppc_md.init_IRQ by iSeries_setup.c.
188 * It must be called before the bus walk.
189 */
190void __init iSeries_init_IRQ(void)
191{
192 /* Register PCI event handler and open an event path */
193 int xRc;
194
195 xRc = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo,
196 &XmPciLpEvent_handler);
197 if (xRc == 0) {
198 xRc = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0);
199 if (xRc != 0)
200 printk(KERN_ERR "iSeries_init_IRQ: open event path "
201 "failed with rc 0x%x\n", xRc);
202 } else
203 printk(KERN_ERR "iSeries_init_IRQ: register handler "
204 "failed with rc 0x%x\n", xRc);
205}
206
207#define REAL_IRQ_TO_BUS(irq) ((((irq) >> 6) & 0xff) + 1)
208#define REAL_IRQ_TO_IDSEL(irq) ((((irq) >> 3) & 7) + 1)
209#define REAL_IRQ_TO_FUNC(irq) ((irq) & 7)
210
211/*
212 * This will be called by device drivers (via enable_IRQ)
213 * to enable INTA in the bridge interrupt status register.
214 */
215static void iSeries_enable_IRQ(unsigned int irq)
216{
217 u32 bus, deviceId, function, mask;
218 const u32 subBus = 0;
219 unsigned int rirq = virt_irq_to_real_map[irq];
220
221 /* The IRQ has already been locked by the caller */
222 bus = REAL_IRQ_TO_BUS(rirq);
223 function = REAL_IRQ_TO_FUNC(rirq);
224 deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function;
225
226 /* Unmask secondary INTA */
227 mask = 0x80000000;
228 HvCallPci_unmaskInterrupts(bus, subBus, deviceId, mask);
229 PPCDBG(PPCDBG_BUSWALK, "iSeries_enable_IRQ 0x%02X.%02X.%02X 0x%04X\n",
230 bus, subBus, deviceId, irq);
231}
232
233/* This is called by iSeries_activate_IRQs */
234static unsigned int iSeries_startup_IRQ(unsigned int irq)
235{
236 u32 bus, deviceId, function, mask;
237 const u32 subBus = 0;
238 unsigned int rirq = virt_irq_to_real_map[irq];
239
240 bus = REAL_IRQ_TO_BUS(rirq);
241 function = REAL_IRQ_TO_FUNC(rirq);
242 deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function;
243
244 /* Link the IRQ number to the bridge */
245 HvCallXm_connectBusUnit(bus, subBus, deviceId, irq);
246
247 /* Unmask bridge interrupts in the FISR */
248 mask = 0x01010000 << function;
249 HvCallPci_unmaskFisr(bus, subBus, deviceId, mask);
250 iSeries_enable_IRQ(irq);
251 return 0;
252}
253
254/*
255 * This is called out of iSeries_fixup to activate interrupt
256 * generation for usable slots
257 */
258void __init iSeries_activate_IRQs()
259{
260 int irq;
261 unsigned long flags;
262
263 for_each_irq (irq) {
264 irq_desc_t *desc = get_irq_desc(irq);
265
266 if (desc && desc->handler && desc->handler->startup) {
267 spin_lock_irqsave(&desc->lock, flags);
268 desc->handler->startup(irq);
269 spin_unlock_irqrestore(&desc->lock, flags);
270 }
271 }
272}
273
274/* this is not called anywhere currently */
275static void iSeries_shutdown_IRQ(unsigned int irq)
276{
277 u32 bus, deviceId, function, mask;
278 const u32 subBus = 0;
279 unsigned int rirq = virt_irq_to_real_map[irq];
280
281 /* irq should be locked by the caller */
282 bus = REAL_IRQ_TO_BUS(rirq);
283 function = REAL_IRQ_TO_FUNC(rirq);
284 deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function;
285
286 /* Invalidate the IRQ number in the bridge */
287 HvCallXm_connectBusUnit(bus, subBus, deviceId, 0);
288
289 /* Mask bridge interrupts in the FISR */
290 mask = 0x01010000 << function;
291 HvCallPci_maskFisr(bus, subBus, deviceId, mask);
292}
293
294/*
295 * This will be called by device drivers (via disable_IRQ)
296 * to disable INTA in the bridge interrupt status register.
297 */
298static void iSeries_disable_IRQ(unsigned int irq)
299{
300 u32 bus, deviceId, function, mask;
301 const u32 subBus = 0;
302 unsigned int rirq = virt_irq_to_real_map[irq];
303
304 /* The IRQ has already been locked by the caller */
305 bus = REAL_IRQ_TO_BUS(rirq);
306 function = REAL_IRQ_TO_FUNC(rirq);
307 deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function;
308
309 /* Mask secondary INTA */
310 mask = 0x80000000;
311 HvCallPci_maskInterrupts(bus, subBus, deviceId, mask);
312 PPCDBG(PPCDBG_BUSWALK, "iSeries_disable_IRQ 0x%02X.%02X.%02X 0x%04X\n",
313 bus, subBus, deviceId, irq);
314}
315
316/*
317 * Need to define this so ppc_irq_dispatch_handler will NOT call
318 * enable_IRQ at the end of interrupt handling. However, this does
319 * nothing because there is not enough information provided to do
320 * the EOI HvCall. This is done by XmPciLpEvent.c
321 */
322static void iSeries_end_IRQ(unsigned int irq)
323{
324}
325
326static hw_irq_controller iSeries_IRQ_handler = {
327 .typename = "iSeries irq controller",
328 .startup = iSeries_startup_IRQ,
329 .shutdown = iSeries_shutdown_IRQ,
330 .enable = iSeries_enable_IRQ,
331 .disable = iSeries_disable_IRQ,
332 .end = iSeries_end_IRQ
333};
334
335/*
336 * This is called out of iSeries_scan_slot to allocate an IRQ for an EADS slot
337 * It calculates the irq value for the slot.
338 * Note that subBusNumber is always 0 (at the moment at least).
339 */
340int __init iSeries_allocate_IRQ(HvBusNumber busNumber,
341 HvSubBusNumber subBusNumber, HvAgentId deviceId)
342{
343 unsigned int realirq, virtirq;
344 u8 idsel = (deviceId >> 4);
345 u8 function = deviceId & 7;
346
347 virtirq = next_virtual_irq++;
348 realirq = ((busNumber - 1) << 6) + ((idsel - 1) << 3) + function;
349 virt_irq_to_real_map[virtirq] = realirq;
350
351 irq_desc[virtirq].handler = &iSeries_IRQ_handler;
352 return virtirq;
353}
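
Aside, not part of the original file: iSeries_allocate_IRQ() above packs bus, IDSEL and function into the "real" irq number that the REAL_IRQ_TO_* macros near the top of the file unpack. A self-contained round trip using the same shifts:

	/* Same packing as iSeries_allocate_IRQ(); inverse of the REAL_IRQ_TO_* macros. */
	static unsigned int pack_real_irq(unsigned int bus, unsigned int idsel,
					  unsigned int function)
	{
		return ((bus - 1) << 6) + ((idsel - 1) << 3) + function;
	}

	/* e.g. bus 1, idsel 2, function 3 -> 0x0b, and unpacking gives them back:
	 * bus = ((0x0b >> 6) & 0xff) + 1 = 1, idsel = ((0x0b >> 3) & 7) + 1 = 2,
	 * function = 0x0b & 7 = 3.
	 */
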
diff --git a/arch/ppc64/kernel/iSeries_pci.c b/arch/ppc64/kernel/iSeries_pci.c
deleted file mode 100644
index fbc273c32bcc..000000000000
--- a/arch/ppc64/kernel/iSeries_pci.c
+++ /dev/null
@@ -1,905 +0,0 @@
1/*
2 * iSeries_pci.c
3 *
4 * Copyright (C) 2001 Allan Trautman, IBM Corporation
5 *
6 * iSeries specific routines for PCI.
7 *
8 * Based on code from pci.c and iSeries_pci.c 32bit
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24#include <linux/kernel.h>
25#include <linux/list.h>
26#include <linux/string.h>
27#include <linux/init.h>
28#include <linux/module.h>
29#include <linux/ide.h>
30#include <linux/pci.h>
31
32#include <asm/io.h>
33#include <asm/irq.h>
34#include <asm/prom.h>
35#include <asm/machdep.h>
36#include <asm/pci-bridge.h>
37#include <asm/ppcdebug.h>
38#include <asm/iommu.h>
39
40#include <asm/iSeries/HvCallPci.h>
41#include <asm/iSeries/HvCallXm.h>
42#include <asm/iSeries/iSeries_irq.h>
43#include <asm/iSeries/iSeries_pci.h>
44#include <asm/iSeries/mf.h>
45
46#include "pci.h"
47
48extern unsigned long io_page_mask;
49
50/*
51 * Forward declares of prototypes.
52 */
53static struct iSeries_Device_Node *find_Device_Node(int bus, int devfn);
54static void scan_PHB_slots(struct pci_controller *Phb);
55static void scan_EADS_bridge(HvBusNumber Bus, HvSubBusNumber SubBus, int IdSel);
56static int scan_bridge_slot(HvBusNumber Bus, struct HvCallPci_BridgeInfo *Info);
57
58LIST_HEAD(iSeries_Global_Device_List);
59
60static int DeviceCount;
61
62/* Counters and control flags. */
63static long Pci_Io_Read_Count;
64static long Pci_Io_Write_Count;
65#if 0
66static long Pci_Cfg_Read_Count;
67static long Pci_Cfg_Write_Count;
68#endif
69static long Pci_Error_Count;
70
71static int Pci_Retry_Max = 3; /* Only retry 3 times */
72static int Pci_Error_Flag = 1; /* Set Retry Error on. */
73
74static struct pci_ops iSeries_pci_ops;
75
76/*
77 * Table defines
78 * Each entry is 4 MB; 1024 entries give a 4 GB I/O address space.
79 */
80#define IOMM_TABLE_MAX_ENTRIES 1024
81#define IOMM_TABLE_ENTRY_SIZE 0x0000000000400000UL
82#define BASE_IO_MEMORY 0xE000000000000000UL
83
84static unsigned long max_io_memory = 0xE000000000000000UL;
85static long current_iomm_table_entry;
86
87/*
88 * Lookup Tables.
89 */
90static struct iSeries_Device_Node **iomm_table;
91static u8 *iobar_table;
92
93/*
94 * Static and Global variables
95 */
96static char *pci_io_text = "iSeries PCI I/O";
97static DEFINE_SPINLOCK(iomm_table_lock);
98
99/*
100 * iomm_table_initialize
101 *
102 * Allocates and initializes the Address Translation Table and BAR
103 * tables to get them ready for use.  Must be called before any
104 * I/O space is handed out to the device BARs.
105 */
106static void iomm_table_initialize(void)
107{
108 spin_lock(&iomm_table_lock);
109 iomm_table = kmalloc(sizeof(*iomm_table) * IOMM_TABLE_MAX_ENTRIES,
110 GFP_KERNEL);
111 iobar_table = kmalloc(sizeof(*iobar_table) * IOMM_TABLE_MAX_ENTRIES,
112 GFP_KERNEL);
113 spin_unlock(&iomm_table_lock);
114 if ((iomm_table == NULL) || (iobar_table == NULL))
115 panic("PCI: I/O tables allocation failed.\n");
116}
117
118/*
119 * iomm_table_allocate_entry
120 *
121 * Adds pci_dev entry in address translation table
122 *
123 * - Allocates the number of entries required in the table, based on
124 *   the BAR size.
125 * - Allocates starting at BASE_IO_MEMORY and increases.
126 * - The size is rounded up to be a multiple of the entry size.
127 * - CurrentIndex is incremented to keep track of the last entry.
128 * - Builds the resource entry for allocated BARs.
129 */
130static void iomm_table_allocate_entry(struct pci_dev *dev, int bar_num)
131{
132 struct resource *bar_res = &dev->resource[bar_num];
133 long bar_size = pci_resource_len(dev, bar_num);
134
135 /*
136 * No space to allocate; quick exit, skip allocation.
137 */
138 if (bar_size == 0)
139 return;
140 /*
141 * Set Resource values.
142 */
143 spin_lock(&iomm_table_lock);
144 bar_res->name = pci_io_text;
145 bar_res->start =
146 IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry;
147 bar_res->start += BASE_IO_MEMORY;
148 bar_res->end = bar_res->start + bar_size - 1;
149 /*
150 * Allocate the number of table entries needed for BAR.
151 */
152 while (bar_size > 0 ) {
153 iomm_table[current_iomm_table_entry] = dev->sysdata;
154 iobar_table[current_iomm_table_entry] = bar_num;
155 bar_size -= IOMM_TABLE_ENTRY_SIZE;
156 ++current_iomm_table_entry;
157 }
158 max_io_memory = BASE_IO_MEMORY +
159 (IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry);
160 spin_unlock(&iomm_table_lock);
161}
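
For orientation, a hedged, self-contained sketch of the allocation arithmetic above (not from the deleted file): a BAR of size S consumes ceil(S / 4 MB) consecutive table entries, and its resource starts at BASE_IO_MEMORY plus the offset of the first entry it occupies. The BAR size and starting index are hypothetical.

#include <stdio.h>

#define ENTRY_SIZE	0x0000000000400000UL	/* 4 MB per table entry */
#define BASE_IO		0xE000000000000000UL	/* start of the fake I/O space */

int main(void)
{
	unsigned long bar_size = 0x500000UL;	/* hypothetical 5 MB BAR */
	unsigned long first_entry = 0;		/* current_iomm_table_entry */
	unsigned long entries = 0;
	long remaining = (long)bar_size;

	/* Same loop shape as iomm_table_allocate_entry: one entry per 4 MB slice. */
	while (remaining > 0) {
		remaining -= (long)ENTRY_SIZE;
		++entries;
	}

	printf("resource start = 0x%lx, entries used = %lu\n",
	       BASE_IO + first_entry * ENTRY_SIZE, entries);
	return 0;
}
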
162
163/*
164 * allocate_device_bars
165 *
166 * - Allocates ALL pci_dev BARs and updates the resources with the
167 *   BAR values.  BARs with zero length are skipped, leaving their
168 *   resources untouched.
169 * - The BAR size is taken from the pci_dev resource, and
170 *   iomm_table_allocate_entry is called to allocate the entries for each BAR.
171 * - Loops through the BAR resources (0 - 5) plus the ROM, which
172 *   is resource (6).
173 */
174static void allocate_device_bars(struct pci_dev *dev)
175{
176 struct resource *bar_res;
177 int bar_num;
178
179 for (bar_num = 0; bar_num <= PCI_ROM_RESOURCE; ++bar_num) {
180 bar_res = &dev->resource[bar_num];
181 iomm_table_allocate_entry(dev, bar_num);
182 }
183}
184
185/*
186 * Log error information to system console.
187 * Filter out the 'device not there' errors (return code 0x0302).
188 * PCI: EADs Connect Failed 0x18.58.10 Rc: 0x00xx
189 * PCI: Read Vendor Failed 0x18.58.10 Rc: 0x00xx
190 * PCI: Connect Bus Unit Failed 0x18.58.10 Rc: 0x00xx
191 */
192static void pci_Log_Error(char *Error_Text, int Bus, int SubBus,
193 int AgentId, int HvRc)
194{
195 if (HvRc == 0x0302)
196 return;
197 printk(KERN_ERR "PCI: %s Failed: 0x%02X.%02X.%02X Rc: 0x%04X",
198 Error_Text, Bus, SubBus, AgentId, HvRc);
199}
200
201/*
202 * build_device_node(u16 Bus, int SubBus, u8 DevFn)
203 */
204static struct iSeries_Device_Node *build_device_node(HvBusNumber Bus,
205 HvSubBusNumber SubBus, int AgentId, int Function)
206{
207 struct iSeries_Device_Node *node;
208
209 PPCDBG(PPCDBG_BUSWALK,
210 "-build_device_node 0x%02X.%02X.%02X Function: %02X\n",
211 Bus, SubBus, AgentId, Function);
212
213 node = kmalloc(sizeof(struct iSeries_Device_Node), GFP_KERNEL);
214 if (node == NULL)
215 return NULL;
216
217 memset(node, 0, sizeof(struct iSeries_Device_Node));
218 list_add_tail(&node->Device_List, &iSeries_Global_Device_List);
219#if 0
220 node->DsaAddr = ((u64)Bus << 48) + ((u64)SubBus << 40) + ((u64)0x10 << 32);
221#endif
222 node->DsaAddr.DsaAddr = 0;
223 node->DsaAddr.Dsa.busNumber = Bus;
224 node->DsaAddr.Dsa.subBusNumber = SubBus;
225 node->DsaAddr.Dsa.deviceId = 0x10;
226 node->DevFn = PCI_DEVFN(ISERIES_ENCODE_DEVICE(AgentId), Function);
227 return node;
228}
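
The DSA ("direct select address") stored in the node is what every hypervisor PCI call keys on. Its exact layout is the DsaAddr union defined in the iSeries headers; the packing suggested by the #if 0 block above, plus the barnum << 24 OR applied later in xlate_iomm_address(), is roughly the following hedged sketch (all values hypothetical):

#include <stdio.h>

/* Hedged sketch only; the authoritative layout is the DsaAddr union. */
static unsigned long long pack_dsa(unsigned int bus, unsigned int subbus,
				   unsigned int device_id, unsigned int barnum)
{
	return ((unsigned long long)bus << 48) |
	       ((unsigned long long)subbus << 40) |
	       ((unsigned long long)device_id << 32) |
	       ((unsigned long long)barnum << 24);
}

int main(void)
{
	/* Hypothetical device: bus 0x18, subbus 0, deviceId 0x10, BAR 1. */
	printf("dsa = 0x%016llx\n", pack_dsa(0x18, 0x00, 0x10, 1));
	return 0;
}
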
229
230/*
231 * unsigned long __init find_and_init_phbs(void)
232 *
233 * Description:
234 * This function checks for all possible system PCI host bridges that connect
235 * PCI buses. The system hypervisor is queried as to the guest partition
236 * ownership status. A pci_controller is built for any bus which is partially
237 * owned or fully owned by this guest partition.
238 */
239unsigned long __init find_and_init_phbs(void)
240{
241 struct pci_controller *phb;
242 HvBusNumber bus;
243
244 PPCDBG(PPCDBG_BUSWALK, "find_and_init_phbs Entry\n");
245
246 /* Check all possible buses. */
247 for (bus = 0; bus < 256; bus++) {
248 int ret = HvCallXm_testBus(bus);
249 if (ret == 0) {
250 printk("bus %d appears to exist\n", bus);
251
252 phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), GFP_KERNEL);
253 if (phb == NULL)
254 return -ENOMEM;
255 pci_setup_pci_controller(phb);
256
257 phb->pci_mem_offset = phb->local_number = bus;
258 phb->first_busno = bus;
259 phb->last_busno = bus;
260 phb->ops = &iSeries_pci_ops;
261
262 PPCDBG(PPCDBG_BUSWALK, "PCI:Create iSeries pci_controller(%p), Bus: %04X\n",
263 phb, bus);
264
265 /* Find and connect the devices. */
266 scan_PHB_slots(phb);
267 }
268 /*
269 * Check for Unexpected Return code, a clue that something
270 * has gone wrong.
271 */
272 else if (ret != 0x0301)
273 printk(KERN_ERR "Unexpected Return on Probe(0x%04X): 0x%04X",
274 bus, ret);
275 }
276 return 0;
277}
278
279/*
280 * iSeries_pcibios_init
281 *
282 * Chance to initialize any structures or variables before the PCI bus walk.
283 */
284void iSeries_pcibios_init(void)
285{
286 PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Entry.\n");
287 iomm_table_initialize();
288 find_and_init_phbs();
289 io_page_mask = -1;
290 PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Exit.\n");
291}
292
293/*
294 * iSeries_pci_final_fixup(void)
295 */
296void __init iSeries_pci_final_fixup(void)
297{
298 struct pci_dev *pdev = NULL;
299 struct iSeries_Device_Node *node;
300 int DeviceCount = 0;
301
302 PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup Entry.\n");
303
304 /* Fix up at the device node and pci_dev relationship */
305 mf_display_src(0xC9000100);
306
307 printk("pcibios_final_fixup\n");
308 for_each_pci_dev(pdev) {
309 node = find_Device_Node(pdev->bus->number, pdev->devfn);
310 printk("pci dev %p (%x.%x), node %p\n", pdev,
311 pdev->bus->number, pdev->devfn, node);
312
313 if (node != NULL) {
314 ++DeviceCount;
315 pdev->sysdata = (void *)node;
316 node->PciDev = pdev;
317 PPCDBG(PPCDBG_BUSWALK,
318 "pdev 0x%p <==> DevNode 0x%p\n",
319 pdev, node);
320 allocate_device_bars(pdev);
321 iSeries_Device_Information(pdev, DeviceCount);
322 iommu_devnode_init_iSeries(node);
323			pdev->irq = node->Irq;
324		} else
325			printk("PCI: Device Tree not found for 0x%016lX\n",
326				(unsigned long)pdev);
327 }
328 iSeries_activate_IRQs();
329 mf_display_src(0xC9000200);
330}
331
332void pcibios_fixup_bus(struct pci_bus *PciBus)
333{
334 PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup_bus(0x%04X) Entry.\n",
335 PciBus->number);
336}
337
338void pcibios_fixup_resources(struct pci_dev *pdev)
339{
340 PPCDBG(PPCDBG_BUSWALK, "fixup_resources pdev %p\n", pdev);
341}
342
343/*
344 * Loop through each node function to find usable EADs bridges.
345 */
346static void scan_PHB_slots(struct pci_controller *Phb)
347{
348 struct HvCallPci_DeviceInfo *DevInfo;
349 HvBusNumber bus = Phb->local_number; /* System Bus */
350 const HvSubBusNumber SubBus = 0; /* EADs is always 0. */
351 int HvRc = 0;
352 int IdSel;
353 const int MaxAgents = 8;
354
355 DevInfo = (struct HvCallPci_DeviceInfo*)
356 kmalloc(sizeof(struct HvCallPci_DeviceInfo), GFP_KERNEL);
357 if (DevInfo == NULL)
358 return;
359
360 /*
361 * Probe for EADs Bridges
362 */
363 for (IdSel = 1; IdSel < MaxAgents; ++IdSel) {
364 HvRc = HvCallPci_getDeviceInfo(bus, SubBus, IdSel,
365 ISERIES_HV_ADDR(DevInfo),
366 sizeof(struct HvCallPci_DeviceInfo));
367 if (HvRc == 0) {
368 if (DevInfo->deviceType == HvCallPci_NodeDevice)
369 scan_EADS_bridge(bus, SubBus, IdSel);
370 else
371 printk("PCI: Invalid System Configuration(0x%02X)"
372 " for bus 0x%02x id 0x%02x.\n",
373 DevInfo->deviceType, bus, IdSel);
374 }
375 else
376 pci_Log_Error("getDeviceInfo", bus, SubBus, IdSel, HvRc);
377 }
378 kfree(DevInfo);
379}
380
381static void scan_EADS_bridge(HvBusNumber bus, HvSubBusNumber SubBus,
382 int IdSel)
383{
384 struct HvCallPci_BridgeInfo *BridgeInfo;
385 HvAgentId AgentId;
386 int Function;
387 int HvRc;
388
389 BridgeInfo = (struct HvCallPci_BridgeInfo *)
390 kmalloc(sizeof(struct HvCallPci_BridgeInfo), GFP_KERNEL);
391 if (BridgeInfo == NULL)
392 return;
393
394	/* Note: hvSubBus and irq are always 0 at this level! */
395 for (Function = 0; Function < 8; ++Function) {
396 AgentId = ISERIES_PCI_AGENTID(IdSel, Function);
397 HvRc = HvCallXm_connectBusUnit(bus, SubBus, AgentId, 0);
398 if (HvRc == 0) {
399 printk("found device at bus %d idsel %d func %d (AgentId %x)\n",
400 bus, IdSel, Function, AgentId);
401 /* Connect EADs: 0x18.00.12 = 0x00 */
402 PPCDBG(PPCDBG_BUSWALK,
403 "PCI:Connect EADs: 0x%02X.%02X.%02X\n",
404 bus, SubBus, AgentId);
405 HvRc = HvCallPci_getBusUnitInfo(bus, SubBus, AgentId,
406 ISERIES_HV_ADDR(BridgeInfo),
407 sizeof(struct HvCallPci_BridgeInfo));
408 if (HvRc == 0) {
409 printk("bridge info: type %x subbus %x maxAgents %x maxsubbus %x logslot %x\n",
410 BridgeInfo->busUnitInfo.deviceType,
411 BridgeInfo->subBusNumber,
412 BridgeInfo->maxAgents,
413 BridgeInfo->maxSubBusNumber,
414 BridgeInfo->logicalSlotNumber);
415 PPCDBG(PPCDBG_BUSWALK,
416 "PCI: BridgeInfo, Type:0x%02X, SubBus:0x%02X, MaxAgents:0x%02X, MaxSubBus: 0x%02X, LSlot: 0x%02X\n",
417 BridgeInfo->busUnitInfo.deviceType,
418 BridgeInfo->subBusNumber,
419 BridgeInfo->maxAgents,
420 BridgeInfo->maxSubBusNumber,
421 BridgeInfo->logicalSlotNumber);
422
423 if (BridgeInfo->busUnitInfo.deviceType ==
424 HvCallPci_BridgeDevice) {
425 /* Scan_Bridge_Slot...: 0x18.00.12 */
426 scan_bridge_slot(bus, BridgeInfo);
427 } else
428 printk("PCI: Invalid Bridge Configuration(0x%02X)",
429 BridgeInfo->busUnitInfo.deviceType);
430 }
431 } else if (HvRc != 0x000B)
432 pci_Log_Error("EADs Connect",
433 bus, SubBus, AgentId, HvRc);
434 }
435 kfree(BridgeInfo);
436}
437
438/*
439 * This assumes that the node slot is always on the primary bus!
440 */
441static int scan_bridge_slot(HvBusNumber Bus,
442 struct HvCallPci_BridgeInfo *BridgeInfo)
443{
444 struct iSeries_Device_Node *node;
445 HvSubBusNumber SubBus = BridgeInfo->subBusNumber;
446 u16 VendorId = 0;
447 int HvRc = 0;
448 u8 Irq = 0;
449 int IdSel = ISERIES_GET_DEVICE_FROM_SUBBUS(SubBus);
450 int Function = ISERIES_GET_FUNCTION_FROM_SUBBUS(SubBus);
451 HvAgentId EADsIdSel = ISERIES_PCI_AGENTID(IdSel, Function);
452
453 /* iSeries_allocate_IRQ.: 0x18.00.12(0xA3) */
454 Irq = iSeries_allocate_IRQ(Bus, 0, EADsIdSel);
455 PPCDBG(PPCDBG_BUSWALK,
456 "PCI:- allocate and assign IRQ 0x%02X.%02X.%02X = 0x%02X\n",
457 Bus, 0, EADsIdSel, Irq);
458
459 /*
460 * Connect all functions of any device found.
461 */
462 for (IdSel = 1; IdSel <= BridgeInfo->maxAgents; ++IdSel) {
463 for (Function = 0; Function < 8; ++Function) {
464 HvAgentId AgentId = ISERIES_PCI_AGENTID(IdSel, Function);
465 HvRc = HvCallXm_connectBusUnit(Bus, SubBus,
466 AgentId, Irq);
467 if (HvRc != 0) {
468 pci_Log_Error("Connect Bus Unit",
469 Bus, SubBus, AgentId, HvRc);
470 continue;
471 }
472
473 HvRc = HvCallPci_configLoad16(Bus, SubBus, AgentId,
474 PCI_VENDOR_ID, &VendorId);
475 if (HvRc != 0) {
476 pci_Log_Error("Read Vendor",
477 Bus, SubBus, AgentId, HvRc);
478 continue;
479 }
480 printk("read vendor ID: %x\n", VendorId);
481
482 /* FoundDevice: 0x18.28.10 = 0x12AE */
483 PPCDBG(PPCDBG_BUSWALK,
484 "PCI:- FoundDevice: 0x%02X.%02X.%02X = 0x%04X, irq %d\n",
485 Bus, SubBus, AgentId, VendorId, Irq);
486 HvRc = HvCallPci_configStore8(Bus, SubBus, AgentId,
487 PCI_INTERRUPT_LINE, Irq);
488 if (HvRc != 0)
489 pci_Log_Error("PciCfgStore Irq Failed!",
490 Bus, SubBus, AgentId, HvRc);
491
492 ++DeviceCount;
493 node = build_device_node(Bus, SubBus, EADsIdSel, Function);
494 node->Irq = Irq;
495 node->LogicalSlot = BridgeInfo->logicalSlotNumber;
496
497 } /* for (Function = 0; Function < 8; ++Function) */
498 } /* for (IdSel = 1; IdSel <= MaxAgents; ++IdSel) */
499 return HvRc;
500}
501
502/*
503 * I/O memory copies MUST use MMIO commands on iSeries.
504 * To do: for performance, include the HV call directly.
505 */
506void iSeries_memset_io(volatile void __iomem *dest, char c, size_t Count)
507{
508 u8 ByteValue = c;
509 long NumberOfBytes = Count;
510
511 while (NumberOfBytes > 0) {
512 iSeries_Write_Byte(ByteValue, dest++);
513 -- NumberOfBytes;
514 }
515}
516EXPORT_SYMBOL(iSeries_memset_io);
517
518void iSeries_memcpy_toio(volatile void __iomem *dest, void *source, size_t count)
519{
520 char *src = source;
521 long NumberOfBytes = count;
522
523 while (NumberOfBytes > 0) {
524 iSeries_Write_Byte(*src++, dest++);
525 -- NumberOfBytes;
526 }
527}
528EXPORT_SYMBOL(iSeries_memcpy_toio);
529
530void iSeries_memcpy_fromio(void *dest, const volatile void __iomem *src, size_t count)
531{
532 char *dst = dest;
533 long NumberOfBytes = count;
534
535 while (NumberOfBytes > 0) {
536 *dst++ = iSeries_Read_Byte(src++);
537 -- NumberOfBytes;
538 }
539}
540EXPORT_SYMBOL(iSeries_memcpy_fromio);
541
542/*
543 * Look down the chain to find the matching device node.
544 */
545static struct iSeries_Device_Node *find_Device_Node(int bus, int devfn)
546{
547 struct list_head *pos;
548
549 list_for_each(pos, &iSeries_Global_Device_List) {
550 struct iSeries_Device_Node *node =
551 list_entry(pos, struct iSeries_Device_Node, Device_List);
552
553 if ((bus == ISERIES_BUS(node)) && (devfn == node->DevFn))
554 return node;
555 }
556 return NULL;
557}
558
559#if 0
560/*
561 * Returns the device node for the passed pci_dev
562 * Sanity Check Node PciDev to passed pci_dev
563 * If none is found, returns a NULL which the client must handle.
564 */
565static struct iSeries_Device_Node *get_Device_Node(struct pci_dev *pdev)
566{
567 struct iSeries_Device_Node *node;
568
569 node = pdev->sysdata;
570 if (node == NULL || node->PciDev != pdev)
571 node = find_Device_Node(pdev->bus->number, pdev->devfn);
572 return node;
573}
574#endif
575
576/*
577 * Config space read and write functions.
578 * For now at least, we look for the device node for the bus and devfn
579 * that we are asked to access. It may be possible to translate the devfn
580 * to a subbus and deviceid more directly.
581 */
582static u64 hv_cfg_read_func[4] = {
583 HvCallPciConfigLoad8, HvCallPciConfigLoad16,
584 HvCallPciConfigLoad32, HvCallPciConfigLoad32
585};
586
587static u64 hv_cfg_write_func[4] = {
588 HvCallPciConfigStore8, HvCallPciConfigStore16,
589 HvCallPciConfigStore32, HvCallPciConfigStore32
590};
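
The (size - 1) & 3 indexing used by the config accessors below maps access sizes of 1, 2 and 4 bytes onto the 8-, 16- and 32-bit hypervisor calls (a size of 3, if it ever occurred, would also land on the 32-bit entry). A small hedged sketch, with placeholder strings standing in for the HvCallPciConfigLoad* opcodes:

#include <stdio.h>

int main(void)
{
	/* Placeholders; the real table holds the HvCallPciConfigLoad* opcodes. */
	const char *load_fn[4] = { "Load8", "Load16", "Load32", "Load32" };
	int sizes[] = { 1, 2, 4 };
	int i;

	for (i = 0; i < 3; i++)
		printf("size %d -> %s\n", sizes[i], load_fn[(sizes[i] - 1) & 3]);
	return 0;
}
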
591
592/*
593 * Read PCI config space
594 */
595static int iSeries_pci_read_config(struct pci_bus *bus, unsigned int devfn,
596 int offset, int size, u32 *val)
597{
598 struct iSeries_Device_Node *node = find_Device_Node(bus->number, devfn);
599 u64 fn;
600 struct HvCallPci_LoadReturn ret;
601
602 if (node == NULL)
603 return PCIBIOS_DEVICE_NOT_FOUND;
604 if (offset > 255) {
605 *val = ~0;
606 return PCIBIOS_BAD_REGISTER_NUMBER;
607 }
608
609 fn = hv_cfg_read_func[(size - 1) & 3];
610 HvCall3Ret16(fn, &ret, node->DsaAddr.DsaAddr, offset, 0);
611
612 if (ret.rc != 0) {
613 *val = ~0;
614 return PCIBIOS_DEVICE_NOT_FOUND; /* or something */
615 }
616
617 *val = ret.value;
618 return 0;
619}
620
621/*
622 * Write PCI config space
623 */
624
625static int iSeries_pci_write_config(struct pci_bus *bus, unsigned int devfn,
626 int offset, int size, u32 val)
627{
628 struct iSeries_Device_Node *node = find_Device_Node(bus->number, devfn);
629 u64 fn;
630 u64 ret;
631
632 if (node == NULL)
633 return PCIBIOS_DEVICE_NOT_FOUND;
634 if (offset > 255)
635 return PCIBIOS_BAD_REGISTER_NUMBER;
636
637 fn = hv_cfg_write_func[(size - 1) & 3];
638 ret = HvCall4(fn, node->DsaAddr.DsaAddr, offset, val, 0);
639
640 if (ret != 0)
641 return PCIBIOS_DEVICE_NOT_FOUND;
642
643 return 0;
644}
645
646static struct pci_ops iSeries_pci_ops = {
647 .read = iSeries_pci_read_config,
648 .write = iSeries_pci_write_config
649};
650
651/*
652 * Check Return Code
653 * -> On Failure, print and log information.
654 * Increment the retry count; if it exceeds the max, panic the partition.
655 *
656 * PCI: Device 23.90 ReadL I/O Error( 0): 0x1234
657 * PCI: Device 23.90 ReadL Retry( 1)
658 * PCI: Device 23.90 ReadL Retry Successful(1)
659 */
660static int CheckReturnCode(char *TextHdr, struct iSeries_Device_Node *DevNode,
661 int *retry, u64 ret)
662{
663 if (ret != 0) {
664 ++Pci_Error_Count;
665 (*retry)++;
666 printk("PCI: %s: Device 0x%04X:%02X I/O Error(%2d): 0x%04X\n",
667 TextHdr, DevNode->DsaAddr.Dsa.busNumber, DevNode->DevFn,
668 *retry, (int)ret);
669 /*
670 * Bump the retry and check for retry count exceeded.
671 * If, Exceeded, panic the system.
672 */
673 if (((*retry) > Pci_Retry_Max) &&
674 (Pci_Error_Flag > 0)) {
675 mf_display_src(0xB6000103);
676 panic_timeout = 0;
677 panic("PCI: Hardware I/O Error, SRC B6000103, "
678 "Automatic Reboot Disabled.\n");
679 }
680		return -1;	/* Retry */
681 }
682 return 0;
683}
684
685/*
686 * Translate the I/O Address into a device node, bar, and bar offset.
687 * Note: Make sure the passed variables end up on the stack to avoid
688 * the exposure of being global.
689 */
690static inline struct iSeries_Device_Node *xlate_iomm_address(
691 const volatile void __iomem *IoAddress,
692 u64 *dsaptr, u64 *BarOffsetPtr)
693{
694 unsigned long OrigIoAddr;
695 unsigned long BaseIoAddr;
696 unsigned long TableIndex;
697 struct iSeries_Device_Node *DevNode;
698
699 OrigIoAddr = (unsigned long __force)IoAddress;
700 if ((OrigIoAddr < BASE_IO_MEMORY) || (OrigIoAddr >= max_io_memory))
701 return NULL;
702 BaseIoAddr = OrigIoAddr - BASE_IO_MEMORY;
703 TableIndex = BaseIoAddr / IOMM_TABLE_ENTRY_SIZE;
704 DevNode = iomm_table[TableIndex];
705
706 if (DevNode != NULL) {
707 int barnum = iobar_table[TableIndex];
708 *dsaptr = DevNode->DsaAddr.DsaAddr | (barnum << 24);
709 *BarOffsetPtr = BaseIoAddr % IOMM_TABLE_ENTRY_SIZE;
710 } else
711 panic("PCI: Invalid PCI IoAddress detected!\n");
712 return DevNode;
713}
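
Complementing the allocation sketch earlier, a hedged user-space rendering of the reverse lookup that xlate_iomm_address() performs: subtract BASE_IO_MEMORY, divide by the entry size to get the table index (and hence the device node and BAR number), and keep the remainder as the offset within the BAR. The address is hypothetical.

#include <stdio.h>

#define ENTRY_SIZE	0x0000000000400000UL	/* 4 MB per table entry */
#define BASE_IO		0xE000000000000000UL

int main(void)
{
	/* Hypothetical MMIO address inside the second 4 MB table entry. */
	unsigned long io_addr = BASE_IO + 0x480000UL;

	unsigned long table_index = (io_addr - BASE_IO) / ENTRY_SIZE;
	unsigned long bar_offset  = (io_addr - BASE_IO) % ENTRY_SIZE;

	printf("table index = %lu, BAR offset = 0x%lx\n", table_index, bar_offset);
	return 0;
}
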
714
715/*
716 * Read MM I/O Instructions for the iSeries
717 * On an MM I/O error, all ones are returned and iSeries_pci_IoError is
718 * called; otherwise, data is returned in big endian format.
719 *
720 * iSeries_Read_Byte = Read Byte ( 8 bit)
721 * iSeries_Read_Word = Read Word (16 bit)
722 * iSeries_Read_Long = Read Long (32 bit)
723 */
724u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress)
725{
726 u64 BarOffset;
727 u64 dsa;
728 int retry = 0;
729 struct HvCallPci_LoadReturn ret;
730 struct iSeries_Device_Node *DevNode =
731 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
732
733 if (DevNode == NULL) {
734 static unsigned long last_jiffies;
735 static int num_printed;
736
737 if ((jiffies - last_jiffies) > 60 * HZ) {
738 last_jiffies = jiffies;
739 num_printed = 0;
740 }
741 if (num_printed++ < 10)
742 printk(KERN_ERR "iSeries_Read_Byte: invalid access at IO address %p\n", IoAddress);
743 return 0xff;
744 }
745 do {
746 ++Pci_Io_Read_Count;
747 HvCall3Ret16(HvCallPciBarLoad8, &ret, dsa, BarOffset, 0);
748 } while (CheckReturnCode("RDB", DevNode, &retry, ret.rc) != 0);
749
750 return (u8)ret.value;
751}
752EXPORT_SYMBOL(iSeries_Read_Byte);
753
754u16 iSeries_Read_Word(const volatile void __iomem *IoAddress)
755{
756 u64 BarOffset;
757 u64 dsa;
758 int retry = 0;
759 struct HvCallPci_LoadReturn ret;
760 struct iSeries_Device_Node *DevNode =
761 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
762
763 if (DevNode == NULL) {
764 static unsigned long last_jiffies;
765 static int num_printed;
766
767 if ((jiffies - last_jiffies) > 60 * HZ) {
768 last_jiffies = jiffies;
769 num_printed = 0;
770 }
771 if (num_printed++ < 10)
772 printk(KERN_ERR "iSeries_Read_Word: invalid access at IO address %p\n", IoAddress);
773 return 0xffff;
774 }
775 do {
776 ++Pci_Io_Read_Count;
777 HvCall3Ret16(HvCallPciBarLoad16, &ret, dsa,
778 BarOffset, 0);
779 } while (CheckReturnCode("RDW", DevNode, &retry, ret.rc) != 0);
780
781 return swab16((u16)ret.value);
782}
783EXPORT_SYMBOL(iSeries_Read_Word);
784
785u32 iSeries_Read_Long(const volatile void __iomem *IoAddress)
786{
787 u64 BarOffset;
788 u64 dsa;
789 int retry = 0;
790 struct HvCallPci_LoadReturn ret;
791 struct iSeries_Device_Node *DevNode =
792 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
793
794 if (DevNode == NULL) {
795 static unsigned long last_jiffies;
796 static int num_printed;
797
798 if ((jiffies - last_jiffies) > 60 * HZ) {
799 last_jiffies = jiffies;
800 num_printed = 0;
801 }
802 if (num_printed++ < 10)
803 printk(KERN_ERR "iSeries_Read_Long: invalid access at IO address %p\n", IoAddress);
804 return 0xffffffff;
805 }
806 do {
807 ++Pci_Io_Read_Count;
808 HvCall3Ret16(HvCallPciBarLoad32, &ret, dsa,
809 BarOffset, 0);
810 } while (CheckReturnCode("RDL", DevNode, &retry, ret.rc) != 0);
811
812 return swab32((u32)ret.value);
813}
814EXPORT_SYMBOL(iSeries_Read_Long);
815
816/*
817 * Write MM I/O Instructions for the iSeries
818 *
819 * iSeries_Write_Byte = Write Byte (8 bit)
820 * iSeries_Write_Word = Write Word(16 bit)
821 * iSeries_Write_Long = Write Long(32 bit)
822 */
823void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress)
824{
825 u64 BarOffset;
826 u64 dsa;
827 int retry = 0;
828 u64 rc;
829 struct iSeries_Device_Node *DevNode =
830 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
831
832 if (DevNode == NULL) {
833 static unsigned long last_jiffies;
834 static int num_printed;
835
836 if ((jiffies - last_jiffies) > 60 * HZ) {
837 last_jiffies = jiffies;
838 num_printed = 0;
839 }
840 if (num_printed++ < 10)
841 printk(KERN_ERR "iSeries_Write_Byte: invalid access at IO address %p\n", IoAddress);
842 return;
843 }
844 do {
845 ++Pci_Io_Write_Count;
846 rc = HvCall4(HvCallPciBarStore8, dsa, BarOffset, data, 0);
847 } while (CheckReturnCode("WWB", DevNode, &retry, rc) != 0);
848}
849EXPORT_SYMBOL(iSeries_Write_Byte);
850
851void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress)
852{
853 u64 BarOffset;
854 u64 dsa;
855 int retry = 0;
856 u64 rc;
857 struct iSeries_Device_Node *DevNode =
858 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
859
860 if (DevNode == NULL) {
861 static unsigned long last_jiffies;
862 static int num_printed;
863
864 if ((jiffies - last_jiffies) > 60 * HZ) {
865 last_jiffies = jiffies;
866 num_printed = 0;
867 }
868 if (num_printed++ < 10)
869 printk(KERN_ERR "iSeries_Write_Word: invalid access at IO address %p\n", IoAddress);
870 return;
871 }
872 do {
873 ++Pci_Io_Write_Count;
874 rc = HvCall4(HvCallPciBarStore16, dsa, BarOffset, swab16(data), 0);
875 } while (CheckReturnCode("WWW", DevNode, &retry, rc) != 0);
876}
877EXPORT_SYMBOL(iSeries_Write_Word);
878
879void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress)
880{
881 u64 BarOffset;
882 u64 dsa;
883 int retry = 0;
884 u64 rc;
885 struct iSeries_Device_Node *DevNode =
886 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
887
888 if (DevNode == NULL) {
889 static unsigned long last_jiffies;
890 static int num_printed;
891
892 if ((jiffies - last_jiffies) > 60 * HZ) {
893 last_jiffies = jiffies;
894 num_printed = 0;
895 }
896 if (num_printed++ < 10)
897 printk(KERN_ERR "iSeries_Write_Long: invalid access at IO address %p\n", IoAddress);
898 return;
899 }
900 do {
901 ++Pci_Io_Write_Count;
902 rc = HvCall4(HvCallPciBarStore32, dsa, BarOffset, swab32(data), 0);
903 } while (CheckReturnCode("WWL", DevNode, &retry, rc) != 0);
904}
905EXPORT_SYMBOL(iSeries_Write_Long);
diff --git a/arch/ppc64/kernel/iSeries_proc.c b/arch/ppc64/kernel/iSeries_proc.c
deleted file mode 100644
index 0fe3116eba29..000000000000
--- a/arch/ppc64/kernel/iSeries_proc.c
+++ /dev/null
@@ -1,113 +0,0 @@
1/*
2 * iSeries_proc.c
3 * Copyright (C) 2001 Kyle A. Lucke IBM Corporation
4 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20#include <linux/init.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/param.h> /* for HZ */
24#include <asm/paca.h>
25#include <asm/processor.h>
26#include <asm/time.h>
27#include <asm/lppaca.h>
28#include <asm/iSeries/ItLpQueue.h>
29#include <asm/iSeries/HvCallXm.h>
30#include <asm/iSeries/IoHriMainStore.h>
31#include <asm/iSeries/IoHriProcessorVpd.h>
32
33static int __init iseries_proc_create(void)
34{
35 struct proc_dir_entry *e = proc_mkdir("iSeries", 0);
36 if (!e)
37 return 1;
38
39 return 0;
40}
41core_initcall(iseries_proc_create);
42
43static unsigned long startTitan = 0;
44static unsigned long startTb = 0;
45
46static int proc_titantod_show(struct seq_file *m, void *v)
47{
48 unsigned long tb0, titan_tod;
49
50 tb0 = get_tb();
51 titan_tod = HvCallXm_loadTod();
52
53 seq_printf(m, "Titan\n" );
54 seq_printf(m, " time base = %016lx\n", tb0);
55 seq_printf(m, " titan tod = %016lx\n", titan_tod);
56 seq_printf(m, " xProcFreq = %016x\n",
57 xIoHriProcessorVpd[0].xProcFreq);
58 seq_printf(m, " xTimeBaseFreq = %016x\n",
59 xIoHriProcessorVpd[0].xTimeBaseFreq);
60 seq_printf(m, " tb_ticks_per_jiffy = %lu\n", tb_ticks_per_jiffy);
61 seq_printf(m, " tb_ticks_per_usec = %lu\n", tb_ticks_per_usec);
62
63 if (!startTitan) {
64 startTitan = titan_tod;
65 startTb = tb0;
66 } else {
67 unsigned long titan_usec = (titan_tod - startTitan) >> 12;
68 unsigned long tb_ticks = (tb0 - startTb);
69 unsigned long titan_jiffies = titan_usec / (1000000/HZ);
70 unsigned long titan_jiff_usec = titan_jiffies * (1000000/HZ);
71 unsigned long titan_jiff_rem_usec = titan_usec - titan_jiff_usec;
72 unsigned long tb_jiffies = tb_ticks / tb_ticks_per_jiffy;
73 unsigned long tb_jiff_ticks = tb_jiffies * tb_ticks_per_jiffy;
74 unsigned long tb_jiff_rem_ticks = tb_ticks - tb_jiff_ticks;
75 unsigned long tb_jiff_rem_usec = tb_jiff_rem_ticks / tb_ticks_per_usec;
76 unsigned long new_tb_ticks_per_jiffy = (tb_ticks * (1000000/HZ))/titan_usec;
77
78 seq_printf(m, " titan elapsed = %lu uSec\n", titan_usec);
79 seq_printf(m, " tb elapsed = %lu ticks\n", tb_ticks);
80 seq_printf(m, " titan jiffies = %lu.%04lu \n", titan_jiffies,
81 titan_jiff_rem_usec);
82 seq_printf(m, " tb jiffies = %lu.%04lu\n", tb_jiffies,
83 tb_jiff_rem_usec);
84 seq_printf(m, " new tb_ticks_per_jiffy = %lu\n",
85 new_tb_ticks_per_jiffy);
86 }
87
88 return 0;
89}
90
91static int proc_titantod_open(struct inode *inode, struct file *file)
92{
93 return single_open(file, proc_titantod_show, NULL);
94}
95
96static struct file_operations proc_titantod_operations = {
97 .open = proc_titantod_open,
98 .read = seq_read,
99 .llseek = seq_lseek,
100 .release = single_release,
101};
102
103static int __init iseries_proc_init(void)
104{
105 struct proc_dir_entry *e;
106
107 e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL);
108 if (e)
109 e->proc_fops = &proc_titantod_operations;
110
111 return 0;
112}
113__initcall(iseries_proc_init);
diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c
deleted file mode 100644
index 3ffefbbc6623..000000000000
--- a/arch/ppc64/kernel/iSeries_setup.c
+++ /dev/null
@@ -1,977 +0,0 @@
1/*
2 * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
3 * Copyright (c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
4 *
5 * Module name: iSeries_setup.c
6 *
7 * Description:
8 * Architecture- / platform-specific boot-time initialization code for
9 * the IBM iSeries LPAR. Adapted from original code by Grant Erickson and
10 * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek
11 * <dan@net4x.com>.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18
19#undef DEBUG
20
21#include <linux/config.h>
22#include <linux/init.h>
23#include <linux/threads.h>
24#include <linux/smp.h>
25#include <linux/param.h>
26#include <linux/string.h>
27#include <linux/initrd.h>
28#include <linux/seq_file.h>
29#include <linux/kdev_t.h>
30#include <linux/major.h>
31#include <linux/root_dev.h>
32
33#include <asm/processor.h>
34#include <asm/machdep.h>
35#include <asm/page.h>
36#include <asm/mmu.h>
37#include <asm/pgtable.h>
38#include <asm/mmu_context.h>
39#include <asm/cputable.h>
40#include <asm/sections.h>
41#include <asm/iommu.h>
42#include <asm/firmware.h>
43
44#include <asm/time.h>
45#include "iSeries_setup.h"
46#include <asm/naca.h>
47#include <asm/paca.h>
48#include <asm/cache.h>
49#include <asm/sections.h>
50#include <asm/abs_addr.h>
51#include <asm/iSeries/HvCallHpt.h>
52#include <asm/iSeries/HvLpConfig.h>
53#include <asm/iSeries/HvCallEvent.h>
54#include <asm/iSeries/HvCallSm.h>
55#include <asm/iSeries/HvCallXm.h>
56#include <asm/iSeries/ItLpQueue.h>
57#include <asm/iSeries/IoHriMainStore.h>
58#include <asm/iSeries/mf.h>
59#include <asm/iSeries/HvLpEvent.h>
60#include <asm/iSeries/iSeries_irq.h>
61#include <asm/iSeries/IoHriProcessorVpd.h>
62#include <asm/iSeries/ItVpdAreas.h>
63#include <asm/iSeries/LparMap.h>
64
65extern void hvlog(char *fmt, ...);
66
67#ifdef DEBUG
68#define DBG(fmt...) hvlog(fmt)
69#else
70#define DBG(fmt...)
71#endif
72
73/* Function Prototypes */
74extern void ppcdbg_initialize(void);
75
76static void build_iSeries_Memory_Map(void);
77static void setup_iSeries_cache_sizes(void);
78static void iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr);
79#ifdef CONFIG_PCI
80extern void iSeries_pci_final_fixup(void);
81#else
82static void iSeries_pci_final_fixup(void) { }
83#endif
84
85/* Global Variables */
86static unsigned long procFreqHz;
87static unsigned long procFreqMhz;
88static unsigned long procFreqMhzHundreths;
89
90static unsigned long tbFreqHz;
91static unsigned long tbFreqMhz;
92static unsigned long tbFreqMhzHundreths;
93
94int piranha_simulator;
95
96extern int rd_size; /* Defined in drivers/block/rd.c */
97extern unsigned long klimit;
98extern unsigned long embedded_sysmap_start;
99extern unsigned long embedded_sysmap_end;
100
101extern unsigned long iSeries_recal_tb;
102extern unsigned long iSeries_recal_titan;
103
104static int mf_initialized;
105
106struct MemoryBlock {
107 unsigned long absStart;
108 unsigned long absEnd;
109 unsigned long logicalStart;
110 unsigned long logicalEnd;
111};
112
113/*
114 * Process the main store VPD to determine where the holes in memory are,
115 * return the number of physical blocks, and fill in the array of
116 * block data.
117 */
118static unsigned long iSeries_process_Condor_mainstore_vpd(
119 struct MemoryBlock *mb_array, unsigned long max_entries)
120{
121 unsigned long holeFirstChunk, holeSizeChunks;
122 unsigned long numMemoryBlocks = 1;
123 struct IoHriMainStoreSegment4 *msVpd =
124 (struct IoHriMainStoreSegment4 *)xMsVpd;
125 unsigned long holeStart = msVpd->nonInterleavedBlocksStartAdr;
126 unsigned long holeEnd = msVpd->nonInterleavedBlocksEndAdr;
127 unsigned long holeSize = holeEnd - holeStart;
128
129 printk("Mainstore_VPD: Condor\n");
130 /*
131 * Determine if absolute memory has any
132 * holes so that we can interpret the
133 * access map we get back from the hypervisor
134 * correctly.
135 */
136 mb_array[0].logicalStart = 0;
137 mb_array[0].logicalEnd = 0x100000000;
138 mb_array[0].absStart = 0;
139 mb_array[0].absEnd = 0x100000000;
140
141 if (holeSize) {
142 numMemoryBlocks = 2;
143 holeStart = holeStart & 0x000fffffffffffff;
144 holeStart = addr_to_chunk(holeStart);
145 holeFirstChunk = holeStart;
146 holeSize = addr_to_chunk(holeSize);
147 holeSizeChunks = holeSize;
148 printk( "Main store hole: start chunk = %0lx, size = %0lx chunks\n",
149 holeFirstChunk, holeSizeChunks );
150 mb_array[0].logicalEnd = holeFirstChunk;
151 mb_array[0].absEnd = holeFirstChunk;
152 mb_array[1].logicalStart = holeFirstChunk;
153 mb_array[1].logicalEnd = 0x100000000 - holeSizeChunks;
154 mb_array[1].absStart = holeFirstChunk + holeSizeChunks;
155 mb_array[1].absEnd = 0x100000000;
156 }
157 return numMemoryBlocks;
158}
159
160#define MaxSegmentAreas 32
161#define MaxSegmentAdrRangeBlocks 128
162#define MaxAreaRangeBlocks 4
163
164static unsigned long iSeries_process_Regatta_mainstore_vpd(
165 struct MemoryBlock *mb_array, unsigned long max_entries)
166{
167 struct IoHriMainStoreSegment5 *msVpdP =
168 (struct IoHriMainStoreSegment5 *)xMsVpd;
169 unsigned long numSegmentBlocks = 0;
170 u32 existsBits = msVpdP->msAreaExists;
171 unsigned long area_num;
172
173 printk("Mainstore_VPD: Regatta\n");
174
175 for (area_num = 0; area_num < MaxSegmentAreas; ++area_num ) {
176 unsigned long numAreaBlocks;
177 struct IoHriMainStoreArea4 *currentArea;
178
179 if (existsBits & 0x80000000) {
180 unsigned long block_num;
181
182 currentArea = &msVpdP->msAreaArray[area_num];
183 numAreaBlocks = currentArea->numAdrRangeBlocks;
184 printk("ms_vpd: processing area %2ld blocks=%ld",
185 area_num, numAreaBlocks);
186 for (block_num = 0; block_num < numAreaBlocks;
187 ++block_num ) {
188 /* Process an address range block */
189 struct MemoryBlock tempBlock;
190 unsigned long i;
191
192 tempBlock.absStart =
193 (unsigned long)currentArea->xAdrRangeBlock[block_num].blockStart;
194 tempBlock.absEnd =
195 (unsigned long)currentArea->xAdrRangeBlock[block_num].blockEnd;
196 tempBlock.logicalStart = 0;
197 tempBlock.logicalEnd = 0;
198 printk("\n block %ld absStart=%016lx absEnd=%016lx",
199 block_num, tempBlock.absStart,
200 tempBlock.absEnd);
201
202 for (i = 0; i < numSegmentBlocks; ++i) {
203 if (mb_array[i].absStart ==
204 tempBlock.absStart)
205 break;
206 }
207 if (i == numSegmentBlocks) {
208 if (numSegmentBlocks == max_entries)
209 panic("iSeries_process_mainstore_vpd: too many memory blocks");
210 mb_array[numSegmentBlocks] = tempBlock;
211 ++numSegmentBlocks;
212 } else
213 printk(" (duplicate)");
214 }
215 printk("\n");
216 }
217 existsBits <<= 1;
218 }
219 /* Now sort the blocks found into ascending sequence */
220 if (numSegmentBlocks > 1) {
221 unsigned long m, n;
222
223 for (m = 0; m < numSegmentBlocks - 1; ++m) {
224 for (n = numSegmentBlocks - 1; m < n; --n) {
225 if (mb_array[n].absStart <
226 mb_array[n-1].absStart) {
227 struct MemoryBlock tempBlock;
228
229 tempBlock = mb_array[n];
230 mb_array[n] = mb_array[n-1];
231 mb_array[n-1] = tempBlock;
232 }
233 }
234 }
235 }
236 /*
237 * Assign "logical" addresses to each block. These
238 * addresses correspond to the hypervisor "bitmap" space.
239 * Convert all addresses into units of 256K chunks.
240 */
241 {
242 unsigned long i, nextBitmapAddress;
243
244 printk("ms_vpd: %ld sorted memory blocks\n", numSegmentBlocks);
245 nextBitmapAddress = 0;
246 for (i = 0; i < numSegmentBlocks; ++i) {
247 unsigned long length = mb_array[i].absEnd -
248 mb_array[i].absStart;
249
250 mb_array[i].logicalStart = nextBitmapAddress;
251 mb_array[i].logicalEnd = nextBitmapAddress + length;
252 nextBitmapAddress += length;
253 printk(" Bitmap range: %016lx - %016lx\n"
254 " Absolute range: %016lx - %016lx\n",
255 mb_array[i].logicalStart,
256 mb_array[i].logicalEnd,
257 mb_array[i].absStart, mb_array[i].absEnd);
258 mb_array[i].absStart = addr_to_chunk(mb_array[i].absStart &
259 0x000fffffffffffff);
260 mb_array[i].absEnd = addr_to_chunk(mb_array[i].absEnd &
261 0x000fffffffffffff);
262 mb_array[i].logicalStart =
263 addr_to_chunk(mb_array[i].logicalStart);
264 mb_array[i].logicalEnd = addr_to_chunk(mb_array[i].logicalEnd);
265 }
266 }
267
268 return numSegmentBlocks;
269}
270
271static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array,
272 unsigned long max_entries)
273{
274 unsigned long i;
275 unsigned long mem_blocks = 0;
276
277 if (cpu_has_feature(CPU_FTR_SLB))
278 mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array,
279 max_entries);
280 else
281 mem_blocks = iSeries_process_Condor_mainstore_vpd(mb_array,
282 max_entries);
283
284 printk("Mainstore_VPD: numMemoryBlocks = %ld \n", mem_blocks);
285 for (i = 0; i < mem_blocks; ++i) {
286 printk("Mainstore_VPD: block %3ld logical chunks %016lx - %016lx\n"
287 " abs chunks %016lx - %016lx\n",
288 i, mb_array[i].logicalStart, mb_array[i].logicalEnd,
289 mb_array[i].absStart, mb_array[i].absEnd);
290 }
291 return mem_blocks;
292}
293
294static void __init iSeries_get_cmdline(void)
295{
296 char *p, *q;
297
298 /* copy the command line parameter from the primary VSP */
299 HvCallEvent_dmaToSp(cmd_line, 2 * 64* 1024, 256,
300 HvLpDma_Direction_RemoteToLocal);
301
302 p = cmd_line;
303 q = cmd_line + 255;
304 while(p < q) {
305 if (!*p || *p == '\n')
306 break;
307 ++p;
308 }
309 *p = 0;
310}
311
312static void __init iSeries_init_early(void)
313{
314 extern unsigned long memory_limit;
315
316 DBG(" -> iSeries_init_early()\n");
317
318 ppc64_firmware_features = FW_FEATURE_ISERIES;
319
320 ppcdbg_initialize();
321
322#if defined(CONFIG_BLK_DEV_INITRD)
323 /*
324 * If the init RAM disk has been configured and there is
325 * a non-zero starting address for it, set it up
326 */
327 if (naca.xRamDisk) {
328 initrd_start = (unsigned long)__va(naca.xRamDisk);
329 initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE;
330 initrd_below_start_ok = 1; // ramdisk in kernel space
331 ROOT_DEV = Root_RAM0;
332 if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize)
333 rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024;
334 } else
335#endif /* CONFIG_BLK_DEV_INITRD */
336 {
337 /* ROOT_DEV = MKDEV(VIODASD_MAJOR, 1); */
338 }
339
340 iSeries_recal_tb = get_tb();
341 iSeries_recal_titan = HvCallXm_loadTod();
342
343 /*
344 * Cache sizes must be initialized before hpte_init_iSeries is called
345	 * as the latter needs them for flush_icache_range()
346 */
347 setup_iSeries_cache_sizes();
348
349 /*
350 * Initialize the hash table management pointers
351 */
352 hpte_init_iSeries();
353
354 /*
355 * Initialize the DMA/TCE management
356 */
357 iommu_init_early_iSeries();
358
359 /*
360	 * Initialize the table which translates Linux physical addresses to
361 * AS/400 absolute addresses
362 */
363 build_iSeries_Memory_Map();
364
365 iSeries_get_cmdline();
366
367 /* Save unparsed command line copy for /proc/cmdline */
368 strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
369
370 /* Parse early parameters, in particular mem=x */
371 parse_early_param();
372
373 if (memory_limit) {
374 if (memory_limit < systemcfg->physicalMemorySize)
375 systemcfg->physicalMemorySize = memory_limit;
376 else {
377 printk("Ignoring mem=%lu >= ram_top.\n", memory_limit);
378 memory_limit = 0;
379 }
380 }
381
382 /* Bolt kernel mappings for all of memory (or just a bit if we've got a limit) */
383 iSeries_bolt_kernel(0, systemcfg->physicalMemorySize);
384
385 lmb_init();
386 lmb_add(0, systemcfg->physicalMemorySize);
387 lmb_analyze();
388 lmb_reserve(0, __pa(klimit));
389
390 /* Initialize machine-dependency vectors */
391#ifdef CONFIG_SMP
392 smp_init_iSeries();
393#endif
394 if (itLpNaca.xPirEnvironMode == 0)
395 piranha_simulator = 1;
396
397 /* Associate Lp Event Queue 0 with processor 0 */
398 HvCallEvent_setLpEventQueueInterruptProc(0, 0);
399
400 mf_init();
401 mf_initialized = 1;
402 mb();
403
404 /* If we were passed an initrd, set the ROOT_DEV properly if the values
405 * look sensible. If not, clear initrd reference.
406 */
407#ifdef CONFIG_BLK_DEV_INITRD
408 if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
409 initrd_end > initrd_start)
410 ROOT_DEV = Root_RAM0;
411 else
412 initrd_start = initrd_end = 0;
413#endif /* CONFIG_BLK_DEV_INITRD */
414
415 DBG(" <- iSeries_init_early()\n");
416}
417
418struct mschunks_map mschunks_map = {
419 /* XXX We don't use these, but Piranha might need them. */
420 .chunk_size = MSCHUNKS_CHUNK_SIZE,
421 .chunk_shift = MSCHUNKS_CHUNK_SHIFT,
422 .chunk_mask = MSCHUNKS_OFFSET_MASK,
423};
424EXPORT_SYMBOL(mschunks_map);
425
426void mschunks_alloc(unsigned long num_chunks)
427{
428 klimit = _ALIGN(klimit, sizeof(u32));
429 mschunks_map.mapping = (u32 *)klimit;
430 klimit += num_chunks * sizeof(u32);
431 mschunks_map.num_chunks = num_chunks;
432}
433
434/*
435 * The iSeries may have very large memories ( > 128 GB ) and a partition
436 * may get memory in "chunks" that may be anywhere in the 2**52 real
437 * address space. The chunks are 256K in size. To map this to the
438 * memory model Linux expects, the AS/400 specific code builds a
439 * translation table to translate what Linux thinks are "physical"
440 * addresses to the actual real addresses. This allows us to make
441 * it appear to Linux that we have contiguous memory starting at
442 * physical address zero while in fact this could be far from the truth.
443 * To avoid confusion, I'll let the words physical and/or real address
444 * apply to the Linux addresses while I'll use "absolute address" to
445 * refer to the actual hardware real address.
446 *
447 * build_iSeries_Memory_Map gets information from the Hypervisor and
448 * looks at the Main Store VPD to determine the absolute addresses
449 * of the memory that has been assigned to our partition and builds
450 * a table used to translate Linux's physical addresses to these
451 * absolute addresses. Absolute addresses are needed when
452 * communicating with the hypervisor (e.g. to build HPT entries)
453 */
454
455static void __init build_iSeries_Memory_Map(void)
456{
457 u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize;
458 u32 nextPhysChunk;
459 u32 hptFirstChunk, hptLastChunk, hptSizeChunks, hptSizePages;
460 u32 num_ptegs;
461 u32 totalChunks,moreChunks;
462 u32 currChunk, thisChunk, absChunk;
463 u32 currDword;
464 u32 chunkBit;
465 u64 map;
466 struct MemoryBlock mb[32];
467 unsigned long numMemoryBlocks, curBlock;
468
469 /* Chunk size on iSeries is 256K bytes */
470 totalChunks = (u32)HvLpConfig_getMsChunks();
471 mschunks_alloc(totalChunks);
472
473 /*
474 * Get absolute address of our load area
475 * and map it to physical address 0
476	 * This guarantees that the load area ends up at physical 0;
477	 * otherwise, it might not be returned by PLIC as the first
478	 * chunks.
479 */
480
481 loadAreaFirstChunk = (u32)addr_to_chunk(itLpNaca.xLoadAreaAddr);
482 loadAreaSize = itLpNaca.xLoadAreaChunks;
483
484 /*
485 * Only add the pages already mapped here.
486 * Otherwise we might add the hpt pages
487 * The rest of the pages of the load area
488 * aren't in the HPT yet and can still
489 * be assigned an arbitrary physical address
490 */
491 if ((loadAreaSize * 64) > HvPagesToMap)
492 loadAreaSize = HvPagesToMap / 64;
493
494 loadAreaLastChunk = loadAreaFirstChunk + loadAreaSize - 1;
495
496 /*
497 * TODO Do we need to do something if the HPT is in the 64MB load area?
498 * This would be required if the itLpNaca.xLoadAreaChunks includes
499 * the HPT size
500 */
501
502 printk("Mapping load area - physical addr = 0000000000000000\n"
503 " absolute addr = %016lx\n",
504 chunk_to_addr(loadAreaFirstChunk));
505 printk("Load area size %dK\n", loadAreaSize * 256);
506
507 for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk)
508 mschunks_map.mapping[nextPhysChunk] =
509 loadAreaFirstChunk + nextPhysChunk;
510
511 /*
512 * Get absolute address of our HPT and remember it so
513 * we won't map it to any physical address
514 */
515 hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
516 hptSizePages = (u32)HvCallHpt_getHptPages();
517 hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT);
518 hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
519
520 printk("HPT absolute addr = %016lx, size = %dK\n",
521 chunk_to_addr(hptFirstChunk), hptSizeChunks * 256);
522
523 /* Fill in the hashed page table hash mask */
524 num_ptegs = hptSizePages *
525 (PAGE_SIZE / (sizeof(hpte_t) * HPTES_PER_GROUP));
526 htab_hash_mask = num_ptegs - 1;
527
528 /*
529 * The actual hashed page table is in the hypervisor,
530 * we have no direct access
531 */
532 htab_address = NULL;
533
534 /*
535 * Determine if absolute memory has any
536 * holes so that we can interpret the
537 * access map we get back from the hypervisor
538 * correctly.
539 */
540 numMemoryBlocks = iSeries_process_mainstore_vpd(mb, 32);
541
542 /*
543 * Process the main store access map from the hypervisor
544 * to build up our physical -> absolute translation table
545 */
546 curBlock = 0;
547 currChunk = 0;
548 currDword = 0;
549 moreChunks = totalChunks;
550
551 while (moreChunks) {
552 map = HvCallSm_get64BitsOfAccessMap(itLpNaca.xLpIndex,
553 currDword);
554 thisChunk = currChunk;
555 while (map) {
556 chunkBit = map >> 63;
557 map <<= 1;
558 if (chunkBit) {
559 --moreChunks;
560 while (thisChunk >= mb[curBlock].logicalEnd) {
561 ++curBlock;
562 if (curBlock >= numMemoryBlocks)
563 panic("out of memory blocks");
564 }
565 if (thisChunk < mb[curBlock].logicalStart)
566 panic("memory block error");
567
568 absChunk = mb[curBlock].absStart +
569 (thisChunk - mb[curBlock].logicalStart);
570 if (((absChunk < hptFirstChunk) ||
571 (absChunk > hptLastChunk)) &&
572 ((absChunk < loadAreaFirstChunk) ||
573 (absChunk > loadAreaLastChunk))) {
574 mschunks_map.mapping[nextPhysChunk] =
575 absChunk;
576 ++nextPhysChunk;
577 }
578 }
579 ++thisChunk;
580 }
581 ++currDword;
582 currChunk += 64;
583 }
584
585 /*
586 * main store size (in chunks) is
587 * totalChunks - hptSizeChunks
588 * which should be equal to
589 * nextPhysChunk
590 */
591 systemcfg->physicalMemorySize = chunk_to_addr(nextPhysChunk);
592}
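
The table built above is what later address translation relies on: a Linux physical address is split into a 256 KB chunk number and an offset, the chunk number is looked up in the mapping, and the absolute chunk is recombined with the offset. A hedged, self-contained sketch of that translation (the real helpers live in the mschunks/abs_addr headers; the toy mapping is hypothetical):

#include <stdio.h>

#define CHUNK_SHIFT	18			/* 256 KB chunks */
#define CHUNK_SIZE	(1UL << CHUNK_SHIFT)
#define OFFSET_MASK	(CHUNK_SIZE - 1)

/* Toy mapping: physical chunk i lives at absolute chunk chunk_map[i]. */
static unsigned int chunk_map[] = { 0x40, 0x41, 0x80 };

static unsigned long phys_to_abs_sketch(unsigned long phys)
{
	unsigned long chunk = phys >> CHUNK_SHIFT;

	return ((unsigned long)chunk_map[chunk] << CHUNK_SHIFT) |
	       (phys & OFFSET_MASK);
}

int main(void)
{
	/* An address in the third physical chunk maps into absolute chunk 0x80. */
	printf("abs = 0x%lx\n", phys_to_abs_sketch(2 * CHUNK_SIZE + 0x1234));
	return 0;
}
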
593
594/*
595 * Set up the variables that describe the cache line sizes
596 * for this machine.
597 */
598static void __init setup_iSeries_cache_sizes(void)
599{
600 unsigned int i, n;
601 unsigned int procIx = get_paca()->lppaca.dyn_hv_phys_proc_index;
602
603 systemcfg->icache_size =
604 ppc64_caches.isize = xIoHriProcessorVpd[procIx].xInstCacheSize * 1024;
605 systemcfg->icache_line_size =
606 ppc64_caches.iline_size =
607 xIoHriProcessorVpd[procIx].xInstCacheOperandSize;
608 systemcfg->dcache_size =
609 ppc64_caches.dsize =
610 xIoHriProcessorVpd[procIx].xDataL1CacheSizeKB * 1024;
611 systemcfg->dcache_line_size =
612 ppc64_caches.dline_size =
613 xIoHriProcessorVpd[procIx].xDataCacheOperandSize;
614 ppc64_caches.ilines_per_page = PAGE_SIZE / ppc64_caches.iline_size;
615 ppc64_caches.dlines_per_page = PAGE_SIZE / ppc64_caches.dline_size;
616
617 i = ppc64_caches.iline_size;
618 n = 0;
619 while ((i = (i / 2)))
620 ++n;
621 ppc64_caches.log_iline_size = n;
622
623 i = ppc64_caches.dline_size;
624 n = 0;
625 while ((i = (i / 2)))
626 ++n;
627 ppc64_caches.log_dline_size = n;
628
629 printk("D-cache line size = %d\n",
630 (unsigned int)ppc64_caches.dline_size);
631 printk("I-cache line size = %d\n",
632 (unsigned int)ppc64_caches.iline_size);
633}
634
635/*
636 * Create a pte. Used during initialization only.
637 */
638static void iSeries_make_pte(unsigned long va, unsigned long pa,
639 int mode)
640{
641 hpte_t local_hpte, rhpte;
642 unsigned long hash, vpn;
643 long slot;
644
645 vpn = va >> PAGE_SHIFT;
646 hash = hpt_hash(vpn, 0);
647
648 local_hpte.r = pa | mode;
649 local_hpte.v = ((va >> 23) << HPTE_V_AVPN_SHIFT)
650 | HPTE_V_BOLTED | HPTE_V_VALID;
651
652 slot = HvCallHpt_findValid(&rhpte, vpn);
653 if (slot < 0) {
654 /* Must find space in primary group */
655 panic("hash_page: hpte already exists\n");
656 }
657 HvCallHpt_addValidate(slot, 0, &local_hpte);
658}
659
660/*
661 * Bolt the kernel addr space into the HPT
662 */
663static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr)
664{
665 unsigned long pa;
666 unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
667 hpte_t hpte;
668
669 for (pa = saddr; pa < eaddr ;pa += PAGE_SIZE) {
670 unsigned long ea = (unsigned long)__va(pa);
671 unsigned long vsid = get_kernel_vsid(ea);
672 unsigned long va = (vsid << 28) | (pa & 0xfffffff);
673 unsigned long vpn = va >> PAGE_SHIFT;
674 unsigned long slot = HvCallHpt_findValid(&hpte, vpn);
675
676 /* Make non-kernel text non-executable */
677 if (!in_kernel_text(ea))
678 mode_rw |= HW_NO_EXEC;
679
680 if (hpte.v & HPTE_V_VALID) {
681 /* HPTE exists, so just bolt it */
682 HvCallHpt_setSwBits(slot, 0x10, 0);
683 /* And make sure the pp bits are correct */
684 HvCallHpt_setPp(slot, PP_RWXX);
685 } else
686 /* No HPTE exists, so create a new bolted one */
687 iSeries_make_pte(va, phys_to_abs(pa), mode_rw);
688 }
689}
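
For each page, the loop above forms the hashable virtual address from the kernel VSID and the low 28 bits of the physical address, then derives the virtual page number. A hedged sketch of just that arithmetic, assuming 4 KB pages and a made-up VSID:

#include <stdio.h>

#define PAGE_SHIFT	12		/* assume 4 KB pages for the sketch */

int main(void)
{
	unsigned long long vsid = 0x123456ULL;		/* made-up kernel VSID */
	unsigned long long pa   = 0x01234000ULL;	/* physical address being bolted */

	unsigned long long va  = (vsid << 28) | (pa & 0x0fffffffULL);
	unsigned long long vpn = va >> PAGE_SHIFT;

	printf("va = 0x%llx, vpn = 0x%llx\n", va, vpn);
	return 0;
}
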
690
691/*
692 * Document me.
693 */
694static void __init iSeries_setup_arch(void)
695{
696 unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index;
697
698 /* Add an eye catcher and the systemcfg layout version number */
699 strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
700 systemcfg->version.major = SYSTEMCFG_MAJOR;
701 systemcfg->version.minor = SYSTEMCFG_MINOR;
702
703 /* Setup the Lp Event Queue */
704 setup_hvlpevent_queue();
705
706 /* Compute processor frequency */
707 procFreqHz = ((1UL << 34) * 1000000) /
708 xIoHriProcessorVpd[procIx].xProcFreq;
709 procFreqMhz = procFreqHz / 1000000;
710 procFreqMhzHundreths = (procFreqHz / 10000) - (procFreqMhz * 100);
711 ppc_proc_freq = procFreqHz;
712
713 /* Compute time base frequency */
714 tbFreqHz = ((1UL << 32) * 1000000) /
715 xIoHriProcessorVpd[procIx].xTimeBaseFreq;
716 tbFreqMhz = tbFreqHz / 1000000;
717 tbFreqMhzHundreths = (tbFreqHz / 10000) - (tbFreqMhz * 100);
718 ppc_tb_freq = tbFreqHz;
719
720 printk("Max logical processors = %d\n",
721 itVpdAreas.xSlicMaxLogicalProcs);
722 printk("Max physical processors = %d\n",
723 itVpdAreas.xSlicMaxPhysicalProcs);
724 printk("Processor frequency = %lu.%02lu\n", procFreqMhz,
725 procFreqMhzHundreths);
726 printk("Time base frequency = %lu.%02lu\n", tbFreqMhz,
727 tbFreqMhzHundreths);
728 systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR;
729 printk("Processor version = %x\n", systemcfg->processor);
730}
731
732static void iSeries_get_cpuinfo(struct seq_file *m)
733{
734 seq_printf(m, "machine\t\t: 64-bit iSeries Logical Partition\n");
735}
736
737/*
738 * Document me.
739 * and Implement me.
740 */
741static int iSeries_get_irq(struct pt_regs *regs)
742{
743 /* -2 means ignore this interrupt */
744 return -2;
745}
746
747/*
748 * Document me.
749 */
750static void iSeries_restart(char *cmd)
751{
752 mf_reboot();
753}
754
755/*
756 * Document me.
757 */
758static void iSeries_power_off(void)
759{
760 mf_power_off();
761}
762
763/*
764 * Document me.
765 */
766static void iSeries_halt(void)
767{
768 mf_power_off();
769}
770
771/*
772 * void __init iSeries_calibrate_decr()
773 *
774 * Description:
775 * This routine retrieves the internal processor frequency from the VPD,
776 * and sets up the kernel timer decrementer based on that value.
777 *
778 */
779static void __init iSeries_calibrate_decr(void)
780{
781 unsigned long cyclesPerUsec;
782 struct div_result divres;
783
784 /* Compute decrementer (and TB) frequency in cycles/sec */
785 cyclesPerUsec = ppc_tb_freq / 1000000;
786
787 /*
788 * Set the amount to refresh the decrementer by. This
789 * is the number of decrementer ticks it takes for
790 * 1/HZ seconds.
791 */
792 tb_ticks_per_jiffy = ppc_tb_freq / HZ;
793
794#if 0
795 /* TEST CODE FOR ADJTIME */
796 tb_ticks_per_jiffy += tb_ticks_per_jiffy / 5000;
797 /* END OF TEST CODE */
798#endif
799
800 /*
801 * tb_ticks_per_sec = freq; would give better accuracy
802 * but tb_ticks_per_sec = tb_ticks_per_jiffy*HZ; assures
803 * that jiffies (and xtime) will match the time returned
804 * by do_gettimeofday.
805 */
806 tb_ticks_per_sec = tb_ticks_per_jiffy * HZ;
807 tb_ticks_per_usec = cyclesPerUsec;
808 tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
809 div128_by_32(1024 * 1024, 0, tb_ticks_per_sec, &divres);
810 tb_to_xs = divres.result_low;
811 setup_default_decr();
812}
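
The quantities set up above are simple ratios of the time base frequency; a hedged sketch with illustrative numbers only (the real values come from the VPD and the kernel's HZ):

#include <stdio.h>

int main(void)
{
	unsigned long tb_freq = 188044000UL;	/* illustrative time base, in Hz */
	unsigned long hz = 250;			/* illustrative scheduler HZ */

	unsigned long ticks_per_jiffy = tb_freq / hz;
	unsigned long ticks_per_usec  = tb_freq / 1000000;
	unsigned long ticks_per_sec   = ticks_per_jiffy * hz;

	printf("per jiffy = %lu, per usec = %lu, per sec = %lu\n",
	       ticks_per_jiffy, ticks_per_usec, ticks_per_sec);
	return 0;
}
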
813
814static void __init iSeries_progress(char * st, unsigned short code)
815{
816 printk("Progress: [%04x] - %s\n", (unsigned)code, st);
817 if (!piranha_simulator && mf_initialized) {
818 if (code != 0xffff)
819 mf_display_progress(code);
820 else
821 mf_clear_src();
822 }
823}
824
825static void __init iSeries_fixup_klimit(void)
826{
827 /*
828 * Change klimit to take into account any ram disk
829 * that may be included
830 */
831 if (naca.xRamDisk)
832 klimit = KERNELBASE + (u64)naca.xRamDisk +
833 (naca.xRamDiskSize * PAGE_SIZE);
834 else {
835 /*
836 * No ram disk was included - check and see if there
837 * was an embedded system map. Change klimit to take
838 * into account any embedded system map
839 */
840 if (embedded_sysmap_end)
841 klimit = KERNELBASE + ((embedded_sysmap_end + 4095) &
842 0xfffffffffffff000);
843 }
844}
845
846static int __init iSeries_src_init(void)
847{
848 /* clear the progress line */
849 ppc_md.progress(" ", 0xffff);
850 return 0;
851}
852
853late_initcall(iSeries_src_init);
854
855static inline void process_iSeries_events(void)
856{
857 asm volatile ("li 0,0x5555; sc" : : : "r0", "r3");
858}
859
860static void yield_shared_processor(void)
861{
862 unsigned long tb;
863
864 HvCall_setEnabledInterrupts(HvCall_MaskIPI |
865 HvCall_MaskLpEvent |
866 HvCall_MaskLpProd |
867 HvCall_MaskTimeout);
868
869 tb = get_tb();
870 /* Compute future tb value when yield should expire */
871 HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy);
872
873 /*
874 * The decrementer stops during the yield. Force a fake decrementer
875 * here and let the timer_interrupt code sort out the actual time.
876 */
877 get_paca()->lppaca.int_dword.fields.decr_int = 1;
878 process_iSeries_events();
879}
880
881static int iseries_shared_idle(void)
882{
883 while (1) {
884 while (!need_resched() && !hvlpevent_is_pending()) {
885 local_irq_disable();
886 ppc64_runlatch_off();
887
888 /* Recheck with irqs off */
889 if (!need_resched() && !hvlpevent_is_pending())
890 yield_shared_processor();
891
892 HMT_medium();
893 local_irq_enable();
894 }
895
896 ppc64_runlatch_on();
897
898 if (hvlpevent_is_pending())
899 process_iSeries_events();
900
901 schedule();
902 }
903
904 return 0;
905}
906
907static int iseries_dedicated_idle(void)
908{
909 long oldval;
910
911 while (1) {
912 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
913
914 if (!oldval) {
915 set_thread_flag(TIF_POLLING_NRFLAG);
916
917 while (!need_resched()) {
918 ppc64_runlatch_off();
919 HMT_low();
920
921 if (hvlpevent_is_pending()) {
922 HMT_medium();
923 ppc64_runlatch_on();
924 process_iSeries_events();
925 }
926 }
927
928 HMT_medium();
929 clear_thread_flag(TIF_POLLING_NRFLAG);
930 } else {
931 set_need_resched();
932 }
933
934 ppc64_runlatch_on();
935 schedule();
936 }
937
938 return 0;
939}
940
941#ifndef CONFIG_PCI
942void __init iSeries_init_IRQ(void) { }
943#endif
944
945void __init iSeries_early_setup(void)
946{
947 iSeries_fixup_klimit();
948
949 ppc_md.setup_arch = iSeries_setup_arch;
950 ppc_md.get_cpuinfo = iSeries_get_cpuinfo;
951 ppc_md.init_IRQ = iSeries_init_IRQ;
952 ppc_md.get_irq = iSeries_get_irq;
953	ppc_md.init_early = iSeries_init_early;
954
955 ppc_md.pcibios_fixup = iSeries_pci_final_fixup;
956
957 ppc_md.restart = iSeries_restart;
958 ppc_md.power_off = iSeries_power_off;
959 ppc_md.halt = iSeries_halt;
960
961 ppc_md.get_boot_time = iSeries_get_boot_time;
962 ppc_md.set_rtc_time = iSeries_set_rtc_time;
963 ppc_md.get_rtc_time = iSeries_get_rtc_time;
964 ppc_md.calibrate_decr = iSeries_calibrate_decr;
965 ppc_md.progress = iSeries_progress;
966
967 /* XXX Implement enable_pmcs for iSeries */
968
969 if (get_paca()->lppaca.shared_proc) {
970 ppc_md.idle_loop = iseries_shared_idle;
971 printk(KERN_INFO "Using shared processor idle loop\n");
972 } else {
973 ppc_md.idle_loop = iseries_dedicated_idle;
974 printk(KERN_INFO "Using dedicated idle loop\n");
975 }
976}
977
diff --git a/arch/ppc64/kernel/iSeries_setup.h b/arch/ppc64/kernel/iSeries_setup.h
deleted file mode 100644
index c6eb29a245ac..000000000000
--- a/arch/ppc64/kernel/iSeries_setup.h
+++ /dev/null
@@ -1,26 +0,0 @@
1/*
2 * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
3 * Copyright (c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
4 *
5 * Module name: as400_setup.h
6 *
7 * Description:
8 * Architecture- / platform-specific boot-time initialization code for
9 * the IBM AS/400 LPAR. Adapted from original code by Grant Erickson and
10 * code by Gary Thomas, Cort Dougan <cort@cs.nmt.edu>, and Dan Malek
11 * <dan@netx4.com>.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18
19#ifndef __ISERIES_SETUP_H__
20#define __ISERIES_SETUP_H__
21
22extern void iSeries_get_boot_time(struct rtc_time *tm);
23extern int iSeries_set_rtc_time(struct rtc_time *tm);
24extern void iSeries_get_rtc_time(struct rtc_time *tm);
25
26#endif /* __ISERIES_SETUP_H__ */
diff --git a/arch/ppc64/kernel/iSeries_smp.c b/arch/ppc64/kernel/iSeries_smp.c
deleted file mode 100644
index f74386e31638..000000000000
--- a/arch/ppc64/kernel/iSeries_smp.c
+++ /dev/null
@@ -1,149 +0,0 @@
1/*
2 * SMP support for iSeries machines.
3 *
4 * Dave Engebretsen, Peter Bergner, and
5 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
6 *
7 * Plus various changes from other IBM teams...
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#undef DEBUG
16
17#include <linux/config.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/sched.h>
21#include <linux/smp.h>
22#include <linux/smp_lock.h>
23#include <linux/interrupt.h>
24#include <linux/kernel_stat.h>
25#include <linux/delay.h>
26#include <linux/init.h>
27#include <linux/spinlock.h>
28#include <linux/cache.h>
29#include <linux/err.h>
30#include <linux/sysdev.h>
31#include <linux/cpu.h>
32
33#include <asm/ptrace.h>
34#include <asm/atomic.h>
35#include <asm/irq.h>
36#include <asm/page.h>
37#include <asm/pgtable.h>
38#include <asm/io.h>
39#include <asm/smp.h>
40#include <asm/paca.h>
41#include <asm/iSeries/HvCall.h>
42#include <asm/time.h>
43#include <asm/ppcdebug.h>
44#include <asm/machdep.h>
45#include <asm/cputable.h>
46#include <asm/system.h>
47
48static unsigned long iSeries_smp_message[NR_CPUS];
49
50void iSeries_smp_message_recv( struct pt_regs * regs )
51{
52 int cpu = smp_processor_id();
53 int msg;
54
55 if ( num_online_cpus() < 2 )
56 return;
57
58 for ( msg = 0; msg < 4; ++msg )
59 if ( test_and_clear_bit( msg, &iSeries_smp_message[cpu] ) )
60 smp_message_recv( msg, regs );
61}
62
63static inline void smp_iSeries_do_message(int cpu, int msg)
64{
65 set_bit(msg, &iSeries_smp_message[cpu]);
66 HvCall_sendIPI(&(paca[cpu]));
67}
68
69static void smp_iSeries_message_pass(int target, int msg)
70{
71 int i;
72
73 if (target < NR_CPUS)
74 smp_iSeries_do_message(target, msg);
75 else {
76 for_each_online_cpu(i) {
77 if (target == MSG_ALL_BUT_SELF
78 && i == smp_processor_id())
79 continue;
80 smp_iSeries_do_message(i, msg);
81 }
82 }
83}
84
85static int smp_iSeries_numProcs(void)
86{
87 unsigned np, i;
88
89 np = 0;
90 for (i=0; i < NR_CPUS; ++i) {
91 if (paca[i].lppaca.dyn_proc_status < 2) {
92 cpu_set(i, cpu_possible_map);
93 cpu_set(i, cpu_present_map);
94 cpu_set(i, cpu_sibling_map[i]);
95 ++np;
96 }
97 }
98 return np;
99}
100
101static int smp_iSeries_probe(void)
102{
103 unsigned i;
104 unsigned np = 0;
105
106 for (i=0; i < NR_CPUS; ++i) {
107 if (paca[i].lppaca.dyn_proc_status < 2) {
108 /*paca[i].active = 1;*/
109 ++np;
110 }
111 }
112
113 return np;
114}
115
116static void smp_iSeries_kick_cpu(int nr)
117{
118 BUG_ON(nr < 0 || nr >= NR_CPUS);
119
120 /* Verify that our partition has a processor nr */
121 if (paca[nr].lppaca.dyn_proc_status >= 2)
122 return;
123
124 /* The processor is currently spinning, waiting
125 * for the cpu_start field to become non-zero
126 * After we set cpu_start, the processor will
127 * continue on to secondary_start in iSeries_head.S
128 */
129 paca[nr].cpu_start = 1;
130}
131
132static void __devinit smp_iSeries_setup_cpu(int nr)
133{
134}
135
136static struct smp_ops_t iSeries_smp_ops = {
137 .message_pass = smp_iSeries_message_pass,
138 .probe = smp_iSeries_probe,
139 .kick_cpu = smp_iSeries_kick_cpu,
140 .setup_cpu = smp_iSeries_setup_cpu,
141};
142
143/* This is called very early. */
144void __init smp_init_iSeries(void)
145{
146 smp_ops = &iSeries_smp_ops;
147 systemcfg->processorCount = smp_iSeries_numProcs();
148}
149
diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c
deleted file mode 100644
index 6b754b0c8344..000000000000
--- a/arch/ppc64/kernel/iSeries_vio.c
+++ /dev/null
@@ -1,155 +0,0 @@
1/*
2 * IBM PowerPC iSeries Virtual I/O Infrastructure Support.
3 *
4 * Copyright (c) 2005 Stephen Rothwell, IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/types.h>
12#include <linux/device.h>
13#include <linux/init.h>
14
15#include <asm/vio.h>
16#include <asm/iommu.h>
17#include <asm/abs_addr.h>
18#include <asm/page.h>
19#include <asm/iSeries/vio.h>
20#include <asm/iSeries/HvTypes.h>
21#include <asm/iSeries/HvLpConfig.h>
22#include <asm/iSeries/HvCallXm.h>
23
24struct device *iSeries_vio_dev = &vio_bus_device.dev;
25EXPORT_SYMBOL(iSeries_vio_dev);
26
27static struct iommu_table veth_iommu_table;
28static struct iommu_table vio_iommu_table;
29
30static void __init iommu_vio_init(void)
31{
32 struct iommu_table *t;
33 struct iommu_table_cb cb;
34 unsigned long cbp;
35 unsigned long itc_entries;
36
37 cb.itc_busno = 255; /* Bus 255 is the virtual bus */
38 cb.itc_virtbus = 0xff; /* Ask for virtual bus */
39
40 cbp = virt_to_abs(&cb);
41 HvCallXm_getTceTableParms(cbp);
42
43 itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
44 veth_iommu_table.it_size = itc_entries / 2;
45 veth_iommu_table.it_busno = cb.itc_busno;
46 veth_iommu_table.it_offset = cb.itc_offset;
47 veth_iommu_table.it_index = cb.itc_index;
48 veth_iommu_table.it_type = TCE_VB;
49 veth_iommu_table.it_blocksize = 1;
50
51 t = iommu_init_table(&veth_iommu_table);
52
53 if (!t)
54 printk("Virtual Bus VETH TCE table failed.\n");
55
56 vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size;
57 vio_iommu_table.it_busno = cb.itc_busno;
58 vio_iommu_table.it_offset = cb.itc_offset +
59 veth_iommu_table.it_size;
60 vio_iommu_table.it_index = cb.itc_index;
61 vio_iommu_table.it_type = TCE_VB;
62 vio_iommu_table.it_blocksize = 1;
63
64 t = iommu_init_table(&vio_iommu_table);
65
66 if (!t)
67 printk("Virtual Bus VIO TCE table failed.\n");
68}
69
70/**
71 * vio_register_device_iseries: - Register a new iSeries vio device.
72 * @voidev: The device to register.
73 */
74static struct vio_dev *__init vio_register_device_iseries(char *type,
75 uint32_t unit_num)
76{
77 struct vio_dev *viodev;
78
79 /* allocate a vio_dev for this device */
80 viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
81 if (!viodev)
82 return NULL;
83 memset(viodev, 0, sizeof(struct vio_dev));
84
85 snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num);
86
87 viodev->name = viodev->dev.bus_id;
88 viodev->type = type;
89 viodev->unit_address = unit_num;
90 viodev->iommu_table = &vio_iommu_table;
91 if (vio_register_device(viodev) == NULL) {
92 kfree(viodev);
93 return NULL;
94 }
95 return viodev;
96}
97
98void __init probe_bus_iseries(void)
99{
100 HvLpIndexMap vlan_map;
101 struct vio_dev *viodev;
102 int i;
103
104 /* there is only one of each of these */
105 vio_register_device_iseries("viocons", 0);
106 vio_register_device_iseries("vscsi", 0);
107
108 vlan_map = HvLpConfig_getVirtualLanIndexMap();
109 for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
110 if ((vlan_map & (0x8000 >> i)) == 0)
111 continue;
112 viodev = vio_register_device_iseries("vlan", i);
113		/* veth is special and has its own iommu_table */
114 viodev->iommu_table = &veth_iommu_table;
115 }
116 for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++)
117 vio_register_device_iseries("viodasd", i);
118 for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++)
119 vio_register_device_iseries("viocd", i);
120 for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++)
121 vio_register_device_iseries("viotape", i);
122}
123
124/**
125 * vio_match_device_iseries: - Tell if an iSeries VIO device matches a
126 * vio_device_id
127 */
128static int vio_match_device_iseries(const struct vio_device_id *id,
129 const struct vio_dev *dev)
130{
131 return strncmp(dev->type, id->type, strlen(id->type)) == 0;
132}
133
134static struct vio_bus_ops vio_bus_ops_iseries = {
135 .match = vio_match_device_iseries,
136};
137
138/**
139 * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus
140 */
141static int __init vio_bus_init_iseries(void)
142{
143 int err;
144
145 err = vio_bus_init(&vio_bus_ops_iseries);
146 if (err == 0) {
147 iommu_vio_init();
148 vio_bus_device.iommu_table = &vio_iommu_table;
149 iSeries_vio_dev = &vio_bus_device.dev;
150 probe_bus_iseries();
151 }
152 return err;
153}
154
155__initcall(vio_bus_init_iseries);
diff --git a/arch/ppc64/kernel/idle_power4.S b/arch/ppc64/kernel/idle_power4.S
deleted file mode 100644
index ca02afe2a795..000000000000
--- a/arch/ppc64/kernel/idle_power4.S
+++ /dev/null
@@ -1,79 +0,0 @@
1/*
2 * This file contains the power_save function for 6xx & 7xxx CPUs
3 * rewritten in assembler
4 *
5 * Warning ! This code assumes that if your machine has a 750fx
6 * it will have PLL 1 set to low speed mode (used during NAP/DOZE).
7 * if this is not the case some additional changes will have to
8 * be done to check a runtime var (a bit like powersave-nap)
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/threads.h>
18#include <asm/processor.h>
19#include <asm/page.h>
20#include <asm/cputable.h>
21#include <asm/thread_info.h>
22#include <asm/ppc_asm.h>
23#include <asm/asm-offsets.h>
24
25#undef DEBUG
26
27 .text
28
29/*
30 * Here is the power_save_6xx function. This could eventually be
31 * split into several functions & changing the function pointer
32 * depending on the various features.
33 */
34_GLOBAL(power4_idle)
35BEGIN_FTR_SECTION
36 blr
37END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
38 /* We must dynamically check for the NAP feature as it
39 * can be cleared by CPU init after the fixups are done
40 */
41 LOADBASE(r3,cur_cpu_spec)
42 ld r4,cur_cpu_spec@l(r3)
43 ld r4,CPU_SPEC_FEATURES(r4)
44 andi. r0,r4,CPU_FTR_CAN_NAP
45 beqlr
46 /* Now check if user or arch enabled NAP mode */
47 LOADBASE(r3,powersave_nap)
48 lwz r4,powersave_nap@l(r3)
49 cmpwi 0,r4,0
50 beqlr
51
52 /* Clear MSR:EE */
53 mfmsr r7
54 li r4,0
55 ori r4,r4,MSR_EE
56 andc r0,r7,r4
57 mtmsrd r0
58
59 /* Check current_thread_info()->flags */
60 clrrdi r4,r1,THREAD_SHIFT
61 ld r4,TI_FLAGS(r4)
62 andi. r0,r4,_TIF_NEED_RESCHED
63 beq 1f
64 mtmsrd r7 /* out of line this ? */
65 blr
661:
67 /* Go to NAP now */
68BEGIN_FTR_SECTION
69 DSSALL
70 sync
71END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
72 oris r7,r7,MSR_POW@h
73 sync
74 isync
75 mtmsrd r7
76 isync
77 sync
78 blr
79
diff --git a/arch/ppc64/kernel/init_task.c b/arch/ppc64/kernel/init_task.c
deleted file mode 100644
index 941043ae040f..000000000000
--- a/arch/ppc64/kernel/init_task.c
+++ /dev/null
@@ -1,36 +0,0 @@
1#include <linux/mm.h>
2#include <linux/module.h>
3#include <linux/sched.h>
4#include <linux/init.h>
5#include <linux/init_task.h>
6#include <linux/fs.h>
7#include <linux/mqueue.h>
8#include <asm/uaccess.h>
9
10static struct fs_struct init_fs = INIT_FS;
11static struct files_struct init_files = INIT_FILES;
12static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
13static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
14struct mm_struct init_mm = INIT_MM(init_mm);
15
16EXPORT_SYMBOL(init_mm);
17
18/*
19 * Initial thread structure.
20 *
21 * We need to make sure that this is 16384-byte aligned due to the
22 * way process stacks are handled. This is done by having a special
23 * "init_task" linker map entry..
24 */
25union thread_union init_thread_union
26 __attribute__((__section__(".data.init_task"))) =
27 { INIT_THREAD_INFO(init_task) };
28
29/*
30 * Initial task structure.
31 *
32 * All other task structs will be allocated on slabs in fork.c
33 */
34struct task_struct init_task = INIT_TASK(init_task);
35
36EXPORT_SYMBOL(init_task);
diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c
index 9c6facc24f70..ed876a5178ae 100644
--- a/arch/ppc64/kernel/kprobes.c
+++ b/arch/ppc64/kernel/kprobes.c
@@ -395,7 +395,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 		if (post_kprobe_handler(args->regs))
 			ret = NOTIFY_STOP;
 		break;
-	case DIE_GPF:
 	case DIE_PAGE_FAULT:
 		if (kprobe_running() &&
 		    kprobe_fault_handler(args->regs, args->trapnr))
diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c
deleted file mode 100644
index 5adaca2ddc9d..000000000000
--- a/arch/ppc64/kernel/lmb.c
+++ /dev/null
@@ -1,299 +0,0 @@
1/*
2 * Procedures for interfacing to Open Firmware.
3 *
4 * Peter Bergner, IBM Corp. June 2001.
5 * Copyright (C) 2001 Peter Bergner.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/kernel.h>
15#include <linux/init.h>
16#include <linux/bitops.h>
17#include <asm/types.h>
18#include <asm/page.h>
19#include <asm/prom.h>
20#include <asm/lmb.h>
21#include <asm/abs_addr.h>
22
23struct lmb lmb;
24
25#undef DEBUG
26
27void lmb_dump_all(void)
28{
29#ifdef DEBUG
30 unsigned long i;
31
32 udbg_printf("lmb_dump_all:\n");
33 udbg_printf(" memory.cnt = 0x%lx\n",
34 lmb.memory.cnt);
35 udbg_printf(" memory.size = 0x%lx\n",
36 lmb.memory.size);
37 for (i=0; i < lmb.memory.cnt ;i++) {
38 udbg_printf(" memory.region[0x%x].base = 0x%lx\n",
39 i, lmb.memory.region[i].base);
40 udbg_printf(" .size = 0x%lx\n",
41 lmb.memory.region[i].size);
42 }
43
44 udbg_printf("\n reserved.cnt = 0x%lx\n",
45 lmb.reserved.cnt);
46 udbg_printf(" reserved.size = 0x%lx\n",
47 lmb.reserved.size);
48 for (i=0; i < lmb.reserved.cnt ;i++) {
49 udbg_printf(" reserved.region[0x%x].base = 0x%lx\n",
50 i, lmb.reserved.region[i].base);
51 udbg_printf(" .size = 0x%lx\n",
52 lmb.reserved.region[i].size);
53 }
54#endif /* DEBUG */
55}
56
57static unsigned long __init
58lmb_addrs_overlap(unsigned long base1, unsigned long size1,
59 unsigned long base2, unsigned long size2)
60{
61 return ((base1 < (base2+size2)) && (base2 < (base1+size1)));
62}
63
64static long __init
65lmb_addrs_adjacent(unsigned long base1, unsigned long size1,
66 unsigned long base2, unsigned long size2)
67{
68 if (base2 == base1 + size1)
69 return 1;
70 else if (base1 == base2 + size2)
71 return -1;
72
73 return 0;
74}
75
76static long __init
77lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
78{
79 unsigned long base1 = rgn->region[r1].base;
80 unsigned long size1 = rgn->region[r1].size;
81 unsigned long base2 = rgn->region[r2].base;
82 unsigned long size2 = rgn->region[r2].size;
83
84 return lmb_addrs_adjacent(base1, size1, base2, size2);
85}
86
87/* Assumption: base addr of region 1 < base addr of region 2 */
88static void __init
89lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
90{
91 unsigned long i;
92
93 rgn->region[r1].size += rgn->region[r2].size;
94 for (i=r2; i < rgn->cnt-1; i++) {
95 rgn->region[i].base = rgn->region[i+1].base;
96 rgn->region[i].size = rgn->region[i+1].size;
97 }
98 rgn->cnt--;
99}
100
101/* This routine called with relocation disabled. */
102void __init
103lmb_init(void)
104{
105 /* Create a dummy zero size LMB which will get coalesced away later.
106 * This simplifies the lmb_add() code below...
107 */
108 lmb.memory.region[0].base = 0;
109 lmb.memory.region[0].size = 0;
110 lmb.memory.cnt = 1;
111
112 /* Ditto. */
113 lmb.reserved.region[0].base = 0;
114 lmb.reserved.region[0].size = 0;
115 lmb.reserved.cnt = 1;
116}
117
118/* This routine called with relocation disabled. */
119void __init
120lmb_analyze(void)
121{
122 int i;
123
124 lmb.memory.size = 0;
125
126 for (i = 0; i < lmb.memory.cnt; i++)
127 lmb.memory.size += lmb.memory.region[i].size;
128}
129
130/* This routine called with relocation disabled. */
131static long __init
132lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
133{
134 unsigned long i, coalesced = 0;
135 long adjacent;
136
137 /* First try and coalesce this LMB with another. */
138 for (i=0; i < rgn->cnt; i++) {
139 unsigned long rgnbase = rgn->region[i].base;
140 unsigned long rgnsize = rgn->region[i].size;
141
142 adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize);
143 if ( adjacent > 0 ) {
144 rgn->region[i].base -= size;
145 rgn->region[i].size += size;
146 coalesced++;
147 break;
148 }
149 else if ( adjacent < 0 ) {
150 rgn->region[i].size += size;
151 coalesced++;
152 break;
153 }
154 }
155
156 if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) {
157 lmb_coalesce_regions(rgn, i, i+1);
158 coalesced++;
159 }
160
161 if ( coalesced ) {
162 return coalesced;
163 } else if ( rgn->cnt >= MAX_LMB_REGIONS ) {
164 return -1;
165 }
166
167 /* Couldn't coalesce the LMB, so add it to the sorted table. */
168 for (i=rgn->cnt-1; i >= 0; i--) {
169 if (base < rgn->region[i].base) {
170 rgn->region[i+1].base = rgn->region[i].base;
171 rgn->region[i+1].size = rgn->region[i].size;
172 } else {
173 rgn->region[i+1].base = base;
174 rgn->region[i+1].size = size;
175 break;
176 }
177 }
178 rgn->cnt++;
179
180 return 0;
181}
182
183/* This routine called with relocation disabled. */
184long __init
185lmb_add(unsigned long base, unsigned long size)
186{
187 struct lmb_region *_rgn = &(lmb.memory);
188
189 /* On pSeries LPAR systems, the first LMB is our RMO region. */
190 if ( base == 0 )
191 lmb.rmo_size = size;
192
193 return lmb_add_region(_rgn, base, size);
194
195}
196
197long __init
198lmb_reserve(unsigned long base, unsigned long size)
199{
200 struct lmb_region *_rgn = &(lmb.reserved);
201
202 return lmb_add_region(_rgn, base, size);
203}
204
205long __init
206lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
207{
208 unsigned long i;
209
210 for (i=0; i < rgn->cnt; i++) {
211 unsigned long rgnbase = rgn->region[i].base;
212 unsigned long rgnsize = rgn->region[i].size;
213 if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) {
214 break;
215 }
216 }
217
218 return (i < rgn->cnt) ? i : -1;
219}
220
221unsigned long __init
222lmb_alloc(unsigned long size, unsigned long align)
223{
224 return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
225}
226
227unsigned long __init
228lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
229{
230 long i, j;
231 unsigned long base = 0;
232
233 for (i=lmb.memory.cnt-1; i >= 0; i--) {
234 unsigned long lmbbase = lmb.memory.region[i].base;
235 unsigned long lmbsize = lmb.memory.region[i].size;
236
237 if ( max_addr == LMB_ALLOC_ANYWHERE )
238 base = _ALIGN_DOWN(lmbbase+lmbsize-size, align);
239 else if ( lmbbase < max_addr )
240 base = _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, align);
241 else
242 continue;
243
244 while ( (lmbbase <= base) &&
245 ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) {
246 base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align);
247 }
248
249 if ( (base != 0) && (lmbbase <= base) )
250 break;
251 }
252
253 if ( i < 0 )
254 return 0;
255
256 lmb_add_region(&lmb.reserved, base, size);
257
258 return base;
259}
260
261/* You must call lmb_analyze() before this. */
262unsigned long __init
263lmb_phys_mem_size(void)
264{
265 return lmb.memory.size;
266}
267
268unsigned long __init
269lmb_end_of_DRAM(void)
270{
271 int idx = lmb.memory.cnt - 1;
272
273 return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
274}
275
276/*
277 * Truncate the lmb list to memory_limit if it's set
278 * You must call lmb_analyze() after this.
279 */
280void __init lmb_enforce_memory_limit(void)
281{
282 extern unsigned long memory_limit;
283 unsigned long i, limit;
284
285 if (! memory_limit)
286 return;
287
288 limit = memory_limit;
289 for (i = 0; i < lmb.memory.cnt; i++) {
290 if (limit > lmb.memory.region[i].size) {
291 limit -= lmb.memory.region[i].size;
292 continue;
293 }
294
295 lmb.memory.region[i].size = limit;
296 lmb.memory.cnt = i + 1;
297 break;
298 }
299}
diff --git a/arch/ppc64/kernel/lparmap.c b/arch/ppc64/kernel/lparmap.c
deleted file mode 100644
index b81de286df5e..000000000000
--- a/arch/ppc64/kernel/lparmap.c
+++ /dev/null
@@ -1,31 +0,0 @@
1/*
2 * Copyright (C) 2005 Stephen Rothwell IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#include <asm/mmu.h>
10#include <asm/page.h>
11#include <asm/iSeries/LparMap.h>
12
13const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
14 .xNumberEsids = HvEsidsToMap,
15 .xNumberRanges = HvRangesToMap,
16 .xSegmentTableOffs = STAB0_PAGE,
17
18 .xEsids = {
19 { .xKernelEsid = GET_ESID(KERNELBASE),
20 .xKernelVsid = KERNEL_VSID(KERNELBASE), },
21 { .xKernelEsid = GET_ESID(VMALLOCBASE),
22 .xKernelVsid = KERNEL_VSID(VMALLOCBASE), },
23 },
24
25 .xRanges = {
26 { .xPages = HvPagesToMap,
27 .xOffset = 0,
28 .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT),
29 },
30 },
31};
diff --git a/arch/ppc64/kernel/maple_pci.c b/arch/ppc64/kernel/maple_pci.c
index 1d297e0edfc0..0937649f4961 100644
--- a/arch/ppc64/kernel/maple_pci.c
+++ b/arch/ppc64/kernel/maple_pci.c
@@ -23,8 +23,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
 #include <asm/iommu.h>
-
-#include "pci.h"
+#include <asm/ppc-pci.h>
 
 #ifdef DEBUG
 #define DBG(x...) printk(x)
diff --git a/arch/ppc64/kernel/maple_setup.c b/arch/ppc64/kernel/maple_setup.c
index fc0567498a3a..22987675f544 100644
--- a/arch/ppc64/kernel/maple_setup.c
+++ b/arch/ppc64/kernel/maple_setup.c
@@ -59,8 +59,8 @@
 #include <asm/time.h>
 #include <asm/of_device.h>
 #include <asm/lmb.h>
-
-#include "mpic.h"
+#include <asm/mpic.h>
+#include <asm/udbg.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
diff --git a/arch/ppc64/kernel/mf.c b/arch/ppc64/kernel/mf.c
deleted file mode 100644
index ef4a338ebd01..000000000000
--- a/arch/ppc64/kernel/mf.c
+++ /dev/null
@@ -1,1281 +0,0 @@
1/*
2 * mf.c
3 * Copyright (C) 2001 Troy D. Armstrong IBM Corporation
4 * Copyright (C) 2004-2005 Stephen Rothwell IBM Corporation
5 *
6 * This module exists as an interface between a Linux secondary partition
7 * running on an iSeries and the primary partition's Virtual Service
8 * Processor (VSP) object. The VSP has final authority over powering on/off
9 * all partitions in the iSeries. It also provides miscellaneous low-level
10 * machine facility type operations.
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/types.h>
29#include <linux/errno.h>
30#include <linux/kernel.h>
31#include <linux/init.h>
32#include <linux/completion.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/bcd.h>
36
37#include <asm/time.h>
38#include <asm/uaccess.h>
39#include <asm/paca.h>
40#include <asm/iSeries/vio.h>
41#include <asm/iSeries/mf.h>
42#include <asm/iSeries/HvLpConfig.h>
43#include <asm/iSeries/ItLpQueue.h>
44
45/*
46 * This is the structure layout for the Machine Facilities LPAR event
47 * flows.
48 */
49struct vsp_cmd_data {
50 u64 token;
51 u16 cmd;
52 HvLpIndex lp_index;
53 u8 result_code;
54 u32 reserved;
55 union {
56 u64 state; /* GetStateOut */
57 u64 ipl_type; /* GetIplTypeOut, Function02SelectIplTypeIn */
58 u64 ipl_mode; /* GetIplModeOut, Function02SelectIplModeIn */
59 u64 page[4]; /* GetSrcHistoryIn */
60 u64 flag; /* GetAutoIplWhenPrimaryIplsOut,
61 SetAutoIplWhenPrimaryIplsIn,
62 WhiteButtonPowerOffIn,
63 Function08FastPowerOffIn,
64 IsSpcnRackPowerIncompleteOut */
65 struct {
66 u64 token;
67 u64 address_type;
68 u64 side;
69 u32 length;
70 u32 offset;
71 } kern; /* SetKernelImageIn, GetKernelImageIn,
72 SetKernelCmdLineIn, GetKernelCmdLineIn */
73 u32 length_out; /* GetKernelImageOut, GetKernelCmdLineOut */
74 u8 reserved[80];
75 } sub_data;
76};
77
78struct vsp_rsp_data {
79 struct completion com;
80 struct vsp_cmd_data *response;
81};
82
83struct alloc_data {
84 u16 size;
85 u16 type;
86 u32 count;
87 u16 reserved1;
88 u8 reserved2;
89 HvLpIndex target_lp;
90};
91
92struct ce_msg_data;
93
94typedef void (*ce_msg_comp_hdlr)(void *token, struct ce_msg_data *vsp_cmd_rsp);
95
96struct ce_msg_comp_data {
97 ce_msg_comp_hdlr handler;
98 void *token;
99};
100
101struct ce_msg_data {
102 u8 ce_msg[12];
103 char reserved[4];
104 struct ce_msg_comp_data *completion;
105};
106
107struct io_mf_lp_event {
108 struct HvLpEvent hp_lp_event;
109 u16 subtype_result_code;
110 u16 reserved1;
111 u32 reserved2;
112 union {
113 struct alloc_data alloc;
114 struct ce_msg_data ce_msg;
115 struct vsp_cmd_data vsp_cmd;
116 } data;
117};
118
119#define subtype_data(a, b, c, d) \
120 (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
121
122/*
123 * All outgoing event traffic is kept on a FIFO queue. The first
124 * pointer points to the one that is outstanding, and all new
125 * requests get stuck on the end. Also, we keep a certain number of
126 * preallocated pending events so that we can operate very early in
127 * the boot up sequence (before kmalloc is ready).
128 */
129struct pending_event {
130 struct pending_event *next;
131 struct io_mf_lp_event event;
132 MFCompleteHandler hdlr;
133 char dma_data[72];
134 unsigned dma_data_length;
135 unsigned remote_address;
136};
137static spinlock_t pending_event_spinlock;
138static struct pending_event *pending_event_head;
139static struct pending_event *pending_event_tail;
140static struct pending_event *pending_event_avail;
141static struct pending_event pending_event_prealloc[16];
142
143/*
144 * Put a pending event onto the available queue, so it can get reused.
145 * Attention! You must have the pending_event_spinlock before calling!
146 */
147static void free_pending_event(struct pending_event *ev)
148{
149 if (ev != NULL) {
150 ev->next = pending_event_avail;
151 pending_event_avail = ev;
152 }
153}
154
155/*
156 * Enqueue the outbound event onto the stack. If the queue was
157 * empty to begin with, we must also issue it via the Hypervisor
158 * interface. There is a section of code below that will touch
159 * the first stack pointer without the protection of the pending_event_spinlock.
160 * This is OK, because we know that nobody else will be modifying
161 * the first pointer when we do this.
162 */
163static int signal_event(struct pending_event *ev)
164{
165 int rc = 0;
166 unsigned long flags;
167 int go = 1;
168 struct pending_event *ev1;
169 HvLpEvent_Rc hv_rc;
170
171 /* enqueue the event */
172 if (ev != NULL) {
173 ev->next = NULL;
174 spin_lock_irqsave(&pending_event_spinlock, flags);
175 if (pending_event_head == NULL)
176 pending_event_head = ev;
177 else {
178 go = 0;
179 pending_event_tail->next = ev;
180 }
181 pending_event_tail = ev;
182 spin_unlock_irqrestore(&pending_event_spinlock, flags);
183 }
184
185 /* send the event */
186 while (go) {
187 go = 0;
188
189 /* any DMA data to send beforehand? */
190 if (pending_event_head->dma_data_length > 0)
191 HvCallEvent_dmaToSp(pending_event_head->dma_data,
192 pending_event_head->remote_address,
193 pending_event_head->dma_data_length,
194 HvLpDma_Direction_LocalToRemote);
195
196 hv_rc = HvCallEvent_signalLpEvent(
197 &pending_event_head->event.hp_lp_event);
198 if (hv_rc != HvLpEvent_Rc_Good) {
199 printk(KERN_ERR "mf.c: HvCallEvent_signalLpEvent() "
200 "failed with %d\n", (int)hv_rc);
201
202 spin_lock_irqsave(&pending_event_spinlock, flags);
203 ev1 = pending_event_head;
204 pending_event_head = pending_event_head->next;
205 if (pending_event_head != NULL)
206 go = 1;
207 spin_unlock_irqrestore(&pending_event_spinlock, flags);
208
209 if (ev1 == ev)
210 rc = -EIO;
211 else if (ev1->hdlr != NULL)
212 (*ev1->hdlr)((void *)ev1->event.hp_lp_event.xCorrelationToken, -EIO);
213
214 spin_lock_irqsave(&pending_event_spinlock, flags);
215 free_pending_event(ev1);
216 spin_unlock_irqrestore(&pending_event_spinlock, flags);
217 }
218 }
219
220 return rc;
221}
222
223/*
224 * Allocate a new pending_event structure, and initialize it.
225 */
226static struct pending_event *new_pending_event(void)
227{
228 struct pending_event *ev = NULL;
229 HvLpIndex primary_lp = HvLpConfig_getPrimaryLpIndex();
230 unsigned long flags;
231 struct HvLpEvent *hev;
232
233 spin_lock_irqsave(&pending_event_spinlock, flags);
234 if (pending_event_avail != NULL) {
235 ev = pending_event_avail;
236 pending_event_avail = pending_event_avail->next;
237 }
238 spin_unlock_irqrestore(&pending_event_spinlock, flags);
239 if (ev == NULL) {
240 ev = kmalloc(sizeof(struct pending_event), GFP_ATOMIC);
241 if (ev == NULL) {
242 printk(KERN_ERR "mf.c: unable to kmalloc %ld bytes\n",
243 sizeof(struct pending_event));
244 return NULL;
245 }
246 }
247 memset(ev, 0, sizeof(struct pending_event));
248 hev = &ev->event.hp_lp_event;
249 hev->xFlags.xValid = 1;
250 hev->xFlags.xAckType = HvLpEvent_AckType_ImmediateAck;
251 hev->xFlags.xAckInd = HvLpEvent_AckInd_DoAck;
252 hev->xFlags.xFunction = HvLpEvent_Function_Int;
253 hev->xType = HvLpEvent_Type_MachineFac;
254 hev->xSourceLp = HvLpConfig_getLpIndex();
255 hev->xTargetLp = primary_lp;
256 hev->xSizeMinus1 = sizeof(ev->event) - 1;
257 hev->xRc = HvLpEvent_Rc_Good;
258 hev->xSourceInstanceId = HvCallEvent_getSourceLpInstanceId(primary_lp,
259 HvLpEvent_Type_MachineFac);
260 hev->xTargetInstanceId = HvCallEvent_getTargetLpInstanceId(primary_lp,
261 HvLpEvent_Type_MachineFac);
262
263 return ev;
264}
265
266static int signal_vsp_instruction(struct vsp_cmd_data *vsp_cmd)
267{
268 struct pending_event *ev = new_pending_event();
269 int rc;
270 struct vsp_rsp_data response;
271
272 if (ev == NULL)
273 return -ENOMEM;
274
275 init_completion(&response.com);
276 response.response = vsp_cmd;
277 ev->event.hp_lp_event.xSubtype = 6;
278 ev->event.hp_lp_event.x.xSubtypeData =
279 subtype_data('M', 'F', 'V', 'I');
280 ev->event.data.vsp_cmd.token = (u64)&response;
281 ev->event.data.vsp_cmd.cmd = vsp_cmd->cmd;
282 ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex();
283 ev->event.data.vsp_cmd.result_code = 0xFF;
284 ev->event.data.vsp_cmd.reserved = 0;
285 memcpy(&(ev->event.data.vsp_cmd.sub_data),
286 &(vsp_cmd->sub_data), sizeof(vsp_cmd->sub_data));
287 mb();
288
289 rc = signal_event(ev);
290 if (rc == 0)
291 wait_for_completion(&response.com);
292 return rc;
293}
294
295
296/*
297 * Send a 12-byte CE message to the primary partition VSP object
298 */
299static int signal_ce_msg(char *ce_msg, struct ce_msg_comp_data *completion)
300{
301 struct pending_event *ev = new_pending_event();
302
303 if (ev == NULL)
304 return -ENOMEM;
305
306 ev->event.hp_lp_event.xSubtype = 0;
307 ev->event.hp_lp_event.x.xSubtypeData =
308 subtype_data('M', 'F', 'C', 'E');
309 memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12);
310 ev->event.data.ce_msg.completion = completion;
311 return signal_event(ev);
312}
313
314/*
315 * Send a 12-byte CE message (with no data) to the primary partition VSP object
316 */
317static int signal_ce_msg_simple(u8 ce_op, struct ce_msg_comp_data *completion)
318{
319 u8 ce_msg[12];
320
321 memset(ce_msg, 0, sizeof(ce_msg));
322 ce_msg[3] = ce_op;
323 return signal_ce_msg(ce_msg, completion);
324}
325
326/*
327 * Send a 12-byte CE message and DMA data to the primary partition VSP object
328 */
329static int dma_and_signal_ce_msg(char *ce_msg,
330 struct ce_msg_comp_data *completion, void *dma_data,
331 unsigned dma_data_length, unsigned remote_address)
332{
333 struct pending_event *ev = new_pending_event();
334
335 if (ev == NULL)
336 return -ENOMEM;
337
338 ev->event.hp_lp_event.xSubtype = 0;
339 ev->event.hp_lp_event.x.xSubtypeData =
340 subtype_data('M', 'F', 'C', 'E');
341 memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12);
342 ev->event.data.ce_msg.completion = completion;
343 memcpy(ev->dma_data, dma_data, dma_data_length);
344 ev->dma_data_length = dma_data_length;
345 ev->remote_address = remote_address;
346 return signal_event(ev);
347}
348
349/*
350 * Initiate a nice (hopefully) shutdown of Linux. We simply are
351 * going to try and send the init process a SIGINT signal. If
352 * this fails (why?), we'll simply force it off in a not-so-nice
353 * manner.
354 */
355static int shutdown(void)
356{
357 int rc = kill_proc(1, SIGINT, 1);
358
359 if (rc) {
360 printk(KERN_ALERT "mf.c: SIGINT to init failed (%d), "
361 "hard shutdown commencing\n", rc);
362 mf_power_off();
363 } else
364 printk(KERN_INFO "mf.c: init has been successfully notified "
365 "to proceed with shutdown\n");
366 return rc;
367}
368
369/*
370 * The primary partition VSP object is sending us a new
371 * event flow. Handle it...
372 */
373static void handle_int(struct io_mf_lp_event *event)
374{
375 struct ce_msg_data *ce_msg_data;
376 struct ce_msg_data *pce_msg_data;
377 unsigned long flags;
378 struct pending_event *pev;
379
380 /* ack the interrupt */
381 event->hp_lp_event.xRc = HvLpEvent_Rc_Good;
382 HvCallEvent_ackLpEvent(&event->hp_lp_event);
383
384 /* process interrupt */
385 switch (event->hp_lp_event.xSubtype) {
386 case 0: /* CE message */
387 ce_msg_data = &event->data.ce_msg;
388 switch (ce_msg_data->ce_msg[3]) {
389 case 0x5B: /* power control notification */
390 if ((ce_msg_data->ce_msg[5] & 0x20) != 0) {
391 printk(KERN_INFO "mf.c: Commencing partition shutdown\n");
392 if (shutdown() == 0)
393 signal_ce_msg_simple(0xDB, NULL);
394 }
395 break;
396 case 0xC0: /* get time */
397 spin_lock_irqsave(&pending_event_spinlock, flags);
398 pev = pending_event_head;
399 if (pev != NULL)
400 pending_event_head = pending_event_head->next;
401 spin_unlock_irqrestore(&pending_event_spinlock, flags);
402 if (pev == NULL)
403 break;
404 pce_msg_data = &pev->event.data.ce_msg;
405 if (pce_msg_data->ce_msg[3] != 0x40)
406 break;
407 if (pce_msg_data->completion != NULL) {
408 ce_msg_comp_hdlr handler =
409 pce_msg_data->completion->handler;
410 void *token = pce_msg_data->completion->token;
411
412 if (handler != NULL)
413 (*handler)(token, ce_msg_data);
414 }
415 spin_lock_irqsave(&pending_event_spinlock, flags);
416 free_pending_event(pev);
417 spin_unlock_irqrestore(&pending_event_spinlock, flags);
418 /* send next waiting event */
419 if (pending_event_head != NULL)
420 signal_event(NULL);
421 break;
422 }
423 break;
424 case 1: /* IT sys shutdown */
425 printk(KERN_INFO "mf.c: Commencing system shutdown\n");
426 shutdown();
427 break;
428 }
429}
430
431/*
432 * The primary partition VSP object is acknowledging the receipt
433 * of a flow we sent to them. If there are other flows queued
434 * up, we must send another one now...
435 */
436static void handle_ack(struct io_mf_lp_event *event)
437{
438 unsigned long flags;
439 struct pending_event *two = NULL;
440 unsigned long free_it = 0;
441 struct ce_msg_data *ce_msg_data;
442 struct ce_msg_data *pce_msg_data;
443 struct vsp_rsp_data *rsp;
444
445 /* handle current event */
446 if (pending_event_head == NULL) {
447 printk(KERN_ERR "mf.c: stack empty for receiving ack\n");
448 return;
449 }
450
451 switch (event->hp_lp_event.xSubtype) {
452 case 0: /* CE msg */
453 ce_msg_data = &event->data.ce_msg;
454 if (ce_msg_data->ce_msg[3] != 0x40) {
455 free_it = 1;
456 break;
457 }
458 if (ce_msg_data->ce_msg[2] == 0)
459 break;
460 free_it = 1;
461 pce_msg_data = &pending_event_head->event.data.ce_msg;
462 if (pce_msg_data->completion != NULL) {
463 ce_msg_comp_hdlr handler =
464 pce_msg_data->completion->handler;
465 void *token = pce_msg_data->completion->token;
466
467 if (handler != NULL)
468 (*handler)(token, ce_msg_data);
469 }
470 break;
471 case 4: /* allocate */
472 case 5: /* deallocate */
473 if (pending_event_head->hdlr != NULL)
474 (*pending_event_head->hdlr)((void *)event->hp_lp_event.xCorrelationToken, event->data.alloc.count);
475 free_it = 1;
476 break;
477 case 6:
478 free_it = 1;
479 rsp = (struct vsp_rsp_data *)event->data.vsp_cmd.token;
480 if (rsp == NULL) {
481 printk(KERN_ERR "mf.c: no rsp\n");
482 break;
483 }
484 if (rsp->response != NULL)
485 memcpy(rsp->response, &event->data.vsp_cmd,
486 sizeof(event->data.vsp_cmd));
487 complete(&rsp->com);
488 break;
489 }
490
491 /* remove from queue */
492 spin_lock_irqsave(&pending_event_spinlock, flags);
493 if ((pending_event_head != NULL) && (free_it == 1)) {
494 struct pending_event *oldHead = pending_event_head;
495
496 pending_event_head = pending_event_head->next;
497 two = pending_event_head;
498 free_pending_event(oldHead);
499 }
500 spin_unlock_irqrestore(&pending_event_spinlock, flags);
501
502 /* send next waiting event */
503 if (two != NULL)
504 signal_event(NULL);
505}
506
507/*
508 * This is the generic event handler we are registering with
509 * the Hypervisor. Ensure the flows are for us, and then
510 * parse it enough to know if it is an interrupt or an
511 * acknowledge.
512 */
513static void hv_handler(struct HvLpEvent *event, struct pt_regs *regs)
514{
515 if ((event != NULL) && (event->xType == HvLpEvent_Type_MachineFac)) {
516 switch(event->xFlags.xFunction) {
517 case HvLpEvent_Function_Ack:
518 handle_ack((struct io_mf_lp_event *)event);
519 break;
520 case HvLpEvent_Function_Int:
521 handle_int((struct io_mf_lp_event *)event);
522 break;
523 default:
524 printk(KERN_ERR "mf.c: non ack/int event received\n");
525 break;
526 }
527 } else
528 printk(KERN_ERR "mf.c: alien event received\n");
529}
530
531/*
532 * Global kernel interface to allocate and seed events into the
533 * Hypervisor.
534 */
535void mf_allocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type,
536 unsigned size, unsigned count, MFCompleteHandler hdlr,
537 void *user_token)
538{
539 struct pending_event *ev = new_pending_event();
540 int rc;
541
542 if (ev == NULL) {
543 rc = -ENOMEM;
544 } else {
545 ev->event.hp_lp_event.xSubtype = 4;
546 ev->event.hp_lp_event.xCorrelationToken = (u64)user_token;
547 ev->event.hp_lp_event.x.xSubtypeData =
548 subtype_data('M', 'F', 'M', 'A');
549 ev->event.data.alloc.target_lp = target_lp;
550 ev->event.data.alloc.type = type;
551 ev->event.data.alloc.size = size;
552 ev->event.data.alloc.count = count;
553 ev->hdlr = hdlr;
554 rc = signal_event(ev);
555 }
556 if ((rc != 0) && (hdlr != NULL))
557 (*hdlr)(user_token, rc);
558}
559EXPORT_SYMBOL(mf_allocate_lp_events);
560
561/*
562 * Global kernel interface to unseed and deallocate events already in
563 * Hypervisor.
564 */
565void mf_deallocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type,
566 unsigned count, MFCompleteHandler hdlr, void *user_token)
567{
568 struct pending_event *ev = new_pending_event();
569 int rc;
570
571 if (ev == NULL)
572 rc = -ENOMEM;
573 else {
574 ev->event.hp_lp_event.xSubtype = 5;
575 ev->event.hp_lp_event.xCorrelationToken = (u64)user_token;
576 ev->event.hp_lp_event.x.xSubtypeData =
577 subtype_data('M', 'F', 'M', 'D');
578 ev->event.data.alloc.target_lp = target_lp;
579 ev->event.data.alloc.type = type;
580 ev->event.data.alloc.count = count;
581 ev->hdlr = hdlr;
582 rc = signal_event(ev);
583 }
584 if ((rc != 0) && (hdlr != NULL))
585 (*hdlr)(user_token, rc);
586}
587EXPORT_SYMBOL(mf_deallocate_lp_events);
588
589/*
590 * Global kernel interface to tell the VSP object in the primary
591 * partition to power this partition off.
592 */
593void mf_power_off(void)
594{
595 printk(KERN_INFO "mf.c: Down it goes...\n");
596 signal_ce_msg_simple(0x4d, NULL);
597 for (;;)
598 ;
599}
600
601/*
602 * Global kernel interface to tell the VSP object in the primary
603 * partition to reboot this partition.
604 */
605void mf_reboot(void)
606{
607 printk(KERN_INFO "mf.c: Preparing to bounce...\n");
608 signal_ce_msg_simple(0x4e, NULL);
609 for (;;)
610 ;
611}
612
613/*
614 * Display a single word SRC onto the VSP control panel.
615 */
616void mf_display_src(u32 word)
617{
618 u8 ce[12];
619
620 memset(ce, 0, sizeof(ce));
621 ce[3] = 0x4a;
622 ce[7] = 0x01;
623 ce[8] = word >> 24;
624 ce[9] = word >> 16;
625 ce[10] = word >> 8;
626 ce[11] = word;
627 signal_ce_msg(ce, NULL);
628}
629
630/*
631 * Display a single word SRC of the form "PROGXXXX" on the VSP control panel.
632 */
633void mf_display_progress(u16 value)
634{
635 u8 ce[12];
636 u8 src[72];
637
638 memcpy(ce, "\x00\x00\x04\x4A\x00\x00\x00\x48\x00\x00\x00\x00", 12);
639 memcpy(src, "\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00"
640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
641 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
642 "\x00\x00\x00\x00PROGxxxx ",
643 72);
644 src[6] = value >> 8;
645 src[7] = value & 255;
646 src[44] = "0123456789ABCDEF"[(value >> 12) & 15];
647 src[45] = "0123456789ABCDEF"[(value >> 8) & 15];
648 src[46] = "0123456789ABCDEF"[(value >> 4) & 15];
649 src[47] = "0123456789ABCDEF"[value & 15];
650 dma_and_signal_ce_msg(ce, NULL, src, sizeof(src), 9 * 64 * 1024);
651}
652
653/*
654 * Clear the VSP control panel. Used to "erase" an SRC that was
655 * previously displayed.
656 */
657void mf_clear_src(void)
658{
659 signal_ce_msg_simple(0x4b, NULL);
660}
661
662/*
663 * Initialization code here.
664 */
665void mf_init(void)
666{
667 int i;
668
669 /* initialize */
670 spin_lock_init(&pending_event_spinlock);
671 for (i = 0;
672 i < sizeof(pending_event_prealloc) / sizeof(*pending_event_prealloc);
673 ++i)
674 free_pending_event(&pending_event_prealloc[i]);
675 HvLpEvent_registerHandler(HvLpEvent_Type_MachineFac, &hv_handler);
676
677 /* virtual continue ack */
678 signal_ce_msg_simple(0x57, NULL);
679
680 /* initialization complete */
681 printk(KERN_NOTICE "mf.c: iSeries Linux LPAR Machine Facilities "
682 "initialized\n");
683}
684
685struct rtc_time_data {
686 struct completion com;
687 struct ce_msg_data ce_msg;
688 int rc;
689};
690
691static void get_rtc_time_complete(void *token, struct ce_msg_data *ce_msg)
692{
693 struct rtc_time_data *rtc = token;
694
695 memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg));
696 rtc->rc = 0;
697 complete(&rtc->com);
698}
699
700static int rtc_set_tm(int rc, u8 *ce_msg, struct rtc_time *tm)
701{
702 tm->tm_wday = 0;
703 tm->tm_yday = 0;
704 tm->tm_isdst = 0;
705 if (rc) {
706 tm->tm_sec = 0;
707 tm->tm_min = 0;
708 tm->tm_hour = 0;
709 tm->tm_mday = 15;
710 tm->tm_mon = 5;
711 tm->tm_year = 52;
712 return rc;
713 }
714
715 if ((ce_msg[2] == 0xa9) ||
716 (ce_msg[2] == 0xaf)) {
717 /* TOD clock is not set */
718 tm->tm_sec = 1;
719 tm->tm_min = 1;
720 tm->tm_hour = 1;
721 tm->tm_mday = 10;
722 tm->tm_mon = 8;
723 tm->tm_year = 71;
724 mf_set_rtc(tm);
725 }
726 {
727 u8 year = ce_msg[5];
728 u8 sec = ce_msg[6];
729 u8 min = ce_msg[7];
730 u8 hour = ce_msg[8];
731 u8 day = ce_msg[10];
732 u8 mon = ce_msg[11];
733
734 BCD_TO_BIN(sec);
735 BCD_TO_BIN(min);
736 BCD_TO_BIN(hour);
737 BCD_TO_BIN(day);
738 BCD_TO_BIN(mon);
739 BCD_TO_BIN(year);
740
741 if (year <= 69)
742 year += 100;
743
744 tm->tm_sec = sec;
745 tm->tm_min = min;
746 tm->tm_hour = hour;
747 tm->tm_mday = day;
748 tm->tm_mon = mon;
749 tm->tm_year = year;
750 }
751
752 return 0;
753}
754
755int mf_get_rtc(struct rtc_time *tm)
756{
757 struct ce_msg_comp_data ce_complete;
758 struct rtc_time_data rtc_data;
759 int rc;
760
761 memset(&ce_complete, 0, sizeof(ce_complete));
762 memset(&rtc_data, 0, sizeof(rtc_data));
763 init_completion(&rtc_data.com);
764 ce_complete.handler = &get_rtc_time_complete;
765 ce_complete.token = &rtc_data;
766 rc = signal_ce_msg_simple(0x40, &ce_complete);
767 if (rc)
768 return rc;
769 wait_for_completion(&rtc_data.com);
770 return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm);
771}
772
773struct boot_rtc_time_data {
774 int busy;
775 struct ce_msg_data ce_msg;
776 int rc;
777};
778
779static void get_boot_rtc_time_complete(void *token, struct ce_msg_data *ce_msg)
780{
781 struct boot_rtc_time_data *rtc = token;
782
783 memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg));
784 rtc->rc = 0;
785 rtc->busy = 0;
786}
787
788int mf_get_boot_rtc(struct rtc_time *tm)
789{
790 struct ce_msg_comp_data ce_complete;
791 struct boot_rtc_time_data rtc_data;
792 int rc;
793
794 memset(&ce_complete, 0, sizeof(ce_complete));
795 memset(&rtc_data, 0, sizeof(rtc_data));
796 rtc_data.busy = 1;
797 ce_complete.handler = &get_boot_rtc_time_complete;
798 ce_complete.token = &rtc_data;
799 rc = signal_ce_msg_simple(0x40, &ce_complete);
800 if (rc)
801 return rc;
802 /* We need to poll here as we are not yet taking interrupts */
803 while (rtc_data.busy) {
804 if (hvlpevent_is_pending())
805 process_hvlpevents(NULL);
806 }
807 return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm);
808}
809
810int mf_set_rtc(struct rtc_time *tm)
811{
812 char ce_time[12];
813 u8 day, mon, hour, min, sec, y1, y2;
814 unsigned year;
815
816 year = 1900 + tm->tm_year;
817 y1 = year / 100;
818 y2 = year % 100;
819
820 sec = tm->tm_sec;
821 min = tm->tm_min;
822 hour = tm->tm_hour;
823 day = tm->tm_mday;
824 mon = tm->tm_mon + 1;
825
826 BIN_TO_BCD(sec);
827 BIN_TO_BCD(min);
828 BIN_TO_BCD(hour);
829 BIN_TO_BCD(mon);
830 BIN_TO_BCD(day);
831 BIN_TO_BCD(y1);
832 BIN_TO_BCD(y2);
833
834 memset(ce_time, 0, sizeof(ce_time));
835 ce_time[3] = 0x41;
836 ce_time[4] = y1;
837 ce_time[5] = y2;
838 ce_time[6] = sec;
839 ce_time[7] = min;
840 ce_time[8] = hour;
841 ce_time[10] = day;
842 ce_time[11] = mon;
843
844 return signal_ce_msg(ce_time, NULL);
845}
846
847#ifdef CONFIG_PROC_FS
848
849static int proc_mf_dump_cmdline(char *page, char **start, off_t off,
850 int count, int *eof, void *data)
851{
852 int len;
853 char *p;
854 struct vsp_cmd_data vsp_cmd;
855 int rc;
856 dma_addr_t dma_addr;
857
858 /* The HV appears to return no more than 256 bytes of command line */
859 if (off >= 256)
860 return 0;
861 if ((off + count) > 256)
862 count = 256 - off;
863
864 dma_addr = dma_map_single(iSeries_vio_dev, page, off + count,
865 DMA_FROM_DEVICE);
866 if (dma_mapping_error(dma_addr))
867 return -ENOMEM;
868 memset(page, 0, off + count);
869 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
870 vsp_cmd.cmd = 33;
871 vsp_cmd.sub_data.kern.token = dma_addr;
872 vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex;
873 vsp_cmd.sub_data.kern.side = (u64)data;
874 vsp_cmd.sub_data.kern.length = off + count;
875 mb();
876 rc = signal_vsp_instruction(&vsp_cmd);
877 dma_unmap_single(iSeries_vio_dev, dma_addr, off + count,
878 DMA_FROM_DEVICE);
879 if (rc)
880 return rc;
881 if (vsp_cmd.result_code != 0)
882 return -ENOMEM;
883 p = page;
884 len = 0;
885 while (len < (off + count)) {
886 if ((*p == '\0') || (*p == '\n')) {
887 if (*p == '\0')
888 *p = '\n';
889 p++;
890 len++;
891 *eof = 1;
892 break;
893 }
894 p++;
895 len++;
896 }
897
898 if (len < off) {
899 *eof = 1;
900 len = 0;
901 }
902 return len;
903}
904
905#if 0
906static int mf_getVmlinuxChunk(char *buffer, int *size, int offset, u64 side)
907{
908 struct vsp_cmd_data vsp_cmd;
909 int rc;
910 int len = *size;
911 dma_addr_t dma_addr;
912
913 dma_addr = dma_map_single(iSeries_vio_dev, buffer, len,
914 DMA_FROM_DEVICE);
915 memset(buffer, 0, len);
916 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
917 vsp_cmd.cmd = 32;
918 vsp_cmd.sub_data.kern.token = dma_addr;
919 vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex;
920 vsp_cmd.sub_data.kern.side = side;
921 vsp_cmd.sub_data.kern.offset = offset;
922 vsp_cmd.sub_data.kern.length = len;
923 mb();
924 rc = signal_vsp_instruction(&vsp_cmd);
925 if (rc == 0) {
926 if (vsp_cmd.result_code == 0)
927 *size = vsp_cmd.sub_data.length_out;
928 else
929 rc = -ENOMEM;
930 }
931
932 dma_unmap_single(iSeries_vio_dev, dma_addr, len, DMA_FROM_DEVICE);
933
934 return rc;
935}
936
937static int proc_mf_dump_vmlinux(char *page, char **start, off_t off,
938 int count, int *eof, void *data)
939{
940 int sizeToGet = count;
941
942 if (!capable(CAP_SYS_ADMIN))
943 return -EACCES;
944
945 if (mf_getVmlinuxChunk(page, &sizeToGet, off, (u64)data) == 0) {
946 if (sizeToGet != 0) {
947 *start = page + off;
948 return sizeToGet;
949 }
950 *eof = 1;
951 return 0;
952 }
953 *eof = 1;
954 return 0;
955}
956#endif
957
958static int proc_mf_dump_side(char *page, char **start, off_t off,
959 int count, int *eof, void *data)
960{
961 int len;
962 char mf_current_side = ' ';
963 struct vsp_cmd_data vsp_cmd;
964
965 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
966 vsp_cmd.cmd = 2;
967 vsp_cmd.sub_data.ipl_type = 0;
968 mb();
969
970 if (signal_vsp_instruction(&vsp_cmd) == 0) {
971 if (vsp_cmd.result_code == 0) {
972 switch (vsp_cmd.sub_data.ipl_type) {
973 case 0: mf_current_side = 'A';
974 break;
975 case 1: mf_current_side = 'B';
976 break;
977 case 2: mf_current_side = 'C';
978 break;
979 default: mf_current_side = 'D';
980 break;
981 }
982 }
983 }
984
985 len = sprintf(page, "%c\n", mf_current_side);
986
987 if (len <= (off + count))
988 *eof = 1;
989 *start = page + off;
990 len -= off;
991 if (len > count)
992 len = count;
993 if (len < 0)
994 len = 0;
995 return len;
996}
997
998static int proc_mf_change_side(struct file *file, const char __user *buffer,
999 unsigned long count, void *data)
1000{
1001 char side;
1002 u64 newSide;
1003 struct vsp_cmd_data vsp_cmd;
1004
1005 if (!capable(CAP_SYS_ADMIN))
1006 return -EACCES;
1007
1008 if (count == 0)
1009 return 0;
1010
1011 if (get_user(side, buffer))
1012 return -EFAULT;
1013
1014 switch (side) {
1015 case 'A': newSide = 0;
1016 break;
1017 case 'B': newSide = 1;
1018 break;
1019 case 'C': newSide = 2;
1020 break;
1021 case 'D': newSide = 3;
1022 break;
1023 default:
1024 printk(KERN_ERR "mf_proc.c: proc_mf_change_side: invalid side\n");
1025 return -EINVAL;
1026 }
1027
1028 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
1029 vsp_cmd.sub_data.ipl_type = newSide;
1030 vsp_cmd.cmd = 10;
1031
1032 (void)signal_vsp_instruction(&vsp_cmd);
1033
1034 return count;
1035}
1036
1037#if 0
1038static void mf_getSrcHistory(char *buffer, int size)
1039{
1040 struct IplTypeReturnStuff return_stuff;
1041 struct pending_event *ev = new_pending_event();
1042 int rc = 0;
1043 char *pages[4];
1044
1045 pages[0] = kmalloc(4096, GFP_ATOMIC);
1046 pages[1] = kmalloc(4096, GFP_ATOMIC);
1047 pages[2] = kmalloc(4096, GFP_ATOMIC);
1048 pages[3] = kmalloc(4096, GFP_ATOMIC);
1049 if ((ev == NULL) || (pages[0] == NULL) || (pages[1] == NULL)
1050 || (pages[2] == NULL) || (pages[3] == NULL))
1051 return -ENOMEM;
1052
1053 return_stuff.xType = 0;
1054 return_stuff.xRc = 0;
1055 return_stuff.xDone = 0;
1056 ev->event.hp_lp_event.xSubtype = 6;
1057 ev->event.hp_lp_event.x.xSubtypeData =
1058 subtype_data('M', 'F', 'V', 'I');
1059 ev->event.data.vsp_cmd.xEvent = &return_stuff;
1060 ev->event.data.vsp_cmd.cmd = 4;
1061 ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex();
1062 ev->event.data.vsp_cmd.result_code = 0xFF;
1063 ev->event.data.vsp_cmd.reserved = 0;
1064 ev->event.data.vsp_cmd.sub_data.page[0] = ISERIES_HV_ADDR(pages[0]);
1065 ev->event.data.vsp_cmd.sub_data.page[1] = ISERIES_HV_ADDR(pages[1]);
1066 ev->event.data.vsp_cmd.sub_data.page[2] = ISERIES_HV_ADDR(pages[2]);
1067 ev->event.data.vsp_cmd.sub_data.page[3] = ISERIES_HV_ADDR(pages[3]);
1068 mb();
1069 if (signal_event(ev) != 0)
1070 return;
1071
1072 while (return_stuff.xDone != 1)
1073 udelay(10);
1074 if (return_stuff.xRc == 0)
1075 memcpy(buffer, pages[0], size);
1076 kfree(pages[0]);
1077 kfree(pages[1]);
1078 kfree(pages[2]);
1079 kfree(pages[3]);
1080}
1081#endif
1082
1083static int proc_mf_dump_src(char *page, char **start, off_t off,
1084 int count, int *eof, void *data)
1085{
1086#if 0
1087 int len;
1088
1089 mf_getSrcHistory(page, count);
1090 len = count;
1091 len -= off;
1092 if (len < count) {
1093 *eof = 1;
1094 if (len <= 0)
1095 return 0;
1096 } else
1097 len = count;
1098 *start = page + off;
1099 return len;
1100#else
1101 return 0;
1102#endif
1103}
1104
1105static int proc_mf_change_src(struct file *file, const char __user *buffer,
1106 unsigned long count, void *data)
1107{
1108 char stkbuf[10];
1109
1110 if (!capable(CAP_SYS_ADMIN))
1111 return -EACCES;
1112
1113 if ((count < 4) && (count != 1)) {
1114 printk(KERN_ERR "mf_proc: invalid src\n");
1115 return -EINVAL;
1116 }
1117
1118 if (count > (sizeof(stkbuf) - 1))
1119 count = sizeof(stkbuf) - 1;
1120 if (copy_from_user(stkbuf, buffer, count))
1121 return -EFAULT;
1122
1123 if ((count == 1) && (*stkbuf == '\0'))
1124 mf_clear_src();
1125 else
1126 mf_display_src(*(u32 *)stkbuf);
1127
1128 return count;
1129}
1130
1131static int proc_mf_change_cmdline(struct file *file, const char __user *buffer,
1132 unsigned long count, void *data)
1133{
1134 struct vsp_cmd_data vsp_cmd;
1135 dma_addr_t dma_addr;
1136 char *page;
1137 int ret = -EACCES;
1138
1139 if (!capable(CAP_SYS_ADMIN))
1140 goto out;
1141
1142 dma_addr = 0;
1143 page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr,
1144 GFP_ATOMIC);
1145 ret = -ENOMEM;
1146 if (page == NULL)
1147 goto out;
1148
1149 ret = -EFAULT;
1150 if (copy_from_user(page, buffer, count))
1151 goto out_free;
1152
1153 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
1154 vsp_cmd.cmd = 31;
1155 vsp_cmd.sub_data.kern.token = dma_addr;
1156 vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex;
1157 vsp_cmd.sub_data.kern.side = (u64)data;
1158 vsp_cmd.sub_data.kern.length = count;
1159 mb();
1160 (void)signal_vsp_instruction(&vsp_cmd);
1161 ret = count;
1162
1163out_free:
1164 dma_free_coherent(iSeries_vio_dev, count, page, dma_addr);
1165out:
1166 return ret;
1167}
1168
1169static ssize_t proc_mf_change_vmlinux(struct file *file,
1170 const char __user *buf,
1171 size_t count, loff_t *ppos)
1172{
1173 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
1174 ssize_t rc;
1175 dma_addr_t dma_addr;
1176 char *page;
1177 struct vsp_cmd_data vsp_cmd;
1178
1179 rc = -EACCES;
1180 if (!capable(CAP_SYS_ADMIN))
1181 goto out;
1182
1183 dma_addr = 0;
1184 page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr,
1185 GFP_ATOMIC);
1186 rc = -ENOMEM;
1187 if (page == NULL) {
1188 printk(KERN_ERR "mf.c: couldn't allocate memory to set vmlinux chunk\n");
1189 goto out;
1190 }
1191 rc = -EFAULT;
1192 if (copy_from_user(page, buf, count))
1193 goto out_free;
1194
1195 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
1196 vsp_cmd.cmd = 30;
1197 vsp_cmd.sub_data.kern.token = dma_addr;
1198 vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex;
1199 vsp_cmd.sub_data.kern.side = (u64)dp->data;
1200 vsp_cmd.sub_data.kern.offset = *ppos;
1201 vsp_cmd.sub_data.kern.length = count;
1202 mb();
1203 rc = signal_vsp_instruction(&vsp_cmd);
1204 if (rc)
1205 goto out_free;
1206 rc = -ENOMEM;
1207 if (vsp_cmd.result_code != 0)
1208 goto out_free;
1209
1210 *ppos += count;
1211 rc = count;
1212out_free:
1213 dma_free_coherent(iSeries_vio_dev, count, page, dma_addr);
1214out:
1215 return rc;
1216}
1217
1218static struct file_operations proc_vmlinux_operations = {
1219 .write = proc_mf_change_vmlinux,
1220};
1221
1222static int __init mf_proc_init(void)
1223{
1224 struct proc_dir_entry *mf_proc_root;
1225 struct proc_dir_entry *ent;
1226 struct proc_dir_entry *mf;
1227 char name[2];
1228 int i;
1229
1230 mf_proc_root = proc_mkdir("iSeries/mf", NULL);
1231 if (!mf_proc_root)
1232 return 1;
1233
1234 name[1] = '\0';
1235 for (i = 0; i < 4; i++) {
1236 name[0] = 'A' + i;
1237 mf = proc_mkdir(name, mf_proc_root);
1238 if (!mf)
1239 return 1;
1240
1241 ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf);
1242 if (!ent)
1243 return 1;
1244 ent->nlink = 1;
1245 ent->data = (void *)(long)i;
1246 ent->read_proc = proc_mf_dump_cmdline;
1247 ent->write_proc = proc_mf_change_cmdline;
1248
1249 if (i == 3) /* no vmlinux entry for 'D' */
1250 continue;
1251
1252 ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf);
1253 if (!ent)
1254 return 1;
1255 ent->nlink = 1;
1256 ent->data = (void *)(long)i;
1257 ent->proc_fops = &proc_vmlinux_operations;
1258 }
1259
1260 ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root);
1261 if (!ent)
1262 return 1;
1263 ent->nlink = 1;
1264 ent->data = (void *)0;
1265 ent->read_proc = proc_mf_dump_side;
1266 ent->write_proc = proc_mf_change_side;
1267
1268 ent = create_proc_entry("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root);
1269 if (!ent)
1270 return 1;
1271 ent->nlink = 1;
1272 ent->data = (void *)0;
1273 ent->read_proc = proc_mf_dump_src;
1274 ent->write_proc = proc_mf_change_src;
1275
1276 return 0;
1277}
1278
1279__initcall(mf_proc_init);
1280
1281#endif /* CONFIG_PROC_FS */
diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S
index e7241ad80a08..f9f2131d2fb5 100644
--- a/arch/ppc64/kernel/misc.S
+++ b/arch/ppc64/kernel/misc.S
@@ -64,44 +64,6 @@ _GLOBAL(get_srr1)
 _GLOBAL(get_sp)
 	mr	r3,r1
 	blr
-
-#ifdef CONFIG_PPC_ISERIES
-/* unsigned long local_save_flags(void) */
-_GLOBAL(local_get_flags)
-	lbz	r3,PACAPROCENABLED(r13)
-	blr
-
-/* unsigned long local_irq_disable(void) */
-_GLOBAL(local_irq_disable)
-	lbz	r3,PACAPROCENABLED(r13)
-	li	r4,0
-	stb	r4,PACAPROCENABLED(r13)
-	blr			/* Done */
-
-/* void local_irq_restore(unsigned long flags) */
-_GLOBAL(local_irq_restore)
-	lbz	r5,PACAPROCENABLED(r13)
-	/* Check if things are setup the way we want _already_. */
-	cmpw	0,r3,r5
-	beqlr
-	/* are we enabling interrupts? */
-	cmpdi	0,r3,0
-	stb	r3,PACAPROCENABLED(r13)
-	beqlr
-	/* Check pending interrupts */
-	/*   A decrementer, IPI or PMC interrupt may have occurred
-	 * while we were in the hypervisor (which enables) */
-	ld	r4,PACALPPACA+LPPACAANYINT(r13)
-	cmpdi	r4,0
-	beqlr
-
-	/*
-	 * Handle pending interrupts in interrupt context
-	 */
-	li	r0,0x5555
-	sc
-	blr
-#endif /* CONFIG_PPC_ISERIES */
 
 #ifdef CONFIG_IRQSTACKS
 _GLOBAL(call_do_softirq)
@@ -329,7 +291,7 @@ _GLOBAL(__flush_dcache_icache)
 
 /* Flush the dcache */
 	ld	r7,PPC64_CACHES@toc(r2)
-	clrrdi	r3,r3,12		/* Page align */
+	clrrdi	r3,r3,PAGE_SHIFT	/* Page align */
 	lwz	r4,DCACHEL1LINESPERPAGE(r7)	/* Get # dcache lines per page */
 	lwz	r5,DCACHEL1LINESIZE(r7)		/* Get dcache line size */
 	mr	r6,r3
@@ -778,6 +740,13 @@ _GLOBAL(giveup_altivec)
 _GLOBAL(__setup_cpu_power3)
 	blr
 
+_GLOBAL(execve)
+	li	r0,__NR_execve
+	sc
+	bnslr
+	neg	r3,r3
+	blr
+
 /* kexec_wait(phys_cpu)
  *
  * wait for the flag to change, indicating this kernel is going away but
diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c
index cc262a05ddb4..ec22321342ad 100644
--- a/arch/ppc64/kernel/mpic.c
+++ b/arch/ppc64/kernel/mpic.c
@@ -31,8 +31,7 @@
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/machdep.h>
-
-#include "mpic.h"
+#include <asm/mpic.h>
 
 #ifdef DEBUG
 #define DBG(fmt...)	printk(fmt)
diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h
deleted file mode 100644
index ca78a7f10528..000000000000
--- a/arch/ppc64/kernel/mpic.h
+++ /dev/null
@@ -1,273 +0,0 @@
1#include <linux/irq.h>
2
3/*
4 * Global registers
5 */
6
7#define MPIC_GREG_BASE 0x01000
8
9#define MPIC_GREG_FEATURE_0 0x00000
10#define MPIC_GREG_FEATURE_LAST_SRC_MASK 0x07ff0000
11#define MPIC_GREG_FEATURE_LAST_SRC_SHIFT 16
12#define MPIC_GREG_FEATURE_LAST_CPU_MASK 0x00001f00
13#define MPIC_GREG_FEATURE_LAST_CPU_SHIFT 8
14#define MPIC_GREG_FEATURE_VERSION_MASK 0xff
15#define MPIC_GREG_FEATURE_1 0x00010
16#define MPIC_GREG_GLOBAL_CONF_0 0x00020
17#define MPIC_GREG_GCONF_RESET 0x80000000
18#define MPIC_GREG_GCONF_8259_PTHROU_DIS 0x20000000
19#define MPIC_GREG_GCONF_BASE_MASK 0x000fffff
20#define MPIC_GREG_GLOBAL_CONF_1 0x00030
21#define MPIC_GREG_VENDOR_0 0x00040
22#define MPIC_GREG_VENDOR_1 0x00050
23#define MPIC_GREG_VENDOR_2 0x00060
24#define MPIC_GREG_VENDOR_3 0x00070
25#define MPIC_GREG_VENDOR_ID 0x00080
26#define MPIC_GREG_VENDOR_ID_STEPPING_MASK 0x00ff0000
27#define MPIC_GREG_VENDOR_ID_STEPPING_SHIFT 16
28#define MPIC_GREG_VENDOR_ID_DEVICE_ID_MASK 0x0000ff00
29#define MPIC_GREG_VENDOR_ID_DEVICE_ID_SHIFT 8
30#define MPIC_GREG_VENDOR_ID_VENDOR_ID_MASK 0x000000ff
31#define MPIC_GREG_PROCESSOR_INIT 0x00090
32#define MPIC_GREG_IPI_VECTOR_PRI_0 0x000a0
33#define MPIC_GREG_IPI_VECTOR_PRI_1 0x000b0
34#define MPIC_GREG_IPI_VECTOR_PRI_2 0x000c0
35#define MPIC_GREG_IPI_VECTOR_PRI_3 0x000d0
36#define MPIC_GREG_SPURIOUS 0x000e0
37#define MPIC_GREG_TIMER_FREQ 0x000f0
38
39/*
40 *
41 * Timer registers
42 */
43#define MPIC_TIMER_BASE 0x01100
44#define MPIC_TIMER_STRIDE 0x40
45
46#define MPIC_TIMER_CURRENT_CNT 0x00000
47#define MPIC_TIMER_BASE_CNT 0x00010
48#define MPIC_TIMER_VECTOR_PRI 0x00020
49#define MPIC_TIMER_DESTINATION 0x00030
50
51/*
52 * Per-Processor registers
53 */
54
55#define MPIC_CPU_THISBASE 0x00000
56#define MPIC_CPU_BASE 0x20000
57#define MPIC_CPU_STRIDE 0x01000
58
59#define MPIC_CPU_IPI_DISPATCH_0 0x00040
60#define MPIC_CPU_IPI_DISPATCH_1 0x00050
61#define MPIC_CPU_IPI_DISPATCH_2 0x00060
62#define MPIC_CPU_IPI_DISPATCH_3 0x00070
63#define MPIC_CPU_CURRENT_TASK_PRI 0x00080
64#define MPIC_CPU_TASKPRI_MASK 0x0000000f
65#define MPIC_CPU_WHOAMI 0x00090
66#define MPIC_CPU_WHOAMI_MASK 0x0000001f
67#define MPIC_CPU_INTACK 0x000a0
68#define MPIC_CPU_EOI 0x000b0
69
70/*
71 * Per-source registers
72 */
73
74#define MPIC_IRQ_BASE 0x10000
75#define MPIC_IRQ_STRIDE 0x00020
76#define MPIC_IRQ_VECTOR_PRI 0x00000
77#define MPIC_VECPRI_MASK 0x80000000
78#define MPIC_VECPRI_ACTIVITY 0x40000000 /* Read Only */
79#define MPIC_VECPRI_PRIORITY_MASK 0x000f0000
80#define MPIC_VECPRI_PRIORITY_SHIFT 16
81#define MPIC_VECPRI_VECTOR_MASK 0x000007ff
82#define MPIC_VECPRI_POLARITY_POSITIVE 0x00800000
83#define MPIC_VECPRI_POLARITY_NEGATIVE 0x00000000
84#define MPIC_VECPRI_POLARITY_MASK 0x00800000
85#define MPIC_VECPRI_SENSE_LEVEL 0x00400000
86#define MPIC_VECPRI_SENSE_EDGE 0x00000000
87#define MPIC_VECPRI_SENSE_MASK 0x00400000
88#define MPIC_IRQ_DESTINATION 0x00010
89
90#define MPIC_MAX_IRQ_SOURCES 2048
91#define MPIC_MAX_CPUS 32
92#define MPIC_MAX_ISU 32
93
94/*
95 * Special vector numbers (internal use only)
96 */
97#define MPIC_VEC_SPURRIOUS 255
98#define MPIC_VEC_IPI_3 254
99#define MPIC_VEC_IPI_2 253
100#define MPIC_VEC_IPI_1 252
101#define MPIC_VEC_IPI_0 251
102
103/* unused */
104#define MPIC_VEC_TIMER_3 250
105#define MPIC_VEC_TIMER_2 249
106#define MPIC_VEC_TIMER_1 248
107#define MPIC_VEC_TIMER_0 247
108
109/* Type definition of the cascade handler */
110typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data);
111
112#ifdef CONFIG_MPIC_BROKEN_U3
113/* Fixup table entry */
114struct mpic_irq_fixup
115{
116 u8 __iomem *base;
117 unsigned int irq;
118};
119#endif /* CONFIG_MPIC_BROKEN_U3 */
120
121
122/* The instance data of a given MPIC */
123struct mpic
124{
125 /* The "linux" controller struct */
126 hw_irq_controller hc_irq;
127#ifdef CONFIG_SMP
128 hw_irq_controller hc_ipi;
129#endif
130 const char *name;
131 /* Flags */
132 unsigned int flags;
133 /* How many irq sources in a given ISU */
134 unsigned int isu_size;
135 unsigned int isu_shift;
136 unsigned int isu_mask;
137 /* Offset of irq vector numbers */
138 unsigned int irq_offset;
139 unsigned int irq_count;
140 /* Offset of ipi vector numbers */
141 unsigned int ipi_offset;
142 /* Number of sources */
143 unsigned int num_sources;
144 /* Number of CPUs */
145 unsigned int num_cpus;
146 /* cascade handler */
147 mpic_cascade_t cascade;
148 void *cascade_data;
149 unsigned int cascade_vec;
150 /* senses array */
151 unsigned char *senses;
152 unsigned int senses_count;
153
154#ifdef CONFIG_MPIC_BROKEN_U3
155 /* The fixup table */
156 struct mpic_irq_fixup *fixups;
157 spinlock_t fixup_lock;
158#endif
159
160 /* The various ioremap'ed bases */
161 volatile u32 __iomem *gregs;
162 volatile u32 __iomem *tmregs;
163 volatile u32 __iomem *cpuregs[MPIC_MAX_CPUS];
164 volatile u32 __iomem *isus[MPIC_MAX_ISU];
165
166 /* link */
167 struct mpic *next;
168};
169
170/* This is the primary controller, only that one has IPIs and
171 * has affinity control. A non-primary MPIC always uses CPU0
172 * registers only
173 */
174#define MPIC_PRIMARY 0x00000001
175/* Set this for a big-endian MPIC */
176#define MPIC_BIG_ENDIAN 0x00000002
177/* Broken U3 MPIC */
178#define MPIC_BROKEN_U3 0x00000004
179/* Broken IPI registers (autodetected) */
180#define MPIC_BROKEN_IPI 0x00000008
181/* MPIC wants a reset */
182#define MPIC_WANTS_RESET 0x00000010
183
184/* Allocate the controller structure and setup the linux irq descs
185 * for the range of interrupts passed in. No HW initialization is
186 * actually performed.
187 *
188 * @phys_addr: physical base address of the MPIC
189 * @flags: flags, see constants above
190 * @isu_size: number of interrupts in an ISU. Use 0 to use a
191 * standard ISU-less setup (aka powermac)
192 * @irq_offset: first irq number to assign to this mpic
193 * @irq_count: number of irqs to use with this mpic IRQ sources. Pass 0
194 * to match the number of sources
195 * @ipi_offset: first irq number to assign to this mpic IPI sources,
196 * used only on primary mpic
197 * @senses: array of sense values
198 * @senses_num: number of entries in the array
199 *
200 * Note about the sense array. If none is passed, all interrupts are
201 * setup to be level negative unless MPIC_BROKEN_U3 is set in which
202 * case they are edge positive (and the array is ignored anyway).
203 * The values in the array start at the first source of the MPIC,
204 * that is senses[0] corresponds to linux irq "irq_offset".
205 */
206extern struct mpic *mpic_alloc(unsigned long phys_addr,
207 unsigned int flags,
208 unsigned int isu_size,
209 unsigned int irq_offset,
210 unsigned int irq_count,
211 unsigned int ipi_offset,
212 unsigned char *senses,
213 unsigned int senses_num,
214 const char *name);
215
216/* Assign ISUs, to call before mpic_init()
217 *
218 * @mpic: controller structure as returned by mpic_alloc()
219 * @isu_num: ISU number
220 * @phys_addr: physical address of the ISU
221 */
222extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
223 unsigned long phys_addr);
224
225/* Initialize the controller. After this has been called, none of the above
226 * should be called again for this mpic
227 */
228extern void mpic_init(struct mpic *mpic);
229
230/* Setup a cascade. Currently, only one cascade is supported this
231 * way, though you can always do a normal request_irq() and add
232 * other cascades this way. You should call this _after_ having
233 * added all the ISUs
234 *
235 * @irq_no: "linux" irq number of the cascade (that is offset'ed vector)
236 * @handler: cascade handler function
237 */
238extern void mpic_setup_cascade(unsigned int irq_no, mpic_cascade_t handler,
239 void *data);
240
241/*
242 * All of the following functions must only be used after the
243 * ISUs have been assigned and the controller fully initialized
244 * with mpic_init()
245 */
246
247
248/* Change/Read the priority of an interrupt. Default is 8 for irqs and
249 * 10 for IPIs. You can call this on both IPIs and IRQ numbers, but the
250 * IPI number is then the offset'ed (linux irq number mapped to the IPI)
251 */
252extern void mpic_irq_set_priority(unsigned int irq, unsigned int pri);
253extern unsigned int mpic_irq_get_priority(unsigned int irq);
254
255/* Setup a non-boot CPU */
256extern void mpic_setup_this_cpu(void);
257
258/* Clean up for kexec (or cpu offline or ...) */
259extern void mpic_teardown_this_cpu(int secondary);
260
261/* Request IPIs on primary mpic */
262extern void mpic_request_ipis(void);
263
264/* Send an IPI (non offseted number 0..3) */
265extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask);
266
267/* Fetch interrupt from a given mpic */
268extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs);
269/* This one gets to the primary mpic */
270extern int mpic_get_irq(struct pt_regs *regs);
271
272/* global mpic for pSeries */
273extern struct mpic *pSeries_mpic;
diff --git a/arch/ppc64/kernel/pSeries_hvCall.S b/arch/ppc64/kernel/pSeries_hvCall.S
deleted file mode 100644
index 176e8da76466..000000000000
--- a/arch/ppc64/kernel/pSeries_hvCall.S
+++ /dev/null
@@ -1,131 +0,0 @@
1/*
2 * arch/ppc64/kernel/pSeries_hvCall.S
3 *
4 * This file contains the generic code to perform a call to the
5 * pSeries LPAR hypervisor.
6 * NOTE: this file will go away when we move to inline this work.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <asm/hvcall.h>
14#include <asm/processor.h>
15#include <asm/ppc_asm.h>
16
17#define STK_PARM(i) (48 + ((i)-3)*8)
18
19 .text
20
21/* long plpar_hcall(unsigned long opcode, R3
22 unsigned long arg1, R4
23 unsigned long arg2, R5
24 unsigned long arg3, R6
25 unsigned long arg4, R7
26 unsigned long *out1, R8
27 unsigned long *out2, R9
28 unsigned long *out3); R10
29 */
30_GLOBAL(plpar_hcall)
31 HMT_MEDIUM
32
33 mfcr r0
34
35 std r8,STK_PARM(r8)(r1) /* Save out ptrs */
36 std r9,STK_PARM(r9)(r1)
37 std r10,STK_PARM(r10)(r1)
38
39 stw r0,8(r1)
40
41 HVSC /* invoke the hypervisor */
42
43 lwz r0,8(r1)
44
45 ld r8,STK_PARM(r8)(r1) /* Fetch r4-r6 ret args */
46 ld r9,STK_PARM(r9)(r1)
47 ld r10,STK_PARM(r10)(r1)
48 std r4,0(r8)
49 std r5,0(r9)
50 std r6,0(r10)
51
52 mtcrf 0xff,r0
53 blr /* return r3 = status */
54
55
56/* Simple interface with no output values (other than status) */
57_GLOBAL(plpar_hcall_norets)
58 HMT_MEDIUM
59
60 mfcr r0
61 stw r0,8(r1)
62
63 HVSC /* invoke the hypervisor */
64
65 lwz r0,8(r1)
66 mtcrf 0xff,r0
67 blr /* return r3 = status */
68
69
70/* long plpar_hcall_8arg_2ret(unsigned long opcode, R3
71 unsigned long arg1, R4
72 unsigned long arg2, R5
73 unsigned long arg3, R6
74 unsigned long arg4, R7
75 unsigned long arg5, R8
76 unsigned long arg6, R9
77 unsigned long arg7, R10
78 unsigned long arg8, 112(R1)
79 unsigned long *out1); 120(R1)
80 */
81_GLOBAL(plpar_hcall_8arg_2ret)
82 HMT_MEDIUM
83
84 mfcr r0
85 ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */
86 stw r0,8(r1)
87
88 HVSC /* invoke the hypervisor */
89
90 lwz r0,8(r1)
91 ld r10,STK_PARM(r12)(r1) /* Fetch r4 ret arg */
92 std r4,0(r10)
93 mtcrf 0xff,r0
94 blr /* return r3 = status */
95
96
97/* long plpar_hcall_4out(unsigned long opcode, R3
98 unsigned long arg1, R4
99 unsigned long arg2, R5
100 unsigned long arg3, R6
101 unsigned long arg4, R7
102 unsigned long *out1, R8
103 unsigned long *out2, R9
104 unsigned long *out3, R10
105 unsigned long *out4); 112(R1)
106 */
107_GLOBAL(plpar_hcall_4out)
108 HMT_MEDIUM
109
110 mfcr r0
111 stw r0,8(r1)
112
113 std r8,STK_PARM(r8)(r1) /* Save out ptrs */
114 std r9,STK_PARM(r9)(r1)
115 std r10,STK_PARM(r10)(r1)
116
117 HVSC /* invoke the hypervisor */
118
119 lwz r0,8(r1)
120
121 ld r8,STK_PARM(r8)(r1) /* Fetch r4-r7 ret args */
122 ld r9,STK_PARM(r9)(r1)
123 ld r10,STK_PARM(r10)(r1)
124 ld r11,STK_PARM(r11)(r1)
125 std r4,0(r8)
126 std r5,0(r9)
127 std r6,0(r10)
128 std r7,0(r11)
129
130 mtcrf 0xff,r0
131 blr /* return r3 = status */
diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c
deleted file mode 100644
index d17f0108a032..000000000000
--- a/arch/ppc64/kernel/pSeries_iommu.c
+++ /dev/null
@@ -1,590 +0,0 @@
1/*
2 * arch/ppc64/kernel/pSeries_iommu.c
3 *
4 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
5 *
6 * Rewrite, cleanup:
7 *
8 * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
9 *
10 * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/config.h>
29#include <linux/init.h>
30#include <linux/types.h>
31#include <linux/slab.h>
32#include <linux/mm.h>
33#include <linux/spinlock.h>
34#include <linux/string.h>
35#include <linux/pci.h>
36#include <linux/dma-mapping.h>
37#include <asm/io.h>
38#include <asm/prom.h>
39#include <asm/rtas.h>
40#include <asm/ppcdebug.h>
41#include <asm/iommu.h>
42#include <asm/pci-bridge.h>
43#include <asm/machdep.h>
44#include <asm/abs_addr.h>
45#include <asm/plpar_wrappers.h>
46#include <asm/pSeries_reconfig.h>
47#include <asm/systemcfg.h>
48#include <asm/firmware.h>
49#include "pci.h"
50
51#define DBG(fmt...)
52
53extern int is_python(struct device_node *);
54
55static void tce_build_pSeries(struct iommu_table *tbl, long index,
56 long npages, unsigned long uaddr,
57 enum dma_data_direction direction)
58{
59 union tce_entry t;
60 union tce_entry *tp;
61
62 t.te_word = 0;
63 t.te_rdwr = 1; // Read allowed
64
65 if (direction != DMA_TO_DEVICE)
66 t.te_pciwr = 1;
67
68 tp = ((union tce_entry *)tbl->it_base) + index;
69
70 while (npages--) {
71 /* can't move this out since we might cross LMB boundary */
72 t.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT;
73
74 tp->te_word = t.te_word;
75
76 uaddr += PAGE_SIZE;
77 tp++;
78 }
79}
80
81
82static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
83{
84 union tce_entry t;
85 union tce_entry *tp;
86
87 t.te_word = 0;
88 tp = ((union tce_entry *)tbl->it_base) + index;
89
90 while (npages--) {
91 tp->te_word = t.te_word;
92
93 tp++;
94 }
95}
96
97
98static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
99 long npages, unsigned long uaddr,
100 enum dma_data_direction direction)
101{
102 u64 rc;
103 union tce_entry tce;
104
105 tce.te_word = 0;
106 tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT;
107 tce.te_rdwr = 1;
108 if (direction != DMA_TO_DEVICE)
109 tce.te_pciwr = 1;
110
111 while (npages--) {
112 rc = plpar_tce_put((u64)tbl->it_index,
113 (u64)tcenum << 12,
114 tce.te_word );
115
116 if (rc && printk_ratelimit()) {
117 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
118 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
119 printk("\ttcenum = 0x%lx\n", (u64)tcenum);
120 printk("\ttce val = 0x%lx\n", tce.te_word );
121 show_stack(current, (unsigned long *)__get_SP());
122 }
123
124 tcenum++;
125 tce.te_rpn++;
126 }
127}
128
129static DEFINE_PER_CPU(void *, tce_page) = NULL;
130
131static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
132 long npages, unsigned long uaddr,
133 enum dma_data_direction direction)
134{
135 u64 rc;
136 union tce_entry tce, *tcep;
137 long l, limit;
138
139 if (npages == 1)
140 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
141 direction);
142
143 tcep = __get_cpu_var(tce_page);
144
145 /* This is safe to do since interrupts are off when we're called
146 * from iommu_alloc{,_sg}()
147 */
148 if (!tcep) {
149 tcep = (void *)__get_free_page(GFP_ATOMIC);
150 /* If allocation fails, fall back to the loop implementation */
151 if (!tcep)
152 return tce_build_pSeriesLP(tbl, tcenum, npages,
153 uaddr, direction);
154 __get_cpu_var(tce_page) = tcep;
155 }
156
157 tce.te_word = 0;
158 tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT;
159 tce.te_rdwr = 1;
160 if (direction != DMA_TO_DEVICE)
161 tce.te_pciwr = 1;
162
163 /* We can map max one pageful of TCEs at a time */
164 do {
165 /*
166 * Set up the page with TCE data, looping through and setting
167 * the values.
168 */
169 limit = min_t(long, npages, PAGE_SIZE/sizeof(union tce_entry));
170
171 for (l = 0; l < limit; l++) {
172 tcep[l] = tce;
173 tce.te_rpn++;
174 }
175
176 rc = plpar_tce_put_indirect((u64)tbl->it_index,
177 (u64)tcenum << 12,
178 (u64)virt_to_abs(tcep),
179 limit);
180
181 npages -= limit;
182 tcenum += limit;
183 } while (npages > 0 && !rc);
184
185 if (rc && printk_ratelimit()) {
186 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
187 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
188 printk("\tnpages = 0x%lx\n", (u64)npages);
189 printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word);
190 show_stack(current, (unsigned long *)__get_SP());
191 }
192}
193
194static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
195{
196 u64 rc;
197 union tce_entry tce;
198
199 tce.te_word = 0;
200
201 while (npages--) {
202 rc = plpar_tce_put((u64)tbl->it_index,
203 (u64)tcenum << 12,
204 tce.te_word);
205
206 if (rc && printk_ratelimit()) {
207 printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
208 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
209 printk("\ttcenum = 0x%lx\n", (u64)tcenum);
210 printk("\ttce val = 0x%lx\n", tce.te_word );
211 show_stack(current, (unsigned long *)__get_SP());
212 }
213
214 tcenum++;
215 }
216}
217
218
219static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
220{
221 u64 rc;
222 union tce_entry tce;
223
224 tce.te_word = 0;
225
226 rc = plpar_tce_stuff((u64)tbl->it_index,
227 (u64)tcenum << 12,
228 tce.te_word,
229 npages);
230
231 if (rc && printk_ratelimit()) {
232 printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
233 printk("\trc = %ld\n", rc);
234 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
235 printk("\tnpages = 0x%lx\n", (u64)npages);
236 printk("\ttce val = 0x%lx\n", tce.te_word );
237 show_stack(current, (unsigned long *)__get_SP());
238 }
239}
240
241static void iommu_table_setparms(struct pci_controller *phb,
242 struct device_node *dn,
243 struct iommu_table *tbl)
244{
245 struct device_node *node;
246 unsigned long *basep;
247 unsigned int *sizep;
248
249 node = (struct device_node *)phb->arch_data;
250
251 basep = (unsigned long *)get_property(node, "linux,tce-base", NULL);
252 sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL);
253 if (basep == NULL || sizep == NULL) {
254 printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has "
255 "missing tce entries !\n", dn->full_name);
256 return;
257 }
258
259 tbl->it_base = (unsigned long)__va(*basep);
260 memset((void *)tbl->it_base, 0, *sizep);
261
262 tbl->it_busno = phb->bus->number;
263
264 /* Units of tce entries */
265 tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT;
266
267 /* Test if we are going over 2GB of DMA space */
268 if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
269 udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
270 panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
271 }
272
273 phb->dma_window_base_cur += phb->dma_window_size;
274
275 /* Set the tce table size - measured in entries */
276 tbl->it_size = phb->dma_window_size >> PAGE_SHIFT;
277
278 tbl->it_index = 0;
279 tbl->it_blocksize = 16;
280 tbl->it_type = TCE_PCI;
281}
282
283/*
284 * iommu_table_setparms_lpar
285 *
286 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
287 *
288 * ToDo: properly interpret the ibm,dma-window property. The definition is:
289 * logical-bus-number (1 word)
290 * phys-address (#address-cells words)
291 * size (#cell-size words)
292 *
293 * Currently we hard code these sizes (more or less).
294 */
295static void iommu_table_setparms_lpar(struct pci_controller *phb,
296 struct device_node *dn,
297 struct iommu_table *tbl,
298 unsigned int *dma_window)
299{
300 tbl->it_busno = PCI_DN(dn)->bussubno;
301
302 /* TODO: Parse field size properties properly. */
303 tbl->it_size = (((unsigned long)dma_window[4] << 32) |
304 (unsigned long)dma_window[5]) >> PAGE_SHIFT;
305 tbl->it_offset = (((unsigned long)dma_window[2] << 32) |
306 (unsigned long)dma_window[3]) >> PAGE_SHIFT;
307 tbl->it_base = 0;
308 tbl->it_index = dma_window[0];
309 tbl->it_blocksize = 16;
310 tbl->it_type = TCE_PCI;
311}
312
313static void iommu_bus_setup_pSeries(struct pci_bus *bus)
314{
315 struct device_node *dn;
316 struct iommu_table *tbl;
317 struct device_node *isa_dn, *isa_dn_orig;
318 struct device_node *tmp;
319 struct pci_dn *pci;
320 int children;
321
322 DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self);
323
324 dn = pci_bus_to_OF_node(bus);
325 pci = PCI_DN(dn);
326
327 if (bus->self) {
328 /* This is not a root bus, any setup will be done for the
329 * device-side of the bridge in iommu_dev_setup_pSeries().
330 */
331 return;
332 }
333
334 /* Check if the ISA bus on the system is under
335 * this PHB.
336 */
337 isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");
338
339 while (isa_dn && isa_dn != dn)
340 isa_dn = isa_dn->parent;
341
342 if (isa_dn_orig)
343 of_node_put(isa_dn_orig);
344
345 /* Count number of direct PCI children of the PHB.
346 * All PCI device nodes have class-code property, so it's
347 * an easy way to find them.
348 */
349 for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
350 if (get_property(tmp, "class-code", NULL))
351 children++;
352
353 DBG("Children: %d\n", children);
354
355 /* Calculate amount of DMA window per slot. Each window must be
356 * a power of two (due to pci_alloc_consistent requirements).
357 *
358 * Keep 256MB aside for PHBs with ISA.
359 */
360
361 if (!isa_dn) {
362 /* No ISA/IDE - just set window size and return */
363 pci->phb->dma_window_size = 0x80000000ul; /* To be divided */
364
365 while (pci->phb->dma_window_size * children > 0x80000000ul)
366 pci->phb->dma_window_size >>= 1;
367 DBG("No ISA/IDE, window size is 0x%lx\n",
368 pci->phb->dma_window_size);
369 pci->phb->dma_window_base_cur = 0;
370
371 return;
372 }
373
374 /* If we have ISA, then we probably have an IDE
375 * controller too. Allocate a 128MB table but
376 * skip the first 128MB to avoid stepping on ISA
377 * space.
378 */
379 pci->phb->dma_window_size = 0x8000000ul;
380 pci->phb->dma_window_base_cur = 0x8000000ul;
381
382 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
383
384 iommu_table_setparms(pci->phb, dn, tbl);
385 pci->iommu_table = iommu_init_table(tbl);
386
387 /* Divide the rest (1.75GB) among the children */
388 pci->phb->dma_window_size = 0x80000000ul;
389 while (pci->phb->dma_window_size * children > 0x70000000ul)
390 pci->phb->dma_window_size >>= 1;
391
392 DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size);
393
394}
395
396
397static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus)
398{
399 struct iommu_table *tbl;
400 struct device_node *dn, *pdn;
401 struct pci_dn *ppci;
402 unsigned int *dma_window = NULL;
403
404 DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self);
405
406 dn = pci_bus_to_OF_node(bus);
407
408 /* Find nearest ibm,dma-window, walking up the device tree */
409 for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
410 dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL);
411 if (dma_window != NULL)
412 break;
413 }
414
415 if (dma_window == NULL) {
416 DBG("iommu_bus_setup_pSeriesLP: bus %s seems to have no ibm,dma-window property\n", dn->full_name);
417 return;
418 }
419
420 ppci = pdn->data;
421 if (!ppci->iommu_table) {
422 /* Bussubno hasn't been copied yet.
423 * Do it now because iommu_table_setparms_lpar needs it.
424 */
425
426 ppci->bussubno = bus->number;
427
428 tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
429 GFP_KERNEL);
430
431 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
432
433 ppci->iommu_table = iommu_init_table(tbl);
434 }
435
436 if (pdn != dn)
437 PCI_DN(dn)->iommu_table = ppci->iommu_table;
438}
439
440
441static void iommu_dev_setup_pSeries(struct pci_dev *dev)
442{
443 struct device_node *dn, *mydn;
444 struct iommu_table *tbl;
445
446 DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, pci_name(dev));
447
448 mydn = dn = pci_device_to_OF_node(dev);
449
450 /* If we're the direct child of a root bus, then we need to allocate
451 * an iommu table ourselves. The bus setup code should have setup
452 * the window sizes already.
453 */
454 if (!dev->bus->self) {
455 DBG(" --> first child, no bridge. Allocating iommu table.\n");
456 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
457 iommu_table_setparms(PCI_DN(dn)->phb, dn, tbl);
458 PCI_DN(mydn)->iommu_table = iommu_init_table(tbl);
459
460 return;
461 }
462
463 /* If this device is further down the bus tree, search upwards until
464 * an already allocated iommu table is found and use that.
465 */
466
467 while (dn && dn->data && PCI_DN(dn)->iommu_table == NULL)
468 dn = dn->parent;
469
470 if (dn && dn->data) {
471 PCI_DN(mydn)->iommu_table = PCI_DN(dn)->iommu_table;
472 } else {
473 DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, pci_name(dev));
474 }
475}
476
477static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
478{
479 int err = NOTIFY_OK;
480 struct device_node *np = node;
481 struct pci_dn *pci = np->data;
482
483 switch (action) {
484 case PSERIES_RECONFIG_REMOVE:
485 if (pci->iommu_table &&
486 get_property(np, "ibm,dma-window", NULL))
487 iommu_free_table(np);
488 break;
489 default:
490 err = NOTIFY_DONE;
491 break;
492 }
493 return err;
494}
495
496static struct notifier_block iommu_reconfig_nb = {
497 .notifier_call = iommu_reconfig_notifier,
498};
499
500static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev)
501{
502 struct device_node *pdn, *dn;
503 struct iommu_table *tbl;
504 int *dma_window = NULL;
505 struct pci_dn *pci;
506
507 DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev));
508
509 /* dev setup for LPAR is a little tricky, since the device tree might
510 * contain the dma-window properties per-device and not necessarily
511 * for the bus. So we need to search upwards in the tree until we
512 * either hit a dma-window property, OR find a parent with a table
513 * already allocated.
514 */
515 dn = pci_device_to_OF_node(dev);
516
517 for (pdn = dn; pdn && pdn->data && !PCI_DN(pdn)->iommu_table;
518 pdn = pdn->parent) {
519 dma_window = (unsigned int *)
520 get_property(pdn, "ibm,dma-window", NULL);
521 if (dma_window)
522 break;
523 }
524
525 /* Check for parent == NULL so we don't try to setup the empty EADS
526 * slots on POWER4 machines.
527 */
528 if (dma_window == NULL || pdn->parent == NULL) {
529 DBG("No dma window for device, linking to parent\n");
530 PCI_DN(dn)->iommu_table = PCI_DN(pdn)->iommu_table;
531 return;
532 } else {
533 DBG("Found DMA window, allocating table\n");
534 }
535
536 pci = pdn->data;
537 if (!pci->iommu_table) {
538 /* iommu_table_setparms_lpar needs bussubno. */
539 pci->bussubno = pci->phb->bus->number;
540
541 tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
542 GFP_KERNEL);
543
544 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
545
546 pci->iommu_table = iommu_init_table(tbl);
547 }
548
549 if (pdn != dn)
550 PCI_DN(dn)->iommu_table = pci->iommu_table;
551}
552
553static void iommu_bus_setup_null(struct pci_bus *b) { }
554static void iommu_dev_setup_null(struct pci_dev *d) { }
555
556/* These are called very early. */
557void iommu_init_early_pSeries(void)
558{
559 if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) {
560 /* Direct I/O, IOMMU off */
561 ppc_md.iommu_dev_setup = iommu_dev_setup_null;
562 ppc_md.iommu_bus_setup = iommu_bus_setup_null;
563 pci_direct_iommu_init();
564
565 return;
566 }
567
568 if (systemcfg->platform & PLATFORM_LPAR) {
569 if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
570 ppc_md.tce_build = tce_buildmulti_pSeriesLP;
571 ppc_md.tce_free = tce_freemulti_pSeriesLP;
572 } else {
573 ppc_md.tce_build = tce_build_pSeriesLP;
574 ppc_md.tce_free = tce_free_pSeriesLP;
575 }
576 ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP;
577 ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP;
578 } else {
579 ppc_md.tce_build = tce_build_pSeries;
580 ppc_md.tce_free = tce_free_pSeries;
581 ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries;
582 ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries;
583 }
584
585
586 pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
587
588 pci_iommu_init();
589}
590
diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c
deleted file mode 100644
index a6de83f2078f..000000000000
--- a/arch/ppc64/kernel/pSeries_lpar.c
+++ /dev/null
@@ -1,518 +0,0 @@
1/*
2 * pSeries_lpar.c
3 * Copyright (C) 2001 Todd Inglett, IBM Corporation
4 *
5 * pSeries LPAR support.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#define DEBUG
23
24#include <linux/config.h>
25#include <linux/kernel.h>
26#include <linux/dma-mapping.h>
27#include <asm/processor.h>
28#include <asm/mmu.h>
29#include <asm/page.h>
30#include <asm/pgtable.h>
31#include <asm/machdep.h>
32#include <asm/abs_addr.h>
33#include <asm/mmu_context.h>
34#include <asm/ppcdebug.h>
35#include <asm/iommu.h>
36#include <asm/tlbflush.h>
37#include <asm/tlb.h>
38#include <asm/prom.h>
39#include <asm/abs_addr.h>
40#include <asm/cputable.h>
41#include <asm/plpar_wrappers.h>
42
43#ifdef DEBUG
44#define DBG(fmt...) udbg_printf(fmt)
45#else
46#define DBG(fmt...)
47#endif
48
49/* in pSeries_hvCall.S */
50EXPORT_SYMBOL(plpar_hcall);
51EXPORT_SYMBOL(plpar_hcall_4out);
52EXPORT_SYMBOL(plpar_hcall_norets);
53EXPORT_SYMBOL(plpar_hcall_8arg_2ret);
54
55extern void pSeries_find_serial_port(void);
56
57
58int vtermno; /* virtual terminal# for udbg */
59
60#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
61static void udbg_hvsi_putc(unsigned char c)
62{
63 /* packet's seqno isn't used anyways */
64 uint8_t packet[] __ALIGNED__ = { 0xff, 5, 0, 0, c };
65 int rc;
66
67 if (c == '\n')
68 udbg_hvsi_putc('\r');
69
70 do {
71 rc = plpar_put_term_char(vtermno, sizeof(packet), packet);
72 } while (rc == H_Busy);
73}
74
75static long hvsi_udbg_buf_len;
76static uint8_t hvsi_udbg_buf[256];
77
78static int udbg_hvsi_getc_poll(void)
79{
80 unsigned char ch;
81 int rc, i;
82
83 if (hvsi_udbg_buf_len == 0) {
84 rc = plpar_get_term_char(vtermno, &hvsi_udbg_buf_len, hvsi_udbg_buf);
85 if (rc != H_Success || hvsi_udbg_buf[0] != 0xff) {
86 /* bad read or non-data packet */
87 hvsi_udbg_buf_len = 0;
88 } else {
89 /* remove the packet header */
90 for (i = 4; i < hvsi_udbg_buf_len; i++)
91 hvsi_udbg_buf[i-4] = hvsi_udbg_buf[i];
92 hvsi_udbg_buf_len -= 4;
93 }
94 }
95
96 if (hvsi_udbg_buf_len <= 0 || hvsi_udbg_buf_len > 256) {
97 /* no data ready */
98 hvsi_udbg_buf_len = 0;
99 return -1;
100 }
101
102 ch = hvsi_udbg_buf[0];
103 /* shift remaining data down */
104 for (i = 1; i < hvsi_udbg_buf_len; i++) {
105 hvsi_udbg_buf[i-1] = hvsi_udbg_buf[i];
106 }
107 hvsi_udbg_buf_len--;
108
109 return ch;
110}
111
112static unsigned char udbg_hvsi_getc(void)
113{
114 int ch;
115 for (;;) {
116 ch = udbg_hvsi_getc_poll();
117 if (ch == -1) {
118 /* This shouldn't be needed...but... */
119 volatile unsigned long delay;
120 for (delay=0; delay < 2000000; delay++)
121 ;
122 } else {
123 return ch;
124 }
125 }
126}
127
128static void udbg_putcLP(unsigned char c)
129{
130 char buf[16];
131 unsigned long rc;
132
133 if (c == '\n')
134 udbg_putcLP('\r');
135
136 buf[0] = c;
137 do {
138 rc = plpar_put_term_char(vtermno, 1, buf);
139 } while(rc == H_Busy);
140}
141
142/* Buffered chars getc */
143static long inbuflen;
144static long inbuf[2]; /* must be 2 longs */
145
146static int udbg_getc_pollLP(void)
147{
148 /* The interface is tricky because it may return up to 16 chars.
149 * We save them statically for future calls to udbg_getc().
150 */
151 char ch, *buf = (char *)inbuf;
152 int i;
153 long rc;
154 if (inbuflen == 0) {
155 /* get some more chars. */
156 inbuflen = 0;
157 rc = plpar_get_term_char(vtermno, &inbuflen, buf);
158 if (rc != H_Success)
159 inbuflen = 0; /* otherwise inbuflen is garbage */
160 }
161 if (inbuflen <= 0 || inbuflen > 16) {
162 /* Catch error case as well as other oddities (corruption) */
163 inbuflen = 0;
164 return -1;
165 }
166 ch = buf[0];
167 for (i = 1; i < inbuflen; i++) /* shuffle them down. */
168 buf[i-1] = buf[i];
169 inbuflen--;
170 return ch;
171}
172
173static unsigned char udbg_getcLP(void)
174{
175 int ch;
176 for (;;) {
177 ch = udbg_getc_pollLP();
178 if (ch == -1) {
179 /* This shouldn't be needed...but... */
180 volatile unsigned long delay;
181 for (delay=0; delay < 2000000; delay++)
182 ;
183 } else {
184 return ch;
185 }
186 }
187}
188
189/* call this from early_init() for a working debug console on
190 * vterm capable LPAR machines
191 */
192void udbg_init_debug_lpar(void)
193{
194 vtermno = 0;
195 udbg_putc = udbg_putcLP;
196 udbg_getc = udbg_getcLP;
197 udbg_getc_poll = udbg_getc_pollLP;
198}
199
200/* returns 0 if couldn't find or use /chosen/stdout as console */
201int find_udbg_vterm(void)
202{
203 struct device_node *stdout_node;
204 u32 *termno;
205 char *name;
206 int found = 0;
207
208 /* find the boot console from /chosen/stdout */
209 if (!of_chosen)
210 return 0;
211 name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
212 if (name == NULL)
213 return 0;
214 stdout_node = of_find_node_by_path(name);
215 if (!stdout_node)
216 return 0;
217
218 /* now we have the stdout node; figure out what type of device it is. */
219 name = (char *)get_property(stdout_node, "name", NULL);
220 if (!name) {
221 printk(KERN_WARNING "stdout node missing 'name' property!\n");
222 goto out;
223 }
224
225 if (strncmp(name, "vty", 3) == 0) {
226 if (device_is_compatible(stdout_node, "hvterm1")) {
227 termno = (u32 *)get_property(stdout_node, "reg", NULL);
228 if (termno) {
229 vtermno = termno[0];
230 udbg_putc = udbg_putcLP;
231 udbg_getc = udbg_getcLP;
232 udbg_getc_poll = udbg_getc_pollLP;
233 found = 1;
234 }
235 } else if (device_is_compatible(stdout_node, "hvterm-protocol")) {
236 termno = (u32 *)get_property(stdout_node, "reg", NULL);
237 if (termno) {
238 vtermno = termno[0];
239 udbg_putc = udbg_hvsi_putc;
240 udbg_getc = udbg_hvsi_getc;
241 udbg_getc_poll = udbg_hvsi_getc_poll;
242 found = 1;
243 }
244 }
245 } else if (strncmp(name, "serial", 6)) {
246 /* XXX fix ISA serial console */
247 printk(KERN_WARNING "serial stdout on LPAR ('%s')! "
248 "can't print udbg messages\n",
249 stdout_node->full_name);
250 } else {
251 printk(KERN_WARNING "don't know how to print to stdout '%s'\n",
252 stdout_node->full_name);
253 }
254
255out:
256 of_node_put(stdout_node);
257 return found;
258}
259
260void vpa_init(int cpu)
261{
262 int hwcpu = get_hard_smp_processor_id(cpu);
263 unsigned long vpa = (unsigned long)&(paca[cpu].lppaca);
264 long ret;
265 unsigned long flags;
266
267 /* Register the Virtual Processor Area (VPA) */
268 flags = 1UL << (63 - 18);
269
270 if (cpu_has_feature(CPU_FTR_ALTIVEC))
271 paca[cpu].lppaca.vmxregs_in_use = 1;
272
273 ret = register_vpa(flags, hwcpu, __pa(vpa));
274
275 if (ret)
276 printk(KERN_ERR "WARNING: vpa_init: VPA registration for "
277 "cpu %d (hw %d) of area %lx returns %ld\n",
278 cpu, hwcpu, __pa(vpa), ret);
279}
280
281long pSeries_lpar_hpte_insert(unsigned long hpte_group,
282 unsigned long va, unsigned long prpn,
283 unsigned long vflags, unsigned long rflags)
284{
285 unsigned long lpar_rc;
286 unsigned long flags;
287 unsigned long slot;
288 unsigned long hpte_v, hpte_r;
289 unsigned long dummy0, dummy1;
290
291 hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID;
292 if (vflags & HPTE_V_LARGE)
293 hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
294
295 hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
296
297 /* Now fill in the actual HPTE */
298 /* Set CEC cookie to 0 */
299 /* Zero page = 0 */
300 /* I-cache Invalidate = 0 */
301 /* I-cache synchronize = 0 */
302 /* Exact = 0 */
303 flags = 0;
304
305 /* XXX why is this here? - Anton */
306 if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
307 hpte_r &= ~_PAGE_COHERENT;
308
309 lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
310 hpte_r, &slot, &dummy0, &dummy1);
311
312 if (unlikely(lpar_rc == H_PTEG_Full))
313 return -1;
314
315 /*
316 * Since we try and ioremap PHBs we don't own, the pte insert
317 * will fail. However we must catch the failure in hash_page
318 * or we will loop forever, so return -2 in this case.
319 */
320 if (unlikely(lpar_rc != H_Success))
321 return -2;
322
323 /* Because of iSeries, we have to pass down the secondary
324 * bucket bit here as well
325 */
326 return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);
327}
328
329static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);
330
331static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
332{
333 unsigned long slot_offset;
334 unsigned long lpar_rc;
335 int i;
336 unsigned long dummy1, dummy2;
337
338 /* pick a random slot to start at */
339 slot_offset = mftb() & 0x7;
340
341 for (i = 0; i < HPTES_PER_GROUP; i++) {
342
343 /* don't remove a bolted entry */
344 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
345 (0x1UL << 4), &dummy1, &dummy2);
346
347 if (lpar_rc == H_Success)
348 return i;
349
350 BUG_ON(lpar_rc != H_Not_Found);
351
352 slot_offset++;
353 slot_offset &= 0x7;
354 }
355
356 return -1;
357}
358
359static void pSeries_lpar_hptab_clear(void)
360{
361 unsigned long size_bytes = 1UL << ppc64_pft_size;
362 unsigned long hpte_count = size_bytes >> 4;
363 unsigned long dummy1, dummy2;
364 int i;
365
366 /* TODO: Use bulk call */
367 for (i = 0; i < hpte_count; i++)
368 plpar_pte_remove(0, i, 0, &dummy1, &dummy2);
369}
370
371/*
372 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
373 * the low 3 bits of flags happen to line up. So no transform is needed.
374 * We can probably optimize here and assume the high bits of newpp are
375 * already zero. For now I am paranoid.
376 */
377static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
378 unsigned long va, int large, int local)
379{
380 unsigned long lpar_rc;
381 unsigned long flags = (newpp & 7) | H_AVPN;
382 unsigned long avpn = va >> 23;
383
384 if (large)
385 avpn &= ~0x1UL;
386
387 lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7));
388
389 if (lpar_rc == H_Not_Found)
390 return -1;
391
392 BUG_ON(lpar_rc != H_Success);
393
394 return 0;
395}
396
397static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
398{
399 unsigned long dword0;
400 unsigned long lpar_rc;
401 unsigned long dummy_word1;
402 unsigned long flags;
403
404 /* Read 1 pte at a time */
405 /* Do not need RPN to logical page translation */
406 /* No cross CEC PFT access */
407 flags = 0;
408
409 lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);
410
411 BUG_ON(lpar_rc != H_Success);
412
413 return dword0;
414}
415
416static long pSeries_lpar_hpte_find(unsigned long vpn)
417{
418 unsigned long hash;
419 unsigned long i, j;
420 long slot;
421 unsigned long hpte_v;
422
423 hash = hpt_hash(vpn, 0);
424
425 for (j = 0; j < 2; j++) {
426 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
427 for (i = 0; i < HPTES_PER_GROUP; i++) {
428 hpte_v = pSeries_lpar_hpte_getword0(slot);
429
430 if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
431 && (hpte_v & HPTE_V_VALID)
432 && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
433 /* HPTE matches */
434 if (j)
435 slot = -slot;
436 return slot;
437 }
438 ++slot;
439 }
440 hash = ~hash;
441 }
442
443 return -1;
444}
445
446static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
447 unsigned long ea)
448{
449 unsigned long lpar_rc;
450 unsigned long vsid, va, vpn, flags;
451 long slot;
452
453 vsid = get_kernel_vsid(ea);
454 va = (vsid << 28) | (ea & 0x0fffffff);
455 vpn = va >> PAGE_SHIFT;
456
457 slot = pSeries_lpar_hpte_find(vpn);
458 BUG_ON(slot == -1);
459
460 flags = newpp & 7;
461 lpar_rc = plpar_pte_protect(flags, slot, 0);
462
463 BUG_ON(lpar_rc != H_Success);
464}
465
466static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
467 int large, int local)
468{
469 unsigned long avpn = va >> 23;
470 unsigned long lpar_rc;
471 unsigned long dummy1, dummy2;
472
473 if (large)
474 avpn &= ~0x1UL;
475
476 lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
477 &dummy2);
478
479 if (lpar_rc == H_Not_Found)
480 return;
481
482 BUG_ON(lpar_rc != H_Success);
483}
484
485/*
486 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
487 * lock.
488 */
489void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number,
490 int local)
491{
492 int i;
493 unsigned long flags = 0;
494 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
495 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
496
497 if (lock_tlbie)
498 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
499
500 for (i = 0; i < number; i++)
501 flush_hash_page(context, batch->addr[i], batch->pte[i], local);
502
503 if (lock_tlbie)
504 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
505}
506
507void hpte_init_lpar(void)
508{
509 ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate;
510 ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp;
511 ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
512 ppc_md.hpte_insert = pSeries_lpar_hpte_insert;
513 ppc_md.hpte_remove = pSeries_lpar_hpte_remove;
514 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range;
515 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear;
516
517 htab_finish_init();
518}
diff --git a/arch/ppc64/kernel/pSeries_nvram.c b/arch/ppc64/kernel/pSeries_nvram.c
deleted file mode 100644
index 18abfb1f4e24..000000000000
--- a/arch/ppc64/kernel/pSeries_nvram.c
+++ /dev/null
@@ -1,148 +0,0 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * /dev/nvram driver for PPC64
10 *
11 * This perhaps should live in drivers/char
12 */
13
14
15#include <linux/types.h>
16#include <linux/errno.h>
17#include <linux/init.h>
18#include <linux/slab.h>
19#include <linux/spinlock.h>
20#include <asm/uaccess.h>
21#include <asm/nvram.h>
22#include <asm/rtas.h>
23#include <asm/prom.h>
24#include <asm/machdep.h>
25
26static unsigned int nvram_size;
27static int nvram_fetch, nvram_store;
28static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
29static DEFINE_SPINLOCK(nvram_lock);
30
31
32static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
33{
34 unsigned int i;
35 unsigned long len;
36 int done;
37 unsigned long flags;
38 char *p = buf;
39
40
41 if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
42 return -ENODEV;
43
44 if (*index >= nvram_size)
45 return 0;
46
47 i = *index;
48 if (i + count > nvram_size)
49 count = nvram_size - i;
50
51 spin_lock_irqsave(&nvram_lock, flags);
52
53 for (; count != 0; count -= len) {
54 len = count;
55 if (len > NVRW_CNT)
56 len = NVRW_CNT;
57
58 if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
59 len) != 0) || len != done) {
60 spin_unlock_irqrestore(&nvram_lock, flags);
61 return -EIO;
62 }
63
64 memcpy(p, nvram_buf, len);
65
66 p += len;
67 i += len;
68 }
69
70 spin_unlock_irqrestore(&nvram_lock, flags);
71
72 *index = i;
73 return p - buf;
74}
75
76static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
77{
78 unsigned int i;
79 unsigned long len;
80 int done;
81 unsigned long flags;
82 const char *p = buf;
83
84 if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
85 return -ENODEV;
86
87 if (*index >= nvram_size)
88 return 0;
89
90 i = *index;
91 if (i + count > nvram_size)
92 count = nvram_size - i;
93
94 spin_lock_irqsave(&nvram_lock, flags);
95
96 for (; count != 0; count -= len) {
97 len = count;
98 if (len > NVRW_CNT)
99 len = NVRW_CNT;
100
101 memcpy(nvram_buf, p, len);
102
103 if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
104 len) != 0) || len != done) {
105 spin_unlock_irqrestore(&nvram_lock, flags);
106 return -EIO;
107 }
108
109 p += len;
110 i += len;
111 }
112 spin_unlock_irqrestore(&nvram_lock, flags);
113
114 *index = i;
115 return p - buf;
116}
117
118static ssize_t pSeries_nvram_get_size(void)
119{
120 return nvram_size ? nvram_size : -ENODEV;
121}
122
123int __init pSeries_nvram_init(void)
124{
125 struct device_node *nvram;
126 unsigned int *nbytes_p, proplen;
127
128 nvram = of_find_node_by_type(NULL, "nvram");
129 if (nvram == NULL)
130 return -ENODEV;
131
132 nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen);
133 if (nbytes_p == NULL || proplen != sizeof(unsigned int))
134 return -EIO;
135
136 nvram_size = *nbytes_p;
137
138 nvram_fetch = rtas_token("nvram-fetch");
139 nvram_store = rtas_token("nvram-store");
140 printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
141 of_node_put(nvram);
142
143 ppc_md.nvram_read = pSeries_nvram_read;
144 ppc_md.nvram_write = pSeries_nvram_write;
145 ppc_md.nvram_size = pSeries_nvram_get_size;
146
147 return 0;
148}
diff --git a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c
deleted file mode 100644
index 928f8febdb3b..000000000000
--- a/arch/ppc64/kernel/pSeries_pci.c
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * arch/ppc64/kernel/pSeries_pci.c
3 *
4 * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
5 * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
6 *
7 * pSeries specific routines for PCI.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include <linux/init.h>
25#include <linux/ioport.h>
26#include <linux/kernel.h>
27#include <linux/pci.h>
28#include <linux/string.h>
29
30#include <asm/pci-bridge.h>
31#include <asm/prom.h>
32
33#include "pci.h"
34
35static int __devinitdata s7a_workaround = -1;
36
37#if 0
38void pcibios_name_device(struct pci_dev *dev)
39{
40 struct device_node *dn;
41
42 /*
43 * Add IBM loc code (slot) as a prefix to the device names for service
44 */
45 dn = pci_device_to_OF_node(dev);
46 if (dn) {
47 char *loc_code = get_property(dn, "ibm,loc-code", 0);
48 if (loc_code) {
49 int loc_len = strlen(loc_code);
50 if (loc_len < sizeof(dev->dev.name)) {
51 memmove(dev->dev.name+loc_len+1, dev->dev.name,
52 sizeof(dev->dev.name)-loc_len-1);
53 memcpy(dev->dev.name, loc_code, loc_len);
54 dev->dev.name[loc_len] = ' ';
55 dev->dev.name[sizeof(dev->dev.name)-1] = '\0';
56 }
57 }
58 }
59}
60DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
61#endif
62
63static void __devinit check_s7a(void)
64{
65 struct device_node *root;
66 char *model;
67
68 s7a_workaround = 0;
69 root = of_find_node_by_path("/");
70 if (root) {
71 model = get_property(root, "model", NULL);
72 if (model && !strcmp(model, "IBM,7013-S7A"))
73 s7a_workaround = 1;
74 of_node_put(root);
75 }
76}
77
78void __devinit pSeries_irq_bus_setup(struct pci_bus *bus)
79{
80 struct pci_dev *dev;
81
82 if (s7a_workaround < 0)
83 check_s7a();
84 list_for_each_entry(dev, &bus->devices, bus_list) {
85 pci_read_irq_line(dev);
86 if (s7a_workaround) {
87 if (dev->irq > 16) {
88 dev->irq -= 3;
89 pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
90 dev->irq);
91 }
92 }
93 }
94}
95
96static void __init pSeries_request_regions(void)
97{
98 if (!isa_io_base)
99 return;
100
101 request_region(0x20,0x20,"pic1");
102 request_region(0xa0,0x20,"pic2");
103 request_region(0x00,0x20,"dma1");
104 request_region(0x40,0x20,"timer");
105 request_region(0x80,0x10,"dma page reg");
106 request_region(0xc0,0x20,"dma2");
107}
108
109void __init pSeries_final_fixup(void)
110{
111 phbs_remap_io();
112 pSeries_request_regions();
113
114 pci_addr_cache_build();
115}
116
117/*
118 * Assume the winbond 82c105 is the IDE controller on a
119 * p610. We should probably be more careful in case
120 * someone tries to plug in a similar adapter.
121 */
122static void fixup_winbond_82c105(struct pci_dev* dev)
123{
124 int i;
125 unsigned int reg;
126
127 if (!(systemcfg->platform & PLATFORM_PSERIES))
128 return;
129
130 printk("Using INTC for W82c105 IDE controller.\n");
131 pci_read_config_dword(dev, 0x40, &reg);
132 /* Enable LEGIRQ to use INTC instead of ISA interrupts */
133 pci_write_config_dword(dev, 0x40, reg | (1<<11));
134
135 for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) {
136 /* zap the 2nd function of the winbond chip */
137 if (dev->resource[i].flags & IORESOURCE_IO
138 && dev->bus->number == 0 && dev->devfn == 0x81)
139 dev->resource[i].flags &= ~IORESOURCE_IO;
140 }
141}
142DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
143 fixup_winbond_82c105);
diff --git a/arch/ppc64/kernel/pSeries_reconfig.c b/arch/ppc64/kernel/pSeries_reconfig.c
deleted file mode 100644
index 58c61219d08e..000000000000
--- a/arch/ppc64/kernel/pSeries_reconfig.c
+++ /dev/null
@@ -1,426 +0,0 @@
1/*
2 * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI
3 * Hotplug and Dynamic Logical Partitioning on RPA platforms).
4 *
5 * Copyright (C) 2005 Nathan Lynch
6 * Copyright (C) 2005 IBM Corporation
7 *
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version
11 * 2 as published by the Free Software Foundation.
12 */
13
14#include <linux/kernel.h>
15#include <linux/kref.h>
16#include <linux/notifier.h>
17#include <linux/proc_fs.h>
18
19#include <asm/prom.h>
20#include <asm/pSeries_reconfig.h>
21#include <asm/uaccess.h>
22
23
24
25/*
26 * Routines for "runtime" addition and removal of device tree nodes.
27 */
28#ifdef CONFIG_PROC_DEVICETREE
29/*
30 * Add a node to /proc/device-tree.
31 */
32static void add_node_proc_entries(struct device_node *np)
33{
34 struct proc_dir_entry *ent;
35
36 ent = proc_mkdir(strrchr(np->full_name, '/') + 1, np->parent->pde);
37 if (ent)
38 proc_device_tree_add_node(np, ent);
39}
40
41static void remove_node_proc_entries(struct device_node *np)
42{
43 struct property *pp = np->properties;
44 struct device_node *parent = np->parent;
45
46 while (pp) {
47 remove_proc_entry(pp->name, np->pde);
48 pp = pp->next;
49 }
50 if (np->pde)
51 remove_proc_entry(np->pde->name, parent->pde);
52}
53#else /* !CONFIG_PROC_DEVICETREE */
54static void add_node_proc_entries(struct device_node *np)
55{
56 return;
57}
58
59static void remove_node_proc_entries(struct device_node *np)
60{
61 return;
62}
63#endif /* CONFIG_PROC_DEVICETREE */
64
65/**
66 * derive_parent - basically like dirname(1)
67 * @path: the full_name of a node to be added to the tree
68 *
69 * Returns the node which should be the parent of the node
70 * described by path. E.g., for path = "/foo/bar", returns
71 * the node with full_name = "/foo".
72 */
73static struct device_node *derive_parent(const char *path)
74{
75 struct device_node *parent = NULL;
76 char *parent_path = "/";
77 size_t parent_path_len = strrchr(path, '/') - path + 1;
78
79 /* reject if path is "/" */
80 if (!strcmp(path, "/"))
81 return ERR_PTR(-EINVAL);
82
83 if (strrchr(path, '/') != path) {
84 parent_path = kmalloc(parent_path_len, GFP_KERNEL);
85 if (!parent_path)
86 return ERR_PTR(-ENOMEM);
87 strlcpy(parent_path, path, parent_path_len);
88 }
89 parent = of_find_node_by_path(parent_path);
90 if (!parent)
91 return ERR_PTR(-EINVAL);
92 if (strcmp(parent_path, "/"))
93 kfree(parent_path);
94 return parent;
95}
96
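derive_parent() is essentially dirname(1) applied to a device-tree path: everything up to the last '/' names the parent, with "/" as the parent of any top-level node. A stand-alone sketch of the same slicing, with the device-tree lookup and error handling simplified and an invented example path:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *parent_path(const char *path)
{
	const char *slash = strrchr(path, '/');
	size_t len;
	char *parent;

	if (!slash || !strcmp(path, "/"))
		return NULL;			/* "/" has no parent */

	len = (slash == path) ? 1 : (size_t)(slash - path);
	parent = malloc(len + 1);
	if (!parent)
		return NULL;
	memcpy(parent, (slash == path) ? "/" : path, len);
	parent[len] = '\0';
	return parent;
}

int main(void)
{
	char *p = parent_path("/cpus/PowerPC,POWER5@0");

	printf("%s\n", p ? p : "(none)");	/* prints "/cpus" */
	free(p);
	return 0;
}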
97static struct notifier_block *pSeries_reconfig_chain;
98
99int pSeries_reconfig_notifier_register(struct notifier_block *nb)
100{
101 return notifier_chain_register(&pSeries_reconfig_chain, nb);
102}
103
104void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
105{
106 notifier_chain_unregister(&pSeries_reconfig_chain, nb);
107}
108
109static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
110{
111 struct device_node *np;
112 int err = -ENOMEM;
113
114 np = kzalloc(sizeof(*np), GFP_KERNEL);
115 if (!np)
116 goto out_err;
117
118 np->full_name = kmalloc(strlen(path) + 1, GFP_KERNEL);
119 if (!np->full_name)
120 goto out_err;
121
122 strcpy(np->full_name, path);
123
124 np->properties = proplist;
125 OF_MARK_DYNAMIC(np);
126 kref_init(&np->kref);
127
128 np->parent = derive_parent(path);
129 if (IS_ERR(np->parent)) {
130 err = PTR_ERR(np->parent);
131 goto out_err;
132 }
133
134 err = notifier_call_chain(&pSeries_reconfig_chain,
135 PSERIES_RECONFIG_ADD, np);
136 if (err == NOTIFY_BAD) {
137 printk(KERN_ERR "Failed to add device node %s\n", path);
138 err = -ENOMEM; /* For now, safe to assume kmalloc failure */
139 goto out_err;
140 }
141
142 of_attach_node(np);
143
144 add_node_proc_entries(np);
145
146 of_node_put(np->parent);
147
148 return 0;
149
150out_err:
151 if (np) {
152 of_node_put(np->parent);
153 kfree(np->full_name);
154 kfree(np);
155 }
156 return err;
157}
158
159static int pSeries_reconfig_remove_node(struct device_node *np)
160{
161 struct device_node *parent, *child;
162
163 parent = of_get_parent(np);
164 if (!parent)
165 return -EINVAL;
166
167 if ((child = of_get_next_child(np, NULL))) {
168 of_node_put(child);
169 return -EBUSY;
170 }
171
172 remove_node_proc_entries(np);
173
174 notifier_call_chain(&pSeries_reconfig_chain,
175 PSERIES_RECONFIG_REMOVE, np);
176 of_detach_node(np);
177
178 of_node_put(parent);
179 of_node_put(np); /* Must decrement the refcount */
180 return 0;
181}
182
183/*
184 * /proc/ppc64/ofdt - yucky binary interface for adding and removing
185 * OF device nodes. Should be deprecated as soon as we get an
186 * in-kernel wrapper for the RTAS ibm,configure-connector call.
187 */
188
189static void release_prop_list(const struct property *prop)
190{
191 struct property *next;
192 for (; prop; prop = next) {
193 next = prop->next;
194 kfree(prop->name);
195 kfree(prop->value);
196 kfree(prop);
197 }
198
199}
200
201/**
202 * parse_next_property - process the next property from raw input buffer
203 * @buf: input buffer, must be nul-terminated
204 * @end: end of the input buffer + 1, for validation
205 * @name: return value; set to property name in buf
206 * @length: return value; set to length of value
207 * @value: return value; set to the property value in buf
208 *
209 * Note that the caller must make copies of the name and value returned,
210 * this function does no allocation or copying of the data. Return value
211 * is set to the next name in buf, or NULL on error.
212 */
213static char * parse_next_property(char *buf, char *end, char **name, int *length,
214 unsigned char **value)
215{
216 char *tmp;
217
218 *name = buf;
219
220 tmp = strchr(buf, ' ');
221 if (!tmp) {
222 printk(KERN_ERR "property parse failed in %s at line %d\n",
223 __FUNCTION__, __LINE__);
224 return NULL;
225 }
226 *tmp = '\0';
227
228 if (++tmp >= end) {
229 printk(KERN_ERR "property parse failed in %s at line %d\n",
230 __FUNCTION__, __LINE__);
231 return NULL;
232 }
233
234 /* now we're on the length */
235 *length = -1;
236 *length = simple_strtoul(tmp, &tmp, 10);
237 if (*length == -1) {
238 printk(KERN_ERR "property parse failed in %s at line %d\n",
239 __FUNCTION__, __LINE__);
240 return NULL;
241 }
242 if (*tmp != ' ' || ++tmp >= end) {
243 printk(KERN_ERR "property parse failed in %s at line %d\n",
244 __FUNCTION__, __LINE__);
245 return NULL;
246 }
247
248 /* now we're on the value */
249 *value = tmp;
250 tmp += *length;
251 if (tmp > end) {
252 printk(KERN_ERR "property parse failed in %s at line %d\n",
253 __FUNCTION__, __LINE__);
254 return NULL;
255 }
256 else if (tmp < end && *tmp != ' ' && *tmp != '\0') {
257 printk(KERN_ERR "property parse failed in %s at line %d\n",
258 __FUNCTION__, __LINE__);
259 return NULL;
260 }
261 tmp++;
262
263 /* and now we should be on the next name, or the end */
264 return tmp;
265}
266
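parse_next_property() walks records of the form "name length value" separated by single spaces, taking the value as raw bytes of the stated length. A small user-space illustration of that layout (the property names and values below are invented, and the real interface passes raw bytes from user space):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	/* two records: ("compatible", 7, "IBM,vty") and ("name", 5, "l-lan") */
	char buf[] = "compatible 7 IBM,vty name 5 l-lan";
	char *p = buf, *end = buf + sizeof(buf) - 1;

	while (p < end) {
		char *name = p;
		char *sp = strchr(p, ' ');
		long len;

		if (!sp)
			break;
		*sp = '\0';			/* terminate the property name */
		len = strtol(sp + 1, &p, 10);	/* the length field */
		p++;				/* skip the space before the value */
		printf("property %s = %.*s (%ld bytes)\n", name, (int)len, p, len);
		p += len + 1;			/* step over the value + separator */
	}
	return 0;
}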
267static struct property *new_property(const char *name, const int length,
268 const unsigned char *value, struct property *last)
269{
270 struct property *new = kmalloc(sizeof(*new), GFP_KERNEL);
271
272 if (!new)
273 return NULL;
274 memset(new, 0, sizeof(*new));
275
276 if (!(new->name = kmalloc(strlen(name) + 1, GFP_KERNEL)))
277 goto cleanup;
278 if (!(new->value = kmalloc(length + 1, GFP_KERNEL)))
279 goto cleanup;
280
281 strcpy(new->name, name);
282 memcpy(new->value, value, length);
283 *(((char *)new->value) + length) = 0;
284 new->length = length;
285 new->next = last;
286 return new;
287
288cleanup:
289 if (new->name)
290 kfree(new->name);
291 if (new->value)
292 kfree(new->value);
293 kfree(new);
294 return NULL;
295}
296
297static int do_add_node(char *buf, size_t bufsize)
298{
299 char *path, *end, *name;
300 struct device_node *np;
301 struct property *prop = NULL;
302 unsigned char* value;
303 int length, rv = 0;
304
305 end = buf + bufsize;
306 path = buf;
307 buf = strchr(buf, ' ');
308 if (!buf)
309 return -EINVAL;
310 *buf = '\0';
311 buf++;
312
313 if ((np = of_find_node_by_path(path))) {
314 of_node_put(np);
315 return -EINVAL;
316 }
317
318 /* rv = build_prop_list(tmp, bufsize - (tmp - buf), &proplist); */
319 while (buf < end &&
320 (buf = parse_next_property(buf, end, &name, &length, &value))) {
321 struct property *last = prop;
322
323 prop = new_property(name, length, value, last);
324 if (!prop) {
325 rv = -ENOMEM;
326 prop = last;
327 goto out;
328 }
329 }
330 if (!buf) {
331 rv = -EINVAL;
332 goto out;
333 }
334
335 rv = pSeries_reconfig_add_node(path, prop);
336
337out:
338 if (rv)
339 release_prop_list(prop);
340 return rv;
341}
342
343static int do_remove_node(char *buf)
344{
345 struct device_node *node;
346 int rv = -ENODEV;
347
348 if ((node = of_find_node_by_path(buf)))
349 rv = pSeries_reconfig_remove_node(node);
350
351 of_node_put(node);
352 return rv;
353}
354
355/**
356 * ofdt_write - perform operations on the Open Firmware device tree
357 *
358 * @file: not used
359 * @buf: command and arguments
360 * @count: size of the command buffer
361 * @off: not used
362 *
363 * Operations supported at this time are addition and removal of
364 * whole nodes along with their properties. Operations on individual
365 * properties are not implemented (yet).
366 */
367static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count,
368 loff_t *off)
369{
370 int rv = 0;
371 char *kbuf;
372 char *tmp;
373
374 if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) {
375 rv = -ENOMEM;
376 goto out;
377 }
378 if (copy_from_user(kbuf, buf, count)) {
379 rv = -EFAULT;
380 goto out;
381 }
382
383 kbuf[count] = '\0';
384
385 tmp = strchr(kbuf, ' ');
386 if (!tmp) {
387 rv = -EINVAL;
388 goto out;
389 }
390 *tmp = '\0';
391 tmp++;
392
393 if (!strcmp(kbuf, "add_node"))
394 rv = do_add_node(tmp, count - (tmp - kbuf));
395 else if (!strcmp(kbuf, "remove_node"))
396 rv = do_remove_node(tmp);
397 else
398 rv = -EINVAL;
399out:
400 kfree(kbuf);
401 return rv ? rv : count;
402}
403
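From user space, the interface handled by ofdt_write() is driven with one write() per command. A hedged sketch, assuming an invented node path and property; a real caller would pass data obtained from firmware (e.g. ibm,configure-connector):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* "add_node <path> <propname> <len> <value> ..." in a single write() */
	const char cmd[] = "add_node /example@1000 compatible 8 IBM,test";
	int fd = open("/proc/ppc64/ofdt", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/ppc64/ofdt");
		return 1;
	}
	if (write(fd, cmd, sizeof(cmd) - 1) < 0)
		perror("add_node");

	/* removal takes just the path */
	if (write(fd, "remove_node /example@1000", 25) < 0)
		perror("remove_node");

	close(fd);
	return 0;
}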
404static struct file_operations ofdt_fops = {
405 .write = ofdt_write
406};
407
408/* create /proc/ppc64/ofdt write-only by root */
409static int proc_ppc64_create_ofdt(void)
410{
411 struct proc_dir_entry *ent;
412
413 if (!(systemcfg->platform & PLATFORM_PSERIES))
414 return 0;
415
416 ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL);
417 if (ent) {
418 ent->nlink = 1;
419 ent->data = NULL;
420 ent->size = 0;
421 ent->proc_fops = &ofdt_fops;
422 }
423
424 return 0;
425}
426__initcall(proc_ppc64_create_ofdt);
diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c
deleted file mode 100644
index 3009701eb90d..000000000000
--- a/arch/ppc64/kernel/pSeries_setup.c
+++ /dev/null
@@ -1,622 +0,0 @@
1/*
2 * linux/arch/ppc/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 * Adapted from 'alpha' version by Gary Thomas
6 * Modified by Cort Dougan (cort@cs.nmt.edu)
7 * Modified by PPC64 Team, IBM Corp
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15/*
16 * bootup setup stuff..
17 */
18
19#undef DEBUG
20
21#include <linux/config.h>
22#include <linux/cpu.h>
23#include <linux/errno.h>
24#include <linux/sched.h>
25#include <linux/kernel.h>
26#include <linux/mm.h>
27#include <linux/stddef.h>
28#include <linux/unistd.h>
29#include <linux/slab.h>
30#include <linux/user.h>
31#include <linux/a.out.h>
32#include <linux/tty.h>
33#include <linux/major.h>
34#include <linux/interrupt.h>
35#include <linux/reboot.h>
36#include <linux/init.h>
37#include <linux/ioport.h>
38#include <linux/console.h>
39#include <linux/pci.h>
40#include <linux/utsname.h>
41#include <linux/adb.h>
42#include <linux/module.h>
43#include <linux/delay.h>
44#include <linux/irq.h>
45#include <linux/seq_file.h>
46#include <linux/root_dev.h>
47
48#include <asm/mmu.h>
49#include <asm/processor.h>
50#include <asm/io.h>
51#include <asm/pgtable.h>
52#include <asm/prom.h>
53#include <asm/rtas.h>
54#include <asm/pci-bridge.h>
55#include <asm/iommu.h>
56#include <asm/dma.h>
57#include <asm/machdep.h>
58#include <asm/irq.h>
59#include <asm/time.h>
60#include <asm/nvram.h>
61#include <asm/plpar_wrappers.h>
62#include <asm/xics.h>
63#include <asm/firmware.h>
64#include <asm/pmc.h>
65
66#include "i8259.h"
67#include "mpic.h"
68#include "pci.h"
69
70#ifdef DEBUG
71#define DBG(fmt...) udbg_printf(fmt)
72#else
73#define DBG(fmt...)
74#endif
75
76extern void find_udbg_vterm(void);
77extern void system_reset_fwnmi(void); /* from head.S */
78extern void machine_check_fwnmi(void); /* from head.S */
79extern void generic_find_legacy_serial_ports(u64 *physport,
80 unsigned int *default_speed);
81
82int fwnmi_active; /* TRUE if an FWNMI handler is present */
83
84extern void pSeries_system_reset_exception(struct pt_regs *regs);
85extern int pSeries_machine_check_exception(struct pt_regs *regs);
86
87static int pseries_shared_idle(void);
88static int pseries_dedicated_idle(void);
89
90static volatile void __iomem * chrp_int_ack_special;
91struct mpic *pSeries_mpic;
92
93void pSeries_get_cpuinfo(struct seq_file *m)
94{
95 struct device_node *root;
96 const char *model = "";
97
98 root = of_find_node_by_path("/");
99 if (root)
100 model = get_property(root, "model", NULL);
101 seq_printf(m, "machine\t\t: CHRP %s\n", model);
102 of_node_put(root);
103}
104
 105/* Initialize firmware-assisted non-maskable interrupts if
106 * the firmware supports this feature.
107 *
108 */
109static void __init fwnmi_init(void)
110{
111 int ret;
112 int ibm_nmi_register = rtas_token("ibm,nmi-register");
113 if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
114 return;
115 ret = rtas_call(ibm_nmi_register, 2, 1, NULL,
116 __pa((unsigned long)system_reset_fwnmi),
117 __pa((unsigned long)machine_check_fwnmi));
118 if (ret == 0)
119 fwnmi_active = 1;
120}
121
122static int pSeries_irq_cascade(struct pt_regs *regs, void *data)
123{
124 if (chrp_int_ack_special)
125 return readb(chrp_int_ack_special);
126 else
127 return i8259_irq(smp_processor_id());
128}
129
130static void __init pSeries_init_mpic(void)
131{
132 unsigned int *addrp;
133 struct device_node *np;
134 int i;
135
136 /* All ISUs are setup, complete initialization */
137 mpic_init(pSeries_mpic);
138
139 /* Check what kind of cascade ACK we have */
140 if (!(np = of_find_node_by_name(NULL, "pci"))
141 || !(addrp = (unsigned int *)
142 get_property(np, "8259-interrupt-acknowledge", NULL)))
143 printk(KERN_ERR "Cannot find pci to get ack address\n");
144 else
145 chrp_int_ack_special = ioremap(addrp[prom_n_addr_cells(np)-1], 1);
146 of_node_put(np);
147
148 /* Setup the legacy interrupts & controller */
149 for (i = 0; i < NUM_ISA_INTERRUPTS; i++)
150 irq_desc[i].handler = &i8259_pic;
151 i8259_init(0);
152
153 /* Hook cascade to mpic */
154 mpic_setup_cascade(NUM_ISA_INTERRUPTS, pSeries_irq_cascade, NULL);
155}
156
157static void __init pSeries_setup_mpic(void)
158{
159 unsigned int *opprop;
160 unsigned long openpic_addr = 0;
161 unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS];
162 struct device_node *root;
163 int irq_count;
164
165 /* Find the Open PIC if present */
166 root = of_find_node_by_path("/");
167 opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL);
168 if (opprop != 0) {
169 int n = prom_n_addr_cells(root);
170
171 for (openpic_addr = 0; n > 0; --n)
172 openpic_addr = (openpic_addr << 32) + *opprop++;
173 printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
174 }
175 of_node_put(root);
176
177 BUG_ON(openpic_addr == 0);
178
179 /* Get the sense values from OF */
180 prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS);
181
182 /* Setup the openpic driver */
183 irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */
184 pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY,
185 16, 16, irq_count, /* isu size, irq offset, irq count */
186 NR_IRQS - 4, /* ipi offset */
187 senses, irq_count, /* sense & sense size */
188 " MPIC ");
189}
190
191static void pseries_lpar_enable_pmcs(void)
192{
193 unsigned long set, reset;
194
195 power4_enable_pmcs();
196
197 set = 1UL << 63;
198 reset = 0;
199 plpar_hcall_norets(H_PERFMON, set, reset);
200
201 /* instruct hypervisor to maintain PMCs */
202 if (firmware_has_feature(FW_FEATURE_SPLPAR))
203 get_paca()->lppaca.pmcregs_in_use = 1;
204}
205
206static void __init pSeries_setup_arch(void)
207{
208 /* Fixup ppc_md depending on the type of interrupt controller */
209 if (ppc64_interrupt_controller == IC_OPEN_PIC) {
210 ppc_md.init_IRQ = pSeries_init_mpic;
211 ppc_md.get_irq = mpic_get_irq;
212 ppc_md.cpu_irq_down = mpic_teardown_this_cpu;
213 /* Allocate the mpic now, so that find_and_init_phbs() can
214 * fill the ISUs */
215 pSeries_setup_mpic();
216 } else {
217 ppc_md.init_IRQ = xics_init_IRQ;
218 ppc_md.get_irq = xics_get_irq;
219 ppc_md.cpu_irq_down = xics_teardown_cpu;
220 }
221
222#ifdef CONFIG_SMP
223 smp_init_pSeries();
224#endif
225 /* openpic global configuration register (64-bit format). */
226 /* openpic Interrupt Source Unit pointer (64-bit format). */
227 /* python0 facility area (mmio) (64-bit format) REAL address. */
228
229 /* init to some ~sane value until calibrate_delay() runs */
230 loops_per_jiffy = 50000000;
231
232 if (ROOT_DEV == 0) {
233 printk("No ramdisk, default root is /dev/sda2\n");
234 ROOT_DEV = Root_SDA2;
235 }
236
237 fwnmi_init();
238
239 /* Find and initialize PCI host bridges */
240 init_pci_config_tokens();
241 find_and_init_phbs();
242 eeh_init();
243
244#ifdef CONFIG_DUMMY_CONSOLE
245 conswitchp = &dummy_con;
246#endif
247
248 pSeries_nvram_init();
249
250 /* Choose an idle loop */
251 if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
252 vpa_init(boot_cpuid);
253 if (get_paca()->lppaca.shared_proc) {
254 printk(KERN_INFO "Using shared processor idle loop\n");
255 ppc_md.idle_loop = pseries_shared_idle;
256 } else {
257 printk(KERN_INFO "Using dedicated idle loop\n");
258 ppc_md.idle_loop = pseries_dedicated_idle;
259 }
260 } else {
261 printk(KERN_INFO "Using default idle loop\n");
262 ppc_md.idle_loop = default_idle;
263 }
264
265 if (systemcfg->platform & PLATFORM_LPAR)
266 ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
267 else
268 ppc_md.enable_pmcs = power4_enable_pmcs;
269}
270
271static int __init pSeries_init_panel(void)
272{
273 /* Manually leave the kernel version on the panel. */
274 ppc_md.progress("Linux ppc64\n", 0);
275 ppc_md.progress(system_utsname.version, 0);
276
277 return 0;
278}
279arch_initcall(pSeries_init_panel);
280
281
282/* Build up the ppc64_firmware_features bitmask field
283 * using contents of device-tree/ibm,hypertas-functions.
284 * Ultimately this functionality may be moved into prom.c prom_init().
285 */
286static void __init fw_feature_init(void)
287{
288 struct device_node * dn;
289 char * hypertas;
290 unsigned int len;
291
292 DBG(" -> fw_feature_init()\n");
293
294 ppc64_firmware_features = 0;
295 dn = of_find_node_by_path("/rtas");
296 if (dn == NULL) {
297 printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n");
298 goto no_rtas;
299 }
300
301 hypertas = get_property(dn, "ibm,hypertas-functions", &len);
302 if (hypertas) {
303 while (len > 0){
304 int i, hypertas_len;
305 /* check value against table of strings */
306 for(i=0; i < FIRMWARE_MAX_FEATURES ;i++) {
307 if ((firmware_features_table[i].name) &&
308 (strcmp(firmware_features_table[i].name,hypertas))==0) {
309 /* we have a match */
310 ppc64_firmware_features |=
311 (firmware_features_table[i].val);
312 break;
313 }
314 }
315 hypertas_len = strlen(hypertas);
316 len -= hypertas_len +1;
317 hypertas+= hypertas_len +1;
318 }
319 }
320
321 of_node_put(dn);
322 no_rtas:
323 printk(KERN_INFO "firmware_features = 0x%lx\n",
324 ppc64_firmware_features);
325
326 DBG(" <- fw_feature_init()\n");
327}
328
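fw_feature_init() treats ibm,hypertas-functions as NUL-terminated strings packed back to back, advancing by strlen()+1 until the property length is consumed. A stand-alone sketch of that walk (the strings and the feature table are invented for the example):

#include <stdio.h>
#include <string.h>

static const struct { const char *name; unsigned long val; } features[] = {
	{ "hcall-pft",	  1UL << 0 },
	{ "hcall-splpar", 1UL << 1 },
};

int main(void)
{
	/* two strings packed back to back, each with its terminating NUL */
	const char prop[] = "hcall-pft\0hcall-splpar";
	int len = sizeof(prop);		/* property length includes the NULs */
	const char *p = prop;
	unsigned long mask = 0;
	size_t i;

	while (len > 0) {
		for (i = 0; i < sizeof(features) / sizeof(features[0]); i++)
			if (!strcmp(features[i].name, p))
				mask |= features[i].val;
		len -= strlen(p) + 1;
		p += strlen(p) + 1;
	}
	printf("firmware_features = 0x%lx\n", mask);	/* prints 0x3 */
	return 0;
}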
329
330static void __init pSeries_discover_pic(void)
331{
332 struct device_node *np;
333 char *typep;
334
335 /*
336 * Setup interrupt mapping options that are needed for finish_device_tree
337 * to properly parse the OF interrupt tree & do the virtual irq mapping
338 */
339 __irq_offset_value = NUM_ISA_INTERRUPTS;
340 ppc64_interrupt_controller = IC_INVALID;
341 for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) {
342 typep = (char *)get_property(np, "compatible", NULL);
343 if (strstr(typep, "open-pic"))
344 ppc64_interrupt_controller = IC_OPEN_PIC;
345 else if (strstr(typep, "ppc-xicp"))
346 ppc64_interrupt_controller = IC_PPC_XIC;
347 else
348 printk("pSeries_discover_pic: failed to recognize"
349 " interrupt-controller\n");
350 break;
351 }
352}
353
354static void pSeries_mach_cpu_die(void)
355{
356 local_irq_disable();
357 idle_task_exit();
 358	/* Some hardware requires clearing the CPPR, while other hardware does not;
 359	 * it is safe either way.
360 */
361 pSeriesLP_cppr_info(0, 0);
362 rtas_stop_self();
363 /* Should never get here... */
364 BUG();
365 for(;;);
366}
367
368
369/*
370 * Early initialization. Relocation is on but do not reference unbolted pages
371 */
372static void __init pSeries_init_early(void)
373{
374 void *comport;
375 int iommu_off = 0;
376 unsigned int default_speed;
377 u64 physport;
378
379 DBG(" -> pSeries_init_early()\n");
380
381 fw_feature_init();
382
383 if (systemcfg->platform & PLATFORM_LPAR)
384 hpte_init_lpar();
385 else {
386 hpte_init_native();
387 iommu_off = (of_chosen &&
388 get_property(of_chosen, "linux,iommu-off", NULL));
389 }
390
391 generic_find_legacy_serial_ports(&physport, &default_speed);
392
393 if (systemcfg->platform & PLATFORM_LPAR)
394 find_udbg_vterm();
395 else if (physport) {
396 /* Map the uart for udbg. */
397 comport = (void *)ioremap(physport, 16);
398 udbg_init_uart(comport, default_speed);
399
400 DBG("Hello World !\n");
401 }
402
403
404 iommu_init_early_pSeries();
405
406 pSeries_discover_pic();
407
408 DBG(" <- pSeries_init_early()\n");
409}
410
411
412static int pSeries_check_legacy_ioport(unsigned int baseport)
413{
414 struct device_node *np;
415
416#define I8042_DATA_REG 0x60
417#define FDC_BASE 0x3f0
418
419
420 switch(baseport) {
421 case I8042_DATA_REG:
422 np = of_find_node_by_type(NULL, "8042");
423 if (np == NULL)
424 return -ENODEV;
425 of_node_put(np);
426 break;
427 case FDC_BASE:
428 np = of_find_node_by_type(NULL, "fdc");
429 if (np == NULL)
430 return -ENODEV;
431 of_node_put(np);
432 break;
433 }
434 return 0;
435}
436
437/*
 438 * Called very early, MMU is off, device-tree isn't unflattened yet
439 */
440extern struct machdep_calls pSeries_md;
441
442static int __init pSeries_probe(int platform)
443{
444 if (platform != PLATFORM_PSERIES &&
445 platform != PLATFORM_PSERIES_LPAR)
446 return 0;
447
448 /* if we have some ppc_md fixups for LPAR to do, do
449 * it here ...
450 */
451
452 return 1;
453}
454
455DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
456
457static inline void dedicated_idle_sleep(unsigned int cpu)
458{
459 struct paca_struct *ppaca = &paca[cpu ^ 1];
460
461 /* Only sleep if the other thread is not idle */
462 if (!(ppaca->lppaca.idle)) {
463 local_irq_disable();
464
465 /*
 466	 * We are about to sleep the thread and so won't be polling any
467 * more.
468 */
469 clear_thread_flag(TIF_POLLING_NRFLAG);
470
471 /*
472 * SMT dynamic mode. Cede will result in this thread going
473 * dormant, if the partner thread is still doing work. Thread
474 * wakes up if partner goes idle, an interrupt is presented, or
475 * a prod occurs. Returning from the cede enables external
476 * interrupts.
477 */
478 if (!need_resched())
479 cede_processor();
480 else
481 local_irq_enable();
482 } else {
483 /*
484 * Give the HV an opportunity at the processor, since we are
485 * not doing any work.
486 */
487 poll_pending();
488 }
489}
490
491static int pseries_dedicated_idle(void)
492{
493 long oldval;
494 struct paca_struct *lpaca = get_paca();
495 unsigned int cpu = smp_processor_id();
496 unsigned long start_snooze;
497 unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
498
499 while (1) {
500 /*
501 * Indicate to the HV that we are idle. Now would be
502 * a good time to find other work to dispatch.
503 */
504 lpaca->lppaca.idle = 1;
505
506 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
507 if (!oldval) {
508 set_thread_flag(TIF_POLLING_NRFLAG);
509
510 start_snooze = __get_tb() +
511 *smt_snooze_delay * tb_ticks_per_usec;
512
513 while (!need_resched() && !cpu_is_offline(cpu)) {
514 ppc64_runlatch_off();
515
516 /*
517 * Go into low thread priority and possibly
518 * low power mode.
519 */
520 HMT_low();
521 HMT_very_low();
522
523 if (*smt_snooze_delay != 0 &&
524 __get_tb() > start_snooze) {
525 HMT_medium();
526 dedicated_idle_sleep(cpu);
527 }
528
529 }
530
531 HMT_medium();
532 clear_thread_flag(TIF_POLLING_NRFLAG);
533 } else {
534 set_need_resched();
535 }
536
537 lpaca->lppaca.idle = 0;
538 ppc64_runlatch_on();
539
540 schedule();
541
542 if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
543 cpu_die();
544 }
545}
546
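The snooze deadline in pseries_dedicated_idle() above is the current timebase plus smt_snooze_delay microseconds converted to timebase ticks. A stand-alone sketch of that arithmetic, with the timebase frequency and delay invented for the example:

#include <stdio.h>

int main(void)
{
	unsigned long tb_ticks_per_usec = 512;	/* e.g. a 512 MHz timebase */
	unsigned long smt_snooze_delay = 100;	/* microseconds, tunable */
	unsigned long tb_now = 1000000;		/* pretend current timebase */
	unsigned long start_snooze;

	start_snooze = tb_now + smt_snooze_delay * tb_ticks_per_usec;
	printf("snooze once timebase passes %lu (now %lu)\n", start_snooze, tb_now);
	return 0;
}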
547static int pseries_shared_idle(void)
548{
549 struct paca_struct *lpaca = get_paca();
550 unsigned int cpu = smp_processor_id();
551
552 while (1) {
553 /*
554 * Indicate to the HV that we are idle. Now would be
555 * a good time to find other work to dispatch.
556 */
557 lpaca->lppaca.idle = 1;
558
559 while (!need_resched() && !cpu_is_offline(cpu)) {
560 local_irq_disable();
561 ppc64_runlatch_off();
562
563 /*
564 * Yield the processor to the hypervisor. We return if
565 * an external interrupt occurs (which are driven prior
566 * to returning here) or if a prod occurs from another
567 * processor. When returning here, external interrupts
568 * are enabled.
569 *
570 * Check need_resched() again with interrupts disabled
571 * to avoid a race.
572 */
573 if (!need_resched())
574 cede_processor();
575 else
576 local_irq_enable();
577
578 HMT_medium();
579 }
580
581 lpaca->lppaca.idle = 0;
582 ppc64_runlatch_on();
583
584 schedule();
585
586 if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
587 cpu_die();
588 }
589
590 return 0;
591}
592
593static int pSeries_pci_probe_mode(struct pci_bus *bus)
594{
595 if (systemcfg->platform & PLATFORM_LPAR)
596 return PCI_PROBE_DEVTREE;
597 return PCI_PROBE_NORMAL;
598}
599
600struct machdep_calls __initdata pSeries_md = {
601 .probe = pSeries_probe,
602 .setup_arch = pSeries_setup_arch,
603 .init_early = pSeries_init_early,
604 .get_cpuinfo = pSeries_get_cpuinfo,
605 .log_error = pSeries_log_error,
606 .pcibios_fixup = pSeries_final_fixup,
607 .pci_probe_mode = pSeries_pci_probe_mode,
608 .irq_bus_setup = pSeries_irq_bus_setup,
609 .restart = rtas_restart,
610 .power_off = rtas_power_off,
611 .halt = rtas_halt,
612 .panic = rtas_os_term,
613 .cpu_die = pSeries_mach_cpu_die,
614 .get_boot_time = rtas_get_boot_time,
615 .get_rtc_time = rtas_get_rtc_time,
616 .set_rtc_time = rtas_set_rtc_time,
617 .calibrate_decr = generic_calibrate_decr,
618 .progress = rtas_progress,
619 .check_legacy_ioport = pSeries_check_legacy_ioport,
620 .system_reset_exception = pSeries_system_reset_exception,
621 .machine_check_exception = pSeries_machine_check_exception,
622};
diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c
deleted file mode 100644
index d2c7e2c4733b..000000000000
--- a/arch/ppc64/kernel/pSeries_smp.c
+++ /dev/null
@@ -1,517 +0,0 @@
1/*
2 * SMP support for pSeries and BPA machines.
3 *
4 * Dave Engebretsen, Peter Bergner, and
5 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
6 *
7 * Plus various changes from other IBM teams...
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#undef DEBUG
16
17#include <linux/config.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/sched.h>
21#include <linux/smp.h>
22#include <linux/interrupt.h>
23#include <linux/delay.h>
24#include <linux/init.h>
25#include <linux/spinlock.h>
26#include <linux/cache.h>
27#include <linux/err.h>
28#include <linux/sysdev.h>
29#include <linux/cpu.h>
30
31#include <asm/ptrace.h>
32#include <asm/atomic.h>
33#include <asm/irq.h>
34#include <asm/page.h>
35#include <asm/pgtable.h>
36#include <asm/io.h>
37#include <asm/prom.h>
38#include <asm/smp.h>
39#include <asm/paca.h>
40#include <asm/time.h>
41#include <asm/machdep.h>
42#include <asm/xics.h>
43#include <asm/cputable.h>
44#include <asm/firmware.h>
45#include <asm/system.h>
46#include <asm/rtas.h>
47#include <asm/plpar_wrappers.h>
48#include <asm/pSeries_reconfig.h>
49
50#include "mpic.h"
51#include "bpa_iic.h"
52
53#ifdef DEBUG
54#define DBG(fmt...) udbg_printf(fmt)
55#else
56#define DBG(fmt...)
57#endif
58
59/*
60 * The primary thread of each non-boot processor is recorded here before
61 * smp init.
62 */
63static cpumask_t of_spin_map;
64
65extern void pSeries_secondary_smp_init(unsigned long);
66
67#ifdef CONFIG_HOTPLUG_CPU
68
69/* Get state of physical CPU.
70 * Return codes:
71 * 0 - The processor is in the RTAS stopped state
72 * 1 - stop-self is in progress
73 * 2 - The processor is not in the RTAS stopped state
74 * -1 - Hardware Error
75 * -2 - Hardware Busy, Try again later.
76 */
77static int query_cpu_stopped(unsigned int pcpu)
78{
79 int cpu_status;
80 int status, qcss_tok;
81
82 qcss_tok = rtas_token("query-cpu-stopped-state");
83 if (qcss_tok == RTAS_UNKNOWN_SERVICE)
84 return -1;
85 status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
86 if (status != 0) {
87 printk(KERN_ERR
88 "RTAS query-cpu-stopped-state failed: %i\n", status);
89 return status;
90 }
91
92 return cpu_status;
93}
94
95int pSeries_cpu_disable(void)
96{
97 int cpu = smp_processor_id();
98
99 cpu_clear(cpu, cpu_online_map);
100 systemcfg->processorCount--;
101
102 /*fix boot_cpuid here*/
103 if (cpu == boot_cpuid)
104 boot_cpuid = any_online_cpu(cpu_online_map);
105
106 /* FIXME: abstract this to not be platform specific later on */
107 xics_migrate_irqs_away();
108 return 0;
109}
110
111void pSeries_cpu_die(unsigned int cpu)
112{
113 int tries;
114 int cpu_status;
115 unsigned int pcpu = get_hard_smp_processor_id(cpu);
116
117 for (tries = 0; tries < 25; tries++) {
118 cpu_status = query_cpu_stopped(pcpu);
119 if (cpu_status == 0 || cpu_status == -1)
120 break;
121 msleep(200);
122 }
123 if (cpu_status != 0) {
124 printk("Querying DEAD? cpu %i (%i) shows %i\n",
125 cpu, pcpu, cpu_status);
126 }
127
 128	/* Isolation and deallocation are definitely done by
 129	 * drslot_chrp_cpu. If they were not, they would be
130 * done here. Change isolate state to Isolate and
131 * change allocation-state to Unusable.
132 */
133 paca[cpu].cpu_start = 0;
134}
135
136/*
137 * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle
138 * here is that a cpu device node may represent up to two logical cpus
139 * in the SMT case. We must honor the assumption in other code that
140 * the logical ids for sibling SMT threads x and y are adjacent, such
141 * that x^1 == y and y^1 == x.
142 */
143static int pSeries_add_processor(struct device_node *np)
144{
145 unsigned int cpu;
146 cpumask_t candidate_map, tmp = CPU_MASK_NONE;
147 int err = -ENOSPC, len, nthreads, i;
148 u32 *intserv;
149
150 intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
151 if (!intserv)
152 return 0;
153
154 nthreads = len / sizeof(u32);
155 for (i = 0; i < nthreads; i++)
156 cpu_set(i, tmp);
157
158 lock_cpu_hotplug();
159
160 BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map));
161
162 /* Get a bitmap of unoccupied slots. */
163 cpus_xor(candidate_map, cpu_possible_map, cpu_present_map);
164 if (cpus_empty(candidate_map)) {
165 /* If we get here, it most likely means that NR_CPUS is
166 * less than the partition's max processors setting.
167 */
168 printk(KERN_ERR "Cannot add cpu %s; this system configuration"
169 " supports %d logical cpus.\n", np->full_name,
170 cpus_weight(cpu_possible_map));
171 goto out_unlock;
172 }
173
174 while (!cpus_empty(tmp))
175 if (cpus_subset(tmp, candidate_map))
176 /* Found a range where we can insert the new cpu(s) */
177 break;
178 else
179 cpus_shift_left(tmp, tmp, nthreads);
180
181 if (cpus_empty(tmp)) {
182 printk(KERN_ERR "Unable to find space in cpu_present_map for"
183 " processor %s with %d thread(s)\n", np->name,
184 nthreads);
185 goto out_unlock;
186 }
187
188 for_each_cpu_mask(cpu, tmp) {
189 BUG_ON(cpu_isset(cpu, cpu_present_map));
190 cpu_set(cpu, cpu_present_map);
191 set_hard_smp_processor_id(cpu, *intserv++);
192 }
193 err = 0;
194out_unlock:
195 unlock_cpu_hotplug();
196 return err;
197}
198
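The sibling convention described above means a thread's partner is always cpu ^ 1, which is exactly how dedicated_idle_sleep() in pSeries_setup.c picks &paca[cpu ^ 1]. A trivial stand-alone demonstration of that mapping:

#include <stdio.h>

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < 4; cpu++)
		printf("cpu %d <-> sibling %d\n", cpu, cpu ^ 1);
	/* prints 0<->1, 1<->0, 2<->3, 3<->2 */
	return 0;
}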
199/*
200 * Update the present map for a cpu node which is going away, and set
201 * the hard id in the paca(s) to -1 to be consistent with boot time
202 * convention for non-present cpus.
203 */
204static void pSeries_remove_processor(struct device_node *np)
205{
206 unsigned int cpu;
207 int len, nthreads, i;
208 u32 *intserv;
209
210 intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
211 if (!intserv)
212 return;
213
214 nthreads = len / sizeof(u32);
215
216 lock_cpu_hotplug();
217 for (i = 0; i < nthreads; i++) {
218 for_each_present_cpu(cpu) {
219 if (get_hard_smp_processor_id(cpu) != intserv[i])
220 continue;
221 BUG_ON(cpu_online(cpu));
222 cpu_clear(cpu, cpu_present_map);
223 set_hard_smp_processor_id(cpu, -1);
224 break;
225 }
226 if (cpu == NR_CPUS)
227 printk(KERN_WARNING "Could not find cpu to remove "
228 "with physical id 0x%x\n", intserv[i]);
229 }
230 unlock_cpu_hotplug();
231}
232
233static int pSeries_smp_notifier(struct notifier_block *nb, unsigned long action, void *node)
234{
235 int err = NOTIFY_OK;
236
237 switch (action) {
238 case PSERIES_RECONFIG_ADD:
239 if (pSeries_add_processor(node))
240 err = NOTIFY_BAD;
241 break;
242 case PSERIES_RECONFIG_REMOVE:
243 pSeries_remove_processor(node);
244 break;
245 default:
246 err = NOTIFY_DONE;
247 break;
248 }
249 return err;
250}
251
252static struct notifier_block pSeries_smp_nb = {
253 .notifier_call = pSeries_smp_notifier,
254};
255
256#endif /* CONFIG_HOTPLUG_CPU */
257
258/**
259 * smp_startup_cpu() - start the given cpu
260 *
261 * At boot time, there is nothing to do for primary threads which were
262 * started from Open Firmware. For anything else, call RTAS with the
263 * appropriate start location.
264 *
265 * Returns:
266 * 0 - failure
267 * 1 - success
268 */
269static inline int __devinit smp_startup_cpu(unsigned int lcpu)
270{
271 int status;
272 unsigned long start_here = __pa((u32)*((unsigned long *)
273 pSeries_secondary_smp_init));
274 unsigned int pcpu;
275 int start_cpu;
276
277 if (cpu_isset(lcpu, of_spin_map))
278 /* Already started by OF and sitting in spin loop */
279 return 1;
280
281 pcpu = get_hard_smp_processor_id(lcpu);
282
283 /* Fixup atomic count: it exited inside IRQ handler. */
284 paca[lcpu].__current->thread_info->preempt_count = 0;
285
286 /*
287 * If the RTAS start-cpu token does not exist then presume the
288 * cpu is already spinning.
289 */
290 start_cpu = rtas_token("start-cpu");
291 if (start_cpu == RTAS_UNKNOWN_SERVICE)
292 return 1;
293
294 status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu);
295 if (status != 0) {
296 printk(KERN_ERR "start-cpu failed: %i\n", status);
297 return 0;
298 }
299
300 return 1;
301}
302
303#ifdef CONFIG_XICS
304static inline void smp_xics_do_message(int cpu, int msg)
305{
306 set_bit(msg, &xics_ipi_message[cpu].value);
307 mb();
308 xics_cause_IPI(cpu);
309}
310
311static void smp_xics_message_pass(int target, int msg)
312{
313 unsigned int i;
314
315 if (target < NR_CPUS) {
316 smp_xics_do_message(target, msg);
317 } else {
318 for_each_online_cpu(i) {
319 if (target == MSG_ALL_BUT_SELF
320 && i == smp_processor_id())
321 continue;
322 smp_xics_do_message(i, msg);
323 }
324 }
325}
326
327static int __init smp_xics_probe(void)
328{
329 xics_request_IPIs();
330
331 return cpus_weight(cpu_possible_map);
332}
333
334static void __devinit smp_xics_setup_cpu(int cpu)
335{
336 if (cpu != boot_cpuid)
337 xics_setup_cpu();
338
339 if (firmware_has_feature(FW_FEATURE_SPLPAR))
340 vpa_init(cpu);
341
342 cpu_clear(cpu, of_spin_map);
343
344}
345#endif /* CONFIG_XICS */
346#ifdef CONFIG_BPA_IIC
347static void smp_iic_message_pass(int target, int msg)
348{
349 unsigned int i;
350
351 if (target < NR_CPUS) {
352 iic_cause_IPI(target, msg);
353 } else {
354 for_each_online_cpu(i) {
355 if (target == MSG_ALL_BUT_SELF
356 && i == smp_processor_id())
357 continue;
358 iic_cause_IPI(i, msg);
359 }
360 }
361}
362
363static int __init smp_iic_probe(void)
364{
365 iic_request_IPIs();
366
367 return cpus_weight(cpu_possible_map);
368}
369
370static void __devinit smp_iic_setup_cpu(int cpu)
371{
372 if (cpu != boot_cpuid)
373 iic_setup_cpu();
374}
375#endif /* CONFIG_BPA_IIC */
376
377static DEFINE_SPINLOCK(timebase_lock);
378static unsigned long timebase = 0;
379
380static void __devinit pSeries_give_timebase(void)
381{
382 spin_lock(&timebase_lock);
383 rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
384 timebase = get_tb();
385 spin_unlock(&timebase_lock);
386
387 while (timebase)
388 barrier();
389 rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
390}
391
392static void __devinit pSeries_take_timebase(void)
393{
394 while (!timebase)
395 barrier();
396 spin_lock(&timebase_lock);
397 set_tb(timebase >> 32, timebase & 0xffffffff);
398 timebase = 0;
399 spin_unlock(&timebase_lock);
400}
401
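pSeries_give_timebase()/pSeries_take_timebase() are a handshake on a shared variable: the giver publishes a value and spins until it is consumed, the taker spins until it appears. A user-space sketch of the same shape using pthreads; the RTAS freeze/thaw calls are omitted and the "timebase" value is made up, so this only illustrates the synchronisation pattern:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static volatile unsigned long timebase;

static void *take_timebase(void *arg)
{
	unsigned long tb;

	while (!timebase)
		;				/* spin until the boot cpu gives it */
	pthread_mutex_lock(&lock);
	tb = timebase;
	timebase = 0;				/* tell the giver we are done */
	pthread_mutex_unlock(&lock);
	printf("secondary synchronised to %lu\n", tb);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, take_timebase, NULL);

	pthread_mutex_lock(&lock);
	timebase = 123456789UL;			/* stand-in for get_tb() */
	pthread_mutex_unlock(&lock);
	while (timebase)
		;				/* wait until the taker clears it */

	pthread_join(t, NULL);
	return 0;
}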
402static void __devinit smp_pSeries_kick_cpu(int nr)
403{
404 BUG_ON(nr < 0 || nr >= NR_CPUS);
405
406 if (!smp_startup_cpu(nr))
407 return;
408
409 /*
410 * The processor is currently spinning, waiting for the
 411	 * cpu_start field to become non-zero. After we set cpu_start,
 412	 * the processor will continue on to secondary_start.
413 */
414 paca[nr].cpu_start = 1;
415}
416
417static int smp_pSeries_cpu_bootable(unsigned int nr)
418{
419 /* Special case - we inhibit secondary thread startup
420 * during boot if the user requests it. Odd-numbered
421 * cpus are assumed to be secondary threads.
422 */
423 if (system_state < SYSTEM_RUNNING &&
424 cpu_has_feature(CPU_FTR_SMT) &&
425 !smt_enabled_at_boot && nr % 2 != 0)
426 return 0;
427
428 return 1;
429}
430#ifdef CONFIG_MPIC
431static struct smp_ops_t pSeries_mpic_smp_ops = {
432 .message_pass = smp_mpic_message_pass,
433 .probe = smp_mpic_probe,
434 .kick_cpu = smp_pSeries_kick_cpu,
435 .setup_cpu = smp_mpic_setup_cpu,
436};
437#endif
438#ifdef CONFIG_XICS
439static struct smp_ops_t pSeries_xics_smp_ops = {
440 .message_pass = smp_xics_message_pass,
441 .probe = smp_xics_probe,
442 .kick_cpu = smp_pSeries_kick_cpu,
443 .setup_cpu = smp_xics_setup_cpu,
444 .cpu_bootable = smp_pSeries_cpu_bootable,
445};
446#endif
447#ifdef CONFIG_BPA_IIC
448static struct smp_ops_t bpa_iic_smp_ops = {
449 .message_pass = smp_iic_message_pass,
450 .probe = smp_iic_probe,
451 .kick_cpu = smp_pSeries_kick_cpu,
452 .setup_cpu = smp_iic_setup_cpu,
453 .cpu_bootable = smp_pSeries_cpu_bootable,
454};
455#endif
456
457/* This is called very early */
458void __init smp_init_pSeries(void)
459{
460 int i;
461
462 DBG(" -> smp_init_pSeries()\n");
463
464 switch (ppc64_interrupt_controller) {
465#ifdef CONFIG_MPIC
466 case IC_OPEN_PIC:
467 smp_ops = &pSeries_mpic_smp_ops;
468 break;
469#endif
470#ifdef CONFIG_XICS
471 case IC_PPC_XIC:
472 smp_ops = &pSeries_xics_smp_ops;
473 break;
474#endif
475#ifdef CONFIG_BPA_IIC
476 case IC_BPA_IIC:
477 smp_ops = &bpa_iic_smp_ops;
478 break;
479#endif
480 default:
481 panic("Invalid interrupt controller");
482 }
483
484#ifdef CONFIG_HOTPLUG_CPU
485 smp_ops->cpu_disable = pSeries_cpu_disable;
486 smp_ops->cpu_die = pSeries_cpu_die;
487
488 /* Processors can be added/removed only on LPAR */
489 if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
490 pSeries_reconfig_notifier_register(&pSeries_smp_nb);
491#endif
492
493 /* Mark threads which are still spinning in hold loops. */
494 if (cpu_has_feature(CPU_FTR_SMT)) {
495 for_each_present_cpu(i) {
496 if (i % 2 == 0)
497 /*
498 * Even-numbered logical cpus correspond to
499 * primary threads.
500 */
501 cpu_set(i, of_spin_map);
502 }
503 } else {
504 of_spin_map = cpu_present_map;
505 }
506
507 cpu_clear(boot_cpuid, of_spin_map);
508
509 /* Non-lpar has additional take/give timebase */
510 if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
511 smp_ops->give_timebase = pSeries_give_timebase;
512 smp_ops->take_timebase = pSeries_take_timebase;
513 }
514
515 DBG(" <- smp_init_pSeries()\n");
516}
517
diff --git a/arch/ppc64/kernel/pSeries_vio.c b/arch/ppc64/kernel/pSeries_vio.c
deleted file mode 100644
index e0ae06f58f86..000000000000
--- a/arch/ppc64/kernel/pSeries_vio.c
+++ /dev/null
@@ -1,273 +0,0 @@
1/*
2 * IBM PowerPC pSeries Virtual I/O Infrastructure Support.
3 *
4 * Copyright (c) 2003-2005 IBM Corp.
5 * Dave Engebretsen engebret@us.ibm.com
6 * Santiago Leon santil@us.ibm.com
7 * Hollis Blanchard <hollisb@us.ibm.com>
8 * Stephen Rothwell
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/mm.h>
19#include <linux/kobject.h>
20#include <asm/iommu.h>
21#include <asm/dma.h>
22#include <asm/prom.h>
23#include <asm/vio.h>
24#include <asm/hvcall.h>
25
26extern struct subsystem devices_subsys; /* needed for vio_find_name() */
27
28static void probe_bus_pseries(void)
29{
30 struct device_node *node_vroot, *of_node;
31
32 node_vroot = find_devices("vdevice");
33 if ((node_vroot == NULL) || (node_vroot->child == NULL))
34 /* this machine doesn't do virtual IO, and that's ok */
35 return;
36
37 /*
38 * Create struct vio_devices for each virtual device in the device tree.
39 * Drivers will associate with them later.
40 */
41 for (of_node = node_vroot->child; of_node != NULL;
42 of_node = of_node->sibling) {
43 printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node);
44 vio_register_device_node(of_node);
45 }
46}
47
48/**
49 * vio_match_device_pseries: - Tell if a pSeries VIO device matches a
50 * vio_device_id
51 */
52static int vio_match_device_pseries(const struct vio_device_id *id,
53 const struct vio_dev *dev)
54{
55 return (strncmp(dev->type, id->type, strlen(id->type)) == 0) &&
56 device_is_compatible(dev->dev.platform_data, id->compat);
57}
58
59static void vio_release_device_pseries(struct device *dev)
60{
61 /* XXX free TCE table */
62 of_node_put(dev->platform_data);
63}
64
65static ssize_t viodev_show_devspec(struct device *dev,
66 struct device_attribute *attr, char *buf)
67{
68 struct device_node *of_node = dev->platform_data;
69
70 return sprintf(buf, "%s\n", of_node->full_name);
71}
72DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL);
73
74static void vio_unregister_device_pseries(struct vio_dev *viodev)
75{
76 device_remove_file(&viodev->dev, &dev_attr_devspec);
77}
78
79static struct vio_bus_ops vio_bus_ops_pseries = {
80 .match = vio_match_device_pseries,
81 .unregister_device = vio_unregister_device_pseries,
82 .release_device = vio_release_device_pseries,
83};
84
85/**
86 * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus
87 */
88static int __init vio_bus_init_pseries(void)
89{
90 int err;
91
92 err = vio_bus_init(&vio_bus_ops_pseries);
93 if (err == 0)
94 probe_bus_pseries();
95 return err;
96}
97
98__initcall(vio_bus_init_pseries);
99
100/**
101 * vio_build_iommu_table: - gets the dma information from OF and
 102 * builds the TCE table.
103 * @dev: the virtual device.
104 *
 105 * Returns a pointer to the built TCE table, or NULL if it can't
 106 * find the property.
107*/
108static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
109{
110 unsigned int *dma_window;
111 struct iommu_table *newTceTable;
112 unsigned long offset;
113 int dma_window_property_size;
114
115 dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
116 if(!dma_window) {
117 return NULL;
118 }
119
120 newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
121
122 /* There should be some code to extract the phys-encoded offset
123 using prom_n_addr_cells(). However, according to a comment
124 on earlier versions, it's always zero, so we don't bother */
125 offset = dma_window[1] >> PAGE_SHIFT;
126
127 /* TCE table size - measured in tce entries */
128 newTceTable->it_size = dma_window[4] >> PAGE_SHIFT;
129 /* offset for VIO should always be 0 */
130 newTceTable->it_offset = offset;
131 newTceTable->it_busno = 0;
132 newTceTable->it_index = (unsigned long)dma_window[0];
133 newTceTable->it_type = TCE_VB;
134
135 return iommu_init_table(newTceTable);
136}
137
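vio_build_iommu_table() pulls the LIOBN, the offset and the window size out of the ibm,my-dma-window cells. A stand-alone sketch decoding such an array (the cell values and the 4 KB page shift are invented; the real property comes from the device tree):

#include <stdio.h>

#define EXAMPLE_PAGE_SHIFT 12			/* 4 KB pages */

int main(void)
{
	/* cell 0 = LIOBN, cell 1 = offset, cell 4 = window size, as used above */
	unsigned int dma_window[5] = { 0x10000000, 0x0, 0x0, 0x0, 0x10000000 };

	unsigned long index  = dma_window[0];
	unsigned long offset = dma_window[1] >> EXAMPLE_PAGE_SHIFT;
	unsigned long size   = dma_window[4] >> EXAMPLE_PAGE_SHIFT;

	printf("liobn 0x%lx, offset %lu, %lu TCE entries\n", index, offset, size);
	return 0;
}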
138/**
139 * vio_register_device_node: - Register a new vio device.
140 * @of_node: The OF node for this device.
141 *
142 * Creates and initializes a vio_dev structure from the data in
143 * of_node (dev.platform_data) and adds it to the list of virtual devices.
144 * Returns a pointer to the created vio_dev or NULL if node has
145 * NULL device_type or compatible fields.
146 */
147struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
148{
149 struct vio_dev *viodev;
150 unsigned int *unit_address;
151 unsigned int *irq_p;
152
153 /* we need the 'device_type' property, in order to match with drivers */
154 if ((NULL == of_node->type)) {
155 printk(KERN_WARNING
156 "%s: node %s missing 'device_type'\n", __FUNCTION__,
157 of_node->name ? of_node->name : "<unknown>");
158 return NULL;
159 }
160
161 unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
162 if (!unit_address) {
163 printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__,
164 of_node->name ? of_node->name : "<unknown>");
165 return NULL;
166 }
167
168 /* allocate a vio_dev for this node */
169 viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
170 if (!viodev) {
171 return NULL;
172 }
173 memset(viodev, 0, sizeof(struct vio_dev));
174
175 viodev->dev.platform_data = of_node_get(of_node);
176
177 viodev->irq = NO_IRQ;
178 irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
179 if (irq_p) {
180 int virq = virt_irq_create_mapping(*irq_p);
181 if (virq == NO_IRQ) {
182 printk(KERN_ERR "Unable to allocate interrupt "
183 "number for %s\n", of_node->full_name);
184 } else
185 viodev->irq = irq_offset_up(virq);
186 }
187
188 snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
189 viodev->name = of_node->name;
190 viodev->type = of_node->type;
191 viodev->unit_address = *unit_address;
192 viodev->iommu_table = vio_build_iommu_table(viodev);
193
194 /* register with generic device framework */
195 if (vio_register_device(viodev) == NULL) {
196 /* XXX free TCE table */
197 kfree(viodev);
198 return NULL;
199 }
200 device_create_file(&viodev->dev, &dev_attr_devspec);
201
202 return viodev;
203}
204EXPORT_SYMBOL(vio_register_device_node);
205
206/**
207 * vio_get_attribute: - get attribute for virtual device
208 * @vdev: The vio device to get property.
209 * @which: The property/attribute to be extracted.
210 * @length: Pointer to length of returned data size (unused if NULL).
211 *
212 * Calls prom.c's get_property() to return the value of the
213 * attribute specified by the preprocessor constant @which
214*/
215const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
216{
217 return get_property(vdev->dev.platform_data, (char*)which, length);
218}
219EXPORT_SYMBOL(vio_get_attribute);
220
221/* vio_find_name() - internal because only vio.c knows how we formatted the
222 * kobject name
223 * XXX once vio_bus_type.devices is actually used as a kset in
224 * drivers/base/bus.c, this function should be removed in favor of
225 * "device_find(kobj_name, &vio_bus_type)"
226 */
227static struct vio_dev *vio_find_name(const char *kobj_name)
228{
229 struct kobject *found;
230
231 found = kset_find_obj(&devices_subsys.kset, kobj_name);
232 if (!found)
233 return NULL;
234
235 return to_vio_dev(container_of(found, struct device, kobj));
236}
237
238/**
239 * vio_find_node - find an already-registered vio_dev
240 * @vnode: device_node of the virtual device we're looking for
241 */
242struct vio_dev *vio_find_node(struct device_node *vnode)
243{
244 uint32_t *unit_address;
245 char kobj_name[BUS_ID_SIZE];
246
247 /* construct the kobject name from the device node */
248 unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
249 if (!unit_address)
250 return NULL;
251 snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
252
253 return vio_find_name(kobj_name);
254}
255EXPORT_SYMBOL(vio_find_node);
256
257int vio_enable_interrupts(struct vio_dev *dev)
258{
259 int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
260 if (rc != H_Success)
261 printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
262 return rc;
263}
264EXPORT_SYMBOL(vio_enable_interrupts);
265
266int vio_disable_interrupts(struct vio_dev *dev)
267{
268 int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
269 if (rc != H_Success)
270 printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
271 return rc;
272}
273EXPORT_SYMBOL(vio_disable_interrupts);
diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c
index ff4be1da69d5..feec06bbafc3 100644
--- a/arch/ppc64/kernel/pci.c
+++ b/arch/ppc64/kernel/pci.c
@@ -31,8 +31,7 @@
 #include <asm/irq.h>
 #include <asm/machdep.h>
 #include <asm/udbg.h>
-
-#include "pci.h"
+#include <asm/ppc-pci.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
diff --git a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h
deleted file mode 100644
index 5eb2cc320566..000000000000
--- a/arch/ppc64/kernel/pci.h
+++ /dev/null
@@ -1,54 +0,0 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#ifndef __PPC_KERNEL_PCI_H__
10#define __PPC_KERNEL_PCI_H__
11
12#include <linux/pci.h>
13#include <asm/pci-bridge.h>
14
15extern unsigned long isa_io_base;
16
17extern void pci_setup_pci_controller(struct pci_controller *hose);
18extern void pci_setup_phb_io(struct pci_controller *hose, int primary);
19extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary);
20
21
22extern struct list_head hose_list;
23extern int global_phb_number;
24
25extern unsigned long find_and_init_phbs(void);
26
27extern struct pci_dev *ppc64_isabridge_dev; /* may be NULL if no ISA bus */
28
29/* PCI device_node operations */
30struct device_node;
31typedef void *(*traverse_func)(struct device_node *me, void *data);
32void *traverse_pci_devices(struct device_node *start, traverse_func pre,
33 void *data);
34
35void pci_devs_phb_init(void);
36void pci_devs_phb_init_dynamic(struct pci_controller *phb);
37
38/* PCI address cache management routines */
39void pci_addr_cache_insert_device(struct pci_dev *dev);
40void pci_addr_cache_remove_device(struct pci_dev *dev);
41
42/* From rtas_pci.h */
43void init_pci_config_tokens (void);
44unsigned long get_phb_buid (struct device_node *);
45
46/* From pSeries_pci.h */
47extern void pSeries_final_fixup(void);
48extern void pSeries_irq_bus_setup(struct pci_bus *bus);
49
50extern unsigned long pci_probe_only;
51extern unsigned long pci_assign_all_buses;
52extern int pci_read_irq_line(struct pci_dev *pci_dev);
53
54#endif /* __PPC_KERNEL_PCI_H__ */
diff --git a/arch/ppc64/kernel/pci_direct_iommu.c b/arch/ppc64/kernel/pci_direct_iommu.c
index 54055c81017a..e1a32f802c0b 100644
--- a/arch/ppc64/kernel/pci_direct_iommu.c
+++ b/arch/ppc64/kernel/pci_direct_iommu.c
@@ -27,8 +27,7 @@
 #include <asm/machdep.h>
 #include <asm/pmac_feature.h>
 #include <asm/abs_addr.h>
-
-#include "pci.h"
+#include <asm/ppc-pci.h>
 
 static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flag)
diff --git a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c
index a86389d07d57..493bbe43f5b4 100644
--- a/arch/ppc64/kernel/pci_dn.c
+++ b/arch/ppc64/kernel/pci_dn.c
@@ -30,8 +30,7 @@
 #include <asm/prom.h>
 #include <asm/pci-bridge.h>
 #include <asm/pSeries_reconfig.h>
-
-#include "pci.h"
+#include <asm/ppc-pci.h>
 
 /*
  * Traverse_func that inits the PCI fields of the device node.
diff --git a/arch/ppc64/kernel/pci_iommu.c b/arch/ppc64/kernel/pci_iommu.c
index d9e33b7d4203..2114dc9c59b5 100644
--- a/arch/ppc64/kernel/pci_iommu.c
+++ b/arch/ppc64/kernel/pci_iommu.c
@@ -37,7 +37,7 @@
 #include <asm/iommu.h>
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
-#include "pci.h"
+#include <asm/ppc-pci.h>
 
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/iSeries/iSeries_pci.h>
@@ -61,13 +61,7 @@ static inline struct iommu_table *devnode_table(struct device *dev)
 	} else
 		pdev = to_pci_dev(dev);
 
-#ifdef CONFIG_PPC_ISERIES
-	return ISERIES_DEVNODE(pdev)->iommu_table;
-#endif /* CONFIG_PPC_ISERIES */
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
 	return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
-#endif /* CONFIG_PPC_MULTIPLATFORM */
 }
 
 
diff --git a/arch/ppc64/kernel/pmac_feature.c b/arch/ppc64/kernel/pmac_feature.c
index eb4e6c3f694d..26075f11db77 100644
--- a/arch/ppc64/kernel/pmac_feature.c
+++ b/arch/ppc64/kernel/pmac_feature.c
@@ -53,7 +53,7 @@
  * We use a single global lock to protect accesses. Each driver has
  * to take care of its own locking
  */
-static DEFINE_SPINLOCK(feature_lock __pmacdata);
+static DEFINE_SPINLOCK(feature_lock);
 
 #define LOCK(flags) spin_lock_irqsave(&feature_lock, flags);
 #define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags);
@@ -62,9 +62,9 @@ static DEFINE_SPINLOCK(feature_lock __pmacdata);
 /*
  * Instance of some macio stuffs
  */
-struct macio_chip macio_chips[MAX_MACIO_CHIPS] __pmacdata;
+struct macio_chip macio_chips[MAX_MACIO_CHIPS] ;
 
-struct macio_chip* __pmac macio_find(struct device_node* child, int type)
+struct macio_chip* macio_find(struct device_node* child, int type)
 {
 	while(child) {
 		int i;
@@ -79,7 +79,7 @@ struct macio_chip* __pmac macio_find(struct device_node* child, int type)
 }
 EXPORT_SYMBOL_GPL(macio_find);
 
-static const char* macio_names[] __pmacdata =
+static const char* macio_names[] =
 {
 	"Unknown",
 	"Grand Central",
@@ -106,9 +106,9 @@ static const char* macio_names[] __pmacdata =
 #define UN_BIS(r,v) (UN_OUT((r), UN_IN(r) | (v)))
 #define UN_BIC(r,v) (UN_OUT((r), UN_IN(r) & ~(v)))
 
-static struct device_node* uninorth_node __pmacdata;
-static u32* uninorth_base __pmacdata;
-static u32 uninorth_rev __pmacdata;
+static struct device_node* uninorth_node;
+static u32* uninorth_base;
+static u32 uninorth_rev;
 static void *u3_ht;
 
 extern struct device_node *k2_skiplist[2];
@@ -133,14 +133,14 @@ struct pmac_mb_def
 	struct feature_table_entry* features;
 	unsigned long board_flags;
 };
-static struct pmac_mb_def pmac_mb __pmacdata;
+static struct pmac_mb_def pmac_mb;
 
 /*
  * Here are the chip specific feature functions
  */
 
 
-static long __pmac g5_read_gpio(struct device_node* node, long param, long value)
+static long g5_read_gpio(struct device_node* node, long param, long value)
 {
 	struct macio_chip* macio = &macio_chips[0];
 
@@ -148,7 +148,7 @@ static long __pmac g5_read_gpio(struct device_node* node, long param, long value
148} 148}
149 149
150 150
151static long __pmac g5_write_gpio(struct device_node* node, long param, long value) 151static long g5_write_gpio(struct device_node* node, long param, long value)
152{ 152{
153 struct macio_chip* macio = &macio_chips[0]; 153 struct macio_chip* macio = &macio_chips[0];
154 154
@@ -156,7 +156,7 @@ static long __pmac g5_write_gpio(struct device_node* node, long param, long valu
156 return 0; 156 return 0;
157} 157}
158 158
159static long __pmac g5_gmac_enable(struct device_node* node, long param, long value) 159static long g5_gmac_enable(struct device_node* node, long param, long value)
160{ 160{
161 struct macio_chip* macio = &macio_chips[0]; 161 struct macio_chip* macio = &macio_chips[0];
162 unsigned long flags; 162 unsigned long flags;
@@ -181,7 +181,7 @@ static long __pmac g5_gmac_enable(struct device_node* node, long param, long val
181 return 0; 181 return 0;
182} 182}
183 183
184static long __pmac g5_fw_enable(struct device_node* node, long param, long value) 184static long g5_fw_enable(struct device_node* node, long param, long value)
185{ 185{
186 struct macio_chip* macio = &macio_chips[0]; 186 struct macio_chip* macio = &macio_chips[0];
187 unsigned long flags; 187 unsigned long flags;
@@ -206,7 +206,7 @@ static long __pmac g5_fw_enable(struct device_node* node, long param, long value
206 return 0; 206 return 0;
207} 207}
208 208
209static long __pmac g5_mpic_enable(struct device_node* node, long param, long value) 209static long g5_mpic_enable(struct device_node* node, long param, long value)
210{ 210{
211 unsigned long flags; 211 unsigned long flags;
212 212
@@ -220,7 +220,7 @@ static long __pmac g5_mpic_enable(struct device_node* node, long param, long val
220 return 0; 220 return 0;
221} 221}
222 222
223static long __pmac g5_eth_phy_reset(struct device_node* node, long param, long value) 223static long g5_eth_phy_reset(struct device_node* node, long param, long value)
224{ 224{
225 struct macio_chip* macio = &macio_chips[0]; 225 struct macio_chip* macio = &macio_chips[0];
226 struct device_node *phy; 226 struct device_node *phy;
@@ -250,7 +250,7 @@ static long __pmac g5_eth_phy_reset(struct device_node* node, long param, long v
250 return 0; 250 return 0;
251} 251}
252 252
253static long __pmac g5_i2s_enable(struct device_node *node, long param, long value) 253static long g5_i2s_enable(struct device_node *node, long param, long value)
254{ 254{
255 /* Very crude implementation for now */ 255 /* Very crude implementation for now */
256 struct macio_chip* macio = &macio_chips[0]; 256 struct macio_chip* macio = &macio_chips[0];
@@ -275,7 +275,7 @@ static long __pmac g5_i2s_enable(struct device_node *node, long param, long valu
275 275
276 276
277#ifdef CONFIG_SMP 277#ifdef CONFIG_SMP
278static long __pmac g5_reset_cpu(struct device_node* node, long param, long value) 278static long g5_reset_cpu(struct device_node* node, long param, long value)
279{ 279{
280 unsigned int reset_io = 0; 280 unsigned int reset_io = 0;
281 unsigned long flags; 281 unsigned long flags;
@@ -320,12 +320,12 @@ static long __pmac g5_reset_cpu(struct device_node* node, long param, long value
320 * This takes the second CPU off the bus on dual CPU machines 320 * This takes the second CPU off the bus on dual CPU machines
321 * running UP 321 * running UP
322 */ 322 */
323void __pmac g5_phy_disable_cpu1(void) 323void g5_phy_disable_cpu1(void)
324{ 324{
325 UN_OUT(U3_API_PHY_CONFIG_1, 0); 325 UN_OUT(U3_API_PHY_CONFIG_1, 0);
326} 326}
327 327
328static long __pmac generic_get_mb_info(struct device_node* node, long param, long value) 328static long generic_get_mb_info(struct device_node* node, long param, long value)
329{ 329{
330 switch(param) { 330 switch(param) {
331 case PMAC_MB_INFO_MODEL: 331 case PMAC_MB_INFO_MODEL:
@@ -347,14 +347,14 @@ static long __pmac generic_get_mb_info(struct device_node* node, long param, lon
347 347
348/* Used on any machine 348/* Used on any machine
349 */ 349 */
350static struct feature_table_entry any_features[] __pmacdata = { 350static struct feature_table_entry any_features[] = {
351 { PMAC_FTR_GET_MB_INFO, generic_get_mb_info }, 351 { PMAC_FTR_GET_MB_INFO, generic_get_mb_info },
352 { 0, NULL } 352 { 0, NULL }
353}; 353};
354 354
355/* G5 features 355/* G5 features
356 */ 356 */
357static struct feature_table_entry g5_features[] __pmacdata = { 357static struct feature_table_entry g5_features[] = {
358 { PMAC_FTR_GMAC_ENABLE, g5_gmac_enable }, 358 { PMAC_FTR_GMAC_ENABLE, g5_gmac_enable },
359 { PMAC_FTR_1394_ENABLE, g5_fw_enable }, 359 { PMAC_FTR_1394_ENABLE, g5_fw_enable },
360 { PMAC_FTR_ENABLE_MPIC, g5_mpic_enable }, 360 { PMAC_FTR_ENABLE_MPIC, g5_mpic_enable },
@@ -368,7 +368,7 @@ static struct feature_table_entry g5_features[] __pmacdata = {
368 { 0, NULL } 368 { 0, NULL }
369}; 369};
370 370
371static struct pmac_mb_def pmac_mb_defs[] __pmacdata = { 371static struct pmac_mb_def pmac_mb_defs[] = {
372 { "PowerMac7,2", "PowerMac G5", 372 { "PowerMac7,2", "PowerMac G5",
373 PMAC_TYPE_POWERMAC_G5, g5_features, 373 PMAC_TYPE_POWERMAC_G5, g5_features,
374 0, 374 0,
@@ -394,7 +394,7 @@ static struct pmac_mb_def pmac_mb_defs[] __pmacdata = {
394/* 394/*
395 * The toplevel feature_call callback 395 * The toplevel feature_call callback
396 */ 396 */
397long __pmac pmac_do_feature_call(unsigned int selector, ...) 397long pmac_do_feature_call(unsigned int selector, ...)
398{ 398{
399 struct device_node* node; 399 struct device_node* node;
400 long param, value; 400 long param, value;
@@ -706,8 +706,8 @@ void __init pmac_check_ht_link(void)
706 * Early video resume hook 706 * Early video resume hook
707 */ 707 */
708 708
709static void (*pmac_early_vresume_proc)(void *data) __pmacdata; 709static void (*pmac_early_vresume_proc)(void *data);
710static void *pmac_early_vresume_data __pmacdata; 710static void *pmac_early_vresume_data;
711 711
712void pmac_set_early_video_resume(void (*proc)(void *data), void *data) 712void pmac_set_early_video_resume(void (*proc)(void *data), void *data)
713{ 713{
@@ -725,11 +725,11 @@ EXPORT_SYMBOL(pmac_set_early_video_resume);
725 * AGP related suspend/resume code 725 * AGP related suspend/resume code
726 */ 726 */
727 727
728static struct pci_dev *pmac_agp_bridge __pmacdata; 728static struct pci_dev *pmac_agp_bridge;
729static int (*pmac_agp_suspend)(struct pci_dev *bridge) __pmacdata; 729static int (*pmac_agp_suspend)(struct pci_dev *bridge);
730static int (*pmac_agp_resume)(struct pci_dev *bridge) __pmacdata; 730static int (*pmac_agp_resume)(struct pci_dev *bridge);
731 731
732void __pmac pmac_register_agp_pm(struct pci_dev *bridge, 732void pmac_register_agp_pm(struct pci_dev *bridge,
733 int (*suspend)(struct pci_dev *bridge), 733 int (*suspend)(struct pci_dev *bridge),
734 int (*resume)(struct pci_dev *bridge)) 734 int (*resume)(struct pci_dev *bridge))
735{ 735{
@@ -746,7 +746,7 @@ void __pmac pmac_register_agp_pm(struct pci_dev *bridge,
746} 746}
747EXPORT_SYMBOL(pmac_register_agp_pm); 747EXPORT_SYMBOL(pmac_register_agp_pm);
748 748
749void __pmac pmac_suspend_agp_for_card(struct pci_dev *dev) 749void pmac_suspend_agp_for_card(struct pci_dev *dev)
750{ 750{
751 if (pmac_agp_bridge == NULL || pmac_agp_suspend == NULL) 751 if (pmac_agp_bridge == NULL || pmac_agp_suspend == NULL)
752 return; 752 return;
@@ -756,7 +756,7 @@ void __pmac pmac_suspend_agp_for_card(struct pci_dev *dev)
756} 756}
757EXPORT_SYMBOL(pmac_suspend_agp_for_card); 757EXPORT_SYMBOL(pmac_suspend_agp_for_card);
758 758
759void __pmac pmac_resume_agp_for_card(struct pci_dev *dev) 759void pmac_resume_agp_for_card(struct pci_dev *dev)
760{ 760{
761 if (pmac_agp_bridge == NULL || pmac_agp_resume == NULL) 761 if (pmac_agp_bridge == NULL || pmac_agp_resume == NULL)
762 return; 762 return;
diff --git a/arch/ppc64/kernel/pmac_nvram.c b/arch/ppc64/kernel/pmac_nvram.c
index e32a902236e3..11586d535f81 100644
--- a/arch/ppc64/kernel/pmac_nvram.c
+++ b/arch/ppc64/kernel/pmac_nvram.c
@@ -82,10 +82,10 @@ extern int system_running;
82static int (*core99_write_bank)(int bank, u8* datas); 82static int (*core99_write_bank)(int bank, u8* datas);
83static int (*core99_erase_bank)(int bank); 83static int (*core99_erase_bank)(int bank);
84 84
85static char *nvram_image __pmacdata; 85static char *nvram_image;
86 86
87 87
88static ssize_t __pmac core99_nvram_read(char *buf, size_t count, loff_t *index) 88static ssize_t core99_nvram_read(char *buf, size_t count, loff_t *index)
89{ 89{
90 int i; 90 int i;
91 91
@@ -103,7 +103,7 @@ static ssize_t __pmac core99_nvram_read(char *buf, size_t count, loff_t *index)
103 return count; 103 return count;
104} 104}
105 105
106static ssize_t __pmac core99_nvram_write(char *buf, size_t count, loff_t *index) 106static ssize_t core99_nvram_write(char *buf, size_t count, loff_t *index)
107{ 107{
108 int i; 108 int i;
109 109
@@ -121,14 +121,14 @@ static ssize_t __pmac core99_nvram_write(char *buf, size_t count, loff_t *index)
121 return count; 121 return count;
122} 122}
123 123
124static ssize_t __pmac core99_nvram_size(void) 124static ssize_t core99_nvram_size(void)
125{ 125{
126 if (nvram_image == NULL) 126 if (nvram_image == NULL)
127 return -ENODEV; 127 return -ENODEV;
128 return NVRAM_SIZE; 128 return NVRAM_SIZE;
129} 129}
130 130
131static u8 __pmac chrp_checksum(struct chrp_header* hdr) 131static u8 chrp_checksum(struct chrp_header* hdr)
132{ 132{
133 u8 *ptr; 133 u8 *ptr;
134 u16 sum = hdr->signature; 134 u16 sum = hdr->signature;
@@ -139,7 +139,7 @@ static u8 __pmac chrp_checksum(struct chrp_header* hdr)
139 return sum; 139 return sum;
140} 140}
141 141
142static u32 __pmac core99_calc_adler(u8 *buffer) 142static u32 core99_calc_adler(u8 *buffer)
143{ 143{
144 int cnt; 144 int cnt;
145 u32 low, high; 145 u32 low, high;
@@ -161,7 +161,7 @@ static u32 __pmac core99_calc_adler(u8 *buffer)
161 return (high << 16) | low; 161 return (high << 16) | low;
162} 162}
163 163
164static u32 __pmac core99_check(u8* datas) 164static u32 core99_check(u8* datas)
165{ 165{
166 struct core99_header* hdr99 = (struct core99_header*)datas; 166 struct core99_header* hdr99 = (struct core99_header*)datas;
167 167
@@ -180,7 +180,7 @@ static u32 __pmac core99_check(u8* datas)
180 return hdr99->generation; 180 return hdr99->generation;
181} 181}
182 182
183static int __pmac sm_erase_bank(int bank) 183static int sm_erase_bank(int bank)
184{ 184{
185 int stat, i; 185 int stat, i;
186 unsigned long timeout; 186 unsigned long timeout;
@@ -212,7 +212,7 @@ static int __pmac sm_erase_bank(int bank)
212 return 0; 212 return 0;
213} 213}
214 214
215static int __pmac sm_write_bank(int bank, u8* datas) 215static int sm_write_bank(int bank, u8* datas)
216{ 216{
217 int i, stat = 0; 217 int i, stat = 0;
218 unsigned long timeout; 218 unsigned long timeout;
@@ -247,7 +247,7 @@ static int __pmac sm_write_bank(int bank, u8* datas)
247 return 0; 247 return 0;
248} 248}
249 249
250static int __pmac amd_erase_bank(int bank) 250static int amd_erase_bank(int bank)
251{ 251{
252 int i, stat = 0; 252 int i, stat = 0;
253 unsigned long timeout; 253 unsigned long timeout;
@@ -294,7 +294,7 @@ static int __pmac amd_erase_bank(int bank)
294 return 0; 294 return 0;
295} 295}
296 296
297static int __pmac amd_write_bank(int bank, u8* datas) 297static int amd_write_bank(int bank, u8* datas)
298{ 298{
299 int i, stat = 0; 299 int i, stat = 0;
300 unsigned long timeout; 300 unsigned long timeout;
@@ -341,7 +341,7 @@ static int __pmac amd_write_bank(int bank, u8* datas)
341} 341}
342 342
343 343
344static int __pmac core99_nvram_sync(void) 344static int core99_nvram_sync(void)
345{ 345{
346 struct core99_header* hdr99; 346 struct core99_header* hdr99;
347 unsigned long flags; 347 unsigned long flags;
@@ -431,7 +431,7 @@ int __init pmac_nvram_init(void)
431 return 0; 431 return 0;
432} 432}
433 433
434int __pmac pmac_get_partition(int partition) 434int pmac_get_partition(int partition)
435{ 435{
436 struct nvram_partition *part; 436 struct nvram_partition *part;
437 const char *name; 437 const char *name;
@@ -459,7 +459,7 @@ int __pmac pmac_get_partition(int partition)
459 return part->index; 459 return part->index;
460} 460}
461 461
462u8 __pmac pmac_xpram_read(int xpaddr) 462u8 pmac_xpram_read(int xpaddr)
463{ 463{
464 int offset = pmac_get_partition(pmac_nvram_XPRAM); 464 int offset = pmac_get_partition(pmac_nvram_XPRAM);
465 loff_t index; 465 loff_t index;
@@ -476,7 +476,7 @@ u8 __pmac pmac_xpram_read(int xpaddr)
476 return buf; 476 return buf;
477} 477}
478 478
479void __pmac pmac_xpram_write(int xpaddr, u8 data) 479void pmac_xpram_write(int xpaddr, u8 data)
480{ 480{
481 int offset = pmac_get_partition(pmac_nvram_XPRAM); 481 int offset = pmac_get_partition(pmac_nvram_XPRAM);
482 loff_t index; 482 loff_t index;
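
core99_calc_adler() above (its "return (high << 16) | low;" is visible in the hunk) computes an Adler-style checksum over the NVRAM image. For reference, a self-contained textbook Adler-32 in the RFC 1950 formulation -- illustrative only, not a copy of the kernel's loop, which may batch the modulo differently:

	#include <stddef.h>
	#include <stdint.h>

	/* Textbook Adler-32 (RFC 1950): two running sums, each kept modulo
	 * 65521, the largest prime below 2^16. */
	static uint32_t adler32(const uint8_t *buf, size_t len)
	{
		uint32_t low = 1, high = 0;
		size_t i;

		for (i = 0; i < len; i++) {
			low = (low + buf[i]) % 65521;
			high = (high + low) % 65521;
		}
		return (high << 16) | low;
	}
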
diff --git a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c
index dc40a0cad0b4..f139fc034199 100644
--- a/arch/ppc64/kernel/pmac_pci.c
+++ b/arch/ppc64/kernel/pmac_pci.c
@@ -27,8 +27,8 @@
27#include <asm/machdep.h> 27#include <asm/machdep.h>
28#include <asm/pmac_feature.h> 28#include <asm/pmac_feature.h>
29#include <asm/iommu.h> 29#include <asm/iommu.h>
30#include <asm/ppc-pci.h>
30 31
31#include "pci.h"
32#include "pmac.h" 32#include "pmac.h"
33 33
34#define DEBUG 34#define DEBUG
@@ -121,7 +121,7 @@ static void __init fixup_bus_range(struct device_node *bridge)
121 |(((unsigned long)(off)) & 0xFCUL) \ 121 |(((unsigned long)(off)) & 0xFCUL) \
122 |1UL) 122 |1UL)
123 123
124static unsigned long __pmac macrisc_cfg_access(struct pci_controller* hose, 124static unsigned long macrisc_cfg_access(struct pci_controller* hose,
125 u8 bus, u8 dev_fn, u8 offset) 125 u8 bus, u8 dev_fn, u8 offset)
126{ 126{
127 unsigned int caddr; 127 unsigned int caddr;
@@ -142,7 +142,7 @@ static unsigned long __pmac macrisc_cfg_access(struct pci_controller* hose,
142 return ((unsigned long)hose->cfg_data) + offset; 142 return ((unsigned long)hose->cfg_data) + offset;
143} 143}
144 144
145static int __pmac macrisc_read_config(struct pci_bus *bus, unsigned int devfn, 145static int macrisc_read_config(struct pci_bus *bus, unsigned int devfn,
146 int offset, int len, u32 *val) 146 int offset, int len, u32 *val)
147{ 147{
148 struct pci_controller *hose; 148 struct pci_controller *hose;
@@ -173,7 +173,7 @@ static int __pmac macrisc_read_config(struct pci_bus *bus, unsigned int devfn,
173 return PCIBIOS_SUCCESSFUL; 173 return PCIBIOS_SUCCESSFUL;
174} 174}
175 175
176static int __pmac macrisc_write_config(struct pci_bus *bus, unsigned int devfn, 176static int macrisc_write_config(struct pci_bus *bus, unsigned int devfn,
177 int offset, int len, u32 val) 177 int offset, int len, u32 val)
178{ 178{
179 struct pci_controller *hose; 179 struct pci_controller *hose;
@@ -265,7 +265,7 @@ static int u3_ht_skip_device(struct pci_controller *hose,
265 + (((unsigned long)bus) << 16) \ 265 + (((unsigned long)bus) << 16) \
266 + 0x01000000UL) 266 + 0x01000000UL)
267 267
268static unsigned long __pmac u3_ht_cfg_access(struct pci_controller* hose, 268static unsigned long u3_ht_cfg_access(struct pci_controller* hose,
269 u8 bus, u8 devfn, u8 offset) 269 u8 bus, u8 devfn, u8 offset)
270{ 270{
271 if (bus == hose->first_busno) { 271 if (bus == hose->first_busno) {
@@ -277,7 +277,7 @@ static unsigned long __pmac u3_ht_cfg_access(struct pci_controller* hose,
277 return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset); 277 return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset);
278} 278}
279 279
280static int __pmac u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, 280static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
281 int offset, int len, u32 *val) 281 int offset, int len, u32 *val)
282{ 282{
283 struct pci_controller *hose; 283 struct pci_controller *hose;
@@ -327,7 +327,7 @@ static int __pmac u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
327 return PCIBIOS_SUCCESSFUL; 327 return PCIBIOS_SUCCESSFUL;
328} 328}
329 329
330static int __pmac u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, 330static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn,
331 int offset, int len, u32 val) 331 int offset, int len, u32 val)
332{ 332{
333 struct pci_controller *hose; 333 struct pci_controller *hose;
diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c
index 25755252067a..be4c1693d149 100644
--- a/arch/ppc64/kernel/pmac_setup.c
+++ b/arch/ppc64/kernel/pmac_setup.c
@@ -72,9 +72,10 @@
72#include <asm/lmb.h> 72#include <asm/lmb.h>
73#include <asm/smu.h> 73#include <asm/smu.h>
74#include <asm/pmc.h> 74#include <asm/pmc.h>
75#include <asm/mpic.h>
76#include <asm/udbg.h>
75 77
76#include "pmac.h" 78#include "pmac.h"
77#include "mpic.h"
78 79
79#ifdef DEBUG 80#ifdef DEBUG
80#define DBG(fmt...) udbg_printf(fmt) 81#define DBG(fmt...) udbg_printf(fmt)
@@ -98,7 +99,7 @@ EXPORT_SYMBOL(smu_cmdbuf_abs);
98 99
99extern void udbg_init_scc(struct device_node *np); 100extern void udbg_init_scc(struct device_node *np);
100 101
101static void __pmac pmac_show_cpuinfo(struct seq_file *m) 102static void pmac_show_cpuinfo(struct seq_file *m)
102{ 103{
103 struct device_node *np; 104 struct device_node *np;
104 char *pp; 105 char *pp;
@@ -210,7 +211,7 @@ static int pmac_late_init(void)
210late_initcall(pmac_late_init); 211late_initcall(pmac_late_init);
211 212
212/* can't be __init - can be called whenever a disk is first accessed */ 213/* can't be __init - can be called whenever a disk is first accessed */
213void __pmac note_bootable_part(dev_t dev, int part, int goodness) 214void note_bootable_part(dev_t dev, int part, int goodness)
214{ 215{
215 extern dev_t boot_dev; 216 extern dev_t boot_dev;
216 char *p; 217 char *p;
@@ -231,7 +232,7 @@ void __pmac note_bootable_part(dev_t dev, int part, int goodness)
231 } 232 }
232} 233}
233 234
234static void __pmac pmac_restart(char *cmd) 235static void pmac_restart(char *cmd)
235{ 236{
236 switch(sys_ctrler) { 237 switch(sys_ctrler) {
237#ifdef CONFIG_ADB_PMU 238#ifdef CONFIG_ADB_PMU
@@ -250,7 +251,7 @@ static void __pmac pmac_restart(char *cmd)
250 } 251 }
251} 252}
252 253
253static void __pmac pmac_power_off(void) 254static void pmac_power_off(void)
254{ 255{
255 switch(sys_ctrler) { 256 switch(sys_ctrler) {
256#ifdef CONFIG_ADB_PMU 257#ifdef CONFIG_ADB_PMU
@@ -268,7 +269,7 @@ static void __pmac pmac_power_off(void)
268 } 269 }
269} 270}
270 271
271static void __pmac pmac_halt(void) 272static void pmac_halt(void)
272{ 273{
273 pmac_power_off(); 274 pmac_power_off();
274} 275}
diff --git a/arch/ppc64/kernel/pmac_smp.c b/arch/ppc64/kernel/pmac_smp.c
index a23de37227bf..3a1683f5b07f 100644
--- a/arch/ppc64/kernel/pmac_smp.c
+++ b/arch/ppc64/kernel/pmac_smp.c
@@ -51,8 +51,7 @@
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/keylargo.h> 52#include <asm/keylargo.h>
53#include <asm/pmac_low_i2c.h> 53#include <asm/pmac_low_i2c.h>
54 54#include <asm/mpic.h>
55#include "mpic.h"
56 55
57#ifdef DEBUG 56#ifdef DEBUG
58#define DBG(fmt...) udbg_printf(fmt) 57#define DBG(fmt...) udbg_printf(fmt)
@@ -310,7 +309,7 @@ static void __init smp_core99_setup_cpu(int cpu_nr)
310 } 309 }
311} 310}
312 311
313struct smp_ops_t core99_smp_ops __pmacdata = { 312struct smp_ops_t core99_smp_ops = {
314 .message_pass = smp_mpic_message_pass, 313 .message_pass = smp_mpic_message_pass,
315 .probe = smp_core99_probe, 314 .probe = smp_core99_probe,
316 .kick_cpu = smp_core99_kick_cpu, 315 .kick_cpu = smp_core99_kick_cpu,
diff --git a/arch/ppc64/kernel/pmac_time.c b/arch/ppc64/kernel/pmac_time.c
index 41bbb8c59697..9d8c97decd32 100644
--- a/arch/ppc64/kernel/pmac_time.c
+++ b/arch/ppc64/kernel/pmac_time.c
@@ -51,7 +51,7 @@
51extern struct timezone sys_tz; 51extern struct timezone sys_tz;
52extern void to_tm(int tim, struct rtc_time * tm); 52extern void to_tm(int tim, struct rtc_time * tm);
53 53
54void __pmac pmac_get_rtc_time(struct rtc_time *tm) 54void pmac_get_rtc_time(struct rtc_time *tm)
55{ 55{
56 switch(sys_ctrler) { 56 switch(sys_ctrler) {
57#ifdef CONFIG_ADB_PMU 57#ifdef CONFIG_ADB_PMU
@@ -92,7 +92,7 @@ void __pmac pmac_get_rtc_time(struct rtc_time *tm)
92 } 92 }
93} 93}
94 94
95int __pmac pmac_set_rtc_time(struct rtc_time *tm) 95int pmac_set_rtc_time(struct rtc_time *tm)
96{ 96{
97 switch(sys_ctrler) { 97 switch(sys_ctrler) {
98#ifdef CONFIG_ADB_PMU 98#ifdef CONFIG_ADB_PMU
diff --git a/arch/ppc64/kernel/pmc.c b/arch/ppc64/kernel/pmc.c
index 63d9481c3ec2..944d7df7935f 100644
--- a/arch/ppc64/kernel/pmc.c
+++ b/arch/ppc64/kernel/pmc.c
@@ -70,7 +70,7 @@ void power4_enable_pmcs(void)
70{ 70{
71 unsigned long hid0; 71 unsigned long hid0;
72 72
73 hid0 = mfspr(HID0); 73 hid0 = mfspr(SPRN_HID0);
74 hid0 |= 1UL << (63 - 20); 74 hid0 |= 1UL << (63 - 20);
75 75
76 /* POWER4 requires the following sequence */ 76 /* POWER4 requires the following sequence */
@@ -83,6 +83,6 @@ void power4_enable_pmcs(void)
83 "mfspr %0, %1\n" 83 "mfspr %0, %1\n"
84 "mfspr %0, %1\n" 84 "mfspr %0, %1\n"
85 "mfspr %0, %1\n" 85 "mfspr %0, %1\n"
86 "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): 86 "isync" : "=&r" (hid0) : "i" (SPRN_HID0), "0" (hid0):
87 "memory"); 87 "memory");
88} 88}
diff --git a/arch/ppc64/kernel/ppc_ksyms.c b/arch/ppc64/kernel/ppc_ksyms.c
index 705742f4eec6..84006e26342c 100644
--- a/arch/ppc64/kernel/ppc_ksyms.c
+++ b/arch/ppc64/kernel/ppc_ksyms.c
@@ -19,7 +19,6 @@
19#include <asm/hw_irq.h> 19#include <asm/hw_irq.h>
20#include <asm/abs_addr.h> 20#include <asm/abs_addr.h>
21#include <asm/cacheflush.h> 21#include <asm/cacheflush.h>
22#include <asm/iSeries/HvCallSc.h>
23 22
24EXPORT_SYMBOL(strcpy); 23EXPORT_SYMBOL(strcpy);
25EXPORT_SYMBOL(strncpy); 24EXPORT_SYMBOL(strncpy);
@@ -46,17 +45,6 @@ EXPORT_SYMBOL(__strnlen_user);
46 45
47EXPORT_SYMBOL(reloc_offset); 46EXPORT_SYMBOL(reloc_offset);
48 47
49#ifdef CONFIG_PPC_ISERIES
50EXPORT_SYMBOL(HvCall0);
51EXPORT_SYMBOL(HvCall1);
52EXPORT_SYMBOL(HvCall2);
53EXPORT_SYMBOL(HvCall3);
54EXPORT_SYMBOL(HvCall4);
55EXPORT_SYMBOL(HvCall5);
56EXPORT_SYMBOL(HvCall6);
57EXPORT_SYMBOL(HvCall7);
58#endif
59
60EXPORT_SYMBOL(_insb); 48EXPORT_SYMBOL(_insb);
61EXPORT_SYMBOL(_outsb); 49EXPORT_SYMBOL(_outsb);
62EXPORT_SYMBOL(_insw); 50EXPORT_SYMBOL(_insw);
@@ -77,14 +65,6 @@ EXPORT_SYMBOL(giveup_altivec);
77EXPORT_SYMBOL(__flush_icache_range); 65EXPORT_SYMBOL(__flush_icache_range);
78EXPORT_SYMBOL(flush_dcache_range); 66EXPORT_SYMBOL(flush_dcache_range);
79 67
80#ifdef CONFIG_SMP
81#ifdef CONFIG_PPC_ISERIES
82EXPORT_SYMBOL(local_get_flags);
83EXPORT_SYMBOL(local_irq_disable);
84EXPORT_SYMBOL(local_irq_restore);
85#endif
86#endif
87
88EXPORT_SYMBOL(memcpy); 68EXPORT_SYMBOL(memcpy);
89EXPORT_SYMBOL(memset); 69EXPORT_SYMBOL(memset);
90EXPORT_SYMBOL(memmove); 70EXPORT_SYMBOL(memmove);
diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c
deleted file mode 100644
index 887005358eb1..000000000000
--- a/arch/ppc64/kernel/process.c
+++ /dev/null
@@ -1,713 +0,0 @@
1/*
2 * linux/arch/ppc64/kernel/process.c
3 *
4 * Derived from "arch/i386/kernel/process.c"
5 * Copyright (C) 1995 Linus Torvalds
6 *
7 * Updated and modified by Cort Dougan (cort@cs.nmt.edu) and
8 * Paul Mackerras (paulus@cs.anu.edu.au)
9 *
10 * PowerPC version
11 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18
19#include <linux/config.h>
20#include <linux/module.h>
21#include <linux/errno.h>
22#include <linux/sched.h>
23#include <linux/kernel.h>
24#include <linux/mm.h>
25#include <linux/smp.h>
26#include <linux/smp_lock.h>
27#include <linux/stddef.h>
28#include <linux/unistd.h>
29#include <linux/slab.h>
30#include <linux/user.h>
31#include <linux/elf.h>
32#include <linux/init.h>
33#include <linux/init_task.h>
34#include <linux/prctl.h>
35#include <linux/ptrace.h>
36#include <linux/kallsyms.h>
37#include <linux/interrupt.h>
38#include <linux/utsname.h>
39#include <linux/kprobes.h>
40
41#include <asm/pgtable.h>
42#include <asm/uaccess.h>
43#include <asm/system.h>
44#include <asm/io.h>
45#include <asm/processor.h>
46#include <asm/mmu.h>
47#include <asm/mmu_context.h>
48#include <asm/prom.h>
49#include <asm/ppcdebug.h>
50#include <asm/machdep.h>
51#include <asm/iSeries/HvCallHpt.h>
52#include <asm/cputable.h>
53#include <asm/firmware.h>
54#include <asm/sections.h>
55#include <asm/tlbflush.h>
56#include <asm/time.h>
57#include <asm/plpar_wrappers.h>
58
59#ifndef CONFIG_SMP
60struct task_struct *last_task_used_math = NULL;
61struct task_struct *last_task_used_altivec = NULL;
62#endif
63
64/*
65 * Make sure the floating-point register state in the
66 * the thread_struct is up to date for task tsk.
67 */
68void flush_fp_to_thread(struct task_struct *tsk)
69{
70 if (tsk->thread.regs) {
71 /*
72 * We need to disable preemption here because if we didn't,
73 * another process could get scheduled after the regs->msr
74 * test but before we have finished saving the FP registers
75 * to the thread_struct. That process could take over the
76 * FPU, and then when we get scheduled again we would store
77 * bogus values for the remaining FP registers.
78 */
79 preempt_disable();
80 if (tsk->thread.regs->msr & MSR_FP) {
81#ifdef CONFIG_SMP
82 /*
83 * This should only ever be called for current or
84 * for a stopped child process. Since we save away
85 * the FP register state on context switch on SMP,
86 * there is something wrong if a stopped child appears
87 * to still have its FP state in the CPU registers.
88 */
89 BUG_ON(tsk != current);
90#endif
91 giveup_fpu(current);
92 }
93 preempt_enable();
94 }
95}
96
97void enable_kernel_fp(void)
98{
99 WARN_ON(preemptible());
100
101#ifdef CONFIG_SMP
102 if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
103 giveup_fpu(current);
104 else
105 giveup_fpu(NULL); /* just enables FP for kernel */
106#else
107 giveup_fpu(last_task_used_math);
108#endif /* CONFIG_SMP */
109}
110EXPORT_SYMBOL(enable_kernel_fp);
111
112int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
113{
114 if (!tsk->thread.regs)
115 return 0;
116 flush_fp_to_thread(current);
117
118 memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs));
119
120 return 1;
121}
122
123#ifdef CONFIG_ALTIVEC
124
125void enable_kernel_altivec(void)
126{
127 WARN_ON(preemptible());
128
129#ifdef CONFIG_SMP
130 if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
131 giveup_altivec(current);
132 else
133 giveup_altivec(NULL); /* just enables AltiVec for kernel */
134#else
135 giveup_altivec(last_task_used_altivec);
136#endif /* CONFIG_SMP */
137}
138EXPORT_SYMBOL(enable_kernel_altivec);
139
140/*
141 * Make sure the VMX/Altivec register state in
142 * the thread_struct is up to date for task tsk.
143 */
144void flush_altivec_to_thread(struct task_struct *tsk)
145{
146 if (tsk->thread.regs) {
147 preempt_disable();
148 if (tsk->thread.regs->msr & MSR_VEC) {
149#ifdef CONFIG_SMP
150 BUG_ON(tsk != current);
151#endif
152 giveup_altivec(current);
153 }
154 preempt_enable();
155 }
156}
157
158int dump_task_altivec(struct pt_regs *regs, elf_vrregset_t *vrregs)
159{
160 flush_altivec_to_thread(current);
161 memcpy(vrregs, &current->thread.vr[0], sizeof(*vrregs));
162 return 1;
163}
164
165#endif /* CONFIG_ALTIVEC */
166
167static void set_dabr_spr(unsigned long val)
168{
169 mtspr(SPRN_DABR, val);
170}
171
172int set_dabr(unsigned long dabr)
173{
174 int ret = 0;
175
176 if (firmware_has_feature(FW_FEATURE_XDABR)) {
177 /* We want to catch accesses from kernel and userspace */
178 unsigned long flags = H_DABRX_KERNEL|H_DABRX_USER;
179 ret = plpar_set_xdabr(dabr, flags);
180 } else if (firmware_has_feature(FW_FEATURE_DABR)) {
181 ret = plpar_set_dabr(dabr);
182 } else {
183 set_dabr_spr(dabr);
184 }
185
186 return ret;
187}
188
189DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
190static DEFINE_PER_CPU(unsigned long, current_dabr);
191
192struct task_struct *__switch_to(struct task_struct *prev,
193 struct task_struct *new)
194{
195 struct thread_struct *new_thread, *old_thread;
196 unsigned long flags;
197 struct task_struct *last;
198
199#ifdef CONFIG_SMP
200 /* avoid complexity of lazy save/restore of fpu
201 * by just saving it every time we switch out if
202 * this task used the fpu during the last quantum.
203 *
204 * If it tries to use the fpu again, it'll trap and
205 * reload its fp regs. So we don't have to do a restore
206 * every switch, just a save.
207 * -- Cort
208 */
209 if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
210 giveup_fpu(prev);
211#ifdef CONFIG_ALTIVEC
212 if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
213 giveup_altivec(prev);
214#endif /* CONFIG_ALTIVEC */
215#endif /* CONFIG_SMP */
216
217#if defined(CONFIG_ALTIVEC) && !defined(CONFIG_SMP)
218 /* Avoid the trap. On smp this never happens since
219 * we don't set last_task_used_altivec -- Cort
220 */
221 if (new->thread.regs && last_task_used_altivec == new)
222 new->thread.regs->msr |= MSR_VEC;
223#endif /* CONFIG_ALTIVEC */
224
225 if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) {
226 set_dabr(new->thread.dabr);
227 __get_cpu_var(current_dabr) = new->thread.dabr;
228 }
229
230 flush_tlb_pending();
231
232 new_thread = &new->thread;
233 old_thread = &current->thread;
234
235 /* Collect PURR utilization data per process and per processor.
236 * The PURR is essentially a per-processor time base.
237 */
238 if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
239 struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
240 long unsigned start_tb, current_tb;
241 start_tb = old_thread->start_tb;
242 cu->current_tb = current_tb = mfspr(SPRN_PURR);
243 old_thread->accum_tb += (current_tb - start_tb);
244 new_thread->start_tb = current_tb;
245 }
246
247 local_irq_save(flags);
248 last = _switch(old_thread, new_thread);
249
250 local_irq_restore(flags);
251
252 return last;
253}
254
255static int instructions_to_print = 16;
256
257static void show_instructions(struct pt_regs *regs)
258{
259 int i;
260 unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
261 sizeof(int));
262
263 printk("Instruction dump:");
264
265 for (i = 0; i < instructions_to_print; i++) {
266 int instr;
267
268 if (!(i % 8))
269 printk("\n");
270
271 if (((REGION_ID(pc) != KERNEL_REGION_ID) &&
272 (REGION_ID(pc) != VMALLOC_REGION_ID)) ||
273 __get_user(instr, (unsigned int *)pc)) {
274 printk("XXXXXXXX ");
275 } else {
276 if (regs->nip == pc)
277 printk("<%08x> ", instr);
278 else
279 printk("%08x ", instr);
280 }
281
282 pc += sizeof(int);
283 }
284
285 printk("\n");
286}
287
288void show_regs(struct pt_regs * regs)
289{
290 int i;
291 unsigned long trap;
292
293 printk("NIP: %016lX XER: %08X LR: %016lX CTR: %016lX\n",
294 regs->nip, (unsigned int)regs->xer, regs->link, regs->ctr);
295 printk("REGS: %p TRAP: %04lx %s (%s)\n",
296 regs, regs->trap, print_tainted(), system_utsname.release);
297 printk("MSR: %016lx EE: %01x PR: %01x FP: %01x ME: %01x "
298 "IR/DR: %01x%01x CR: %08X\n",
299 regs->msr, regs->msr&MSR_EE ? 1 : 0, regs->msr&MSR_PR ? 1 : 0,
300 regs->msr & MSR_FP ? 1 : 0,regs->msr&MSR_ME ? 1 : 0,
301 regs->msr&MSR_IR ? 1 : 0,
302 regs->msr&MSR_DR ? 1 : 0,
303 (unsigned int)regs->ccr);
304 trap = TRAP(regs);
305 printk("DAR: %016lx DSISR: %016lx\n", regs->dar, regs->dsisr);
306 printk("TASK: %p[%d] '%s' THREAD: %p",
307 current, current->pid, current->comm, current->thread_info);
308
309#ifdef CONFIG_SMP
310 printk(" CPU: %d", smp_processor_id());
311#endif /* CONFIG_SMP */
312
313 for (i = 0; i < 32; i++) {
314 if ((i % 4) == 0) {
315 printk("\n" KERN_INFO "GPR%02d: ", i);
316 }
317
318 printk("%016lX ", regs->gpr[i]);
319 if (i == 13 && !FULL_REGS(regs))
320 break;
321 }
322 printk("\n");
323 /*
324 * Look up the NIP late so we have the best chance of getting the
325 * above info out without failing
326 */
327 printk("NIP [%016lx] ", regs->nip);
328 print_symbol("%s\n", regs->nip);
329 printk("LR [%016lx] ", regs->link);
330 print_symbol("%s\n", regs->link);
331 show_stack(current, (unsigned long *)regs->gpr[1]);
332 if (!user_mode(regs))
333 show_instructions(regs);
334}
335
336void exit_thread(void)
337{
338 kprobe_flush_task(current);
339
340#ifndef CONFIG_SMP
341 if (last_task_used_math == current)
342 last_task_used_math = NULL;
343#ifdef CONFIG_ALTIVEC
344 if (last_task_used_altivec == current)
345 last_task_used_altivec = NULL;
346#endif /* CONFIG_ALTIVEC */
347#endif /* CONFIG_SMP */
348}
349
350void flush_thread(void)
351{
352 struct thread_info *t = current_thread_info();
353
354 kprobe_flush_task(current);
355 if (t->flags & _TIF_ABI_PENDING)
356 t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
357
358#ifndef CONFIG_SMP
359 if (last_task_used_math == current)
360 last_task_used_math = NULL;
361#ifdef CONFIG_ALTIVEC
362 if (last_task_used_altivec == current)
363 last_task_used_altivec = NULL;
364#endif /* CONFIG_ALTIVEC */
365#endif /* CONFIG_SMP */
366
367 if (current->thread.dabr) {
368 current->thread.dabr = 0;
369 set_dabr(0);
370 }
371}
372
373void
374release_thread(struct task_struct *t)
375{
376}
377
378
379/*
380 * This gets called before we allocate a new thread and copy
381 * the current task into it.
382 */
383void prepare_to_copy(struct task_struct *tsk)
384{
385 flush_fp_to_thread(current);
386 flush_altivec_to_thread(current);
387}
388
389/*
390 * Copy a thread..
391 */
392int
393copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
394 unsigned long unused, struct task_struct *p, struct pt_regs *regs)
395{
396 struct pt_regs *childregs, *kregs;
397 extern void ret_from_fork(void);
398 unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE;
399
400 /* Copy registers */
401 sp -= sizeof(struct pt_regs);
402 childregs = (struct pt_regs *) sp;
403 *childregs = *regs;
404 if ((childregs->msr & MSR_PR) == 0) {
405 /* for kernel thread, set stackptr in new task */
406 childregs->gpr[1] = sp + sizeof(struct pt_regs);
407 p->thread.regs = NULL; /* no user register state */
408 clear_ti_thread_flag(p->thread_info, TIF_32BIT);
409 } else {
410 childregs->gpr[1] = usp;
411 p->thread.regs = childregs;
412 if (clone_flags & CLONE_SETTLS) {
413 if (test_thread_flag(TIF_32BIT))
414 childregs->gpr[2] = childregs->gpr[6];
415 else
416 childregs->gpr[13] = childregs->gpr[6];
417 }
418 }
419 childregs->gpr[3] = 0; /* Result from fork() */
420 sp -= STACK_FRAME_OVERHEAD;
421
422 /*
423 * The way this works is that at some point in the future
424 * some task will call _switch to switch to the new task.
425 * That will pop off the stack frame created below and start
426 * the new task running at ret_from_fork. The new task will
427 * do some housekeeping and then return from the fork or clone
428 * system call, using the stack frame created above.
429 */
430 sp -= sizeof(struct pt_regs);
431 kregs = (struct pt_regs *) sp;
432 sp -= STACK_FRAME_OVERHEAD;
433 p->thread.ksp = sp;
434 if (cpu_has_feature(CPU_FTR_SLB)) {
435 unsigned long sp_vsid = get_kernel_vsid(sp);
436
437 sp_vsid <<= SLB_VSID_SHIFT;
438 sp_vsid |= SLB_VSID_KERNEL;
439 if (cpu_has_feature(CPU_FTR_16M_PAGE))
440 sp_vsid |= SLB_VSID_L;
441
442 p->thread.ksp_vsid = sp_vsid;
443 }
444
445 /*
446 * The PPC64 ABI makes use of function descriptors. The symbol
447 * ret_from_fork is actually a pointer to such a descriptor; its
448 * first entry is the address of the actual function, followed by
449 * the TOC value.
450 */
451 kregs->nip = *((unsigned long *)ret_from_fork);
452
453 return 0;
454}
455
456/*
457 * Set up a thread for executing a new program
458 */
459void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp)
460{
461 unsigned long entry, toc, load_addr = regs->gpr[2];
462
463 /* fdptr is a relocated pointer to the function descriptor for
464 * the elf _start routine. The first entry in the function
465 * descriptor is the entry address of _start and the second
466 * entry is the TOC value we need to use.
467 */
468 set_fs(USER_DS);
469 __get_user(entry, (unsigned long __user *)fdptr);
470 __get_user(toc, (unsigned long __user *)fdptr+1);
471
472 /* Check whether the e_entry function descriptor entries
473 * need to be relocated before we can use them.
474 */
475 if (load_addr != 0) {
476 entry += load_addr;
477 toc += load_addr;
478 }
479
480 /*
481 * If we exec out of a kernel thread then thread.regs will not be
482 * set. Do it now.
483 */
484 if (!current->thread.regs) {
485 unsigned long childregs = (unsigned long)current->thread_info +
486 THREAD_SIZE;
487 childregs -= sizeof(struct pt_regs);
488 current->thread.regs = (struct pt_regs *)childregs;
489 }
490
491 regs->nip = entry;
492 regs->gpr[1] = sp;
493 regs->gpr[2] = toc;
494 regs->msr = MSR_USER64;
495#ifndef CONFIG_SMP
496 if (last_task_used_math == current)
497 last_task_used_math = 0;
498#endif /* CONFIG_SMP */
499 memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
500 current->thread.fpscr = 0;
501#ifdef CONFIG_ALTIVEC
502#ifndef CONFIG_SMP
503 if (last_task_used_altivec == current)
504 last_task_used_altivec = 0;
505#endif /* CONFIG_SMP */
506 memset(current->thread.vr, 0, sizeof(current->thread.vr));
507 current->thread.vscr.u[0] = 0;
508 current->thread.vscr.u[1] = 0;
509 current->thread.vscr.u[2] = 0;
510 current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
511 current->thread.vrsave = 0;
512 current->thread.used_vr = 0;
513#endif /* CONFIG_ALTIVEC */
514}
515EXPORT_SYMBOL(start_thread);
516
517int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
518{
519 struct pt_regs *regs = tsk->thread.regs;
520
521 if (val > PR_FP_EXC_PRECISE)
522 return -EINVAL;
523 tsk->thread.fpexc_mode = __pack_fe01(val);
524 if (regs != NULL && (regs->msr & MSR_FP) != 0)
525 regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
526 | tsk->thread.fpexc_mode;
527 return 0;
528}
529
530int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
531{
532 unsigned int val;
533
534 val = __unpack_fe01(tsk->thread.fpexc_mode);
535 return put_user(val, (unsigned int __user *) adr);
536}
537
538int sys_clone(unsigned long clone_flags, unsigned long p2, unsigned long p3,
539 unsigned long p4, unsigned long p5, unsigned long p6,
540 struct pt_regs *regs)
541{
542 unsigned long parent_tidptr = 0;
543 unsigned long child_tidptr = 0;
544
545 if (p2 == 0)
546 p2 = regs->gpr[1]; /* stack pointer for child */
547
548 if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
549 CLONE_CHILD_CLEARTID)) {
550 parent_tidptr = p3;
551 child_tidptr = p5;
552 if (test_thread_flag(TIF_32BIT)) {
553 parent_tidptr &= 0xffffffff;
554 child_tidptr &= 0xffffffff;
555 }
556 }
557
558 return do_fork(clone_flags, p2, regs, 0,
559 (int __user *)parent_tidptr, (int __user *)child_tidptr);
560}
561
562int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3,
563 unsigned long p4, unsigned long p5, unsigned long p6,
564 struct pt_regs *regs)
565{
566 return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL);
567}
568
569int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3,
570 unsigned long p4, unsigned long p5, unsigned long p6,
571 struct pt_regs *regs)
572{
573 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], regs, 0,
574 NULL, NULL);
575}
576
577int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
578 unsigned long a3, unsigned long a4, unsigned long a5,
579 struct pt_regs *regs)
580{
581 int error;
582 char * filename;
583
584 filename = getname((char __user *) a0);
585 error = PTR_ERR(filename);
586 if (IS_ERR(filename))
587 goto out;
588 flush_fp_to_thread(current);
589 flush_altivec_to_thread(current);
590 error = do_execve(filename, (char __user * __user *) a1,
591 (char __user * __user *) a2, regs);
592
593 if (error == 0) {
594 task_lock(current);
595 current->ptrace &= ~PT_DTRACE;
596 task_unlock(current);
597 }
598 putname(filename);
599
600out:
601 return error;
602}
603
604static int kstack_depth_to_print = 64;
605
606static int validate_sp(unsigned long sp, struct task_struct *p,
607 unsigned long nbytes)
608{
609 unsigned long stack_page = (unsigned long)p->thread_info;
610
611 if (sp >= stack_page + sizeof(struct thread_struct)
612 && sp <= stack_page + THREAD_SIZE - nbytes)
613 return 1;
614
615#ifdef CONFIG_IRQSTACKS
616 stack_page = (unsigned long) hardirq_ctx[task_cpu(p)];
617 if (sp >= stack_page + sizeof(struct thread_struct)
618 && sp <= stack_page + THREAD_SIZE - nbytes)
619 return 1;
620
621 stack_page = (unsigned long) softirq_ctx[task_cpu(p)];
622 if (sp >= stack_page + sizeof(struct thread_struct)
623 && sp <= stack_page + THREAD_SIZE - nbytes)
624 return 1;
625#endif
626
627 return 0;
628}
629
630unsigned long get_wchan(struct task_struct *p)
631{
632 unsigned long ip, sp;
633 int count = 0;
634
635 if (!p || p == current || p->state == TASK_RUNNING)
636 return 0;
637
638 sp = p->thread.ksp;
639 if (!validate_sp(sp, p, 112))
640 return 0;
641
642 do {
643 sp = *(unsigned long *)sp;
644 if (!validate_sp(sp, p, 112))
645 return 0;
646 if (count > 0) {
647 ip = *(unsigned long *)(sp + 16);
648 if (!in_sched_functions(ip))
649 return ip;
650 }
651 } while (count++ < 16);
652 return 0;
653}
654EXPORT_SYMBOL(get_wchan);
655
656void show_stack(struct task_struct *p, unsigned long *_sp)
657{
658 unsigned long ip, newsp, lr;
659 int count = 0;
660 unsigned long sp = (unsigned long)_sp;
661 int firstframe = 1;
662
663 if (sp == 0) {
664 if (p) {
665 sp = p->thread.ksp;
666 } else {
667 sp = __get_SP();
668 p = current;
669 }
670 }
671
672 lr = 0;
673 printk("Call Trace:\n");
674 do {
675 if (!validate_sp(sp, p, 112))
676 return;
677
678 _sp = (unsigned long *) sp;
679 newsp = _sp[0];
680 ip = _sp[2];
681 if (!firstframe || ip != lr) {
682 printk("[%016lx] [%016lx] ", sp, ip);
683 print_symbol("%s", ip);
684 if (firstframe)
685 printk(" (unreliable)");
686 printk("\n");
687 }
688 firstframe = 0;
689
690 /*
691 * See if this is an exception frame.
692 * We look for the "regshere" marker in the current frame.
693 */
694 if (validate_sp(sp, p, sizeof(struct pt_regs) + 400)
695 && _sp[12] == 0x7265677368657265ul) {
696 struct pt_regs *regs = (struct pt_regs *)
697 (sp + STACK_FRAME_OVERHEAD);
698 printk("--- Exception: %lx", regs->trap);
699 print_symbol(" at %s\n", regs->nip);
700 lr = regs->link;
701 print_symbol(" LR = %s\n", lr);
702 firstframe = 1;
703 }
704
705 sp = newsp;
706 } while (count++ < kstack_depth_to_print);
707}
708
709void dump_stack(void)
710{
711 show_stack(current, (unsigned long *)__get_SP());
712}
713EXPORT_SYMBOL(dump_stack);
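
The copy_thread() and start_thread() code removed above relies on the 64-bit PowerPC ELF ABI (ABIv1) convention that a function symbol names a function descriptor rather than the code itself, which is why ret_from_fork is dereferenced and why two doublewords are read from fdptr. A sketch of that layout -- the struct name here is illustrative, not a kernel type:

	/* Illustrative only: a ppc64 ELF ABIv1 function descriptor. */
	struct func_desc {
		unsigned long entry;	/* address of the first instruction */
		unsigned long toc;	/* TOC base to load into r2 */
		unsigned long env;	/* environment pointer, unused from C */
	};

	/* start_thread() reads desc->entry and desc->toc from user space and
	 * seeds the new register set with them; copy_thread() dereferences
	 * ret_from_fork the same way to obtain the real kernel entry point. */
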
diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c
index 7035deb6de92..a0866f12647f 100644
--- a/arch/ppc64/kernel/prom.c
+++ b/arch/ppc64/kernel/prom.c
@@ -78,6 +78,7 @@ typedef int interpret_func(struct device_node *, unsigned long *,
78extern struct rtas_t rtas; 78extern struct rtas_t rtas;
79extern struct lmb lmb; 79extern struct lmb lmb;
80extern unsigned long klimit; 80extern unsigned long klimit;
81extern unsigned long memory_limit;
81 82
82static int __initdata dt_root_addr_cells; 83static int __initdata dt_root_addr_cells;
83static int __initdata dt_root_size_cells; 84static int __initdata dt_root_size_cells;
@@ -1063,7 +1064,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
1063{ 1064{
1064 u32 *prop; 1065 u32 *prop;
1065 u64 *prop64; 1066 u64 *prop64;
1066 extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end; 1067 extern unsigned long tce_alloc_start, tce_alloc_end;
1067 1068
1068 DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); 1069 DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
1069 1070
@@ -1237,7 +1238,7 @@ void __init early_init_devtree(void *params)
1237 lmb_init(); 1238 lmb_init();
1238 scan_flat_dt(early_init_dt_scan_root, NULL); 1239 scan_flat_dt(early_init_dt_scan_root, NULL);
1239 scan_flat_dt(early_init_dt_scan_memory, NULL); 1240 scan_flat_dt(early_init_dt_scan_memory, NULL);
1240 lmb_enforce_memory_limit(); 1241 lmb_enforce_memory_limit(memory_limit);
1241 lmb_analyze(); 1242 lmb_analyze();
1242 systemcfg->physicalMemorySize = lmb_phys_mem_size(); 1243 systemcfg->physicalMemorySize = lmb_phys_mem_size();
1243 lmb_reserve(0, __pa(klimit)); 1244 lmb_reserve(0, __pa(klimit));
diff --git a/arch/ppc64/kernel/rtas.c b/arch/ppc64/kernel/rtas.c
index 5e8eb33b8e54..36adab591bd3 100644
--- a/arch/ppc64/kernel/rtas.c
+++ b/arch/ppc64/kernel/rtas.c
@@ -30,6 +30,7 @@
30#include <asm/delay.h> 30#include <asm/delay.h>
31#include <asm/uaccess.h> 31#include <asm/uaccess.h>
32#include <asm/systemcfg.h> 32#include <asm/systemcfg.h>
33#include <asm/ppcdebug.h>
33 34
34struct flash_block_list_header rtas_firmware_flash_list = {0, NULL}; 35struct flash_block_list_header rtas_firmware_flash_list = {0, NULL};
35 36
diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c
index 4a9719b48abe..20361bcd8cfb 100644
--- a/arch/ppc64/kernel/rtas_pci.c
+++ b/arch/ppc64/kernel/rtas_pci.c
@@ -38,9 +38,8 @@
38#include <asm/pci-bridge.h> 38#include <asm/pci-bridge.h>
39#include <asm/iommu.h> 39#include <asm/iommu.h>
40#include <asm/rtas.h> 40#include <asm/rtas.h>
41 41#include <asm/mpic.h>
42#include "mpic.h" 42#include <asm/ppc-pci.h>
43#include "pci.h"
44 43
45/* RTAS tokens */ 44/* RTAS tokens */
46static int read_pci_config; 45static int read_pci_config;
diff --git a/arch/ppc64/kernel/rtc.c b/arch/ppc64/kernel/rtc.c
index 6ff52bc61325..88ae13f81c46 100644
--- a/arch/ppc64/kernel/rtc.c
+++ b/arch/ppc64/kernel/rtc.c
@@ -43,11 +43,8 @@
43#include <asm/time.h> 43#include <asm/time.h>
44#include <asm/rtas.h> 44#include <asm/rtas.h>
45 45
46#include <asm/iSeries/mf.h>
47#include <asm/machdep.h> 46#include <asm/machdep.h>
48 47
49extern int piranha_simulator;
50
51/* 48/*
52 * We sponge a minor off of the misc major. No need slurping 49 * We sponge a minor off of the misc major. No need slurping
53 * up another valuable major dev number for this. If you add 50 * up another valuable major dev number for this. If you add
@@ -265,40 +262,6 @@ static int rtc_read_proc(char *page, char **start, off_t off,
265 return len; 262 return len;
266} 263}
267 264
268#ifdef CONFIG_PPC_ISERIES
269/*
270 * Get the RTC from the virtual service processor
271 * This requires flowing LpEvents to the primary partition
272 */
273void iSeries_get_rtc_time(struct rtc_time *rtc_tm)
274{
275 if (piranha_simulator)
276 return;
277
278 mf_get_rtc(rtc_tm);
279 rtc_tm->tm_mon--;
280}
281
282/*
283 * Set the RTC in the virtual service processor
284 * This requires flowing LpEvents to the primary partition
285 */
286int iSeries_set_rtc_time(struct rtc_time *tm)
287{
288 mf_set_rtc(tm);
289 return 0;
290}
291
292void iSeries_get_boot_time(struct rtc_time *tm)
293{
294 if ( piranha_simulator )
295 return;
296
297 mf_get_boot_rtc(tm);
298 tm->tm_mon -= 1;
299}
300#endif
301
302#ifdef CONFIG_PPC_RTAS 265#ifdef CONFIG_PPC_RTAS
303#define MAX_RTC_WAIT 5000 /* 5 sec */ 266#define MAX_RTC_WAIT 5000 /* 5 sec */
304#define RTAS_CLOCK_BUSY (-2) 267#define RTAS_CLOCK_BUSY (-2)
diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c
index 5ac48bd64891..776b55b45e1b 100644
--- a/arch/ppc64/kernel/setup.c
+++ b/arch/ppc64/kernel/setup.c
@@ -58,6 +58,7 @@
58#include <asm/mmu.h> 58#include <asm/mmu.h>
59#include <asm/lmb.h> 59#include <asm/lmb.h>
60#include <asm/iSeries/ItLpNaca.h> 60#include <asm/iSeries/ItLpNaca.h>
61#include <asm/firmware.h>
61 62
62#ifdef DEBUG 63#ifdef DEBUG
63#define DBG(fmt...) udbg_printf(fmt) 64#define DBG(fmt...) udbg_printf(fmt)
@@ -153,7 +154,7 @@ struct screen_info screen_info = {
153 .orig_video_points = 16 154 .orig_video_points = 16
154}; 155};
155 156
156#if defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_SMP) 157#ifdef CONFIG_SMP
157 158
158static int smt_enabled_cmdline; 159static int smt_enabled_cmdline;
159 160
@@ -306,15 +307,13 @@ static void __init setup_cpu_maps(void)
306 307
307 systemcfg->processorCount = num_present_cpus(); 308 systemcfg->processorCount = num_present_cpus();
308} 309}
309#endif /* defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_SMP) */ 310#endif /* CONFIG_SMP */
310
311
312#ifdef CONFIG_PPC_MULTIPLATFORM
313 311
314extern struct machdep_calls pSeries_md; 312extern struct machdep_calls pSeries_md;
315extern struct machdep_calls pmac_md; 313extern struct machdep_calls pmac_md;
316extern struct machdep_calls maple_md; 314extern struct machdep_calls maple_md;
317extern struct machdep_calls bpa_md; 315extern struct machdep_calls bpa_md;
316extern struct machdep_calls iseries_md;
318 317
319/* Ultimately, stuff them in an elf section like initcalls... */ 318/* Ultimately, stuff them in an elf section like initcalls... */
320static struct machdep_calls __initdata *machines[] = { 319static struct machdep_calls __initdata *machines[] = {
@@ -330,6 +329,9 @@ static struct machdep_calls __initdata *machines[] = {
330#ifdef CONFIG_PPC_BPA 329#ifdef CONFIG_PPC_BPA
331 &bpa_md, 330 &bpa_md,
332#endif 331#endif
332#ifdef CONFIG_PPC_ISERIES
333 &iseries_md,
334#endif
333 NULL 335 NULL
334}; 336};
335 337
@@ -401,7 +403,8 @@ void __init early_setup(unsigned long dt_ptr)
401 /* 403 /*
402 * Initialize stab / SLB management 404 * Initialize stab / SLB management
403 */ 405 */
404 stab_initialize(lpaca->stab_real); 406 if (!firmware_has_feature(FW_FEATURE_ISERIES))
407 stab_initialize(lpaca->stab_real);
405 408
406 /* 409 /*
407 * Initialize the MMU Hash table and create the linear mapping 410 * Initialize the MMU Hash table and create the linear mapping
@@ -532,8 +535,6 @@ static void __init check_for_initrd(void)
532#endif /* CONFIG_BLK_DEV_INITRD */ 535#endif /* CONFIG_BLK_DEV_INITRD */
533} 536}
534 537
535#endif /* CONFIG_PPC_MULTIPLATFORM */
536
537/* 538/*
538 * Do some initial setup of the system. The parameters are those which 539 * Do some initial setup of the system. The parameters are those which
539 * were passed in from the bootloader. 540 * were passed in from the bootloader.
@@ -542,14 +543,6 @@ void __init setup_system(void)
542{ 543{
543 DBG(" -> setup_system()\n"); 544 DBG(" -> setup_system()\n");
544 545
545#ifdef CONFIG_PPC_ISERIES
546 /* pSeries systems are identified in prom.c via OF. */
547 if (itLpNaca.xLparInstalled == 1)
548 systemcfg->platform = PLATFORM_ISERIES_LPAR;
549
550 ppc_md.init_early();
551#else /* CONFIG_PPC_ISERIES */
552
553 /* 546 /*
554 * Unflatten the device-tree passed by prom_init or kexec 547 * Unflatten the device-tree passed by prom_init or kexec
555 */ 548 */
@@ -607,9 +600,8 @@ void __init setup_system(void)
607 strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); 600 strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
608 601
609 parse_early_param(); 602 parse_early_param();
610#endif /* !CONFIG_PPC_ISERIES */
611 603
612#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES) 604#ifdef CONFIG_SMP
613 /* 605 /*
614 * iSeries has already initialized the cpu maps at this point. 606 * iSeries has already initialized the cpu maps at this point.
615 */ 607 */
@@ -619,7 +611,7 @@ void __init setup_system(void)
619 * we can map physical -> logical CPU ids 611 * we can map physical -> logical CPU ids
620 */ 612 */
621 smp_release_cpus(); 613 smp_release_cpus();
622#endif /* defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES) */ 614#endif
623 615
624 printk("Starting Linux PPC64 %s\n", system_utsname.version); 616 printk("Starting Linux PPC64 %s\n", system_utsname.version);
625 617
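
With the setup.c changes above, the iSeries machdep_calls structure is added to the same NULL-terminated machines[] table as pSeries, PowerMac, Maple and BPA instead of being special-cased behind CONFIG_PPC_ISERIES. An illustrative sketch of how such a table is typically walked at boot -- not this file's literal code; the probe() hook and its argument are assumptions made for the sketch:

	/* Sketch only: pick the first machine description whose probe
	 * callback claims the platform we are booting on. */
	struct machdep_calls **mach, *selected = NULL;

	for (mach = machines; *mach != NULL; mach++) {
		if ((*mach)->probe(systemcfg->platform)) {
			selected = *mach;
			break;
		}
	}
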
diff --git a/arch/ppc64/kernel/smp.c b/arch/ppc64/kernel/smp.c
index 793b562da653..6f4f3da12a63 100644
--- a/arch/ppc64/kernel/smp.c
+++ b/arch/ppc64/kernel/smp.c
@@ -45,8 +45,7 @@
45#include <asm/cputable.h> 45#include <asm/cputable.h>
46#include <asm/system.h> 46#include <asm/system.h>
47#include <asm/abs_addr.h> 47#include <asm/abs_addr.h>
48 48#include <asm/mpic.h>
49#include "mpic.h"
50 49
51#ifdef DEBUG 50#ifdef DEBUG
52#define DBG(fmt...) udbg_printf(fmt) 51#define DBG(fmt...) udbg_printf(fmt)
diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c
index e93c13458910..1cacf61f9c91 100644
--- a/arch/ppc64/kernel/sys_ppc32.c
+++ b/arch/ppc64/kernel/sys_ppc32.c
@@ -53,8 +53,7 @@
53#include <asm/time.h> 53#include <asm/time.h>
54#include <asm/mmu_context.h> 54#include <asm/mmu_context.h>
55#include <asm/systemcfg.h> 55#include <asm/systemcfg.h>
56 56#include <asm/ppc-pci.h>
57#include "pci.h"
58 57
59/* readdir & getdents */ 58/* readdir & getdents */
60#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) 59#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c
index 9939c206afa4..7f63755eddfd 100644
--- a/arch/ppc64/kernel/time.c
+++ b/arch/ppc64/kernel/time.c
@@ -319,7 +319,7 @@ unsigned long tb_last_stamp __cacheline_aligned_in_smp;
319 * timer_interrupt - gets called when the decrementer overflows, 319 * timer_interrupt - gets called when the decrementer overflows,
320 * with interrupts disabled. 320 * with interrupts disabled.
321 */ 321 */
322int timer_interrupt(struct pt_regs * regs) 322void timer_interrupt(struct pt_regs * regs)
323{ 323{
324 int next_dec; 324 int next_dec;
325 unsigned long cur_tb; 325 unsigned long cur_tb;
@@ -377,8 +377,6 @@ int timer_interrupt(struct pt_regs * regs)
377 } 377 }
378 378
379 irq_exit(); 379 irq_exit();
380
381 return 1;
382} 380}
383 381
384/* 382/*
@@ -467,7 +465,7 @@ int do_settimeofday(struct timespec *tv)
467 465
468EXPORT_SYMBOL(do_settimeofday); 466EXPORT_SYMBOL(do_settimeofday);
469 467
470#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_MAPLE) || defined(CONFIG_PPC_BPA) 468#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_MAPLE) || defined(CONFIG_PPC_BPA) || defined(CONFIG_PPC_ISERIES)
471void __init generic_calibrate_decr(void) 469void __init generic_calibrate_decr(void)
472{ 470{
473 struct device_node *cpu; 471 struct device_node *cpu;
diff --git a/arch/ppc64/kernel/traps.c b/arch/ppc64/kernel/traps.c
deleted file mode 100644
index 7467ae508e6e..000000000000
--- a/arch/ppc64/kernel/traps.c
+++ /dev/null
@@ -1,568 +0,0 @@
1/*
2 * linux/arch/ppc64/kernel/traps.c
3 *
4 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Modified by Cort Dougan (cort@cs.nmt.edu)
12 * and Paul Mackerras (paulus@cs.anu.edu.au)
13 */
14
15/*
16 * This file handles the architecture-dependent parts of hardware exceptions
17 */
18
19#include <linux/config.h>
20#include <linux/errno.h>
21#include <linux/sched.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/stddef.h>
25#include <linux/unistd.h>
26#include <linux/slab.h>
27#include <linux/user.h>
28#include <linux/a.out.h>
29#include <linux/interrupt.h>
30#include <linux/init.h>
31#include <linux/module.h>
32#include <linux/delay.h>
33#include <linux/kprobes.h>
34#include <asm/kdebug.h>
35
36#include <asm/pgtable.h>
37#include <asm/uaccess.h>
38#include <asm/system.h>
39#include <asm/io.h>
40#include <asm/processor.h>
41#include <asm/ppcdebug.h>
42#include <asm/rtas.h>
43#include <asm/systemcfg.h>
44#include <asm/machdep.h>
45#include <asm/pmc.h>
46
47#ifdef CONFIG_DEBUGGER
48int (*__debugger)(struct pt_regs *regs);
49int (*__debugger_ipi)(struct pt_regs *regs);
50int (*__debugger_bpt)(struct pt_regs *regs);
51int (*__debugger_sstep)(struct pt_regs *regs);
52int (*__debugger_iabr_match)(struct pt_regs *regs);
53int (*__debugger_dabr_match)(struct pt_regs *regs);
54int (*__debugger_fault_handler)(struct pt_regs *regs);
55
56EXPORT_SYMBOL(__debugger);
57EXPORT_SYMBOL(__debugger_ipi);
58EXPORT_SYMBOL(__debugger_bpt);
59EXPORT_SYMBOL(__debugger_sstep);
60EXPORT_SYMBOL(__debugger_iabr_match);
61EXPORT_SYMBOL(__debugger_dabr_match);
62EXPORT_SYMBOL(__debugger_fault_handler);
63#endif
64
65struct notifier_block *ppc64_die_chain;
66static DEFINE_SPINLOCK(die_notifier_lock);
67
68int register_die_notifier(struct notifier_block *nb)
69{
70 int err = 0;
71 unsigned long flags;
72
73 spin_lock_irqsave(&die_notifier_lock, flags);
74 err = notifier_chain_register(&ppc64_die_chain, nb);
75 spin_unlock_irqrestore(&die_notifier_lock, flags);
76 return err;
77}
78
79/*
80 * Trap & Exception support
81 */
82
83static DEFINE_SPINLOCK(die_lock);
84
85int die(const char *str, struct pt_regs *regs, long err)
86{
87 static int die_counter;
88 int nl = 0;
89
90 if (debugger(regs))
91 return 1;
92
93 console_verbose();
94 spin_lock_irq(&die_lock);
95 bust_spinlocks(1);
96 printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
97#ifdef CONFIG_PREEMPT
98 printk("PREEMPT ");
99 nl = 1;
100#endif
101#ifdef CONFIG_SMP
102 printk("SMP NR_CPUS=%d ", NR_CPUS);
103 nl = 1;
104#endif
105#ifdef CONFIG_DEBUG_PAGEALLOC
106 printk("DEBUG_PAGEALLOC ");
107 nl = 1;
108#endif
109#ifdef CONFIG_NUMA
110 printk("NUMA ");
111 nl = 1;
112#endif
113 switch(systemcfg->platform) {
114 case PLATFORM_PSERIES:
115 printk("PSERIES ");
116 nl = 1;
117 break;
118 case PLATFORM_PSERIES_LPAR:
119 printk("PSERIES LPAR ");
120 nl = 1;
121 break;
122 case PLATFORM_ISERIES_LPAR:
123 printk("ISERIES LPAR ");
124 nl = 1;
125 break;
126 case PLATFORM_POWERMAC:
127 printk("POWERMAC ");
128 nl = 1;
129 break;
130 case PLATFORM_BPA:
131 printk("BPA ");
132 nl = 1;
133 break;
134 }
135 if (nl)
136 printk("\n");
137 print_modules();
138 show_regs(regs);
139 bust_spinlocks(0);
140 spin_unlock_irq(&die_lock);
141
142 if (in_interrupt())
143 panic("Fatal exception in interrupt");
144
145 if (panic_on_oops) {
146 printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
147 ssleep(5);
148 panic("Fatal exception");
149 }
150 do_exit(SIGSEGV);
151
152 return 0;
153}
154
155void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
156{
157 siginfo_t info;
158
159 if (!user_mode(regs)) {
160 if (die("Exception in kernel mode", regs, signr))
161 return;
162 }
163
164 memset(&info, 0, sizeof(info));
165 info.si_signo = signr;
166 info.si_code = code;
167 info.si_addr = (void __user *) addr;
168 force_sig_info(signr, &info, current);
169}
170
171void system_reset_exception(struct pt_regs *regs)
172{
 173	/* See if any machine-dependent handler wants this first */
174 if (ppc_md.system_reset_exception)
175 ppc_md.system_reset_exception(regs);
176
177 die("System Reset", regs, 0);
178
179 /* Must die if the interrupt is not recoverable */
180 if (!(regs->msr & MSR_RI))
181 panic("Unrecoverable System Reset");
182
183 /* What should we do here? We could issue a shutdown or hard reset. */
184}
185
186void machine_check_exception(struct pt_regs *regs)
187{
188 int recover = 0;
189
 190	/* See if any machine-dependent handler wants this first */
191 if (ppc_md.machine_check_exception)
192 recover = ppc_md.machine_check_exception(regs);
193
194 if (recover)
195 return;
196
197 if (debugger_fault_handler(regs))
198 return;
199 die("Machine check", regs, 0);
200
201 /* Must die if the interrupt is not recoverable */
202 if (!(regs->msr & MSR_RI))
203 panic("Unrecoverable Machine check");
204}
205
206void unknown_exception(struct pt_regs *regs)
207{
208 printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
209 regs->nip, regs->msr, regs->trap);
210
211 _exception(SIGTRAP, regs, 0, 0);
212}
213
214void instruction_breakpoint_exception(struct pt_regs *regs)
215{
216 if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
217 5, SIGTRAP) == NOTIFY_STOP)
218 return;
219 if (debugger_iabr_match(regs))
220 return;
221 _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
222}
223
224void __kprobes single_step_exception(struct pt_regs *regs)
225{
226 regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */
227
228 if (notify_die(DIE_SSTEP, "single_step", regs, 5,
229 5, SIGTRAP) == NOTIFY_STOP)
230 return;
231 if (debugger_sstep(regs))
232 return;
233
234 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
235}
236
237/*
238 * After we have successfully emulated an instruction, we have to
239 * check if the instruction was being single-stepped, and if so,
240 * pretend we got a single-step exception. This was pointed out
241 * by Kumar Gala. -- paulus
242 */
243static inline void emulate_single_step(struct pt_regs *regs)
244{
245 if (regs->msr & MSR_SE)
246 single_step_exception(regs);
247}
248
249static void parse_fpe(struct pt_regs *regs)
250{
251 int code = 0;
252 unsigned long fpscr;
253
254 flush_fp_to_thread(current);
255
256 fpscr = current->thread.fpscr;
257
258 /* Invalid operation */
259 if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
260 code = FPE_FLTINV;
261
262 /* Overflow */
263 else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
264 code = FPE_FLTOVF;
265
266 /* Underflow */
267 else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
268 code = FPE_FLTUND;
269
270 /* Divide by zero */
271 else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
272 code = FPE_FLTDIV;
273
274 /* Inexact result */
275 else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
276 code = FPE_FLTRES;
277
278 _exception(SIGFPE, regs, code, regs->nip);
279}
280
281/*
282 * Illegal instruction emulation support. Return non-zero if we can't
283 * emulate, or -EFAULT if the associated memory access caused an access
284 * fault. Return zero on success.
285 */
286
287#define INST_MFSPR_PVR 0x7c1f42a6
288#define INST_MFSPR_PVR_MASK 0xfc1fffff
289
290#define INST_DCBA 0x7c0005ec
291#define INST_DCBA_MASK 0x7c0007fe
292
293#define INST_MCRXR 0x7c000400
294#define INST_MCRXR_MASK 0x7c0007fe
295
296static int emulate_instruction(struct pt_regs *regs)
297{
298 unsigned int instword;
299
300 if (!user_mode(regs))
301 return -EINVAL;
302
303 CHECK_FULL_REGS(regs);
304
305 if (get_user(instword, (unsigned int __user *)(regs->nip)))
306 return -EFAULT;
307
308 /* Emulate the mfspr rD, PVR. */
309 if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) {
310 unsigned int rd;
311
312 rd = (instword >> 21) & 0x1f;
313 regs->gpr[rd] = mfspr(SPRN_PVR);
314 return 0;
315 }
316
317 /* Emulating the dcba insn is just a no-op. */
318 if ((instword & INST_DCBA_MASK) == INST_DCBA) {
319 static int warned;
320
321 if (!warned) {
322 printk(KERN_WARNING
323 "process %d (%s) uses obsolete 'dcba' insn\n",
324 current->pid, current->comm);
325 warned = 1;
326 }
327 return 0;
328 }
329
330 /* Emulate the mcrxr insn. */
331 if ((instword & INST_MCRXR_MASK) == INST_MCRXR) {
332 static int warned;
333 unsigned int shift;
334
335 if (!warned) {
336 printk(KERN_WARNING
337 "process %d (%s) uses obsolete 'mcrxr' insn\n",
338 current->pid, current->comm);
339 warned = 1;
340 }
341
342 shift = (instword >> 21) & 0x1c;
343 regs->ccr &= ~(0xf0000000 >> shift);
344 regs->ccr |= (regs->xer & 0xf0000000) >> shift;
345 regs->xer &= ~0xf0000000;
346 return 0;
347 }
348
349 return -EINVAL;
350}
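
The mcrxr emulation above turns on a small encoding trick: the crfD field occupies bits 23-25 of the instruction word (counting from the least-significant bit), so (instword >> 21) & 0x1c yields crfD * 4, which is exactly how far the target CR field sits below CR field 0. Below is a minimal user-space sketch of that arithmetic, with hypothetical names and a made-up test value; it is an illustration only, not part of the kernel sources.

#include <stdint.h>
#include <stdio.h>

/* Copy the top four XER bits (SO, OV, CA plus a reserved bit) into CR
 * field crfD and clear them in XER, mirroring the in-kernel emulation. */
static void mcrxr_emulate(uint32_t instword, uint32_t *cr, uint32_t *xer)
{
	unsigned int shift = (instword >> 21) & 0x1c;	/* crfD * 4 */

	*cr &= ~(0xf0000000u >> shift);
	*cr |= (*xer & 0xf0000000u) >> shift;
	*xer &= ~0xf0000000u;
}

int main(void)
{
	uint32_t cr = 0, xer = 0xa0000000;		/* SO and CA set */

	mcrxr_emulate(0x7c000400 | (1u << 23), &cr, &xer);	/* crfD = 1 */
	printf("cr=%08x xer=%08x\n", cr, xer);	/* expect cr=0a000000 xer=00000000 */
	return 0;
}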
351
352/*
353 * Look through the list of trap instructions that are used for BUG(),
354 * BUG_ON() and WARN_ON() and see if we hit one. At this point we know
355 * that the exception was caused by a trap instruction of some kind.
356 * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0
357 * otherwise.
358 */
359extern struct bug_entry __start___bug_table[], __stop___bug_table[];
360
361#ifndef CONFIG_MODULES
362#define module_find_bug(x) NULL
363#endif
364
365struct bug_entry *find_bug(unsigned long bugaddr)
366{
367 struct bug_entry *bug;
368
369 for (bug = __start___bug_table; bug < __stop___bug_table; ++bug)
370 if (bugaddr == bug->bug_addr)
371 return bug;
372 return module_find_bug(bugaddr);
373}
374
375static int
376check_bug_trap(struct pt_regs *regs)
377{
378 struct bug_entry *bug;
379 unsigned long addr;
380
381 if (regs->msr & MSR_PR)
382 return 0; /* not in kernel */
383 addr = regs->nip; /* address of trap instruction */
384 if (addr < PAGE_OFFSET)
385 return 0;
386 bug = find_bug(regs->nip);
387 if (bug == NULL)
388 return 0;
389 if (bug->line & BUG_WARNING_TRAP) {
390 /* this is a WARN_ON rather than BUG/BUG_ON */
391 printk(KERN_ERR "Badness in %s at %s:%d\n",
392 bug->function, bug->file,
393 (unsigned int)bug->line & ~BUG_WARNING_TRAP);
394 show_stack(current, (void *)regs->gpr[1]);
395 return 1;
396 }
397 printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n",
398 bug->function, bug->file, (unsigned int)bug->line);
399 return 0;
400}
401
402void __kprobes program_check_exception(struct pt_regs *regs)
403{
404 if (debugger_fault_handler(regs))
405 return;
406
407 if (regs->msr & 0x100000) {
408 /* IEEE FP exception */
409 parse_fpe(regs);
410 } else if (regs->msr & 0x20000) {
411 /* trap exception */
412
413 if (notify_die(DIE_BPT, "breakpoint", regs, 5,
414 5, SIGTRAP) == NOTIFY_STOP)
415 return;
416 if (debugger_bpt(regs))
417 return;
418
419 if (check_bug_trap(regs)) {
420 regs->nip += 4;
421 return;
422 }
423 _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
424
425 } else {
426 /* Privileged or illegal instruction; try to emulate it. */
427 switch (emulate_instruction(regs)) {
428 case 0:
429 regs->nip += 4;
430 emulate_single_step(regs);
431 break;
432
433 case -EFAULT:
434 _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
435 break;
436
437 default:
438 if (regs->msr & 0x40000)
 439				/* privileged */
440 _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
441 else
442 /* illegal */
443 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
444 break;
445 }
446 }
447}
448
449void kernel_fp_unavailable_exception(struct pt_regs *regs)
450{
451 printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
452 "%lx at %lx\n", regs->trap, regs->nip);
453 die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
454}
455
456void altivec_unavailable_exception(struct pt_regs *regs)
457{
458 if (user_mode(regs)) {
459 /* A user program has executed an altivec instruction,
460 but this kernel doesn't support altivec. */
461 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
462 return;
463 }
464 printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
465 "%lx at %lx\n", regs->trap, regs->nip);
466 die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
467}
468
469extern perf_irq_t perf_irq;
470
471void performance_monitor_exception(struct pt_regs *regs)
472{
473 perf_irq(regs);
474}
475
476void alignment_exception(struct pt_regs *regs)
477{
478 int fixed;
479
480 fixed = fix_alignment(regs);
481
482 if (fixed == 1) {
483 regs->nip += 4; /* skip over emulated instruction */
484 emulate_single_step(regs);
485 return;
486 }
487
488 /* Operand address was bad */
489 if (fixed == -EFAULT) {
490 if (user_mode(regs)) {
491 _exception(SIGSEGV, regs, SEGV_MAPERR, regs->dar);
492 } else {
493 /* Search exception table */
494 bad_page_fault(regs, regs->dar, SIGSEGV);
495 }
496
497 return;
498 }
499
500 _exception(SIGBUS, regs, BUS_ADRALN, regs->nip);
501}
502
503#ifdef CONFIG_ALTIVEC
504void altivec_assist_exception(struct pt_regs *regs)
505{
506 int err;
507 siginfo_t info;
508
509 if (!user_mode(regs)) {
510 printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
511 " at %lx\n", regs->nip);
512 die("Kernel VMX/Altivec assist exception", regs, SIGILL);
513 }
514
515 flush_altivec_to_thread(current);
516
517 err = emulate_altivec(regs);
518 if (err == 0) {
519 regs->nip += 4; /* skip emulated instruction */
520 emulate_single_step(regs);
521 return;
522 }
523
524 if (err == -EFAULT) {
525 /* got an error reading the instruction */
526 info.si_signo = SIGSEGV;
527 info.si_errno = 0;
528 info.si_code = SEGV_MAPERR;
529 info.si_addr = (void __user *) regs->nip;
530 force_sig_info(SIGSEGV, &info, current);
531 } else {
532 /* didn't recognize the instruction */
533 /* XXX quick hack for now: set the non-Java bit in the VSCR */
534 if (printk_ratelimit())
535 printk(KERN_ERR "Unrecognized altivec instruction "
536 "in %s at %lx\n", current->comm, regs->nip);
537 current->thread.vscr.u[3] |= 0x10000;
538 }
539}
540#endif /* CONFIG_ALTIVEC */
541
542/*
543 * We enter here if we get an unrecoverable exception, that is, one
544 * that happened at a point where the RI (recoverable interrupt) bit
545 * in the MSR is 0. This indicates that SRR0/1 are live, and that
546 * we therefore lost state by taking this exception.
547 */
548void unrecoverable_exception(struct pt_regs *regs)
549{
550 printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n",
551 regs->trap, regs->nip);
552 die("Unrecoverable exception", regs, SIGABRT);
553}
554
555/*
556 * We enter here if we discover during exception entry that we are
557 * running in supervisor mode with a userspace value in the stack pointer.
558 */
559void kernel_bad_stack(struct pt_regs *regs)
560{
561 printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
562 regs->gpr[1], regs->nip);
563 die("Bad kernel stack pointer", regs, SIGABRT);
564}
565
566void __init trap_init(void)
567{
568}
diff --git a/arch/ppc64/kernel/u3_iommu.c b/arch/ppc64/kernel/u3_iommu.c
index 41ea09cb9ac7..fba871a1bda5 100644
--- a/arch/ppc64/kernel/u3_iommu.c
+++ b/arch/ppc64/kernel/u3_iommu.c
@@ -44,39 +44,11 @@
44#include <asm/abs_addr.h> 44#include <asm/abs_addr.h>
45#include <asm/cacheflush.h> 45#include <asm/cacheflush.h>
46#include <asm/lmb.h> 46#include <asm/lmb.h>
47 47#include <asm/dart.h>
48#include "pci.h" 48#include <asm/ppc-pci.h>
49 49
50extern int iommu_force_on; 50extern int iommu_force_on;
51 51
52/* physical base of DART registers */
53#define DART_BASE 0xf8033000UL
54
55/* Offset from base to control register */
56#define DARTCNTL 0
57/* Offset from base to exception register */
58#define DARTEXCP 0x10
59/* Offset from base to TLB tag registers */
60#define DARTTAG 0x1000
61
62
63/* Control Register fields */
64
65/* base address of table (pfn) */
66#define DARTCNTL_BASE_MASK 0xfffff
67#define DARTCNTL_BASE_SHIFT 12
68
69#define DARTCNTL_FLUSHTLB 0x400
70#define DARTCNTL_ENABLE 0x200
71
72/* size of table in pages */
73#define DARTCNTL_SIZE_MASK 0x1ff
74#define DARTCNTL_SIZE_SHIFT 0
75
76/* DART table fields */
77#define DARTMAP_VALID 0x80000000
78#define DARTMAP_RPNMASK 0x00ffffff
79
80/* Physical base address and size of the DART table */ 52/* Physical base address and size of the DART table */
81unsigned long dart_tablebase; /* exported to htab_initialize */ 53unsigned long dart_tablebase; /* exported to htab_initialize */
82static unsigned long dart_tablesize; 54static unsigned long dart_tablesize;
@@ -152,18 +124,21 @@ static void dart_build(struct iommu_table *tbl, long index,
152 124
153 DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr); 125 DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
154 126
127 index <<= DART_PAGE_FACTOR;
128 npages <<= DART_PAGE_FACTOR;
129
155 dp = ((unsigned int*)tbl->it_base) + index; 130 dp = ((unsigned int*)tbl->it_base) + index;
156 131
 157	/* On U3, all memory is contiguous, so we can move this 132	/* On U3, all memory is contiguous, so we can move this
 158	 * out of the loop. 133	 * out of the loop.
 159	 */ 134	 */
160 while (npages--) { 135 while (npages--) {
161 rpn = virt_to_abs(uaddr) >> PAGE_SHIFT; 136 rpn = virt_to_abs(uaddr) >> DART_PAGE_SHIFT;
162 137
163 *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK); 138 *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);
164 139
165 rpn++; 140 rpn++;
166 uaddr += PAGE_SIZE; 141 uaddr += DART_PAGE_SIZE;
167 } 142 }
168 143
169 dart_dirty = 1; 144 dart_dirty = 1;
@@ -181,6 +156,9 @@ static void dart_free(struct iommu_table *tbl, long index, long npages)
181 156
182 DBG("dart: free at: %lx, %lx\n", index, npages); 157 DBG("dart: free at: %lx, %lx\n", index, npages);
183 158
159 index <<= DART_PAGE_FACTOR;
160 npages <<= DART_PAGE_FACTOR;
161
184 dp = ((unsigned int *)tbl->it_base) + index; 162 dp = ((unsigned int *)tbl->it_base) + index;
185 163
186 while (npages--) 164 while (npages--)
@@ -209,10 +187,10 @@ static int dart_init(struct device_node *dart_node)
209 * that to work around what looks like a problem with the HT bridge 187 * that to work around what looks like a problem with the HT bridge
210 * prefetching into invalid pages and corrupting data 188 * prefetching into invalid pages and corrupting data
211 */ 189 */
212 tmp = lmb_alloc(PAGE_SIZE, PAGE_SIZE); 190 tmp = lmb_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
213 if (!tmp) 191 if (!tmp)
214 panic("U3-DART: Cannot allocate spare page!"); 192 panic("U3-DART: Cannot allocate spare page!");
215 dart_emptyval = DARTMAP_VALID | ((tmp >> PAGE_SHIFT) & DARTMAP_RPNMASK); 193 dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & DARTMAP_RPNMASK);
216 194
217 /* Map in DART registers. FIXME: Use device node to get base address */ 195 /* Map in DART registers. FIXME: Use device node to get base address */
218 dart = ioremap(DART_BASE, 0x7000); 196 dart = ioremap(DART_BASE, 0x7000);
@@ -223,8 +201,8 @@ static int dart_init(struct device_node *dart_node)
223 * table size and enable bit 201 * table size and enable bit
224 */ 202 */
225 regword = DARTCNTL_ENABLE | 203 regword = DARTCNTL_ENABLE |
226 ((dart_tablebase >> PAGE_SHIFT) << DARTCNTL_BASE_SHIFT) | 204 ((dart_tablebase >> DART_PAGE_SHIFT) << DARTCNTL_BASE_SHIFT) |
227 (((dart_tablesize >> PAGE_SHIFT) & DARTCNTL_SIZE_MASK) 205 (((dart_tablesize >> DART_PAGE_SHIFT) & DARTCNTL_SIZE_MASK)
228 << DARTCNTL_SIZE_SHIFT); 206 << DARTCNTL_SIZE_SHIFT);
229 dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize); 207 dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize);
230 208
diff --git a/arch/ppc64/kernel/vdso64/sigtramp.S b/arch/ppc64/kernel/vdso64/sigtramp.S
index 8ae8f205e470..31b604ab56de 100644
--- a/arch/ppc64/kernel/vdso64/sigtramp.S
+++ b/arch/ppc64/kernel/vdso64/sigtramp.S
@@ -15,6 +15,7 @@
15#include <asm/ppc_asm.h> 15#include <asm/ppc_asm.h>
16#include <asm/unistd.h> 16#include <asm/unistd.h>
17#include <asm/vdso.h> 17#include <asm/vdso.h>
18#include <asm/ptrace.h> /* XXX for __SIGNAL_FRAMESIZE */
18 19
19 .text 20 .text
20 21
diff --git a/arch/ppc64/kernel/vecemu.c b/arch/ppc64/kernel/vecemu.c
deleted file mode 100644
index cb207629f21f..000000000000
--- a/arch/ppc64/kernel/vecemu.c
+++ /dev/null
@@ -1,346 +0,0 @@
1/*
2 * Routines to emulate some Altivec/VMX instructions, specifically
3 * those that can trap when given denormalized operands in Java mode.
4 */
5#include <linux/kernel.h>
6#include <linux/errno.h>
7#include <linux/sched.h>
8#include <asm/ptrace.h>
9#include <asm/processor.h>
10#include <asm/uaccess.h>
11
12/* Functions in vector.S */
13extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
14extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
15extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
16extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
17extern void vrefp(vector128 *dst, vector128 *src);
18extern void vrsqrtefp(vector128 *dst, vector128 *src);
19extern void vexptep(vector128 *dst, vector128 *src);
20
21static unsigned int exp2s[8] = {
22 0x800000,
23 0x8b95c2,
24 0x9837f0,
25 0xa5fed7,
26 0xb504f3,
27 0xc5672a,
28 0xd744fd,
29 0xeac0c7
30};
31
32/*
33 * Computes an estimate of 2^x. The `s' argument is the 32-bit
34 * single-precision floating-point representation of x.
35 */
36static unsigned int eexp2(unsigned int s)
37{
38 int exp, pwr;
39 unsigned int mant, frac;
40
41 /* extract exponent field from input */
42 exp = ((s >> 23) & 0xff) - 127;
43 if (exp > 7) {
44 /* check for NaN input */
45 if (exp == 128 && (s & 0x7fffff) != 0)
46 return s | 0x400000; /* return QNaN */
47 /* 2^-big = 0, 2^+big = +Inf */
48 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
49 }
50 if (exp < -23)
51 return 0x3f800000; /* 1.0 */
52
53 /* convert to fixed point integer in 9.23 representation */
54 pwr = (s & 0x7fffff) | 0x800000;
55 if (exp > 0)
56 pwr <<= exp;
57 else
58 pwr >>= -exp;
59 if (s & 0x80000000)
60 pwr = -pwr;
61
62 /* extract integer part, which becomes exponent part of result */
63 exp = (pwr >> 23) + 126;
64 if (exp >= 254)
65 return 0x7f800000;
66 if (exp < -23)
67 return 0;
68
69 /* table lookup on top 3 bits of fraction to get mantissa */
70 mant = exp2s[(pwr >> 20) & 7];
71
72 /* linear interpolation using remaining 20 bits of fraction */
73 asm("mulhwu %0,%1,%2" : "=r" (frac)
74 : "r" (pwr << 12), "r" (0x172b83ff));
75 asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
76 mant += frac;
77
78 if (exp >= 0)
79 return mant + (exp << 23);
80
81 /* denormalized result */
82 exp = -exp;
83 mant += 1 << (exp - 1);
84 return mant >> exp;
85}
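
In other words (a reading of the constants above, not text from the original source): eexp2() converts its argument to 9.23 fixed point, splits it into integer and fractional parts, looks up the top three fraction bits in exp2s[], and linearly interpolates with the remaining twenty bits:

\[
2^{x} = 2^{\lfloor x \rfloor}\cdot 2^{f},\qquad
f = \frac{k}{8} + \frac{t}{8},\quad k \in \{0,\dots,7\},\ t \in [0,1),
\]
\[
2^{f} \approx 2^{k/8}\,\bigl(1 + t\,(2^{1/8}-1)\bigr),
\]

where exp2s[k] stores approximately 2^{k/8} scaled by 2^23, the constant 0x172b83ff is approximately (2^{1/8}-1) scaled by 2^32, and the two mulhwu instructions compute the correction term t (2^{1/8}-1) 2^{k/8} in the same 2^23 scaling. The integer part of x then supplies the result's exponent field.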
86
87/*
88 * Computes an estimate of log_2(x). The `s' argument is the 32-bit
89 * single-precision floating-point representation of x.
90 */
91static unsigned int elog2(unsigned int s)
92{
93 int exp, mant, lz, frac;
94
95 exp = s & 0x7f800000;
96 mant = s & 0x7fffff;
97 if (exp == 0x7f800000) { /* Inf or NaN */
98 if (mant != 0)
99 s |= 0x400000; /* turn NaN into QNaN */
100 return s;
101 }
102 if ((exp | mant) == 0) /* +0 or -0 */
103 return 0xff800000; /* return -Inf */
104
105 if (exp == 0) {
106 /* denormalized */
107 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
108 mant <<= lz - 8;
109 exp = (-118 - lz) << 23;
110 } else {
111 mant |= 0x800000;
112 exp -= 127 << 23;
113 }
114
115 if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */
116 exp |= 0x400000; /* 0.5 * 2^23 */
117 asm("mulhwu %0,%1,%2" : "=r" (mant)
118 : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */
119 }
120 if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */
121 exp |= 0x200000; /* 0.25 * 2^23 */
122 asm("mulhwu %0,%1,%2" : "=r" (mant)
123 : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */
124 }
125 if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */
126 exp |= 0x100000; /* 0.125 * 2^23 */
127 asm("mulhwu %0,%1,%2" : "=r" (mant)
128 : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */
129 }
130 if (mant > 0x800000) { /* 1.0 * 2^23 */
131 /* calculate (mant - 1) * 1.381097463 */
132 /* 1.381097463 == 0.125 / (2^0.125 - 1) */
133 asm("mulhwu %0,%1,%2" : "=r" (frac)
134 : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
135 exp += frac;
136 }
137 s = exp & 0x80000000;
138 if (exp != 0) {
139 if (s)
140 exp = -exp;
141 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
142 lz = 8 - lz;
143 if (lz > 0)
144 exp >>= lz;
145 else if (lz < 0)
146 exp <<= -lz;
147 s += ((lz + 126) << 23) + exp;
148 }
149 return s;
150}
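
Similarly for elog2() (again, my reading of the constants rather than anything stated in the source): with x = m * 2^e and m in [1, 2), the exponent e gives the integer part, and the fraction of log2(m) is extracted a bit at a time:

\[
\log_2 x = e + \log_2 m,\qquad
\log_2 m \approx \frac{b_1}{2} + \frac{b_2}{4} + \frac{b_3}{8}
+ (m' - 1)\,\frac{1/8}{2^{1/8} - 1},\qquad b_i \in \{0,1\},
\]

where each bit b_i is set when the running mantissa still exceeds 2^{1/2}, 2^{1/4} or 2^{1/8}, in which case the mantissa is multiplied by the matching fixed-point constant for 2^{-1/2}, 2^{-1/4} or 2^{-1/8} (the 0xb504f334-style multipliers), and m' in [1, 2^{1/8}) is whatever remains for the final linear term (the 0xb0c7cd3a multiply). The closing cntlzw block renormalises the fixed-point result back into IEEE single format.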
151
152#define VSCR_SAT 1
153
154static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
155{
156 int exp, mant;
157
158 exp = (x >> 23) & 0xff;
159 mant = x & 0x7fffff;
160 if (exp == 255 && mant != 0)
161 return 0; /* NaN -> 0 */
162 exp = exp - 127 + scale;
163 if (exp < 0)
164 return 0; /* round towards zero */
165 if (exp >= 31) {
166 /* saturate, unless the result would be -2^31 */
167 if (x + (scale << 23) != 0xcf000000)
168 *vscrp |= VSCR_SAT;
169 return (x & 0x80000000)? 0x80000000: 0x7fffffff;
170 }
171 mant |= 0x800000;
172 mant = (mant << 7) >> (30 - exp);
173 return (x & 0x80000000)? -mant: mant;
174}
175
176static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
177{
178 int exp;
179 unsigned int mant;
180
181 exp = (x >> 23) & 0xff;
182 mant = x & 0x7fffff;
183 if (exp == 255 && mant != 0)
184 return 0; /* NaN -> 0 */
185 exp = exp - 127 + scale;
186 if (exp < 0)
187 return 0; /* round towards zero */
188 if (x & 0x80000000) {
189 /* negative => saturate to 0 */
190 *vscrp |= VSCR_SAT;
191 return 0;
192 }
193 if (exp >= 32) {
194 /* saturate */
195 *vscrp |= VSCR_SAT;
196 return 0xffffffff;
197 }
198 mant |= 0x800000;
199 mant = (mant << 8) >> (31 - exp);
200 return mant;
201}
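
The two conversion helpers above implement the Altivec convert-to-fixed-point semantics in pure integer arithmetic; summarising in notation of my own rather than the source's:

\[
\mathrm{ctsxs}(x, s) = \operatorname{sat}_{[-2^{31},\,2^{31}-1]}\!\bigl(\operatorname{trunc}(x \cdot 2^{s})\bigr),
\qquad
\mathrm{ctuxs}(x, s) = \operatorname{sat}_{[0,\,2^{32}-1]}\!\bigl(\operatorname{trunc}(x \cdot 2^{s})\bigr),
\]

with VSCR[SAT] set whenever saturation actually occurs (including negative inputs to ctuxs), while NaN inputs simply convert to 0 without touching SAT. The shifts at the end are the truncating multiplication by the scaled power of two once the implicit leading mantissa bit has been restored.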
202
203/* Round to floating integer, towards 0 */
204static unsigned int rfiz(unsigned int x)
205{
206 int exp;
207
208 exp = ((x >> 23) & 0xff) - 127;
209 if (exp == 128 && (x & 0x7fffff) != 0)
210 return x | 0x400000; /* NaN -> make it a QNaN */
211 if (exp >= 23)
212 return x; /* it's an integer already (or Inf) */
213 if (exp < 0)
214 return x & 0x80000000; /* |x| < 1.0 rounds to 0 */
215 return x & ~(0x7fffff >> exp);
216}
217
218/* Round to floating integer, towards +/- Inf */
219static unsigned int rfii(unsigned int x)
220{
221 int exp, mask;
222
223 exp = ((x >> 23) & 0xff) - 127;
224 if (exp == 128 && (x & 0x7fffff) != 0)
225 return x | 0x400000; /* NaN -> make it a QNaN */
226 if (exp >= 23)
227 return x; /* it's an integer already (or Inf) */
228 if ((x & 0x7fffffff) == 0)
229 return x; /* +/-0 -> +/-0 */
230 if (exp < 0)
231 /* 0 < |x| < 1.0 rounds to +/- 1.0 */
232 return (x & 0x80000000) | 0x3f800000;
233 mask = 0x7fffff >> exp;
234 /* mantissa overflows into exponent - that's OK,
235 it can't overflow into the sign bit */
236 return (x + mask) & ~mask;
237}
238
239/* Round to floating integer, to nearest */
240static unsigned int rfin(unsigned int x)
241{
242 int exp, half;
243
244 exp = ((x >> 23) & 0xff) - 127;
245 if (exp == 128 && (x & 0x7fffff) != 0)
246 return x | 0x400000; /* NaN -> make it a QNaN */
247 if (exp >= 23)
248 return x; /* it's an integer already (or Inf) */
249 if (exp < -1)
250 return x & 0x80000000; /* |x| < 0.5 -> +/-0 */
251 if (exp == -1)
252 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
253 return (x & 0x80000000) | 0x3f800000;
254 half = 0x400000 >> exp;
255 /* add 0.5 to the magnitude and chop off the fraction bits */
256 return (x + half) & ~(0x7fffff >> exp);
257}
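
The three rounding helpers above share one idea (my summary): for an unbiased exponent e in [0, 22], the mantissa bits that lie below the binary point are m = 0x7fffff >> e, and rounding is done by conditionally adding before clearing them:

\[
\mathrm{rfiz}(x) = x \mathbin{\&} \lnot m,\qquad
\mathrm{rfii}(x) = (x + m) \mathbin{\&} \lnot m,\qquad
\mathrm{rfin}(x) = (x + h) \mathbin{\&} \lnot m,\quad h = \texttt{0x400000} \gg e .
\]

Because IEEE single precision is a sign-magnitude format, adding to the raw bit pattern always moves the magnitude away from zero (so rfin's ties go away from zero rather than to even), and a carry out of the mantissa correctly bumps the exponent; the earlier special cases handle NaN, |x| < 1 and values that are already integers. vrfip and vrfim are then built by picking rfiz or rfii per element depending on the sign.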
258
259int
260emulate_altivec(struct pt_regs *regs)
261{
262 unsigned int instr, i;
263 unsigned int va, vb, vc, vd;
264 vector128 *vrs;
265
266 if (get_user(instr, (unsigned int __user *) regs->nip))
267 return -EFAULT;
268 if ((instr >> 26) != 4)
269 return -EINVAL; /* not an altivec instruction */
270 vd = (instr >> 21) & 0x1f;
271 va = (instr >> 16) & 0x1f;
272 vb = (instr >> 11) & 0x1f;
273 vc = (instr >> 6) & 0x1f;
274
275 vrs = current->thread.vr;
276 switch (instr & 0x3f) {
277 case 10:
278 switch (vc) {
279 case 0: /* vaddfp */
280 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
281 break;
282 case 1: /* vsubfp */
283 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
284 break;
285 case 4: /* vrefp */
286 vrefp(&vrs[vd], &vrs[vb]);
287 break;
288 case 5: /* vrsqrtefp */
289 vrsqrtefp(&vrs[vd], &vrs[vb]);
290 break;
291 case 6: /* vexptefp */
292 for (i = 0; i < 4; ++i)
293 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
294 break;
295 case 7: /* vlogefp */
296 for (i = 0; i < 4; ++i)
297 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
298 break;
299 case 8: /* vrfin */
300 for (i = 0; i < 4; ++i)
301 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
302 break;
303 case 9: /* vrfiz */
304 for (i = 0; i < 4; ++i)
305 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
306 break;
307 case 10: /* vrfip */
308 for (i = 0; i < 4; ++i) {
309 u32 x = vrs[vb].u[i];
310 x = (x & 0x80000000)? rfiz(x): rfii(x);
311 vrs[vd].u[i] = x;
312 }
313 break;
314 case 11: /* vrfim */
315 for (i = 0; i < 4; ++i) {
316 u32 x = vrs[vb].u[i];
317 x = (x & 0x80000000)? rfii(x): rfiz(x);
318 vrs[vd].u[i] = x;
319 }
320 break;
321 case 14: /* vctuxs */
322 for (i = 0; i < 4; ++i)
323 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
324 &current->thread.vscr.u[3]);
325 break;
326 case 15: /* vctsxs */
327 for (i = 0; i < 4; ++i)
328 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
329 &current->thread.vscr.u[3]);
330 break;
331 default:
332 return -EINVAL;
333 }
334 break;
335 case 46: /* vmaddfp */
336 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
337 break;
338 case 47: /* vnmsubfp */
339 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
340 break;
341 default:
342 return -EINVAL;
343 }
344
345 return 0;
346}
diff --git a/arch/ppc64/kernel/vector.S b/arch/ppc64/kernel/vector.S
deleted file mode 100644
index b79d33e4001e..000000000000
--- a/arch/ppc64/kernel/vector.S
+++ /dev/null
@@ -1,172 +0,0 @@
1#include <asm/ppc_asm.h>
2#include <asm/processor.h>
3
4/*
5 * The routines below are in assembler so we can closely control the
6 * usage of floating-point registers. These routines must be called
7 * with preempt disabled.
8 */
9 .section ".toc","aw"
10fpzero:
11 .tc FD_0_0[TC],0
12fpone:
13 .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */
14fphalf:
15 .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */
16
17 .text
18/*
19 * Internal routine to enable floating point and set FPSCR to 0.
20 * Don't call it from C; it doesn't use the normal calling convention.
21 */
22fpenable:
23 mfmsr r10
24 ori r11,r10,MSR_FP
25 mtmsr r11
26 isync
27 stfd fr31,-8(r1)
28 stfd fr0,-16(r1)
29 stfd fr1,-24(r1)
30 mffs fr31
31 lfd fr1,fpzero@toc(r2)
32 mtfsf 0xff,fr1
33 blr
34
35fpdisable:
36 mtlr r12
37 mtfsf 0xff,fr31
38 lfd fr1,-24(r1)
39 lfd fr0,-16(r1)
40 lfd fr31,-8(r1)
41 mtmsr r10
42 isync
43 blr
44
45/*
46 * Vector add, floating point.
47 */
48_GLOBAL(vaddfp)
49 mflr r12
50 bl fpenable
51 li r0,4
52 mtctr r0
53 li r6,0
541: lfsx fr0,r4,r6
55 lfsx fr1,r5,r6
56 fadds fr0,fr0,fr1
57 stfsx fr0,r3,r6
58 addi r6,r6,4
59 bdnz 1b
60 b fpdisable
61
62/*
63 * Vector subtract, floating point.
64 */
65_GLOBAL(vsubfp)
66 mflr r12
67 bl fpenable
68 li r0,4
69 mtctr r0
70 li r6,0
711: lfsx fr0,r4,r6
72 lfsx fr1,r5,r6
73 fsubs fr0,fr0,fr1
74 stfsx fr0,r3,r6
75 addi r6,r6,4
76 bdnz 1b
77 b fpdisable
78
79/*
80 * Vector multiply and add, floating point.
81 */
82_GLOBAL(vmaddfp)
83 mflr r12
84 bl fpenable
85 stfd fr2,-32(r1)
86 li r0,4
87 mtctr r0
88 li r7,0
891: lfsx fr0,r4,r7
90 lfsx fr1,r5,r7
91 lfsx fr2,r6,r7
92 fmadds fr0,fr0,fr2,fr1
93 stfsx fr0,r3,r7
94 addi r7,r7,4
95 bdnz 1b
96 lfd fr2,-32(r1)
97 b fpdisable
98
99/*
100 * Vector negative multiply and subtract, floating point.
101 */
102_GLOBAL(vnmsubfp)
103 mflr r12
104 bl fpenable
105 stfd fr2,-32(r1)
106 li r0,4
107 mtctr r0
108 li r7,0
1091: lfsx fr0,r4,r7
110 lfsx fr1,r5,r7
111 lfsx fr2,r6,r7
112 fnmsubs fr0,fr0,fr2,fr1
113 stfsx fr0,r3,r7
114 addi r7,r7,4
115 bdnz 1b
116 lfd fr2,-32(r1)
117 b fpdisable
118
119/*
120 * Vector reciprocal estimate. We just compute 1.0/x.
121 * r3 -> destination, r4 -> source.
122 */
123_GLOBAL(vrefp)
124 mflr r12
125 bl fpenable
126 li r0,4
127 lfd fr1,fpone@toc(r2)
128 mtctr r0
129 li r6,0
1301: lfsx fr0,r4,r6
131 fdivs fr0,fr1,fr0
132 stfsx fr0,r3,r6
133 addi r6,r6,4
134 bdnz 1b
135 b fpdisable
136
137/*
138 * Vector reciprocal square-root estimate, floating point.
139 * We use the frsqrte instruction for the initial estimate followed
140 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
141 * r3 -> destination, r4 -> source.
142 */
143_GLOBAL(vrsqrtefp)
144 mflr r12
145 bl fpenable
146 stfd fr2,-32(r1)
147 stfd fr3,-40(r1)
148 stfd fr4,-48(r1)
149 stfd fr5,-56(r1)
150 li r0,4
151 lfd fr4,fpone@toc(r2)
152 lfd fr5,fphalf@toc(r2)
153 mtctr r0
154 li r6,0
1551: lfsx fr0,r4,r6
156 frsqrte fr1,fr0 /* r = frsqrte(s) */
157 fmuls fr3,fr1,fr0 /* r * s */
158 fmuls fr2,fr1,fr5 /* r * 0.5 */
159 fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
160 fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
161 fmuls fr3,fr1,fr0 /* r * s */
162 fmuls fr2,fr1,fr5 /* r * 0.5 */
163 fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
164 fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
165 stfsx fr1,r3,r6
166 addi r6,r6,4
167 bdnz 1b
168 lfd fr5,-56(r1)
169 lfd fr4,-48(r1)
170 lfd fr3,-40(r1)
171 lfd fr2,-32(r1)
172 b fpdisable
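
The two identical blocks of fmuls/fnmsubs/fmadds in the loop above are Newton-Raphson refinements of the frsqrte estimate. With s the input and r the current estimate of 1/sqrt(s), the update spelled out in the comments is

\[
r_{\mathrm{new}} = r + \tfrac{1}{2}\, r\,\bigl(1 - s\,r^{2}\bigr),
\]

which is the Newton step for f(r) = r^{-2} - s. Each iteration roughly squares the relative error of the estimate, which is why two steps after the hardware's coarse frsqrte result are considered sufficient here.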
diff --git a/arch/ppc64/kernel/viopath.c b/arch/ppc64/kernel/viopath.c
deleted file mode 100644
index 2a6c4f01c45e..000000000000
--- a/arch/ppc64/kernel/viopath.c
+++ /dev/null
@@ -1,673 +0,0 @@
1/* -*- linux-c -*-
2 * arch/ppc64/kernel/viopath.c
3 *
4 * iSeries Virtual I/O Message Path code
5 *
6 * Authors: Dave Boutcher <boutcher@us.ibm.com>
7 * Ryan Arnold <ryanarn@us.ibm.com>
8 * Colin Devilbiss <devilbis@us.ibm.com>
9 *
10 * (C) Copyright 2000-2003 IBM Corporation
11 *
12 * This code is used by the iSeries virtual disk, cd,
13 * tape, and console to communicate with OS/400 in another
14 * partition.
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License as
18 * published by the Free Software Foundation; either version 2 of the
 19 * License, or (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful, but
22 * WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software Foundation,
28 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 *
30 */
31#include <linux/module.h>
32#include <linux/kernel.h>
33#include <linux/errno.h>
34#include <linux/vmalloc.h>
35#include <linux/string.h>
36#include <linux/proc_fs.h>
37#include <linux/dma-mapping.h>
38#include <linux/wait.h>
39#include <linux/seq_file.h>
40#include <linux/smp_lock.h>
41#include <linux/interrupt.h>
42
43#include <asm/system.h>
44#include <asm/uaccess.h>
45#include <asm/iSeries/HvTypes.h>
46#include <asm/iSeries/ItExtVpdPanel.h>
47#include <asm/iSeries/HvLpEvent.h>
48#include <asm/iSeries/HvLpConfig.h>
49#include <asm/iSeries/mf.h>
50#include <asm/iSeries/vio.h>
51
52/* Status of the path to each other partition in the system.
53 * This is overkill, since we will only ever establish connections
54 * to our hosting partition and the primary partition on the system.
55 * But this allows for other support in the future.
56 */
57static struct viopathStatus {
58 int isOpen; /* Did we open the path? */
59 int isActive; /* Do we have a mon msg outstanding */
60 int users[VIO_MAX_SUBTYPES];
61 HvLpInstanceId mSourceInst;
62 HvLpInstanceId mTargetInst;
63 int numberAllocated;
64} viopathStatus[HVMAXARCHITECTEDLPS];
65
66static DEFINE_SPINLOCK(statuslock);
67
68/*
69 * For each kind of event we allocate a buffer that is
70 * guaranteed not to cross a page boundary
71 */
72static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned;
73static atomic_t event_buffer_available[VIO_MAX_SUBTYPES];
74static int event_buffer_initialised;
75
76static void handleMonitorEvent(struct HvLpEvent *event);
77
78/*
79 * We use this structure to handle asynchronous responses. The caller
80 * blocks on the semaphore and the handler posts the semaphore. However,
81 * if system_state is not SYSTEM_RUNNING, then wait_atomic is used ...
82 */
83struct alloc_parms {
84 struct semaphore sem;
85 int number;
86 atomic_t wait_atomic;
87 int used_wait_atomic;
88};
89
90/* Put a sequence number in each mon msg. The value is not
91 * important. Start at something other than 0 just for
92 * readability. wrapping this is ok.
93 */
94static u8 viomonseq = 22;
95
96/* Our hosting logical partition. We get this at startup
97 * time, and different modules access this variable directly.
98 */
99HvLpIndex viopath_hostLp = HvLpIndexInvalid;
100EXPORT_SYMBOL(viopath_hostLp);
101HvLpIndex viopath_ourLp = HvLpIndexInvalid;
102EXPORT_SYMBOL(viopath_ourLp);
103
104/* For each kind of incoming event we set a pointer to a
105 * routine to call.
106 */
107static vio_event_handler_t *vio_handler[VIO_MAX_SUBTYPES];
108
109#define VIOPATH_KERN_WARN KERN_WARNING "viopath: "
110#define VIOPATH_KERN_INFO KERN_INFO "viopath: "
111
112static int proc_viopath_show(struct seq_file *m, void *v)
113{
114 char *buf;
115 u16 vlanMap;
116 dma_addr_t handle;
117 HvLpEvent_Rc hvrc;
118 DECLARE_MUTEX_LOCKED(Semaphore);
119
120 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
121 if (!buf)
122 return 0;
123 memset(buf, 0, PAGE_SIZE);
124
125 handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE,
126 DMA_FROM_DEVICE);
127
128 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
129 HvLpEvent_Type_VirtualIo,
130 viomajorsubtype_config | vioconfigget,
131 HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
132 viopath_sourceinst(viopath_hostLp),
133 viopath_targetinst(viopath_hostLp),
134 (u64)(unsigned long)&Semaphore, VIOVERSION << 16,
135 ((u64)handle) << 32, PAGE_SIZE, 0, 0);
136
137 if (hvrc != HvLpEvent_Rc_Good)
138 printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc);
139
140 down(&Semaphore);
141
142 vlanMap = HvLpConfig_getVirtualLanIndexMap();
143
144 buf[PAGE_SIZE-1] = '\0';
145 seq_printf(m, "%s", buf);
146 seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
147 seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
148 e2a(xItExtVpdPanel.mfgID[2]),
149 e2a(xItExtVpdPanel.mfgID[3]),
150 e2a(xItExtVpdPanel.systemSerial[1]),
151 e2a(xItExtVpdPanel.systemSerial[2]),
152 e2a(xItExtVpdPanel.systemSerial[3]),
153 e2a(xItExtVpdPanel.systemSerial[4]),
154 e2a(xItExtVpdPanel.systemSerial[5]));
155
156 dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
157 kfree(buf);
158
159 return 0;
160}
161
162static int proc_viopath_open(struct inode *inode, struct file *file)
163{
164 return single_open(file, proc_viopath_show, NULL);
165}
166
167static struct file_operations proc_viopath_operations = {
168 .open = proc_viopath_open,
169 .read = seq_read,
170 .llseek = seq_lseek,
171 .release = single_release,
172};
173
174static int __init vio_proc_init(void)
175{
176 struct proc_dir_entry *e;
177
178 e = create_proc_entry("iSeries/config", 0, NULL);
179 if (e)
180 e->proc_fops = &proc_viopath_operations;
181
182 return 0;
183}
184__initcall(vio_proc_init);
185
186/* See if a given LP is active. Allow for invalid lps to be passed in
187 * and just return invalid
188 */
189int viopath_isactive(HvLpIndex lp)
190{
191 if (lp == HvLpIndexInvalid)
192 return 0;
193 if (lp < HVMAXARCHITECTEDLPS)
194 return viopathStatus[lp].isActive;
195 else
196 return 0;
197}
198EXPORT_SYMBOL(viopath_isactive);
199
200/*
201 * We cache the source and target instance ids for each
202 * partition.
203 */
204HvLpInstanceId viopath_sourceinst(HvLpIndex lp)
205{
206 return viopathStatus[lp].mSourceInst;
207}
208EXPORT_SYMBOL(viopath_sourceinst);
209
210HvLpInstanceId viopath_targetinst(HvLpIndex lp)
211{
212 return viopathStatus[lp].mTargetInst;
213}
214EXPORT_SYMBOL(viopath_targetinst);
215
216/*
217 * Send a monitor message. This is a message with the acknowledge
218 * bit on that the other side will NOT explicitly acknowledge. When
219 * the other side goes down, the hypervisor will acknowledge any
220 * outstanding messages....so we will know when the other side dies.
221 */
222static void sendMonMsg(HvLpIndex remoteLp)
223{
224 HvLpEvent_Rc hvrc;
225
226 viopathStatus[remoteLp].mSourceInst =
227 HvCallEvent_getSourceLpInstanceId(remoteLp,
228 HvLpEvent_Type_VirtualIo);
229 viopathStatus[remoteLp].mTargetInst =
230 HvCallEvent_getTargetLpInstanceId(remoteLp,
231 HvLpEvent_Type_VirtualIo);
232
233 /*
 234	 * Deliberately ignore the return code here. If we call this
235 * more than once, we don't care.
236 */
237 vio_setHandler(viomajorsubtype_monitor, handleMonitorEvent);
238
239 hvrc = HvCallEvent_signalLpEventFast(remoteLp, HvLpEvent_Type_VirtualIo,
240 viomajorsubtype_monitor, HvLpEvent_AckInd_DoAck,
241 HvLpEvent_AckType_DeferredAck,
242 viopathStatus[remoteLp].mSourceInst,
243 viopathStatus[remoteLp].mTargetInst,
244 viomonseq++, 0, 0, 0, 0, 0);
245
246 if (hvrc == HvLpEvent_Rc_Good)
247 viopathStatus[remoteLp].isActive = 1;
248 else {
249 printk(VIOPATH_KERN_WARN "could not connect to partition %d\n",
250 remoteLp);
251 viopathStatus[remoteLp].isActive = 0;
252 }
253}
254
255static void handleMonitorEvent(struct HvLpEvent *event)
256{
257 HvLpIndex remoteLp;
258 int i;
259
260 /*
261 * This handler is _also_ called as part of the loop
262 * at the end of this routine, so it must be able to
263 * ignore NULL events...
264 */
265 if (!event)
266 return;
267
268 /*
269 * First see if this is just a normal monitor message from the
270 * other partition
271 */
272 if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
273 remoteLp = event->xSourceLp;
274 if (!viopathStatus[remoteLp].isActive)
275 sendMonMsg(remoteLp);
276 return;
277 }
278
279 /*
280 * This path is for an acknowledgement; the other partition
281 * died
282 */
283 remoteLp = event->xTargetLp;
284 if ((event->xSourceInstanceId != viopathStatus[remoteLp].mSourceInst) ||
285 (event->xTargetInstanceId != viopathStatus[remoteLp].mTargetInst)) {
286 printk(VIOPATH_KERN_WARN "ignoring ack....mismatched instances\n");
287 return;
288 }
289
290 printk(VIOPATH_KERN_WARN "partition %d ended\n", remoteLp);
291
292 viopathStatus[remoteLp].isActive = 0;
293
294 /*
295 * For each active handler, pass them a NULL
296 * message to indicate that the other partition
297 * died
298 */
299 for (i = 0; i < VIO_MAX_SUBTYPES; i++) {
300 if (vio_handler[i] != NULL)
301 (*vio_handler[i])(NULL);
302 }
303}
304
305int vio_setHandler(int subtype, vio_event_handler_t *beh)
306{
307 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
308 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
309 return -EINVAL;
310 if (vio_handler[subtype] != NULL)
311 return -EBUSY;
312 vio_handler[subtype] = beh;
313 return 0;
314}
315EXPORT_SYMBOL(vio_setHandler);
316
317int vio_clearHandler(int subtype)
318{
319 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
320 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
321 return -EINVAL;
322 if (vio_handler[subtype] == NULL)
323 return -EAGAIN;
324 vio_handler[subtype] = NULL;
325 return 0;
326}
327EXPORT_SYMBOL(vio_clearHandler);
328
329static void handleConfig(struct HvLpEvent *event)
330{
331 if (!event)
332 return;
333 if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
334 printk(VIOPATH_KERN_WARN
335 "unexpected config request from partition %d",
336 event->xSourceLp);
337
338 if ((event->xFlags.xFunction == HvLpEvent_Function_Int) &&
339 (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) {
340 event->xRc = HvLpEvent_Rc_InvalidSubtype;
341 HvCallEvent_ackLpEvent(event);
342 }
343 return;
344 }
345
346 up((struct semaphore *)event->xCorrelationToken);
347}
348
349/*
350 * Initialization of the hosting partition
351 */
352void vio_set_hostlp(void)
353{
354 /*
355 * If this has already been set then we DON'T want to either change
356 * it or re-register the proc file system
357 */
358 if (viopath_hostLp != HvLpIndexInvalid)
359 return;
360
361 /*
362 * Figure out our hosting partition. This isn't allowed to change
363 * while we're active
364 */
365 viopath_ourLp = HvLpConfig_getLpIndex();
366 viopath_hostLp = HvLpConfig_getHostingLpIndex(viopath_ourLp);
367
368 if (viopath_hostLp != HvLpIndexInvalid)
369 vio_setHandler(viomajorsubtype_config, handleConfig);
370}
371EXPORT_SYMBOL(vio_set_hostlp);
372
373static void vio_handleEvent(struct HvLpEvent *event, struct pt_regs *regs)
374{
375 HvLpIndex remoteLp;
376 int subtype = (event->xSubtype & VIOMAJOR_SUBTYPE_MASK)
377 >> VIOMAJOR_SUBTYPE_SHIFT;
378
379 if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
380 remoteLp = event->xSourceLp;
381 /*
382 * The isActive is checked because if the hosting partition
383 * went down and came back up it would not be active but it
384 * would have different source and target instances, in which
385 * case we'd want to reset them. This case really protects
386 * against an unauthorized active partition sending interrupts
387 * or acks to this linux partition.
388 */
389 if (viopathStatus[remoteLp].isActive
390 && (event->xSourceInstanceId !=
391 viopathStatus[remoteLp].mTargetInst)) {
392 printk(VIOPATH_KERN_WARN
393 "message from invalid partition. "
394 "int msg rcvd, source inst (%d) doesnt match (%d)\n",
395 viopathStatus[remoteLp].mTargetInst,
396 event->xSourceInstanceId);
397 return;
398 }
399
400 if (viopathStatus[remoteLp].isActive
401 && (event->xTargetInstanceId !=
402 viopathStatus[remoteLp].mSourceInst)) {
403 printk(VIOPATH_KERN_WARN
404 "message from invalid partition. "
405 "int msg rcvd, target inst (%d) doesnt match (%d)\n",
406 viopathStatus[remoteLp].mSourceInst,
407 event->xTargetInstanceId);
408 return;
409 }
410 } else {
411 remoteLp = event->xTargetLp;
412 if (event->xSourceInstanceId !=
413 viopathStatus[remoteLp].mSourceInst) {
414 printk(VIOPATH_KERN_WARN
415 "message from invalid partition. "
416 "ack msg rcvd, source inst (%d) doesnt match (%d)\n",
417 viopathStatus[remoteLp].mSourceInst,
418 event->xSourceInstanceId);
419 return;
420 }
421
422 if (event->xTargetInstanceId !=
423 viopathStatus[remoteLp].mTargetInst) {
424 printk(VIOPATH_KERN_WARN
425 "message from invalid partition. "
426 "viopath: ack msg rcvd, target inst (%d) doesnt match (%d)\n",
427 viopathStatus[remoteLp].mTargetInst,
428 event->xTargetInstanceId);
429 return;
430 }
431 }
432
433 if (vio_handler[subtype] == NULL) {
434 printk(VIOPATH_KERN_WARN
435 "unexpected virtual io event subtype %d from partition %d\n",
436 event->xSubtype, remoteLp);
437 /* No handler. Ack if necessary */
438 if ((event->xFlags.xFunction == HvLpEvent_Function_Int) &&
439 (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) {
440 event->xRc = HvLpEvent_Rc_InvalidSubtype;
441 HvCallEvent_ackLpEvent(event);
442 }
443 return;
444 }
445
446 /* This innocuous little line is where all the real work happens */
447 (*vio_handler[subtype])(event);
448}
449
450static void viopath_donealloc(void *parm, int number)
451{
452 struct alloc_parms *parmsp = parm;
453
454 parmsp->number = number;
455 if (parmsp->used_wait_atomic)
456 atomic_set(&parmsp->wait_atomic, 0);
457 else
458 up(&parmsp->sem);
459}
460
461static int allocateEvents(HvLpIndex remoteLp, int numEvents)
462{
463 struct alloc_parms parms;
464
465 if (system_state != SYSTEM_RUNNING) {
466 parms.used_wait_atomic = 1;
467 atomic_set(&parms.wait_atomic, 1);
468 } else {
469 parms.used_wait_atomic = 0;
470 init_MUTEX_LOCKED(&parms.sem);
471 }
472 mf_allocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo, 250, /* It would be nice to put a real number here! */
473 numEvents, &viopath_donealloc, &parms);
474 if (system_state != SYSTEM_RUNNING) {
475 while (atomic_read(&parms.wait_atomic))
476 mb();
477 } else
478 down(&parms.sem);
479 return parms.number;
480}
481
482int viopath_open(HvLpIndex remoteLp, int subtype, int numReq)
483{
484 int i;
485 unsigned long flags;
486 int tempNumAllocated;
487
488 if ((remoteLp >= HVMAXARCHITECTEDLPS) || (remoteLp == HvLpIndexInvalid))
489 return -EINVAL;
490
491 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
492 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
493 return -EINVAL;
494
495 spin_lock_irqsave(&statuslock, flags);
496
497 if (!event_buffer_initialised) {
498 for (i = 0; i < VIO_MAX_SUBTYPES; i++)
499 atomic_set(&event_buffer_available[i], 1);
500 event_buffer_initialised = 1;
501 }
502
503 viopathStatus[remoteLp].users[subtype]++;
504
505 if (!viopathStatus[remoteLp].isOpen) {
506 viopathStatus[remoteLp].isOpen = 1;
507 HvCallEvent_openLpEventPath(remoteLp, HvLpEvent_Type_VirtualIo);
508
509 /*
510 * Don't hold the spinlock during an operation that
511 * can sleep.
512 */
513 spin_unlock_irqrestore(&statuslock, flags);
514 tempNumAllocated = allocateEvents(remoteLp, 1);
515 spin_lock_irqsave(&statuslock, flags);
516
517 viopathStatus[remoteLp].numberAllocated += tempNumAllocated;
518
519 if (viopathStatus[remoteLp].numberAllocated == 0) {
520 HvCallEvent_closeLpEventPath(remoteLp,
521 HvLpEvent_Type_VirtualIo);
522
523 spin_unlock_irqrestore(&statuslock, flags);
524 return -ENOMEM;
525 }
526
527 viopathStatus[remoteLp].mSourceInst =
528 HvCallEvent_getSourceLpInstanceId(remoteLp,
529 HvLpEvent_Type_VirtualIo);
530 viopathStatus[remoteLp].mTargetInst =
531 HvCallEvent_getTargetLpInstanceId(remoteLp,
532 HvLpEvent_Type_VirtualIo);
533 HvLpEvent_registerHandler(HvLpEvent_Type_VirtualIo,
534 &vio_handleEvent);
535 sendMonMsg(remoteLp);
536 printk(VIOPATH_KERN_INFO "opening connection to partition %d, "
537 "setting sinst %d, tinst %d\n",
538 remoteLp, viopathStatus[remoteLp].mSourceInst,
539 viopathStatus[remoteLp].mTargetInst);
540 }
541
542 spin_unlock_irqrestore(&statuslock, flags);
543 tempNumAllocated = allocateEvents(remoteLp, numReq);
544 spin_lock_irqsave(&statuslock, flags);
545 viopathStatus[remoteLp].numberAllocated += tempNumAllocated;
546 spin_unlock_irqrestore(&statuslock, flags);
547
548 return 0;
549}
550EXPORT_SYMBOL(viopath_open);
551
552int viopath_close(HvLpIndex remoteLp, int subtype, int numReq)
553{
554 unsigned long flags;
555 int i;
556 int numOpen;
557 struct alloc_parms parms;
558
559 if ((remoteLp >= HVMAXARCHITECTEDLPS) || (remoteLp == HvLpIndexInvalid))
560 return -EINVAL;
561
562 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
563 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
564 return -EINVAL;
565
566 spin_lock_irqsave(&statuslock, flags);
567 /*
 568	 * If viopath_close somehow gets called before a
 569	 * viopath_open, it could decrement to -1, which is a
 570	 * non-recoverable state, so we'll prevent this from
 571	 * happening.
572 */
573 if (viopathStatus[remoteLp].users[subtype] > 0)
574 viopathStatus[remoteLp].users[subtype]--;
575
576 spin_unlock_irqrestore(&statuslock, flags);
577
578 parms.used_wait_atomic = 0;
579 init_MUTEX_LOCKED(&parms.sem);
580 mf_deallocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo,
581 numReq, &viopath_donealloc, &parms);
582 down(&parms.sem);
583
584 spin_lock_irqsave(&statuslock, flags);
585 for (i = 0, numOpen = 0; i < VIO_MAX_SUBTYPES; i++)
586 numOpen += viopathStatus[remoteLp].users[i];
587
588 if ((viopathStatus[remoteLp].isOpen) && (numOpen == 0)) {
589 printk(VIOPATH_KERN_INFO "closing connection to partition %d",
590 remoteLp);
591
592 HvCallEvent_closeLpEventPath(remoteLp,
593 HvLpEvent_Type_VirtualIo);
594 viopathStatus[remoteLp].isOpen = 0;
595 viopathStatus[remoteLp].isActive = 0;
596
597 for (i = 0; i < VIO_MAX_SUBTYPES; i++)
598 atomic_set(&event_buffer_available[i], 0);
599 event_buffer_initialised = 0;
600 }
601 spin_unlock_irqrestore(&statuslock, flags);
602 return 0;
603}
604EXPORT_SYMBOL(viopath_close);
605
606void *vio_get_event_buffer(int subtype)
607{
608 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
609 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
610 return NULL;
611
612 if (atomic_dec_if_positive(&event_buffer_available[subtype]) == 0)
613 return &event_buffer[subtype * 256];
614 else
615 return NULL;
616}
617EXPORT_SYMBOL(vio_get_event_buffer);
618
619void vio_free_event_buffer(int subtype, void *buffer)
620{
621 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
622 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) {
623 printk(VIOPATH_KERN_WARN
624 "unexpected subtype %d freeing event buffer\n", subtype);
625 return;
626 }
627
628 if (atomic_read(&event_buffer_available[subtype]) != 0) {
629 printk(VIOPATH_KERN_WARN
630 "freeing unallocated event buffer, subtype %d\n",
631 subtype);
632 return;
633 }
634
635 if (buffer != &event_buffer[subtype * 256]) {
636 printk(VIOPATH_KERN_WARN
637 "freeing invalid event buffer, subtype %d\n", subtype);
638 }
639
640 atomic_set(&event_buffer_available[subtype], 1);
641}
642EXPORT_SYMBOL(vio_free_event_buffer);
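
vio_get_event_buffer() and vio_free_event_buffer() above implement a one-slot-per-subtype pool: atomic_dec_if_positive() claims the 256-byte slot only if its flag is still 1, and freeing simply sets the flag back. The following is a stand-alone sketch of the same claim/release idiom, using user-space GCC atomic builtins and hypothetical names purely for illustration (the kernel's atomic_dec_if_positive has slightly different semantics, but behaves identically for a 0/1 flag).

#include <stdio.h>
#include <stdbool.h>

#define NSLOTS	8

static unsigned char pool[NSLOTS][256];
static int slot_free[NSLOTS];

/* Claim slot i only if its flag is currently 1; return NULL when busy. */
static void *claim(int i)
{
	int expected = 1;

	if (__atomic_compare_exchange_n(&slot_free[i], &expected, 0, false,
					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
		return pool[i];
	return NULL;
}

/* Hand the slot back by flipping the flag to 1 again. */
static void release(int i)
{
	__atomic_store_n(&slot_free[i], 1, __ATOMIC_RELEASE);
}

int main(void)
{
	void *a, *b;
	int i;

	for (i = 0; i < NSLOTS; i++)
		release(i);		/* mark every slot available */

	a = claim(3);
	b = claim(3);			/* second claim must fail */
	printf("first=%p second=%p\n", a, b);
	release(3);
	return 0;
}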
643
644static const struct vio_error_entry vio_no_error =
645 { 0, 0, "Non-VIO Error" };
646static const struct vio_error_entry vio_unknown_error =
647 { 0, EIO, "Unknown Error" };
648
649static const struct vio_error_entry vio_default_errors[] = {
650 {0x0001, EIO, "No Connection"},
651 {0x0002, EIO, "No Receiver"},
652 {0x0003, EIO, "No Buffer Available"},
653 {0x0004, EBADRQC, "Invalid Message Type"},
654 {0x0000, 0, NULL},
655};
656
657const struct vio_error_entry *vio_lookup_rc(
658 const struct vio_error_entry *local_table, u16 rc)
659{
660 const struct vio_error_entry *cur;
661
662 if (!rc)
663 return &vio_no_error;
664 if (local_table)
665 for (cur = local_table; cur->rc; ++cur)
666 if (cur->rc == rc)
667 return cur;
668 for (cur = vio_default_errors; cur->rc; ++cur)
669 if (cur->rc == rc)
670 return cur;
671 return &vio_unknown_error;
672}
673EXPORT_SYMBOL(vio_lookup_rc);
diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S
index 0306510bc4ff..022f220e772f 100644
--- a/arch/ppc64/kernel/vmlinux.lds.S
+++ b/arch/ppc64/kernel/vmlinux.lds.S
@@ -1,3 +1,4 @@
1#include <asm/page.h>
1#include <asm-generic/vmlinux.lds.h> 2#include <asm-generic/vmlinux.lds.h>
2 3
3OUTPUT_ARCH(powerpc:common64) 4OUTPUT_ARCH(powerpc:common64)
@@ -17,7 +18,7 @@ SECTIONS
17 LOCK_TEXT 18 LOCK_TEXT
18 KPROBES_TEXT 19 KPROBES_TEXT
19 *(.fixup) 20 *(.fixup)
20 . = ALIGN(4096); 21 . = ALIGN(PAGE_SIZE);
21 _etext = .; 22 _etext = .;
22 } 23 }
23 24
@@ -43,7 +44,7 @@ SECTIONS
43 44
44 45
45 /* will be freed after init */ 46 /* will be freed after init */
46 . = ALIGN(4096); 47 . = ALIGN(PAGE_SIZE);
47 __init_begin = .; 48 __init_begin = .;
48 49
49 .init.text : { 50 .init.text : {
@@ -83,7 +84,7 @@ SECTIONS
83 84
84 SECURITY_INIT 85 SECURITY_INIT
85 86
86 . = ALIGN(4096); 87 . = ALIGN(PAGE_SIZE);
87 .init.ramfs : { 88 .init.ramfs : {
88 __initramfs_start = .; 89 __initramfs_start = .;
89 *(.init.ramfs) 90 *(.init.ramfs)
@@ -96,18 +97,22 @@ SECTIONS
96 __per_cpu_end = .; 97 __per_cpu_end = .;
97 } 98 }
98 99
100 . = ALIGN(PAGE_SIZE);
99 . = ALIGN(16384); 101 . = ALIGN(16384);
100 __init_end = .; 102 __init_end = .;
101 /* freed after init ends here */ 103 /* freed after init ends here */
102 104
103 105
104 /* Read/write sections */ 106 /* Read/write sections */
107 . = ALIGN(PAGE_SIZE);
105 . = ALIGN(16384); 108 . = ALIGN(16384);
109 _sdata = .;
106 /* The initial task and kernel stack */ 110 /* The initial task and kernel stack */
107 .data.init_task : { 111 .data.init_task : {
108 *(.data.init_task) 112 *(.data.init_task)
109 } 113 }
110 114
115 . = ALIGN(PAGE_SIZE);
111 .data.page_aligned : { 116 .data.page_aligned : {
112 *(.data.page_aligned) 117 *(.data.page_aligned)
113 } 118 }
@@ -129,18 +134,18 @@ SECTIONS
129 __toc_start = .; 134 __toc_start = .;
130 *(.got) 135 *(.got)
131 *(.toc) 136 *(.toc)
132 . = ALIGN(4096); 137 . = ALIGN(PAGE_SIZE);
133 _edata = .; 138 _edata = .;
134 } 139 }
135 140
136 141
137 . = ALIGN(4096); 142 . = ALIGN(PAGE_SIZE);
138 .bss : { 143 .bss : {
139 __bss_start = .; 144 __bss_start = .;
140 *(.bss) 145 *(.bss)
141 __bss_stop = .; 146 __bss_stop = .;
142 } 147 }
143 148
144 . = ALIGN(4096); 149 . = ALIGN(PAGE_SIZE);
145 _end = . ; 150 _end = . ;
146} 151}
diff --git a/arch/ppc64/mm/Makefile b/arch/ppc64/mm/Makefile
deleted file mode 100644
index 3695d00d347f..000000000000
--- a/arch/ppc64/mm/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
1#
2# Makefile for the linux ppc-specific parts of the memory manager.
3#
4
5EXTRA_CFLAGS += -mno-minimal-toc
6
7obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \
8 slb_low.o slb.o stab.o mmap.o
9obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
10obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
11obj-$(CONFIG_PPC_MULTIPLATFORM) += hash_native.o
diff --git a/arch/ppc64/mm/fault.c b/arch/ppc64/mm/fault.c
deleted file mode 100644
index be3f25cf3e9f..000000000000
--- a/arch/ppc64/mm/fault.c
+++ /dev/null
@@ -1,333 +0,0 @@
1/*
2 * arch/ppc/mm/fault.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Derived from "arch/i386/mm/fault.c"
8 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
9 *
10 * Modified by Cort Dougan and Paul Mackerras.
11 *
12 * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 */
19
20#include <linux/config.h>
21#include <linux/signal.h>
22#include <linux/sched.h>
23#include <linux/kernel.h>
24#include <linux/errno.h>
25#include <linux/string.h>
26#include <linux/types.h>
27#include <linux/mman.h>
28#include <linux/mm.h>
29#include <linux/interrupt.h>
30#include <linux/smp_lock.h>
31#include <linux/module.h>
32#include <linux/kprobes.h>
33
34#include <asm/page.h>
35#include <asm/pgtable.h>
36#include <asm/mmu.h>
37#include <asm/mmu_context.h>
38#include <asm/system.h>
39#include <asm/uaccess.h>
40#include <asm/kdebug.h>
41#include <asm/siginfo.h>
42
43/*
44 * Check whether the instruction at regs->nip is a store using
45 * an update addressing form which will update r1.
46 */
47static int store_updates_sp(struct pt_regs *regs)
48{
49 unsigned int inst;
50
51 if (get_user(inst, (unsigned int __user *)regs->nip))
52 return 0;
53 /* check for 1 in the rA field */
54 if (((inst >> 16) & 0x1f) != 1)
55 return 0;
56 /* check major opcode */
57 switch (inst >> 26) {
58 case 37: /* stwu */
59 case 39: /* stbu */
60 case 45: /* sthu */
61 case 53: /* stfsu */
62 case 55: /* stfdu */
63 return 1;
64 case 62: /* std or stdu */
65 return (inst & 3) == 1;
66 case 31:
67 /* check minor opcode */
68 switch ((inst >> 1) & 0x3ff) {
69 case 181: /* stdux */
70 case 183: /* stwux */
71 case 247: /* stbux */
72 case 439: /* sthux */
73 case 695: /* stfsux */
74 case 759: /* stfdux */
75 return 1;
76 }
77 }
78 return 0;
79}
80
81static void do_dabr(struct pt_regs *regs, unsigned long error_code)
82{
83 siginfo_t info;
84
85 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
86 11, SIGSEGV) == NOTIFY_STOP)
87 return;
88
89 if (debugger_dabr_match(regs))
90 return;
91
92 /* Clear the DABR */
93 set_dabr(0);
94
95 /* Deliver the signal to userspace */
96 info.si_signo = SIGTRAP;
97 info.si_errno = 0;
98 info.si_code = TRAP_HWBKPT;
99 info.si_addr = (void __user *)regs->nip;
100 force_sig_info(SIGTRAP, &info, current);
101}
102
103/*
104 * The error_code parameter is
105 * - DSISR for a non-SLB data access fault,
106 * - SRR1 & 0x08000000 for a non-SLB instruction access fault
107	 * - 0 for any SLB fault.
108 * The return value is 0 if the fault was handled, or the signal
109 * number if this is a kernel fault that can't be handled here.
110 */
111int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
112 unsigned long error_code)
113{
114 struct vm_area_struct * vma;
115 struct mm_struct *mm = current->mm;
116 siginfo_t info;
117 unsigned long code = SEGV_MAPERR;
118 unsigned long is_write = error_code & DSISR_ISSTORE;
119 unsigned long trap = TRAP(regs);
120 unsigned long is_exec = trap == 0x400;
121
122 BUG_ON((trap == 0x380) || (trap == 0x480));
123
124 if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
125 11, SIGSEGV) == NOTIFY_STOP)
126 return 0;
127
128 if (trap == 0x300) {
129 if (debugger_fault_handler(regs))
130 return 0;
131 }
132
133 /* On a kernel SLB miss we can only check for a valid exception entry */
134 if (!user_mode(regs) && (address >= TASK_SIZE))
135 return SIGSEGV;
136
137 if (error_code & DSISR_DABRMATCH) {
138 do_dabr(regs, error_code);
139 return 0;
140 }
141
142 if (in_atomic() || mm == NULL) {
143 if (!user_mode(regs))
144 return SIGSEGV;
145 /* in_atomic() in user mode is really bad,
146 as is current->mm == NULL. */
147	 printk(KERN_EMERG "Page fault in user mode with "
148	 "in_atomic() = %d mm = %p\n", in_atomic(), mm);
149 printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
150 regs->nip, regs->msr);
151 die("Weird page fault", regs, SIGSEGV);
152 }
153
154 /* When running in the kernel we expect faults to occur only to
155 * addresses in user space. All other faults represent errors in the
156	 * kernel and should generate an OOPS. Unfortunately, in the case of an
157	 * erroneous fault occurring in a code path which already holds mmap_sem
158 * we will deadlock attempting to validate the fault against the
159 * address space. Luckily the kernel only validly references user
160 * space from well defined areas of code, which are listed in the
161 * exceptions table.
162 *
163 * As the vast majority of faults will be valid we will only perform
164	 * the source reference check when there is a possibility of a deadlock.
165 * Attempt to lock the address space, if we cannot we then validate the
166 * source. If this is invalid we can skip the address space check,
167 * thus avoiding the deadlock.
168 */
169 if (!down_read_trylock(&mm->mmap_sem)) {
170 if (!user_mode(regs) && !search_exception_tables(regs->nip))
171 goto bad_area_nosemaphore;
172
173 down_read(&mm->mmap_sem);
174 }
175
176 vma = find_vma(mm, address);
177 if (!vma)
178 goto bad_area;
179
180 if (vma->vm_start <= address) {
181 goto good_area;
182 }
183 if (!(vma->vm_flags & VM_GROWSDOWN))
184 goto bad_area;
185
186 /*
187 * N.B. The POWER/Open ABI allows programs to access up to
188 * 288 bytes below the stack pointer.
189 * The kernel signal delivery code writes up to about 1.5kB
190 * below the stack pointer (r1) before decrementing it.
191 * The exec code can write slightly over 640kB to the stack
192 * before setting the user r1. Thus we allow the stack to
193 * expand to 1MB without further checks.
194 */
195 if (address + 0x100000 < vma->vm_end) {
196 /* get user regs even if this fault is in kernel mode */
197 struct pt_regs *uregs = current->thread.regs;
198 if (uregs == NULL)
199 goto bad_area;
200
201 /*
202 * A user-mode access to an address a long way below
203 * the stack pointer is only valid if the instruction
204 * is one which would update the stack pointer to the
205 * address accessed if the instruction completed,
206 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
207 * (or the byte, halfword, float or double forms).
208 *
209 * If we don't check this then any write to the area
210 * between the last mapped region and the stack will
211 * expand the stack rather than segfaulting.
212 */
213 if (address + 2048 < uregs->gpr[1]
214 && (!user_mode(regs) || !store_updates_sp(regs)))
215 goto bad_area;
216 }
217
218 if (expand_stack(vma, address))
219 goto bad_area;
220
221good_area:
222 code = SEGV_ACCERR;
223
224 if (is_exec) {
225 /* protection fault */
226 if (error_code & DSISR_PROTFAULT)
227 goto bad_area;
228 if (!(vma->vm_flags & VM_EXEC))
229 goto bad_area;
230 /* a write */
231 } else if (is_write) {
232 if (!(vma->vm_flags & VM_WRITE))
233 goto bad_area;
234 /* a read */
235 } else {
236 if (!(vma->vm_flags & VM_READ))
237 goto bad_area;
238 }
239
240 survive:
241 /*
242 * If for any reason at all we couldn't handle the fault,
243 * make sure we exit gracefully rather than endlessly redo
244 * the fault.
245 */
246 switch (handle_mm_fault(mm, vma, address, is_write)) {
247
248 case VM_FAULT_MINOR:
249 current->min_flt++;
250 break;
251 case VM_FAULT_MAJOR:
252 current->maj_flt++;
253 break;
254 case VM_FAULT_SIGBUS:
255 goto do_sigbus;
256 case VM_FAULT_OOM:
257 goto out_of_memory;
258 default:
259 BUG();
260 }
261
262 up_read(&mm->mmap_sem);
263 return 0;
264
265bad_area:
266 up_read(&mm->mmap_sem);
267
268bad_area_nosemaphore:
269 /* User mode accesses cause a SIGSEGV */
270 if (user_mode(regs)) {
271 info.si_signo = SIGSEGV;
272 info.si_errno = 0;
273 info.si_code = code;
274 info.si_addr = (void __user *) address;
275 force_sig_info(SIGSEGV, &info, current);
276 return 0;
277 }
278
279 if (trap == 0x400 && (error_code & DSISR_PROTFAULT)
280 && printk_ratelimit())
281 printk(KERN_CRIT "kernel tried to execute NX-protected"
282 " page (%lx) - exploit attempt? (uid: %d)\n",
283 address, current->uid);
284
285 return SIGSEGV;
286
287/*
288 * We ran out of memory, or some other thing happened to us that made
289 * us unable to handle the page fault gracefully.
290 */
291out_of_memory:
292 up_read(&mm->mmap_sem);
293 if (current->pid == 1) {
294 yield();
295 down_read(&mm->mmap_sem);
296 goto survive;
297 }
298 printk("VM: killing process %s\n", current->comm);
299 if (user_mode(regs))
300 do_exit(SIGKILL);
301 return SIGKILL;
302
303do_sigbus:
304 up_read(&mm->mmap_sem);
305 if (user_mode(regs)) {
306 info.si_signo = SIGBUS;
307 info.si_errno = 0;
308 info.si_code = BUS_ADRERR;
309 info.si_addr = (void __user *)address;
310 force_sig_info(SIGBUS, &info, current);
311 return 0;
312 }
313 return SIGBUS;
314}
315
316/*
317 * bad_page_fault is called when we have a bad access from the kernel.
318 * It is called from do_page_fault above and from some of the procedures
319 * in traps.c.
320 */
321void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
322{
323 const struct exception_table_entry *entry;
324
325 /* Are we prepared to handle this fault? */
326 if ((entry = search_exception_tables(regs->nip)) != NULL) {
327 regs->nip = entry->fixup;
328 return;
329 }
330
331 /* kernel has accessed a bad area */
332 die("Kernel access of bad area", regs, sig);
333}
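For illustration (a stand-alone sketch, not code from the removed file), the rA and major-opcode checks in store_updates_sp() above accept the common 32-bit prologue store stwu r1,-16(r1), encoded as 0x9421fff0:

#include <stdio.h>

int main(void)
{
	unsigned int inst = 0x9421fff0;		/* stwu r1,-16(r1) */
	unsigned int opcode = inst >> 26;	/* major opcode: 37 (stwu) */
	unsigned int ra = (inst >> 16) & 0x1f;	/* rA field: 1, i.e. r1 */

	printf("opcode=%u rA=%u -> %s\n", opcode, ra,
	       (opcode == 37 && ra == 1) ? "treated as a stack update" : "not a stack update");
	return 0;
}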
diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S
deleted file mode 100644
index ee5a5d36bfa8..000000000000
--- a/arch/ppc64/mm/hash_low.S
+++ /dev/null
@@ -1,288 +0,0 @@
1/*
2 * ppc64 MMU hashtable management routines
3 *
4 * (c) Copyright IBM Corp. 2003
5 *
6 * Maintained by: Benjamin Herrenschmidt
7 * <benh@kernel.crashing.org>
8 *
9 * This file is covered by the GNU Public Licence v2 as
10 * described in the kernel's COPYING file.
11 */
12
13#include <asm/processor.h>
14#include <asm/pgtable.h>
15#include <asm/mmu.h>
16#include <asm/page.h>
17#include <asm/types.h>
18#include <asm/ppc_asm.h>
19#include <asm/asm-offsets.h>
20#include <asm/cputable.h>
21
22 .text
23
24/*
25 * Stackframe:
26 *
27 * +-> Back chain (SP + 256)
28 * | General register save area (SP + 112)
29 * | Parameter save area (SP + 48)
30 * | TOC save area (SP + 40)
31 * | link editor doubleword (SP + 32)
32 * | compiler doubleword (SP + 24)
33 * | LR save area (SP + 16)
34 * | CR save area (SP + 8)
35 * SP ---> +-- Back chain (SP + 0)
36 */
37#define STACKFRAMESIZE 256
38
39/* Save parameters offsets */
40#define STK_PARM(i) (STACKFRAMESIZE + 48 + ((i)-3)*8)
41
42/* Save non-volatile offsets */
43#define STK_REG(i) (112 + ((i)-14)*8)
44
45/*
46 * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
47 * pte_t *ptep, unsigned long trap, int local)
48 *
49 * Adds a page to the hash table. This is the non-LPAR version for now
50 */
51
52_GLOBAL(__hash_page)
53 mflr r0
54 std r0,16(r1)
55 stdu r1,-STACKFRAMESIZE(r1)
56 /* Save all params that we need after a function call */
57 std r6,STK_PARM(r6)(r1)
58 std r8,STK_PARM(r8)(r1)
59
60 /* Add _PAGE_PRESENT to access */
61 ori r4,r4,_PAGE_PRESENT
62
63 /* Save non-volatile registers.
64 * r31 will hold "old PTE"
65 * r30 is "new PTE"
66 * r29 is "va"
67 * r28 is a hash value
68	 * r27 is hashtab mask (maybe dynamically patched instead?)
69 */
70 std r27,STK_REG(r27)(r1)
71 std r28,STK_REG(r28)(r1)
72 std r29,STK_REG(r29)(r1)
73 std r30,STK_REG(r30)(r1)
74 std r31,STK_REG(r31)(r1)
75
76 /* Step 1:
77 *
78 * Check permissions, atomically mark the linux PTE busy
79 * and hashed.
80 */
811:
82 ldarx r31,0,r6
83 /* Check access rights (access & ~(pte_val(*ptep))) */
84 andc. r0,r4,r31
85 bne- htab_wrong_access
86 /* Check if PTE is busy */
87 andi. r0,r31,_PAGE_BUSY
88 /* If so, just bail out and refault if needed. Someone else
89 * is changing this PTE anyway and might hash it.
90 */
91 bne- bail_ok
92 /* Prepare new PTE value (turn access RW into DIRTY, then
93 * add BUSY,HASHPTE and ACCESSED)
94 */
95 rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
96 or r30,r30,r31
97 ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
98 /* Write the linux PTE atomically (setting busy) */
99 stdcx. r30,0,r6
100 bne- 1b
101 isync
102
103 /* Step 2:
104 *
105 * Insert/Update the HPTE in the hash table. At this point,
106 * r4 (access) is re-useable, we use it for the new HPTE flags
107 */
108
109 /* Calc va and put it in r29 */
110 rldicr r29,r5,28,63-28
111 rldicl r3,r3,0,36
112 or r29,r3,r29
113
114 /* Calculate hash value for primary slot and store it in r28 */
115 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
116 rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
117 xor r28,r5,r0
118
119 /* Convert linux PTE bits into HW equivalents */
120 andi. r3,r30,0x1fe /* Get basic set of flags */
121 xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
122 rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
123 rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
124 and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
125 andc r0,r30,r0 /* r0 = pte & ~r0 */
126 rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
127
128 /* We eventually do the icache sync here (maybe inline that
129 * code rather than call a C function...)
130 */
131BEGIN_FTR_SECTION
132 mr r4,r30
133 mr r5,r7
134 bl .hash_page_do_lazy_icache
135END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
136
137 /* At this point, r3 contains new PP bits, save them in
138 * place of "access" in the param area (sic)
139 */
140 std r3,STK_PARM(r4)(r1)
141
142 /* Get htab_hash_mask */
143 ld r4,htab_hash_mask@got(2)
144 ld r27,0(r4) /* htab_hash_mask -> r27 */
145
146 /* Check if we may already be in the hashtable, in this case, we
147 * go to out-of-line code to try to modify the HPTE
148 */
149 andi. r0,r31,_PAGE_HASHPTE
150 bne htab_modify_pte
151
152htab_insert_pte:
153 /* Clear hpte bits in new pte (we also clear BUSY btw) and
154 * add _PAGE_HASHPTE
155 */
156 lis r0,_PAGE_HPTEFLAGS@h
157 ori r0,r0,_PAGE_HPTEFLAGS@l
158 andc r30,r30,r0
159 ori r30,r30,_PAGE_HASHPTE
160
161 /* page number in r5 */
162 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
163
164 /* Calculate primary group hash */
165 and r0,r28,r27
166 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
167
168 /* Call ppc_md.hpte_insert */
169	 ld r7,STK_PARM(r4)(r1) /* Retrieve new pp bits */
170	 mr r4,r29 /* Retrieve va */
171 li r6,0 /* no vflags */
172_GLOBAL(htab_call_hpte_insert1)
173 bl . /* Will be patched by htab_finish_init() */
174 cmpdi 0,r3,0
175 bge htab_pte_insert_ok /* Insertion successful */
176 cmpdi 0,r3,-2 /* Critical failure */
177 beq- htab_pte_insert_failure
178
179 /* Now try secondary slot */
180
181 /* page number in r5 */
182 rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
183
184 /* Calculate secondary group hash */
185 andc r0,r27,r28
186 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
187
188 /* Call ppc_md.hpte_insert */
189	 ld r7,STK_PARM(r4)(r1) /* Retrieve new pp bits */
190	 mr r4,r29 /* Retrieve va */
191 li r6,HPTE_V_SECONDARY@l /* secondary slot */
192_GLOBAL(htab_call_hpte_insert2)
193 bl . /* Will be patched by htab_finish_init() */
194 cmpdi 0,r3,0
195 bge+ htab_pte_insert_ok /* Insertion successful */
196 cmpdi 0,r3,-2 /* Critical failure */
197 beq- htab_pte_insert_failure
198
199 /* Both are full, we need to evict something */
200 mftb r0
201 /* Pick a random group based on TB */
202 andi. r0,r0,1
203 mr r5,r28
204 bne 2f
205 not r5,r5
2062: and r0,r5,r27
207 rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
208 /* Call ppc_md.hpte_remove */
209_GLOBAL(htab_call_hpte_remove)
210 bl . /* Will be patched by htab_finish_init() */
211
212 /* Try all again */
213 b htab_insert_pte
214
215bail_ok:
216 li r3,0
217 b bail
218
219htab_pte_insert_ok:
220 /* Insert slot number & secondary bit in PTE */
221 rldimi r30,r3,12,63-15
222
223 /* Write out the PTE with a normal write
224	 * (maybe adding an eieio would still be good?)
225 */
226htab_write_out_pte:
227 ld r6,STK_PARM(r6)(r1)
228 std r30,0(r6)
229 li r3, 0
230bail:
231 ld r27,STK_REG(r27)(r1)
232 ld r28,STK_REG(r28)(r1)
233 ld r29,STK_REG(r29)(r1)
234 ld r30,STK_REG(r30)(r1)
235 ld r31,STK_REG(r31)(r1)
236 addi r1,r1,STACKFRAMESIZE
237 ld r0,16(r1)
238 mtlr r0
239 blr
240
241htab_modify_pte:
242 /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
243 mr r4,r3
244 rlwinm r3,r31,32-12,29,31
245
246	 /* Secondary group? If yes, get an inverted hash value */
247 mr r5,r28
248 andi. r0,r31,_PAGE_SECONDARY
249 beq 1f
250 not r5,r5
2511:
252 /* Calculate proper slot value for ppc_md.hpte_updatepp */
253 and r0,r5,r27
254 rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
255 add r3,r0,r3 /* add slot idx */
256
257 /* Call ppc_md.hpte_updatepp */
258 mr r5,r29 /* va */
259 li r6,0 /* large is 0 */
260 ld r7,STK_PARM(r8)(r1) /* get "local" param */
261_GLOBAL(htab_call_hpte_updatepp)
262 bl . /* Will be patched by htab_finish_init() */
263
264	 /* If we failed (typically because the HPTE wasn't really there),
265	 * we try an insertion.
266 */
267 cmpdi 0,r3,-1
268 beq- htab_insert_pte
269
270 /* Clear the BUSY bit and Write out the PTE */
271 li r0,_PAGE_BUSY
272 andc r30,r30,r0
273 b htab_write_out_pte
274
275htab_wrong_access:
276 /* Bail out clearing reservation */
277 stdcx. r31,0,r6
278 li r3,1
279 b bail
280
281htab_pte_insert_failure:
282 /* Bail out restoring old PTE */
283 ld r6,STK_PARM(r6)(r1)
284 std r31,0(r6)
285 li r3,-1
286 b bail
287
288
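As a sanity check of the stack frame layout above (an illustrative sketch, not kernel code; register names stand for their numbers as in the kernel's asm headers), the save-area macros reduce to fixed byte offsets from the new r1:

#include <stdio.h>

#define STACKFRAMESIZE	256
#define STK_PARM(i)	(STACKFRAMESIZE + 48 + ((i) - 3) * 8)	/* caller's parameter save area */
#define STK_REG(i)	(112 + ((i) - 14) * 8)			/* non-volatile register save area */

int main(void)
{
	printf("STK_PARM(r4) = %d\n", STK_PARM(4));	/* 312: r4's slot in the caller's frame */
	printf("STK_REG(r27) = %d\n", STK_REG(27));	/* 216: r27, the lowest register __hash_page saves */
	return 0;
}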
diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c
deleted file mode 100644
index bfd385b7713c..000000000000
--- a/arch/ppc64/mm/hash_native.c
+++ /dev/null
@@ -1,453 +0,0 @@
1/*
2 * native hashtable management.
3 *
4 * SMP scalability work:
5 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12#include <linux/spinlock.h>
13#include <linux/bitops.h>
14#include <linux/threads.h>
15#include <linux/smp.h>
16
17#include <asm/abs_addr.h>
18#include <asm/machdep.h>
19#include <asm/mmu.h>
20#include <asm/mmu_context.h>
21#include <asm/pgtable.h>
22#include <asm/tlbflush.h>
23#include <asm/tlb.h>
24#include <asm/cputable.h>
25
26#define HPTE_LOCK_BIT 3
27
28static DEFINE_SPINLOCK(native_tlbie_lock);
29
30static inline void native_lock_hpte(hpte_t *hptep)
31{
32 unsigned long *word = &hptep->v;
33
34 while (1) {
35 if (!test_and_set_bit(HPTE_LOCK_BIT, word))
36 break;
37 while(test_bit(HPTE_LOCK_BIT, word))
38 cpu_relax();
39 }
40}
41
42static inline void native_unlock_hpte(hpte_t *hptep)
43{
44 unsigned long *word = &hptep->v;
45
46 asm volatile("lwsync":::"memory");
47 clear_bit(HPTE_LOCK_BIT, word);
48}
49
50long native_hpte_insert(unsigned long hpte_group, unsigned long va,
51 unsigned long prpn, unsigned long vflags,
52 unsigned long rflags)
53{
54 hpte_t *hptep = htab_address + hpte_group;
55 unsigned long hpte_v, hpte_r;
56 int i;
57
58 for (i = 0; i < HPTES_PER_GROUP; i++) {
59 if (! (hptep->v & HPTE_V_VALID)) {
60 /* retry with lock held */
61 native_lock_hpte(hptep);
62 if (! (hptep->v & HPTE_V_VALID))
63 break;
64 native_unlock_hpte(hptep);
65 }
66
67 hptep++;
68 }
69
70 if (i == HPTES_PER_GROUP)
71 return -1;
72
73 hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
74 if (vflags & HPTE_V_LARGE)
75 va &= ~(1UL << HPTE_V_AVPN_SHIFT);
76 hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
77
78 hptep->r = hpte_r;
79 /* Guarantee the second dword is visible before the valid bit */
80 __asm__ __volatile__ ("eieio" : : : "memory");
81 /*
82 * Now set the first dword including the valid bit
83 * NOTE: this also unlocks the hpte
84 */
85 hptep->v = hpte_v;
86
87 __asm__ __volatile__ ("ptesync" : : : "memory");
88
89 return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
90}
91
92static long native_hpte_remove(unsigned long hpte_group)
93{
94 hpte_t *hptep;
95 int i;
96 int slot_offset;
97 unsigned long hpte_v;
98
99 /* pick a random entry to start at */
100 slot_offset = mftb() & 0x7;
101
102 for (i = 0; i < HPTES_PER_GROUP; i++) {
103 hptep = htab_address + hpte_group + slot_offset;
104 hpte_v = hptep->v;
105
106 if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
107 /* retry with lock held */
108 native_lock_hpte(hptep);
109 hpte_v = hptep->v;
110 if ((hpte_v & HPTE_V_VALID)
111 && !(hpte_v & HPTE_V_BOLTED))
112 break;
113 native_unlock_hpte(hptep);
114 }
115
116 slot_offset++;
117 slot_offset &= 0x7;
118 }
119
120 if (i == HPTES_PER_GROUP)
121 return -1;
122
123 /* Invalidate the hpte. NOTE: this also unlocks it */
124 hptep->v = 0;
125
126 return i;
127}
128
129static inline void set_pp_bit(unsigned long pp, hpte_t *addr)
130{
131 unsigned long old;
132 unsigned long *p = &addr->r;
133
134 __asm__ __volatile__(
135 "1: ldarx %0,0,%3\n\
136 rldimi %0,%2,0,61\n\
137 stdcx. %0,0,%3\n\
138 bne 1b"
139 : "=&r" (old), "=m" (*p)
140 : "r" (pp), "r" (p), "m" (*p)
141 : "cc");
142}
143
144/*
145	 * Only works on small pages. Yes, it's ugly to have to check each slot in
146 * the group but we only use this during bootup.
147 */
148static long native_hpte_find(unsigned long vpn)
149{
150 hpte_t *hptep;
151 unsigned long hash;
152 unsigned long i, j;
153 long slot;
154 unsigned long hpte_v;
155
156 hash = hpt_hash(vpn, 0);
157
158 for (j = 0; j < 2; j++) {
159 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
160 for (i = 0; i < HPTES_PER_GROUP; i++) {
161 hptep = htab_address + slot;
162 hpte_v = hptep->v;
163
164 if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
165 && (hpte_v & HPTE_V_VALID)
166 && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
167 /* HPTE matches */
168 if (j)
169 slot = -slot;
170 return slot;
171 }
172 ++slot;
173 }
174 hash = ~hash;
175 }
176
177 return -1;
178}
179
180static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
181 unsigned long va, int large, int local)
182{
183 hpte_t *hptep = htab_address + slot;
184 unsigned long hpte_v;
185 unsigned long avpn = va >> 23;
186 int ret = 0;
187
188 if (large)
189 avpn &= ~1;
190
191 native_lock_hpte(hptep);
192
193 hpte_v = hptep->v;
194
195 /* Even if we miss, we need to invalidate the TLB */
196 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
197 || !(hpte_v & HPTE_V_VALID)) {
198 native_unlock_hpte(hptep);
199 ret = -1;
200 } else {
201 set_pp_bit(newpp, hptep);
202 native_unlock_hpte(hptep);
203 }
204
205 /* Ensure it is out of the tlb too */
206 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
207 tlbiel(va);
208 } else {
209 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
210
211 if (lock_tlbie)
212 spin_lock(&native_tlbie_lock);
213 tlbie(va, large);
214 if (lock_tlbie)
215 spin_unlock(&native_tlbie_lock);
216 }
217
218 return ret;
219}
220
221/*
222 * Update the page protection bits. Intended to be used to create
223 * guard pages for kernel data structures on pages which are bolted
224 * in the HPT. Assumes pages being operated on will not be stolen.
225 * Does not work on large pages.
226 *
227 * No need to lock here because we should be the only user.
228 */
229static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
230{
231 unsigned long vsid, va, vpn, flags = 0;
232 long slot;
233 hpte_t *hptep;
234 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
235
236 vsid = get_kernel_vsid(ea);
237 va = (vsid << 28) | (ea & 0x0fffffff);
238 vpn = va >> PAGE_SHIFT;
239
240 slot = native_hpte_find(vpn);
241 if (slot == -1)
242 panic("could not find page to bolt\n");
243 hptep = htab_address + slot;
244
245 set_pp_bit(newpp, hptep);
246
247 /* Ensure it is out of the tlb too */
248 if (lock_tlbie)
249 spin_lock_irqsave(&native_tlbie_lock, flags);
250 tlbie(va, 0);
251 if (lock_tlbie)
252 spin_unlock_irqrestore(&native_tlbie_lock, flags);
253}
254
255static void native_hpte_invalidate(unsigned long slot, unsigned long va,
256 int large, int local)
257{
258 hpte_t *hptep = htab_address + slot;
259 unsigned long hpte_v;
260 unsigned long avpn = va >> 23;
261 unsigned long flags;
262 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
263
264 if (large)
265 avpn &= ~1;
266
267 local_irq_save(flags);
268 native_lock_hpte(hptep);
269
270 hpte_v = hptep->v;
271
272 /* Even if we miss, we need to invalidate the TLB */
273 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
274 || !(hpte_v & HPTE_V_VALID)) {
275 native_unlock_hpte(hptep);
276 } else {
277 /* Invalidate the hpte. NOTE: this also unlocks it */
278 hptep->v = 0;
279 }
280
281 /* Invalidate the tlb */
282 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
283 tlbiel(va);
284 } else {
285 if (lock_tlbie)
286 spin_lock(&native_tlbie_lock);
287 tlbie(va, large);
288 if (lock_tlbie)
289 spin_unlock(&native_tlbie_lock);
290 }
291 local_irq_restore(flags);
292}
293
294/*
295 * clear all mappings on kexec. All cpus are in real mode (or they will
296 * be when they isi), and we are the only one left. We rely on our kernel
297 * mapping being 0xC0's and the hardware ignoring those two real bits.
298 *
299	 * TODO: add batching support when enabled. Remember, no dynamic memory here,
300	 * although there is the control page available...
301 */
302static void native_hpte_clear(void)
303{
304 unsigned long slot, slots, flags;
305 hpte_t *hptep = htab_address;
306 unsigned long hpte_v;
307 unsigned long pteg_count;
308
309 pteg_count = htab_hash_mask + 1;
310
311 local_irq_save(flags);
312
313 /* we take the tlbie lock and hold it. Some hardware will
314 * deadlock if we try to tlbie from two processors at once.
315 */
316 spin_lock(&native_tlbie_lock);
317
318 slots = pteg_count * HPTES_PER_GROUP;
319
320 for (slot = 0; slot < slots; slot++, hptep++) {
321 /*
322 * we could lock the pte here, but we are the only cpu
323 * running, right? and for crash dump, we probably
324 * don't want to wait for a maybe bad cpu.
325 */
326 hpte_v = hptep->v;
327
328 if (hpte_v & HPTE_V_VALID) {
329 hptep->v = 0;
330 tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE);
331 }
332 }
333
334 spin_unlock(&native_tlbie_lock);
335 local_irq_restore(flags);
336}
337
338static void native_flush_hash_range(unsigned long context,
339 unsigned long number, int local)
340{
341 unsigned long vsid, vpn, va, hash, secondary, slot, flags, avpn;
342 int i, j;
343 hpte_t *hptep;
344 unsigned long hpte_v;
345 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
346 unsigned long large = batch->large;
347
348 local_irq_save(flags);
349
350 j = 0;
351 for (i = 0; i < number; i++) {
352 if (batch->addr[i] < KERNELBASE)
353 vsid = get_vsid(context, batch->addr[i]);
354 else
355 vsid = get_kernel_vsid(batch->addr[i]);
356
357 va = (vsid << 28) | (batch->addr[i] & 0x0fffffff);
358 batch->vaddr[j] = va;
359 if (large)
360 vpn = va >> HPAGE_SHIFT;
361 else
362 vpn = va >> PAGE_SHIFT;
363 hash = hpt_hash(vpn, large);
364 secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15;
365 if (secondary)
366 hash = ~hash;
367 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
368 slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12;
369
370 hptep = htab_address + slot;
371
372 avpn = va >> 23;
373 if (large)
374 avpn &= ~0x1UL;
375
376 native_lock_hpte(hptep);
377
378 hpte_v = hptep->v;
379
380 /* Even if we miss, we need to invalidate the TLB */
381 if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
382 || !(hpte_v & HPTE_V_VALID)) {
383 native_unlock_hpte(hptep);
384 } else {
385 /* Invalidate the hpte. NOTE: this also unlocks it */
386 hptep->v = 0;
387 }
388
389 j++;
390 }
391
392 if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
393 asm volatile("ptesync":::"memory");
394
395 for (i = 0; i < j; i++)
396 __tlbiel(batch->vaddr[i]);
397
398 asm volatile("ptesync":::"memory");
399 } else {
400 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
401
402 if (lock_tlbie)
403 spin_lock(&native_tlbie_lock);
404
405 asm volatile("ptesync":::"memory");
406
407 for (i = 0; i < j; i++)
408 __tlbie(batch->vaddr[i], large);
409
410 asm volatile("eieio; tlbsync; ptesync":::"memory");
411
412 if (lock_tlbie)
413 spin_unlock(&native_tlbie_lock);
414 }
415
416 local_irq_restore(flags);
417}
418
419#ifdef CONFIG_PPC_PSERIES
420/* Disable TLB batching on nighthawk */
421static inline int tlb_batching_enabled(void)
422{
423 struct device_node *root = of_find_node_by_path("/");
424 int enabled = 1;
425
426 if (root) {
427 const char *model = get_property(root, "model", NULL);
428 if (model && !strcmp(model, "IBM,9076-N81"))
429 enabled = 0;
430 of_node_put(root);
431 }
432
433 return enabled;
434}
435#else
436static inline int tlb_batching_enabled(void)
437{
438 return 1;
439}
440#endif
441
442void hpte_init_native(void)
443{
444 ppc_md.hpte_invalidate = native_hpte_invalidate;
445 ppc_md.hpte_updatepp = native_hpte_updatepp;
446 ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
447 ppc_md.hpte_insert = native_hpte_insert;
448 ppc_md.hpte_remove = native_hpte_remove;
449 ppc_md.hpte_clear_all = native_hpte_clear;
450 if (tlb_batching_enabled())
451 ppc_md.flush_hash_range = native_flush_hash_range;
452 htab_finish_init();
453}
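One detail of native_hpte_insert() above that is easy to miss is its return value: the slot index within the PTE group with the secondary-hash flag folded into bit 3, which hash_low.S then stores back into the Linux PTE. A small sketch of the same encoding (illustrative only; the HPTE_V_SECONDARY value is assumed from the headers of this era):

#include <stdio.h>

#define HPTE_V_SECONDARY	0x0000000000000002UL	/* assumed flag value */

static long encode_slot(int i, unsigned long vflags)
{
	/* slot 0..7 in bits 0-2, secondary-group flag in bit 3 */
	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

int main(void)
{
	printf("primary group,   slot 5 -> %ld\n", encode_slot(5, 0));			/* 5 */
	printf("secondary group, slot 5 -> %ld\n", encode_slot(5, HPTE_V_SECONDARY));	/* 13 */
	return 0;
}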
diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c
deleted file mode 100644
index 09475c8edf7c..000000000000
--- a/arch/ppc64/mm/hash_utils.c
+++ /dev/null
@@ -1,438 +0,0 @@
1/*
2 * PowerPC64 port by Mike Corrigan and Dave Engebretsen
3 * {mikejc|engebret}@us.ibm.com
4 *
5 * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
6 *
7 * SMP scalability work:
8 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
9 *
10 * Module name: htab.c
11 *
12 * Description:
13 * PowerPC Hashed Page Table functions
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version.
19 */
20
21#undef DEBUG
22
23#include <linux/config.h>
24#include <linux/spinlock.h>
25#include <linux/errno.h>
26#include <linux/sched.h>
27#include <linux/proc_fs.h>
28#include <linux/stat.h>
29#include <linux/sysctl.h>
30#include <linux/ctype.h>
31#include <linux/cache.h>
32#include <linux/init.h>
33#include <linux/signal.h>
34
35#include <asm/ppcdebug.h>
36#include <asm/processor.h>
37#include <asm/pgtable.h>
38#include <asm/mmu.h>
39#include <asm/mmu_context.h>
40#include <asm/page.h>
41#include <asm/types.h>
42#include <asm/system.h>
43#include <asm/uaccess.h>
44#include <asm/machdep.h>
45#include <asm/lmb.h>
46#include <asm/abs_addr.h>
47#include <asm/tlbflush.h>
48#include <asm/io.h>
49#include <asm/eeh.h>
50#include <asm/tlb.h>
51#include <asm/cacheflush.h>
52#include <asm/cputable.h>
53#include <asm/abs_addr.h>
54#include <asm/sections.h>
55
56#ifdef DEBUG
57#define DBG(fmt...) udbg_printf(fmt)
58#else
59#define DBG(fmt...)
60#endif
61
62/*
63 * Note: pte --> Linux PTE
64 * HPTE --> PowerPC Hashed Page Table Entry
65 *
66 * Execution context:
67 * htab_initialize is called with the MMU off (of course), but
68 * the kernel has been copied down to zero so it can directly
69 * reference global data. At this point it is very difficult
70 * to print debug info.
71 *
72 */
73
74#ifdef CONFIG_U3_DART
75extern unsigned long dart_tablebase;
76#endif /* CONFIG_U3_DART */
77
78hpte_t *htab_address;
79unsigned long htab_hash_mask;
80
81extern unsigned long _SDR1;
82
83#define KB (1024)
84#define MB (1024*KB)
85
86static inline void loop_forever(void)
87{
88 volatile unsigned long x = 1;
89 for(;x;x|=1)
90 ;
91}
92
93#ifdef CONFIG_PPC_MULTIPLATFORM
94static inline void create_pte_mapping(unsigned long start, unsigned long end,
95 unsigned long mode, int large)
96{
97 unsigned long addr;
98 unsigned int step;
99 unsigned long tmp_mode;
100 unsigned long vflags;
101
102 if (large) {
103 step = 16*MB;
104 vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
105 } else {
106 step = 4*KB;
107 vflags = HPTE_V_BOLTED;
108 }
109
110 for (addr = start; addr < end; addr += step) {
111 unsigned long vpn, hash, hpteg;
112 unsigned long vsid = get_kernel_vsid(addr);
113 unsigned long va = (vsid << 28) | (addr & 0xfffffff);
114 int ret;
115
116 if (large)
117 vpn = va >> HPAGE_SHIFT;
118 else
119 vpn = va >> PAGE_SHIFT;
120
121
122 tmp_mode = mode;
123
124 /* Make non-kernel text non-executable */
125 if (!in_kernel_text(addr))
126 tmp_mode = mode | HW_NO_EXEC;
127
128 hash = hpt_hash(vpn, large);
129
130 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
131
132#ifdef CONFIG_PPC_PSERIES
133 if (systemcfg->platform & PLATFORM_LPAR)
134 ret = pSeries_lpar_hpte_insert(hpteg, va,
135 virt_to_abs(addr) >> PAGE_SHIFT,
136 vflags, tmp_mode);
137 else
138#endif /* CONFIG_PPC_PSERIES */
139 ret = native_hpte_insert(hpteg, va,
140 virt_to_abs(addr) >> PAGE_SHIFT,
141 vflags, tmp_mode);
142
143 if (ret == -1) {
144 ppc64_terminate_msg(0x20, "create_pte_mapping");
145 loop_forever();
146 }
147 }
148}
149
150void __init htab_initialize(void)
151{
152 unsigned long table, htab_size_bytes;
153 unsigned long pteg_count;
154 unsigned long mode_rw;
155 int i, use_largepages = 0;
156 unsigned long base = 0, size = 0;
157 extern unsigned long tce_alloc_start, tce_alloc_end;
158
159 DBG(" -> htab_initialize()\n");
160
161 /*
162 * Calculate the required size of the htab. We want the number of
163 * PTEGs to equal one half the number of real pages.
164 */
165 htab_size_bytes = 1UL << ppc64_pft_size;
166 pteg_count = htab_size_bytes >> 7;
167
168 /* For debug, make the HTAB 1/8 as big as it normally would be. */
169 ifppcdebug(PPCDBG_HTABSIZE) {
170 pteg_count >>= 3;
171 htab_size_bytes = pteg_count << 7;
172 }
173
174 htab_hash_mask = pteg_count - 1;
175
176 if (systemcfg->platform & PLATFORM_LPAR) {
177 /* Using a hypervisor which owns the htab */
178 htab_address = NULL;
179 _SDR1 = 0;
180 } else {
181 /* Find storage for the HPT. Must be contiguous in
182 * the absolute address space.
183 */
184 table = lmb_alloc(htab_size_bytes, htab_size_bytes);
185
186 DBG("Hash table allocated at %lx, size: %lx\n", table,
187 htab_size_bytes);
188
189 if ( !table ) {
190 ppc64_terminate_msg(0x20, "hpt space");
191 loop_forever();
192 }
193 htab_address = abs_to_virt(table);
194
195 /* htab absolute addr + encoded htabsize */
196 _SDR1 = table + __ilog2(pteg_count) - 11;
197
198 /* Initialize the HPT with no entries */
199 memset((void *)table, 0, htab_size_bytes);
200 }
201
202 mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
203
204 /* On U3 based machines, we need to reserve the DART area and
205 * _NOT_ map it to avoid cache paradoxes as it's remapped non
206 * cacheable later on
207 */
208 if (cpu_has_feature(CPU_FTR_16M_PAGE))
209 use_largepages = 1;
210
211	 /* Create the bolted linear mapping in the hash table */
212 for (i=0; i < lmb.memory.cnt; i++) {
213 base = lmb.memory.region[i].base + KERNELBASE;
214 size = lmb.memory.region[i].size;
215
216 DBG("creating mapping for region: %lx : %lx\n", base, size);
217
218#ifdef CONFIG_U3_DART
219 /* Do not map the DART space. Fortunately, it will be aligned
220 * in such a way that it will not cross two lmb regions and will
221 * fit within a single 16Mb page.
222 * The DART space is assumed to be a full 16Mb region even if we
223 * only use 2Mb of that space. We will use more of it later for
224 * AGP GART. We have to use a full 16Mb large page.
225 */
226 DBG("DART base: %lx\n", dart_tablebase);
227
228 if (dart_tablebase != 0 && dart_tablebase >= base
229 && dart_tablebase < (base + size)) {
230 if (base != dart_tablebase)
231 create_pte_mapping(base, dart_tablebase, mode_rw,
232 use_largepages);
233 if ((base + size) > (dart_tablebase + 16*MB))
234 create_pte_mapping(dart_tablebase + 16*MB, base + size,
235 mode_rw, use_largepages);
236 continue;
237 }
238#endif /* CONFIG_U3_DART */
239 create_pte_mapping(base, base + size, mode_rw, use_largepages);
240 }
241
242 /*
243 * If we have a memory_limit and we've allocated TCEs then we need to
244 * explicitly map the TCE area at the top of RAM. We also cope with the
245 * case that the TCEs start below memory_limit.
246 * tce_alloc_start/end are 16MB aligned so the mapping should work
247 * for either 4K or 16MB pages.
248 */
249 if (tce_alloc_start) {
250 tce_alloc_start += KERNELBASE;
251 tce_alloc_end += KERNELBASE;
252
253 if (base + size >= tce_alloc_start)
254 tce_alloc_start = base + size + 1;
255
256 create_pte_mapping(tce_alloc_start, tce_alloc_end,
257 mode_rw, use_largepages);
258 }
259
260 DBG(" <- htab_initialize()\n");
261}
262#undef KB
263#undef MB
264#endif /* CONFIG_PPC_MULTIPLATFORM */
265
266/*
267	 * Called by the asm code in hash_low.S to do the lazy icache flush
268 */
269unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
270{
271 struct page *page;
272
273 if (!pfn_valid(pte_pfn(pte)))
274 return pp;
275
276 page = pte_page(pte);
277
278 /* page is dirty */
279 if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
280 if (trap == 0x400) {
281 __flush_dcache_icache(page_address(page));
282 set_bit(PG_arch_1, &page->flags);
283 } else
284 pp |= HW_NO_EXEC;
285 }
286 return pp;
287}
288
289/* Result code is:
290 * 0 - handled
291 * 1 - normal page fault
292 * -1 - critical hash insertion error
293 */
294int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
295{
296 void *pgdir;
297 unsigned long vsid;
298 struct mm_struct *mm;
299 pte_t *ptep;
300 int ret;
301 int user_region = 0;
302 int local = 0;
303 cpumask_t tmp;
304
305 if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
306 return 1;
307
308 switch (REGION_ID(ea)) {
309 case USER_REGION_ID:
310 user_region = 1;
311 mm = current->mm;
312 if (! mm)
313 return 1;
314
315 vsid = get_vsid(mm->context.id, ea);
316 break;
317 case VMALLOC_REGION_ID:
318 mm = &init_mm;
319 vsid = get_kernel_vsid(ea);
320 break;
321#if 0
322 case KERNEL_REGION_ID:
323 /*
324 * Should never get here - entire 0xC0... region is bolted.
325 * Send the problem up to do_page_fault
326 */
327#endif
328 default:
329 /* Not a valid range
330 * Send the problem up to do_page_fault
331 */
332 return 1;
333 break;
334 }
335
336 pgdir = mm->pgd;
337
338 if (pgdir == NULL)
339 return 1;
340
341 tmp = cpumask_of_cpu(smp_processor_id());
342 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
343 local = 1;
344
345 /* Is this a huge page ? */
346 if (unlikely(in_hugepage_area(mm->context, ea)))
347 ret = hash_huge_page(mm, access, ea, vsid, local);
348 else {
349 ptep = find_linux_pte(pgdir, ea);
350 if (ptep == NULL)
351 return 1;
352 ret = __hash_page(ea, access, vsid, ptep, trap, local);
353 }
354
355 return ret;
356}
357
358void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
359 int local)
360{
361 unsigned long vsid, vpn, va, hash, secondary, slot;
362 unsigned long huge = pte_huge(pte);
363
364 if (ea < KERNELBASE)
365 vsid = get_vsid(context, ea);
366 else
367 vsid = get_kernel_vsid(ea);
368
369 va = (vsid << 28) | (ea & 0x0fffffff);
370 if (huge)
371 vpn = va >> HPAGE_SHIFT;
372 else
373 vpn = va >> PAGE_SHIFT;
374 hash = hpt_hash(vpn, huge);
375 secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
376 if (secondary)
377 hash = ~hash;
378 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
379 slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
380
381 ppc_md.hpte_invalidate(slot, va, huge, local);
382}
383
384void flush_hash_range(unsigned long context, unsigned long number, int local)
385{
386 if (ppc_md.flush_hash_range) {
387 ppc_md.flush_hash_range(context, number, local);
388 } else {
389 int i;
390 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
391
392 for (i = 0; i < number; i++)
393 flush_hash_page(context, batch->addr[i], batch->pte[i],
394 local);
395 }
396}
397
398static inline void make_bl(unsigned int *insn_addr, void *func)
399{
400 unsigned long funcp = *((unsigned long *)func);
401 int offset = funcp - (unsigned long)insn_addr;
402
403 *insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
404 flush_icache_range((unsigned long)insn_addr, 4+
405 (unsigned long)insn_addr);
406}
407
408/*
409	 * low_hash_fault is called when the low level hash code failed
410	 * to insert a PTE due to a hypervisor error
411 */
412void low_hash_fault(struct pt_regs *regs, unsigned long address)
413{
414 if (user_mode(regs)) {
415 siginfo_t info;
416
417 info.si_signo = SIGBUS;
418 info.si_errno = 0;
419 info.si_code = BUS_ADRERR;
420 info.si_addr = (void __user *)address;
421 force_sig_info(SIGBUS, &info, current);
422 return;
423 }
424 bad_page_fault(regs, address, SIGBUS);
425}
426
427void __init htab_finish_init(void)
428{
429 extern unsigned int *htab_call_hpte_insert1;
430 extern unsigned int *htab_call_hpte_insert2;
431 extern unsigned int *htab_call_hpte_remove;
432 extern unsigned int *htab_call_hpte_updatepp;
433
434 make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
435 make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
436 make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
437 make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
438}
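The make_bl() patching above builds an I-form relative branch-and-link: opcode 18 with the LK bit set (0x48000001) and the byte offset masked into the LI field. A stand-alone sketch of the same arithmetic (addresses are hypothetical, chosen only to show the encoding):

#include <stdio.h>

static unsigned int make_bl_insn(unsigned long insn_addr, unsigned long target)
{
	long offset = (long)(target - insn_addr);	/* byte offset, must fit in 26 bits */

	return (unsigned int)(0x48000001 | (offset & 0x03fffffc));
}

int main(void)
{
	/* branching 0x100 bytes forward encodes as 0x48000101 */
	printf("bl +0x100 -> 0x%08x\n", make_bl_insn(0x1000, 0x1100));
	return 0;
}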
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
deleted file mode 100644
index 0ea0994ed974..000000000000
--- a/arch/ppc64/mm/hugetlbpage.c
+++ /dev/null
@@ -1,745 +0,0 @@
1/*
2 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
3 *
4 * Copyright (C) 2003 David Gibson, IBM Corporation.
5 *
6 * Based on the IA-32 version:
7 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
8 */
9
10#include <linux/init.h>
11#include <linux/fs.h>
12#include <linux/mm.h>
13#include <linux/hugetlb.h>
14#include <linux/pagemap.h>
15#include <linux/smp_lock.h>
16#include <linux/slab.h>
17#include <linux/err.h>
18#include <linux/sysctl.h>
19#include <asm/mman.h>
20#include <asm/pgalloc.h>
21#include <asm/tlb.h>
22#include <asm/tlbflush.h>
23#include <asm/mmu_context.h>
24#include <asm/machdep.h>
25#include <asm/cputable.h>
26#include <asm/tlb.h>
27
28#include <linux/sysctl.h>
29
30#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT)
31#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
32
33/* Modelled after find_linux_pte() */
34pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
35{
36 pgd_t *pg;
37 pud_t *pu;
38 pmd_t *pm;
39 pte_t *pt;
40
41 BUG_ON(! in_hugepage_area(mm->context, addr));
42
43 addr &= HPAGE_MASK;
44
45 pg = pgd_offset(mm, addr);
46 if (!pgd_none(*pg)) {
47 pu = pud_offset(pg, addr);
48 if (!pud_none(*pu)) {
49 pm = pmd_offset(pu, addr);
50 pt = (pte_t *)pm;
51 BUG_ON(!pmd_none(*pm)
52 && !(pte_present(*pt) && pte_huge(*pt)));
53 return pt;
54 }
55 }
56
57 return NULL;
58}
59
60pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
61{
62 pgd_t *pg;
63 pud_t *pu;
64 pmd_t *pm;
65 pte_t *pt;
66
67 BUG_ON(! in_hugepage_area(mm->context, addr));
68
69 addr &= HPAGE_MASK;
70
71 pg = pgd_offset(mm, addr);
72 pu = pud_alloc(mm, pg, addr);
73
74 if (pu) {
75 pm = pmd_alloc(mm, pu, addr);
76 if (pm) {
77 pt = (pte_t *)pm;
78 BUG_ON(!pmd_none(*pm)
79 && !(pte_present(*pt) && pte_huge(*pt)));
80 return pt;
81 }
82 }
83
84 return NULL;
85}
86
87#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
88
89void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
90 pte_t *ptep, pte_t pte)
91{
92 int i;
93
94 if (pte_present(*ptep)) {
95 pte_clear(mm, addr, ptep);
96 flush_tlb_pending();
97 }
98
99 for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
100 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
101 ptep++;
102 }
103}
104
105pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
106 pte_t *ptep)
107{
108 unsigned long old = pte_update(ptep, ~0UL);
109 int i;
110
111 if (old & _PAGE_HASHPTE)
112 hpte_update(mm, addr, old, 0);
113
114 for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
115 ptep[i] = __pte(0);
116
117 return __pte(old);
118}
119
120/*
121 * This function checks for proper alignment of input addr and len parameters.
122 */
123int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
124{
125 if (len & ~HPAGE_MASK)
126 return -EINVAL;
127 if (addr & ~HPAGE_MASK)
128 return -EINVAL;
129 if (! (within_hugepage_low_range(addr, len)
130 || within_hugepage_high_range(addr, len)) )
131 return -EINVAL;
132 return 0;
133}
134
135static void flush_low_segments(void *parm)
136{
137 u16 areas = (unsigned long) parm;
138 unsigned long i;
139
140 asm volatile("isync" : : : "memory");
141
142 BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);
143
144 for (i = 0; i < NUM_LOW_AREAS; i++) {
145 if (! (areas & (1U << i)))
146 continue;
147 asm volatile("slbie %0"
148 : : "r" ((i << SID_SHIFT) | SLBIE_C));
149 }
150
151 asm volatile("isync" : : : "memory");
152}
153
154static void flush_high_segments(void *parm)
155{
156 u16 areas = (unsigned long) parm;
157 unsigned long i, j;
158
159 asm volatile("isync" : : : "memory");
160
161 BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);
162
163 for (i = 0; i < NUM_HIGH_AREAS; i++) {
164 if (! (areas & (1U << i)))
165 continue;
166 for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
167 asm volatile("slbie %0"
168 :: "r" (((i << HTLB_AREA_SHIFT)
169 + (j << SID_SHIFT)) | SLBIE_C));
170 }
171
172 asm volatile("isync" : : : "memory");
173}
174
175static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
176{
177 unsigned long start = area << SID_SHIFT;
178 unsigned long end = (area+1) << SID_SHIFT;
179 struct vm_area_struct *vma;
180
181 BUG_ON(area >= NUM_LOW_AREAS);
182
183 /* Check no VMAs are in the region */
184 vma = find_vma(mm, start);
185 if (vma && (vma->vm_start < end))
186 return -EBUSY;
187
188 return 0;
189}
190
191static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
192{
193 unsigned long start = area << HTLB_AREA_SHIFT;
194 unsigned long end = (area+1) << HTLB_AREA_SHIFT;
195 struct vm_area_struct *vma;
196
197 BUG_ON(area >= NUM_HIGH_AREAS);
198
199 /* Check no VMAs are in the region */
200 vma = find_vma(mm, start);
201 if (vma && (vma->vm_start < end))
202 return -EBUSY;
203
204 return 0;
205}
206
207static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
208{
209 unsigned long i;
210
211 BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
212 BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
213
214 newareas &= ~(mm->context.low_htlb_areas);
215 if (! newareas)
216 return 0; /* The segments we want are already open */
217
218 for (i = 0; i < NUM_LOW_AREAS; i++)
219 if ((1 << i) & newareas)
220 if (prepare_low_area_for_htlb(mm, i) != 0)
221 return -EBUSY;
222
223 mm->context.low_htlb_areas |= newareas;
224
225 /* update the paca copy of the context struct */
226 get_paca()->context = mm->context;
227
228 /* the context change must make it to memory before the flush,
229 * so that further SLB misses do the right thing. */
230 mb();
231 on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);
232
233 return 0;
234}
235
236static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
237{
238 unsigned long i;
239
240 BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
241 BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
242 != NUM_HIGH_AREAS);
243
244 newareas &= ~(mm->context.high_htlb_areas);
245 if (! newareas)
246 return 0; /* The areas we want are already open */
247
248 for (i = 0; i < NUM_HIGH_AREAS; i++)
249 if ((1 << i) & newareas)
250 if (prepare_high_area_for_htlb(mm, i) != 0)
251 return -EBUSY;
252
253 mm->context.high_htlb_areas |= newareas;
254
255 /* update the paca copy of the context struct */
256 get_paca()->context = mm->context;
257
258 /* the context change must make it to memory before the flush,
259 * so that further SLB misses do the right thing. */
260 mb();
261 on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);
262
263 return 0;
264}
265
266int prepare_hugepage_range(unsigned long addr, unsigned long len)
267{
268 int err;
269
270 if ( (addr+len) < addr )
271 return -EINVAL;
272
273 if ((addr + len) < 0x100000000UL)
274 err = open_low_hpage_areas(current->mm,
275 LOW_ESID_MASK(addr, len));
276 else
277 err = open_high_hpage_areas(current->mm,
278 HTLB_AREA_MASK(addr, len));
279 if (err) {
280 printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
281 " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
282 addr, len,
283 LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
284 return err;
285 }
286
287 return 0;
288}
289
290struct page *
291follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
292{
293 pte_t *ptep;
294 struct page *page;
295
296 if (! in_hugepage_area(mm->context, address))
297 return ERR_PTR(-EINVAL);
298
299 ptep = huge_pte_offset(mm, address);
300 page = pte_page(*ptep);
301 if (page)
302 page += (address % HPAGE_SIZE) / PAGE_SIZE;
303
304 return page;
305}
306
307int pmd_huge(pmd_t pmd)
308{
309 return 0;
310}
311
312struct page *
313follow_huge_pmd(struct mm_struct *mm, unsigned long address,
314 pmd_t *pmd, int write)
315{
316 BUG();
317 return NULL;
318}
319
320/* Because we have an exclusive hugepage region which lies within the
321 * normal user address space, we have to take special measures to make
322 * non-huge mmap()s evade the hugepage reserved regions. */
323unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
324 unsigned long len, unsigned long pgoff,
325 unsigned long flags)
326{
327 struct mm_struct *mm = current->mm;
328 struct vm_area_struct *vma;
329 unsigned long start_addr;
330
331 if (len > TASK_SIZE)
332 return -ENOMEM;
333
334 if (addr) {
335 addr = PAGE_ALIGN(addr);
336 vma = find_vma(mm, addr);
337 if (((TASK_SIZE - len) >= addr)
338 && (!vma || (addr+len) <= vma->vm_start)
339 && !is_hugepage_only_range(mm, addr,len))
340 return addr;
341 }
342 if (len > mm->cached_hole_size) {
343 start_addr = addr = mm->free_area_cache;
344 } else {
345 start_addr = addr = TASK_UNMAPPED_BASE;
346 mm->cached_hole_size = 0;
347 }
348
349full_search:
350 vma = find_vma(mm, addr);
351 while (TASK_SIZE - len >= addr) {
352 BUG_ON(vma && (addr >= vma->vm_end));
353
354 if (touches_hugepage_low_range(mm, addr, len)) {
355 addr = ALIGN(addr+1, 1<<SID_SHIFT);
356 vma = find_vma(mm, addr);
357 continue;
358 }
359 if (touches_hugepage_high_range(mm, addr, len)) {
360 addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
361 vma = find_vma(mm, addr);
362 continue;
363 }
364 if (!vma || addr + len <= vma->vm_start) {
365 /*
366 * Remember the place where we stopped the search:
367 */
368 mm->free_area_cache = addr + len;
369 return addr;
370 }
371 if (addr + mm->cached_hole_size < vma->vm_start)
372 mm->cached_hole_size = vma->vm_start - addr;
373 addr = vma->vm_end;
374 vma = vma->vm_next;
375 }
376
377 /* Make sure we didn't miss any holes */
378 if (start_addr != TASK_UNMAPPED_BASE) {
379 start_addr = addr = TASK_UNMAPPED_BASE;
380 mm->cached_hole_size = 0;
381 goto full_search;
382 }
383 return -ENOMEM;
384}
385
386/*
387 * This mmap-allocator allocates new areas top-down from below the
388 * stack's low limit (the base):
389 *
390 * Because we have an exclusive hugepage region which lies within the
391 * normal user address space, we have to take special measures to make
392 * non-huge mmap()s evade the hugepage reserved regions.
393 */
394unsigned long
395arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
396 const unsigned long len, const unsigned long pgoff,
397 const unsigned long flags)
398{
399 struct vm_area_struct *vma, *prev_vma;
400 struct mm_struct *mm = current->mm;
401 unsigned long base = mm->mmap_base, addr = addr0;
402 unsigned long largest_hole = mm->cached_hole_size;
403 int first_time = 1;
404
405 /* requested length too big for entire address space */
406 if (len > TASK_SIZE)
407 return -ENOMEM;
408
409	 /* don't allow allocations above current base */
410 if (mm->free_area_cache > base)
411 mm->free_area_cache = base;
412
413 /* requesting a specific address */
414 if (addr) {
415 addr = PAGE_ALIGN(addr);
416 vma = find_vma(mm, addr);
417 if (TASK_SIZE - len >= addr &&
418 (!vma || addr + len <= vma->vm_start)
419 && !is_hugepage_only_range(mm, addr,len))
420 return addr;
421 }
422
423 if (len <= largest_hole) {
424 largest_hole = 0;
425 mm->free_area_cache = base;
426 }
427try_again:
428 /* make sure it can fit in the remaining address space */
429 if (mm->free_area_cache < len)
430 goto fail;
431
432	 /* either no address requested or can't fit in requested address hole */
433 addr = (mm->free_area_cache - len) & PAGE_MASK;
434 do {
435hugepage_recheck:
436 if (touches_hugepage_low_range(mm, addr, len)) {
437 addr = (addr & ((~0) << SID_SHIFT)) - len;
438 goto hugepage_recheck;
439 } else if (touches_hugepage_high_range(mm, addr, len)) {
440 addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
441 goto hugepage_recheck;
442 }
443
444 /*
445 * Lookup failure means no vma is above this address,
446 * i.e. return with success:
447 */
448 if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
449 return addr;
450
451 /*
452 * new region fits between prev_vma->vm_end and
453 * vma->vm_start, use it:
454 */
455 if (addr+len <= vma->vm_start &&
456 (!prev_vma || (addr >= prev_vma->vm_end))) {
457 /* remember the address as a hint for next time */
458 mm->cached_hole_size = largest_hole;
459 return (mm->free_area_cache = addr);
460 } else {
461 /* pull free_area_cache down to the first hole */
462 if (mm->free_area_cache == vma->vm_end) {
463 mm->free_area_cache = vma->vm_start;
464 mm->cached_hole_size = largest_hole;
465 }
466 }
467
468 /* remember the largest hole we saw so far */
469 if (addr + largest_hole < vma->vm_start)
470 largest_hole = vma->vm_start - addr;
471
472 /* try just below the current vma->vm_start */
473 addr = vma->vm_start-len;
474 } while (len <= vma->vm_start);
475
476fail:
477 /*
478 * if hint left us with no space for the requested
479 * mapping then try again:
480 */
481 if (first_time) {
482 mm->free_area_cache = base;
483 largest_hole = 0;
484 first_time = 0;
485 goto try_again;
486 }
487 /*
488 * A failed mmap() very likely causes application failure,
489 * so fall back to the bottom-up function here. This scenario
490 * can happen with large stack limits and large mmap()
491 * allocations.
492 */
493 mm->free_area_cache = TASK_UNMAPPED_BASE;
494 mm->cached_hole_size = ~0UL;
495 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
496 /*
497 * Restore the topdown base:
498 */
499 mm->free_area_cache = base;
500 mm->cached_hole_size = ~0UL;
501
502 return addr;
503}
504
505static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
506{
507 unsigned long addr = 0;
508 struct vm_area_struct *vma;
509
510 vma = find_vma(current->mm, addr);
511 while (addr + len <= 0x100000000UL) {
512 BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
513
514 if (! __within_hugepage_low_range(addr, len, segmask)) {
515 addr = ALIGN(addr+1, 1<<SID_SHIFT);
516 vma = find_vma(current->mm, addr);
517 continue;
518 }
519
520 if (!vma || (addr + len) <= vma->vm_start)
521 return addr;
522 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
523 /* Depending on segmask this might not be a confirmed
524 * hugepage region, so the ALIGN could have skipped
525 * some VMAs */
526 vma = find_vma(current->mm, addr);
527 }
528
529 return -ENOMEM;
530}
531
532static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
533{
534 unsigned long addr = 0x100000000UL;
535 struct vm_area_struct *vma;
536
537 vma = find_vma(current->mm, addr);
538 while (addr + len <= TASK_SIZE_USER64) {
539 BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
540
541 if (! __within_hugepage_high_range(addr, len, areamask)) {
542 addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
543 vma = find_vma(current->mm, addr);
544 continue;
545 }
546
547 if (!vma || (addr + len) <= vma->vm_start)
548 return addr;
549 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
550 /* Depending on segmask this might not be a confirmed
551 * hugepage region, so the ALIGN could have skipped
552 * some VMAs */
553 vma = find_vma(current->mm, addr);
554 }
555
556 return -ENOMEM;
557}
558
559unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
560 unsigned long len, unsigned long pgoff,
561 unsigned long flags)
562{
563 int lastshift;
564 u16 areamask, curareas;
565
566 if (len & ~HPAGE_MASK)
567 return -EINVAL;
568
569 if (!cpu_has_feature(CPU_FTR_16M_PAGE))
570 return -EINVAL;
571
572 if (test_thread_flag(TIF_32BIT)) {
573 curareas = current->mm->context.low_htlb_areas;
574
575 /* First see if we can do the mapping in the existing
576 * low areas */
577 addr = htlb_get_low_area(len, curareas);
578 if (addr != -ENOMEM)
579 return addr;
580
581 lastshift = 0;
582 for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
583 ! lastshift; areamask >>=1) {
584 if (areamask & 1)
585 lastshift = 1;
586
587 addr = htlb_get_low_area(len, curareas | areamask);
588 if ((addr != -ENOMEM)
589 && open_low_hpage_areas(current->mm, areamask) == 0)
590 return addr;
591 }
592 } else {
593 curareas = current->mm->context.high_htlb_areas;
594
595 /* First see if we can do the mapping in the existing
596 * high areas */
597 addr = htlb_get_high_area(len, curareas);
598 if (addr != -ENOMEM)
599 return addr;
600
601 lastshift = 0;
602 for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
603 ! lastshift; areamask >>=1) {
604 if (areamask & 1)
605 lastshift = 1;
606
607 addr = htlb_get_high_area(len, curareas | areamask);
608 if ((addr != -ENOMEM)
609 && open_high_hpage_areas(current->mm, areamask) == 0)
610 return addr;
611 }
612 }
613 printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
614 " enough areas\n");
615 return -ENOMEM;
616}
617
618int hash_huge_page(struct mm_struct *mm, unsigned long access,
619 unsigned long ea, unsigned long vsid, int local)
620{
621 pte_t *ptep;
622 unsigned long va, vpn;
623 pte_t old_pte, new_pte;
624 unsigned long rflags, prpn;
625 long slot;
626 int err = 1;
627
628 spin_lock(&mm->page_table_lock);
629
630 ptep = huge_pte_offset(mm, ea);
631
632 /* Search the Linux page table for a match with va */
633 va = (vsid << 28) | (ea & 0x0fffffff);
634 vpn = va >> HPAGE_SHIFT;
635
636 /*
637 * If no pte found or not present, send the problem up to
638 * do_page_fault
639 */
640 if (unlikely(!ptep || pte_none(*ptep)))
641 goto out;
642
643/* BUG_ON(pte_bad(*ptep)); */
644
645 /*
646 * Check the user's access rights to the page. If access should be
647 * prevented then send the problem up to do_page_fault.
648 */
649 if (unlikely(access & ~pte_val(*ptep)))
650 goto out;
651 /*
652 * At this point, we have a pte (old_pte) which can be used to build
653 * or update an HPTE. There are 2 cases:
654 *
655 * 1. There is a valid (present) pte with no associated HPTE (this is
656 * the most common case)
657 * 2. There is a valid (present) pte with an associated HPTE. The
658 * current values of the pp bits in the HPTE prevent access
659 * because we are doing software DIRTY bit management and the
660 * page is currently not DIRTY.
661 */
662
663
664 old_pte = *ptep;
665 new_pte = old_pte;
666
667 rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
668 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
669 rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
670
671 /* Check if pte already has an hpte (case 2) */
672 if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
673 /* There MIGHT be an HPTE for this pte */
674 unsigned long hash, slot;
675
676 hash = hpt_hash(vpn, 1);
677 if (pte_val(old_pte) & _PAGE_SECONDARY)
678 hash = ~hash;
679 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
680 slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
681
682 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
683 pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
684 }
685
686 if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
687 unsigned long hash = hpt_hash(vpn, 1);
688 unsigned long hpte_group;
689
690 prpn = pte_pfn(old_pte);
691
692repeat:
693 hpte_group = ((hash & htab_hash_mask) *
694 HPTES_PER_GROUP) & ~0x7UL;
695
696 /* Update the linux pte with the HPTE slot */
697 pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
698 pte_val(new_pte) |= _PAGE_HASHPTE;
699
700 /* Add in WIMG bits */
701 /* XXX We should store these in the pte */
702 rflags |= _PAGE_COHERENT;
703
704 slot = ppc_md.hpte_insert(hpte_group, va, prpn,
705 HPTE_V_LARGE, rflags);
706
707 /* Primary is full, try the secondary */
708 if (unlikely(slot == -1)) {
709 pte_val(new_pte) |= _PAGE_SECONDARY;
710 hpte_group = ((~hash & htab_hash_mask) *
711 HPTES_PER_GROUP) & ~0x7UL;
712 slot = ppc_md.hpte_insert(hpte_group, va, prpn,
713 HPTE_V_LARGE |
714 HPTE_V_SECONDARY,
715 rflags);
716 if (slot == -1) {
717 if (mftb() & 0x1)
718 hpte_group = ((hash & htab_hash_mask) *
719 HPTES_PER_GROUP)&~0x7UL;
720
721 ppc_md.hpte_remove(hpte_group);
722 goto repeat;
723 }
724 }
725
726 if (unlikely(slot == -2))
727 panic("hash_huge_page: pte_insert failed\n");
728
729 pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
730
731 /*
732 * No need to use ldarx/stdcx here because all who
733 * might be updating the pte will hold the
734 * page_table_lock
735 */
736 *ptep = new_pte;
737 }
738
739 err = 0;
740
741 out:
742 spin_unlock(&mm->page_table_lock);
743
744 return err;
745}
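The insert path above computes the primary HPTE group from the hash and the secondary group from its complement, then folds the returned slot back into the Linux PTE through _PAGE_GROUP_IX. A minimal user-space sketch of just that index arithmetic follows; htab_hash_mask, HPTES_PER_GROUP and the _PAGE_GROUP_IX value are assumed example constants, not the kernel's runtime configuration.

/* Illustrative sketch only: the constants below are assumed, not taken from a live system. */
#include <stdio.h>

#define HPTES_PER_GROUP   8UL
#define HTAB_HASH_MASK    0xffffUL    /* assumed example hash mask */
#define PAGE_GROUP_IX     0x7000UL    /* assumed: three slot bits starting at bit 12 */

int main(void)
{
	unsigned long hash = 0x12345678UL;  /* stand-in for hpt_hash(vpn, 1) */
	/* trailing & ~0x7UL mirrors the kernel expression above */
	unsigned long primary   = ((hash  & HTAB_HASH_MASK) * HPTES_PER_GROUP) & ~0x7UL;
	unsigned long secondary = ((~hash & HTAB_HASH_MASK) * HPTES_PER_GROUP) & ~0x7UL;
	long slot = 5;                      /* stand-in for the slot hpte_insert() returned */

	printf("primary group %lu, secondary group %lu\n", primary, secondary);
	printf("slot bits folded into the pte: 0x%lx\n",
	       ((unsigned long)slot << 12) & PAGE_GROUP_IX);
	return 0;
}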
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c
deleted file mode 100644
index c65b87b92756..000000000000
--- a/arch/ppc64/mm/imalloc.c
+++ /dev/null
@@ -1,317 +0,0 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include <linux/slab.h>
11#include <linux/vmalloc.h>
12
13#include <asm/uaccess.h>
14#include <asm/pgalloc.h>
15#include <asm/pgtable.h>
16#include <asm/semaphore.h>
17#include <asm/imalloc.h>
18#include <asm/cacheflush.h>
19
20static DECLARE_MUTEX(imlist_sem);
21struct vm_struct * imlist = NULL;
22
23static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
24{
25 unsigned long addr;
26 struct vm_struct **p, *tmp;
27
28 addr = ioremap_bot;
29 for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
30 if (size + addr < (unsigned long) tmp->addr)
31 break;
32 if ((unsigned long)tmp->addr >= ioremap_bot)
33 addr = tmp->size + (unsigned long) tmp->addr;
34 if (addr >= IMALLOC_END-size)
35 return 1;
36 }
37 *im_addr = addr;
38
39 return 0;
40}
41
42/* Return whether the region described by v_addr and size is a subset
43 * of the region described by parent
44 */
45static inline int im_region_is_subset(unsigned long v_addr, unsigned long size,
46 struct vm_struct *parent)
47{
48 return (int) (v_addr >= (unsigned long) parent->addr &&
49 v_addr < (unsigned long) parent->addr + parent->size &&
50 size < parent->size);
51}
52
53/* Return whether the region described by v_addr and size is a superset
54 * of the region described by child
55 */
56static int im_region_is_superset(unsigned long v_addr, unsigned long size,
57 struct vm_struct *child)
58{
59 struct vm_struct parent;
60
61 parent.addr = (void *) v_addr;
62 parent.size = size;
63
64 return im_region_is_subset((unsigned long) child->addr, child->size,
65 &parent);
66}
67
68/* Return whether the region described by v_addr and size overlaps
69 * the region described by vm. Overlapping regions meet the
70 * following conditions:
71 * 1) The regions share some part of the address space
72 * 2) The regions aren't identical
73 * 3) Neither region is a subset of the other
74 */
75static int im_region_overlaps(unsigned long v_addr, unsigned long size,
76 struct vm_struct *vm)
77{
78 if (im_region_is_superset(v_addr, size, vm))
79 return 0;
80
81 return (v_addr + size > (unsigned long) vm->addr + vm->size &&
82 v_addr < (unsigned long) vm->addr + vm->size) ||
83 (v_addr < (unsigned long) vm->addr &&
84 v_addr + size > (unsigned long) vm->addr);
85}
86
87/* Determine imalloc status of region described by v_addr and size.
88 * Can return one of the following:
89 * IM_REGION_UNUSED - Entire region is unallocated in imalloc space.
90 * IM_REGION_SUBSET - Region is a subset of a region that is already
91 * allocated in imalloc space.
92 * vm will be assigned to a ptr to the parent region.
93 * IM_REGION_EXISTS - Exact region already allocated in imalloc space.
94 * vm will be assigned to a ptr to the existing imlist
95 * member.
96 * IM_REGION_OVERLAPS - Region overlaps an allocated region in imalloc space.
97 * IM_REGION_SUPERSET - Region is a superset of a region that is already
98 * allocated in imalloc space.
99 */
100static int im_region_status(unsigned long v_addr, unsigned long size,
101 struct vm_struct **vm)
102{
103 struct vm_struct *tmp;
104
105 for (tmp = imlist; tmp; tmp = tmp->next)
106 if (v_addr < (unsigned long) tmp->addr + tmp->size)
107 break;
108
109 if (tmp) {
110 if (im_region_overlaps(v_addr, size, tmp))
111 return IM_REGION_OVERLAP;
112
113 *vm = tmp;
114 if (im_region_is_subset(v_addr, size, tmp)) {
115 /* Return with tmp pointing to superset */
116 return IM_REGION_SUBSET;
117 }
118 if (im_region_is_superset(v_addr, size, tmp)) {
119 /* Return with tmp pointing to first subset */
120 return IM_REGION_SUPERSET;
121 }
122 else if (v_addr == (unsigned long) tmp->addr &&
123 size == tmp->size) {
124 /* Return with tmp pointing to exact region */
125 return IM_REGION_EXISTS;
126 }
127 }
128
129 *vm = NULL;
130 return IM_REGION_UNUSED;
131}
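im_region_status() and the helpers above boil down to classifying how one [addr, addr + size) interval relates to another. Below is a simplified, self-contained sketch of that classification; unlike the kernel code it does not treat an exact match as a special case of the subset test, and the enum names are invented for illustration only.

/* Simplified interval classification, user-space sketch (not the kernel's code). */
#include <stdio.h>

enum rel { DISJOINT, EXACT, SUBSET, SUPERSET, OVERLAP };

static enum rel classify(unsigned long a, unsigned long alen,
			 unsigned long b, unsigned long blen)
{
	unsigned long aend = a + alen, bend = b + blen;

	if (aend <= b || bend <= a)
		return DISJOINT;
	if (a == b && alen == blen)
		return EXACT;
	if (a >= b && aend <= bend)
		return SUBSET;		/* [a,aend) lies inside [b,bend) */
	if (b >= a && bend <= aend)
		return SUPERSET;	/* [b,bend) lies inside [a,aend) */
	return OVERLAP;			/* partial overlap, neither contains the other */
}

int main(void)
{
	printf("%d\n", classify(0x1000, 0x1000, 0x0000, 0x4000));	/* SUBSET */
	printf("%d\n", classify(0x0000, 0x8000, 0x2000, 0x1000));	/* SUPERSET */
	printf("%d\n", classify(0x1000, 0x2000, 0x2000, 0x2000));	/* OVERLAP */
	return 0;
}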
132
133static struct vm_struct * split_im_region(unsigned long v_addr,
134 unsigned long size, struct vm_struct *parent)
135{
136 struct vm_struct *vm1 = NULL;
137 struct vm_struct *vm2 = NULL;
138 struct vm_struct *new_vm = NULL;
139
140 vm1 = (struct vm_struct *) kmalloc(sizeof(*vm1), GFP_KERNEL);
141 if (vm1 == NULL) {
142 printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
143 return NULL;
144 }
145
146 if (v_addr == (unsigned long) parent->addr) {
147 /* Use existing parent vm_struct to represent child, allocate
148 * new one for the remainder of parent range
149 */
150 vm1->size = parent->size - size;
151 vm1->addr = (void *) (v_addr + size);
152 vm1->next = parent->next;
153
154 parent->size = size;
155 parent->next = vm1;
156 new_vm = parent;
157 } else if (v_addr + size == (unsigned long) parent->addr +
158 parent->size) {
159 /* Allocate new vm_struct to represent child, use existing
160 * parent one for remainder of parent range
161 */
162 vm1->size = size;
163 vm1->addr = (void *) v_addr;
164 vm1->next = parent->next;
165 new_vm = vm1;
166
167 parent->size -= size;
168 parent->next = vm1;
169 } else {
170 /* Allocate two new vm_structs for the new child and
171 * uppermost remainder, and use existing parent one for the
172 * lower remainder of parent range
173 */
174 vm2 = (struct vm_struct *) kmalloc(sizeof(*vm2), GFP_KERNEL);
175 if (vm2 == NULL) {
176 printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
177 kfree(vm1);
178 return NULL;
179 }
180
181 vm1->size = size;
182 vm1->addr = (void *) v_addr;
183 vm1->next = vm2;
184 new_vm = vm1;
185
186 vm2->size = ((unsigned long) parent->addr + parent->size) -
187 (v_addr + size);
188 vm2->addr = (void *) v_addr + size;
189 vm2->next = parent->next;
190
191 parent->size = v_addr - (unsigned long) parent->addr;
192 parent->next = vm1;
193 }
194
195 return new_vm;
196}
197
198static struct vm_struct * __add_new_im_area(unsigned long req_addr,
199 unsigned long size)
200{
201 struct vm_struct **p, *tmp, *area;
202
203 for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
204 if (req_addr + size <= (unsigned long)tmp->addr)
205 break;
206 }
207
208 area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
209 if (!area)
210 return NULL;
211 area->flags = 0;
212 area->addr = (void *)req_addr;
213 area->size = size;
214 area->next = *p;
215 *p = area;
216
217 return area;
218}
219
220static struct vm_struct * __im_get_area(unsigned long req_addr,
221 unsigned long size,
222 int criteria)
223{
224 struct vm_struct *tmp;
225 int status;
226
227 status = im_region_status(req_addr, size, &tmp);
228 if ((criteria & status) == 0) {
229 return NULL;
230 }
231
232 switch (status) {
233 case IM_REGION_UNUSED:
234 tmp = __add_new_im_area(req_addr, size);
235 break;
236 case IM_REGION_SUBSET:
237 tmp = split_im_region(req_addr, size, tmp);
238 break;
239 case IM_REGION_EXISTS:
240 /* Return requested region */
241 break;
242 case IM_REGION_SUPERSET:
243 /* Return first existing subset of requested region */
244 break;
245 default:
246 printk(KERN_ERR "%s() unexpected imalloc region status\n",
247 __FUNCTION__);
248 tmp = NULL;
249 }
250
251 return tmp;
252}
253
254struct vm_struct * im_get_free_area(unsigned long size)
255{
256 struct vm_struct *area;
257 unsigned long addr;
258
259 down(&imlist_sem);
260 if (get_free_im_addr(size, &addr)) {
261 printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n",
262 __FUNCTION__, size);
263 area = NULL;
264 goto next_im_done;
265 }
266
267 area = __im_get_area(addr, size, IM_REGION_UNUSED);
268 if (area == NULL) {
269 printk(KERN_ERR
270 "%s() cannot obtain area for addr 0x%lx size 0x%lx\n",
271 __FUNCTION__, addr, size);
272 }
273next_im_done:
274 up(&imlist_sem);
275 return area;
276}
277
278struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
279 int criteria)
280{
281 struct vm_struct *area;
282
283 down(&imlist_sem);
284 area = __im_get_area(v_addr, size, criteria);
285 up(&imlist_sem);
286 return area;
287}
288
289void im_free(void * addr)
290{
291 struct vm_struct **p, *tmp;
292
293 if (!addr)
294 return;
295 if ((unsigned long) addr & ~PAGE_MASK) {
296 printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr);
297 return;
298 }
299 down(&imlist_sem);
300 for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
301 if (tmp->addr == addr) {
302 *p = tmp->next;
303
304 /* XXX: do we need the lock? */
305 spin_lock(&init_mm.page_table_lock);
306 unmap_vm_area(tmp);
307 spin_unlock(&init_mm.page_table_lock);
308
309 kfree(tmp);
310 up(&imlist_sem);
311 return;
312 }
313 }
314 up(&imlist_sem);
315 printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__,
316 addr);
317}
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
deleted file mode 100644
index c2157c9c3acb..000000000000
--- a/arch/ppc64/mm/init.c
+++ /dev/null
@@ -1,870 +0,0 @@
1/*
2 * PowerPC version
3 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
4 *
5 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
6 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
7 * Copyright (C) 1996 Paul Mackerras
8 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
9 *
10 * Derived from "arch/i386/mm/init.c"
11 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
12 *
13 * Dave Engebretsen <engebret@us.ibm.com>
14 * Rework for PPC64 port.
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version
19 * 2 of the License, or (at your option) any later version.
20 *
21 */
22
23#include <linux/config.h>
24#include <linux/signal.h>
25#include <linux/sched.h>
26#include <linux/kernel.h>
27#include <linux/errno.h>
28#include <linux/string.h>
29#include <linux/types.h>
30#include <linux/mman.h>
31#include <linux/mm.h>
32#include <linux/swap.h>
33#include <linux/stddef.h>
34#include <linux/vmalloc.h>
35#include <linux/init.h>
36#include <linux/delay.h>
37#include <linux/bootmem.h>
38#include <linux/highmem.h>
39#include <linux/idr.h>
40#include <linux/nodemask.h>
41#include <linux/module.h>
42
43#include <asm/pgalloc.h>
44#include <asm/page.h>
45#include <asm/prom.h>
46#include <asm/lmb.h>
47#include <asm/rtas.h>
48#include <asm/io.h>
49#include <asm/mmu_context.h>
50#include <asm/pgtable.h>
51#include <asm/mmu.h>
52#include <asm/uaccess.h>
53#include <asm/smp.h>
54#include <asm/machdep.h>
55#include <asm/tlb.h>
56#include <asm/eeh.h>
57#include <asm/processor.h>
58#include <asm/mmzone.h>
59#include <asm/cputable.h>
60#include <asm/ppcdebug.h>
61#include <asm/sections.h>
62#include <asm/system.h>
63#include <asm/iommu.h>
64#include <asm/abs_addr.h>
65#include <asm/vdso.h>
66#include <asm/imalloc.h>
67
68#if PGTABLE_RANGE > USER_VSID_RANGE
69#warning Limited user VSID range means pagetable space is wasted
70#endif
71
72#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
73#warning TASK_SIZE is smaller than it needs to be.
74#endif
75
76int mem_init_done;
77unsigned long ioremap_bot = IMALLOC_BASE;
78static unsigned long phbs_io_bot = PHBS_IO_BASE;
79
80extern pgd_t swapper_pg_dir[];
81extern struct task_struct *current_set[NR_CPUS];
82
83unsigned long klimit = (unsigned long)_end;
84
85unsigned long _SDR1=0;
86unsigned long _ASR=0;
87
88/* max amount of RAM to use */
89unsigned long __max_memory;
90
91/* info on what we think the IO hole is */
92unsigned long io_hole_start;
93unsigned long io_hole_size;
94
95void show_mem(void)
96{
97 unsigned long total = 0, reserved = 0;
98 unsigned long shared = 0, cached = 0;
99 struct page *page;
100 pg_data_t *pgdat;
101 unsigned long i;
102
103 printk("Mem-info:\n");
104 show_free_areas();
105 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
106 for_each_pgdat(pgdat) {
107 for (i = 0; i < pgdat->node_spanned_pages; i++) {
108 page = pgdat_page_nr(pgdat, i);
109 total++;
110 if (PageReserved(page))
111 reserved++;
112 else if (PageSwapCache(page))
113 cached++;
114 else if (page_count(page))
115 shared += page_count(page) - 1;
116 }
117 }
118 printk("%ld pages of RAM\n", total);
119 printk("%ld reserved pages\n", reserved);
120 printk("%ld pages shared\n", shared);
121 printk("%ld pages swap cached\n", cached);
122}
123
124#ifdef CONFIG_PPC_ISERIES
125
126void __iomem *ioremap(unsigned long addr, unsigned long size)
127{
128 return (void __iomem *)addr;
129}
130
131extern void __iomem *__ioremap(unsigned long addr, unsigned long size,
132 unsigned long flags)
133{
134 return (void __iomem *)addr;
135}
136
137void iounmap(volatile void __iomem *addr)
138{
139 return;
140}
141
142#else
143
144/*
 145 * map_io_page is currently only called by __ioremap
146 * map_io_page adds an entry to the ioremap page table
147 * and adds an entry to the HPT, possibly bolting it
148 */
149static int map_io_page(unsigned long ea, unsigned long pa, int flags)
150{
151 pgd_t *pgdp;
152 pud_t *pudp;
153 pmd_t *pmdp;
154 pte_t *ptep;
155 unsigned long vsid;
156
157 if (mem_init_done) {
158 spin_lock(&init_mm.page_table_lock);
159 pgdp = pgd_offset_k(ea);
160 pudp = pud_alloc(&init_mm, pgdp, ea);
161 if (!pudp)
162 return -ENOMEM;
163 pmdp = pmd_alloc(&init_mm, pudp, ea);
164 if (!pmdp)
165 return -ENOMEM;
166 ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
167 if (!ptep)
168 return -ENOMEM;
169 set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
170 __pgprot(flags)));
171 spin_unlock(&init_mm.page_table_lock);
172 } else {
173 unsigned long va, vpn, hash, hpteg;
174
175 /*
176 * If the mm subsystem is not fully up, we cannot create a
177 * linux page table entry for this mapping. Simply bolt an
178 * entry in the hardware page table.
179 */
180 vsid = get_kernel_vsid(ea);
181 va = (vsid << 28) | (ea & 0xFFFFFFF);
182 vpn = va >> PAGE_SHIFT;
183
184 hash = hpt_hash(vpn, 0);
185
186 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
187
 188	/* Panic if a pte group is full */
189 if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
190 HPTE_V_BOLTED,
191 _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
192 == -1) {
193 panic("map_io_page: could not insert mapping");
194 }
195 }
196 return 0;
197}
198
199
200static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
201 unsigned long ea, unsigned long size,
202 unsigned long flags)
203{
204 unsigned long i;
205
206 if ((flags & _PAGE_PRESENT) == 0)
207 flags |= pgprot_val(PAGE_KERNEL);
208
209 for (i = 0; i < size; i += PAGE_SIZE)
210 if (map_io_page(ea+i, pa+i, flags))
211 return NULL;
212
213 return (void __iomem *) (ea + (addr & ~PAGE_MASK));
214}
215
216
217void __iomem *
218ioremap(unsigned long addr, unsigned long size)
219{
220 return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
221}
222
223void __iomem * __ioremap(unsigned long addr, unsigned long size,
224 unsigned long flags)
225{
226 unsigned long pa, ea;
227 void __iomem *ret;
228
229 /*
230 * Choose an address to map it to.
231 * Once the imalloc system is running, we use it.
232 * Before that, we map using addresses going
233 * up from ioremap_bot. imalloc will use
234 * the addresses from ioremap_bot through
235 * IMALLOC_END
236 *
237 */
238 pa = addr & PAGE_MASK;
239 size = PAGE_ALIGN(addr + size) - pa;
240
241 if (size == 0)
242 return NULL;
243
244 if (mem_init_done) {
245 struct vm_struct *area;
246 area = im_get_free_area(size);
247 if (area == NULL)
248 return NULL;
249 ea = (unsigned long)(area->addr);
250 ret = __ioremap_com(addr, pa, ea, size, flags);
251 if (!ret)
252 im_free(area->addr);
253 } else {
254 ea = ioremap_bot;
255 ret = __ioremap_com(addr, pa, ea, size, flags);
256 if (ret)
257 ioremap_bot += size;
258 }
259 return ret;
260}
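The rounding at the top of __ioremap() maps whole pages that cover the requested [addr, addr + size) range and adds the sub-page offset back into the returned pointer. A standalone sketch of that arithmetic, assuming a 4 KB page size purely for illustration:

/* Sketch of the page rounding done by __ioremap(); PAGE_SIZE here is assumed. */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long addr = 0x80001234UL, size = 0x100UL;
	unsigned long pa  = addr & PAGE_MASK;			/* 0x80001000 */
	unsigned long len = PAGE_ALIGN(addr + size) - pa;	/* 0x1000: one whole page */

	printf("map pa=0x%lx len=0x%lx, caller gets ea + 0x%lx\n",
	       pa, len, addr & ~PAGE_MASK);
	return 0;
}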
261
262#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
263
264int __ioremap_explicit(unsigned long pa, unsigned long ea,
265 unsigned long size, unsigned long flags)
266{
267 struct vm_struct *area;
268 void __iomem *ret;
269
270 /* For now, require page-aligned values for pa, ea, and size */
271 if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
272 !IS_PAGE_ALIGNED(size)) {
273 printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__);
274 return 1;
275 }
276
277 if (!mem_init_done) {
278 /* Two things to consider in this case:
279 * 1) No records will be kept (imalloc, etc) that the region
280 * has been remapped
281 * 2) It won't be easy to iounmap() the region later (because
282 * of 1)
283 */
284 ;
285 } else {
286 area = im_get_area(ea, size,
287 IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
288 if (area == NULL) {
289 /* Expected when PHB-dlpar is in play */
290 return 1;
291 }
292 if (ea != (unsigned long) area->addr) {
293 printk(KERN_ERR "unexpected addr return from "
294 "im_get_area\n");
295 return 1;
296 }
297 }
298
299 ret = __ioremap_com(pa, pa, ea, size, flags);
300 if (ret == NULL) {
301 printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
302 return 1;
303 }
304 if (ret != (void *) ea) {
305 printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
306 return 1;
307 }
308
309 return 0;
310}
311
312/*
313 * Unmap an IO region and remove it from imalloc'd list.
314 * Access to IO memory should be serialized by driver.
315 * This code is modeled after vmalloc code - unmap_vm_area()
316 *
317 * XXX what about calls before mem_init_done (ie python_countermeasures())
318 */
319void iounmap(volatile void __iomem *token)
320{
321 void *addr;
322
323 if (!mem_init_done)
324 return;
325
326 addr = (void *) ((unsigned long __force) token & PAGE_MASK);
327
328 im_free(addr);
329}
330
331static int iounmap_subset_regions(unsigned long addr, unsigned long size)
332{
333 struct vm_struct *area;
334
335 /* Check whether subsets of this region exist */
336 area = im_get_area(addr, size, IM_REGION_SUPERSET);
337 if (area == NULL)
338 return 1;
339
340 while (area) {
341 iounmap((void __iomem *) area->addr);
342 area = im_get_area(addr, size,
343 IM_REGION_SUPERSET);
344 }
345
346 return 0;
347}
348
349int iounmap_explicit(volatile void __iomem *start, unsigned long size)
350{
351 struct vm_struct *area;
352 unsigned long addr;
353 int rc;
354
355 addr = (unsigned long __force) start & PAGE_MASK;
356
357 /* Verify that the region either exists or is a subset of an existing
358 * region. In the latter case, split the parent region to create
359 * the exact region
360 */
361 area = im_get_area(addr, size,
362 IM_REGION_EXISTS | IM_REGION_SUBSET);
363 if (area == NULL) {
364 /* Determine whether subset regions exist. If so, unmap */
365 rc = iounmap_subset_regions(addr, size);
366 if (rc) {
367 printk(KERN_ERR
368 "%s() cannot unmap nonexistent range 0x%lx\n",
369 __FUNCTION__, addr);
370 return 1;
371 }
372 } else {
373 iounmap((void __iomem *) area->addr);
374 }
375 /*
376 * FIXME! This can't be right:
377 iounmap(area->addr);
378 * Maybe it should be "iounmap(area);"
379 */
380 return 0;
381}
382
383#endif
384
385EXPORT_SYMBOL(ioremap);
386EXPORT_SYMBOL(__ioremap);
387EXPORT_SYMBOL(iounmap);
388
389void free_initmem(void)
390{
391 unsigned long addr;
392
393 addr = (unsigned long)__init_begin;
394 for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
395 memset((void *)addr, 0xcc, PAGE_SIZE);
396 ClearPageReserved(virt_to_page(addr));
397 set_page_count(virt_to_page(addr), 1);
398 free_page(addr);
399 totalram_pages++;
400 }
401 printk ("Freeing unused kernel memory: %luk freed\n",
402 ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
403}
404
405#ifdef CONFIG_BLK_DEV_INITRD
406void free_initrd_mem(unsigned long start, unsigned long end)
407{
408 if (start < end)
409 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
410 for (; start < end; start += PAGE_SIZE) {
411 ClearPageReserved(virt_to_page(start));
412 set_page_count(virt_to_page(start), 1);
413 free_page(start);
414 totalram_pages++;
415 }
416}
417#endif
418
419static DEFINE_SPINLOCK(mmu_context_lock);
420static DEFINE_IDR(mmu_context_idr);
421
422int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
423{
424 int index;
425 int err;
426
427again:
428 if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
429 return -ENOMEM;
430
431 spin_lock(&mmu_context_lock);
432 err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index);
433 spin_unlock(&mmu_context_lock);
434
435 if (err == -EAGAIN)
436 goto again;
437 else if (err)
438 return err;
439
440 if (index > MAX_CONTEXT) {
441 idr_remove(&mmu_context_idr, index);
442 return -ENOMEM;
443 }
444
445 mm->context.id = index;
446
447 return 0;
448}
449
450void destroy_context(struct mm_struct *mm)
451{
452 spin_lock(&mmu_context_lock);
453 idr_remove(&mmu_context_idr, mm->context.id);
454 spin_unlock(&mmu_context_lock);
455
456 mm->context.id = NO_CONTEXT;
457}
458
459/*
460 * Do very early mm setup.
461 */
462void __init mm_init_ppc64(void)
463{
464#ifndef CONFIG_PPC_ISERIES
465 unsigned long i;
466#endif
467
468 ppc64_boot_msg(0x100, "MM Init");
469
470 /* This is the story of the IO hole... please, keep seated,
471 * unfortunately, we are out of oxygen masks at the moment.
472 * So we need some rough way to tell where your big IO hole
473 * is. On pmac, it's between 2G and 4G, on POWER3, it's around
474 * that area as well, on POWER4 we don't have one, etc...
475 * We need that as a "hint" when sizing the TCE table on POWER3
 476 * So far, the simplest way that seems to work well enough for us is
 477 * to just assume that the first discontinuity in our physical
478 * RAM layout is the IO hole. That may not be correct in the future
479 * (and isn't on iSeries but then we don't care ;)
480 */
481
482#ifndef CONFIG_PPC_ISERIES
483 for (i = 1; i < lmb.memory.cnt; i++) {
484 unsigned long base, prevbase, prevsize;
485
486 prevbase = lmb.memory.region[i-1].base;
487 prevsize = lmb.memory.region[i-1].size;
488 base = lmb.memory.region[i].base;
489 if (base > (prevbase + prevsize)) {
490 io_hole_start = prevbase + prevsize;
491 io_hole_size = base - (prevbase + prevsize);
492 break;
493 }
494 }
495#endif /* CONFIG_PPC_ISERIES */
496 if (io_hole_start)
497 printk("IO Hole assumed to be %lx -> %lx\n",
498 io_hole_start, io_hole_start + io_hole_size - 1);
499
500 ppc64_boot_msg(0x100, "MM Init Done");
501}
502
503/*
504 * This is called by /dev/mem to know if a given address has to
505 * be mapped non-cacheable or not
506 */
507int page_is_ram(unsigned long pfn)
508{
509 int i;
510 unsigned long paddr = (pfn << PAGE_SHIFT);
511
512 for (i=0; i < lmb.memory.cnt; i++) {
513 unsigned long base;
514
515 base = lmb.memory.region[i].base;
516
517 if ((paddr >= base) &&
518 (paddr < (base + lmb.memory.region[i].size))) {
519 return 1;
520 }
521 }
522
523 return 0;
524}
525EXPORT_SYMBOL(page_is_ram);
526
527/*
528 * Initialize the bootmem system and give it all the memory we
529 * have available.
530 */
531#ifndef CONFIG_NEED_MULTIPLE_NODES
532void __init do_init_bootmem(void)
533{
534 unsigned long i;
535 unsigned long start, bootmap_pages;
536 unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
537 int boot_mapsize;
538
539 /*
540 * Find an area to use for the bootmem bitmap. Calculate the size of
541 * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
542 * Add 1 additional page in case the address isn't page-aligned.
543 */
544 bootmap_pages = bootmem_bootmap_pages(total_pages);
545
546 start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
547 BUG_ON(!start);
548
549 boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
550
551 max_pfn = max_low_pfn;
552
553 /* Add all physical memory to the bootmem map, mark each area
554 * present.
555 */
556 for (i=0; i < lmb.memory.cnt; i++)
557 free_bootmem(lmb.memory.region[i].base,
558 lmb_size_bytes(&lmb.memory, i));
559
560 /* reserve the sections we're already using */
561 for (i=0; i < lmb.reserved.cnt; i++)
562 reserve_bootmem(lmb.reserved.region[i].base,
563 lmb_size_bytes(&lmb.reserved, i));
564
565 for (i=0; i < lmb.memory.cnt; i++)
566 memory_present(0, lmb_start_pfn(&lmb.memory, i),
567 lmb_end_pfn(&lmb.memory, i));
568}
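The sizing comment in do_init_bootmem() works out to one bit per page of RAM, rounded up to whole pages. A quick standalone sketch of that arithmetic, with an assumed 4 KB page size standing in for bootmem_bootmap_pages():

/* Sketch of the bootmem bitmap sizing: one bit per page, rounded up to pages. */
#include <stdio.h>

#define PAGE_SIZE 4096UL	/* assumed for the example */

static unsigned long bootmap_pages(unsigned long total_pages)
{
	unsigned long bytes = (total_pages + 7) / 8;	/* one bit per page */
	return (bytes + PAGE_SIZE - 1) / PAGE_SIZE;	/* round up to whole pages */
}

int main(void)
{
	unsigned long total_pages = (2UL << 30) / PAGE_SIZE;	/* e.g. 2 GB of RAM */

	printf("%lu page(s) needed for the bootmem bitmap\n",
	       bootmap_pages(total_pages));
	return 0;
}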
569
570/*
571 * paging_init() sets up the page tables - in fact we've already done this.
572 */
573void __init paging_init(void)
574{
575 unsigned long zones_size[MAX_NR_ZONES];
576 unsigned long zholes_size[MAX_NR_ZONES];
577 unsigned long total_ram = lmb_phys_mem_size();
578 unsigned long top_of_ram = lmb_end_of_DRAM();
579
580 printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
581 top_of_ram, total_ram);
582 printk(KERN_INFO "Memory hole size: %ldMB\n",
583 (top_of_ram - total_ram) >> 20);
584 /*
585 * All pages are DMA-able so we put them all in the DMA zone.
586 */
587 memset(zones_size, 0, sizeof(zones_size));
588 memset(zholes_size, 0, sizeof(zholes_size));
589
590 zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
591 zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
592
593 free_area_init_node(0, NODE_DATA(0), zones_size,
594 __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
595}
596#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
597
598static struct kcore_list kcore_vmem;
599
600static int __init setup_kcore(void)
601{
602 int i;
603
604 for (i=0; i < lmb.memory.cnt; i++) {
605 unsigned long base, size;
606 struct kcore_list *kcore_mem;
607
608 base = lmb.memory.region[i].base;
609 size = lmb.memory.region[i].size;
610
611 /* GFP_ATOMIC to avoid might_sleep warnings during boot */
612 kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC);
613 if (!kcore_mem)
614 panic("mem_init: kmalloc failed\n");
615
616 kclist_add(kcore_mem, __va(base), size);
617 }
618
619 kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
620
621 return 0;
622}
623module_init(setup_kcore);
624
625void __init mem_init(void)
626{
627#ifdef CONFIG_NEED_MULTIPLE_NODES
628 int nid;
629#endif
630 pg_data_t *pgdat;
631 unsigned long i;
632 struct page *page;
633 unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
634
635 num_physpages = max_low_pfn; /* RAM is assumed contiguous */
636 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
637
638#ifdef CONFIG_NEED_MULTIPLE_NODES
639 for_each_online_node(nid) {
640 if (NODE_DATA(nid)->node_spanned_pages != 0) {
641 printk("freeing bootmem node %x\n", nid);
642 totalram_pages +=
643 free_all_bootmem_node(NODE_DATA(nid));
644 }
645 }
646#else
647 max_mapnr = num_physpages;
648 totalram_pages += free_all_bootmem();
649#endif
650
651 for_each_pgdat(pgdat) {
652 for (i = 0; i < pgdat->node_spanned_pages; i++) {
653 page = pgdat_page_nr(pgdat, i);
654 if (PageReserved(page))
655 reservedpages++;
656 }
657 }
658
659 codesize = (unsigned long)&_etext - (unsigned long)&_stext;
660 initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
661 datasize = (unsigned long)&_edata - (unsigned long)&__init_end;
662 bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
663
664 printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
665 "%luk reserved, %luk data, %luk bss, %luk init)\n",
666 (unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
667 num_physpages << (PAGE_SHIFT-10),
668 codesize >> 10,
669 reservedpages << (PAGE_SHIFT-10),
670 datasize >> 10,
671 bsssize >> 10,
672 initsize >> 10);
673
674 mem_init_done = 1;
675
676 /* Initialize the vDSO */
677 vdso_init();
678}
679
680/*
681 * This is called when a page has been modified by the kernel.
682 * It just marks the page as not i-cache clean. We do the i-cache
683 * flush later when the page is given to a user process, if necessary.
684 */
685void flush_dcache_page(struct page *page)
686{
687 if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
688 return;
689 /* avoid an atomic op if possible */
690 if (test_bit(PG_arch_1, &page->flags))
691 clear_bit(PG_arch_1, &page->flags);
692}
693EXPORT_SYMBOL(flush_dcache_page);
694
695void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
696{
697 clear_page(page);
698
699 if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
700 return;
701 /*
 702 * We shouldn't have to do this, but some versions of glibc
703 * require it (ld.so assumes zero filled pages are icache clean)
704 * - Anton
705 */
706
707 /* avoid an atomic op if possible */
708 if (test_bit(PG_arch_1, &pg->flags))
709 clear_bit(PG_arch_1, &pg->flags);
710}
711EXPORT_SYMBOL(clear_user_page);
712
713void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
714 struct page *pg)
715{
716 copy_page(vto, vfrom);
717
718 /*
719 * We should be able to use the following optimisation, however
720 * there are two problems.
721 * Firstly a bug in some versions of binutils meant PLT sections
722 * were not marked executable.
723 * Secondly the first word in the GOT section is blrl, used
724 * to establish the GOT address. Until recently the GOT was
725 * not marked executable.
726 * - Anton
727 */
728#if 0
729 if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
730 return;
731#endif
732
733 if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
734 return;
735
736 /* avoid an atomic op if possible */
737 if (test_bit(PG_arch_1, &pg->flags))
738 clear_bit(PG_arch_1, &pg->flags);
739}
740
741void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
742 unsigned long addr, int len)
743{
744 unsigned long maddr;
745
746 maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
747 flush_icache_range(maddr, maddr + len);
748}
749EXPORT_SYMBOL(flush_icache_user_range);
750
751/*
752 * This is called at the end of handling a user page fault, when the
753 * fault has been handled by updating a PTE in the linux page tables.
754 * We use it to preload an HPTE into the hash table corresponding to
755 * the updated linux PTE.
756 *
757 * This must always be called with the mm->page_table_lock held
758 */
759void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
760 pte_t pte)
761{
762 unsigned long vsid;
763 void *pgdir;
764 pte_t *ptep;
765 int local = 0;
766 cpumask_t tmp;
767 unsigned long flags;
768
769 /* handle i-cache coherency */
770 if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
771 !cpu_has_feature(CPU_FTR_NOEXECUTE)) {
772 unsigned long pfn = pte_pfn(pte);
773 if (pfn_valid(pfn)) {
774 struct page *page = pfn_to_page(pfn);
775 if (!PageReserved(page)
776 && !test_bit(PG_arch_1, &page->flags)) {
777 __flush_dcache_icache(page_address(page));
778 set_bit(PG_arch_1, &page->flags);
779 }
780 }
781 }
782
783 /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
784 if (!pte_young(pte))
785 return;
786
787 pgdir = vma->vm_mm->pgd;
788 if (pgdir == NULL)
789 return;
790
791 ptep = find_linux_pte(pgdir, ea);
792 if (!ptep)
793 return;
794
795 vsid = get_vsid(vma->vm_mm->context.id, ea);
796
797 local_irq_save(flags);
798 tmp = cpumask_of_cpu(smp_processor_id());
799 if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
800 local = 1;
801
802 __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
803 0x300, local);
804 local_irq_restore(flags);
805}
806
807void __iomem * reserve_phb_iospace(unsigned long size)
808{
809 void __iomem *virt_addr;
810
811 if (phbs_io_bot >= IMALLOC_BASE)
812 panic("reserve_phb_iospace(): phb io space overflow\n");
813
814 virt_addr = (void __iomem *) phbs_io_bot;
815 phbs_io_bot += size;
816
817 return virt_addr;
818}
819
820static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
821{
822 memset(addr, 0, kmem_cache_size(cache));
823}
824
825static const int pgtable_cache_size[2] = {
826 PTE_TABLE_SIZE, PMD_TABLE_SIZE
827};
828static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
829 "pgd_pte_cache", "pud_pmd_cache",
830};
831
832kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
833
834void pgtable_cache_init(void)
835{
836 int i;
837
838 BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
839 BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
840 BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
841 BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
842
843 for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
844 int size = pgtable_cache_size[i];
845 const char *name = pgtable_cache_name[i];
846
847 pgtable_cache[i] = kmem_cache_create(name,
848 size, size,
849 SLAB_HWCACHE_ALIGN
850 | SLAB_MUST_HWCACHE_ALIGN,
851 zero_ctor,
852 NULL);
853 if (! pgtable_cache[i])
854 panic("pgtable_cache_init(): could not create %s!\n",
855 name);
856 }
857}
858
859pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
860 unsigned long size, pgprot_t vma_prot)
861{
862 if (ppc_md.phys_mem_access_prot)
863 return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
864
865 if (!page_is_ram(addr >> PAGE_SHIFT))
866 vma_prot = __pgprot(pgprot_val(vma_prot)
867 | _PAGE_GUARDED | _PAGE_NO_CACHE);
868 return vma_prot;
869}
870EXPORT_SYMBOL(phys_mem_access_prot);
diff --git a/arch/ppc64/mm/mmap.c b/arch/ppc64/mm/mmap.c
deleted file mode 100644
index fe65f522aff3..000000000000
--- a/arch/ppc64/mm/mmap.c
+++ /dev/null
@@ -1,86 +0,0 @@
1/*
2 * linux/arch/ppc64/mm/mmap.c
3 *
4 * flexible mmap layout support
5 *
6 * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
7 * All Rights Reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 *
24 * Started by Ingo Molnar <mingo@elte.hu>
25 */
26
27#include <linux/personality.h>
28#include <linux/mm.h>
29
30/*
31 * Top of mmap area (just below the process stack).
32 *
 33 * Leave at least a ~128 MB hole.
34 */
35#define MIN_GAP (128*1024*1024)
36#define MAX_GAP (TASK_SIZE/6*5)
37
38static inline unsigned long mmap_base(void)
39{
40 unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
41
42 if (gap < MIN_GAP)
43 gap = MIN_GAP;
44 else if (gap > MAX_GAP)
45 gap = MAX_GAP;
46
47 return TASK_SIZE - (gap & PAGE_MASK);
48}
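mmap_base() clamps the stack rlimit between MIN_GAP and MAX_GAP before subtracting it from the top of the address space. A user-space sketch of the same clamp; TASK_SIZE here is an assumed example value, not the real ppc64 constant:

/* Sketch of the flexible-mmap base calculation; TASK_SIZE and page size are assumed. */
#include <stdio.h>

#define TASK_SIZE	(1UL << 40)		/* assumed example only */
#define PAGE_MASK	(~((1UL << 12) - 1))	/* assumed 4 KB pages */
#define MIN_GAP		(128UL * 1024 * 1024)
#define MAX_GAP		(TASK_SIZE / 6 * 5)

static unsigned long mmap_base(unsigned long stack_rlimit)
{
	unsigned long gap = stack_rlimit;

	if (gap < MIN_GAP)
		gap = MIN_GAP;
	else if (gap > MAX_GAP)
		gap = MAX_GAP;

	return TASK_SIZE - (gap & PAGE_MASK);
}

int main(void)
{
	printf("8 MB rlimit  -> base 0x%lx\n", mmap_base(8UL << 20));	/* clamped up to MIN_GAP */
	printf("64 GB rlimit -> base 0x%lx\n", mmap_base(64UL << 30));	/* used as-is */
	return 0;
}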
49
50static inline int mmap_is_legacy(void)
51{
52 /*
53 * Force standard allocation for 64 bit programs.
54 */
55 if (!test_thread_flag(TIF_32BIT))
56 return 1;
57
58 if (current->personality & ADDR_COMPAT_LAYOUT)
59 return 1;
60
61 if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
62 return 1;
63
64 return sysctl_legacy_va_layout;
65}
66
67/*
68 * This function, called very early during the creation of a new
69 * process VM image, sets up which VM layout function to use:
70 */
71void arch_pick_mmap_layout(struct mm_struct *mm)
72{
73 /*
74 * Fall back to the standard layout if the personality
75 * bit is set, or if the expected stack growth is unlimited:
76 */
77 if (mmap_is_legacy()) {
78 mm->mmap_base = TASK_UNMAPPED_BASE;
79 mm->get_unmapped_area = arch_get_unmapped_area;
80 mm->unmap_area = arch_unmap_area;
81 } else {
82 mm->mmap_base = mmap_base();
83 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
84 mm->unmap_area = arch_unmap_area_topdown;
85 }
86}
diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c
deleted file mode 100644
index cb864b8f2750..000000000000
--- a/arch/ppc64/mm/numa.c
+++ /dev/null
@@ -1,779 +0,0 @@
1/*
2 * pSeries NUMA support
3 *
4 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/threads.h>
12#include <linux/bootmem.h>
13#include <linux/init.h>
14#include <linux/mm.h>
15#include <linux/mmzone.h>
16#include <linux/module.h>
17#include <linux/nodemask.h>
18#include <linux/cpu.h>
19#include <linux/notifier.h>
20#include <asm/lmb.h>
21#include <asm/machdep.h>
22#include <asm/abs_addr.h>
23
24static int numa_enabled = 1;
25
26static int numa_debug;
27#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
28
29#ifdef DEBUG_NUMA
30#define ARRAY_INITIALISER -1
31#else
32#define ARRAY_INITIALISER 0
33#endif
34
35int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] =
36 ARRAY_INITIALISER};
37char *numa_memory_lookup_table;
38cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
39int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0};
40
41struct pglist_data *node_data[MAX_NUMNODES];
42bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
43static int min_common_depth;
44
45/*
46 * We need somewhere to store start/span for each node until we have
47 * allocated the real node_data structures.
48 */
49static struct {
50 unsigned long node_start_pfn;
51 unsigned long node_end_pfn;
52 unsigned long node_present_pages;
53} init_node_data[MAX_NUMNODES] __initdata;
54
55EXPORT_SYMBOL(node_data);
56EXPORT_SYMBOL(numa_cpu_lookup_table);
57EXPORT_SYMBOL(numa_memory_lookup_table);
58EXPORT_SYMBOL(numa_cpumask_lookup_table);
59EXPORT_SYMBOL(nr_cpus_in_node);
60
61static inline void map_cpu_to_node(int cpu, int node)
62{
63 numa_cpu_lookup_table[cpu] = node;
64 if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) {
65 cpu_set(cpu, numa_cpumask_lookup_table[node]);
66 nr_cpus_in_node[node]++;
67 }
68}
69
70#ifdef CONFIG_HOTPLUG_CPU
71static void unmap_cpu_from_node(unsigned long cpu)
72{
73 int node = numa_cpu_lookup_table[cpu];
74
75 dbg("removing cpu %lu from node %d\n", cpu, node);
76
77 if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
78 cpu_clear(cpu, numa_cpumask_lookup_table[node]);
79 nr_cpus_in_node[node]--;
80 } else {
81 printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
82 cpu, node);
83 }
84}
85#endif /* CONFIG_HOTPLUG_CPU */
86
87static struct device_node * __devinit find_cpu_node(unsigned int cpu)
88{
89 unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
90 struct device_node *cpu_node = NULL;
91 unsigned int *interrupt_server, *reg;
92 int len;
93
94 while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
95 /* Try interrupt server first */
96 interrupt_server = (unsigned int *)get_property(cpu_node,
97 "ibm,ppc-interrupt-server#s", &len);
98
99 len = len / sizeof(u32);
100
101 if (interrupt_server && (len > 0)) {
102 while (len--) {
103 if (interrupt_server[len] == hw_cpuid)
104 return cpu_node;
105 }
106 } else {
107 reg = (unsigned int *)get_property(cpu_node,
108 "reg", &len);
109 if (reg && (len > 0) && (reg[0] == hw_cpuid))
110 return cpu_node;
111 }
112 }
113
114 return NULL;
115}
116
117/* must hold reference to node during call */
118static int *of_get_associativity(struct device_node *dev)
119{
120 return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
121}
122
123static int of_node_numa_domain(struct device_node *device)
124{
125 int numa_domain;
126 unsigned int *tmp;
127
128 if (min_common_depth == -1)
129 return 0;
130
131 tmp = of_get_associativity(device);
132 if (tmp && (tmp[0] >= min_common_depth)) {
133 numa_domain = tmp[min_common_depth];
134 } else {
135 dbg("WARNING: no NUMA information for %s\n",
136 device->full_name);
137 numa_domain = 0;
138 }
139 return numa_domain;
140}
141
142/*
143 * In theory, the "ibm,associativity" property may contain multiple
144 * associativity lists because a resource may be multiply connected
145 * into the machine. This resource then has different associativity
146 * characteristics relative to its multiple connections. We ignore
147 * this for now. We also assume that all cpu and memory sets have
148 * their distances represented at a common level. This won't be
 149 * true for hierarchical NUMA.
150 *
151 * In any case the ibm,associativity-reference-points should give
152 * the correct depth for a normal NUMA system.
153 *
154 * - Dave Hansen <haveblue@us.ibm.com>
155 */
156static int __init find_min_common_depth(void)
157{
158 int depth;
159 unsigned int *ref_points;
160 struct device_node *rtas_root;
161 unsigned int len;
162
163 rtas_root = of_find_node_by_path("/rtas");
164
165 if (!rtas_root)
166 return -1;
167
168 /*
169 * this property is 2 32-bit integers, each representing a level of
170 * depth in the associativity nodes. The first is for an SMP
171 * configuration (should be all 0's) and the second is for a normal
172 * NUMA configuration.
173 */
174 ref_points = (unsigned int *)get_property(rtas_root,
175 "ibm,associativity-reference-points", &len);
176
177 if ((len >= 1) && ref_points) {
178 depth = ref_points[1];
179 } else {
180 dbg("WARNING: could not find NUMA "
181 "associativity reference point\n");
182 depth = -1;
183 }
184 of_node_put(rtas_root);
185
186 return depth;
187}
188
189static int __init get_mem_addr_cells(void)
190{
191 struct device_node *memory = NULL;
192 int rc;
193
194 memory = of_find_node_by_type(memory, "memory");
195 if (!memory)
196 return 0; /* it won't matter */
197
198 rc = prom_n_addr_cells(memory);
199 return rc;
200}
201
202static int __init get_mem_size_cells(void)
203{
204 struct device_node *memory = NULL;
205 int rc;
206
207 memory = of_find_node_by_type(memory, "memory");
208 if (!memory)
209 return 0; /* it won't matter */
210 rc = prom_n_size_cells(memory);
211 return rc;
212}
213
214static unsigned long read_n_cells(int n, unsigned int **buf)
215{
216 unsigned long result = 0;
217
218 while (n--) {
219 result = (result << 32) | **buf;
220 (*buf)++;
221 }
222 return result;
223}
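read_n_cells() concatenates 32-bit device-tree cells into a single value and advances the caller's cursor through the property. A tiny standalone sketch of the same shift-and-or, fed a made-up two-cell address / two-cell size "reg" property:

/* Sketch of device-tree cell concatenation as done by read_n_cells(). */
#include <stdio.h>

static unsigned long read_n_cells(int n, unsigned int **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | **buf;
		(*buf)++;
	}
	return result;
}

int main(void)
{
	/* invented example property: 2 address cells followed by 2 size cells */
	unsigned int cells[] = { 0x00000001, 0x00000000, 0x00000000, 0x40000000 };
	unsigned int *p = cells;
	unsigned long start = read_n_cells(2, &p);	/* 0x100000000 */
	unsigned long size  = read_n_cells(2, &p);	/* 0x40000000  */

	printf("start=0x%lx size=0x%lx\n", start, size);
	return 0;
}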
224
225/*
226 * Figure out to which domain a cpu belongs and stick it there.
227 * Return the id of the domain used.
228 */
229static int numa_setup_cpu(unsigned long lcpu)
230{
231 int numa_domain = 0;
232 struct device_node *cpu = find_cpu_node(lcpu);
233
234 if (!cpu) {
235 WARN_ON(1);
236 goto out;
237 }
238
239 numa_domain = of_node_numa_domain(cpu);
240
241 if (numa_domain >= num_online_nodes()) {
242 /*
243 * POWER4 LPAR uses 0xffff as invalid node,
244 * dont warn in this case.
245 */
246 if (numa_domain != 0xffff)
247 printk(KERN_ERR "WARNING: cpu %ld "
248 "maps to invalid NUMA node %d\n",
249 lcpu, numa_domain);
250 numa_domain = 0;
251 }
252out:
253 node_set_online(numa_domain);
254
255 map_cpu_to_node(lcpu, numa_domain);
256
257 of_node_put(cpu);
258
259 return numa_domain;
260}
261
262static int cpu_numa_callback(struct notifier_block *nfb,
263 unsigned long action,
264 void *hcpu)
265{
266 unsigned long lcpu = (unsigned long)hcpu;
267 int ret = NOTIFY_DONE;
268
269 switch (action) {
270 case CPU_UP_PREPARE:
271 if (min_common_depth == -1 || !numa_enabled)
272 map_cpu_to_node(lcpu, 0);
273 else
274 numa_setup_cpu(lcpu);
275 ret = NOTIFY_OK;
276 break;
277#ifdef CONFIG_HOTPLUG_CPU
278 case CPU_DEAD:
279 case CPU_UP_CANCELED:
280 unmap_cpu_from_node(lcpu);
 281		ret = NOTIFY_OK;
 282		break;
283#endif
284 }
285 return ret;
286}
287
288/*
289 * Check and possibly modify a memory region to enforce the memory limit.
290 *
291 * Returns the size the region should have to enforce the memory limit.
292 * This will either be the original value of size, a truncated value,
293 * or zero. If the returned value of size is 0 the region should be
 294 * discarded as it lies wholly above the memory limit.
295 */
296static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size)
297{
298 /*
299 * We use lmb_end_of_DRAM() in here instead of memory_limit because
300 * we've already adjusted it for the limit and it takes care of
301 * having memory holes below the limit.
302 */
303 extern unsigned long memory_limit;
304
305 if (! memory_limit)
306 return size;
307
308 if (start + size <= lmb_end_of_DRAM())
309 return size;
310
311 if (start >= lmb_end_of_DRAM())
312 return 0;
313
314 return lmb_end_of_DRAM() - start;
315}
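numa_enforce_memory_limit() has three outcomes: keep the region, truncate it at the end of usable DRAM, or drop it entirely. A standalone sketch of that decision, with an assumed end-of-DRAM value standing in for lmb_end_of_DRAM():

/* Sketch of the memory-limit truncation; end_of_dram is an assumed constant. */
#include <stdio.h>

static unsigned long enforce_limit(unsigned long start, unsigned long size,
				   unsigned long end_of_dram)
{
	if (start + size <= end_of_dram)
		return size;			/* fully below the limit: keep as-is */
	if (start >= end_of_dram)
		return 0;			/* fully above the limit: discard */
	return end_of_dram - start;		/* straddles the limit: truncate */
}

int main(void)
{
	unsigned long end = 0x40000000UL;	/* assume 1 GB of usable DRAM */

	printf("0x%lx\n", enforce_limit(0x00000000, 0x10000000, end));	/* kept */
	printf("0x%lx\n", enforce_limit(0x50000000, 0x10000000, end));	/* dropped */
	printf("0x%lx\n", enforce_limit(0x30000000, 0x20000000, end));	/* truncated */
	return 0;
}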
316
317static int __init parse_numa_properties(void)
318{
319 struct device_node *cpu = NULL;
320 struct device_node *memory = NULL;
321 int addr_cells, size_cells;
322 int max_domain = 0;
323 long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
324 unsigned long i;
325
326 if (numa_enabled == 0) {
327 printk(KERN_WARNING "NUMA disabled by user\n");
328 return -1;
329 }
330
331 numa_memory_lookup_table =
332 (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
333 memset(numa_memory_lookup_table, 0, entries * sizeof(char));
334
335 for (i = 0; i < entries ; i++)
336 numa_memory_lookup_table[i] = ARRAY_INITIALISER;
337
338 min_common_depth = find_min_common_depth();
339
340 dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
341 if (min_common_depth < 0)
342 return min_common_depth;
343
344 max_domain = numa_setup_cpu(boot_cpuid);
345
346 /*
347 * Even though we connect cpus to numa domains later in SMP init,
348 * we need to know the maximum node id now. This is because each
349 * node id must have NODE_DATA etc backing it.
350 * As a result of hotplug we could still have cpus appear later on
351 * with larger node ids. In that case we force the cpu into node 0.
352 */
353 for_each_cpu(i) {
354 int numa_domain;
355
356 cpu = find_cpu_node(i);
357
358 if (cpu) {
359 numa_domain = of_node_numa_domain(cpu);
360 of_node_put(cpu);
361
362 if (numa_domain < MAX_NUMNODES &&
363 max_domain < numa_domain)
364 max_domain = numa_domain;
365 }
366 }
367
368 addr_cells = get_mem_addr_cells();
369 size_cells = get_mem_size_cells();
370 memory = NULL;
371 while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
372 unsigned long start;
373 unsigned long size;
374 int numa_domain;
375 int ranges;
376 unsigned int *memcell_buf;
377 unsigned int len;
378
379 memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
380 if (!memcell_buf || len <= 0)
381 continue;
382
383 ranges = memory->n_addrs;
384new_range:
385 /* these are order-sensitive, and modify the buffer pointer */
386 start = read_n_cells(addr_cells, &memcell_buf);
387 size = read_n_cells(size_cells, &memcell_buf);
388
389 start = _ALIGN_DOWN(start, MEMORY_INCREMENT);
390 size = _ALIGN_UP(size, MEMORY_INCREMENT);
391
392 numa_domain = of_node_numa_domain(memory);
393
394 if (numa_domain >= MAX_NUMNODES) {
395 if (numa_domain != 0xffff)
396 printk(KERN_ERR "WARNING: memory at %lx maps "
397 "to invalid NUMA node %d\n", start,
398 numa_domain);
399 numa_domain = 0;
400 }
401
402 if (max_domain < numa_domain)
403 max_domain = numa_domain;
404
405 if (! (size = numa_enforce_memory_limit(start, size))) {
406 if (--ranges)
407 goto new_range;
408 else
409 continue;
410 }
411
412 /*
413 * Initialize new node struct, or add to an existing one.
414 */
415 if (init_node_data[numa_domain].node_end_pfn) {
416 if ((start / PAGE_SIZE) <
417 init_node_data[numa_domain].node_start_pfn)
418 init_node_data[numa_domain].node_start_pfn =
419 start / PAGE_SIZE;
420 if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) >
421 init_node_data[numa_domain].node_end_pfn)
422 init_node_data[numa_domain].node_end_pfn =
423 (start / PAGE_SIZE) +
424 (size / PAGE_SIZE);
425
426 init_node_data[numa_domain].node_present_pages +=
427 size / PAGE_SIZE;
428 } else {
429 node_set_online(numa_domain);
430
431 init_node_data[numa_domain].node_start_pfn =
432 start / PAGE_SIZE;
433 init_node_data[numa_domain].node_end_pfn =
434 init_node_data[numa_domain].node_start_pfn +
435 size / PAGE_SIZE;
436 init_node_data[numa_domain].node_present_pages =
437 size / PAGE_SIZE;
438 }
439
440 for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
441 numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
442 numa_domain;
443
444 if (--ranges)
445 goto new_range;
446 }
447
448 for (i = 0; i <= max_domain; i++)
449 node_set_online(i);
450
451 return 0;
452}
453
454static void __init setup_nonnuma(void)
455{
456 unsigned long top_of_ram = lmb_end_of_DRAM();
457 unsigned long total_ram = lmb_phys_mem_size();
458 unsigned long i;
459
460 printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
461 top_of_ram, total_ram);
462 printk(KERN_INFO "Memory hole size: %ldMB\n",
463 (top_of_ram - total_ram) >> 20);
464
465 if (!numa_memory_lookup_table) {
466 long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT;
467 numa_memory_lookup_table =
468 (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
469 memset(numa_memory_lookup_table, 0, entries * sizeof(char));
470 for (i = 0; i < entries ; i++)
471 numa_memory_lookup_table[i] = ARRAY_INITIALISER;
472 }
473
474 map_cpu_to_node(boot_cpuid, 0);
475
476 node_set_online(0);
477
478 init_node_data[0].node_start_pfn = 0;
479 init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE;
480 init_node_data[0].node_present_pages = total_ram / PAGE_SIZE;
481
482 for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
483 numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
484}
485
486static void __init dump_numa_topology(void)
487{
488 unsigned int node;
489 unsigned int count;
490
491 if (min_common_depth == -1 || !numa_enabled)
492 return;
493
494 for_each_online_node(node) {
495 unsigned long i;
496
497 printk(KERN_INFO "Node %d Memory:", node);
498
499 count = 0;
500
501 for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) {
502 if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) {
503 if (count == 0)
504 printk(" 0x%lx", i);
505 ++count;
506 } else {
507 if (count > 0)
508 printk("-0x%lx", i);
509 count = 0;
510 }
511 }
512
513 if (count > 0)
514 printk("-0x%lx", i);
515 printk("\n");
516 }
517 return;
518}
519
520/*
 521 * Allocate some memory, using the lmb or the bootmem allocator as
522 * required. nid is the preferred node and end is the physical address of
523 * the highest address in the node.
524 *
525 * Returns the physical address of the memory.
526 */
527static unsigned long careful_allocation(int nid, unsigned long size,
528 unsigned long align, unsigned long end)
529{
530 unsigned long ret = lmb_alloc_base(size, align, end);
531
532 /* retry over all memory */
533 if (!ret)
534 ret = lmb_alloc_base(size, align, lmb_end_of_DRAM());
535
536 if (!ret)
537 panic("numa.c: cannot allocate %lu bytes on node %d",
538 size, nid);
539
540 /*
541 * If the memory came from a previously allocated node, we must
542 * retry with the bootmem allocator.
543 */
544 if (pa_to_nid(ret) < nid) {
545 nid = pa_to_nid(ret);
546 ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid),
547 size, align, 0);
548
549 if (!ret)
550 panic("numa.c: cannot allocate %lu bytes on node %d",
551 size, nid);
552
553 ret = virt_to_abs(ret);
554
555 dbg("alloc_bootmem %lx %lx\n", ret, size);
556 }
557
558 return ret;
559}
560
561void __init do_init_bootmem(void)
562{
563 int nid;
564 int addr_cells, size_cells;
565 struct device_node *memory = NULL;
566 static struct notifier_block ppc64_numa_nb = {
567 .notifier_call = cpu_numa_callback,
568 .priority = 1 /* Must run before sched domains notifier. */
569 };
570
571 min_low_pfn = 0;
572 max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
573 max_pfn = max_low_pfn;
574
575 if (parse_numa_properties())
576 setup_nonnuma();
577 else
578 dump_numa_topology();
579
580 register_cpu_notifier(&ppc64_numa_nb);
581
582 for_each_online_node(nid) {
583 unsigned long start_paddr, end_paddr;
584 int i;
585 unsigned long bootmem_paddr;
586 unsigned long bootmap_pages;
587
588 start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE;
589 end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE;
590
591 /* Allocate the node structure node local if possible */
592 NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid,
593 sizeof(struct pglist_data),
594 SMP_CACHE_BYTES, end_paddr);
595 NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid));
596 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
597
598 dbg("node %d\n", nid);
599 dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
600
601 NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
602 NODE_DATA(nid)->node_start_pfn =
603 init_node_data[nid].node_start_pfn;
604 NODE_DATA(nid)->node_spanned_pages =
605 end_paddr - start_paddr;
606
607 if (NODE_DATA(nid)->node_spanned_pages == 0)
608 continue;
609
610 dbg("start_paddr = %lx\n", start_paddr);
611 dbg("end_paddr = %lx\n", end_paddr);
612
613 bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT);
614
615 bootmem_paddr = careful_allocation(nid,
616 bootmap_pages << PAGE_SHIFT,
617 PAGE_SIZE, end_paddr);
618 memset(abs_to_virt(bootmem_paddr), 0,
619 bootmap_pages << PAGE_SHIFT);
620 dbg("bootmap_paddr = %lx\n", bootmem_paddr);
621
622 init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
623 start_paddr >> PAGE_SHIFT,
624 end_paddr >> PAGE_SHIFT);
625
626 /*
627 * We need to do another scan of all memory sections to
628 * associate memory with the correct node.
629 */
630 addr_cells = get_mem_addr_cells();
631 size_cells = get_mem_size_cells();
632 memory = NULL;
633 while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
634 unsigned long mem_start, mem_size;
635 int numa_domain, ranges;
636 unsigned int *memcell_buf;
637 unsigned int len;
638
639 memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
640 if (!memcell_buf || len <= 0)
641 continue;
642
643 ranges = memory->n_addrs; /* ranges in cell */
644new_range:
645 mem_start = read_n_cells(addr_cells, &memcell_buf);
646 mem_size = read_n_cells(size_cells, &memcell_buf);
647 if (numa_enabled) {
648 numa_domain = of_node_numa_domain(memory);
649 if (numa_domain >= MAX_NUMNODES)
650 numa_domain = 0;
651 } else
652 numa_domain = 0;
653
654 if (numa_domain != nid)
655 continue;
656
657 mem_size = numa_enforce_memory_limit(mem_start, mem_size);
658 if (mem_size) {
659 dbg("free_bootmem %lx %lx\n", mem_start, mem_size);
660 free_bootmem_node(NODE_DATA(nid), mem_start, mem_size);
661 }
662
663 if (--ranges) /* process all ranges in cell */
664 goto new_range;
665 }
666
667 /*
668 * Mark reserved regions on this node
669 */
670 for (i = 0; i < lmb.reserved.cnt; i++) {
671 unsigned long physbase = lmb.reserved.region[i].base;
672 unsigned long size = lmb.reserved.region[i].size;
673
674 if (pa_to_nid(physbase) != nid &&
675 pa_to_nid(physbase+size-1) != nid)
676 continue;
677
678 if (physbase < end_paddr &&
679 (physbase+size) > start_paddr) {
680 /* overlaps */
681 if (physbase < start_paddr) {
682 size -= start_paddr - physbase;
683 physbase = start_paddr;
684 }
685
686 if (size > end_paddr - physbase)
687 size = end_paddr - physbase;
688
689 dbg("reserve_bootmem %lx %lx\n", physbase,
690 size);
691 reserve_bootmem_node(NODE_DATA(nid), physbase,
692 size);
693 }
694 }
695 /*
 696 * This loop may look familiar, but we have to do it again
697 * after marking our reserved memory to mark memory present
698 * for sparsemem.
699 */
700 addr_cells = get_mem_addr_cells();
701 size_cells = get_mem_size_cells();
702 memory = NULL;
703 while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
704 unsigned long mem_start, mem_size;
705 int numa_domain, ranges;
706 unsigned int *memcell_buf;
707 unsigned int len;
708
709 memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
710 if (!memcell_buf || len <= 0)
711 continue;
712
713 ranges = memory->n_addrs; /* ranges in cell */
714new_range2:
715 mem_start = read_n_cells(addr_cells, &memcell_buf);
716 mem_size = read_n_cells(size_cells, &memcell_buf);
717 if (numa_enabled) {
718 numa_domain = of_node_numa_domain(memory);
719 if (numa_domain >= MAX_NUMNODES)
720 numa_domain = 0;
721 } else
722 numa_domain = 0;
723
724 if (numa_domain != nid)
725 continue;
726
727 mem_size = numa_enforce_memory_limit(mem_start, mem_size);
728 memory_present(numa_domain, mem_start >> PAGE_SHIFT,
729 (mem_start + mem_size) >> PAGE_SHIFT);
730
731 if (--ranges) /* process all ranges in cell */
732 goto new_range2;
733 }
734
735 }
736}
737
738void __init paging_init(void)
739{
740 unsigned long zones_size[MAX_NR_ZONES];
741 unsigned long zholes_size[MAX_NR_ZONES];
742 int nid;
743
744 memset(zones_size, 0, sizeof(zones_size));
745 memset(zholes_size, 0, sizeof(zholes_size));
746
747 for_each_online_node(nid) {
748 unsigned long start_pfn;
749 unsigned long end_pfn;
750
751 start_pfn = init_node_data[nid].node_start_pfn;
752 end_pfn = init_node_data[nid].node_end_pfn;
753
754 zones_size[ZONE_DMA] = end_pfn - start_pfn;
755 zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
756 init_node_data[nid].node_present_pages;
757
758 dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
759 zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
760
761 free_area_init_node(nid, NODE_DATA(nid), zones_size,
762 start_pfn, zholes_size);
763 }
764}
765
766static int __init early_numa(char *p)
767{
768 if (!p)
769 return 0;
770
771 if (strstr(p, "off"))
772 numa_enabled = 0;
773
774 if (strstr(p, "debug"))
775 numa_debug = 1;
776
777 return 0;
778}
779early_param("numa", early_numa);
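
The "reg" parsing above leans on read_n_cells() to turn one or two 32-bit device-tree cells into a single value. A minimal stand-alone sketch of such a helper, assuming the usual most-significant-cell-first layout (the sample property values below are made up):

#include <stdio.h>

/* Combine n 32-bit cells into one value, most significant cell
 * first, advancing the caller's buffer pointer past them. */
static unsigned long read_n_cells(int n, unsigned int **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | **buf;
		(*buf)++;
	}
	return result;
}

int main(void)
{
	/* hypothetical "reg" property: 2 address cells, 2 size cells */
	unsigned int reg[] = { 0x00000001, 0x00000000, 0x00000000, 0x10000000 };
	unsigned int *p = reg;
	unsigned long start = read_n_cells(2, &p);
	unsigned long size = read_n_cells(2, &p);

	printf("start=0x%lx size=0x%lx\n", start, size);	/* 0x100000000, 256MB */
	return 0;
}
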
diff --git a/arch/ppc64/mm/slb.c b/arch/ppc64/mm/slb.c
deleted file mode 100644
index 0473953f6a37..000000000000
--- a/arch/ppc64/mm/slb.c
+++ /dev/null
@@ -1,158 +0,0 @@
1/*
2 * PowerPC64 SLB support.
3 *
4 * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
5 * Based on earlier code written by:
6 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
7 * Copyright (c) 2001 Dave Engebretsen
8 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
9 *
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16
17#include <linux/config.h>
18#include <asm/pgtable.h>
19#include <asm/mmu.h>
20#include <asm/mmu_context.h>
21#include <asm/paca.h>
22#include <asm/cputable.h>
23
24extern void slb_allocate(unsigned long ea);
25
26static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
27{
28 return (ea & ESID_MASK) | SLB_ESID_V | slot;
29}
30
31static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags)
32{
33 return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags;
34}
35
36static inline void create_slbe(unsigned long ea, unsigned long flags,
37 unsigned long entry)
38{
39 asm volatile("slbmte %0,%1" :
40 : "r" (mk_vsid_data(ea, flags)),
41 "r" (mk_esid_data(ea, entry))
42 : "memory" );
43}
44
45static void slb_flush_and_rebolt(void)
46{
47 /* If you change this make sure you change SLB_NUM_BOLTED
48 * appropriately too. */
49 unsigned long ksp_flags = SLB_VSID_KERNEL;
50 unsigned long ksp_esid_data;
51
52 WARN_ON(!irqs_disabled());
53
54 if (cpu_has_feature(CPU_FTR_16M_PAGE))
55 ksp_flags |= SLB_VSID_L;
56
57 ksp_esid_data = mk_esid_data(get_paca()->kstack, 2);
58 if ((ksp_esid_data & ESID_MASK) == KERNELBASE)
59 ksp_esid_data &= ~SLB_ESID_V;
60
61 /* We need to do this all in asm, so we're sure we don't touch
62 * the stack between the slbia and rebolting it. */
63 asm volatile("isync\n"
64 "slbia\n"
65 /* Slot 1 - first VMALLOC segment */
66 "slbmte %0,%1\n"
67 /* Slot 2 - kernel stack */
68 "slbmte %2,%3\n"
69 "isync"
70 :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)),
71 "r"(mk_esid_data(VMALLOCBASE, 1)),
72 "r"(mk_vsid_data(ksp_esid_data, ksp_flags)),
73 "r"(ksp_esid_data)
74 : "memory");
75}
76
77/* Flush all user entries from the segment table of the current processor. */
78void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
79{
80 unsigned long offset = get_paca()->slb_cache_ptr;
81 unsigned long esid_data = 0;
82 unsigned long pc = KSTK_EIP(tsk);
83 unsigned long stack = KSTK_ESP(tsk);
84 unsigned long unmapped_base;
85
86 if (offset <= SLB_CACHE_ENTRIES) {
87 int i;
88 asm volatile("isync" : : : "memory");
89 for (i = 0; i < offset; i++) {
90 esid_data = ((unsigned long)get_paca()->slb_cache[i]
91 << SID_SHIFT) | SLBIE_C;
92 asm volatile("slbie %0" : : "r" (esid_data));
93 }
94 asm volatile("isync" : : : "memory");
95 } else {
96 slb_flush_and_rebolt();
97 }
98
99 /* Workaround POWER5 < DD2.1 issue */
100 if (offset == 1 || offset > SLB_CACHE_ENTRIES)
101 asm volatile("slbie %0" : : "r" (esid_data));
102
103 get_paca()->slb_cache_ptr = 0;
104 get_paca()->context = mm->context;
105
106 /*
107 * preload some userspace segments into the SLB.
108 */
109 if (test_tsk_thread_flag(tsk, TIF_32BIT))
110 unmapped_base = TASK_UNMAPPED_BASE_USER32;
111 else
112 unmapped_base = TASK_UNMAPPED_BASE_USER64;
113
114 if (pc >= KERNELBASE)
115 return;
116 slb_allocate(pc);
117
118 if (GET_ESID(pc) == GET_ESID(stack))
119 return;
120
121 if (stack >= KERNELBASE)
122 return;
123 slb_allocate(stack);
124
125 if ((GET_ESID(pc) == GET_ESID(unmapped_base))
126 || (GET_ESID(stack) == GET_ESID(unmapped_base)))
127 return;
128
129 if (unmapped_base >= KERNELBASE)
130 return;
131 slb_allocate(unmapped_base);
132}
133
134void slb_initialize(void)
135{
136 /* On iSeries the bolted entries have already been set up by
137 * the hypervisor from the lparMap data in head.S */
138#ifndef CONFIG_PPC_ISERIES
139 unsigned long flags = SLB_VSID_KERNEL;
140
141 /* Invalidate the entire SLB (even slot 0) & all the ERATS */
142 if (cpu_has_feature(CPU_FTR_16M_PAGE))
143 flags |= SLB_VSID_L;
144
145 asm volatile("isync":::"memory");
146 asm volatile("slbmte %0,%0"::"r" (0) : "memory");
147 asm volatile("isync; slbia; isync":::"memory");
148 create_slbe(KERNELBASE, flags, 0);
149 create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1);
150 /* We don't bolt the stack for the time being - we're in boot,
151 * so the stack is in the bolted segment. By the time it goes
152 * elsewhere, we'll call _switch() which will bolt in the new
153 * one. */
154 asm volatile("isync":::"memory");
155#endif
156
157 get_paca()->stab_rr = SLB_NUM_BOLTED;
158}
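
The mk_esid_data()/mk_vsid_data() pair above just packs bit fields for slbmte; a rough user-space illustration of that packing, using stand-in values for ESID_MASK, SLB_ESID_V and SLB_VSID_SHIFT rather than the kernel's definitions:

#include <stdio.h>

#define ESID_MASK	0xfffffffff0000000UL	/* stand-in: 256MB segments */
#define SLB_ESID_V	0x0000000008000000UL	/* stand-in: valid bit */
#define SLB_VSID_SHIFT	12			/* stand-in */

static unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
{
	return (ea & ESID_MASK) | SLB_ESID_V | slot;
}

static unsigned long mk_vsid_data(unsigned long vsid, unsigned long flags)
{
	return (vsid << SLB_VSID_SHIFT) | flags;
}

int main(void)
{
	/* a kernel-region EA bolted into slot 0, with made-up VSID/flags */
	printf("esid_data=%016lx vsid_data=%016lx\n",
	       mk_esid_data(0xc000000000000000UL, 0),
	       mk_vsid_data(0x123456789UL, 0x400));
	return 0;
}
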
diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S
deleted file mode 100644
index a3a03da503bc..000000000000
--- a/arch/ppc64/mm/slb_low.S
+++ /dev/null
@@ -1,151 +0,0 @@
1/*
2 * arch/ppc64/mm/slb_low.S
3 *
4 * Low-level SLB routines
5 *
6 * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
7 *
8 * Based on earlier C version:
9 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
10 * Copyright (c) 2001 Dave Engebretsen
11 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18
19#include <linux/config.h>
20#include <asm/processor.h>
21#include <asm/page.h>
22#include <asm/mmu.h>
23#include <asm/ppc_asm.h>
24#include <asm/asm-offsets.h>
25#include <asm/cputable.h>
26
27/* void slb_allocate(unsigned long ea);
28 *
29 * Create an SLB entry for the given EA (user or kernel).
30 * r3 = faulting address, r13 = PACA
31 * r9, r10, r11 are clobbered by this function
32 * No other registers are examined or changed.
33 */
34_GLOBAL(slb_allocate)
35 /*
36 * First find a slot, round robin. Previously we tried to find
37 * a free slot first but that took too long. Unfortunately we
38 * don't have any LRU information to help us choose a slot.
39 */
40#ifdef CONFIG_PPC_ISERIES
41 /*
42 * On iSeries, the "bolted" stack segment can be cast out on
43 * shared processor switch so we need to check for a miss on
44 * it and restore it to the right slot.
45 */
46 ld r9,PACAKSAVE(r13)
47 clrrdi r9,r9,28
48 clrrdi r11,r3,28
49 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */
50 cmpld r9,r11
51 beq 3f
52#endif /* CONFIG_PPC_ISERIES */
53
54 ld r10,PACASTABRR(r13)
55 addi r10,r10,1
56 /* use a cpu feature mask if we ever change our slb size */
57 cmpldi r10,SLB_NUM_ENTRIES
58
59 blt+ 4f
60 li r10,SLB_NUM_BOLTED
61
624:
63 std r10,PACASTABRR(r13)
643:
65 /* r3 = faulting address, r10 = entry */
66
67 srdi r9,r3,60 /* get region */
68 srdi r3,r3,28 /* get esid */
69 cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */
70
71 rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */
72 oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */
73
74 /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */
75
76 blt cr7,0f /* user or kernel? */
77
78 /* kernel address: proto-VSID = ESID */
79 /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but
80 * this code will generate the protoVSID 0xfffffffff for the
81 * top segment. That's ok, the scramble below will translate
82 * it to VSID 0, which is reserved as a bad VSID - one which
83 * will never have any pages in it. */
84 li r11,SLB_VSID_KERNEL
85BEGIN_FTR_SECTION
86 bne cr7,9f
87 li r11,(SLB_VSID_KERNEL|SLB_VSID_L)
88END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
89 b 9f
90
910: /* user address: proto-VSID = context<<15 | ESID */
92 srdi. r9,r3,USER_ESID_BITS
93 bne- 8f /* invalid ea bits set */
94
95#ifdef CONFIG_HUGETLB_PAGE
96BEGIN_FTR_SECTION
97 lhz r9,PACAHIGHHTLBAREAS(r13)
98 srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
99 srd r9,r9,r11
100 lhz r11,PACALOWHTLBAREAS(r13)
101 srd r11,r11,r3
102 or r9,r9,r11
103END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
104#endif /* CONFIG_HUGETLB_PAGE */
105
106 li r11,SLB_VSID_USER
107
108#ifdef CONFIG_HUGETLB_PAGE
109BEGIN_FTR_SECTION
110 rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */
111END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
112#endif /* CONFIG_HUGETLB_PAGE */
113
114 ld r9,PACACONTEXTID(r13)
115 rldimi r3,r9,USER_ESID_BITS,0
116
1179: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */
118 ASM_VSID_SCRAMBLE(r3,r9)
119
120 rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */
121
122 /*
123 * No need for an isync before or after this slbmte. The exception
124 * we enter with and the rfid we exit with are context synchronizing.
125 */
126 slbmte r11,r10
127
128 bgelr cr7 /* we're done for kernel addresses */
129
130 /* Update the slb cache */
131 lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
132 cmpldi r3,SLB_CACHE_ENTRIES
133 bge 1f
134
135 /* still room in the slb cache */
136 sldi r11,r3,1 /* r11 = offset * sizeof(u16) */
137 rldicl r10,r10,36,28 /* get low 16 bits of the ESID */
138 add r11,r11,r13 /* r11 = (u16 *)paca + offset */
139 sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
140 addi r3,r3,1 /* offset++ */
141 b 2f
1421: /* offset >= SLB_CACHE_ENTRIES */
143 li r3,SLB_CACHE_ENTRIES+1
1442:
145 sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
146 blr
147
1488: /* invalid EA */
149 li r3,0 /* BAD_VSID */
150 li r11,SLB_VSID_USER /* flags don't much matter */
151 b 9b
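
The slot selection at the top of slb_allocate is easier to read as C; a sketch of the same round-robin policy, with illustrative values for SLB_NUM_ENTRIES and SLB_NUM_BOLTED:

#include <stdio.h>

#define SLB_NUM_ENTRIES	64	/* illustrative SLB size */
#define SLB_NUM_BOLTED	3	/* illustrative count of bolted slots */

/* Mirror of the PACASTABRR update: advance the round-robin pointer,
 * skipping back to the first non-bolted slot when it wraps. */
static unsigned long next_slb_slot(unsigned long *stab_rr)
{
	unsigned long slot = *stab_rr + 1;

	if (slot >= SLB_NUM_ENTRIES)
		slot = SLB_NUM_BOLTED;
	*stab_rr = slot;
	return slot;
}

int main(void)
{
	unsigned long rr = SLB_NUM_BOLTED;
	int i;

	for (i = 0; i < 70; i++)
		printf("%lu ", next_slb_slot(&rr));
	printf("\n");	/* 4, 5, ... 63, 3, 4, ... */
	return 0;
}
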
diff --git a/arch/ppc64/mm/stab.c b/arch/ppc64/mm/stab.c
deleted file mode 100644
index 1b83f002bf27..000000000000
--- a/arch/ppc64/mm/stab.c
+++ /dev/null
@@ -1,279 +0,0 @@
1/*
2 * PowerPC64 Segment Translation Support.
3 *
4 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
5 * Copyright (c) 2001 Dave Engebretsen
6 *
7 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <linux/config.h>
16#include <asm/pgtable.h>
17#include <asm/mmu.h>
18#include <asm/mmu_context.h>
19#include <asm/paca.h>
20#include <asm/cputable.h>
21#include <asm/lmb.h>
22#include <asm/abs_addr.h>
23
24struct stab_entry {
25 unsigned long esid_data;
26 unsigned long vsid_data;
27};
28
29/* Both the segment table and SLB code use the following cache */
30#define NR_STAB_CACHE_ENTRIES 8
31DEFINE_PER_CPU(long, stab_cache_ptr);
32DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
33
34/*
35 * Create a segment table entry for the given esid/vsid pair.
36 */
37static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid)
38{
39 unsigned long esid_data, vsid_data;
40 unsigned long entry, group, old_esid, castout_entry, i;
41 unsigned int global_entry;
42 struct stab_entry *ste, *castout_ste;
43 unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE;
44
45 vsid_data = vsid << STE_VSID_SHIFT;
46 esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V;
47 if (! kernel_segment)
48 esid_data |= STE_ESID_KS;
49
50 /* Search the primary group first. */
51 global_entry = (esid & 0x1f) << 3;
52 ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7));
53
54 /* Find an empty entry, if one exists. */
55 for (group = 0; group < 2; group++) {
56 for (entry = 0; entry < 8; entry++, ste++) {
57 if (!(ste->esid_data & STE_ESID_V)) {
58 ste->vsid_data = vsid_data;
59 asm volatile("eieio":::"memory");
60 ste->esid_data = esid_data;
61 return (global_entry | entry);
62 }
63 }
64 /* Now search the secondary group. */
65 global_entry = ((~esid) & 0x1f) << 3;
66 ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7));
67 }
68
69 /*
70 * Could not find empty entry, pick one with a round robin selection.
71 * Search all entries in the two groups.
72 */
73 castout_entry = get_paca()->stab_rr;
74 for (i = 0; i < 16; i++) {
75 if (castout_entry < 8) {
76 global_entry = (esid & 0x1f) << 3;
77 ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7));
78 castout_ste = ste + castout_entry;
79 } else {
80 global_entry = ((~esid) & 0x1f) << 3;
81 ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7));
82 castout_ste = ste + (castout_entry - 8);
83 }
84
85		/* Don't cast out the first kernel segment */
86 if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE)
87 break;
88
89 castout_entry = (castout_entry + 1) & 0xf;
90 }
91
92 get_paca()->stab_rr = (castout_entry + 1) & 0xf;
93
94 /* Modify the old entry to the new value. */
95
96 /* Force previous translations to complete. DRENG */
97 asm volatile("isync" : : : "memory");
98
99 old_esid = castout_ste->esid_data >> SID_SHIFT;
100 castout_ste->esid_data = 0; /* Invalidate old entry */
101
102 asm volatile("sync" : : : "memory"); /* Order update */
103
104 castout_ste->vsid_data = vsid_data;
105 asm volatile("eieio" : : : "memory"); /* Order update */
106 castout_ste->esid_data = esid_data;
107
108 asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT));
109 /* Ensure completion of slbie */
110 asm volatile("sync" : : : "memory");
111
112 return (global_entry | (castout_entry & 0x7));
113}
114
115/*
116 * Allocate a segment table entry for the given ea and mm
117 */
118static int __ste_allocate(unsigned long ea, struct mm_struct *mm)
119{
120 unsigned long vsid;
121 unsigned char stab_entry;
122 unsigned long offset;
123
124 /* Kernel or user address? */
125 if (ea >= KERNELBASE) {
126 vsid = get_kernel_vsid(ea);
127 } else {
128 if ((ea >= TASK_SIZE_USER64) || (! mm))
129 return 1;
130
131 vsid = get_vsid(mm->context.id, ea);
132 }
133
134 stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid);
135
136 if (ea < KERNELBASE) {
137 offset = __get_cpu_var(stab_cache_ptr);
138 if (offset < NR_STAB_CACHE_ENTRIES)
139 __get_cpu_var(stab_cache[offset++]) = stab_entry;
140 else
141 offset = NR_STAB_CACHE_ENTRIES+1;
142 __get_cpu_var(stab_cache_ptr) = offset;
143
144 /* Order update */
145 asm volatile("sync":::"memory");
146 }
147
148 return 0;
149}
150
151int ste_allocate(unsigned long ea)
152{
153 return __ste_allocate(ea, current->mm);
154}
155
156/*
157 * Do the segment table work for a context switch: flush all user
158 * entries from the table, then preload some probably useful entries
159 * for the new task
160 */
161void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
162{
163 struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr;
164 struct stab_entry *ste;
165 unsigned long offset = __get_cpu_var(stab_cache_ptr);
166 unsigned long pc = KSTK_EIP(tsk);
167 unsigned long stack = KSTK_ESP(tsk);
168 unsigned long unmapped_base;
169
170 /* Force previous translations to complete. DRENG */
171 asm volatile("isync" : : : "memory");
172
173 if (offset <= NR_STAB_CACHE_ENTRIES) {
174 int i;
175
176 for (i = 0; i < offset; i++) {
177 ste = stab + __get_cpu_var(stab_cache[i]);
178 ste->esid_data = 0; /* invalidate entry */
179 }
180 } else {
181 unsigned long entry;
182
183 /* Invalidate all entries. */
184 ste = stab;
185
186 /* Never flush the first entry. */
187 ste += 1;
188 for (entry = 1;
189 entry < (PAGE_SIZE / sizeof(struct stab_entry));
190 entry++, ste++) {
191 unsigned long ea;
192 ea = ste->esid_data & ESID_MASK;
193 if (ea < KERNELBASE) {
194 ste->esid_data = 0;
195 }
196 }
197 }
198
199 asm volatile("sync; slbia; sync":::"memory");
200
201 __get_cpu_var(stab_cache_ptr) = 0;
202
203 /* Now preload some entries for the new task */
204 if (test_tsk_thread_flag(tsk, TIF_32BIT))
205 unmapped_base = TASK_UNMAPPED_BASE_USER32;
206 else
207 unmapped_base = TASK_UNMAPPED_BASE_USER64;
208
209 __ste_allocate(pc, mm);
210
211 if (GET_ESID(pc) == GET_ESID(stack))
212 return;
213
214 __ste_allocate(stack, mm);
215
216 if ((GET_ESID(pc) == GET_ESID(unmapped_base))
217 || (GET_ESID(stack) == GET_ESID(unmapped_base)))
218 return;
219
220 __ste_allocate(unmapped_base, mm);
221
222 /* Order update */
223 asm volatile("sync" : : : "memory");
224}
225
226extern void slb_initialize(void);
227
228/*
229 * Allocate segment tables for secondary CPUs. These must all go in
230 * the first (bolted) segment, so that do_stab_bolted won't get a
231 * recursive segment miss on the segment table itself.
232 */
233void stabs_alloc(void)
234{
235 int cpu;
236
237 if (cpu_has_feature(CPU_FTR_SLB))
238 return;
239
240 for_each_cpu(cpu) {
241 unsigned long newstab;
242
243 if (cpu == 0)
244 continue; /* stab for CPU 0 is statically allocated */
245
246 newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT);
247 if (! newstab)
248 panic("Unable to allocate segment table for CPU %d.\n",
249 cpu);
250
251 newstab += KERNELBASE;
252
253 memset((void *)newstab, 0, PAGE_SIZE);
254
255 paca[cpu].stab_addr = newstab;
256 paca[cpu].stab_real = virt_to_abs(newstab);
257 printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
258 }
259}
260
261/*
262 * Build an entry for the base kernel segment and put it into
263 * the segment table or SLB. All other segment table or SLB
264 * entries are faulted in.
265 */
266void stab_initialize(unsigned long stab)
267{
268 unsigned long vsid = get_kernel_vsid(KERNELBASE);
269
270 if (cpu_has_feature(CPU_FTR_SLB)) {
271 slb_initialize();
272 } else {
273 asm volatile("isync; slbia; isync":::"memory");
274 make_ste(stab, GET_ESID(KERNELBASE), vsid);
275
276 /* Order update */
277 asm volatile("sync":::"memory");
278 }
279}
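
make_ste() above addresses the segment table as 32 groups of 8 16-byte entries (one 4K page), hashing the low ESID bits for the primary group and their complement for the secondary group. A small stand-alone sketch of that addressing:

#include <stdio.h>

/* Byte offset of the primary STE group for an ESID: 8 entries of
 * 16 bytes each, i.e. 128 bytes per group, 32 groups per page. */
static unsigned int primary_group_offset(unsigned long esid)
{
	return (esid & 0x1f) << 7;
}

/* The secondary group hashes the complemented ESID bits instead. */
static unsigned int secondary_group_offset(unsigned long esid)
{
	return ((~esid) & 0x1f) << 7;
}

int main(void)
{
	unsigned long esid = 0xc000000000000000UL >> 28;	/* ESID of a kernel EA */

	printf("primary at stab+0x%x, secondary at stab+0x%x\n",
	       primary_group_offset(esid), secondary_group_offset(esid));
	return 0;
}
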
diff --git a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c
deleted file mode 100644
index 21fbffb23a43..000000000000
--- a/arch/ppc64/mm/tlb.c
+++ /dev/null
@@ -1,197 +0,0 @@
1/*
2 * This file contains the routines for flushing entries from the
3 * TLB and MMU hash table.
4 *
5 * Derived from arch/ppc64/mm/init.c:
6 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
7 *
8 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
9 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
10 * Copyright (C) 1996 Paul Mackerras
11 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
12 *
13 * Derived from "arch/i386/mm/init.c"
14 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
15 *
16 * Dave Engebretsen <engebret@us.ibm.com>
17 * Rework for PPC64 port.
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 */
24#include <linux/config.h>
25#include <linux/kernel.h>
26#include <linux/mm.h>
27#include <linux/init.h>
28#include <linux/percpu.h>
29#include <linux/hardirq.h>
30#include <asm/pgalloc.h>
31#include <asm/tlbflush.h>
32#include <asm/tlb.h>
33#include <linux/highmem.h>
34
35DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
36
37/* This is declared as we are using the more or less generic
38 * include/asm-ppc64/tlb.h file -- tgall
39 */
40DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
41DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
42unsigned long pte_freelist_forced_free;
43
44struct pte_freelist_batch
45{
46 struct rcu_head rcu;
47 unsigned int index;
48 pgtable_free_t tables[0];
49};
50
54#define PTE_FREELIST_SIZE \
55 ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
56 / sizeof(pgtable_free_t))
57
58#ifdef CONFIG_SMP
59static void pte_free_smp_sync(void *arg)
60{
61 /* Do nothing, just ensure we sync with all CPUs */
62}
63#endif
64
65/* This is only called when we are critically out of memory
66 * (and fail to get a page in pte_free_tlb).
67 */
68static void pgtable_free_now(pgtable_free_t pgf)
69{
70 pte_freelist_forced_free++;
71
72 smp_call_function(pte_free_smp_sync, NULL, 0, 1);
73
74 pgtable_free(pgf);
75}
76
77static void pte_free_rcu_callback(struct rcu_head *head)
78{
79 struct pte_freelist_batch *batch =
80 container_of(head, struct pte_freelist_batch, rcu);
81 unsigned int i;
82
83 for (i = 0; i < batch->index; i++)
84 pgtable_free(batch->tables[i]);
85
86 free_page((unsigned long)batch);
87}
88
89static void pte_free_submit(struct pte_freelist_batch *batch)
90{
91 INIT_RCU_HEAD(&batch->rcu);
92 call_rcu(&batch->rcu, pte_free_rcu_callback);
93}
94
95void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
96{
97 /* This is safe as we are holding page_table_lock */
98 cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
99 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
100
101 if (atomic_read(&tlb->mm->mm_users) < 2 ||
102 cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
103 pgtable_free(pgf);
104 return;
105 }
106
107 if (*batchp == NULL) {
108 *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
109 if (*batchp == NULL) {
110 pgtable_free_now(pgf);
111 return;
112 }
113 (*batchp)->index = 0;
114 }
115 (*batchp)->tables[(*batchp)->index++] = pgf;
116 if ((*batchp)->index == PTE_FREELIST_SIZE) {
117 pte_free_submit(*batchp);
118 *batchp = NULL;
119 }
120}
121
122/*
123 * Update the MMU hash table to correspond with a change to
124 * a Linux PTE. If wrprot is true, it is permissible to
125 * change the existing HPTE to read-only rather than removing it
126 * (if we remove it we should clear the _PTE_HPTEFLAGS bits).
127 */
128void hpte_update(struct mm_struct *mm, unsigned long addr,
129 unsigned long pte, int wrprot)
130{
131 int i;
132 unsigned long context = 0;
133 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
134
135 if (REGION_ID(addr) == USER_REGION_ID)
136 context = mm->context.id;
137 i = batch->index;
138
139 /*
140 * This can happen when we are in the middle of a TLB batch and
141 * we encounter memory pressure (eg copy_page_range when it tries
142 * to allocate a new pte). If we have to reclaim memory and end
143 * up scanning and resetting referenced bits then our batch context
144 * will change mid stream.
145 */
146 if (i != 0 && (context != batch->context ||
147 batch->large != pte_huge(pte))) {
148 flush_tlb_pending();
149 i = 0;
150 }
151
152 if (i == 0) {
153 batch->context = context;
154 batch->mm = mm;
155 batch->large = pte_huge(pte);
156 }
157 batch->pte[i] = __pte(pte);
158 batch->addr[i] = addr;
159 batch->index = ++i;
160 if (i >= PPC64_TLB_BATCH_NR)
161 flush_tlb_pending();
162}
163
164void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
165{
166 int i;
167 int cpu;
168 cpumask_t tmp;
169 int local = 0;
170
171 BUG_ON(in_interrupt());
172
173 cpu = get_cpu();
174 i = batch->index;
175 tmp = cpumask_of_cpu(cpu);
176 if (cpus_equal(batch->mm->cpu_vm_mask, tmp))
177 local = 1;
178
179 if (i == 1)
180 flush_hash_page(batch->context, batch->addr[0], batch->pte[0],
181 local);
182 else
183 flush_hash_range(batch->context, i, local);
184 batch->index = 0;
185 put_cpu();
186}
187
188void pte_free_finish(void)
189{
190 /* This is safe as we are holding page_table_lock */
191 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
192
193 if (*batchp == NULL)
194 return;
195 pte_free_submit(*batchp);
196 *batchp = NULL;
197}
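
PTE_FREELIST_SIZE above is just "how many pgtable_free_t slots fit in the rest of a page after the batch header"; a quick check of that arithmetic with stand-in type sizes (4K page, 8-byte pgtable_free_t, two-pointer rcu_head):

#include <stdio.h>

struct rcu_head_stub { void *next; void *func; };	/* stand-in for struct rcu_head */
typedef unsigned long pgtable_free_t;			/* stand-in: one word */

struct pte_freelist_batch {
	struct rcu_head_stub rcu;
	unsigned int index;
	pgtable_free_t tables[];
};

#define PAGE_SIZE 4096UL

#define PTE_FREELIST_SIZE \
	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / sizeof(pgtable_free_t))

int main(void)
{
	/* With a 24-byte header this comes out to 509 pending page tables. */
	printf("entries per batch: %lu\n", PTE_FREELIST_SIZE);
	return 0;
}
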
diff --git a/arch/ppc64/oprofile/Kconfig b/arch/ppc64/oprofile/Kconfig
deleted file mode 100644
index 5ade19801b97..000000000000
--- a/arch/ppc64/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11
12config OPROFILE
13 tristate "OProfile system profiling (EXPERIMENTAL)"
14 depends on PROFILING
15 help
16 OProfile is a profiling system capable of profiling the
17	  whole system, including the kernel, kernel modules, libraries,
18 and applications.
19
20 If unsure, say N.
21
22endmenu
23
diff --git a/arch/ppc64/oprofile/Makefile b/arch/ppc64/oprofile/Makefile
deleted file mode 100644
index 162dbf06c142..000000000000
--- a/arch/ppc64/oprofile/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
1obj-$(CONFIG_OPROFILE) += oprofile.o
2
3DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
4 oprof.o cpu_buffer.o buffer_sync.o \
5 event_buffer.o oprofile_files.o \
6 oprofilefs.o oprofile_stats.o \
7 timer_int.o )
8
9oprofile-y := $(DRIVER_OBJS) common.o op_model_rs64.o op_model_power4.o
diff --git a/arch/ppc64/oprofile/common.c b/arch/ppc64/oprofile/common.c
deleted file mode 100644
index e5f572710aa0..000000000000
--- a/arch/ppc64/oprofile/common.c
+++ /dev/null
@@ -1,145 +0,0 @@
1/*
2 * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
3 *
4 * Based on alpha version.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/oprofile.h>
13#include <linux/init.h>
14#include <linux/smp.h>
15#include <linux/errno.h>
16#include <asm/ptrace.h>
17#include <asm/system.h>
18#include <asm/pmc.h>
19#include <asm/cputable.h>
20#include <asm/oprofile_impl.h>
21
22static struct op_ppc64_model *model;
23
24static struct op_counter_config ctr[OP_MAX_COUNTER];
25static struct op_system_config sys;
26
27static void op_handle_interrupt(struct pt_regs *regs)
28{
29 model->handle_interrupt(regs, ctr);
30}
31
32static int op_ppc64_setup(void)
33{
34 int err;
35
36 /* Grab the hardware */
37 err = reserve_pmc_hardware(op_handle_interrupt);
38 if (err)
39 return err;
40
41 /* Pre-compute the values to stuff in the hardware registers. */
42 model->reg_setup(ctr, &sys, model->num_counters);
43
44 /* Configure the registers on all cpus. */
45 on_each_cpu(model->cpu_setup, NULL, 0, 1);
46
47 return 0;
48}
49
50static void op_ppc64_shutdown(void)
51{
52 release_pmc_hardware();
53}
54
55static void op_ppc64_cpu_start(void *dummy)
56{
57 model->start(ctr);
58}
59
60static int op_ppc64_start(void)
61{
62 on_each_cpu(op_ppc64_cpu_start, NULL, 0, 1);
63 return 0;
64}
65
66static inline void op_ppc64_cpu_stop(void *dummy)
67{
68 model->stop();
69}
70
71static void op_ppc64_stop(void)
72{
73 on_each_cpu(op_ppc64_cpu_stop, NULL, 0, 1);
74}
75
76static int op_ppc64_create_files(struct super_block *sb, struct dentry *root)
77{
78 int i;
79
80 /*
81 * There is one mmcr0, mmcr1 and mmcra for setting the events for
82 * all of the counters.
83 */
84 oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0);
85 oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1);
86 oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra);
87
88 for (i = 0; i < model->num_counters; ++i) {
89 struct dentry *dir;
90 char buf[3];
91
92 snprintf(buf, sizeof buf, "%d", i);
93 dir = oprofilefs_mkdir(sb, root, buf);
94
95 oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled);
96 oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
97 oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
98 /*
99		 * We don't support per-counter user/kernel selection, but
100 * we leave the entries because userspace expects them
101 */
102 oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
103 oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
104 oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
105 }
106
107 oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel);
108 oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user);
109 oprofilefs_create_ulong(sb, root, "backtrace_spinlocks",
110 &sys.backtrace_spinlocks);
111
112 /* Default to tracing both kernel and user */
113 sys.enable_kernel = 1;
114 sys.enable_user = 1;
115
116 /* Turn on backtracing through spinlocks by default */
117 sys.backtrace_spinlocks = 1;
118
119 return 0;
120}
121
122int __init oprofile_arch_init(struct oprofile_operations *ops)
123{
124 if (!cur_cpu_spec->oprofile_model || !cur_cpu_spec->oprofile_cpu_type)
125 return -ENODEV;
126
127 model = cur_cpu_spec->oprofile_model;
128 model->num_counters = cur_cpu_spec->num_pmcs;
129
130 ops->cpu_type = cur_cpu_spec->oprofile_cpu_type;
131 ops->create_files = op_ppc64_create_files;
132 ops->setup = op_ppc64_setup;
133 ops->shutdown = op_ppc64_shutdown;
134 ops->start = op_ppc64_start;
135 ops->stop = op_ppc64_stop;
136
137 printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
138 ops->cpu_type);
139
140 return 0;
141}
142
143void oprofile_arch_exit(void)
144{
145}
diff --git a/arch/ppc64/oprofile/op_model_power4.c b/arch/ppc64/oprofile/op_model_power4.c
deleted file mode 100644
index 32b2bb5625fe..000000000000
--- a/arch/ppc64/oprofile/op_model_power4.c
+++ /dev/null
@@ -1,309 +0,0 @@
1/*
2 * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include <linux/oprofile.h>
11#include <linux/init.h>
12#include <linux/smp.h>
13#include <asm/ptrace.h>
14#include <asm/system.h>
15#include <asm/processor.h>
16#include <asm/cputable.h>
17#include <asm/systemcfg.h>
18#include <asm/rtas.h>
19#include <asm/oprofile_impl.h>
20
21#define dbg(args...)
22
23static unsigned long reset_value[OP_MAX_COUNTER];
24
25static int oprofile_running;
26static int mmcra_has_sihv;
27
28/* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */
29static u32 mmcr0_val;
30static u64 mmcr1_val;
31static u32 mmcra_val;
32
33/*
34 * Since we do not have an NMI, backtracing through spinlocks is
35 * only a best guess. In light of this, allow it to be disabled at
36 * runtime.
37 */
38static int backtrace_spinlocks;
39
40static void power4_reg_setup(struct op_counter_config *ctr,
41 struct op_system_config *sys,
42 int num_ctrs)
43{
44 int i;
45
46 /*
47 * SIHV / SIPR bits are only implemented on POWER4+ (GQ) and above.
48 * However we disable it on all POWER4 until we verify it works
49 * (I was seeing some strange behaviour last time I tried).
50 *
51 * It has been verified to work on POWER5 so we enable it there.
52 */
53 if (cpu_has_feature(CPU_FTR_MMCRA_SIHV))
54 mmcra_has_sihv = 1;
55
56 /*
57 * The performance counter event settings are given in the mmcr0,
58 * mmcr1 and mmcra values passed from the user in the
59 * op_system_config structure (sys variable).
60 */
61 mmcr0_val = sys->mmcr0;
62 mmcr1_val = sys->mmcr1;
63 mmcra_val = sys->mmcra;
64
65 backtrace_spinlocks = sys->backtrace_spinlocks;
66
67 for (i = 0; i < cur_cpu_spec->num_pmcs; ++i)
68 reset_value[i] = 0x80000000UL - ctr[i].count;
69
70 /* setup user and kernel profiling */
71 if (sys->enable_kernel)
72 mmcr0_val &= ~MMCR0_KERNEL_DISABLE;
73 else
74 mmcr0_val |= MMCR0_KERNEL_DISABLE;
75
76 if (sys->enable_user)
77 mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
78 else
79 mmcr0_val |= MMCR0_PROBLEM_DISABLE;
80}
81
82extern void ppc64_enable_pmcs(void);
83
84static void power4_cpu_setup(void *unused)
85{
86 unsigned int mmcr0 = mmcr0_val;
87 unsigned long mmcra = mmcra_val;
88
89 ppc64_enable_pmcs();
90
91 /* set the freeze bit */
92 mmcr0 |= MMCR0_FC;
93 mtspr(SPRN_MMCR0, mmcr0);
94
95 mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE;
96 mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE;
97 mtspr(SPRN_MMCR0, mmcr0);
98
99 mtspr(SPRN_MMCR1, mmcr1_val);
100
101 mmcra |= MMCRA_SAMPLE_ENABLE;
102 mtspr(SPRN_MMCRA, mmcra);
103
104 dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(),
105 mfspr(SPRN_MMCR0));
106 dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
107 mfspr(SPRN_MMCR1));
108 dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
109 mfspr(SPRN_MMCRA));
110}
111
112static void power4_start(struct op_counter_config *ctr)
113{
114 int i;
115 unsigned int mmcr0;
116
117 /* set the PMM bit (see comment below) */
118 mtmsrd(mfmsr() | MSR_PMM);
119
120 for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) {
121 if (ctr[i].enabled) {
122 ctr_write(i, reset_value[i]);
123 } else {
124 ctr_write(i, 0);
125 }
126 }
127
128 mmcr0 = mfspr(SPRN_MMCR0);
129
130 /*
131 * We must clear the PMAO bit on some (GQ) chips. Just do it
132 * all the time
133 */
134 mmcr0 &= ~MMCR0_PMAO;
135
136 /*
137 * now clear the freeze bit, counting will not start until we
138	 * rfid from this exception, because only at that point will
139 * the PMM bit be cleared
140 */
141 mmcr0 &= ~MMCR0_FC;
142 mtspr(SPRN_MMCR0, mmcr0);
143
144 oprofile_running = 1;
145
146 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
147}
148
149static void power4_stop(void)
150{
151 unsigned int mmcr0;
152
153 /* freeze counters */
154 mmcr0 = mfspr(SPRN_MMCR0);
155 mmcr0 |= MMCR0_FC;
156 mtspr(SPRN_MMCR0, mmcr0);
157
158 oprofile_running = 0;
159
160 dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
161
162 mb();
163}
164
165/* Fake functions used by canonicalize_pc */
166static void __attribute_used__ hypervisor_bucket(void)
167{
168}
169
170static void __attribute_used__ rtas_bucket(void)
171{
172}
173
174static void __attribute_used__ kernel_unknown_bucket(void)
175{
176}
177
178static unsigned long check_spinlock_pc(struct pt_regs *regs,
179 unsigned long profile_pc)
180{
181 unsigned long pc = instruction_pointer(regs);
182
183 /*
184 * If both the SIAR (sampled instruction) and the perfmon exception
185 * occurred in a spinlock region then we account the sample to the
186	 * calling function. This isn't 100% correct; we really need soft
187 * IRQ disable so we always get the perfmon exception at the
188 * point at which the SIAR is set.
189 */
190 if (backtrace_spinlocks && in_lock_functions(pc) &&
191 in_lock_functions(profile_pc))
192 return regs->link;
193 else
194 return profile_pc;
195}
196
197/*
198 * On GQ and newer the MMCRA stores the HV and PR bits at the time
199 * the SIAR was sampled. We use that to work out if the SIAR was sampled in
200 * the hypervisor, our exception vectors or RTAS.
201 */
202static unsigned long get_pc(struct pt_regs *regs)
203{
204 unsigned long pc = mfspr(SPRN_SIAR);
205 unsigned long mmcra;
206
207	/* Can't do much about it */
208 if (!mmcra_has_sihv)
209 return check_spinlock_pc(regs, pc);
210
211 mmcra = mfspr(SPRN_MMCRA);
212
213 /* Were we in the hypervisor? */
214 if ((systemcfg->platform == PLATFORM_PSERIES_LPAR) &&
215 (mmcra & MMCRA_SIHV))
216 /* function descriptor madness */
217 return *((unsigned long *)hypervisor_bucket);
218
219 /* We were in userspace, nothing to do */
220 if (mmcra & MMCRA_SIPR)
221 return pc;
222
223#ifdef CONFIG_PPC_RTAS
224 /* Were we in RTAS? */
225 if (pc >= rtas.base && pc < (rtas.base + rtas.size))
226 /* function descriptor madness */
227 return *((unsigned long *)rtas_bucket);
228#endif
229
230 /* Were we in our exception vectors or SLB real mode miss handler? */
231 if (pc < 0x1000000UL)
232 return (unsigned long)__va(pc);
233
234 /* Not sure where we were */
235 if (pc < KERNELBASE)
236 /* function descriptor madness */
237 return *((unsigned long *)kernel_unknown_bucket);
238
239 return check_spinlock_pc(regs, pc);
240}
241
242static int get_kernel(unsigned long pc)
243{
244 int is_kernel;
245
246 if (!mmcra_has_sihv) {
247 is_kernel = (pc >= KERNELBASE);
248 } else {
249 unsigned long mmcra = mfspr(SPRN_MMCRA);
250 is_kernel = ((mmcra & MMCRA_SIPR) == 0);
251 }
252
253 return is_kernel;
254}
255
256static void power4_handle_interrupt(struct pt_regs *regs,
257 struct op_counter_config *ctr)
258{
259 unsigned long pc;
260 int is_kernel;
261 int val;
262 int i;
263 unsigned int mmcr0;
264
265 pc = get_pc(regs);
266 is_kernel = get_kernel(pc);
267
268 /* set the PMM bit (see comment below) */
269 mtmsrd(mfmsr() | MSR_PMM);
270
271 for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) {
272 val = ctr_read(i);
273 if (val < 0) {
274 if (oprofile_running && ctr[i].enabled) {
275 oprofile_add_pc(pc, is_kernel, i);
276 ctr_write(i, reset_value[i]);
277 } else {
278 ctr_write(i, 0);
279 }
280 }
281 }
282
283 mmcr0 = mfspr(SPRN_MMCR0);
284
285 /* reset the perfmon trigger */
286 mmcr0 |= MMCR0_PMXE;
287
288 /*
289 * We must clear the PMAO bit on some (GQ) chips. Just do it
290 * all the time
291 */
292 mmcr0 &= ~MMCR0_PMAO;
293
294 /*
295 * now clear the freeze bit, counting will not start until we
296 * rfid from this exception, because only at that point will
297 * the PMM bit be cleared
298 */
299 mmcr0 &= ~MMCR0_FC;
300 mtspr(SPRN_MMCR0, mmcr0);
301}
302
303struct op_ppc64_model op_model_power4 = {
304 .reg_setup = power4_reg_setup,
305 .cpu_setup = power4_cpu_setup,
306 .start = power4_start,
307 .stop = power4_stop,
308 .handle_interrupt = power4_handle_interrupt,
309};
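
The reset_value[] computation in power4_reg_setup() relies on the PMCs flagging an interrupt once a counter goes negative (high bit set), so starting at 0x80000000 - count gives a sample every `count` events. A tiny check of that spacing:

#include <stdio.h>

/* Start value so the counter hits 0x80000000 after `count` events,
 * as in power4_reg_setup()/power4_start(). */
static unsigned int pmc_reset_value(unsigned long count)
{
	return (unsigned int)(0x80000000UL - count);
}

int main(void)
{
	unsigned long count = 100000;		/* sample every 100k events */
	unsigned int pmc = pmc_reset_value(count);
	unsigned long events = 0;

	while (!(pmc & 0x80000000u)) {		/* one increment per event */
		pmc++;
		events++;
	}
	printf("overflow after %lu events\n", events);	/* 100000 */
	return 0;
}
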
diff --git a/arch/ppc64/oprofile/op_model_rs64.c b/arch/ppc64/oprofile/op_model_rs64.c
deleted file mode 100644
index 08c5b333f5c4..000000000000
--- a/arch/ppc64/oprofile/op_model_rs64.c
+++ /dev/null
@@ -1,218 +0,0 @@
1/*
2 * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include <linux/oprofile.h>
11#include <linux/init.h>
12#include <linux/smp.h>
13#include <asm/ptrace.h>
14#include <asm/system.h>
15#include <asm/processor.h>
16#include <asm/cputable.h>
17#include <asm/oprofile_impl.h>
18
19#define dbg(args...)
20
21static void ctrl_write(unsigned int i, unsigned int val)
22{
23 unsigned int tmp = 0;
24 unsigned long shift = 0, mask = 0;
25
26 dbg("ctrl_write %d %x\n", i, val);
27
28 switch(i) {
29 case 0:
30 tmp = mfspr(SPRN_MMCR0);
31 shift = 6;
32 mask = 0x7F;
33 break;
34 case 1:
35 tmp = mfspr(SPRN_MMCR0);
36 shift = 0;
37 mask = 0x3F;
38 break;
39 case 2:
40 tmp = mfspr(SPRN_MMCR1);
41 shift = 31 - 4;
42 mask = 0x1F;
43 break;
44 case 3:
45 tmp = mfspr(SPRN_MMCR1);
46 shift = 31 - 9;
47 mask = 0x1F;
48 break;
49 case 4:
50 tmp = mfspr(SPRN_MMCR1);
51 shift = 31 - 14;
52 mask = 0x1F;
53 break;
54 case 5:
55 tmp = mfspr(SPRN_MMCR1);
56 shift = 31 - 19;
57 mask = 0x1F;
58 break;
59 case 6:
60 tmp = mfspr(SPRN_MMCR1);
61 shift = 31 - 24;
62 mask = 0x1F;
63 break;
64 case 7:
65 tmp = mfspr(SPRN_MMCR1);
66 shift = 31 - 28;
67 mask = 0xF;
68 break;
69 }
70
71 tmp = tmp & ~(mask << shift);
72 tmp |= val << shift;
73
74 switch(i) {
75 case 0:
76 case 1:
77 mtspr(SPRN_MMCR0, tmp);
78 break;
79 default:
80 mtspr(SPRN_MMCR1, tmp);
81 }
82
83 dbg("ctrl_write mmcr0 %lx mmcr1 %lx\n", mfspr(SPRN_MMCR0),
84 mfspr(SPRN_MMCR1));
85}
86
87static unsigned long reset_value[OP_MAX_COUNTER];
88
89static int num_counters;
90
91static void rs64_reg_setup(struct op_counter_config *ctr,
92 struct op_system_config *sys,
93 int num_ctrs)
94{
95 int i;
96
97 num_counters = num_ctrs;
98
99 for (i = 0; i < num_counters; ++i)
100 reset_value[i] = 0x80000000UL - ctr[i].count;
101
102 /* XXX setup user and kernel profiling */
103}
104
105static void rs64_cpu_setup(void *unused)
106{
107 unsigned int mmcr0;
108
109 /* reset MMCR0 and set the freeze bit */
110 mmcr0 = MMCR0_FC;
111 mtspr(SPRN_MMCR0, mmcr0);
112
113 /* reset MMCR1, MMCRA */
114 mtspr(SPRN_MMCR1, 0);
115
116 if (cpu_has_feature(CPU_FTR_MMCRA))
117 mtspr(SPRN_MMCRA, 0);
118
119 mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE;
120 /* Only applies to POWER3, but should be safe on RS64 */
121 mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE;
122 mtspr(SPRN_MMCR0, mmcr0);
123
124 dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(),
125 mfspr(SPRN_MMCR0));
126 dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
127 mfspr(SPRN_MMCR1));
128}
129
130static void rs64_start(struct op_counter_config *ctr)
131{
132 int i;
133 unsigned int mmcr0;
134
135 /* set the PMM bit (see comment below) */
136 mtmsrd(mfmsr() | MSR_PMM);
137
138 for (i = 0; i < num_counters; ++i) {
139 if (ctr[i].enabled) {
140 ctr_write(i, reset_value[i]);
141 ctrl_write(i, ctr[i].event);
142 } else {
143 ctr_write(i, 0);
144 }
145 }
146
147 mmcr0 = mfspr(SPRN_MMCR0);
148
149 /*
150 * now clear the freeze bit, counting will not start until we
151	 * rfid from this exception, because only at that point will
152 * the PMM bit be cleared
153 */
154 mmcr0 &= ~MMCR0_FC;
155 mtspr(SPRN_MMCR0, mmcr0);
156
157 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
158}
159
160static void rs64_stop(void)
161{
162 unsigned int mmcr0;
163
164 /* freeze counters */
165 mmcr0 = mfspr(SPRN_MMCR0);
166 mmcr0 |= MMCR0_FC;
167 mtspr(SPRN_MMCR0, mmcr0);
168
169 dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
170
171 mb();
172}
173
174static void rs64_handle_interrupt(struct pt_regs *regs,
175 struct op_counter_config *ctr)
176{
177 unsigned int mmcr0;
178 int val;
179 int i;
180 unsigned long pc = mfspr(SPRN_SIAR);
181 int is_kernel = (pc >= KERNELBASE);
182
183 /* set the PMM bit (see comment below) */
184 mtmsrd(mfmsr() | MSR_PMM);
185
186 for (i = 0; i < num_counters; ++i) {
187 val = ctr_read(i);
188 if (val < 0) {
189 if (ctr[i].enabled) {
190 oprofile_add_pc(pc, is_kernel, i);
191 ctr_write(i, reset_value[i]);
192 } else {
193 ctr_write(i, 0);
194 }
195 }
196 }
197
198 mmcr0 = mfspr(SPRN_MMCR0);
199
200 /* reset the perfmon trigger */
201 mmcr0 |= MMCR0_PMXE;
202
203 /*
204 * now clear the freeze bit, counting will not start until we
205 * rfid from this exception, because only at that point will
206 * the PMM bit be cleared
207 */
208 mmcr0 &= ~MMCR0_FC;
209 mtspr(SPRN_MMCR0, mmcr0);
210}
211
212struct op_ppc64_model op_model_rs64 = {
213 .reg_setup = rs64_reg_setup,
214 .cpu_setup = rs64_cpu_setup,
215 .start = rs64_start,
216 .stop = rs64_stop,
217 .handle_interrupt = rs64_handle_interrupt,
218};
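
ctrl_write() above is a read-modify-write of one event-select field inside MMCR0/MMCR1; stripped of the SPR access, the pattern it applies is roughly:

#include <stdio.h>

/* Replace the field described by shift/mask with val, the same
 * read-modify-write ctrl_write() performs on the MMCR contents. */
static unsigned int set_field(unsigned int reg, unsigned int shift,
			      unsigned int mask, unsigned int val)
{
	reg &= ~(mask << shift);
	reg |= val << shift;
	return reg;
}

int main(void)
{
	/* e.g. the 7-bit field at shift 6 used for counter 0 above */
	unsigned int mmcr0 = set_field(0, 6, 0x7F, 0x23);

	printf("mmcr0 = 0x%x\n", mmcr0);	/* 0x8c0 */
	return 0;
}
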