author    Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit    1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree      0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ppc64/kernel
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/ppc64/kernel')
-rw-r--r--  arch/ppc64/kernel/HvCall.c  36
-rw-r--r--  arch/ppc64/kernel/HvLpConfig.c  27
-rw-r--r--  arch/ppc64/kernel/HvLpEvent.c  88
-rw-r--r--  arch/ppc64/kernel/ItLpQueue.c  167
-rw-r--r--  arch/ppc64/kernel/LparData.c  250
-rw-r--r--  arch/ppc64/kernel/Makefile  68
-rw-r--r--  arch/ppc64/kernel/XmPciLpEvent.c  190
-rw-r--r--  arch/ppc64/kernel/align.c  396
-rw-r--r--  arch/ppc64/kernel/asm-offsets.c  193
-rw-r--r--  arch/ppc64/kernel/binfmt_elf32.c  78
-rw-r--r--  arch/ppc64/kernel/bitops.c  147
-rw-r--r--  arch/ppc64/kernel/btext.c  751
-rw-r--r--  arch/ppc64/kernel/cpu_setup_power4.S  214
-rw-r--r--  arch/ppc64/kernel/cputable.c  197
-rw-r--r--  arch/ppc64/kernel/dma.c  147
-rw-r--r--  arch/ppc64/kernel/eeh.c  937
-rw-r--r--  arch/ppc64/kernel/entry.S  845
-rw-r--r--  arch/ppc64/kernel/head.S  2139
-rw-r--r--  arch/ppc64/kernel/hvCall.S  98
-rw-r--r--  arch/ppc64/kernel/hvconsole.c  121
-rw-r--r--  arch/ppc64/kernel/hvcserver.c  249
-rw-r--r--  arch/ppc64/kernel/i8259.c  177
-rw-r--r--  arch/ppc64/kernel/i8259.h  17
-rw-r--r--  arch/ppc64/kernel/iSeries_VpdInfo.c  277
-rw-r--r--  arch/ppc64/kernel/iSeries_htab.c  242
-rw-r--r--  arch/ppc64/kernel/iSeries_iommu.c  175
-rw-r--r--  arch/ppc64/kernel/iSeries_irq.c  209
-rw-r--r--  arch/ppc64/kernel/iSeries_pci.c  912
-rw-r--r--  arch/ppc64/kernel/iSeries_pci_reset.c  104
-rw-r--r--  arch/ppc64/kernel/iSeries_proc.c  162
-rw-r--r--  arch/ppc64/kernel/iSeries_setup.c  877
-rw-r--r--  arch/ppc64/kernel/iSeries_setup.h  26
-rw-r--r--  arch/ppc64/kernel/iSeries_smp.c  151
-rw-r--r--  arch/ppc64/kernel/idle.c  380
-rw-r--r--  arch/ppc64/kernel/idle_power4.S  79
-rw-r--r--  arch/ppc64/kernel/init_task.c  36
-rw-r--r--  arch/ppc64/kernel/ioctl32.c  51
-rw-r--r--  arch/ppc64/kernel/iomap.c  126
-rw-r--r--  arch/ppc64/kernel/iommu.c  567
-rw-r--r--  arch/ppc64/kernel/irq.c  519
-rw-r--r--  arch/ppc64/kernel/kprobes.c  290
-rw-r--r--  arch/ppc64/kernel/lmb.c  372
-rw-r--r--  arch/ppc64/kernel/lparcfg.c  611
-rw-r--r--  arch/ppc64/kernel/maple_pci.c  521
-rw-r--r--  arch/ppc64/kernel/maple_setup.c  240
-rw-r--r--  arch/ppc64/kernel/maple_time.c  226
-rw-r--r--  arch/ppc64/kernel/mf.c  1239
-rw-r--r--  arch/ppc64/kernel/misc.S  1234
-rw-r--r--  arch/ppc64/kernel/module.c  442
-rw-r--r--  arch/ppc64/kernel/mpic.c  859
-rw-r--r--  arch/ppc64/kernel/mpic.h  267
-rw-r--r--  arch/ppc64/kernel/nvram.c  746
-rw-r--r--  arch/ppc64/kernel/of_device.c  272
-rw-r--r--  arch/ppc64/kernel/pSeries_hvCall.S  123
-rw-r--r--  arch/ppc64/kernel/pSeries_iommu.c  570
-rw-r--r--  arch/ppc64/kernel/pSeries_lpar.c  531
-rw-r--r--  arch/ppc64/kernel/pSeries_nvram.c  148
-rw-r--r--  arch/ppc64/kernel/pSeries_pci.c  602
-rw-r--r--  arch/ppc64/kernel/pSeries_reconfig.c  434
-rw-r--r--  arch/ppc64/kernel/pSeries_setup.c  612
-rw-r--r--  arch/ppc64/kernel/pSeries_smp.c  451
-rw-r--r--  arch/ppc64/kernel/pacaData.c  224
-rw-r--r--  arch/ppc64/kernel/pci.c  942
-rw-r--r--  arch/ppc64/kernel/pci.h  51
-rw-r--r--  arch/ppc64/kernel/pci_direct_iommu.c  95
-rw-r--r--  arch/ppc64/kernel/pci_dn.c  198
-rw-r--r--  arch/ppc64/kernel/pci_iommu.c  139
-rw-r--r--  arch/ppc64/kernel/pmac.h  31
-rw-r--r--  arch/ppc64/kernel/pmac_feature.c  676
-rw-r--r--  arch/ppc64/kernel/pmac_low_i2c.c  523
-rw-r--r--  arch/ppc64/kernel/pmac_nvram.c  495
-rw-r--r--  arch/ppc64/kernel/pmac_pci.c  793
-rw-r--r--  arch/ppc64/kernel/pmac_setup.c  511
-rw-r--r--  arch/ppc64/kernel/pmac_smp.c  316
-rw-r--r--  arch/ppc64/kernel/pmac_time.c  201
-rw-r--r--  arch/ppc64/kernel/pmc.c  67
-rw-r--r--  arch/ppc64/kernel/ppc_ksyms.c  95
-rw-r--r--  arch/ppc64/kernel/proc_ppc64.c  128
-rw-r--r--  arch/ppc64/kernel/process.c  688
-rw-r--r--  arch/ppc64/kernel/prom.c  1820
-rw-r--r--  arch/ppc64/kernel/prom_init.c  1838
-rw-r--r--  arch/ppc64/kernel/ptrace.c  328
-rw-r--r--  arch/ppc64/kernel/ptrace32.c  420
-rw-r--r--  arch/ppc64/kernel/ras.c  356
-rw-r--r--  arch/ppc64/kernel/rtas-proc.c  807
-rw-r--r--  arch/ppc64/kernel/rtas.c  657
-rw-r--r--  arch/ppc64/kernel/rtas_flash.c  725
-rw-r--r--  arch/ppc64/kernel/rtasd.c  527
-rw-r--r--  arch/ppc64/kernel/rtc.c  440
-rw-r--r--  arch/ppc64/kernel/scanlog.c  245
-rw-r--r--  arch/ppc64/kernel/semaphore.c  136
-rw-r--r--  arch/ppc64/kernel/setup.c  1392
-rw-r--r--  arch/ppc64/kernel/signal.c  575
-rw-r--r--  arch/ppc64/kernel/signal32.c  989
-rw-r--r--  arch/ppc64/kernel/smp-tbsync.c  179
-rw-r--r--  arch/ppc64/kernel/smp.c  622
-rw-r--r--  arch/ppc64/kernel/sys_ppc32.c  1329
-rw-r--r--  arch/ppc64/kernel/syscalls.c  258
-rw-r--r--  arch/ppc64/kernel/sysfs.c  431
-rw-r--r--  arch/ppc64/kernel/time.c  827
-rw-r--r--  arch/ppc64/kernel/traps.c  565
-rw-r--r--  arch/ppc64/kernel/u3_iommu.c  349
-rw-r--r--  arch/ppc64/kernel/udbg.c  360
-rw-r--r--  arch/ppc64/kernel/vdso.c  614
-rw-r--r--  arch/ppc64/kernel/vdso32/Makefile  36
-rw-r--r--  arch/ppc64/kernel/vdso32/cacheflush.S  65
-rw-r--r--  arch/ppc64/kernel/vdso32/datapage.S  68
-rw-r--r--  arch/ppc64/kernel/vdso32/gettimeofday.S  139
-rw-r--r--  arch/ppc64/kernel/vdso32/sigtramp.S  300
-rw-r--r--  arch/ppc64/kernel/vdso32/vdso32.lds.S  111
-rw-r--r--  arch/ppc64/kernel/vdso32/vdso32_wrapper.S  13
-rw-r--r--  arch/ppc64/kernel/vdso64/Makefile  35
-rw-r--r--  arch/ppc64/kernel/vdso64/cacheflush.S  64
-rw-r--r--  arch/ppc64/kernel/vdso64/datapage.S  68
-rw-r--r--  arch/ppc64/kernel/vdso64/gettimeofday.S  91
-rw-r--r--  arch/ppc64/kernel/vdso64/sigtramp.S  294
-rw-r--r--  arch/ppc64/kernel/vdso64/vdso64.lds.S  110
-rw-r--r--  arch/ppc64/kernel/vdso64/vdso64_wrapper.S  13
-rw-r--r--  arch/ppc64/kernel/vecemu.c  346
-rw-r--r--  arch/ppc64/kernel/vector.S  172
-rw-r--r--  arch/ppc64/kernel/vio.c  640
-rw-r--r--  arch/ppc64/kernel/viopath.c  675
-rw-r--r--  arch/ppc64/kernel/vmlinux.lds.S  145
-rw-r--r--  arch/ppc64/kernel/xics.c  713
124 files changed, 50275 insertions, 0 deletions
diff --git a/arch/ppc64/kernel/HvCall.c b/arch/ppc64/kernel/HvCall.c
new file mode 100644
index 000000000000..b772e65b57a2
--- /dev/null
+++ b/arch/ppc64/kernel/HvCall.c
@@ -0,0 +1,36 @@
/*
 * HvCall.c
 * Copyright (C) 2001 Mike Corrigan IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <asm/page.h>
#include <asm/abs_addr.h>
#include <asm/iSeries/HvCall.h>
#include <asm/iSeries/HvCallSc.h>
#include <asm/iSeries/HvTypes.h>


void HvCall_writeLogBuffer(const void *buffer, u64 len)
{
	struct HvLpBufferList hv_buf;
	u64 left_this_page;
	u64 cur = virt_to_abs(buffer);

	while (len) {
		hv_buf.addr = cur;
		left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur;
		if (left_this_page > len)
			left_this_page = len;
		hv_buf.len = left_this_page;
		len -= left_this_page;
		HvCall2(HvCallBaseWriteLogBuffer,
			virt_to_abs(&hv_buf),
			left_this_page);
		cur = (cur & PAGE_MASK) + PAGE_SIZE;
	}
}
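
The loop above hands the hypervisor one physically contiguous chunk per call: it computes how many bytes remain in the page containing cur, clamps that to the remaining length, and advances cur to the next page boundary. A minimal standalone sketch of the same page-splitting arithmetic, with the hypervisor call replaced by a hypothetical emit() stub and a 4KB page size assumed for illustration:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Hypothetical stand-in for HvCall2(HvCallBaseWriteLogBuffer, ...) */
static void emit(uint64_t addr, uint64_t len)
{
	printf("chunk at 0x%llx, %llu bytes\n",
	       (unsigned long long)addr, (unsigned long long)len);
}

static void write_log(uint64_t cur, uint64_t len)
{
	while (len) {
		/* bytes from cur up to the end of its page */
		uint64_t left = ((cur & PAGE_MASK) + PAGE_SIZE) - cur;

		if (left > len)
			left = len;
		emit(cur, left);
		len -= left;
		cur = (cur & PAGE_MASK) + PAGE_SIZE;	/* next page start */
	}
}

int main(void)
{
	write_log(0x1ff0, 0x30);	/* straddles a boundary: 16 + 32 bytes */
	return 0;
}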
diff --git a/arch/ppc64/kernel/HvLpConfig.c b/arch/ppc64/kernel/HvLpConfig.c
new file mode 100644
index 000000000000..cb1d6473203c
--- /dev/null
+++ b/arch/ppc64/kernel/HvLpConfig.c
@@ -0,0 +1,27 @@
/*
 * HvLpConfig.c
 * Copyright (C) 2001 Kyle A. Lucke, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/module.h>
#include <asm/iSeries/HvLpConfig.h>

HvLpIndex HvLpConfig_getLpIndex_outline(void)
{
	return HvLpConfig_getLpIndex();
}
EXPORT_SYMBOL(HvLpConfig_getLpIndex_outline);
diff --git a/arch/ppc64/kernel/HvLpEvent.c b/arch/ppc64/kernel/HvLpEvent.c
new file mode 100644
index 000000000000..9802beefa217
--- /dev/null
+++ b/arch/ppc64/kernel/HvLpEvent.c
@@ -0,0 +1,88 @@
/*
 * Copyright 2001 Mike Corrigan IBM Corp
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/system.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/iSeries/HvCallEvent.h>
#include <asm/iSeries/LparData.h>

/* Array of LpEvent handler functions */
LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes];
unsigned lpEventHandlerPaths[HvLpEvent_Type_NumTypes];

/* Register a handler for an LpEvent type */
int HvLpEvent_registerHandler( HvLpEvent_Type eventType, LpEventHandler handler )
{
	int rc = 1;
	if ( eventType < HvLpEvent_Type_NumTypes ) {
		lpEventHandler[eventType] = handler;
		rc = 0;
	}
	return rc;
}

int HvLpEvent_unregisterHandler( HvLpEvent_Type eventType )
{
	int rc = 1;

	might_sleep();

	if ( eventType < HvLpEvent_Type_NumTypes ) {
		if ( !lpEventHandlerPaths[eventType] ) {
			lpEventHandler[eventType] = NULL;
			rc = 0;

			/* We now sleep until all other CPUs have scheduled.
			 * This ensures that the deletion is seen by all other
			 * CPUs, and that the deleted handler isn't still
			 * running on another CPU when we return. */
			synchronize_kernel();
		}
	}
	return rc;
}
EXPORT_SYMBOL(HvLpEvent_registerHandler);
EXPORT_SYMBOL(HvLpEvent_unregisterHandler);

/* lpIndex is the partition index of the target partition.  It is needed
 * only for VirtualIo, VirtualLan and SessionMgr; zero means use our own
 * partition index (for the other types).
 */
int HvLpEvent_openPath( HvLpEvent_Type eventType, HvLpIndex lpIndex )
{
	int rc = 1;
	if ( eventType < HvLpEvent_Type_NumTypes &&
	     lpEventHandler[eventType] ) {
		if ( lpIndex == 0 )
			lpIndex = itLpNaca.xLpIndex;
		HvCallEvent_openLpEventPath( lpIndex, eventType );
		++lpEventHandlerPaths[eventType];
		rc = 0;
	}
	return rc;
}

int HvLpEvent_closePath( HvLpEvent_Type eventType, HvLpIndex lpIndex )
{
	int rc = 1;
	if ( eventType < HvLpEvent_Type_NumTypes &&
	     lpEventHandler[eventType] &&
	     lpEventHandlerPaths[eventType] ) {
		if ( lpIndex == 0 )
			lpIndex = itLpNaca.xLpIndex;
		HvCallEvent_closeLpEventPath( lpIndex, eventType );
		--lpEventHandlerPaths[eventType];
		rc = 0;
	}
	return rc;
}
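
For reference, a hedged sketch of how a consumer inside this source tree would drive the API above: register a handler, open a path (0 selects our own partition), and tear down in the reverse order, since unregistering fails while paths remain open. The handler body and the choice of HvLpEvent_Type_VirtualIo are illustrative:

/* Hypothetical consumer of the HvLpEvent API above. */
static void my_handler(struct HvLpEvent *event, struct pt_regs *regs)
{
	/* Copy anything needed later: the event is recycled once we return,
	 * and ACK processing is this handler's responsibility. */
}

static int my_init(void)
{
	if (HvLpEvent_registerHandler(HvLpEvent_Type_VirtualIo, my_handler))
		return -1;	/* event type out of range */
	if (HvLpEvent_openPath(HvLpEvent_Type_VirtualIo, 0)) {
		HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualIo);
		return -1;
	}
	return 0;
}

static void my_exit(void)
{
	HvLpEvent_closePath(HvLpEvent_Type_VirtualIo, 0);
	HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualIo);
}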
diff --git a/arch/ppc64/kernel/ItLpQueue.c b/arch/ppc64/kernel/ItLpQueue.c
new file mode 100644
index 000000000000..c923a815760e
--- /dev/null
+++ b/arch/ppc64/kernel/ItLpQueue.c
@@ -0,0 +1,167 @@
/*
 * ItLpQueue.c
 * Copyright (C) 2001 Mike Corrigan IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <asm/system.h>
#include <asm/paca.h>
#include <asm/iSeries/ItLpQueue.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/iSeries/HvCallEvent.h>
#include <asm/iSeries/LparData.h>

static __inline__ int set_inUse( struct ItLpQueue * lpQueue )
{
	int t;
	u32 * inUseP = &(lpQueue->xInUseWord);

	__asm__ __volatile__("\n\
1:	lwarx	%0,0,%2		\n\
	cmpwi	0,%0,0		\n\
	li	%0,0		\n\
	bne-	2f		\n\
	addi	%0,%0,1		\n\
	stwcx.	%0,0,%2		\n\
	bne-	1b		\n\
2:	eieio"
			: "=&r" (t), "=m" (lpQueue->xInUseWord)
			: "r" (inUseP), "m" (lpQueue->xInUseWord)
			: "cc");

	return t;
}

static __inline__ void clear_inUse( struct ItLpQueue * lpQueue )
{
	lpQueue->xInUseWord = 0;
}

/* Array of LpEvent handler functions */
extern LpEventHandler lpEventHandler[HvLpEvent_Type_NumTypes];
unsigned long ItLpQueueInProcess = 0;

struct HvLpEvent * ItLpQueue_getNextLpEvent( struct ItLpQueue * lpQueue )
{
	struct HvLpEvent * nextLpEvent =
		(struct HvLpEvent *)lpQueue->xSlicCurEventPtr;
	if ( nextLpEvent->xFlags.xValid ) {
		/* rmb() needed only for weakly consistent machines (regatta) */
		rmb();
		/* Set pointer to next potential event */
		lpQueue->xSlicCurEventPtr += ((nextLpEvent->xSizeMinus1 +
					       LpEventAlign ) /
					      LpEventAlign ) *
					     LpEventAlign;
		/* Wrap to beginning if no room at end */
		if (lpQueue->xSlicCurEventPtr > lpQueue->xSlicLastValidEventPtr)
			lpQueue->xSlicCurEventPtr = lpQueue->xSlicEventStackPtr;
	}
	else
		nextLpEvent = NULL;

	return nextLpEvent;
}

int ItLpQueue_isLpIntPending( struct ItLpQueue * lpQueue )
{
	int retval = 0;
	struct HvLpEvent * nextLpEvent;
	if ( lpQueue ) {
		nextLpEvent = (struct HvLpEvent *)lpQueue->xSlicCurEventPtr;
		retval = nextLpEvent->xFlags.xValid | lpQueue->xPlicOverflowIntPending;
	}
	return retval;
}

void ItLpQueue_clearValid( struct HvLpEvent * event )
{
	/* Clear the valid bit of the event.
	 * Also clear bits within this event that might look like
	 * valid bits (on 64-byte boundaries).
	 */
	unsigned extra = (( event->xSizeMinus1 + LpEventAlign ) /
			  LpEventAlign ) - 1;
	switch ( extra ) {
	case 3:
		((struct HvLpEvent*)((char*)event+3*LpEventAlign))->xFlags.xValid = 0;
		/* fall through */
	case 2:
		((struct HvLpEvent*)((char*)event+2*LpEventAlign))->xFlags.xValid = 0;
		/* fall through */
	case 1:
		((struct HvLpEvent*)((char*)event+1*LpEventAlign))->xFlags.xValid = 0;
		/* fall through */
	case 0:
		;
	}
	mb();
	event->xFlags.xValid = 0;
}

unsigned ItLpQueue_process( struct ItLpQueue * lpQueue, struct pt_regs *regs )
{
	unsigned numIntsProcessed = 0;
	struct HvLpEvent * nextLpEvent;

	/* If we have recursed, just return */
	if ( !set_inUse( lpQueue ) )
		return 0;

	if (ItLpQueueInProcess == 0)
		ItLpQueueInProcess = 1;
	else
		BUG();

	for (;;) {
		nextLpEvent = ItLpQueue_getNextLpEvent( lpQueue );
		if ( nextLpEvent ) {
			/* Count events to return to caller
			 * and count processed events in lpQueue
			 */
			++numIntsProcessed;
			lpQueue->xLpIntCount++;
			/* Call appropriate handler here, passing
			 * a pointer to the LpEvent.  The handler
			 * must make a copy of the LpEvent if it
			 * needs it in a bottom half. (perhaps for
			 * an ACK)
			 *
			 * Handlers are responsible for ACK processing
			 *
			 * The Hypervisor guarantees that LpEvents will
			 * only be delivered with types that we have
			 * registered for, so no type check is necessary
			 * here!
			 */
			if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes )
				lpQueue->xLpIntCountByType[nextLpEvent->xType]++;
			if ( nextLpEvent->xType < HvLpEvent_Type_NumTypes &&
			     lpEventHandler[nextLpEvent->xType] )
				lpEventHandler[nextLpEvent->xType](nextLpEvent, regs);
			else
				printk(KERN_INFO "Unexpected Lp Event type=%d\n", nextLpEvent->xType );

			ItLpQueue_clearValid( nextLpEvent );
		} else if ( lpQueue->xPlicOverflowIntPending )
			/*
			 * No more valid events.  If overflow events are
			 * pending, process them.
			 */
			HvCallEvent_getOverflowLpEvents( lpQueue->xIndex );
		else
			break;
	}

	ItLpQueueInProcess = 0;
	mb();
	clear_inUse( lpQueue );

	get_paca()->lpevent_count += numIntsProcessed;

	return numIntsProcessed;
}
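
set_inUse above is a lwarx/stwcx. (load-reserve / store-conditional) test-and-set: it returns 1 only if it atomically changed xInUseWord from 0 to 1, and the trailing eieio keeps later accesses from being reordered ahead of the acquisition. The same contract expressed portably with GCC's __atomic builtins — an illustrative analogue, not the kernel's implementation:

#include <stdint.h>

/* Returns 1 if we took the flag (word went 0 -> 1), 0 if it was busy. */
static int set_in_use(uint32_t *word)
{
	uint32_t expected = 0;

	return __atomic_compare_exchange_n(word, &expected, 1,
					   0,			/* strong CAS */
					   __ATOMIC_ACQUIRE,
					   __ATOMIC_RELAXED);
}

/* Matches clear_inUse above: a plain store releases the flag. */
static void clear_in_use(uint32_t *word)
{
	__atomic_store_n(word, 0, __ATOMIC_RELEASE);
}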
diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c
new file mode 100644
index 000000000000..badc5a443614
--- /dev/null
+++ b/arch/ppc64/kernel/LparData.c
@@ -0,0 +1,250 @@
/*
 * Copyright 2001 Mike Corrigan, IBM Corp
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/threads.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/naca.h>
#include <asm/abs_addr.h>
#include <asm/iSeries/ItLpNaca.h>
#include <asm/lppaca.h>
#include <asm/iSeries/ItLpRegSave.h>
#include <asm/paca.h>
#include <asm/iSeries/HvReleaseData.h>
#include <asm/iSeries/LparMap.h>
#include <asm/iSeries/ItVpdAreas.h>
#include <asm/iSeries/ItIplParmsReal.h>
#include <asm/iSeries/ItExtVpdPanel.h>
#include <asm/iSeries/ItLpQueue.h>
#include <asm/iSeries/IoHriProcessorVpd.h>
#include <asm/iSeries/ItSpCommArea.h>

/* The LpQueue is used to pass event data from the hypervisor to
 * the partition.  This is where I/O interrupt events are communicated.
 */

/* May be filled in by the hypervisor so cannot end up in the BSS */
struct ItLpQueue xItLpQueue __attribute__((__section__(".data")));


/* The HvReleaseData is the root of the information shared between
 * the hypervisor and Linux.
 */

struct HvReleaseData hvReleaseData = {
	.xDesc = 0xc8a5d9c4,	/* "HvRD" ebcdic */
	.xSize = sizeof(struct HvReleaseData),
	.xVpdAreasPtrOffset = offsetof(struct naca_struct, xItVpdAreas),
	.xSlicNacaAddr = &naca,		/* 64-bit Naca address */
	.xMsNucDataOffset = 0x4800,	/* offset of LparMap within loadarea (see head.S) */
	.xTagsMode = 1,			/* tags inactive */
	.xAddressSize = 0,		/* 64 bit */
	.xNoSharedProcs = 0,		/* shared processors */
	.xNoHMT = 0,			/* HMT allowed */
	.xRsvd2 = 6,			/* TEMP: This allows non-GA driver */
	.xVrmIndex = 4,			/* We are v5r2m0 */
	.xMinSupportedPlicVrmIndex = 3,		/* v5r1m0 */
	.xMinCompatablePlicVrmIndex = 3,	/* v5r1m0 */
	.xVrmName = { 0xd3, 0x89, 0x95, 0xa4,	/* "Linux 2.4.64" ebcdic */
		      0xa7, 0x40, 0xf2, 0x4b,
		      0xf4, 0x4b, 0xf6, 0xf4 },
};

extern void system_reset_iSeries(void);
extern void machine_check_iSeries(void);
extern void data_access_iSeries(void);
extern void instruction_access_iSeries(void);
extern void hardware_interrupt_iSeries(void);
extern void alignment_iSeries(void);
extern void program_check_iSeries(void);
extern void fp_unavailable_iSeries(void);
extern void decrementer_iSeries(void);
extern void trap_0a_iSeries(void);
extern void trap_0b_iSeries(void);
extern void system_call_iSeries(void);
extern void single_step_iSeries(void);
extern void trap_0e_iSeries(void);
extern void performance_monitor_iSeries(void);
extern void data_access_slb_iSeries(void);
extern void instruction_access_slb_iSeries(void);

struct ItLpNaca itLpNaca = {
	.xDesc = 0xd397d581,		/* "LpNa" ebcdic */
	.xSize = 0x0400,		/* size of ItLpNaca */
	.xIntHdlrOffset = 0x0300,	/* offset to int array */
	.xMaxIntHdlrEntries = 19,	/* # ents */
	.xPrimaryLpIndex = 0,		/* Part # of primary */
	.xServiceLpIndex = 0,		/* Part # of serv */
	.xLpIndex = 0,			/* Part # of me */
	.xMaxLpQueues = 0,		/* # of LP queues */
	.xLpQueueOffset = 0x100,	/* offset of start of LP queues */
	.xPirEnvironMode = 0,		/* Piranha stuff */
	.xPirConsoleMode = 0,
	.xPirDasdMode = 0,
	.xLparInstalled = 0,
	.xSysPartitioned = 0,
	.xHwSyncedTBs = 0,
	.xIntProcUtilHmt = 0,
	.xSpVpdFormat = 0,
	.xIntProcRatio = 0,
	.xPlicVrmIndex = 0,		/* VRM index of PLIC */
	.xMinSupportedSlicVrmInd = 0,	/* min supported SLIC */
	.xMinCompatableSlicVrmInd = 0,	/* min compat SLIC */
	.xLoadAreaAddr = 0,		/* 64-bit addr of load area */
	.xLoadAreaChunks = 0,		/* chunks for load area */
	.xPaseSysCallCRMask = 0,	/* PASE mask */
	.xSlicSegmentTablePtr = 0,	/* seg table */
	.xOldLpQueue = { 0 },		/* Old LP Queue */
	.xInterruptHdlr = {
		(u64)system_reset_iSeries,	/* 0x100 System Reset */
		(u64)machine_check_iSeries,	/* 0x200 Machine Check */
		(u64)data_access_iSeries,	/* 0x300 Data Access */
		(u64)instruction_access_iSeries,/* 0x400 Instruction Access */
		(u64)hardware_interrupt_iSeries,/* 0x500 External */
		(u64)alignment_iSeries,		/* 0x600 Alignment */
		(u64)program_check_iSeries,	/* 0x700 Program Check */
		(u64)fp_unavailable_iSeries,	/* 0x800 FP Unavailable */
		(u64)decrementer_iSeries,	/* 0x900 Decrementer */
		(u64)trap_0a_iSeries,		/* 0xa00 Trap 0A */
		(u64)trap_0b_iSeries,		/* 0xb00 Trap 0B */
		(u64)system_call_iSeries,	/* 0xc00 System Call */
		(u64)single_step_iSeries,	/* 0xd00 Single Step */
		(u64)trap_0e_iSeries,		/* 0xe00 Trap 0E */
		(u64)performance_monitor_iSeries,/* 0xf00 Performance Monitor */
		0,				/* int 0x1000 */
		0,				/* int 0x1010 */
		0,				/* int 0x1020 CPU ctls */
		(u64)hardware_interrupt_iSeries,/* SC Ret Hdlr */
		(u64)data_access_slb_iSeries,	/* 0x380 D-SLB */
		(u64)instruction_access_slb_iSeries /* 0x480 I-SLB */
	}
};
EXPORT_SYMBOL(itLpNaca);

/* May be filled in by the hypervisor so cannot end up in the BSS */
struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data")));

/* May be filled in by the hypervisor so cannot end up in the BSS */
struct ItExtVpdPanel xItExtVpdPanel __attribute__((__section__(".data")));
EXPORT_SYMBOL(xItExtVpdPanel);

#define maxPhysicalProcessors 32

struct IoHriProcessorVpd xIoHriProcessorVpd[maxPhysicalProcessors] = {
	{
		.xInstCacheOperandSize = 32,
		.xDataCacheOperandSize = 32,
		.xProcFreq = 50000000,
		.xTimeBaseFreq = 50000000,
		.xPVR = 0x3600
	}
};

/* Space for Main Store Vpd 27,200 bytes */
/* May be filled in by the hypervisor so cannot end up in the BSS */
u64 xMsVpd[3400] __attribute__((__section__(".data")));

/* Space for Recovery Log Buffer */
/* May be filled in by the hypervisor so cannot end up in the BSS */
u64 xRecoveryLogBuffer[32] __attribute__((__section__(".data")));

struct SpCommArea xSpCommArea = {
	.xDesc = 0xE2D7C3C2,
	.xFormat = 1,
};

/* The LparMap data is now located at offset 0x6000 in head.S.
 * It was put there so that the HvReleaseData could address it
 * with a 32-bit offset as required by the iSeries hypervisor.
 *
 * The Naca has a pointer to the ItVpdAreas.  The hypervisor finds
 * the Naca via the HvReleaseData area.  The HvReleaseData has the
 * offset into the Naca of the pointer to the ItVpdAreas.
 */
struct ItVpdAreas itVpdAreas = {
	.xSlicDesc = 0xc9a3e5c1,		/* "ItVA" */
	.xSlicSize = sizeof(struct ItVpdAreas),
	.xSlicVpdEntries = ItVpdMaxEntries,	/* # VPD array entries */
	.xSlicDmaEntries = ItDmaMaxEntries,	/* # DMA array entries */
	.xSlicMaxLogicalProcs = NR_CPUS * 2,	/* Max logical procs */
	.xSlicMaxPhysicalProcs = maxPhysicalProcessors,	/* Max physical procs */
	.xSlicDmaToksOffset = offsetof(struct ItVpdAreas, xPlicDmaToks),
	.xSlicVpdAdrsOffset = offsetof(struct ItVpdAreas, xSlicVpdAdrs),
	.xSlicDmaLensOffset = offsetof(struct ItVpdAreas, xPlicDmaLens),
	.xSlicVpdLensOffset = offsetof(struct ItVpdAreas, xSlicVpdLens),
	.xSlicMaxSlotLabels = 0,		/* max slot labels */
	.xSlicMaxLpQueues = 1,			/* max LP queues */
	.xPlicDmaLens = { 0 },			/* DMA lengths */
	.xPlicDmaToks = { 0 },			/* DMA tokens */
	.xSlicVpdLens = {			/* VPD lengths */
		0, 0, 0,			/*  0 -  2 */
		sizeof(xItExtVpdPanel),		/*  3 Extended VPD */
		sizeof(struct paca_struct),	/*  4 length of Paca */
		0,				/*  5 */
		sizeof(struct ItIplParmsReal),	/*  6 length of IPL parms */
		26992,				/*  7 length of MS VPD */
		0,				/*  8 */
		sizeof(struct ItLpNaca),	/*  9 length of LP Naca */
		0,				/* 10 */
		256,				/* 11 length of Recovery Log Buf */
		sizeof(struct SpCommArea),	/* 12 length of SP Comm Area */
		0, 0, 0,			/* 13 - 15 */
		sizeof(struct IoHriProcessorVpd),/* 16 length of Proc Vpd */
		0, 0, 0, 0, 0, 0,		/* 17 - 22 */
		sizeof(struct ItLpQueue),	/* 23 length of Lp Queue */
		0, 0				/* 24 - 25 */
	},
	.xSlicVpdAdrs = {			/* VPD addresses */
		0, 0, 0,			/*  0 -  2 */
		&xItExtVpdPanel,		/*  3 Extended VPD */
		&paca[0],			/*  4 first Paca */
		0,				/*  5 */
		&xItIplParmsReal,		/*  6 IPL parms */
		&xMsVpd,			/*  7 MS Vpd */
		0,				/*  8 */
		&itLpNaca,			/*  9 LpNaca */
		0,				/* 10 */
		&xRecoveryLogBuffer,		/* 11 Recovery Log Buffer */
		&xSpCommArea,			/* 12 SP Comm Area */
		0, 0, 0,			/* 13 - 15 */
		&xIoHriProcessorVpd,		/* 16 Proc Vpd */
		0, 0, 0, 0, 0, 0,		/* 17 - 22 */
		&xItLpQueue,			/* 23 Lp Queue */
		0, 0
	}
};

struct msChunks msChunks;
EXPORT_SYMBOL(msChunks);

/* Depending on whether this is called from iSeries or pSeries setup
 * code, the location of the msChunks struct may or may not have
 * to be reloc'd, so we force the caller to do that for us by passing
 * in a pointer to the structure.
 */
unsigned long
msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size)
{
	unsigned long offset = reloc_offset();
	struct msChunks *_msChunks = PTRRELOC(&msChunks);

	_msChunks->num_chunks = num_chunks;
	_msChunks->chunk_size = chunk_size;
	_msChunks->chunk_shift = __ilog2(chunk_size);
	_msChunks->chunk_mask = (1UL << _msChunks->chunk_shift) - 1;

	mem = _ALIGN(mem, sizeof(msChunks_entry));
	_msChunks->abs = (msChunks_entry *)(mem + offset);
	mem += num_chunks * sizeof(msChunks_entry);

	return mem;
}
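
msChunks_alloc above is a boot-time bump allocator: the caller passes in a running memory cursor, the function carves out the chunk-translation array, and the advanced cursor is returned for the next allocation. A sketch of the calling pattern, plus a hypothetical lookup helper showing how chunk_shift/chunk_mask would split an address — both are illustrations, not code from this file:

/* Illustrative boot-time pass, threading the cursor through allocators. */
static unsigned long alloc_boot_tables(unsigned long mem)
{
	/* 1024 chunks of 256KB; returns the cursor past the new array */
	mem = msChunks_alloc(mem, 1024, 0x40000);
	/* ...subsequent boot-time allocations continue from mem... */
	return mem;
}

/* Hypothetical translation built on the table: the high bits select a
 * chunk entry, the low chunk_mask bits carry the offset within it. */
static unsigned long chunk_translate(unsigned long addr)
{
	return (msChunks.abs[addr >> msChunks.chunk_shift]
			<< msChunks.chunk_shift)
		| (addr & msChunks.chunk_mask);
}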
diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile
new file mode 100644
index 000000000000..96d90b0c5119
--- /dev/null
+++ b/arch/ppc64/kernel/Makefile
@@ -0,0 +1,68 @@
#
# Makefile for the linux ppc64 kernel.
#

EXTRA_CFLAGS += -mno-minimal-toc
extra-y := head.o vmlinux.lds

obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \
	time.o process.o signal.o syscalls.o misc.o ptrace.o \
	align.o semaphore.o bitops.o pacaData.o \
	udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
	ptrace32.o signal32.o rtc.o init_task.o \
	lmb.o cputable.o cpu_setup_power4.o idle_power4.o \
	iommu.o sysfs.o vdso.o pmc.o
obj-y += vdso32/ vdso64/

obj-$(CONFIG_PPC_OF) += of_device.o

pci-obj-$(CONFIG_PPC_ISERIES) += iSeries_pci.o iSeries_pci_reset.o
pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o

obj-$(CONFIG_PCI) += pci.o pci_iommu.o iomap.o $(pci-obj-y)

obj-$(CONFIG_PPC_ISERIES) += iSeries_irq.o \
	iSeries_VpdInfo.o XmPciLpEvent.o \
	HvCall.o HvLpConfig.o LparData.o \
	iSeries_setup.o ItLpQueue.o hvCall.o \
	mf.o HvLpEvent.o iSeries_proc.o iSeries_htab.o \
	iSeries_iommu.o

obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o prom_init.o prom.o mpic.o

obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \
	pSeries_nvram.o rtasd.o ras.o pSeries_reconfig.o \
	xics.o rtas.o pSeries_setup.o pSeries_iommu.o

obj-$(CONFIG_EEH) += eeh.o
obj-$(CONFIG_PROC_FS) += proc_ppc64.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
obj-$(CONFIG_SCANLOG) += scanlog.o
obj-$(CONFIG_VIOPATH) += viopath.o
obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_HVCS) += hvcserver.o
obj-$(CONFIG_IBMVIO) += vio.o

obj-$(CONFIG_PPC_PMAC) += pmac_setup.o pmac_feature.o pmac_pci.o \
	pmac_time.o pmac_nvram.o pmac_low_i2c.o

obj-$(CONFIG_PPC_MAPLE) += maple_setup.o maple_pci.o maple_time.o

obj-$(CONFIG_U3_DART) += u3_iommu.o

ifdef CONFIG_SMP
obj-$(CONFIG_PPC_PMAC) += pmac_smp.o smp-tbsync.o
obj-$(CONFIG_PPC_ISERIES) += iSeries_smp.o
obj-$(CONFIG_PPC_PSERIES) += pSeries_smp.o
obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o
endif

obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
obj-$(CONFIG_KPROBES) += kprobes.o

CFLAGS_ioctl32.o += -Ifs/
diff --git a/arch/ppc64/kernel/XmPciLpEvent.c b/arch/ppc64/kernel/XmPciLpEvent.c
new file mode 100644
index 000000000000..809c9bc6678b
--- /dev/null
+++ b/arch/ppc64/kernel/XmPciLpEvent.c
@@ -0,0 +1,190 @@
/*
 * File XmPciLpEvent.c created by Wayne Holm on Mon Jan 15 2001.
 *
 * This module handles PCI interrupt events sent by the iSeries Hypervisor.
 */

#include <linux/config.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/bootmem.h>
#include <linux/ide.h>

#include <asm/iSeries/HvTypes.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/iSeries/HvCallPci.h>
#include <asm/iSeries/XmPciLpEvent.h>
#include <asm/ppcdebug.h>

static long Pci_Interrupt_Count;
static long Pci_Event_Count;

enum XmPciLpEvent_Subtype {
	XmPciLpEvent_BusCreated = 0,		// PHB has been created
	XmPciLpEvent_BusError = 1,		// PHB has failed
	XmPciLpEvent_BusFailed = 2,		// Msg to Secondary, Primary failed bus
	XmPciLpEvent_NodeFailed = 4,		// Multi-adapter bridge has failed
	XmPciLpEvent_NodeRecovered = 5,		// Multi-adapter bridge has recovered
	XmPciLpEvent_BusRecovered = 12,		// PHB has been recovered
	XmPciLpEvent_UnQuiesceBus = 18,		// Secondary bus unquiescing
	XmPciLpEvent_BridgeError = 21,		// Bridge Error
	XmPciLpEvent_SlotInterrupt = 22		// Slot interrupt
};

struct XmPciLpEvent_BusInterrupt {
	HvBusNumber busNumber;
	HvSubBusNumber subBusNumber;
};

struct XmPciLpEvent_NodeInterrupt {
	HvBusNumber busNumber;
	HvSubBusNumber subBusNumber;
	HvAgentId deviceId;
};

struct XmPciLpEvent {
	struct HvLpEvent hvLpEvent;

	union {
		u64 alignData;			// Align on an 8-byte boundary

		struct {
			u32 fisr;
			HvBusNumber busNumber;
			HvSubBusNumber subBusNumber;
			HvAgentId deviceId;
		} slotInterrupt;

		struct XmPciLpEvent_BusInterrupt busFailed;
		struct XmPciLpEvent_BusInterrupt busRecovered;
		struct XmPciLpEvent_BusInterrupt busCreated;

		struct XmPciLpEvent_NodeInterrupt nodeFailed;
		struct XmPciLpEvent_NodeInterrupt nodeRecovered;

	} eventData;

};

static void intReceived(struct XmPciLpEvent *eventParm,
			struct pt_regs *regsParm);

static void XmPciLpEvent_handler(struct HvLpEvent *eventParm,
				 struct pt_regs *regsParm)
{
#ifdef CONFIG_PCI
#if 0
	PPCDBG(PPCDBG_BUSWALK, "XmPciLpEvent_handler, type 0x%x\n",
	       eventParm->xType);
#endif
	++Pci_Event_Count;

	if (eventParm && (eventParm->xType == HvLpEvent_Type_PciIo)) {
		switch (eventParm->xFlags.xFunction) {
		case HvLpEvent_Function_Int:
			intReceived((struct XmPciLpEvent *)eventParm, regsParm);
			break;
		case HvLpEvent_Function_Ack:
			printk(KERN_ERR
			       "XmPciLpEvent.c: unexpected ack received\n");
			break;
		default:
			printk(KERN_ERR
			       "XmPciLpEvent.c: unexpected event function %d\n",
			       (int)eventParm->xFlags.xFunction);
			break;
		}
	} else if (eventParm)
		printk(KERN_ERR
		       "XmPciLpEvent.c: Unrecognized PCI event type 0x%x\n",
		       (int)eventParm->xType);
	else
		printk(KERN_ERR "XmPciLpEvent.c: NULL event received\n");
#endif
}

static void intReceived(struct XmPciLpEvent *eventParm,
			struct pt_regs *regsParm)
{
	int irq;

	++Pci_Interrupt_Count;
#if 0
	PPCDBG(PPCDBG_BUSWALK, "PCI: XmPciLpEvent.c: intReceived\n");
#endif

	switch (eventParm->hvLpEvent.xSubtype) {
	case XmPciLpEvent_SlotInterrupt:
		irq = eventParm->hvLpEvent.xCorrelationToken;
		/* Dispatch the interrupt handlers for this irq */
		ppc_irq_dispatch_handler(regsParm, irq);
		HvCallPci_eoi(eventParm->eventData.slotInterrupt.busNumber,
			      eventParm->eventData.slotInterrupt.subBusNumber,
			      eventParm->eventData.slotInterrupt.deviceId);
		break;
	/* Ignore error recovery events for now */
	case XmPciLpEvent_BusCreated:
		printk(KERN_INFO "XmPciLpEvent.c: system bus %d created\n",
		       eventParm->eventData.busCreated.busNumber);
		break;
	case XmPciLpEvent_BusError:
	case XmPciLpEvent_BusFailed:
		printk(KERN_INFO "XmPciLpEvent.c: system bus %d failed\n",
		       eventParm->eventData.busFailed.busNumber);
		break;
	case XmPciLpEvent_BusRecovered:
	case XmPciLpEvent_UnQuiesceBus:
		printk(KERN_INFO "XmPciLpEvent.c: system bus %d recovered\n",
		       eventParm->eventData.busRecovered.busNumber);
		break;
	case XmPciLpEvent_NodeFailed:
	case XmPciLpEvent_BridgeError:
		printk(KERN_INFO
		       "XmPciLpEvent.c: multi-adapter bridge %d/%d/%d failed\n",
		       eventParm->eventData.nodeFailed.busNumber,
		       eventParm->eventData.nodeFailed.subBusNumber,
		       eventParm->eventData.nodeFailed.deviceId);
		break;
	case XmPciLpEvent_NodeRecovered:
		printk(KERN_INFO
		       "XmPciLpEvent.c: multi-adapter bridge %d/%d/%d recovered\n",
		       eventParm->eventData.nodeRecovered.busNumber,
		       eventParm->eventData.nodeRecovered.subBusNumber,
		       eventParm->eventData.nodeRecovered.deviceId);
		break;
	default:
		printk(KERN_ERR
		       "XmPciLpEvent.c: unrecognized event subtype 0x%x\n",
		       eventParm->hvLpEvent.xSubtype);
		break;
	}
}


/* This should be called sometime prior to buswalk (init_IRQ would be good) */
int XmPciLpEvent_init(void)
{
	int xRc;

	PPCDBG(PPCDBG_BUSWALK,
	       "XmPciLpEvent_init, Register Event type 0x%04X\n",
	       HvLpEvent_Type_PciIo);

	xRc = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo,
					&XmPciLpEvent_handler);
	if (xRc == 0) {
		xRc = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0);
		if (xRc != 0)
			printk(KERN_ERR
			       "XmPciLpEvent.c: open event path failed with rc 0x%x\n",
			       xRc);
	} else
		printk(KERN_ERR
		       "XmPciLpEvent.c: register handler failed with rc 0x%x\n",
		       xRc);
	return xRc;
}
diff --git a/arch/ppc64/kernel/align.c b/arch/ppc64/kernel/align.c
new file mode 100644
index 000000000000..330e7ef81427
--- /dev/null
+++ b/arch/ppc64/kernel/align.c
@@ -0,0 +1,396 @@
/* align.c - handle alignment exceptions for the PowerPC.
 *
 * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
 * Copyright (c) 1998-1999 TiVo, Inc.
 *    PowerPC 403GCX modifications.
 * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
 *    PowerPC 403GCX/405GP modifications.
 * Copyright (c) 2001-2002 PPC64 team, IBM Corp
 *    64-bit and Power4 support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/cache.h>
#include <asm/cputable.h>

struct aligninfo {
	unsigned char len;
	unsigned char flags;
};

#define IS_XFORM(inst)	(((inst) >> 26) == 31)
#define IS_DSFORM(inst)	(((inst) >> 26) >= 56)

#define INVALID	{ 0, 0 }

#define LD	1	/* load */
#define ST	2	/* store */
#define	SE	4	/* sign-extend value */
#define F	8	/* to/from fp regs */
#define U	0x10	/* update index register */
#define M	0x20	/* multiple load/store */
#define SW	0x40	/* byte swap */

#define DCBZ	0x5f	/* 8xx/82xx dcbz faults when cache not enabled */

/*
 * The PowerPC stores certain bits of the instruction that caused the
 * alignment exception in the DSISR register.  This array maps those
 * bits to information about the operand length and what the
 * instruction would do.
 */
static struct aligninfo aligninfo[128] = {
	{ 4, LD },		/* 00 0 0000: lwz / lwarx */
	INVALID,		/* 00 0 0001 */
	{ 4, ST },		/* 00 0 0010: stw */
	INVALID,		/* 00 0 0011 */
	{ 2, LD },		/* 00 0 0100: lhz */
	{ 2, LD+SE },		/* 00 0 0101: lha */
	{ 2, ST },		/* 00 0 0110: sth */
	{ 4, LD+M },		/* 00 0 0111: lmw */
	{ 4, LD+F },		/* 00 0 1000: lfs */
	{ 8, LD+F },		/* 00 0 1001: lfd */
	{ 4, ST+F },		/* 00 0 1010: stfs */
	{ 8, ST+F },		/* 00 0 1011: stfd */
	INVALID,		/* 00 0 1100 */
	{ 8, LD },		/* 00 0 1101: ld */
	INVALID,		/* 00 0 1110 */
	{ 8, ST },		/* 00 0 1111: std */
	{ 4, LD+U },		/* 00 1 0000: lwzu */
	INVALID,		/* 00 1 0001 */
	{ 4, ST+U },		/* 00 1 0010: stwu */
	INVALID,		/* 00 1 0011 */
	{ 2, LD+U },		/* 00 1 0100: lhzu */
	{ 2, LD+SE+U },		/* 00 1 0101: lhau */
	{ 2, ST+U },		/* 00 1 0110: sthu */
	{ 4, ST+M },		/* 00 1 0111: stmw */
	{ 4, LD+F+U },		/* 00 1 1000: lfsu */
	{ 8, LD+F+U },		/* 00 1 1001: lfdu */
	{ 4, ST+F+U },		/* 00 1 1010: stfsu */
	{ 8, ST+F+U },		/* 00 1 1011: stfdu */
	INVALID,		/* 00 1 1100 */
	INVALID,		/* 00 1 1101 */
	INVALID,		/* 00 1 1110 */
	INVALID,		/* 00 1 1111 */
	{ 8, LD },		/* 01 0 0000: ldx */
	INVALID,		/* 01 0 0001 */
	{ 8, ST },		/* 01 0 0010: stdx */
	INVALID,		/* 01 0 0011 */
	INVALID,		/* 01 0 0100 */
	{ 4, LD+SE },		/* 01 0 0101: lwax */
	INVALID,		/* 01 0 0110 */
	INVALID,		/* 01 0 0111 */
	{ 0, LD },		/* 01 0 1000: lswx */
	{ 0, LD },		/* 01 0 1001: lswi */
	{ 0, ST },		/* 01 0 1010: stswx */
	{ 0, ST },		/* 01 0 1011: stswi */
	INVALID,		/* 01 0 1100 */
	{ 8, LD+U },		/* 01 0 1101: ldu */
	INVALID,		/* 01 0 1110 */
	{ 8, ST+U },		/* 01 0 1111: stdu */
	{ 8, LD+U },		/* 01 1 0000: ldux */
	INVALID,		/* 01 1 0001 */
	{ 8, ST+U },		/* 01 1 0010: stdux */
	INVALID,		/* 01 1 0011 */
	INVALID,		/* 01 1 0100 */
	{ 4, LD+SE+U },		/* 01 1 0101: lwaux */
	INVALID,		/* 01 1 0110 */
	INVALID,		/* 01 1 0111 */
	INVALID,		/* 01 1 1000 */
	INVALID,		/* 01 1 1001 */
	INVALID,		/* 01 1 1010 */
	INVALID,		/* 01 1 1011 */
	INVALID,		/* 01 1 1100 */
	INVALID,		/* 01 1 1101 */
	INVALID,		/* 01 1 1110 */
	INVALID,		/* 01 1 1111 */
	INVALID,		/* 10 0 0000 */
	INVALID,		/* 10 0 0001 */
	{ 0, ST },		/* 10 0 0010: stwcx. */
	INVALID,		/* 10 0 0011 */
	INVALID,		/* 10 0 0100 */
	INVALID,		/* 10 0 0101 */
	INVALID,		/* 10 0 0110 */
	INVALID,		/* 10 0 0111 */
	{ 4, LD+SW },		/* 10 0 1000: lwbrx */
	INVALID,		/* 10 0 1001 */
	{ 4, ST+SW },		/* 10 0 1010: stwbrx */
	INVALID,		/* 10 0 1011 */
	{ 2, LD+SW },		/* 10 0 1100: lhbrx */
	{ 4, LD+SE },		/* 10 0 1101: lwa */
	{ 2, ST+SW },		/* 10 0 1110: sthbrx */
	INVALID,		/* 10 0 1111 */
	INVALID,		/* 10 1 0000 */
	INVALID,		/* 10 1 0001 */
	INVALID,		/* 10 1 0010 */
	INVALID,		/* 10 1 0011 */
	INVALID,		/* 10 1 0100 */
	INVALID,		/* 10 1 0101 */
	INVALID,		/* 10 1 0110 */
	INVALID,		/* 10 1 0111 */
	INVALID,		/* 10 1 1000 */
	INVALID,		/* 10 1 1001 */
	INVALID,		/* 10 1 1010 */
	INVALID,		/* 10 1 1011 */
	INVALID,		/* 10 1 1100 */
	INVALID,		/* 10 1 1101 */
	INVALID,		/* 10 1 1110 */
	{ L1_CACHE_BYTES, ST },	/* 10 1 1111: dcbz */
	{ 4, LD },		/* 11 0 0000: lwzx */
	INVALID,		/* 11 0 0001 */
	{ 4, ST },		/* 11 0 0010: stwx */
	INVALID,		/* 11 0 0011 */
	{ 2, LD },		/* 11 0 0100: lhzx */
	{ 2, LD+SE },		/* 11 0 0101: lhax */
	{ 2, ST },		/* 11 0 0110: sthx */
	INVALID,		/* 11 0 0111 */
	{ 4, LD+F },		/* 11 0 1000: lfsx */
	{ 8, LD+F },		/* 11 0 1001: lfdx */
	{ 4, ST+F },		/* 11 0 1010: stfsx */
	{ 8, ST+F },		/* 11 0 1011: stfdx */
	INVALID,		/* 11 0 1100 */
	{ 8, LD+M },		/* 11 0 1101: lmd */
	INVALID,		/* 11 0 1110 */
	{ 8, ST+M },		/* 11 0 1111: stmd */
	{ 4, LD+U },		/* 11 1 0000: lwzux */
	INVALID,		/* 11 1 0001 */
	{ 4, ST+U },		/* 11 1 0010: stwux */
	INVALID,		/* 11 1 0011 */
	{ 2, LD+U },		/* 11 1 0100: lhzux */
	{ 2, LD+SE+U },		/* 11 1 0101: lhaux */
	{ 2, ST+U },		/* 11 1 0110: sthux */
	INVALID,		/* 11 1 0111 */
	{ 4, LD+F+U },		/* 11 1 1000: lfsux */
	{ 8, LD+F+U },		/* 11 1 1001: lfdux */
	{ 4, ST+F+U },		/* 11 1 1010: stfsux */
	{ 8, ST+F+U },		/* 11 1 1011: stfdux */
	INVALID,		/* 11 1 1100 */
	INVALID,		/* 11 1 1101 */
	INVALID,		/* 11 1 1110 */
	INVALID,		/* 11 1 1111 */
};

#define SWAP(a, b)	(t = (a), (a) = (b), (b) = t)

static inline unsigned make_dsisr(unsigned instr)
{
	unsigned dsisr;

	/* create a DSISR value from the instruction */
	dsisr = (instr & 0x03ff0000) >> 16;		/* bits  6:15 --> 22:31 */

	if ( IS_XFORM(instr) ) {
		dsisr |= (instr & 0x00000006) << 14;	/* bits 29:30 --> 15:16 */
		dsisr |= (instr & 0x00000040) << 8;	/* bit     25 -->    17 */
		dsisr |= (instr & 0x00000780) << 3;	/* bits 21:24 --> 18:21 */
	}
	else {
		dsisr |= (instr & 0x04000000) >> 12;	/* bit      5 -->    17 */
		dsisr |= (instr & 0x78000000) >> 17;	/* bits  1: 4 --> 18:21 */
		if ( IS_DSFORM(instr) ) {
			dsisr |= (instr & 0x00000003) << 18; /* bits 30:31 --> 12:13 */
		}
	}

	return dsisr;
}

int
fix_alignment(struct pt_regs *regs)
{
	unsigned int instr, nb, flags;
	int t;
	unsigned long reg, areg;
	unsigned long i;
	int ret;
	unsigned dsisr;
	unsigned char __user *addr;
	unsigned char __user *p;
	unsigned long __user *lp;
	union {
		long ll;
		double dd;
		unsigned char v[8];
		struct {
			unsigned hi32;
			int low32;
		} x32;
		struct {
			unsigned char hi48[6];
			short low16;
		} x16;
	} data;

	/*
	 * Return 1 on success
	 * Return 0 if unable to handle the interrupt
	 * Return -EFAULT if data address is bad
	 */

	dsisr = regs->dsisr;

	if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
		unsigned int real_instr;
		if (__get_user(real_instr, (unsigned int __user *)regs->nip))
			return 0;
		dsisr = make_dsisr(real_instr);
	}

	/* extract the operation and registers from the dsisr */
	reg = (dsisr >> 5) & 0x1f;	/* source/dest register */
	areg = dsisr & 0x1f;		/* register to update */
	instr = (dsisr >> 10) & 0x7f;
	instr |= (dsisr >> 13) & 0x60;

	/* Lookup the operation in our table */
	nb = aligninfo[instr].len;
	flags = aligninfo[instr].flags;

	/* DAR has the operand effective address */
	addr = (unsigned char __user *)regs->dar;

	/* A size of 0 indicates an instruction we don't support, and
	 * we also don't support the multiples (lmw, stmw, lmd, stmd).
	 */
	if ((nb == 0) || (flags & M))
		return 0;		/* too hard or invalid instruction */

	/*
	 * Special handling for dcbz:
	 * dcbz may give an alignment exception for accesses to
	 * caching-inhibited storage.
	 */
	if (instr == DCBZ)
		addr = (unsigned char __user *) ((unsigned long)addr & -L1_CACHE_BYTES);

	/* Verify the address of the operand */
	if (user_mode(regs)) {
		if (!access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ), addr, nb))
			return -EFAULT;	/* bad address */
	}

	/* Force the fprs into the save area so we can reference them */
	if (flags & F) {
		if (!user_mode(regs))
			return 0;
		flush_fp_to_thread(current);
	}

	/* If we are loading, get the data from user space */
	if (flags & LD) {
		data.ll = 0;
		ret = 0;
		p = addr;
		switch (nb) {
		case 8:
			ret |= __get_user(data.v[0], p++);
			ret |= __get_user(data.v[1], p++);
			ret |= __get_user(data.v[2], p++);
			ret |= __get_user(data.v[3], p++);
			/* fall through */
		case 4:
			ret |= __get_user(data.v[4], p++);
			ret |= __get_user(data.v[5], p++);
			/* fall through */
		case 2:
			ret |= __get_user(data.v[6], p++);
			ret |= __get_user(data.v[7], p++);
			if (ret)
				return -EFAULT;
		}
	}

	/* If we are storing, get the data from the saved gpr or fpr */
	if (flags & ST) {
		if (flags & F) {
			if (nb == 4) {
				/* Doing stfs, have to convert to single */
				preempt_disable();
				enable_kernel_fp();
				cvt_df(&current->thread.fpr[reg], (float *)&data.v[4], &current->thread.fpscr);
				disable_kernel_fp();
				preempt_enable();
			}
			else
				data.dd = current->thread.fpr[reg];
		}
		else
			data.ll = regs->gpr[reg];
	}

	/* Swap bytes as needed */
	if (flags & SW) {
		if (nb == 2)
			SWAP(data.v[6], data.v[7]);
		else {	/* nb must be 4 */
			SWAP(data.v[4], data.v[7]);
			SWAP(data.v[5], data.v[6]);
		}
	}

	/* Sign extend as needed */
	if (flags & SE) {
		if ( nb == 2 )
			data.ll = data.x16.low16;
		else	/* nb must be 4 */
			data.ll = data.x32.low32;
	}

	/* If we are loading, move the data to the gpr or fpr */
	if (flags & LD) {
		if (flags & F) {
			if (nb == 4) {
				/* Doing lfs, have to convert to double */
				preempt_disable();
				enable_kernel_fp();
				cvt_fd((float *)&data.v[4], &current->thread.fpr[reg], &current->thread.fpscr);
				disable_kernel_fp();
				preempt_enable();
			}
			else
				current->thread.fpr[reg] = data.dd;
		}
		else
			regs->gpr[reg] = data.ll;
	}

	/* If we are storing, copy the data to the user */
	if (flags & ST) {
		ret = 0;
		p = addr;
		switch (nb) {
		case 128:	/* Special case - must be dcbz */
			lp = (unsigned long __user *)p;
			for (i = 0; i < L1_CACHE_BYTES / sizeof(long); ++i)
				ret |= __put_user(0, lp++);
			break;
		case 8:
			ret |= __put_user(data.v[0], p++);
			ret |= __put_user(data.v[1], p++);
			ret |= __put_user(data.v[2], p++);
			ret |= __put_user(data.v[3], p++);
			/* fall through */
		case 4:
			ret |= __put_user(data.v[4], p++);
			ret |= __put_user(data.v[5], p++);
			/* fall through */
		case 2:
			ret |= __put_user(data.v[6], p++);
			ret |= __put_user(data.v[7], p++);
		}
		if (ret)
			return -EFAULT;
	}

	/* Update RA as needed */
	if (flags & U) {
		regs->gpr[areg] = regs->dar;
	}

	return 1;
}
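
One detail worth a standalone illustration: the dcbz path. fix_alignment rounds the faulting address down to a cache block, and the case 128 store branch then zeroes the whole block. The same pattern in plain C, with a 128-byte block size assumed for illustration:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define L1_CACHE_BYTES 128

/* Round the effective address down to its cache block, zero the block. */
static void emulate_dcbz(void *ea)
{
	uintptr_t base = (uintptr_t)ea & ~(uintptr_t)(L1_CACHE_BYTES - 1);

	memset((void *)base, 0, L1_CACHE_BYTES);
}

int main(void)
{
	static _Alignas(L1_CACHE_BYTES) unsigned char buf[2 * L1_CACHE_BYTES];

	memset(buf, 0xff, sizeof(buf));
	emulate_dcbz(buf + 200);		/* lands in the second block */
	printf("%02x %02x\n", buf[0], buf[128]);	/* prints: ff 00 */
	return 0;
}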
diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c
new file mode 100644
index 000000000000..0094ac79a18c
--- /dev/null
+++ b/arch/ppc64/kernel/asm-offsets.c
@@ -0,0 +1,193 @@
/*
 * This program is used to generate definitions needed by
 * assembly language modules.
 *
 * We use the technique used in the OSF Mach kernel code:
 * generate asm statements containing #defines,
 * compile this file to assembler, and then extract the
 * #defines from the assembly-language output.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/hardirq.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>

#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/iSeries/ItLpQueue.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/rtas.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/systemcfg.h>
#include <asm/compat.h>

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

#define BLANK() asm volatile("\n->" : : )

int main(void)
{
	/* thread struct on stack */
	DEFINE(THREAD_SHIFT, THREAD_SHIFT);
	DEFINE(THREAD_SIZE, THREAD_SIZE);
	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
	DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
	DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror));

	/* task_struct->thread */
	DEFINE(THREAD, offsetof(struct task_struct, thread));
	DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
	DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
	DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
	DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
	DEFINE(KSP, offsetof(struct thread_struct, ksp));
	DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));

#ifdef CONFIG_ALTIVEC
	DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
	DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
	DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
	DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
#endif /* CONFIG_ALTIVEC */
	DEFINE(MM, offsetof(struct task_struct, mm));

	DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
	DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
	DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
	DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
	DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
	DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
	DEFINE(PLATFORM, offsetof(struct systemcfg, platform));

	/* paca */
	DEFINE(PACA_SIZE, sizeof(struct paca_struct));
	DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
	DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start));
	DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack));
	DEFINE(PACACURRENT, offsetof(struct paca_struct, __current));
	DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
	DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
	DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
	DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
	DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
	DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
	DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled));
	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_HUGETLB_PAGE
	DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs));
#endif /* CONFIG_HUGETLB_PAGE */
	DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr));
	DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
	DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
	DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
	DEFINE(PACA_EXDSI, offsetof(struct paca_struct, exdsi));
	DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
	DEFINE(PACALPPACA, offsetof(struct paca_struct, lppaca));
	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
	DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
	DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));

	/* RTAS */
	DEFINE(RTASBASE, offsetof(struct rtas_t, base));
	DEFINE(RTASENTRY, offsetof(struct rtas_t, entry));

	/* Interrupt register frame */
	DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD);

	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));

	/* 288 = # of volatile regs, int & fp, for leaf routines
	 * which do not stack a frame.  See the PPC64 ABI.
	 */
	DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288);
	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
	DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
	DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
	DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2]));
	DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3]));
	DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4]));
	DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5]));
	DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6]));
	DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7]));
	DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8]));
	DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9]));
	DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10]));
	DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11]));
	DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12]));
	DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13]));
	/*
	 * Note: these symbols include _ because they overlap with special
	 * register names
	 */
	DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip));
	DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr));
	DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr));
	DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link));
	DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr));
	DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer));
	DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
	DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
	DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3));
	DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result));
	DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
	DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe));

	/* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
	DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
	DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);

	DEFINE(CLONE_VM, CLONE_VM);
	DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);

	/* About the CPU features table */
	DEFINE(CPU_SPEC_ENTRY_SIZE, sizeof(struct cpu_spec));
	DEFINE(CPU_SPEC_PVR_MASK, offsetof(struct cpu_spec, pvr_mask));
	DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value));
	DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
	DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));

	/* systemcfg offsets for use by vdso */
	DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp));
	DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec));
	DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs));
	DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec));
	DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count));
	DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest));
	DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime));
	DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32));
	DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64));

	/* timeval/timezone offsets for use by vdso */
	DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
	DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
	DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
	DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
	DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
	DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));

	return 0;
}
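
asm-offsets.c is compiled to assembly but never linked: each DEFINE plants a literal "->SYM value" marker in the compiler's .s output, and the build then extracts those markers into a generated header that assembly sources can #include. A minimal standalone demonstration of the trick — the struct and build command are illustrative, and the exact spelling of the %0 immediate in the output varies by target:

/* offsets_demo.c - build with: cc -S offsets_demo.c -o - | grep -e '->' */
#include <stddef.h>

struct demo {
	long a;
	char pad[12];
	long b;		/* padded out to offset 24 on LP64 */
};

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

int main(void)
{
	DEFINE(DEMO_A, offsetof(struct demo, a));	/* emits: ->DEMO_A 0 ... */
	DEFINE(DEMO_B, offsetof(struct demo, b));	/* emits: ->DEMO_B 24 ... */
	return 0;
}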
diff --git a/arch/ppc64/kernel/binfmt_elf32.c b/arch/ppc64/kernel/binfmt_elf32.c
new file mode 100644
index 000000000000..fadc699a0497
--- /dev/null
+++ b/arch/ppc64/kernel/binfmt_elf32.c
@@ -0,0 +1,78 @@
1/*
2 * binfmt_elf32.c: Support 32-bit PPC ELF binaries on Power3 and followons.
3 * based on the SPARC64 version.
4 * Copyright (C) 1995, 1996, 1997, 1998 David S. Miller (davem@redhat.com)
5 * Copyright (C) 1995, 1996, 1997, 1998 Jakub Jelinek (jj@ultra.linux.cz)
6 *
7 * Copyright (C) 2000,2001 Ken Aaker (kdaaker@rchland.vnet.ibm.com), IBM Corp
8 * Copyright (C) 2001 Anton Blanchard (anton@au.ibm.com), IBM
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#define ELF_ARCH EM_PPC
17#define ELF_CLASS ELFCLASS32
18#define ELF_DATA ELFDATA2MSB;
19
20#include <asm/processor.h>
21#include <linux/module.h>
22#include <linux/config.h>
23#include <linux/elfcore.h>
24#include <linux/compat.h>
25
26#define elf_prstatus elf_prstatus32
27struct elf_prstatus32
28{
29 struct elf_siginfo pr_info; /* Info associated with signal */
30 short pr_cursig; /* Current signal */
31 unsigned int pr_sigpend; /* Set of pending signals */
32 unsigned int pr_sighold; /* Set of held signals */
33 pid_t pr_pid;
34 pid_t pr_ppid;
35 pid_t pr_pgrp;
36 pid_t pr_sid;
37 struct compat_timeval pr_utime; /* User time */
38 struct compat_timeval pr_stime; /* System time */
39 struct compat_timeval pr_cutime; /* Cumulative user time */
40 struct compat_timeval pr_cstime; /* Cumulative system time */
41 elf_gregset_t pr_reg; /* General purpose registers. */
42 int pr_fpvalid; /* True if math co-processor being used. */
43};
44
45#define elf_prpsinfo elf_prpsinfo32
46struct elf_prpsinfo32
47{
48 char pr_state; /* numeric process state */
49 char pr_sname; /* char for pr_state */
50 char pr_zomb; /* zombie */
51 char pr_nice; /* nice val */
52 unsigned int pr_flag; /* flags */
53 u32 pr_uid;
54 u32 pr_gid;
55 pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
56 /* Lots missing */
57 char pr_fname[16]; /* filename of executable */
58 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
59};
60
61#include <linux/time.h>
62
63#undef cputime_to_timeval
64#define cputime_to_timeval cputime_to_compat_timeval
65static __inline__ void
66cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
67{
68 unsigned long jiffies = cputime_to_jiffies(cputime);
69 value->tv_usec = (jiffies % HZ) * (1000000L / HZ);
70 value->tv_sec = jiffies / HZ;
71}
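/*
 * Worked example (assuming HZ == 1000): a cputime of 1500 jiffies
 * becomes tv_sec = 1, tv_usec = 500000 in the compat timeval layout.
 */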
72
73extern void start_thread32(struct pt_regs *, unsigned long, unsigned long);
74#undef start_thread
75#define start_thread start_thread32
76#define init_elf_binfmt init_elf32_binfmt
77
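/*
 * With the types and entry points above remapped to their 32-bit
 * equivalents, the generic ELF loader included below is compiled in a
 * compat flavour.
 */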
78#include "../../../fs/binfmt_elf.c"
diff --git a/arch/ppc64/kernel/bitops.c b/arch/ppc64/kernel/bitops.c
new file mode 100644
index 000000000000..ae329e8b4acb
--- /dev/null
+++ b/arch/ppc64/kernel/bitops.c
@@ -0,0 +1,147 @@
1/*
2 * These are too big to be inlined.
3 */
4
5#include <linux/kernel.h>
6#include <linux/module.h>
7#include <linux/bitops.h>
8#include <asm/byteorder.h>
9
10unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
11 unsigned long offset)
12{
13 const unsigned long *p = addr + (offset >> 6);
14 unsigned long result = offset & ~63UL;
15 unsigned long tmp;
16
17 if (offset >= size)
18 return size;
19 size -= result;
20 offset &= 63UL;
21 if (offset) {
22 tmp = *(p++);
23 tmp |= ~0UL >> (64 - offset);
24 if (size < 64)
25 goto found_first;
26 if (~tmp)
27 goto found_middle;
28 size -= 64;
29 result += 64;
30 }
31 while (size & ~63UL) {
32 if (~(tmp = *(p++)))
33 goto found_middle;
34 result += 64;
35 size -= 64;
36 }
37 if (!size)
38 return result;
39 tmp = *p;
40
41found_first:
42 tmp |= ~0UL << size;
43 if (tmp == ~0UL) /* Are any bits zero? */
44 return result + size; /* Nope. */
45found_middle:
46 return result + ffz(tmp);
47}
48
49EXPORT_SYMBOL(find_next_zero_bit);
50
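/*
 * Usage sketch (illustrative only, not part of this file): walk every
 * clear bit in a 128-bit map. The map size and the helper itself are
 * assumptions made up for the example.
 */
static void example_walk_zero_bits(const unsigned long *map)
{
	unsigned long bit = 0;

	/* find_next_zero_bit() returns the size when no zero bit remains */
	while ((bit = find_next_zero_bit(map, 128, bit)) < 128) {
		/* 'bit' now indexes a clear bit; consume it and move on */
		bit++;
	}
}
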
51unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
52 unsigned long offset)
53{
54 const unsigned long *p = addr + (offset >> 6);
55 unsigned long result = offset & ~63UL;
56 unsigned long tmp;
57
58 if (offset >= size)
59 return size;
60 size -= result;
61 offset &= 63UL;
62 if (offset) {
63 tmp = *(p++);
64 tmp &= (~0UL << offset);
65 if (size < 64)
66 goto found_first;
67 if (tmp)
68 goto found_middle;
69 size -= 64;
70 result += 64;
71 }
72 while (size & ~63UL) {
73 if ((tmp = *(p++)))
74 goto found_middle;
75 result += 64;
76 size -= 64;
77 }
78 if (!size)
79 return result;
80 tmp = *p;
81
82found_first:
83 tmp &= (~0UL >> (64 - size));
84 if (tmp == 0UL) /* Are any bits set? */
85 return result + size; /* Nope. */
86found_middle:
87 return result + __ffs(tmp);
88}
89
90EXPORT_SYMBOL(find_next_bit);
91
92static inline unsigned int ext2_ilog2(unsigned int x)
93{
94 int lz;
95
96 asm("cntlzw %0,%1": "=r"(lz):"r"(x));
97 return 31 - lz;
98}
99
100static inline unsigned int ext2_ffz(unsigned int x)
101{
102 u32 rc;
103 if ((x = ~x) == 0)
104 return 32;
105 rc = ext2_ilog2(x & -x);
106 return rc;
107}
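/*
 * Worked example for ext2_ffz(): with x = 0xfffffffb the inversion
 * gives 0x00000004; x & -x isolates that lowest set bit, and
 * ext2_ilog2() converts it to the bit index 2.
 */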
108
109unsigned long find_next_zero_le_bit(const unsigned long *addr, unsigned long size,
110 unsigned long offset)
111{
112 const unsigned int *p = ((const unsigned int *)addr) + (offset >> 5);
113 unsigned int result = offset & ~31;
114 unsigned int tmp;
115
116 if (offset >= size)
117 return size;
118 size -= result;
119 offset &= 31;
120 if (offset) {
121 tmp = cpu_to_le32p(p++);
122 tmp |= ~0U >> (32 - offset); /* bug or feature ? */
123 if (size < 32)
124 goto found_first;
125 if (tmp != ~0)
126 goto found_middle;
127 size -= 32;
128 result += 32;
129 }
130 while (size >= 32) {
131 if ((tmp = cpu_to_le32p(p++)) != ~0)
132 goto found_middle;
133 result += 32;
134 size -= 32;
135 }
136 if (!size)
137 return result;
138 tmp = cpu_to_le32p(p);
139found_first:
140 tmp |= ~0 << size;
141 if (tmp == ~0) /* Are any bits zero? */
142 return result + size; /* Nope. */
143found_middle:
144 return result + ext2_ffz(tmp);
145}
146
147EXPORT_SYMBOL(find_next_zero_le_bit);
diff --git a/arch/ppc64/kernel/btext.c b/arch/ppc64/kernel/btext.c
new file mode 100644
index 000000000000..c53f079e9b77
--- /dev/null
+++ b/arch/ppc64/kernel/btext.c
@@ -0,0 +1,751 @@
1/*
2 * Procedures for drawing on the screen early on in the boot process.
3 *
4 * Benjamin Herrenschmidt <benh@kernel.crashing.org>
5 */
6#include <linux/config.h>
7#include <linux/kernel.h>
8#include <linux/string.h>
9#include <linux/init.h>
10#include <linux/version.h>
11
12#include <asm/sections.h>
13#include <asm/prom.h>
14#include <asm/btext.h>
16#include <asm/page.h>
17#include <asm/mmu.h>
18#include <asm/pgtable.h>
19#include <asm/io.h>
20#include <asm/lmb.h>
21#include <asm/processor.h>
22
23#undef NO_SCROLL
24
25#ifndef NO_SCROLL
26static void scrollscreen(void);
27#endif
28
29static void draw_byte(unsigned char c, long locX, long locY);
30static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb);
31static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb);
32static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb);
33
34static int g_loc_X;
35static int g_loc_Y;
36static int g_max_loc_X;
37static int g_max_loc_Y;
38
39static int dispDeviceRowBytes;
40static int dispDeviceDepth;
41static int dispDeviceRect[4];
42static unsigned char *dispDeviceBase, *logicalDisplayBase;
43
44unsigned long disp_BAT[2] __initdata = {0, 0};
45
46#define cmapsz (16*256)
47
48static unsigned char vga_font[cmapsz];
49
50int boot_text_mapped;
51int force_printk_to_btext = 0;
52
53
54/* Here's a small text engine to use during early boot
55 * or for debugging purposes
56 *
57 * todo:
58 *
59 * - build some kind of vgacon with it to enable early printk
60 * - move to a separate file
61 * - add a few video driver hooks to keep in sync with display
62 * changes.
63 */
64
65void map_boot_text(void)
66{
67 unsigned long base, offset, size;
68 unsigned char *vbase;
69
70 /* By default, we are no longer mapped */
71 boot_text_mapped = 0;
72 if (dispDeviceBase == 0)
73 return;
74 base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL;
75 offset = ((unsigned long) dispDeviceBase) - base;
76 size = dispDeviceRowBytes * dispDeviceRect[3] + offset
77 + dispDeviceRect[0];
78 vbase = __ioremap(base, size, _PAGE_NO_CACHE);
79 if (vbase == 0)
80 return;
81 logicalDisplayBase = vbase + offset;
82 boot_text_mapped = 1;
83}
84
85int btext_initialize(struct device_node *np)
86{
87 unsigned int width, height, depth, pitch;
88 unsigned long address = 0;
89 u32 *prop;
90
91 prop = (u32 *)get_property(np, "width", NULL);
92 if (prop == NULL)
93 return -EINVAL;
94 width = *prop;
95 prop = (u32 *)get_property(np, "height", NULL);
96 if (prop == NULL)
97 return -EINVAL;
98 height = *prop;
99 prop = (u32 *)get_property(np, "depth", NULL);
100 if (prop == NULL)
101 return -EINVAL;
102 depth = *prop;
103 pitch = width * ((depth + 7) / 8);
104 prop = (u32 *)get_property(np, "linebytes", NULL);
105 if (prop)
106 pitch = *prop;
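	/* Some firmware apparently reports a bogus "linebytes" of 1;
	 * the check below falls back to an assumed 0x1000-byte pitch. */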
107 if (pitch == 1)
108 pitch = 0x1000;
109 prop = (u32 *)get_property(np, "address", NULL);
110 if (prop)
111 address = *prop;
112
113 /* FIXME: Add support for PCI reg properties */
114
115 if (address == 0)
116 return -EINVAL;
117
118 g_loc_X = 0;
119 g_loc_Y = 0;
120 g_max_loc_X = width / 8;
121 g_max_loc_Y = height / 16;
122 logicalDisplayBase = (unsigned char *)address;
123 dispDeviceBase = (unsigned char *)address;
124 dispDeviceRowBytes = pitch;
125 dispDeviceDepth = depth;
126 dispDeviceRect[0] = dispDeviceRect[1] = 0;
127 dispDeviceRect[2] = width;
128 dispDeviceRect[3] = height;
129
130 map_boot_text();
131
132 return 0;
133}
134
135
136/* Calc the base address of a given point (x,y) */
137static unsigned char * calc_base(int x, int y)
138{
139 unsigned char *base;
140
141 base = logicalDisplayBase;
142 if (base == 0)
143 base = dispDeviceBase;
144 base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3);
145 base += (y + dispDeviceRect[1]) * dispDeviceRowBytes;
146 return base;
147}
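/*
 * Worked example: with an 8-bit deep display and a 1024-byte pitch,
 * calc_base(10, 3) returns the frame buffer base + 10 * 1 + 3 * 1024
 * (plus any dispDeviceRect offsets).
 */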
148
149/* Adjust the display to a new resolution */
150void btext_update_display(unsigned long phys, int width, int height,
151 int depth, int pitch)
152{
153 if (dispDeviceBase == 0)
154 return;
155
156 /* check it's the same frame buffer (within 256MB) */
157 if ((phys ^ (unsigned long)dispDeviceBase) & 0xf0000000)
158 return;
159
160 dispDeviceBase = (__u8 *) phys;
161 dispDeviceRect[0] = 0;
162 dispDeviceRect[1] = 0;
163 dispDeviceRect[2] = width;
164 dispDeviceRect[3] = height;
165 dispDeviceDepth = depth;
166 dispDeviceRowBytes = pitch;
167 if (boot_text_mapped) {
168 iounmap(logicalDisplayBase);
169 boot_text_mapped = 0;
170 }
171 map_boot_text();
172 g_loc_X = 0;
173 g_loc_Y = 0;
174 g_max_loc_X = width / 8;
175 g_max_loc_Y = height / 16;
176}
177
178void btext_clearscreen(void)
179{
180 unsigned long *base = (unsigned long *)calc_base(0, 0);
181 unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
182 (dispDeviceDepth >> 3)) >> 3;
183 int i,j;
184
185 for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++)
186 {
187 unsigned long *ptr = base;
188 for(j=width; j; --j)
189 *(ptr++) = 0;
190 base += (dispDeviceRowBytes >> 3);
191 }
192}
193
194#ifndef NO_SCROLL
195static void scrollscreen(void)
196{
197 unsigned long *src = (unsigned long *)calc_base(0,16);
198 unsigned long *dst = (unsigned long *)calc_base(0,0);
199 unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
200 (dispDeviceDepth >> 3)) >> 3;
201 int i,j;
202
203 for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++)
204 {
205 unsigned long *src_ptr = src;
206 unsigned long *dst_ptr = dst;
207 for(j=width; j; --j)
208 *(dst_ptr++) = *(src_ptr++);
209 src += (dispDeviceRowBytes >> 3);
210 dst += (dispDeviceRowBytes >> 3);
211 }
212 for (i=0; i<16; i++)
213 {
214 unsigned long *dst_ptr = dst;
215 for(j=width; j; --j)
216 *(dst_ptr++) = 0;
217 dst += (dispDeviceRowBytes >> 3);
218 }
219}
220#endif /* ndef NO_SCROLL */
221
222void btext_drawchar(char c)
223{
224 int cline = 0;
225#ifdef NO_SCROLL
226 int x;
227#endif
228 if (!boot_text_mapped)
229 return;
230
231 switch (c) {
232 case '\b':
233 if (g_loc_X > 0)
234 --g_loc_X;
235 break;
236 case '\t':
237 g_loc_X = (g_loc_X & -8) + 8;
238 break;
239 case '\r':
240 g_loc_X = 0;
241 break;
242 case '\n':
243 g_loc_X = 0;
244 g_loc_Y++;
245 cline = 1;
246 break;
247 default:
248 draw_byte(c, g_loc_X++, g_loc_Y);
249 }
250 if (g_loc_X >= g_max_loc_X) {
251 g_loc_X = 0;
252 g_loc_Y++;
253 cline = 1;
254 }
255#ifndef NO_SCROLL
256 while (g_loc_Y >= g_max_loc_Y) {
257 scrollscreen();
258 g_loc_Y--;
259 }
260#else
261 /* wrap around from bottom to top of screen so we don't
262 waste time scrolling each line. -- paulus. */
263 if (g_loc_Y >= g_max_loc_Y)
264 g_loc_Y = 0;
265 if (cline) {
266 for (x = 0; x < g_max_loc_X; ++x)
267 draw_byte(' ', x, g_loc_Y);
268 }
269#endif
270}
271
272void btext_drawstring(const char *c)
273{
274 if (!boot_text_mapped)
275 return;
276 while (*c)
277 btext_drawchar(*c++);
278}
279
280void btext_drawhex(unsigned long v)
281{
282	const char *hex_table = "0123456789abcdef";
283	int i;
284
285	if (!boot_text_mapped)
286		return;
287	/* Emit all sixteen nibbles, most significant first */
288	for (i = 60; i >= 0; i -= 4)
289		btext_drawchar(hex_table[(v >> i) & 0x0000000FUL]);
302 btext_drawchar(' ');
303}
304
305static void draw_byte(unsigned char c, long locX, long locY)
306{
307 unsigned char *base = calc_base(locX << 3, locY << 4);
308 unsigned char *font = &vga_font[((unsigned int)c) * 16];
309 int rb = dispDeviceRowBytes;
310
311 switch(dispDeviceDepth) {
312 case 24:
313 case 32:
314 draw_byte_32(font, (unsigned int *)base, rb);
315 break;
316 case 15:
317 case 16:
318 draw_byte_16(font, (unsigned int *)base, rb);
319 break;
320 case 8:
321 draw_byte_8(font, (unsigned int *)base, rb);
322 break;
323 }
324}
325
326static unsigned int expand_bits_8[16] = {
327 0x00000000,
328 0x000000ff,
329 0x0000ff00,
330 0x0000ffff,
331 0x00ff0000,
332 0x00ff00ff,
333 0x00ffff00,
334 0x00ffffff,
335 0xff000000,
336 0xff0000ff,
337 0xff00ff00,
338 0xff00ffff,
339 0xffff0000,
340 0xffff00ff,
341 0xffffff00,
342 0xffffffff
343};
344
345static unsigned int expand_bits_16[4] = {
346 0x00000000,
347 0x0000ffff,
348 0xffff0000,
349 0xffffffff
350};
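/*
 * How the tables above are used: expand_bits_8[n] stretches each of
 * the four bits in the nibble n into a full byte, so one 32-bit store
 * paints four 8-bit pixels; expand_bits_16[n] stretches each of two
 * bits into a halfword, painting two 16-bit pixels per store.
 */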
351
352
353static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
354{
355 int l, bits;
356 int fg = 0xFFFFFFFFUL;
357 int bg = 0x00000000UL;
358
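	/* Each font bit is stretched into a full pixel: negating the 0/1
	 * bit yields an all-zeros or all-ones mask, which is ANDed with
	 * the foreground and XORed with the background.
	 */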
359 for (l = 0; l < 16; ++l)
360 {
361 bits = *font++;
362 base[0] = (-(bits >> 7) & fg) ^ bg;
363 base[1] = (-((bits >> 6) & 1) & fg) ^ bg;
364 base[2] = (-((bits >> 5) & 1) & fg) ^ bg;
365 base[3] = (-((bits >> 4) & 1) & fg) ^ bg;
366 base[4] = (-((bits >> 3) & 1) & fg) ^ bg;
367 base[5] = (-((bits >> 2) & 1) & fg) ^ bg;
368 base[6] = (-((bits >> 1) & 1) & fg) ^ bg;
369 base[7] = (-(bits & 1) & fg) ^ bg;
370 base = (unsigned int *) ((char *)base + rb);
371 }
372}
373
374static void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
375{
376 int l, bits;
377 int fg = 0xFFFFFFFFUL;
378 int bg = 0x00000000UL;
379 unsigned int *eb = (int *)expand_bits_16;
380
381 for (l = 0; l < 16; ++l)
382 {
383 bits = *font++;
384 base[0] = (eb[bits >> 6] & fg) ^ bg;
385 base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg;
386 base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg;
387 base[3] = (eb[bits & 3] & fg) ^ bg;
388 base = (unsigned int *) ((char *)base + rb);
389 }
390}
391
392static void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
393{
394 int l, bits;
395 int fg = 0x0F0F0F0FUL;
396 int bg = 0x00000000UL;
397 unsigned int *eb = (int *)expand_bits_8;
398
399 for (l = 0; l < 16; ++l)
400 {
401 bits = *font++;
402 base[0] = (eb[bits >> 4] & fg) ^ bg;
403 base[1] = (eb[bits & 0xf] & fg) ^ bg;
404 base = (unsigned int *) ((char *)base + rb);
405 }
406}
407
408static unsigned char vga_font[cmapsz] = {
4090x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4100x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd,
4110x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff,
4120xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
4130x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10,
4140x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe,
4150x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18,
4160x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
4170x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c,
4180x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
4190x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
4200xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
4210x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
4220x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd,
4230xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e,
4240x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
4250x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18,
4260x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30,
4270x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63,
4280x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
4290x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18,
4300x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8,
4310xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e,
4320x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
4330x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
4340x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
4350x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb,
4360xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
4370x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6,
4380x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4390xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
4400x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
4410x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
4420x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
4430x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4440x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4450x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
4460x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0,
4470xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4480x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4490x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00,
4500x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c,
4510x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4520x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4530x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
4540x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00,
4550x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c,
4560x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
4570x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c,
4580x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18,
4590x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c,
4600x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
4610x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4620x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30,
4630x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18,
4640x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
4650x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
4660x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
4670x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4680x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
4690x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
4700x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4710x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4720x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
4730x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c,
4740x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18,
4750x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
4760x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
4770x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c,
4780x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe,
4790x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
4800xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
4810x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
4820x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18,
4830x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
4840xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
4850x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78,
4860x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00,
4870x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4880x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
4890x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06,
4900x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00,
4910x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60,
4920x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
4930x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
4940x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde,
4950xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38,
4960x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
4970x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc,
4980x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0,
4990xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c,
5000x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
5010x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe,
5020x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68,
5030x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
5040xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
5050x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
5060x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18,
5070x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c,
5080x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
5090x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6,
5100x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60,
5110x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7,
5120xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
5130x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6,
5140x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
5150xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66,
5160x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
5170x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c,
5180x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c,
5190x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
5200xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
5210x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
5220x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
5230xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
5240xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
5250x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66,
5260x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18,
5270x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
5280xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
5290x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff,
5300x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30,
5310x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
5320xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
5330x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c,
5340x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
5350x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
5360x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
5370x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
5380x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c,
5390xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60,
5400x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
5410x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c,
5420x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc,
5430xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
5440x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
5450x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0,
5460x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc,
5470xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60,
5480x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
5490x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
5500x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06,
5510x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60,
5520x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
5530x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
5540x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb,
5550xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
5560x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
5570x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
5580x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66,
5590x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
5600x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
5610x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0,
5620x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60,
5630x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30,
5640x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
5650x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
5660x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3,
5670xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
5680x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
5690x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3,
5700x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6,
5710xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00,
5720x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
5730x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e,
5740x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18,
5750x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18,
5760x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
5770x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
5780x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6,
5790xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
5800xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
5810x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
5820x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe,
5830xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c,
5840x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
5850x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76,
5860x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c,
5870xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38,
5880x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
5890x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06,
5900x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe,
5910xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
5920x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
5930x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c,
5940x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18,
5950x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66,
5960x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
5970x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
5980x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6,
5990xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00,
6000x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
6010x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe,
6020x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b,
6030x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c,
6040xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
6050x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
6060x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6,
6070xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18,
6080x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
6090x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
6100x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc,
6110xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
6120x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
6130x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
6140x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
6150xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
6160xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
6170x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc,
6180x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18,
6190xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66,
6200x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
6210x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18,
6220xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c,
6230xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30,
6240x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
6250x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
6260x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc,
6270xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc,
6280x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
6290x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6,
6300x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00,
6310x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
6320x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
6330x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c,
6340x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
6350xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
6360x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
6370x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06,
6380x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30,
6390x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18,
6400x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
6410x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00,
6420x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36,
6430x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44,
6440x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
6450x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
6460x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
6470xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18,
6480x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
6490x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18,
6500x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
6510x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
6520x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
6530x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36,
6540x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8,
6550x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
6560x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
6570x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
6580x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6,
6590x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
6600x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
6610x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00,
6620x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
6630x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
6640x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
6650x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00,
6660x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff,
6670x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
6680x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
6690x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
6700x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
6710x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
6720x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
6730x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
6740x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37,
6750x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
6760x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
6770x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
6780x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff,
6790x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
6800x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
6810x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
6820x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff,
6830x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36,
6840x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
6850x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
6860x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff,
6870x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
6880x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
6890x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36,
6900x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f,
6910x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
6920x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
6930x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
6940x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f,
6950x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
6960x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
6970x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18,
6980x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8,
6990x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7000x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
7010xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
7020xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
7030xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0,
7040xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
7050x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
7060x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
7070x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7080x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
7090x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc,
7100x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0,
7110xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7120xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
7130x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe,
7140x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8,
7150xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7160x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
7170x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
7180x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66,
7190x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38,
7200x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
7210x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee,
7220x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66,
7230x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7240x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7250x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0,
7260x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60,
7270x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c,
7280xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
7290x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00,
7300x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18,
7310x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
7320x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
7330x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e,
7340x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18,
7350x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
7360x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
7370x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00,
7380x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00,
7390x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
7400x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7410x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
7420x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7430x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c,
7440x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
7450x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
7460x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00,
7470x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7480x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
7490x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
7500x00, 0x00, 0x00, 0x00,
751};
diff --git a/arch/ppc64/kernel/cpu_setup_power4.S b/arch/ppc64/kernel/cpu_setup_power4.S
new file mode 100644
index 000000000000..3bd951820850
--- /dev/null
+++ b/arch/ppc64/kernel/cpu_setup_power4.S
@@ -0,0 +1,214 @@
1/*
2 * This file contains low-level CPU setup functions.
3 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 */
11
12#include <linux/config.h>
13#include <asm/processor.h>
14#include <asm/page.h>
15#include <asm/ppc_asm.h>
16#include <asm/cputable.h>
18#include <asm/offsets.h>
19#include <asm/cache.h>
20
21_GLOBAL(__970_cpu_preinit)
22 /*
23 * Do nothing if not running in HV mode
24 */
25 mfmsr r0
26 rldicl. r0,r0,4,63
27 beqlr
28
29 /*
30 * Deal only with PPC970 and PPC970FX.
31 */
32 mfspr r0,SPRN_PVR
33 srwi r0,r0,16
34 cmpwi cr0,r0,0x39
35 cmpwi cr1,r0,0x3c
36 cror 4*cr0+eq,4*cr0+eq,4*cr1+eq
37 bnelr
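	/* The two cmpwi results are merged with cror, so the bnelr above
	 * returns unless the PVR version field is 0x0039 (970) or
	 * 0x003c (970FX).
	 */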
38
39	/* Make sure HID4:rm_ci is off before the MMU is turned off, that
40	 * large pages are enabled with HID4:61, and clear HID5:DCBZ_size
41	 * and HID5:DCBZ32_ill.
42	 */
43 li r0,0
44 mfspr r3,SPRN_HID4
45 rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
46 rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */
47 sync
48 mtspr SPRN_HID4,r3
49 isync
50 sync
51 mfspr r3,SPRN_HID5
52 rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */
53 sync
54 mtspr SPRN_HID5,r3
55 isync
56 sync
57
58 /* Setup some basic HID1 features */
59 mfspr r0,SPRN_HID1
60 li r3,0x1200 /* enable i-fetch cacheability */
61 sldi r3,r3,44 /* and prefetch */
62 or r0,r0,r3
63 mtspr SPRN_HID1,r0
64 mtspr SPRN_HID1,r0
65 isync
66
67 /* Clear HIOR */
68 li r0,0
69 sync
70	mtspr	SPRN_HIOR,r0		/* Clear interrupt prefix */
71 isync
72 blr
73
74_GLOBAL(__setup_cpu_power4)
75 blr
76
77_GLOBAL(__setup_cpu_ppc970)
78 mfspr r0,SPRN_HID0
79 li r11,5 /* clear DOZE and SLEEP */
80 rldimi r0,r11,52,8 /* set NAP and DPM */
81 mtspr SPRN_HID0,r0
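	/* The burst of HID0 read-backs below is deliberate: the 970
	 * apparently needs several reads after an HID0 write before the
	 * new value is guaranteed to take effect.
	 */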
82 mfspr r0,SPRN_HID0
83 mfspr r0,SPRN_HID0
84 mfspr r0,SPRN_HID0
85 mfspr r0,SPRN_HID0
86 mfspr r0,SPRN_HID0
87 mfspr r0,SPRN_HID0
88 sync
89 isync
90 blr
91
92/* Definitions for the table used to save CPU states */
93#define CS_HID0 0
94#define CS_HID1 8
95#define CS_HID4 16
96#define CS_HID5 24
97#define CS_SIZE 32
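/* Each CS_* constant above is the byte offset of one 64-bit register
 * image in cpu_state_storage below; CS_SIZE is the total table size.
 */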
98
99 .data
100 .balign L1_CACHE_BYTES,0
101cpu_state_storage:
102 .space CS_SIZE
103 .balign L1_CACHE_BYTES,0
104 .text
105
106/* Called in normal context to back up CPU 0 state. This
107 * does not include cache settings. This function is also
108 * called for machine sleep. This does not include the MMU
109 * setup, BATs, etc... but rather the "special" registers
110 * like HID0, HID1, HID4, etc...
111 */
112_GLOBAL(__save_cpu_setup)
113	/* Some CR fields are volatile, so back them all up */
114 mfcr r7
115
116 /* Get storage ptr */
117 LOADADDR(r5,cpu_state_storage)
118
119 /* We only deal with 970 for now */
120 mfspr r0,SPRN_PVR
121 srwi r0,r0,16
122 cmpwi cr0,r0,0x39
123 cmpwi cr1,r0,0x3c
124 cror 4*cr0+eq,4*cr0+eq,4*cr1+eq
125 bne 1f
126
127 /* Save HID0,1,4 and 5 */
128 mfspr r3,SPRN_HID0
129 std r3,CS_HID0(r5)
130 mfspr r3,SPRN_HID1
131 std r3,CS_HID1(r5)
132 mfspr r3,SPRN_HID4
133 std r3,CS_HID4(r5)
134 mfspr r3,SPRN_HID5
135 std r3,CS_HID5(r5)
136
1371:
138 mtcr r7
139 blr
140
141/* Called with no MMU context (typically MSR:IR/DR off) to
142 * restore CPU state as backed up by the previous
143 * function. This does not include cache settings.
144 */
145_GLOBAL(__restore_cpu_setup)
146	/* Get storage ptr (FIXME when using anton reloc, as we
147	 * are running with translation disabled here)
148	 */
149 LOADADDR(r5,cpu_state_storage)
150
151 /* We only deal with 970 for now */
152 mfspr r0,SPRN_PVR
153 srwi r0,r0,16
154 cmpwi cr0,r0,0x39
155 cmpwi cr1,r0,0x3c
156 cror 4*cr0+eq,4*cr0+eq,4*cr1+eq
157 bne 1f
158
159 /* Before accessing memory, we make sure rm_ci is clear */
160 li r0,0
161 mfspr r3,SPRN_HID4
162 rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
163 sync
164 mtspr SPRN_HID4,r3
165 isync
166 sync
167
168 /* Clear interrupt prefix */
169 li r0,0
170 sync
171	mtspr	SPRN_HIOR,r0
172 isync
173
174 /* Restore HID0 */
175 ld r3,CS_HID0(r5)
176 sync
177 isync
178 mtspr SPRN_HID0,r3
179 mfspr r3,SPRN_HID0
180 mfspr r3,SPRN_HID0
181 mfspr r3,SPRN_HID0
182 mfspr r3,SPRN_HID0
183 mfspr r3,SPRN_HID0
184 mfspr r3,SPRN_HID0
185 sync
186 isync
187
188 /* Restore HID1 */
189 ld r3,CS_HID1(r5)
190 sync
191 isync
192 mtspr SPRN_HID1,r3
193 mtspr SPRN_HID1,r3
194 sync
195 isync
196
197 /* Restore HID4 */
198 ld r3,CS_HID4(r5)
199 sync
200 isync
201 mtspr SPRN_HID4,r3
202 sync
203 isync
204
205 /* Restore HID5 */
206 ld r3,CS_HID5(r5)
207 sync
208 isync
209 mtspr SPRN_HID5,r3
210 sync
211 isync
2121:
213 blr
214
diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c
new file mode 100644
index 000000000000..8644a8648058
--- /dev/null
+++ b/arch/ppc64/kernel/cputable.c
@@ -0,0 +1,197 @@
1/*
2 * arch/ppc64/kernel/cputable.c
3 *
4 * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
5 *
6 * Modifications for ppc64:
7 * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <linux/config.h>
16#include <linux/string.h>
17#include <linux/sched.h>
18#include <linux/threads.h>
19#include <linux/init.h>
20#include <linux/module.h>
21
22#include <asm/cputable.h>
23
24struct cpu_spec* cur_cpu_spec = NULL;
25EXPORT_SYMBOL(cur_cpu_spec);
26
27/* NOTE:
28 * Unlike ppc32, ppc64 will only call this once for the boot CPU; it's
29 * the responsibility of the appropriate CPU save/restore functions to
30 * eventually copy these settings over. Those save/restore aren't yet
31 * part of the cputable though. That has to be fixed for both ppc32
32 * and ppc64
33 */
34extern void __setup_cpu_power3(unsigned long offset, struct cpu_spec* spec);
35extern void __setup_cpu_power4(unsigned long offset, struct cpu_spec* spec);
36extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
37
38
39/* We only set the altivec features if the kernel was compiled with altivec
40 * support
41 */
42#ifdef CONFIG_ALTIVEC
43#define CPU_FTR_ALTIVEC_COMP CPU_FTR_ALTIVEC
44#define PPC_FEATURE_HAS_ALTIVEC_COMP PPC_FEATURE_HAS_ALTIVEC
45#else
46#define CPU_FTR_ALTIVEC_COMP 0
47#define PPC_FEATURE_HAS_ALTIVEC_COMP 0
48#endif
49
50struct cpu_spec cpu_specs[] = {
51 { /* Power3 */
52 0xffff0000, 0x00400000, "POWER3 (630)",
53 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
54 CPU_FTR_IABR | CPU_FTR_PMC8,
55 COMMON_USER_PPC64,
56 128, 128,
57 __setup_cpu_power3,
58 COMMON_PPC64_FW
59 },
60 { /* Power3+ */
61 0xffff0000, 0x00410000, "POWER3 (630+)",
62 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
63 CPU_FTR_IABR | CPU_FTR_PMC8,
64 COMMON_USER_PPC64,
65 128, 128,
66 __setup_cpu_power3,
67 COMMON_PPC64_FW
68 },
69 { /* Northstar */
70 0xffff0000, 0x00330000, "RS64-II (northstar)",
71 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
72 CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
73 COMMON_USER_PPC64,
74 128, 128,
75 __setup_cpu_power3,
76 COMMON_PPC64_FW
77 },
78 { /* Pulsar */
79 0xffff0000, 0x00340000, "RS64-III (pulsar)",
80 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
81 CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
82 COMMON_USER_PPC64,
83 128, 128,
84 __setup_cpu_power3,
85 COMMON_PPC64_FW
86 },
87 { /* I-star */
88 0xffff0000, 0x00360000, "RS64-III (icestar)",
89 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
90 CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
91 COMMON_USER_PPC64,
92 128, 128,
93 __setup_cpu_power3,
94 COMMON_PPC64_FW
95 },
96 { /* S-star */
97 0xffff0000, 0x00370000, "RS64-IV (sstar)",
98 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
99 CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
100 COMMON_USER_PPC64,
101 128, 128,
102 __setup_cpu_power3,
103 COMMON_PPC64_FW
104 },
105 { /* Power4 */
106 0xffff0000, 0x00350000, "POWER4 (gp)",
107 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
108 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
109 COMMON_USER_PPC64,
110 128, 128,
111 __setup_cpu_power4,
112 COMMON_PPC64_FW
113 },
114 { /* Power4+ */
115 0xffff0000, 0x00380000, "POWER4+ (gq)",
116 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
117 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
118 COMMON_USER_PPC64,
119 128, 128,
120 __setup_cpu_power4,
121 COMMON_PPC64_FW
122 },
123 { /* PPC970 */
124 0xffff0000, 0x00390000, "PPC970",
125 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
126 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
127 CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
128 COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP,
129 128, 128,
130 __setup_cpu_ppc970,
131 COMMON_PPC64_FW
132 },
133 { /* PPC970FX */
134 0xffff0000, 0x003c0000, "PPC970FX",
135 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
136 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP |
137 CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA,
138 COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP,
139 128, 128,
140 __setup_cpu_ppc970,
141 COMMON_PPC64_FW
142 },
143 { /* Power5 */
144 0xffff0000, 0x003a0000, "POWER5 (gr)",
145 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
146 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
147 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
148 CPU_FTR_MMCRA_SIHV,
149 COMMON_USER_PPC64,
150 128, 128,
151 __setup_cpu_power4,
152 COMMON_PPC64_FW
153 },
154 { /* Power5 */
155 0xffff0000, 0x003b0000, "POWER5 (gs)",
156 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
157 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT |
158 CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
159 CPU_FTR_MMCRA_SIHV,
160 COMMON_USER_PPC64,
161 128, 128,
162 __setup_cpu_power4,
163 COMMON_PPC64_FW
164 },
165 { /* default match */
166 0x00000000, 0x00000000, "POWER4 (compatible)",
167 CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE |
168 CPU_FTR_PPCAS_ARCH_V2,
169 COMMON_USER_PPC64,
170 128, 128,
171 __setup_cpu_power4,
172 COMMON_PPC64_FW
173 }
174};
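
/*
 * Illustrative sketch (not part of this file) of how a table like
 * cpu_specs is typically searched: mask the PVR and compare. The real
 * walk lives in the early setup code; this helper is made up for the
 * example and relies on the catch-all last entry always matching.
 */
static struct cpu_spec *example_identify_cpu(unsigned int pvr)
{
	struct cpu_spec *s = cpu_specs;

	while ((pvr & s->pvr_mask) != s->pvr_value)
		s++;	/* safe: the default entry has mask 0, value 0 */
	return s;
}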
175
176firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
177 {FW_FEATURE_PFT, "hcall-pft"},
178 {FW_FEATURE_TCE, "hcall-tce"},
179 {FW_FEATURE_SPRG0, "hcall-sprg0"},
180 {FW_FEATURE_DABR, "hcall-dabr"},
181 {FW_FEATURE_COPY, "hcall-copy"},
182 {FW_FEATURE_ASR, "hcall-asr"},
183 {FW_FEATURE_DEBUG, "hcall-debug"},
184 {FW_FEATURE_PERF, "hcall-perf"},
185 {FW_FEATURE_DUMP, "hcall-dump"},
186 {FW_FEATURE_INTERRUPT, "hcall-interrupt"},
187 {FW_FEATURE_MIGRATE, "hcall-migrate"},
188 {FW_FEATURE_PERFMON, "hcall-perfmon"},
189 {FW_FEATURE_CRQ, "hcall-crq"},
190 {FW_FEATURE_VIO, "hcall-vio"},
191 {FW_FEATURE_RDMA, "hcall-rdma"},
192 {FW_FEATURE_LLAN, "hcall-lLAN"},
193 {FW_FEATURE_BULK, "hcall-bulk"},
194 {FW_FEATURE_XDABR, "hcall-xdabr"},
195 {FW_FEATURE_MULTITCE, "hcall-multi-tce"},
196 {FW_FEATURE_SPLPAR, "hcall-splpar"},
197};
diff --git a/arch/ppc64/kernel/dma.c b/arch/ppc64/kernel/dma.c
new file mode 100644
index 000000000000..ce714c927134
--- /dev/null
+++ b/arch/ppc64/kernel/dma.c
@@ -0,0 +1,147 @@
1/*
2 * Copyright (C) 2004 IBM Corporation
3 *
4 * Implements the generic device DMA API for ppc64. Handles
5 * the PCI and VIO buses.
6 */
7
8#include <linux/device.h>
9#include <linux/dma-mapping.h>
10/* Include the buses we support */
11#include <linux/pci.h>
12#include <asm/vio.h>
13#include <asm/scatterlist.h>
14#include <asm/bug.h>
15
16static struct dma_mapping_ops *get_dma_ops(struct device *dev)
17{
18 if (dev->bus == &pci_bus_type)
19 return &pci_dma_ops;
20#ifdef CONFIG_IBMVIO
21 if (dev->bus == &vio_bus_type)
22 return &vio_dma_ops;
23#endif
24 return NULL;
25}
26
27int dma_supported(struct device *dev, u64 mask)
28{
29 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
30
31 if (dma_ops)
32 return dma_ops->dma_supported(dev, mask);
33 BUG();
34 return 0;
35}
36EXPORT_SYMBOL(dma_supported);
37
38int dma_set_mask(struct device *dev, u64 dma_mask)
39{
40 if (dev->bus == &pci_bus_type)
41 return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
42#ifdef CONFIG_IBMVIO
43 if (dev->bus == &vio_bus_type)
44 return -EIO;
45#endif /* CONFIG_IBMVIO */
46 BUG();
47 return 0;
48}
49EXPORT_SYMBOL(dma_set_mask);
50
51void *dma_alloc_coherent(struct device *dev, size_t size,
52 dma_addr_t *dma_handle, unsigned int __nocast flag)
53{
54 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
55
56 if (dma_ops)
57 return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
58 BUG();
59 return NULL;
60}
61EXPORT_SYMBOL(dma_alloc_coherent);
62
63void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
64 dma_addr_t dma_handle)
65{
66 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
67
68 if (dma_ops)
69 dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
70 else
71 BUG();
72}
73EXPORT_SYMBOL(dma_free_coherent);
74
75dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
76 enum dma_data_direction direction)
77{
78 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
79
80 if (dma_ops)
81 return dma_ops->map_single(dev, cpu_addr, size, direction);
82 BUG();
83 return (dma_addr_t)0;
84}
85EXPORT_SYMBOL(dma_map_single);
86
87void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
88 enum dma_data_direction direction)
89{
90 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
91
92 if (dma_ops)
93 dma_ops->unmap_single(dev, dma_addr, size, direction);
94 else
95 BUG();
96}
97EXPORT_SYMBOL(dma_unmap_single);
98
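/*
 * Usage sketch (illustrative only): the typical map/unmap pairing a
 * driver performs around a device-bound transfer. 'dev', 'buf' and
 * 'len' are assumptions made up for the example.
 */
static void example_dma_to_device(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	/* ... program the device with 'handle' and wait for the DMA ... */

	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
}
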
99dma_addr_t dma_map_page(struct device *dev, struct page *page,
100 unsigned long offset, size_t size,
101 enum dma_data_direction direction)
102{
103 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
104
105 if (dma_ops)
106 return dma_ops->map_single(dev,
107 (page_address(page) + offset), size, direction);
108 BUG();
109 return (dma_addr_t)0;
110}
111EXPORT_SYMBOL(dma_map_page);
112
113void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
114 enum dma_data_direction direction)
115{
116 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
117
118 if (dma_ops)
119 dma_ops->unmap_single(dev, dma_address, size, direction);
120 else
121 BUG();
122}
123EXPORT_SYMBOL(dma_unmap_page);
124
125int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
126 enum dma_data_direction direction)
127{
128 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
129
130 if (dma_ops)
131 return dma_ops->map_sg(dev, sg, nents, direction);
132 BUG();
133 return 0;
134}
135EXPORT_SYMBOL(dma_map_sg);
136
137void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
138 enum dma_data_direction direction)
139{
140 struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
141
142 if (dma_ops)
143 dma_ops->unmap_sg(dev, sg, nhwentries, direction);
144 else
145 BUG();
146}
147EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c
new file mode 100644
index 000000000000..d63d41f3eecf
--- /dev/null
+++ b/arch/ppc64/kernel/eeh.c
@@ -0,0 +1,937 @@
1/*
2 * eeh.c
3 * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/bootmem.h>
21#include <linux/init.h>
22#include <linux/list.h>
23#include <linux/mm.h>
24#include <linux/notifier.h>
25#include <linux/pci.h>
26#include <linux/proc_fs.h>
27#include <linux/rbtree.h>
28#include <linux/seq_file.h>
29#include <linux/spinlock.h>
30#include <asm/eeh.h>
31#include <asm/io.h>
32#include <asm/machdep.h>
33#include <asm/rtas.h>
34#include <asm/atomic.h>
35#include <asm/systemcfg.h>
36#include "pci.h"
37
38#undef DEBUG
39
40/** Overview:
41 * EEH, or "Extended Error Handling" is a PCI bridge technology for
42 * dealing with PCI bus errors that can't be dealt with within the
43 * usual PCI framework, except by check-stopping the CPU. Systems
44 * that are designed for high-availability/reliability cannot afford
45 * to crash due to a "mere" PCI error, thus the need for EEH.
46 * An EEH-capable bridge operates by converting a detected error
47 * into a "slot freeze", taking the PCI adapter off-line, making
48 * the slot behave, from the OS's point of view, as if the slot
49 * were "empty": all reads return 0xff's and all writes are silently
50 * ignored. EEH slot isolation events can be triggered by parity
51 * errors on the address or data busses (e.g. during posted writes),
52 * which in turn might be caused by dust, vibration, humidity,
53 * radioactivity or plain-old failed hardware.
54 *
55 * Note, however, that one of the leading causes of EEH slot
56 * freeze events is buggy device drivers, buggy device microcode,
57 * or buggy device hardware. This is because any attempt by the
58 * device to bus-master data to a memory address that is not
59 * assigned to the device will trigger a slot freeze. (The idea
60 * is to prevent devices-gone-wild from corrupting system memory).
61 * Buggy hardware/drivers will have a miserable time co-existing
62 * with EEH.
63 *
64 * Ideally, a PCI device driver, when suspecting that an isolation
65 * event has occurred (e.g. by reading 0xff's), will then ask EEH
66 * whether this is the case, and then take appropriate steps to
67 * reset the PCI slot, the PCI device, and then resume operations.
68 * However, until that day, the checking is done here, with the
69 * eeh_check_failure() routine embedded in the MMIO macros. If
70 * the slot is found to be isolated, an "EEH Event" is synthesized
71 * and sent out for processing.
72 */
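/*
 * For illustration only -- a minimal sketch, not the real accessor
 * (the actual EEH-aware macros live in the io/eeh headers): a checking
 * readb would look roughly like
 *
 *	static inline u8 eeh_readb(const volatile void __iomem *addr)
 *	{
 *		u8 val = in_8(addr);
 *		if (val == 0xff)
 *			return eeh_check_failure(addr, val);
 *		return val;
 *	}
 *
 * i.e. only an all-ones read triggers the (expensive) firmware query
 * performed by eeh_check_failure() below.
 */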
73
74/** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */
75#define BUID_HI(buid) ((buid) >> 32)
76#define BUID_LO(buid) ((buid) & 0xffffffff)
77
78/* EEH event workqueue setup. */
79static DEFINE_SPINLOCK(eeh_eventlist_lock);
80LIST_HEAD(eeh_eventlist);
81static void eeh_event_handler(void *);
82DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL);
83
84static struct notifier_block *eeh_notifier_chain;
85
86/*
87 * If a device driver keeps reading an MMIO register in an interrupt
88 * handler after a slot isolation event has occurred, we assume it
89 * is broken and panic. This sets the threshold for how many read
90 * attempts we allow before panicking.
91 */
92#define EEH_MAX_FAILS 1000
93static atomic_t eeh_fail_count;
94
95/* RTAS tokens */
96static int ibm_set_eeh_option;
97static int ibm_set_slot_reset;
98static int ibm_read_slot_reset_state;
99static int ibm_read_slot_reset_state2;
100static int ibm_slot_error_detail;
101
102static int eeh_subsystem_enabled;
103
104/* Buffer for reporting slot-error-detail rtas calls */
105static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
106static DEFINE_SPINLOCK(slot_errbuf_lock);
107static int eeh_error_buf_size;
108
109/* System monitoring statistics */
110static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
111static DEFINE_PER_CPU(unsigned long, false_positives);
112static DEFINE_PER_CPU(unsigned long, ignored_failures);
113static DEFINE_PER_CPU(unsigned long, slot_resets);
114
115/**
116 * The pci address cache subsystem. This subsystem places
117 * PCI device address resources into a red-black tree, sorted
118 * according to the address range, so that given only an i/o
119 * address, the corresponding PCI device can be **quickly**
120 * found. It is safe to perform an address lookup in an interrupt
121 * context; this ability is an important feature.
122 *
123 * Currently, the only customer of this code is the EEH subsystem;
124 * thus, this code has been somewhat tailored to suit EEH better.
125 * In particular, the cache does *not* hold the addresses of devices
126 * for which EEH is not enabled.
127 *
128 * (Implementation Note: The RB tree seems to be better/faster
129 * than any hash algo I could think of for this problem, even
130 * with the penalty of slow pointer chases for d-cache misses).
131 */
132struct pci_io_addr_range
133{
134 struct rb_node rb_node;
135 unsigned long addr_lo;
136 unsigned long addr_hi;
137 struct pci_dev *pcidev;
138 unsigned int flags;
139};
140
141static struct pci_io_addr_cache
142{
143 struct rb_root rb_root;
144 spinlock_t piar_lock;
145} pci_io_addr_cache_root;
146
147static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
148{
149 struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
150
151 while (n) {
152 struct pci_io_addr_range *piar;
153 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
154
155 if (addr < piar->addr_lo) {
156 n = n->rb_left;
157 } else {
158 if (addr > piar->addr_hi) {
159 n = n->rb_right;
160 } else {
161 pci_dev_get(piar->pcidev);
162 return piar->pcidev;
163 }
164 }
165 }
166
167 return NULL;
168}
169
170/**
171 * pci_get_device_by_addr - Get device, given only address
172 * @addr: mmio (PIO) phys address or i/o port number
173 *
174 * Given an mmio phys address, or a port number, find a pci device
175 * that implements this address. Be sure to pci_dev_put the device
176 * when finished. I/O port numbers are assumed to be offset
177 * from zero (that is, they do *not* have pci_io_addr added in).
178 * It is safe to call this function within an interrupt.
179 */
180static struct pci_dev *pci_get_device_by_addr(unsigned long addr)
181{
182 struct pci_dev *dev;
183 unsigned long flags;
184
185 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
186 dev = __pci_get_device_by_addr(addr);
187 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
188 return dev;
189}
190
191#ifdef DEBUG
192/*
193 * Handy-dandy debug print routine, does nothing more
194 * than print out the contents of our addr cache.
195 */
196static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
197{
198 struct rb_node *n;
199 int cnt = 0;
200
201 n = rb_first(&cache->rb_root);
202 while (n) {
203 struct pci_io_addr_range *piar;
204 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
205 printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s %s\n",
206 (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
207 piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev),
208 pci_pretty_name(piar->pcidev));
209 cnt++;
210 n = rb_next(n);
211 }
212}
213#endif
214
215/* Insert address range into the rb tree. */
216static struct pci_io_addr_range *
217pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
218 unsigned long ahi, unsigned int flags)
219{
220 struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
221 struct rb_node *parent = NULL;
222 struct pci_io_addr_range *piar;
223
224 /* Walk tree, find a place to insert into tree */
225 while (*p) {
226 parent = *p;
227 piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
228 if (alo < piar->addr_lo) {
229 p = &parent->rb_left;
230 } else if (ahi > piar->addr_hi) {
231 p = &parent->rb_right;
232 } else {
233 if (dev != piar->pcidev ||
234 alo != piar->addr_lo || ahi != piar->addr_hi) {
235 printk(KERN_WARNING "PIAR: overlapping address range\n");
236 }
237 return piar;
238 }
239 }
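	/* GFP_ATOMIC: our callers hold piar_lock with interrupts off */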
240	piar = kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
241 if (!piar)
242 return NULL;
243
244 piar->addr_lo = alo;
245 piar->addr_hi = ahi;
246 piar->pcidev = dev;
247 piar->flags = flags;
248
249 rb_link_node(&piar->rb_node, parent, p);
250 rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
251
252 return piar;
253}
254
255static void __pci_addr_cache_insert_device(struct pci_dev *dev)
256{
257 struct device_node *dn;
258 int i;
259 int inserted = 0;
260
261 dn = pci_device_to_OF_node(dev);
262 if (!dn) {
263 printk(KERN_WARNING "PCI: no pci dn found for dev=%s %s\n",
264 pci_name(dev), pci_pretty_name(dev));
265 return;
266 }
267
268 /* Skip any devices for which EEH is not enabled. */
269 if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) ||
270 dn->eeh_mode & EEH_MODE_NOCHECK) {
271#ifdef DEBUG
272 printk(KERN_INFO "PCI: skip building address cache for=%s %s\n",
273 pci_name(dev), pci_pretty_name(dev));
274#endif
275 return;
276 }
277
278 /* The cache holds a reference to the device... */
279 pci_dev_get(dev);
280
281 /* Walk resources on this device, poke them into the tree */
282 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
283 unsigned long start = pci_resource_start(dev,i);
284 unsigned long end = pci_resource_end(dev,i);
285 unsigned int flags = pci_resource_flags(dev,i);
286
287	/* We are interested only in bus addresses, not dma or other stuff */
288 if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
289 continue;
290 if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
291 continue;
292 pci_addr_cache_insert(dev, start, end, flags);
293 inserted = 1;
294 }
295
296 /* If there was nothing to add, the cache has no reference... */
297 if (!inserted)
298 pci_dev_put(dev);
299}
300
301/**
302 * pci_addr_cache_insert_device - Add a device to the address cache
303 * @dev: PCI device whose I/O addresses we are interested in.
304 *
305 * In order to support the fast lookup of devices based on addresses,
306 * we maintain a cache of devices that can be quickly searched.
307 * This routine adds a device to that cache.
308 */
309void pci_addr_cache_insert_device(struct pci_dev *dev)
310{
311 unsigned long flags;
312
313 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
314 __pci_addr_cache_insert_device(dev);
315 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
316}
317
318static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
319{
320 struct rb_node *n;
321 int removed = 0;
322
323restart:
324 n = rb_first(&pci_io_addr_cache_root.rb_root);
325 while (n) {
326 struct pci_io_addr_range *piar;
327 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
328
329 if (piar->pcidev == dev) {
330 rb_erase(n, &pci_io_addr_cache_root.rb_root);
331 removed = 1;
332 kfree(piar);
333 goto restart;
334 }
335 n = rb_next(n);
336 }
337
338 /* The cache no longer holds its reference to this device... */
339 if (removed)
340 pci_dev_put(dev);
341}
342
343/**
344 * pci_addr_cache_remove_device - remove pci device from addr cache
345 * @dev: device to remove
346 *
347 * Remove a device from the addr-cache tree.
348 * This is potentially expensive, since it will walk
349 * the tree multiple times (once per resource).
350 * But so what; device removal doesn't need to be that fast.
351 */
352void pci_addr_cache_remove_device(struct pci_dev *dev)
353{
354 unsigned long flags;
355
356 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
357 __pci_addr_cache_remove_device(dev);
358 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
359}
360
361/**
362 * pci_addr_cache_build - Build a cache of I/O addresses
363 *
364 * Build a cache of pci i/o addresses. This cache will be used to
365 * find the pci device that corresponds to a given address.
366 * This routine scans all pci busses to build the cache.
367 * Must be run late in boot process, after the pci controllers
368 * have been scanned for devices (after all device resources are known).
369 */
370void __init pci_addr_cache_build(void)
371{
372 struct pci_dev *dev = NULL;
373
374 spin_lock_init(&pci_io_addr_cache_root.piar_lock);
375
376 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
377 /* Ignore PCI bridges ( XXX why ??) */
378 if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
379 continue;
380 }
381 pci_addr_cache_insert_device(dev);
382 }
383
384#ifdef DEBUG
385 /* Verify tree built up above, echo back the list of addrs. */
386 pci_addr_cache_print(&pci_io_addr_cache_root);
387#endif
388}
389
390/* --------------------------------------------------------------- */
391/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
392
393/**
394 * eeh_register_notifier - Register to find out about EEH events.
395 * @nb: notifier block to callback on events
396 */
397int eeh_register_notifier(struct notifier_block *nb)
398{
399 return notifier_chain_register(&eeh_notifier_chain, nb);
400}
401
402/**
403 * eeh_unregister_notifier - Unregister to an EEH event notifier.
404 * @nb: notifier block to callback on events
405 */
406int eeh_unregister_notifier(struct notifier_block *nb)
407{
408 return notifier_chain_unregister(&eeh_notifier_chain, nb);
409}
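/*
 * Hypothetical usage sketch (all names below are made up): a driver
 * that wants to hear about slot freezes could register a callback:
 *
 *	static int my_eeh_callback(struct notifier_block *nb,
 *				   unsigned long action, void *data)
 *	{
 *		struct eeh_event *event = data;
 *
 *		if (action == EEH_NOTIFY_FREEZE && event)
 *			;	// quiesce the device, schedule recovery
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_eeh_nb = {
 *		.notifier_call = my_eeh_callback,
 *	};
 *
 *	eeh_register_notifier(&my_eeh_nb);
 */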
410
411/**
412 * read_slot_reset_state - Read the reset state of a device node's slot
413 * @dn: device node to read
414 * @rets: array to return results in
415 */
416static int read_slot_reset_state(struct device_node *dn, int rets[])
417{
418 int token, outputs;
419
420 if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
421 token = ibm_read_slot_reset_state2;
422 outputs = 4;
423 } else {
424 token = ibm_read_slot_reset_state;
425 outputs = 3;
426 }
427
428 return rtas_call(token, 3, outputs, rets, dn->eeh_config_addr,
429 BUID_HI(dn->phb->buid), BUID_LO(dn->phb->buid));
430}
431
432/**
433 * eeh_panic - call panic() for an eeh event that cannot be handled.
434 * The philosophy of this routine is that it is better to panic and
435 * halt the OS than it is to risk possible data corruption by
436 * oblivious device drivers that don't know better.
437 *
438 * @dev pci device that had an eeh event
439 * @reset_state current reset state of the device slot
440 */
441static void eeh_panic(struct pci_dev *dev, int reset_state)
442{
443 /*
444 * XXX We should create a separate sysctl for this.
445 *
446 * Since the panic_on_oops sysctl is used to halt the system
447 * in light of potential corruption, we can use it here.
448 */
449 if (panic_on_oops)
450 panic("EEH: MMIO failure (%d) on device:%s %s\n", reset_state,
451 pci_name(dev), pci_pretty_name(dev));
452 else {
453 __get_cpu_var(ignored_failures)++;
454 printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s %s\n",
455 reset_state, pci_name(dev), pci_pretty_name(dev));
456 }
457}
458
459/**
460 * eeh_event_handler - dispatch EEH events. The detection of a frozen
461 * slot can occur inside an interrupt, where it can be hard to do
462 * anything about it. The goal of this routine is to pull these
463 * detection events out of the context of the interrupt handler, and
464 * re-dispatch them for processing at a later time in a normal context.
465 *
466 * @dummy - unused
467 */
468static void eeh_event_handler(void *dummy)
469{
470 unsigned long flags;
471 struct eeh_event *event;
472
473 while (1) {
474 spin_lock_irqsave(&eeh_eventlist_lock, flags);
475 event = NULL;
476 if (!list_empty(&eeh_eventlist)) {
477 event = list_entry(eeh_eventlist.next, struct eeh_event, list);
478 list_del(&event->list);
479 }
480 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
481 if (event == NULL)
482 break;
483
484	printk(KERN_INFO "EEH: MMIO failure (%d), notifying device "
485 "%s %s\n", event->reset_state,
486 pci_name(event->dev), pci_pretty_name(event->dev));
487
488 atomic_set(&eeh_fail_count, 0);
489 notifier_call_chain (&eeh_notifier_chain,
490 EEH_NOTIFY_FREEZE, event);
491
492 __get_cpu_var(slot_resets)++;
493
494 pci_dev_put(event->dev);
495 kfree(event);
496 }
497}
498
499/**
500 * eeh_token_to_phys - convert EEH address token to phys address
501 * @token i/o token, should be address in the form 0xE....
502 */
503static inline unsigned long eeh_token_to_phys(unsigned long token)
504{
505 pte_t *ptep;
506 unsigned long pa;
507
508 ptep = find_linux_pte(ioremap_mm.pgd, token);
509 if (!ptep)
510 return token;
511 pa = pte_pfn(*ptep) << PAGE_SHIFT;
512
513 return pa | (token & (PAGE_SIZE-1));
514}
515
516/**
517 * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
518 * @dn device node
519 * @dev pci device, if known
520 *
521 * Check for an EEH failure for the given device node. Call this
522 * routine if the result of a read was all 0xff's and you want to
523 * find out if this is due to an EEH slot freeze. This routine
524 * will query firmware for the EEH status.
525 *
526 * Returns 0 if there has not been an EEH error; otherwise returns
527 * a non-zero value and queues up a slot isolation event notification.
528 *
529 * It is safe to call this routine in an interrupt context.
530 */
531int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
532{
533 int ret;
534 int rets[3];
535 unsigned long flags;
536 int rc, reset_state;
537 struct eeh_event *event;
538
539 __get_cpu_var(total_mmio_ffs)++;
540
541 if (!eeh_subsystem_enabled)
542 return 0;
543
544 if (!dn)
545 return 0;
546
547 /* Access to IO BARs might get this far and still not want checking. */
548 if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) ||
549 dn->eeh_mode & EEH_MODE_NOCHECK) {
550 return 0;
551 }
552
553 if (!dn->eeh_config_addr) {
554 return 0;
555 }
556
557 /*
558 * If we already have a pending isolation event for this
559 * slot, we know it's bad already, we don't need to check...
560 */
561 if (dn->eeh_mode & EEH_MODE_ISOLATED) {
562 atomic_inc(&eeh_fail_count);
563 if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) {
564 /* re-read the slot reset state */
565 if (read_slot_reset_state(dn, rets) != 0)
566 rets[0] = -1; /* reset state unknown */
567 eeh_panic(dev, rets[0]);
568 }
569 return 0;
570 }
571
572 /*
573 * Now test for an EEH failure. This is VERY expensive.
574 * Note that the eeh_config_addr may be a parent device
575 * in the case of a device behind a bridge, or it may be
576 * function zero of a multi-function device.
577 * In any case they must share a common PHB.
578 */
579 ret = read_slot_reset_state(dn, rets);
580 if (!(ret == 0 && rets[1] == 1 && (rets[0] == 2 || rets[0] == 4))) {
581 __get_cpu_var(false_positives)++;
582 return 0;
583 }
584
585 /* prevent repeated reports of this failure */
586 dn->eeh_mode |= EEH_MODE_ISOLATED;
587
588 reset_state = rets[0];
589
590 spin_lock_irqsave(&slot_errbuf_lock, flags);
591 memset(slot_errbuf, 0, eeh_error_buf_size);
592
593 rc = rtas_call(ibm_slot_error_detail,
594 8, 1, NULL, dn->eeh_config_addr,
595 BUID_HI(dn->phb->buid),
596 BUID_LO(dn->phb->buid), NULL, 0,
597 virt_to_phys(slot_errbuf),
598 eeh_error_buf_size,
599 1 /* Temporary Error */);
600
601 if (rc == 0)
602 log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
603 spin_unlock_irqrestore(&slot_errbuf_lock, flags);
604
605 printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n",
606 rets[0], dn->name, dn->full_name);
607 event = kmalloc(sizeof(*event), GFP_ATOMIC);
608 if (event == NULL) {
609 eeh_panic(dev, reset_state);
610 return 1;
611 }
612
613 event->dev = dev;
614 event->dn = dn;
615 event->reset_state = reset_state;
616
617 /* We may or may not be called in an interrupt context */
618 spin_lock_irqsave(&eeh_eventlist_lock, flags);
619 list_add(&event->list, &eeh_eventlist);
620 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
621
622 /* Most EEH events are due to device driver bugs. Having
623 * a stack trace will help the device-driver authors figure
624 * out what happened. So print that out. */
625 dump_stack();
626 schedule_work(&eeh_event_wq);
627
628 return 0;
629}
630
631EXPORT_SYMBOL(eeh_dn_check_failure);
632
633/**
634 * eeh_check_failure - check if all 1's data is due to EEH slot freeze
635 * @token i/o token, should be address in the form 0xA....
636 * @val value, should be all 1's (XXX why do we need this arg??)
637 *
639 * Check for an EEH failure at the given token address. Call this
640 * routine if the result of a read was all 0xff's and you want to
641 * find out if this is due to an EEH slot freeze event. This routine
642 * will query firmware for the EEH status.
643 *
644 * Note this routine is safe to call in an interrupt context.
645 */
646unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
647{
648 unsigned long addr;
649 struct pci_dev *dev;
650 struct device_node *dn;
651
652 /* Finding the phys addr + pci device; this is pretty quick. */
653 addr = eeh_token_to_phys((unsigned long __force) token);
654 dev = pci_get_device_by_addr(addr);
655 if (!dev)
656 return val;
657
658 dn = pci_device_to_OF_node(dev);
659 eeh_dn_check_failure (dn, dev);
660
661 pci_dev_put(dev);
662 return val;
663}
664
665EXPORT_SYMBOL(eeh_check_failure);
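/*
 * Hypothetical driver-side use (a sketch; most drivers get this check
 * for free from the EEH-aware MMIO accessors, see the overview above):
 *
 *	status = readl(regs + MY_STATUS_REG);
 *	if (status == ~0U)
 *		status = eeh_check_failure(regs + MY_STATUS_REG, status);
 *
 * where MY_STATUS_REG is a made-up device register offset.
 */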
666
667struct eeh_early_enable_info {
668 unsigned int buid_hi;
669 unsigned int buid_lo;
670};
671
672/* Enable eeh for the given device node. */
673static void *early_enable_eeh(struct device_node *dn, void *data)
674{
675 struct eeh_early_enable_info *info = data;
676 int ret;
677 char *status = get_property(dn, "status", NULL);
678 u32 *class_code = (u32 *)get_property(dn, "class-code", NULL);
679 u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL);
680 u32 *device_id = (u32 *)get_property(dn, "device-id", NULL);
681 u32 *regs;
682 int enable;
683
684 dn->eeh_mode = 0;
685
686 if (status && strcmp(status, "ok") != 0)
687 return NULL; /* ignore devices with bad status */
688
689 /* Ignore bad nodes. */
690 if (!class_code || !vendor_id || !device_id)
691 return NULL;
692
693 /* There is nothing to check on PCI to ISA bridges */
694 if (dn->type && !strcmp(dn->type, "isa")) {
695 dn->eeh_mode |= EEH_MODE_NOCHECK;
696 return NULL;
697 }
698
699 /*
700 * Now decide if we are going to "Disable" EEH checking
701 * for this device. We still run with the EEH hardware active,
702 * but we won't be checking for ff's. This means a driver
703 * could return bad data (very bad!), an interrupt handler could
704 * hang waiting on status bits that won't change, etc.
705 * But there are a few cases, like display devices, where disabling makes sense.
706 */
707 enable = 1; /* i.e. we will do checking */
708 if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
709 enable = 0;
710
711 if (!enable)
712 dn->eeh_mode |= EEH_MODE_NOCHECK;
713
714 /* Ok... see if this device supports EEH. Some do, some don't,
715 * and the only way to find out is to check each and every one. */
716 regs = (u32 *)get_property(dn, "reg", NULL);
717 if (regs) {
718 /* First register entry is addr (00BBSS00) */
719 /* Try to enable eeh */
720 ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
721 regs[0], info->buid_hi, info->buid_lo,
722 EEH_ENABLE);
723 if (ret == 0) {
724 eeh_subsystem_enabled = 1;
725 dn->eeh_mode |= EEH_MODE_SUPPORTED;
726 dn->eeh_config_addr = regs[0];
727#ifdef DEBUG
728 printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
729#endif
730 } else {
731
732 /* This device doesn't support EEH, but it may have an
733 * EEH parent, in which case we mark it as supported. */
734 if (dn->parent && (dn->parent->eeh_mode & EEH_MODE_SUPPORTED)) {
735 /* Parent supports EEH. */
736 dn->eeh_mode |= EEH_MODE_SUPPORTED;
737 dn->eeh_config_addr = dn->parent->eeh_config_addr;
738 return NULL;
739 }
740 }
741 } else {
742 printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
743 dn->full_name);
744 }
745
746 return NULL;
747}
748
749/*
750 * Initialize EEH by trying to enable it for all of the adapters in the system.
751 * As a side effect we can determine here if eeh is supported at all.
752 * Note that we leave EEH on so failed config cycles won't cause a machine
753 * check. If a user turns off EEH for a particular adapter they are really
754 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
755 * grant access to a slot if EEH isn't enabled, and so we always enable
756 * EEH for all slots/all devices.
757 *
758 * The eeh-force-off option disables EEH checking globally, for all slots.
759 * Even if force-off is set, the EEH hardware is still enabled, so that
760 * newer systems can boot.
761 */
762void __init eeh_init(void)
763{
764 struct device_node *phb, *np;
765 struct eeh_early_enable_info info;
766
767 np = of_find_node_by_path("/rtas");
768 if (np == NULL)
769 return;
770
771 ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
772 ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
773 ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
774 ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
775 ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
776
777 if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
778 return;
779
780 eeh_error_buf_size = rtas_token("rtas-error-log-max");
781 if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
782 eeh_error_buf_size = 1024;
783 }
784 if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
785 printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
786	"buffer! (%d vs %d)\n", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
787 eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
788 }
789
790	/* Enable EEH for all adapters. Note that EEH requires BUIDs. */
791 for (phb = of_find_node_by_name(NULL, "pci"); phb;
792 phb = of_find_node_by_name(phb, "pci")) {
793 unsigned long buid;
794
795 buid = get_phb_buid(phb);
796 if (buid == 0)
797 continue;
798
799 info.buid_lo = BUID_LO(buid);
800 info.buid_hi = BUID_HI(buid);
801 traverse_pci_devices(phb, early_enable_eeh, &info);
802 }
803
804 if (eeh_subsystem_enabled)
805 printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
806 else
807 printk(KERN_WARNING "EEH: No capable adapters found\n");
808}
809
810/**
811 * eeh_add_device_early - enable EEH for the indicated device_node
812 * @dn: device node for which to set up EEH
813 *
814 * This routine must be used to perform EEH initialization for PCI
815 * devices that were added after system boot (e.g. hotplug, dlpar).
816 * This routine must be called before any i/o is performed to the
817 * adapter (including any config-space i/o).
818 * Whether this actually enables EEH or not for this device depends
819 * on the CEC architecture, the type of the device, earlier boot
820 * command-line arguments, etc.
821 */
822void eeh_add_device_early(struct device_node *dn)
823{
824 struct pci_controller *phb;
825 struct eeh_early_enable_info info;
826
827 if (!dn)
828 return;
829 phb = dn->phb;
830 if (NULL == phb || 0 == phb->buid) {
831 printk(KERN_WARNING "EEH: Expected buid but found none\n");
832 return;
833 }
834
835 info.buid_hi = BUID_HI(phb->buid);
836 info.buid_lo = BUID_LO(phb->buid);
837 early_enable_eeh(dn, &info);
838}
839EXPORT_SYMBOL(eeh_add_device_early);
840
841/**
842 * eeh_add_device_late - perform EEH initialization for the indicated pci device
843 * @dev: pci device for which to set up EEH
844 *
845 * This routine must be used to complete EEH initialization for PCI
846 * devices that were added after system boot (e.g. hotplug, dlpar).
847 */
848void eeh_add_device_late(struct pci_dev *dev)
849{
850 if (!dev || !eeh_subsystem_enabled)
851 return;
852
853#ifdef DEBUG
854 printk(KERN_DEBUG "EEH: adding device %s %s\n", pci_name(dev),
855 pci_pretty_name(dev));
856#endif
857
858 pci_addr_cache_insert_device (dev);
859}
860EXPORT_SYMBOL(eeh_add_device_late);
861
862/**
863 * eeh_remove_device - undo EEH setup for the indicated pci device
864 * @dev: pci device to be removed
865 *
866 * This routine should be called when a device is removed from a running
867 * system (e.g. by hotplug or dlpar).
868 */
869void eeh_remove_device(struct pci_dev *dev)
870{
871 if (!dev || !eeh_subsystem_enabled)
872 return;
873
874 /* Unregister the device with the EEH/PCI address search system */
875#ifdef DEBUG
876 printk(KERN_DEBUG "EEH: remove device %s %s\n", pci_name(dev),
877 pci_pretty_name(dev));
878#endif
879 pci_addr_cache_remove_device(dev);
880}
881EXPORT_SYMBOL(eeh_remove_device);
882
883static int proc_eeh_show(struct seq_file *m, void *v)
884{
885 unsigned int cpu;
886 unsigned long ffs = 0, positives = 0, failures = 0;
887 unsigned long resets = 0;
888
889 for_each_cpu(cpu) {
890 ffs += per_cpu(total_mmio_ffs, cpu);
891 positives += per_cpu(false_positives, cpu);
892 failures += per_cpu(ignored_failures, cpu);
893 resets += per_cpu(slot_resets, cpu);
894 }
895
896 if (0 == eeh_subsystem_enabled) {
897 seq_printf(m, "EEH Subsystem is globally disabled\n");
898 seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
899 } else {
900 seq_printf(m, "EEH Subsystem is enabled\n");
901 seq_printf(m, "eeh_total_mmio_ffs=%ld\n"
902 "eeh_false_positives=%ld\n"
903 "eeh_ignored_failures=%ld\n"
904 "eeh_slot_resets=%ld\n"
905 "eeh_fail_count=%d\n",
906 ffs, positives, failures, resets,
907 eeh_fail_count.counter);
908 }
909
910 return 0;
911}
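/*
 * Sample /proc/ppc64/eeh contents on an enabled system (the counter
 * values here are illustrative only; the format strings are above):
 *
 *	EEH Subsystem is enabled
 *	eeh_total_mmio_ffs=12
 *	eeh_false_positives=1
 *	eeh_ignored_failures=0
 *	eeh_slot_resets=0
 *	eeh_fail_count=0
 */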
912
913static int proc_eeh_open(struct inode *inode, struct file *file)
914{
915 return single_open(file, proc_eeh_show, NULL);
916}
917
918static struct file_operations proc_eeh_operations = {
919 .open = proc_eeh_open,
920 .read = seq_read,
921 .llseek = seq_lseek,
922 .release = single_release,
923};
924
925static int __init eeh_init_proc(void)
926{
927 struct proc_dir_entry *e;
928
929 if (systemcfg->platform & PLATFORM_PSERIES) {
930 e = create_proc_entry("ppc64/eeh", 0, NULL);
931 if (e)
932 e->proc_fops = &proc_eeh_operations;
933 }
934
935 return 0;
936}
937__initcall(eeh_init_proc);
diff --git a/arch/ppc64/kernel/entry.S b/arch/ppc64/kernel/entry.S
new file mode 100644
index 000000000000..d3604056e1a9
--- /dev/null
+++ b/arch/ppc64/kernel/entry.S
@@ -0,0 +1,845 @@
1/*
2 * arch/ppc64/kernel/entry.S
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
7 * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
8 * Adapted for Power Macintosh by Paul Mackerras.
9 * Low-level exception handlers and MMU support
10 * rewritten by Paul Mackerras.
11 * Copyright (C) 1996 Paul Mackerras.
12 * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
13 *
14 * This file contains the system call entry code, context switch
15 * code, and exception/interrupt return code for PowerPC.
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version.
21 */
22
23#include <linux/config.h>
24#include <linux/errno.h>
25#include <asm/unistd.h>
26#include <asm/processor.h>
27#include <asm/page.h>
28#include <asm/mmu.h>
29#include <asm/thread_info.h>
30#include <asm/ppc_asm.h>
31#include <asm/offsets.h>
32#include <asm/cputable.h>
33
34#ifdef CONFIG_PPC_ISERIES
35#define DO_SOFT_DISABLE
36#endif
37
38/*
39 * System calls.
40 */
41 .section ".toc","aw"
42.SYS_CALL_TABLE:
43 .tc .sys_call_table[TC],.sys_call_table
44
45.SYS_CALL_TABLE32:
46 .tc .sys_call_table32[TC],.sys_call_table32
47
48/* This value is used to mark exception frames on the stack. */
49exception_marker:
50 .tc ID_72656773_68657265[TC],0x7265677368657265
51
52 .section ".text"
53 .align 7
54
55#undef SHOW_SYSCALLS
56
57 .globl system_call_common
58system_call_common:
59 andi. r10,r12,MSR_PR
60 mr r10,r1
61 addi r1,r1,-INT_FRAME_SIZE
62 beq- 1f
63 ld r1,PACAKSAVE(r13)
641: std r10,0(r1)
65 std r11,_NIP(r1)
66 std r12,_MSR(r1)
67 std r0,GPR0(r1)
68 std r10,GPR1(r1)
69 std r2,GPR2(r1)
70 std r3,GPR3(r1)
71 std r4,GPR4(r1)
72 std r5,GPR5(r1)
73 std r6,GPR6(r1)
74 std r7,GPR7(r1)
75 std r8,GPR8(r1)
76 li r11,0
77 std r11,GPR9(r1)
78 std r11,GPR10(r1)
79 std r11,GPR11(r1)
80 std r11,GPR12(r1)
81 std r9,GPR13(r1)
82 crclr so
83 mfcr r9
84 mflr r10
85 li r11,0xc01
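	/* trap value 0xc01: vector 0xc00 (system call), with the low bit
	 * set to record that the non-volatile GPRs are not saved in this
	 * frame (save_nvgprs below tests and clears that bit) */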
86 std r9,_CCR(r1)
87 std r10,_LINK(r1)
88 std r11,_TRAP(r1)
89 mfxer r9
90 mfctr r10
91 std r9,_XER(r1)
92 std r10,_CTR(r1)
93 std r3,ORIG_GPR3(r1)
94 ld r2,PACATOC(r13)
95 addi r9,r1,STACK_FRAME_OVERHEAD
96 ld r11,exception_marker@toc(r2)
97 std r11,-16(r9) /* "regshere" marker */
98#ifdef CONFIG_PPC_ISERIES
99 /* Hack for handling interrupts when soft-enabling on iSeries */
100 cmpdi cr1,r0,0x5555 /* syscall 0x5555 */
101 andi. r10,r12,MSR_PR /* from kernel */
102 crand 4*cr0+eq,4*cr1+eq,4*cr0+eq
103 beq hardware_interrupt_entry
104 lbz r10,PACAPROCENABLED(r13)
105 std r10,SOFTE(r1)
106#endif
107 mfmsr r11
108 ori r11,r11,MSR_EE
109 mtmsrd r11,1
110
111#ifdef SHOW_SYSCALLS
112 bl .do_show_syscall
113 REST_GPR(0,r1)
114 REST_4GPRS(3,r1)
115 REST_2GPRS(7,r1)
116 addi r9,r1,STACK_FRAME_OVERHEAD
117#endif
118 clrrdi r11,r1,THREAD_SHIFT
119 li r12,0
120 ld r10,TI_FLAGS(r11)
121 stb r12,TI_SC_NOERR(r11)
122 andi. r11,r10,_TIF_SYSCALL_T_OR_A
123 bne- syscall_dotrace
124syscall_dotrace_cont:
125 cmpldi 0,r0,NR_syscalls
126 bge- syscall_enosys
127
128system_call: /* label this so stack traces look sane */
129/*
130 * Need to vector to the 32-bit or default sys_call_table here,
131 * based on caller's run-mode / personality.
132 */
133 ld r11,.SYS_CALL_TABLE@toc(2)
134 andi. r10,r10,_TIF_32BIT
135 beq 15f
136 ld r11,.SYS_CALL_TABLE32@toc(2)
137 clrldi r3,r3,32
138 clrldi r4,r4,32
139 clrldi r5,r5,32
140 clrldi r6,r6,32
141 clrldi r7,r7,32
142 clrldi r8,r8,32
14315:
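	/* the table holds 8-byte pointers, so scale the syscall number
	 * by 8 before indexing */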
144 slwi r0,r0,3
145 ldx r10,r11,r0 /* Fetch system call handler [ptr] */
146 mtctr r10
147 bctrl /* Call handler */
148
149syscall_exit:
150#ifdef SHOW_SYSCALLS
151 std r3,GPR3(r1)
152 bl .do_show_syscall_exit
153 ld r3,GPR3(r1)
154#endif
155 std r3,RESULT(r1)
156 ld r5,_CCR(r1)
157 li r10,-_LAST_ERRNO
158 cmpld r3,r10
159 clrrdi r12,r1,THREAD_SHIFT
160 bge- syscall_error
161syscall_error_cont:
162
163 /* check for syscall tracing or audit */
164 ld r9,TI_FLAGS(r12)
165 andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
166 bne- syscall_exit_trace
167syscall_exit_trace_cont:
168
169 /* disable interrupts so current_thread_info()->flags can't change,
170 and so that we don't get interrupted after loading SRR0/1. */
171 ld r8,_MSR(r1)
172 andi. r10,r8,MSR_RI
173 beq- unrecov_restore
174 mfmsr r10
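	/* clear MSR_EE (bit 48): rldicl rotates it up to the MSB and
	 * masks it off, then rotldi rotates everything back into place */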
175 rldicl r10,r10,48,1
176 rotldi r10,r10,16
177 mtmsrd r10,1
178 ld r9,TI_FLAGS(r12)
179 andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED)
180 bne- syscall_exit_work
181 ld r7,_NIP(r1)
182 stdcx. r0,0,r1 /* to clear the reservation */
183 andi. r6,r8,MSR_PR
184 ld r4,_LINK(r1)
185 beq- 1f /* only restore r13 if */
186 ld r13,GPR13(r1) /* returning to usermode */
1871: ld r2,GPR2(r1)
188 li r12,MSR_RI
189 andc r10,r10,r12
190 mtmsrd r10,1 /* clear MSR.RI */
191 ld r1,GPR1(r1)
192 mtlr r4
193 mtcr r5
194 mtspr SRR0,r7
195 mtspr SRR1,r8
196 rfid
197 b . /* prevent speculative execution */
198
199syscall_enosys:
200 li r3,-ENOSYS
201 std r3,RESULT(r1)
202 clrrdi r12,r1,THREAD_SHIFT
203 ld r5,_CCR(r1)
204
205syscall_error:
206 lbz r11,TI_SC_NOERR(r12)
207 cmpwi 0,r11,0
208 bne- syscall_error_cont
209 neg r3,r3
210 oris r5,r5,0x1000 /* Set SO bit in CR */
211 std r5,_CCR(r1)
212 b syscall_error_cont
213
214/* Traced system call support */
215syscall_dotrace:
216 bl .save_nvgprs
217 addi r3,r1,STACK_FRAME_OVERHEAD
218 bl .do_syscall_trace_enter
219 ld r0,GPR0(r1) /* Restore original registers */
220 ld r3,GPR3(r1)
221 ld r4,GPR4(r1)
222 ld r5,GPR5(r1)
223 ld r6,GPR6(r1)
224 ld r7,GPR7(r1)
225 ld r8,GPR8(r1)
226 addi r9,r1,STACK_FRAME_OVERHEAD
227 clrrdi r10,r1,THREAD_SHIFT
228 ld r10,TI_FLAGS(r10)
229 b syscall_dotrace_cont
230
231syscall_exit_trace:
232 std r3,GPR3(r1)
233 bl .save_nvgprs
234 addi r3,r1,STACK_FRAME_OVERHEAD
235 bl .do_syscall_trace_leave
236 REST_NVGPRS(r1)
237 ld r3,GPR3(r1)
238 ld r5,_CCR(r1)
239 clrrdi r12,r1,THREAD_SHIFT
240 b syscall_exit_trace_cont
241
242/* Stuff to do on exit from a system call. */
243syscall_exit_work:
244 std r3,GPR3(r1)
245 std r5,_CCR(r1)
246 b .ret_from_except_lite
247
248/* Save non-volatile GPRs, if not already saved. */
249_GLOBAL(save_nvgprs)
250 ld r11,_TRAP(r1)
251 andi. r0,r11,1
252 beqlr-
253 SAVE_NVGPRS(r1)
254 clrrdi r0,r11,1
255 std r0,_TRAP(r1)
256 blr
257
258/*
259 * The sigsuspend and rt_sigsuspend system calls can call do_signal
260 * and thus put the process into the stopped state where we might
261 * want to examine its user state with ptrace. Therefore we need
262 * to save all the nonvolatile registers (r14 - r31) before calling
263 * the C code. Similarly, fork, vfork and clone need the full
264 * register state on the stack so that it can be copied to the child.
265 */
266_GLOBAL(ppc32_sigsuspend)
267 bl .save_nvgprs
268 bl .sys32_sigsuspend
269 b 70f
270
271_GLOBAL(ppc64_rt_sigsuspend)
272 bl .save_nvgprs
273 bl .sys_rt_sigsuspend
274 b 70f
275
276_GLOBAL(ppc32_rt_sigsuspend)
277 bl .save_nvgprs
278 bl .sys32_rt_sigsuspend
279 /* If sigsuspend() returns zero, we are going into a signal handler */
28070: cmpdi 0,r3,0
281 beq .ret_from_except
282 /* If it returned -EINTR, we need to return via syscall_exit to set
283 the SO bit in cr0 and potentially stop for ptrace. */
284 b syscall_exit
285
286_GLOBAL(ppc_fork)
287 bl .save_nvgprs
288 bl .sys_fork
289 b syscall_exit
290
291_GLOBAL(ppc_vfork)
292 bl .save_nvgprs
293 bl .sys_vfork
294 b syscall_exit
295
296_GLOBAL(ppc_clone)
297 bl .save_nvgprs
298 bl .sys_clone
299 b syscall_exit
300
301_GLOBAL(ppc32_swapcontext)
302 bl .save_nvgprs
303 bl .sys32_swapcontext
304 b 80f
305
306_GLOBAL(ppc64_swapcontext)
307 bl .save_nvgprs
308 bl .sys_swapcontext
309 b 80f
310
311_GLOBAL(ppc32_sigreturn)
312 bl .sys32_sigreturn
313 b 80f
314
315_GLOBAL(ppc32_rt_sigreturn)
316 bl .sys32_rt_sigreturn
317 b 80f
318
319_GLOBAL(ppc64_rt_sigreturn)
320 bl .sys_rt_sigreturn
321
32280: cmpdi 0,r3,0
323 blt syscall_exit
324 clrrdi r4,r1,THREAD_SHIFT
325 ld r4,TI_FLAGS(r4)
326 andi. r4,r4,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
327 beq+ 81f
328 addi r3,r1,STACK_FRAME_OVERHEAD
329 bl .do_syscall_trace_leave
33081: b .ret_from_except
331
332_GLOBAL(ret_from_fork)
333 bl .schedule_tail
334 REST_NVGPRS(r1)
335 li r3,0
336 b syscall_exit
337
338/*
339 * This routine switches between two different tasks. The process
340 * state of one is saved on its kernel stack. Then the state
341 * of the other is restored from its kernel stack. The memory
342 * management hardware is updated to the second process's state.
343 * Finally, we can return to the second process, via ret_from_except.
344 * On entry, r3 points to the THREAD for the current task, r4
345 * points to the THREAD for the new task.
346 *
347 * Note: there are two ways to get to the "going out" portion
348 * of this code; either by coming in via the entry (_switch)
349 * or via "fork" which must set up an environment equivalent
350 * to the "_switch" path. If you change this you'll have to change
351 * the fork code also.
352 *
353 * The code which creates the new task context is in 'copy_thread'
354 * in arch/ppc64/kernel/process.c
355 */
356 .align 7
357_GLOBAL(_switch)
358 mflr r0
359 std r0,16(r1)
360 stdu r1,-SWITCH_FRAME_SIZE(r1)
361 /* r3-r13 are caller saved -- Cort */
362 SAVE_8GPRS(14, r1)
363 SAVE_10GPRS(22, r1)
364 mflr r20 /* Return to switch caller */
365 mfmsr r22
366 li r0, MSR_FP
367#ifdef CONFIG_ALTIVEC
368BEGIN_FTR_SECTION
369 oris r0,r0,MSR_VEC@h /* Disable altivec */
370 mfspr r24,SPRN_VRSAVE /* save vrsave register value */
371 std r24,THREAD_VRSAVE(r3)
372END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
373#endif /* CONFIG_ALTIVEC */
374 and. r0,r0,r22
375 beq+ 1f
376 andc r22,r22,r0
377 mtmsrd r22
378 isync
3791: std r20,_NIP(r1)
380 mfcr r23
381 std r23,_CCR(r1)
382 std r1,KSP(r3) /* Set old stack pointer */
383
384#ifdef CONFIG_SMP
385 /* We need a sync somewhere here to make sure that if the
386 * previous task gets rescheduled on another CPU, it sees all
387 * stores it has performed on this one.
388 */
389 sync
390#endif /* CONFIG_SMP */
391
392 addi r6,r4,-THREAD /* Convert THREAD to 'current' */
393 std r6,PACACURRENT(r13) /* Set new 'current' */
394
395 ld r8,KSP(r4) /* new stack pointer */
396BEGIN_FTR_SECTION
397 clrrdi r6,r8,28 /* get its ESID */
398 clrrdi r9,r1,28 /* get current sp ESID */
399 clrldi. r0,r6,2 /* is new ESID c00000000? */
400 cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */
401 cror eq,4*cr1+eq,eq
402 beq 2f /* if yes, don't slbie it */
403 oris r0,r6,0x0800 /* set C (class) bit */
404
405 /* Bolt in the new stack SLB entry */
406 ld r7,KSP_VSID(r4) /* Get new stack's VSID */
407 oris r6,r6,(SLB_ESID_V)@h
408 ori r6,r6,(SLB_NUM_BOLTED-1)@l
409 slbie r0
410 slbie r0 /* Workaround POWER5 < DD2.1 issue */
411 slbmte r7,r6
412 isync
413
4142:
415END_FTR_SECTION_IFSET(CPU_FTR_SLB)
416 clrrdi r7,r8,THREAD_SHIFT /* base of new stack */
417 /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
418 because we don't need to leave the 288-byte ABI gap at the
419 top of the kernel stack. */
420 addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
421
422 mr r1,r8 /* start using new stack pointer */
423 std r7,PACAKSAVE(r13)
424
425 ld r6,_CCR(r1)
426 mtcrf 0xFF,r6
427
428#ifdef CONFIG_ALTIVEC
429BEGIN_FTR_SECTION
430 ld r0,THREAD_VRSAVE(r4)
431 mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
432END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
433#endif /* CONFIG_ALTIVEC */
434
435 /* r3-r13 are destroyed -- Cort */
436 REST_8GPRS(14, r1)
437 REST_10GPRS(22, r1)
438
439#ifdef CONFIG_PPC_ISERIES
440 clrrdi r7,r1,THREAD_SHIFT /* get current_thread_info() */
441 ld r7,TI_FLAGS(r7) /* Get run light flag */
442 mfspr r9,CTRLF
443 srdi r7,r7,TIF_RUN_LIGHT
444 insrdi r9,r7,1,63 /* Insert run light into CTRL */
445 mtspr CTRLT,r9
446#endif
447
448 /* convert old thread to its task_struct for return value */
449 addi r3,r3,-THREAD
450 ld r7,_NIP(r1) /* Return to _switch caller in new task */
451 mtlr r7
452 addi r1,r1,SWITCH_FRAME_SIZE
453 blr
454
455 .align 7
456_GLOBAL(ret_from_except)
457 ld r11,_TRAP(r1)
458 andi. r0,r11,1
459 bne .ret_from_except_lite
460 REST_NVGPRS(r1)
461
462_GLOBAL(ret_from_except_lite)
463 /*
464 * Disable interrupts so that current_thread_info()->flags
465 * can't change between when we test it and when we return
466 * from the interrupt.
467 */
468 mfmsr r10 /* Get current interrupt state */
469 rldicl r9,r10,48,1 /* clear MSR_EE */
470 rotldi r9,r9,16
471 mtmsrd r9,1 /* Update machine state */
472
473#ifdef CONFIG_PREEMPT
474 clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */
475 li r0,_TIF_NEED_RESCHED /* bits to check */
476 ld r3,_MSR(r1)
477 ld r4,TI_FLAGS(r9)
478 /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
479 rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
480 and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */
481 bne do_work
482
483#else /* !CONFIG_PREEMPT */
484 ld r3,_MSR(r1) /* Returning to user mode? */
485 andi. r3,r3,MSR_PR
486 beq restore /* if not, just restore regs and return */
487
488 /* Check current_thread_info()->flags */
489 clrrdi r9,r1,THREAD_SHIFT
490 ld r4,TI_FLAGS(r9)
491 andi. r0,r4,_TIF_USER_WORK_MASK
492 bne do_work
493#endif
494
495restore:
496#ifdef CONFIG_PPC_ISERIES
497 ld r5,SOFTE(r1)
498 cmpdi 0,r5,0
499 beq 4f
500 /* Check for pending interrupts (iSeries) */
501 ld r3,PACALPPACA+LPPACAANYINT(r13)
502 cmpdi r3,0
503 beq+ 4f /* skip do_IRQ if no interrupts */
504
505 li r3,0
506 stb r3,PACAPROCENABLED(r13) /* ensure we are soft-disabled */
507 ori r10,r10,MSR_EE
508 mtmsrd r10 /* hard-enable again */
509 addi r3,r1,STACK_FRAME_OVERHEAD
510 bl .do_IRQ
511 b .ret_from_except_lite /* loop back and handle more */
512
5134: stb r5,PACAPROCENABLED(r13)
514#endif
515
516 ld r3,_MSR(r1)
517 andi. r0,r3,MSR_RI
518 beq- unrecov_restore
519
520 andi. r0,r3,MSR_PR
521
522 /*
523 * r13 is our per cpu area, only restore it if we are returning to
524 * userspace
525 */
526 beq 1f
527 REST_GPR(13, r1)
5281:
529 ld r3,_CTR(r1)
530 ld r0,_LINK(r1)
531 mtctr r3
532 mtlr r0
533 ld r3,_XER(r1)
534 mtspr XER,r3
535
536 REST_8GPRS(5, r1)
537
538 stdcx. r0,0,r1 /* to clear the reservation */
539
540 mfmsr r0
541 li r2, MSR_RI
542 andc r0,r0,r2
543 mtmsrd r0,1
544
545 ld r0,_MSR(r1)
546 mtspr SRR1,r0
547
548 ld r2,_CCR(r1)
549 mtcrf 0xFF,r2
550 ld r2,_NIP(r1)
551 mtspr SRR0,r2
552
553 ld r0,GPR0(r1)
554 ld r2,GPR2(r1)
555 ld r3,GPR3(r1)
556 ld r4,GPR4(r1)
557 ld r1,GPR1(r1)
558
559 rfid
560 b . /* prevent speculative execution */
561
562/* Note: this must change if we start using the TIF_NOTIFY_RESUME bit */
563do_work:
564#ifdef CONFIG_PREEMPT
565 andi. r0,r3,MSR_PR /* Returning to user mode? */
566 bne user_work
567 /* Check that preempt_count() == 0 and interrupts are enabled */
568 lwz r8,TI_PREEMPT(r9)
569 cmpwi cr1,r8,0
570#ifdef CONFIG_PPC_ISERIES
571 ld r0,SOFTE(r1)
572 cmpdi r0,0
573#else
574 andi. r0,r3,MSR_EE
575#endif
576 crandc eq,cr1*4+eq,eq
577 bne restore
578 /* here we are preempting the current task */
5791:
580#ifdef CONFIG_PPC_ISERIES
581 li r0,1
582 stb r0,PACAPROCENABLED(r13)
583#endif
584 ori r10,r10,MSR_EE
585 mtmsrd r10,1 /* reenable interrupts */
586 bl .preempt_schedule
587 mfmsr r10
588 clrrdi r9,r1,THREAD_SHIFT
589 rldicl r10,r10,48,1 /* disable interrupts again */
590 rotldi r10,r10,16
591 mtmsrd r10,1
592 ld r4,TI_FLAGS(r9)
593 andi. r0,r4,_TIF_NEED_RESCHED
594 bne 1b
595 b restore
596
597user_work:
598#endif
599 /* Enable interrupts */
600 ori r10,r10,MSR_EE
601 mtmsrd r10,1
602
603 andi. r0,r4,_TIF_NEED_RESCHED
604 beq 1f
605 bl .schedule
606 b .ret_from_except_lite
607
6081: bl .save_nvgprs
609 li r3,0
610 addi r4,r1,STACK_FRAME_OVERHEAD
611 bl .do_signal
612 b .ret_from_except
613
614unrecov_restore:
615 addi r3,r1,STACK_FRAME_OVERHEAD
616 bl .unrecoverable_exception
617 b unrecov_restore
618
619#ifdef CONFIG_PPC_RTAS
620/*
621 * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
622 * called with the MMU off.
623 *
624 * In addition, we need to be in 32b mode, at least for now.
625 *
626 * Note: r3 is an input parameter to rtas, so don't trash it...
627 */
628_GLOBAL(enter_rtas)
629 mflr r0
630 std r0,16(r1)
631 stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */
632
633 /* Because RTAS is running in 32b mode, it clobbers the high order half
634 * of all registers that it saves. We therefore save those registers
635 * RTAS might touch to the stack. (r0, r3-r13 are caller saved)
636 */
637 SAVE_GPR(2, r1) /* Save the TOC */
638 SAVE_GPR(13, r1) /* Save paca */
639 SAVE_8GPRS(14, r1) /* Save the non-volatiles */
640 SAVE_10GPRS(22, r1) /* ditto */
641
642 mfcr r4
643 std r4,_CCR(r1)
644 mfctr r5
645 std r5,_CTR(r1)
646 mfspr r6,XER
647 std r6,_XER(r1)
648 mfdar r7
649 std r7,_DAR(r1)
650 mfdsisr r8
651 std r8,_DSISR(r1)
652 mfsrr0 r9
653 std r9,_SRR0(r1)
654 mfsrr1 r10
655 std r10,_SRR1(r1)
656
657 /* There is no way it is acceptable to get here with interrupts enabled,
658 * check it with the asm equivalent of WARN_ON
659 */
660 mfmsr r6
661 andi. r0,r6,MSR_EE
6621: tdnei r0,0
663.section __bug_table,"a"
664 .llong 1b,__LINE__ + 0x1000000, 1f, 2f
665.previous
666.section .rodata,"a"
6671: .asciz __FILE__
6682: .asciz "enter_rtas"
669.previous
670
671 /* Unfortunately, the stack pointer and the MSR are also clobbered,
672 * so they are saved in the PACA which allows us to restore
673 * our original state after RTAS returns.
674 */
675 std r1,PACAR1(r13)
676 std r6,PACASAVEDMSR(r13)
677
678 /* Setup our real return addr */
679 SET_REG_TO_LABEL(r4,.rtas_return_loc)
680 SET_REG_TO_CONST(r9,KERNELBASE)
681 sub r4,r4,r9
682 mtlr r4
683
684 li r0,0
685 ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
686 andc r0,r6,r0
687
688 li r9,1
689 rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
690 ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP
691 andc r6,r0,r9
692 ori r6,r6,MSR_RI
693 sync /* disable interrupts so SRR0/1 */
694 mtmsrd r0 /* don't get trashed */
695
696 SET_REG_TO_LABEL(r4,rtas)
697 ld r5,RTASENTRY(r4) /* get the rtas->entry value */
698 ld r4,RTASBASE(r4) /* get the rtas->base value */
699
700 mtspr SRR0,r5
701 mtspr SRR1,r6
702 rfid
703 b . /* prevent speculative execution */
704
705_STATIC(rtas_return_loc)
706 /* relocation is off at this point */
707 mfspr r4,SPRG3 /* Get PACA */
708 SET_REG_TO_CONST(r5, KERNELBASE)
709 sub r4,r4,r5 /* RELOC the PACA base pointer */
710
711 mfmsr r6
712 li r0,MSR_RI
713 andc r6,r6,r0
714 sync
715 mtmsrd r6
716
717 ld r1,PACAR1(r4) /* Restore our SP */
718 LOADADDR(r3,.rtas_restore_regs)
719 ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
720
721 mtspr SRR0,r3
722 mtspr SRR1,r4
723 rfid
724 b . /* prevent speculative execution */
725
726_STATIC(rtas_restore_regs)
727 /* relocation is on at this point */
728 REST_GPR(2, r1) /* Restore the TOC */
729 REST_GPR(13, r1) /* Restore paca */
730 REST_8GPRS(14, r1) /* Restore the non-volatiles */
731 REST_10GPRS(22, r1) /* ditto */
732
733 mfspr r13,SPRG3
734
735 ld r4,_CCR(r1)
736 mtcr r4
737 ld r5,_CTR(r1)
738 mtctr r5
739 ld r6,_XER(r1)
740 mtspr XER,r6
741 ld r7,_DAR(r1)
742 mtdar r7
743 ld r8,_DSISR(r1)
744 mtdsisr r8
745 ld r9,_SRR0(r1)
746 mtsrr0 r9
747 ld r10,_SRR1(r1)
748 mtsrr1 r10
749
750 addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */
751 ld r0,16(r1) /* get return address */
752
753 mtlr r0
754 blr /* return to caller */
755
756#endif /* CONFIG_PPC_RTAS */
757
758#ifdef CONFIG_PPC_MULTIPLATFORM
759
760_GLOBAL(enter_prom)
761 mflr r0
762 std r0,16(r1)
763 stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
764
765 /* Because PROM is running in 32b mode, it clobbers the high order half
766 * of all registers that it saves. We therefore save those registers
767 * PROM might touch to the stack. (r0, r3-r13 are caller saved)
768 */
769 SAVE_8GPRS(2, r1)
770 SAVE_GPR(13, r1)
771 SAVE_8GPRS(14, r1)
772 SAVE_10GPRS(22, r1)
773 mfcr r4
774 std r4,_CCR(r1)
775 mfctr r5
776 std r5,_CTR(r1)
777 mfspr r6,XER
778 std r6,_XER(r1)
779 mfdar r7
780 std r7,_DAR(r1)
781 mfdsisr r8
782 std r8,_DSISR(r1)
783 mfsrr0 r9
784 std r9,_SRR0(r1)
785 mfsrr1 r10
786 std r10,_SRR1(r1)
787 mfmsr r11
788 std r11,_MSR(r1)
789
790 /* Get the PROM entrypoint */
791 ld r0,GPR4(r1)
792 mtlr r0
793
794	/* Switch the MSR to 32-bit mode
795 */
796 mfmsr r11
797 li r12,1
798 rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
799 andc r11,r11,r12
800 li r12,1
801 rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
802 andc r11,r11,r12
803 mtmsrd r11
804 isync
805
806 /* Restore arguments & enter PROM here... */
807 ld r3,GPR3(r1)
808 blrl
809
810	/* Make sure that the top 32 bits of r1 didn't get
811	 * corrupted by OF
812 */
813 rldicl r1,r1,0,32
814
815 /* Restore the MSR (back to 64 bits) */
816 ld r0,_MSR(r1)
817 mtmsrd r0
818 isync
819
820 /* Restore other registers */
821 REST_GPR(2, r1)
822 REST_GPR(13, r1)
823 REST_8GPRS(14, r1)
824 REST_10GPRS(22, r1)
825 ld r4,_CCR(r1)
826 mtcr r4
827 ld r5,_CTR(r1)
828 mtctr r5
829 ld r6,_XER(r1)
830 mtspr XER,r6
831 ld r7,_DAR(r1)
832 mtdar r7
833 ld r8,_DSISR(r1)
834 mtdsisr r8
835 ld r9,_SRR0(r1)
836 mtsrr0 r9
837 ld r10,_SRR1(r1)
838 mtsrr1 r10
839
840 addi r1,r1,PROM_FRAME_SIZE
841 ld r0,16(r1)
842 mtlr r0
843 blr
844
845#endif /* CONFIG_PPC_MULTIPLATFORM */
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
new file mode 100644
index 000000000000..fe05f3fbf9d0
--- /dev/null
+++ b/arch/ppc64/kernel/head.S
@@ -0,0 +1,2139 @@
1/*
2 * arch/ppc64/kernel/head.S
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
8 * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
9 * Adapted for Power Macintosh by Paul Mackerras.
10 * Low-level exception handlers and MMU support
11 * rewritten by Paul Mackerras.
12 * Copyright (C) 1996 Paul Mackerras.
13 *
14 * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and
15 * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com
16 *
17 * This file contains the low-level support and setup for the
18 * PowerPC-64 platform, including trap and interrupt dispatch.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 */
25
26#define SECONDARY_PROCESSORS
27
28#include <linux/config.h>
29#include <linux/threads.h>
30#include <asm/processor.h>
31#include <asm/page.h>
32#include <asm/mmu.h>
33#include <asm/naca.h>
34#include <asm/systemcfg.h>
35#include <asm/ppc_asm.h>
36#include <asm/offsets.h>
37#include <asm/bug.h>
38#include <asm/cputable.h>
39#include <asm/setup.h>
40#include <asm/hvcall.h>
41
42#ifdef CONFIG_PPC_ISERIES
43#define DO_SOFT_DISABLE
44#endif
45
46/*
47 * hcall interface to pSeries LPAR
48 */
49#define H_SET_ASR 0x30
50
51/*
52 * We layout physical memory as follows:
53 * 0x0000 - 0x00ff : Secondary processor spin code
54 * 0x0100 - 0x2fff : pSeries Interrupt prologs
55 * 0x3000 - 0x3fff : Interrupt support
56 * 0x4000 - 0x4fff : NACA
57 * 0x6000 : iSeries and common interrupt prologs
58 * 0x9000 - 0x9fff : Initial segment table
59 */
60
61/*
62 * SPRG Usage
63 *
64 * Register Definition
65 *
66 * SPRG0 reserved for hypervisor
67 * SPRG1 temp - used to save gpr
68 * SPRG2 temp - used to save gpr
69 * SPRG3 virt addr of paca
70 */
71
72/*
73 * Entering into this code we make the following assumptions:
74 * For pSeries:
75 * 1. The MMU is off & open firmware is running in real mode.
76 * 2. The kernel is entered at __start
77 *
78 * For iSeries:
79 * 1. The MMU is on (as it always is for iSeries)
80 * 2. The kernel is entered at system_reset_iSeries
81 */
82
83 .text
84 .globl _stext
85_stext:
86#ifdef CONFIG_PPC_MULTIPLATFORM
87_GLOBAL(__start)
88 /* NOP this out unconditionally */
89BEGIN_FTR_SECTION
90 b .__start_initialization_multiplatform
91END_FTR_SECTION(0, 1)
92#endif /* CONFIG_PPC_MULTIPLATFORM */
93
94 /* Catch branch to 0 in real mode */
95 trap
96#ifdef CONFIG_PPC_ISERIES
97 /*
98 * At offset 0x20, there is a pointer to iSeries LPAR data.
99 * This is required by the hypervisor
100 */
101 . = 0x20
102 .llong hvReleaseData-KERNELBASE
103
104 /*
105 * At offset 0x28 and 0x30 are offsets to the msChunks
106 * array (used by the iSeries LPAR debugger to do translation
107 * between physical addresses and absolute addresses) and
108 * to the pidhash table (also used by the debugger)
109 */
110 .llong msChunks-KERNELBASE
111 .llong 0 /* pidhash-KERNELBASE SFRXXX */
112
113 /* Offset 0x38 - Pointer to start of embedded System.map */
114 .globl embedded_sysmap_start
115embedded_sysmap_start:
116 .llong 0
117 /* Offset 0x40 - Pointer to end of embedded System.map */
118 .globl embedded_sysmap_end
119embedded_sysmap_end:
120 .llong 0
121
122#else /* CONFIG_PPC_ISERIES */
123
124 /* Secondary processors spin on this value until it goes to 1. */
125 .globl __secondary_hold_spinloop
126__secondary_hold_spinloop:
127 .llong 0x0
128
129 /* Secondary processors write this value with their cpu # */
130 /* after they enter the spin loop immediately below. */
131 .globl __secondary_hold_acknowledge
132__secondary_hold_acknowledge:
133 .llong 0x0
134
135 . = 0x60
136/*
137 * The following code is used on pSeries to hold secondary processors
138 * in a spin loop after they have been freed from OpenFirmware, but
139 * before the bulk of the kernel has been relocated. This code
140 * is relocated to physical address 0x60 before prom_init is run.
141 * All of it must fit below the first exception vector at 0x100.
142 */
143_GLOBAL(__secondary_hold)
144 mfmsr r24
145 ori r24,r24,MSR_RI
146 mtmsrd r24 /* RI on */
147
148 /* Grab our linux cpu number */
149 mr r24,r3
150
151 /* Tell the master cpu we're here */
152 /* Relocation is off & we are located at an address less */
153 /* than 0x100, so only need to grab low order offset. */
154 std r24,__secondary_hold_acknowledge@l(0)
155 sync
156
157	/* All secondary CPUs wait here until told to start. */
158100: ld r4,__secondary_hold_spinloop@l(0)
159 cmpdi 0,r4,1
160 bne 100b
161
162#ifdef CONFIG_HMT
163 b .hmt_init
164#else
165#ifdef CONFIG_SMP
166 mr r3,r24
167 b .pSeries_secondary_smp_init
168#else
169 BUG_OPCODE
170#endif
171#endif
172#endif
173
174/* This value is used to mark exception frames on the stack. */
175 .section ".toc","aw"
176exception_marker:
177 .tc ID_72656773_68657265[TC],0x7265677368657265
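	/* (the marker value 0x7265677368657265 is ASCII "regshere") */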
178 .text
179
180/*
181 * The following macros define the code that appears as
182 * the prologue to each of the exception handlers. They
183 * are split into two parts to allow a single kernel binary
184 * to be used for pSeries and iSeries.
185 * LOL. One day... - paulus
186 */
187
188/*
189 * We make as much of the exception code common between native
190 * exception handlers (including pSeries LPAR) and iSeries LPAR
191 * implementations as possible.
192 */
193
194/*
195 * This is the start of the interrupt handlers for pSeries
196 * This code runs with relocation off.
197 */
198#define EX_R9 0
199#define EX_R10 8
200#define EX_R11 16
201#define EX_R12 24
202#define EX_R13 32
203#define EX_SRR0 40
204#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */
205#define EX_DAR 48
206#define EX_LR 48 /* SLB miss saves LR, but not DAR */
207#define EX_DSISR 56
208#define EX_CCR 60
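/*
 * In C terms, each paca exception save area addressed by the offsets
 * above looks roughly like this (a sketch; the field names are
 * illustrative only, the offsets are the EX_* values):
 *
 *	struct ex_save_area {
 *		u64 r9, r10, r11, r12, r13;	(EX_R9 .. EX_R13)
 *		u64 srr0;	(EX_SRR0, reused as EX_R3 on SLB miss)
 *		u64 dar;	(EX_DAR, reused as EX_LR on SLB miss)
 *		u32 dsisr;	(EX_DSISR)
 *		u32 ccr;	(EX_CCR)
 *	};
 */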
209
210#define EXCEPTION_PROLOG_PSERIES(area, label) \
211 mfspr r13,SPRG3; /* get paca address into r13 */ \
212 std r9,area+EX_R9(r13); /* save r9 - r12 */ \
213 std r10,area+EX_R10(r13); \
214 std r11,area+EX_R11(r13); \
215 std r12,area+EX_R12(r13); \
216 mfspr r9,SPRG1; \
217 std r9,area+EX_R13(r13); \
218 mfcr r9; \
219 clrrdi r12,r13,32; /* get high part of &label */ \
220 mfmsr r10; \
221 mfspr r11,SRR0; /* save SRR0 */ \
222 ori r12,r12,(label)@l; /* virt addr of handler */ \
223 ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \
224 mtspr SRR0,r12; \
225 mfspr r12,SRR1; /* and SRR1 */ \
226 mtspr SRR1,r10; \
227 rfid; \
228 b . /* prevent speculative execution */
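/*
 * Note the trick at the end of the prolog above: setting SRR0/SRR1
 * and executing rfid is effectively a long jump to the virtual
 * address of the handler that simultaneously turns relocation on
 * (MSR_IR|MSR_DR) and makes the exception recoverable (MSR_RI).
 */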
229
230/*
231 * This is the start of the interrupt handlers for iSeries
232 * This code runs with relocation on.
233 */
234#define EXCEPTION_PROLOG_ISERIES_1(area) \
235 mfspr r13,SPRG3; /* get paca address into r13 */ \
236 std r9,area+EX_R9(r13); /* save r9 - r12 */ \
237 std r10,area+EX_R10(r13); \
238 std r11,area+EX_R11(r13); \
239 std r12,area+EX_R12(r13); \
240 mfspr r9,SPRG1; \
241 std r9,area+EX_R13(r13); \
242 mfcr r9
243
244#define EXCEPTION_PROLOG_ISERIES_2 \
245 mfmsr r10; \
246 ld r11,PACALPPACA+LPPACASRR0(r13); \
247 ld r12,PACALPPACA+LPPACASRR1(r13); \
248 ori r10,r10,MSR_RI; \
249 mtmsrd r10,1
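/*
 * On iSeries the hypervisor has already saved SRR0/SRR1 into the
 * lppaca by the time we get control, which is why the prolog above
 * loads them from PACALPPACA rather than from the SRR registers.
 */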
250
251/*
252 * The common exception prolog is used for all except a few exceptions
253 * such as a segment miss on a kernel address. We have to be prepared
254 * to take another exception from the point where we first touch the
255 * kernel stack onwards.
256 *
257 * On entry r13 points to the paca, r9-r13 are saved in the paca,
258 * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
259 * SRR1, and relocation is on.
260 */
261#define EXCEPTION_PROLOG_COMMON(n, area) \
262 andi. r10,r12,MSR_PR; /* See if coming from user */ \
263 mr r10,r1; /* Save r1 */ \
264 subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \
265 beq- 1f; \
266 ld r1,PACAKSAVE(r13); /* kernel stack to use */ \
2671: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \
268 bge- cr1,bad_stack; /* abort if it is */ \
269 std r9,_CCR(r1); /* save CR in stackframe */ \
270 std r11,_NIP(r1); /* save SRR0 in stackframe */ \
271 std r12,_MSR(r1); /* save SRR1 in stackframe */ \
272 std r10,0(r1); /* make stack chain pointer */ \
273 std r0,GPR0(r1); /* save r0 in stackframe */ \
274 std r10,GPR1(r1); /* save r1 in stackframe */ \
275 std r2,GPR2(r1); /* save r2 in stackframe */ \
276 SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
277 SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
278 ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \
279 ld r10,area+EX_R10(r13); \
280 std r9,GPR9(r1); \
281 std r10,GPR10(r1); \
282 ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \
283 ld r10,area+EX_R12(r13); \
284 ld r11,area+EX_R13(r13); \
285 std r9,GPR11(r1); \
286 std r10,GPR12(r1); \
287 std r11,GPR13(r1); \
288 ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
289 mflr r9; /* save LR in stackframe */ \
290 std r9,_LINK(r1); \
291 mfctr r10; /* save CTR in stackframe */ \
292 std r10,_CTR(r1); \
293 mfspr r11,XER; /* save XER in stackframe */ \
294 std r11,_XER(r1); \
295 li r9,(n)+1; \
296 std r9,_TRAP(r1); /* set trap number */ \
297 li r10,0; \
298 ld r11,exception_marker@toc(r2); \
299 std r10,RESULT(r1); /* clear regs->result */ \
300 std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
301
302/*
303 * Exception vectors.
304 */
305#define STD_EXCEPTION_PSERIES(n, label) \
306 . = n; \
307 .globl label##_pSeries; \
308label##_pSeries: \
309 HMT_MEDIUM; \
310 mtspr SPRG1,r13; /* save r13 */ \
311 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
312
313#define STD_EXCEPTION_ISERIES(n, label, area) \
314 .globl label##_iSeries; \
315label##_iSeries: \
316 HMT_MEDIUM; \
317 mtspr SPRG1,r13; /* save r13 */ \
318 EXCEPTION_PROLOG_ISERIES_1(area); \
319 EXCEPTION_PROLOG_ISERIES_2; \
320 b label##_common
321
322#define MASKABLE_EXCEPTION_ISERIES(n, label) \
323 .globl label##_iSeries; \
324label##_iSeries: \
325 HMT_MEDIUM; \
326 mtspr SPRG1,r13; /* save r13 */ \
327 EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \
328 lbz r10,PACAPROCENABLED(r13); \
329 cmpwi 0,r10,0; \
330 beq- label##_iSeries_masked; \
331 EXCEPTION_PROLOG_ISERIES_2; \
332	b	label##_common
333
334#ifdef DO_SOFT_DISABLE
335#define DISABLE_INTS \
336 lbz r10,PACAPROCENABLED(r13); \
337 li r11,0; \
338 std r10,SOFTE(r1); \
339 mfmsr r10; \
340 stb r11,PACAPROCENABLED(r13); \
341 ori r10,r10,MSR_EE; \
342 mtmsrd r10,1
343
344#define ENABLE_INTS \
345 lbz r10,PACAPROCENABLED(r13); \
346 mfmsr r11; \
347 std r10,SOFTE(r1); \
348 ori r11,r11,MSR_EE; \
349 mtmsrd r11,1
350
351#else /* hard enable/disable interrupts */
352#define DISABLE_INTS
353
354#define ENABLE_INTS \
355 ld r12,_MSR(r1); \
356 mfmsr r11; \
357 rlwimi r11,r12,0,MSR_EE; \
358 mtmsrd r11,1
359
360#endif
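/*
 * With DO_SOFT_DISABLE, "disabling" interrupts is roughly, in C terms
 * (a sketch; these are not the real asm-offsets field names):
 *
 *	regs->softe = paca->proc_enabled;
 *	paca->proc_enabled = 0;
 *	hard_irq_enable();		(MSR_EE is turned on)
 *
 * i.e. interrupts are only soft-disabled.  If one arrives while we
 * are soft-disabled, the maskable prologs above send it to the
 * corresponding label##_iSeries_masked handler below, which notes it
 * if necessary and returns immediately.
 */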
361
362#define STD_EXCEPTION_COMMON(trap, label, hdlr) \
363 .align 7; \
364 .globl label##_common; \
365label##_common: \
366 EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \
367 DISABLE_INTS; \
368 bl .save_nvgprs; \
369 addi r3,r1,STACK_FRAME_OVERHEAD; \
370 bl hdlr; \
371 b .ret_from_except
372
373#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \
374 .align 7; \
375 .globl label##_common; \
376label##_common: \
377 EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \
378 DISABLE_INTS; \
379 addi r3,r1,STACK_FRAME_OVERHEAD; \
380 bl hdlr; \
381 b .ret_from_except_lite
382
383/*
384 * Start of pSeries system interrupt routines
385 */
386 . = 0x100
387 .globl __start_interrupts
388__start_interrupts:
389
390 STD_EXCEPTION_PSERIES(0x100, system_reset)
391
392 . = 0x200
393_machine_check_pSeries:
394 HMT_MEDIUM
395 mtspr SPRG1,r13 /* save r13 */
396 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
397
398 . = 0x300
399 .globl data_access_pSeries
400data_access_pSeries:
401 HMT_MEDIUM
402 mtspr SPRG1,r13
403BEGIN_FTR_SECTION
404 mtspr SPRG2,r12
405 mfspr r13,DAR
406 mfspr r12,DSISR
407 srdi r13,r13,60
408 rlwimi r13,r12,16,0x20
409 mfcr r12
410 cmpwi r13,0x2c
411 beq .do_stab_bolted_pSeries
412 mtcrf 0x80,r12
413 mfspr r12,SPRG2
414END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
415 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
416
417 . = 0x380
418 .globl data_access_slb_pSeries
419data_access_slb_pSeries:
420 HMT_MEDIUM
421 mtspr SPRG1,r13
422 mfspr r13,SPRG3 /* get paca address into r13 */
423 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
424 std r10,PACA_EXSLB+EX_R10(r13)
425 std r11,PACA_EXSLB+EX_R11(r13)
426 std r12,PACA_EXSLB+EX_R12(r13)
427 std r3,PACA_EXSLB+EX_R3(r13)
428 mfspr r9,SPRG1
429 std r9,PACA_EXSLB+EX_R13(r13)
430 mfcr r9
431 mfspr r12,SRR1 /* and SRR1 */
432 mfspr r3,DAR
433 b .do_slb_miss /* Rel. branch works in real mode */
434
435 STD_EXCEPTION_PSERIES(0x400, instruction_access)
436
437 . = 0x480
438 .globl instruction_access_slb_pSeries
439instruction_access_slb_pSeries:
440 HMT_MEDIUM
441 mtspr SPRG1,r13
442 mfspr r13,SPRG3 /* get paca address into r13 */
443 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
444 std r10,PACA_EXSLB+EX_R10(r13)
445 std r11,PACA_EXSLB+EX_R11(r13)
446 std r12,PACA_EXSLB+EX_R12(r13)
447 std r3,PACA_EXSLB+EX_R3(r13)
448 mfspr r9,SPRG1
449 std r9,PACA_EXSLB+EX_R13(r13)
450 mfcr r9
451 mfspr r12,SRR1 /* and SRR1 */
452 mfspr r3,SRR0 /* SRR0 is faulting address */
453 b .do_slb_miss /* Rel. branch works in real mode */
454
455 STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
456 STD_EXCEPTION_PSERIES(0x600, alignment)
457 STD_EXCEPTION_PSERIES(0x700, program_check)
458 STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
459 STD_EXCEPTION_PSERIES(0x900, decrementer)
460 STD_EXCEPTION_PSERIES(0xa00, trap_0a)
461 STD_EXCEPTION_PSERIES(0xb00, trap_0b)
462
463 . = 0xc00
464 .globl system_call_pSeries
465system_call_pSeries:
466 HMT_MEDIUM
467 mr r9,r13
468 mfmsr r10
469 mfspr r13,SPRG3
470 mfspr r11,SRR0
471 clrrdi r12,r13,32
472 oris r12,r12,system_call_common@h
473 ori r12,r12,system_call_common@l
474 mtspr SRR0,r12
475 ori r10,r10,MSR_IR|MSR_DR|MSR_RI
476 mfspr r12,SRR1
477 mtspr SRR1,r10
478 rfid
479 b . /* prevent speculative execution */
480
481 STD_EXCEPTION_PSERIES(0xd00, single_step)
482 STD_EXCEPTION_PSERIES(0xe00, trap_0e)
483
484	/* We need to deal with the Altivec unavailable exception at
485	 * 0xf20, which lands in the middle of the prolog of the
486	 * performance monitor exception at 0xf00. A little trickery
487	 * is thus necessary: that handler is moved to 0x3000 below.
488	 */
489 . = 0xf00
490 b performance_monitor_pSeries
491
492 STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable)
493
494 STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
495 STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
496
497 /* moved from 0xf00 */
498 STD_EXCEPTION_PSERIES(0x3000, performance_monitor)
499
500 . = 0x3100
501_GLOBAL(do_stab_bolted_pSeries)
502 mtcrf 0x80,r12
503 mfspr r12,SPRG2
504 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
505
506
507 /* Space for the naca. Architected to be located at real address
508 * NACA_PHYS_ADDR. Various tools rely on this location being fixed.
509 * The first dword of the naca is required by iSeries LPAR to
510 * point to itVpdAreas. On pSeries native, this value is not used.
511 */
512 . = NACA_PHYS_ADDR
513 .globl __end_interrupts
514__end_interrupts:
515#ifdef CONFIG_PPC_ISERIES
516 .globl naca
517naca:
518 .llong itVpdAreas
519
520 /*
521 * The iSeries LPAR map is at this fixed address
522 * so that the HvReleaseData structure can address
523 * it with a 32-bit offset.
524 *
525 * The VSID values below are dependent on the
526 * VSID generation algorithm. See include/asm/mmu_context.h.
527 */
528
529 . = 0x4800
530
531 .llong 2 /* # ESIDs to be mapped by hypervisor */
532 .llong 1 /* # memory ranges to be mapped by hypervisor */
533 .llong STAB0_PAGE /* Page # of segment table within load area */
534 .llong 0 /* Reserved */
535 .llong 0 /* Reserved */
536 .llong 0 /* Reserved */
537 .llong 0 /* Reserved */
538 .llong 0 /* Reserved */
539 .llong (KERNELBASE>>SID_SHIFT)
540 .llong 0x408f92c94 /* KERNELBASE VSID */
541 /* We have to list the bolted VMALLOC segment here, too, so that it
542 * will be restored on shared processor switch */
543 .llong (VMALLOCBASE>>SID_SHIFT)
544 .llong 0xf09b89af5 /* VMALLOCBASE VSID */
545 .llong 8192 /* # pages to map (32 MB) */
546 .llong 0 /* Offset from start of loadarea to start of map */
547 .llong 0x408f92c940000 /* VPN of first page to map */
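	/* Note: 0x408f92c940000 is just the KERNELBASE VSID above
	 * shifted left 16, i.e. the virtual page number of the first
	 * 4K page of the kernel segment. */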
548
549 . = 0x6100
550
551/*** iSeries-LPAR interrupt handlers ***/
552
553 STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC)
554
555 .globl data_access_iSeries
556data_access_iSeries:
557 mtspr SPRG1,r13
558BEGIN_FTR_SECTION
559 mtspr SPRG2,r12
560 mfspr r13,DAR
561 mfspr r12,DSISR
562 srdi r13,r13,60
563 rlwimi r13,r12,16,0x20
564 mfcr r12
565 cmpwi r13,0x2c
566 beq .do_stab_bolted_iSeries
567 mtcrf 0x80,r12
568 mfspr r12,SPRG2
569END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
570 EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN)
571 EXCEPTION_PROLOG_ISERIES_2
572 b data_access_common
573
574.do_stab_bolted_iSeries:
575 mtcrf 0x80,r12
576 mfspr r12,SPRG2
577 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
578 EXCEPTION_PROLOG_ISERIES_2
579 b .do_stab_bolted
580
581 .globl data_access_slb_iSeries
582data_access_slb_iSeries:
583 mtspr SPRG1,r13 /* save r13 */
584 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
585 std r3,PACA_EXSLB+EX_R3(r13)
586 ld r12,PACALPPACA+LPPACASRR1(r13)
587 mfspr r3,DAR
588 b .do_slb_miss
589
590 STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
591
592 .globl instruction_access_slb_iSeries
593instruction_access_slb_iSeries:
594 mtspr SPRG1,r13 /* save r13 */
595 EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
596 std r3,PACA_EXSLB+EX_R3(r13)
597 ld r12,PACALPPACA+LPPACASRR1(r13)
598 ld r3,PACALPPACA+LPPACASRR0(r13)
599 b .do_slb_miss
600
601 MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
602 STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
603 STD_EXCEPTION_ISERIES(0x700, program_check, PACA_EXGEN)
604 STD_EXCEPTION_ISERIES(0x800, fp_unavailable, PACA_EXGEN)
605 MASKABLE_EXCEPTION_ISERIES(0x900, decrementer)
606 STD_EXCEPTION_ISERIES(0xa00, trap_0a, PACA_EXGEN)
607 STD_EXCEPTION_ISERIES(0xb00, trap_0b, PACA_EXGEN)
608
609 .globl system_call_iSeries
610system_call_iSeries:
611 mr r9,r13
612 mfspr r13,SPRG3
613 EXCEPTION_PROLOG_ISERIES_2
614 b system_call_common
615
616	STD_EXCEPTION_ISERIES(0xd00, single_step, PACA_EXGEN)
617	STD_EXCEPTION_ISERIES(0xe00, trap_0e, PACA_EXGEN)
618	STD_EXCEPTION_ISERIES(0xf00, performance_monitor, PACA_EXGEN)
619
620 .globl system_reset_iSeries
621system_reset_iSeries:
622 mfspr r13,SPRG3 /* Get paca address */
623 mfmsr r24
624 ori r24,r24,MSR_RI
625 mtmsrd r24 /* RI on */
626 lhz r24,PACAPACAINDEX(r13) /* Get processor # */
627 cmpwi 0,r24,0 /* Are we processor 0? */
628 beq .__start_initialization_iSeries /* Start up the first processor */
629 mfspr r4,CTRLF
630 li r5,RUNLATCH /* Turn off the run light */
631 andc r4,r4,r5
632 mtspr CTRLT,r4
633
6341:
635 HMT_LOW
636#ifdef CONFIG_SMP
637 lbz r23,PACAPROCSTART(r13) /* Test if this processor
638 * should start */
639 sync
640 LOADADDR(r3,current_set)
641 sldi r28,r24,3 /* get current_set[cpu#] */
642 ldx r3,r3,r28
643 addi r1,r3,THREAD_SIZE
644 subi r1,r1,STACK_FRAME_OVERHEAD
645
646 cmpwi 0,r23,0
647 beq iSeries_secondary_smp_loop /* Loop until told to go */
648#ifdef SECONDARY_PROCESSORS
649 bne .__secondary_start /* Loop until told to go */
650#endif
651iSeries_secondary_smp_loop:
652 /* Let the Hypervisor know we are alive */
653 /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
654 lis r3,0x8002
655	rldicr	r3,r3,32,15		/* r3 = (r3 << 32) & 0xffff000000000000 */
656#else /* CONFIG_SMP */
657 /* Yield the processor. This is required for non-SMP kernels
658 which are running on multi-threaded machines. */
659 lis r3,0x8000
660 rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */
661 addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */
662 li r4,0 /* "yield timed" */
663 li r5,-1 /* "yield forever" */
664#endif /* CONFIG_SMP */
665 li r0,-1 /* r0=-1 indicates a Hypervisor call */
666 sc /* Invoke the hypervisor via a system call */
667 mfspr r13,SPRG3 /* Put r13 back ???? */
668 b 1b /* If SMP not configured, secondaries
669 * loop forever */
670
671 .globl decrementer_iSeries_masked
672decrementer_iSeries_masked:
673 li r11,1
674 stb r11,PACALPPACA+LPPACADECRINT(r13)
675 lwz r12,PACADEFAULTDECR(r13)
676 mtspr SPRN_DEC,r12
677 /* fall through */
678
679 .globl hardware_interrupt_iSeries_masked
680hardware_interrupt_iSeries_masked:
681 mtcrf 0x80,r9 /* Restore regs */
682 ld r11,PACALPPACA+LPPACASRR0(r13)
683 ld r12,PACALPPACA+LPPACASRR1(r13)
684 mtspr SRR0,r11
685 mtspr SRR1,r12
686 ld r9,PACA_EXGEN+EX_R9(r13)
687 ld r10,PACA_EXGEN+EX_R10(r13)
688 ld r11,PACA_EXGEN+EX_R11(r13)
689 ld r12,PACA_EXGEN+EX_R12(r13)
690 ld r13,PACA_EXGEN+EX_R13(r13)
691 rfid
692 b . /* prevent speculative execution */
693#endif
694
695/*
696 * Data area reserved for FWNMI option.
697 */
698	. = 0x7000
699 .globl fwnmi_data_area
700fwnmi_data_area:
701
702/*
703 * Vectors for the FWNMI option. Share common code.
704 */
705 . = 0x8000
706 .globl system_reset_fwnmi
707system_reset_fwnmi:
708 HMT_MEDIUM
709 mtspr SPRG1,r13 /* save r13 */
710 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
711 .globl machine_check_fwnmi
712machine_check_fwnmi:
713 HMT_MEDIUM
714 mtspr SPRG1,r13 /* save r13 */
715 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
716
717 /*
718 * Space for the initial segment table
719 * For LPAR, the hypervisor must fill in at least one entry
720 * before we get control (with relocation on)
721 */
722 . = STAB0_PHYS_ADDR
723 .globl __start_stab
724__start_stab:
725
726 . = (STAB0_PHYS_ADDR + PAGE_SIZE)
727 .globl __end_stab
728__end_stab:
729
730
731/*** Common interrupt handlers ***/
732
733 STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
734
735 /*
736 * Machine check is different because we use a different
737 * save area: PACA_EXMC instead of PACA_EXGEN.
738 */
739 .align 7
740 .globl machine_check_common
741machine_check_common:
742 EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
743 DISABLE_INTS
744 bl .save_nvgprs
745 addi r3,r1,STACK_FRAME_OVERHEAD
746 bl .machine_check_exception
747 b .ret_from_except
748
749 STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
750 STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
751 STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
752 STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
753 STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
754 STD_EXCEPTION_COMMON(0xf00, performance_monitor, .performance_monitor_exception)
755 STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
756#ifdef CONFIG_ALTIVEC
757 STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
758#else
759 STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
760#endif
761
762/*
763 * Here we have detected that the kernel stack pointer is bad.
764 * R9 contains the saved CR, r13 points to the paca,
765 * r10 contains the (bad) kernel stack pointer,
766 * r11 and r12 contain the saved SRR0 and SRR1.
767 * We switch to using the paca guard page as an emergency stack,
768 * save the registers there, and call kernel_bad_stack(), which panics.
769 */
770bad_stack:
771 ld r1,PACAEMERGSP(r13)
772 subi r1,r1,64+INT_FRAME_SIZE
773 std r9,_CCR(r1)
774 std r10,GPR1(r1)
775 std r11,_NIP(r1)
776 std r12,_MSR(r1)
777 mfspr r11,DAR
778 mfspr r12,DSISR
779 std r11,_DAR(r1)
780 std r12,_DSISR(r1)
781 mflr r10
782 mfctr r11
783 mfxer r12
784 std r10,_LINK(r1)
785 std r11,_CTR(r1)
786 std r12,_XER(r1)
787 SAVE_GPR(0,r1)
788 SAVE_GPR(2,r1)
789 SAVE_4GPRS(3,r1)
790 SAVE_2GPRS(7,r1)
791 SAVE_10GPRS(12,r1)
792 SAVE_10GPRS(22,r1)
793 addi r11,r1,INT_FRAME_SIZE
794 std r11,0(r1)
795 li r12,0
796 std r12,0(r11)
797 ld r2,PACATOC(r13)
7981: addi r3,r1,STACK_FRAME_OVERHEAD
799 bl .kernel_bad_stack
800 b 1b
801
802/*
803 * Return from an exception with minimal checks.
804 * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
805 * If interrupts have been enabled, or anything has been
806 * done that might have changed the scheduling status of
807 * any task or sent any task a signal, you should use
808 * ret_from_except or ret_from_except_lite instead of this.
809 */
810fast_exception_return:
811 ld r12,_MSR(r1)
812 ld r11,_NIP(r1)
813 andi. r3,r12,MSR_RI /* check if RI is set */
814 beq- unrecov_fer
815 ld r3,_CCR(r1)
816 ld r4,_LINK(r1)
817 ld r5,_CTR(r1)
818 ld r6,_XER(r1)
819 mtcr r3
820 mtlr r4
821 mtctr r5
822 mtxer r6
823 REST_GPR(0, r1)
824 REST_8GPRS(2, r1)
825
826 mfmsr r10
827 clrrdi r10,r10,2 /* clear RI (LE is 0 already) */
828 mtmsrd r10,1
829
830 mtspr SRR1,r12
831 mtspr SRR0,r11
832 REST_4GPRS(10, r1)
833 ld r1,GPR1(r1)
834 rfid
835 b . /* prevent speculative execution */
836
837unrecov_fer:
838 bl .save_nvgprs
8391: addi r3,r1,STACK_FRAME_OVERHEAD
840 bl .unrecoverable_exception
841 b 1b
842
843/*
844 * Here r13 points to the paca, r9 contains the saved CR,
845 * SRR0 and SRR1 are saved in r11 and r12,
846 * r9 - r13 are saved in paca->exgen.
847 */
848 .align 7
849 .globl data_access_common
850data_access_common:
851 mfspr r10,DAR
852 std r10,PACA_EXGEN+EX_DAR(r13)
853 mfspr r10,DSISR
854 stw r10,PACA_EXGEN+EX_DSISR(r13)
855 EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
856 ld r3,PACA_EXGEN+EX_DAR(r13)
857 lwz r4,PACA_EXGEN+EX_DSISR(r13)
858 li r5,0x300
859 b .do_hash_page /* Try to handle as hpte fault */
860
861 .align 7
862 .globl instruction_access_common
863instruction_access_common:
864 EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
865 ld r3,_NIP(r1)
866 andis. r4,r12,0x5820
867 li r5,0x400
868 b .do_hash_page /* Try to handle as hpte fault */
869
870 .align 7
871 .globl hardware_interrupt_common
872 .globl hardware_interrupt_entry
873hardware_interrupt_common:
874 EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
875hardware_interrupt_entry:
876 DISABLE_INTS
877 addi r3,r1,STACK_FRAME_OVERHEAD
878 bl .do_IRQ
879 b .ret_from_except_lite
880
881 .align 7
882 .globl alignment_common
883alignment_common:
884 mfspr r10,DAR
885 std r10,PACA_EXGEN+EX_DAR(r13)
886 mfspr r10,DSISR
887 stw r10,PACA_EXGEN+EX_DSISR(r13)
888 EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
889 ld r3,PACA_EXGEN+EX_DAR(r13)
890 lwz r4,PACA_EXGEN+EX_DSISR(r13)
891 std r3,_DAR(r1)
892 std r4,_DSISR(r1)
893 bl .save_nvgprs
894 addi r3,r1,STACK_FRAME_OVERHEAD
895 ENABLE_INTS
896 bl .alignment_exception
897 b .ret_from_except
898
899 .align 7
900 .globl program_check_common
901program_check_common:
902 EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
903 bl .save_nvgprs
904 addi r3,r1,STACK_FRAME_OVERHEAD
905 ENABLE_INTS
906 bl .program_check_exception
907 b .ret_from_except
908
909 .align 7
910 .globl fp_unavailable_common
911fp_unavailable_common:
912 EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
913 bne .load_up_fpu /* if from user, just load it up */
914 bl .save_nvgprs
915 addi r3,r1,STACK_FRAME_OVERHEAD
916 ENABLE_INTS
917 bl .kernel_fp_unavailable_exception
918 BUG_OPCODE
919
920 .align 7
921 .globl altivec_unavailable_common
922altivec_unavailable_common:
923 EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
924#ifdef CONFIG_ALTIVEC
925 bne .load_up_altivec /* if from user, just load it up */
926#endif
927 bl .save_nvgprs
928 addi r3,r1,STACK_FRAME_OVERHEAD
929 ENABLE_INTS
930 bl .altivec_unavailable_exception
931 b .ret_from_except
932
933/*
934 * Hash table stuff
935 */
936 .align 7
937_GLOBAL(do_hash_page)
938 std r3,_DAR(r1)
939 std r4,_DSISR(r1)
940
941	andis.	r0,r4,0xa450		/* weird error? */
942	bne-	.handle_page_fault	/* if so, don't try to insert a HPTE */
943BEGIN_FTR_SECTION
944 andis. r0,r4,0x0020 /* Is it a segment table fault? */
945 bne- .do_ste_alloc /* If so handle it */
946END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
947
948 /*
949 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
950 * accessing a userspace segment (even from the kernel). We assume
951 * kernel addresses always have the high bit set.
952 */
953 rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
954 rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
955 orc r0,r12,r0 /* MSR_PR | ~high_bit */
956 rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
957 ori r4,r4,1 /* add _PAGE_PRESENT */
958 rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
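	/*
	 * The rotldi/orc pair above lands the high bit of the address
	 * in the MSR_PR position, so the rlwimi sets _PAGE_USER in r4
	 * exactly when MSR_PR is set or that high bit is clear.
	 */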
959
960 /*
961 * On iSeries, we soft-disable interrupts here, then
962 * hard-enable interrupts so that the hash_page code can spin on
963 * the hash_table_lock without problems on a shared processor.
964 */
965 DISABLE_INTS
966
967 /*
968 * r3 contains the faulting address
969 * r4 contains the required access permissions
970 * r5 contains the trap number
971 *
972 * at return r3 = 0 for success
973 */
974 bl .hash_page /* build HPTE if possible */
975 cmpdi r3,0 /* see if hash_page succeeded */
976
977#ifdef DO_SOFT_DISABLE
978 /*
979 * If we had interrupts soft-enabled at the point where the
980 * DSI/ISI occurred, and an interrupt came in during hash_page,
981 * handle it now.
982 * We jump to ret_from_except_lite rather than fast_exception_return
983 * because ret_from_except_lite will check for and handle pending
984 * interrupts if necessary.
985 */
986 beq .ret_from_except_lite
987 /* For a hash failure, we don't bother re-enabling interrupts */
988 ble- 12f
989
990 /*
991 * hash_page couldn't handle it, set soft interrupt enable back
992 * to what it was before the trap. Note that .local_irq_restore
993 * handles any interrupts pending at this point.
994 */
995 ld r3,SOFTE(r1)
996 bl .local_irq_restore
997 b 11f
998#else
999 beq fast_exception_return /* Return from exception on success */
1000 ble- 12f /* Failure return from hash_page */
1001
1002 /* fall through */
1003#endif
1004
1005/* Here we have a page fault that hash_page can't handle. */
1006_GLOBAL(handle_page_fault)
1007 ENABLE_INTS
100811: ld r4,_DAR(r1)
1009 ld r5,_DSISR(r1)
1010 addi r3,r1,STACK_FRAME_OVERHEAD
1011 bl .do_page_fault
1012 cmpdi r3,0
1013 beq+ .ret_from_except_lite
1014 bl .save_nvgprs
1015 mr r5,r3
1016 addi r3,r1,STACK_FRAME_OVERHEAD
1017 lwz r4,_DAR(r1)
1018 bl .bad_page_fault
1019 b .ret_from_except
1020
1021/* We have a page fault that hash_page could handle but HV refused
1022 * the PTE insertion
1023 */
102412: bl .save_nvgprs
1025 addi r3,r1,STACK_FRAME_OVERHEAD
1026 lwz r4,_DAR(r1)
1027 bl .low_hash_fault
1028 b .ret_from_except
1029
1030 /* here we have a segment miss */
1031_GLOBAL(do_ste_alloc)
1032 bl .ste_allocate /* try to insert stab entry */
1033 cmpdi r3,0
1034 beq+ fast_exception_return
1035 b .handle_page_fault
1036
1037/*
1038 * r13 points to the PACA, r9 contains the saved CR,
1039 * r11 and r12 contain the saved SRR0 and SRR1.
1040 * r9 - r13 are saved in paca->exslb.
1041 * We assume we aren't going to take any exceptions during this procedure.
1042 * We assume (DAR >> 60) == 0xc.
1043 */
1044 .align 7
1045_GLOBAL(do_stab_bolted)
1046 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
1047 std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */
1048
1049 /* Hash to the primary group */
1050 ld r10,PACASTABVIRT(r13)
1051 mfspr r11,DAR
1052 srdi r11,r11,28
1053 rldimi r10,r11,7,52 /* r10 = first ste of the group */
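	/* A 4K stab page holds 32 groups of 8 16-byte STEs; the rldimi
	 * above selects the group from the low 5 bits of the ESID. */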
1054
1055 /* Calculate VSID */
1056 /* This is a kernel address, so protovsid = ESID */
1057 ASM_VSID_SCRAMBLE(r11, r9)
1058 rldic r9,r11,12,16 /* r9 = vsid << 12 */
1059
1060 /* Search the primary group for a free entry */
10611: ld r11,0(r10) /* Test valid bit of the current ste */
1062 andi. r11,r11,0x80
1063 beq 2f
1064 addi r10,r10,16
1065 andi. r11,r10,0x70
1066 bne 1b
1067
1068	/* Stick to searching only the primary group for now. */
1069 /* At least for now, we use a very simple random castout scheme */
1070 /* Use the TB as a random number ; OR in 1 to avoid entry 0 */
1071 mftb r11
1072 rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */
1073 ori r11,r11,0x10
1074
1075 /* r10 currently points to an ste one past the group of interest */
1076 /* make it point to the randomly selected entry */
1077 subi r10,r10,128
1078 or r10,r10,r11 /* r10 is the entry to invalidate */
1079
1080 isync /* mark the entry invalid */
1081 ld r11,0(r10)
1082 rldicl r11,r11,56,1 /* clear the valid bit */
1083 rotldi r11,r11,8
1084 std r11,0(r10)
1085 sync
1086
1087 clrrdi r11,r11,28 /* Get the esid part of the ste */
1088 slbie r11
1089
10902: std r9,8(r10) /* Store the vsid part of the ste */
1091 eieio
1092
1093 mfspr r11,DAR /* Get the new esid */
1094 clrrdi r11,r11,28 /* Permits a full 32b of ESID */
1095 ori r11,r11,0x90 /* Turn on valid and kp */
1096 std r11,0(r10) /* Put new entry back into the stab */
1097
1098 sync
1099
1100 /* All done -- return from exception. */
1101 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
1102 ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */
1103
1104 andi. r10,r12,MSR_RI
1105 beq- unrecov_slb
1106
1107 mtcrf 0x80,r9 /* restore CR */
1108
1109 mfmsr r10
1110 clrrdi r10,r10,2
1111 mtmsrd r10,1
1112
1113 mtspr SRR0,r11
1114 mtspr SRR1,r12
1115 ld r9,PACA_EXSLB+EX_R9(r13)
1116 ld r10,PACA_EXSLB+EX_R10(r13)
1117 ld r11,PACA_EXSLB+EX_R11(r13)
1118 ld r12,PACA_EXSLB+EX_R12(r13)
1119 ld r13,PACA_EXSLB+EX_R13(r13)
1120 rfid
1121 b . /* prevent speculative execution */
1122
1123/*
1124 * r13 points to the PACA, r9 contains the saved CR,
1125 * r11 and r12 contain the saved SRR0 and SRR1.
1126 * r3 has the faulting address
1127 * r9 - r13 are saved in paca->exslb.
1128 * r3 is saved in paca->exslb at EX_R3
1129 * We assume we aren't going to take any exceptions during this procedure.
1130 */
1131_GLOBAL(do_slb_miss)
1132 mflr r10
1133
1134 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
1135 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
1136
1137 bl .slb_allocate /* handle it */
1138
1139 /* All done -- return from exception. */
1140
1141 ld r10,PACA_EXSLB+EX_LR(r13)
1142 ld r3,PACA_EXSLB+EX_R3(r13)
1143 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
1144#ifdef CONFIG_PPC_ISERIES
1145 ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
1146#endif /* CONFIG_PPC_ISERIES */
1147
1148 mtlr r10
1149
1150 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
1151 beq- unrecov_slb
1152
1153.machine push
1154.machine "power4"
1155 mtcrf 0x80,r9
1156 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
1157.machine pop
1158
1159#ifdef CONFIG_PPC_ISERIES
1160 mtspr SRR0,r11
1161 mtspr SRR1,r12
1162#endif /* CONFIG_PPC_ISERIES */
1163 ld r9,PACA_EXSLB+EX_R9(r13)
1164 ld r10,PACA_EXSLB+EX_R10(r13)
1165 ld r11,PACA_EXSLB+EX_R11(r13)
1166 ld r12,PACA_EXSLB+EX_R12(r13)
1167 ld r13,PACA_EXSLB+EX_R13(r13)
1168 rfid
1169 b . /* prevent speculative execution */
1170
1171unrecov_slb:
1172 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
1173 DISABLE_INTS
1174 bl .save_nvgprs
11751: addi r3,r1,STACK_FRAME_OVERHEAD
1176 bl .unrecoverable_exception
1177 b 1b
1178
1179
1180/*
1181 * On pSeries, secondary processors spin in the following code.
1182 * At entry, r3 = this processor's number (physical cpu id)
1183 */
1184_GLOBAL(pSeries_secondary_smp_init)
1185 mr r24,r3
1186
1187 /* turn on 64-bit mode */
1188 bl .enable_64b_mode
1189 isync
1190
1191 /* Copy some CPU settings from CPU 0 */
1192 bl .__restore_cpu_setup
1193
1194 /* Set up a paca value for this processor. Since we have the
1195 * physical cpu id in r3, we need to search the pacas to find
1196 * which logical id maps to our physical one.
1197 */
1198 LOADADDR(r13, paca) /* Get base vaddr of paca array */
1199 li r5,0 /* logical cpu id */
12001: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
1201 cmpw r6,r24 /* Compare to our id */
1202 beq 2f
1203 addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */
1204 addi r5,r5,1
1205 cmpwi r5,NR_CPUS
1206 blt 1b
1207
120899: HMT_LOW /* Couldn't find our CPU id */
1209 b 99b
1210
12112: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */
1212	/* From now on, r24 is expected to be the logical cpuid */
1213 mr r24,r5
12143: HMT_LOW
1215 lbz r23,PACAPROCSTART(r13) /* Test if this processor should */
1216 /* start. */
1217 sync
1218
1219 /* Create a temp kernel stack for use before relocation is on. */
1220 ld r1,PACAEMERGSP(r13)
1221 subi r1,r1,STACK_FRAME_OVERHEAD
1222
1223 cmpwi 0,r23,0
1224#ifdef CONFIG_SMP
1225#ifdef SECONDARY_PROCESSORS
1226 bne .__secondary_start
1227#endif
1228#endif
1229 b 3b /* Loop until told to go */
1230
1231#ifdef CONFIG_PPC_ISERIES
1232_STATIC(__start_initialization_iSeries)
1233 /* Clear out the BSS */
1234 LOADADDR(r11,__bss_stop)
1235 LOADADDR(r8,__bss_start)
1236 sub r11,r11,r8 /* bss size */
1237 addi r11,r11,7 /* round up to an even double word */
1238 rldicl. r11,r11,61,3 /* shift right by 3 */
1239 beq 4f
1240 addi r8,r8,-8
1241 li r0,0
1242 mtctr r11 /* zero this many doublewords */
12433: stdu r0,8(r8)
1244 bdnz 3b
12454:
1246 LOADADDR(r1,init_thread_union)
1247 addi r1,r1,THREAD_SIZE
1248 li r0,0
1249 stdu r0,-STACK_FRAME_OVERHEAD(r1)
1250
1251 LOADADDR(r3,cpu_specs)
1252 LOADADDR(r4,cur_cpu_spec)
1253 li r5,0
1254 bl .identify_cpu
1255
1256 LOADADDR(r2,__toc_start)
1257 addi r2,r2,0x4000
1258 addi r2,r2,0x4000
1259
1260 bl .iSeries_early_setup
1261
1262 /* relocation is on at this point */
1263
1264 b .start_here_common
1265#endif /* CONFIG_PPC_ISERIES */
1266
1267#ifdef CONFIG_PPC_MULTIPLATFORM
1268
1269_STATIC(__mmu_off)
1270 mfmsr r3
1271 andi. r0,r3,MSR_IR|MSR_DR
1272 beqlr
1273 andc r3,r3,r0
1274 mtspr SPRN_SRR0,r4
1275 mtspr SPRN_SRR1,r3
1276 sync
1277 rfid
1278 b . /* prevent speculative execution */
1279
1280
1281/*
1282 * Here is our main kernel entry point. We currently support two kinds
1283 * of entry, depending on the value of r5:
1284 *
1285 * r5 != NULL -> OF entry: we go to prom_init, with the "legacy"
1286 * parameters in r3...r7
1287 *
1288 * r5 == NULL -> kexec-style entry: r3 is a physical pointer to the
1289 * DT block, r4 is a physical pointer to the kernel itself
1290 *
1291 */
1292_GLOBAL(__start_initialization_multiplatform)
1293 /*
1294	 * Are we booted from a PROM OF-type client interface?
1295 */
1296 cmpldi cr0,r5,0
1297 bne .__boot_from_prom /* yes -> prom */
1298
1299 /* Save parameters */
1300 mr r31,r3
1301 mr r30,r4
1302
1303	/* Make sure we are running in 64-bit mode */
1304 bl .enable_64b_mode
1305
1306 /* Setup some critical 970 SPRs before switching MMU off */
1307 bl .__970_cpu_preinit
1308
1309 /* cpu # */
1310 li r24,0
1311
1312 /* Switch off MMU if not already */
1313 LOADADDR(r4, .__after_prom_start - KERNELBASE)
1314 add r4,r4,r30
1315 bl .__mmu_off
1316 b .__after_prom_start
1317
1318_STATIC(__boot_from_prom)
1319 /* Save parameters */
1320 mr r31,r3
1321 mr r30,r4
1322 mr r29,r5
1323 mr r28,r6
1324 mr r27,r7
1325
1326	/* Make sure we are running in 64-bit mode */
1327 bl .enable_64b_mode
1328
1329 /* put a relocation offset into r3 */
1330 bl .reloc_offset
1331
1332 LOADADDR(r2,__toc_start)
1333 addi r2,r2,0x4000
1334 addi r2,r2,0x4000
1335
1336 /* Relocate the TOC from a virt addr to a real addr */
1337 sub r2,r2,r3
1338
1339 /* Restore parameters */
1340 mr r3,r31
1341 mr r4,r30
1342 mr r5,r29
1343 mr r6,r28
1344 mr r7,r27
1345
1346 /* Do all of the interaction with OF client interface */
1347 bl .prom_init
1348 /* We never return */
1349 trap
1350
1351/*
1352 * At this point, r3 contains the physical address we are running at,
1353 * returned by prom_init()
1354 */
1355_STATIC(__after_prom_start)
1356
1357/*
1358 * We need to run with __start at physical address 0.
1359 * This will leave some code in the first 256B of
1360 * real memory, which is reserved for software use.
1361 * The remainder of the first page is loaded with the fixed
1362 * interrupt vectors. The next two pages are filled with
1363 * unknown exception placeholders.
1364 *
1365 * Note: This process overwrites the OF exception vectors.
1366 * r26 == relocation offset
1367 * r27 == KERNELBASE
1368 */
1369 bl .reloc_offset
1370 mr r26,r3
1371 SET_REG_TO_CONST(r27,KERNELBASE)
1372
1373 li r3,0 /* target addr */
1374
1375 // XXX FIXME: Use phys returned by OF (r30)
1376 sub r4,r27,r26 /* source addr */
1377 /* current address of _start */
1378 /* i.e. where we are running */
1379 /* the source addr */
1380
1381 LOADADDR(r5,copy_to_here) /* # bytes of memory to copy */
1382 sub r5,r5,r27
1383
1384 li r6,0x100 /* Start offset, the first 0x100 */
1385 /* bytes were copied earlier. */
1386
1387 bl .copy_and_flush /* copy the first n bytes */
1388 /* this includes the code being */
1389 /* executed here. */
1390
1391 LOADADDR(r0, 4f) /* Jump to the copy of this code */
1392 mtctr r0 /* that we just made/relocated */
1393 bctr
1394
13954: LOADADDR(r5,klimit)
1396 sub r5,r5,r26
1397 ld r5,0(r5) /* get the value of klimit */
1398 sub r5,r5,r27
1399 bl .copy_and_flush /* copy the rest */
1400 b .start_here_multiplatform
1401
1402#endif /* CONFIG_PPC_MULTIPLATFORM */
1403
1404/*
1405 * Copy routine used to copy the kernel to start at physical address 0
1406 * and flush and invalidate the caches as needed.
1407 * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
1408 * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
1409 *
1410 * Note: this routine *only* clobbers r0, r6 and lr
1411 */
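/*
 * E.g. in the two calls above, the first copies everything below
 * copy_to_here (minus the first 0x100 bytes, already in place),
 * execution then jumps into the copy, and the second call copies
 * the rest of the kernel up to klimit.
 */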
1412_GLOBAL(copy_and_flush)
1413 addi r5,r5,-8
1414 addi r6,r6,-8
14154: li r0,16 /* Use the least common */
1416 /* denominator cache line */
1417 /* size. This results in */
1418 /* extra cache line flushes */
1419 /* but operation is correct. */
1420 /* Can't get cache line size */
1421 /* from NACA as it is being */
1422 /* moved too. */
1423
1424 mtctr r0 /* put # words/line in ctr */
14253: addi r6,r6,8 /* copy a cache line */
1426 ldx r0,r6,r4
1427 stdx r0,r6,r3
1428 bdnz 3b
1429 dcbst r6,r3 /* write it to memory */
1430 sync
1431 icbi r6,r3 /* flush the icache line */
1432 cmpld 0,r6,r5
1433 blt 4b
1434 sync
1435 addi r5,r5,8
1436 addi r6,r6,8
1437 blr
1438
1439.align 8
1440copy_to_here:
1441
1442/*
1443 * load_up_fpu(unused, unused, tsk)
1444 * Disable FP for the task which had the FPU previously,
1445 * and save its floating-point registers in its thread_struct.
1446 * Enables the FPU for use in the kernel on return.
1447 * On SMP we know the fpu is free, since we give it up every
1448 * switch (ie, no lazy save of the FP registers).
1449 * On entry: r13 == 'current' && last_task_used_math != 'current'
1450 */
1451_STATIC(load_up_fpu)
1452 mfmsr r5 /* grab the current MSR */
1453 ori r5,r5,MSR_FP
1454 mtmsrd r5 /* enable use of fpu now */
1455 isync
1456/*
1457 * For SMP, we don't do lazy FPU switching because it just gets too
1458 * horrendously complex, especially when a task switches from one CPU
1459 * to another. Instead we call giveup_fpu in switch_to.
1460 *
1461 */
1462#ifndef CONFIG_SMP
1463 ld r3,last_task_used_math@got(r2)
1464 ld r4,0(r3)
1465 cmpdi 0,r4,0
1466 beq 1f
1467 /* Save FP state to last_task_used_math's THREAD struct */
1468 addi r4,r4,THREAD
1469 SAVE_32FPRS(0, r4)
1470 mffs fr0
1471 stfd fr0,THREAD_FPSCR(r4)
1472 /* Disable FP for last_task_used_math */
1473 ld r5,PT_REGS(r4)
1474 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1475 li r6,MSR_FP|MSR_FE0|MSR_FE1
1476 andc r4,r4,r6
1477 std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
14781:
1479#endif /* CONFIG_SMP */
1480 /* enable use of FP after return */
1481 ld r4,PACACURRENT(r13)
1482 addi r5,r4,THREAD /* Get THREAD */
1483 ld r4,THREAD_FPEXC_MODE(r5)
1484 ori r12,r12,MSR_FP
1485 or r12,r12,r4
1486 std r12,_MSR(r1)
1487 lfd fr0,THREAD_FPSCR(r5)
1488 mtfsf 0xff,fr0
1489 REST_32FPRS(0, r5)
1490#ifndef CONFIG_SMP
1491 /* Update last_task_used_math to 'current' */
1492 subi r4,r5,THREAD /* Back to 'current' */
1493 std r4,0(r3)
1494#endif /* CONFIG_SMP */
1495 /* restore registers and return */
1496 b fast_exception_return
1497
1498/*
1499 * disable_kernel_fp()
1500 * Disable the FPU.
1501 */
1502_GLOBAL(disable_kernel_fp)
1503 mfmsr r3
1504 rldicl r0,r3,(63-MSR_FP_LG),1
1505 rldicl r3,r0,(MSR_FP_LG+1),0
1506 mtmsrd r3 /* disable use of fpu now */
1507 isync
1508 blr
1509
1510/*
1511 * giveup_fpu(tsk)
1512 * Disable FP for the task given as the argument,
1513 * and save the floating-point registers in its thread_struct.
1514 * Enables the FPU for use in the kernel on return.
1515 */
1516_GLOBAL(giveup_fpu)
1517 mfmsr r5
1518 ori r5,r5,MSR_FP
1519 mtmsrd r5 /* enable use of fpu now */
1520 isync
1521 cmpdi 0,r3,0
1522 beqlr- /* if no previous owner, done */
1523 addi r3,r3,THREAD /* want THREAD of task */
1524 ld r5,PT_REGS(r3)
1525 cmpdi 0,r5,0
1526 SAVE_32FPRS(0, r3)
1527 mffs fr0
1528 stfd fr0,THREAD_FPSCR(r3)
1529 beq 1f
1530 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1531 li r3,MSR_FP|MSR_FE0|MSR_FE1
1532 andc r4,r4,r3 /* disable FP for previous task */
1533 std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
15341:
1535#ifndef CONFIG_SMP
1536 li r5,0
1537 ld r4,last_task_used_math@got(r2)
1538 std r5,0(r4)
1539#endif /* CONFIG_SMP */
1540 blr
1541
1542
1543#ifdef CONFIG_ALTIVEC
1544
1545/*
1546 * load_up_altivec(unused, unused, tsk)
1547 * Disable VMX for the task which had it previously,
1548 * and save its vector registers in its thread_struct.
1549 * Enables the VMX for use in the kernel on return.
1550 * On SMP we know the VMX is free, since we give it up every
1551 * switch (ie, no lazy save of the vector registers).
1552 * On entry: r13 == 'current' && last_task_used_altivec != 'current'
1553 */
1554_STATIC(load_up_altivec)
1555 mfmsr r5 /* grab the current MSR */
1556 oris r5,r5,MSR_VEC@h
1557 mtmsrd r5 /* enable use of VMX now */
1558 isync
1559
1560/*
1561 * For SMP, we don't do lazy VMX switching because it just gets too
1562 * horrendously complex, especially when a task switches from one CPU
1563 * to another. Instead we call giveup_altivec in switch_to.
1564 * VRSAVE isn't dealt with here, that is done in the normal context
1565 * switch code. Note that we could rely on vrsave value to eventually
1566 * avoid saving all of the VREGs here...
1567 */
1568#ifndef CONFIG_SMP
1569 ld r3,last_task_used_altivec@got(r2)
1570 ld r4,0(r3)
1571 cmpdi 0,r4,0
1572 beq 1f
1573 /* Save VMX state to last_task_used_altivec's THREAD struct */
1574 addi r4,r4,THREAD
1575 SAVE_32VRS(0,r5,r4)
1576 mfvscr vr0
1577 li r10,THREAD_VSCR
1578 stvx vr0,r10,r4
1579 /* Disable VMX for last_task_used_altivec */
1580 ld r5,PT_REGS(r4)
1581 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1582 lis r6,MSR_VEC@h
1583 andc r4,r4,r6
1584 std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
15851:
1586#endif /* CONFIG_SMP */
1587 /* Hack: if we get an altivec unavailable trap with VRSAVE
1588 * set to all zeros, we assume this is a broken application
1589 * that fails to set it properly, and thus we switch it to
1590 * all 1's
1591 */
1592 mfspr r4,SPRN_VRSAVE
1593 cmpdi 0,r4,0
1594 bne+ 1f
1595 li r4,-1
1596 mtspr SPRN_VRSAVE,r4
15971:
1598 /* enable use of VMX after return */
1599 ld r4,PACACURRENT(r13)
1600 addi r5,r4,THREAD /* Get THREAD */
1601 oris r12,r12,MSR_VEC@h
1602 std r12,_MSR(r1)
1603 li r4,1
1604 li r10,THREAD_VSCR
1605 stw r4,THREAD_USED_VR(r5)
1606 lvx vr0,r10,r5
1607 mtvscr vr0
1608 REST_32VRS(0,r4,r5)
1609#ifndef CONFIG_SMP
1610	/* Update last_task_used_altivec to 'current' */
1611 subi r4,r5,THREAD /* Back to 'current' */
1612 std r4,0(r3)
1613#endif /* CONFIG_SMP */
1614 /* restore registers and return */
1615 b fast_exception_return
1616
1617/*
1618 * disable_kernel_altivec()
1619 * Disable the VMX.
1620 */
1621_GLOBAL(disable_kernel_altivec)
1622 mfmsr r3
1623 rldicl r0,r3,(63-MSR_VEC_LG),1
1624 rldicl r3,r0,(MSR_VEC_LG+1),0
1625 mtmsrd r3 /* disable use of VMX now */
1626 isync
1627 blr
1628
1629/*
1630 * giveup_altivec(tsk)
1631 * Disable VMX for the task given as the argument,
1632 * and save the vector registers in its thread_struct.
1633 * Enables the VMX for use in the kernel on return.
1634 */
1635_GLOBAL(giveup_altivec)
1636 mfmsr r5
1637 oris r5,r5,MSR_VEC@h
1638 mtmsrd r5 /* enable use of VMX now */
1639 isync
1640 cmpdi 0,r3,0
1641 beqlr- /* if no previous owner, done */
1642 addi r3,r3,THREAD /* want THREAD of task */
1643 ld r5,PT_REGS(r3)
1644 cmpdi 0,r5,0
1645 SAVE_32VRS(0,r4,r3)
1646 mfvscr vr0
1647 li r4,THREAD_VSCR
1648 stvx vr0,r4,r3
1649 beq 1f
1650 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1651 lis r3,MSR_VEC@h
1652	andc	r4,r4,r3		/* disable VMX for previous task */
1653 std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
16541:
1655#ifndef CONFIG_SMP
1656 li r5,0
1657 ld r4,last_task_used_altivec@got(r2)
1658 std r5,0(r4)
1659#endif /* CONFIG_SMP */
1660 blr
1661
1662#endif /* CONFIG_ALTIVEC */
1663
1664#ifdef CONFIG_SMP
1665#ifdef CONFIG_PPC_PMAC
1666/*
1667 * On PowerMac, secondary processors start from the reset vector, which
1668 * is temporarily turned into a call to one of the functions below.
1669 */
1670 .section ".text";
1671 .align 2 ;
1672
1673 .globl pmac_secondary_start_1
1674pmac_secondary_start_1:
1675 li r24, 1
1676 b .pmac_secondary_start
1677
1678 .globl pmac_secondary_start_2
1679pmac_secondary_start_2:
1680 li r24, 2
1681 b .pmac_secondary_start
1682
1683 .globl pmac_secondary_start_3
1684pmac_secondary_start_3:
1685 li r24, 3
1686 b .pmac_secondary_start
1687
1688_GLOBAL(pmac_secondary_start)
1689 /* turn on 64-bit mode */
1690 bl .enable_64b_mode
1691 isync
1692
1693 /* Copy some CPU settings from CPU 0 */
1694 bl .__restore_cpu_setup
1695
1696	/* pSeries does this early, though I don't think we really need it */
1697 mfmsr r3
1698 ori r3,r3,MSR_RI
1699 mtmsrd r3 /* RI on */
1700
1701 /* Set up a paca value for this processor. */
1702 LOADADDR(r4, paca) /* Get base vaddr of paca array */
1703 mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
1704 add r13,r13,r4 /* for this processor. */
1705 mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */
1706
1707 /* Create a temp kernel stack for use before relocation is on. */
1708 ld r1,PACAEMERGSP(r13)
1709 subi r1,r1,STACK_FRAME_OVERHEAD
1710
1711 b .__secondary_start
1712
1713#endif /* CONFIG_PPC_PMAC */
1714
1715/*
1716 * This function is called after the master CPU has released the
1717 * secondary processors. Execution here is with relocation off.
1718 * The paca for this processor has the following fields initialized at
1719 * this point:
1720 * 1. Processor number
1721 * 2. Segment table pointer (virtual address)
1722 * On entry the following are set:
1723 * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries
1724 * r24 = cpu# (in Linux terms)
1725 * r13 = paca virtual address
1726 * SPRG3 = paca virtual address
1727 */
1728_GLOBAL(__secondary_start)
1729
1730 HMT_MEDIUM /* Set thread priority to MEDIUM */
1731
1732 ld r2,PACATOC(r13)
1733 li r6,0
1734 stb r6,PACAPROCENABLED(r13)
1735
1736#ifndef CONFIG_PPC_ISERIES
1737 /* Initialize the page table pointer register. */
1738 LOADADDR(r6,_SDR1)
1739 ld r6,0(r6) /* get the value of _SDR1 */
1740 mtspr SDR1,r6 /* set the htab location */
1741#endif
1742 /* Initialize the first segment table (or SLB) entry */
1743 ld r3,PACASTABVIRT(r13) /* get addr of segment table */
1744 bl .stab_initialize
1745
1746 /* Initialize the kernel stack. Just a repeat for iSeries. */
1747 LOADADDR(r3,current_set)
1748 sldi r28,r24,3 /* get current_set[cpu#] */
1749 ldx r1,r3,r28
1750 addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
1751 std r1,PACAKSAVE(r13)
1752
1753 ld r3,PACASTABREAL(r13) /* get raddr of segment table */
1754 ori r4,r3,1 /* turn on valid bit */
1755
1756#ifdef CONFIG_PPC_ISERIES
1757 li r0,-1 /* hypervisor call */
1758 li r3,1
1759 sldi r3,r3,63 /* 0x8000000000000000 */
1760 ori r3,r3,4 /* 0x8000000000000004 */
1761 sc /* HvCall_setASR */
1762#else
1763 /* set the ASR */
1764 ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
1765 lwz r3,PLATFORM(r3) /* r3 = platform flags */
1766 cmpldi r3,PLATFORM_PSERIES_LPAR
1767 bne 98f
1768 mfspr r3,PVR
1769 srwi r3,r3,16
1770 cmpwi r3,0x37 /* SStar */
1771 beq 97f
1772 cmpwi r3,0x36 /* IStar */
1773 beq 97f
1774 cmpwi r3,0x34 /* Pulsar */
1775 bne 98f
177697: li r3,H_SET_ASR /* hcall = H_SET_ASR */
1777 HVSC /* Invoking hcall */
1778 b 99f
177998: /* !(rpa hypervisor) || !(star) */
1780 mtasr r4 /* set the stab location */
178199:
1782#endif
1783 li r7,0
1784 mtlr r7
1785
1786 /* enable MMU and jump to start_secondary */
1787 LOADADDR(r3,.start_secondary_prolog)
1788 SET_REG_TO_CONST(r4, MSR_KERNEL)
1789#ifdef DO_SOFT_DISABLE
1790 ori r4,r4,MSR_EE
1791#endif
1792 mtspr SRR0,r3
1793 mtspr SRR1,r4
1794 rfid
1795 b . /* prevent speculative execution */
1796
1797/*
1798 * Running with relocation on at this point. All we want to do is
1799 * zero the stack back-chain pointer before going into C code.
1800 */
1801_GLOBAL(start_secondary_prolog)
1802 li r3,0
1803 std r3,0(r1) /* Zero the stack frame pointer */
1804 bl .start_secondary
1805#endif
1806
1807/*
1808 * This subroutine clobbers r11 and r12
1809 */
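/* It ors MSR_SF (64-bit mode) and MSR_ISF (64-bit interrupt mode)
 * into the MSR. */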
1810_GLOBAL(enable_64b_mode)
1811 mfmsr r11 /* grab the current MSR */
1812 li r12,1
1813 rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
1814 or r11,r11,r12
1815 li r12,1
1816 rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
1817 or r11,r11,r12
1818 mtmsrd r11
1819 isync
1820 blr
1821
1822#ifdef CONFIG_PPC_MULTIPLATFORM
1823/*
1824 * This is where the main kernel code starts.
1825 */
1826_STATIC(start_here_multiplatform)
1827 /* get a new offset, now that the kernel has moved. */
1828 bl .reloc_offset
1829 mr r26,r3
1830
1831	/* Clear out the BSS. It may have been done in prom_init
1832	 * already, but that's irrelevant since prom_init will soon
1833 * be detached from the kernel completely. Besides, we need
1834 * to clear it now for kexec-style entry.
1835 */
1836 LOADADDR(r11,__bss_stop)
1837 LOADADDR(r8,__bss_start)
1838 sub r11,r11,r8 /* bss size */
1839 addi r11,r11,7 /* round up to an even double word */
1840 rldicl. r11,r11,61,3 /* shift right by 3 */
1841 beq 4f
1842 addi r8,r8,-8
1843 li r0,0
1844 mtctr r11 /* zero this many doublewords */
18453: stdu r0,8(r8)
1846 bdnz 3b
18474:
1848
1849 mfmsr r6
1850 ori r6,r6,MSR_RI
1851 mtmsrd r6 /* RI on */
1852
1853#ifdef CONFIG_HMT
1854 /* Start up the second thread on cpu 0 */
1855 mfspr r3,PVR
1856 srwi r3,r3,16
1857 cmpwi r3,0x34 /* Pulsar */
1858 beq 90f
1859 cmpwi r3,0x36 /* Icestar */
1860 beq 90f
1861 cmpwi r3,0x37 /* SStar */
1862 beq 90f
1863 b 91f /* HMT not supported */
186490: li r3,0
1865 bl .hmt_start_secondary
186691:
1867#endif
1868
1869 /* The following gets the stack and TOC set up with the regs */
1870 /* pointing to the real addr of the kernel stack. This is */
1871 /* all done to support the C function call below which sets */
1872 /* up the htab. This is done because we have relocated the */
1873 /* kernel but are still running in real mode. */
1874
1875 LOADADDR(r3,init_thread_union)
1876 sub r3,r3,r26
1877
1878 /* set up a stack pointer (physical address) */
1879 addi r1,r3,THREAD_SIZE
1880 li r0,0
1881 stdu r0,-STACK_FRAME_OVERHEAD(r1)
1882
1883 /* set up the TOC (physical address) */
1884 LOADADDR(r2,__toc_start)
1885 addi r2,r2,0x4000
1886 addi r2,r2,0x4000
1887 sub r2,r2,r26
1888
1889 LOADADDR(r3,cpu_specs)
1890 sub r3,r3,r26
1891 LOADADDR(r4,cur_cpu_spec)
1892 sub r4,r4,r26
1893 mr r5,r26
1894 bl .identify_cpu
1895
1896 /* Save some low level config HIDs of CPU0 to be copied to
1897 * other CPUs later on, or used for suspend/resume
1898 */
1899 bl .__save_cpu_setup
1900 sync
1901
1902	/* Set up a valid physical PACA pointer in SPRG3 for early_setup.
1903	 * Note that boot_cpuid can always be 0 nowadays since there is
1904 * nowhere it can be initialized differently before we reach this
1905 * code
1906 */
1907 LOADADDR(r27, boot_cpuid)
1908 sub r27,r27,r26
1909 lwz r27,0(r27)
1910
1911 LOADADDR(r24, paca) /* Get base vaddr of paca array */
1912 mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */
1913 add r13,r13,r24 /* for this processor. */
1914 sub r13,r13,r26 /* convert to physical addr */
1915 mtspr SPRG3,r13 /* PPPBBB: Temp... -Peter */
1916
1917 /* Do very early kernel initializations, including initial hash table,
1918 * stab and slb setup before we turn on relocation. */
1919
1920 /* Restore parameters passed from prom_init/kexec */
1921 mr r3,r31
1922 bl .early_setup
1923
1924 /* set the ASR */
1925 ld r3,PACASTABREAL(r13)
1926 ori r4,r3,1 /* turn on valid bit */
1927 ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
1928 lwz r3,PLATFORM(r3) /* r3 = platform flags */
1929 cmpldi r3,PLATFORM_PSERIES_LPAR
1930 bne 98f
1931 mfspr r3,PVR
1932 srwi r3,r3,16
1933 cmpwi r3,0x37 /* SStar */
1934 beq 97f
1935 cmpwi r3,0x36 /* IStar */
1936 beq 97f
1937 cmpwi r3,0x34 /* Pulsar */
1938 bne 98f
193997: li r3,H_SET_ASR /* hcall = H_SET_ASR */
1940 HVSC /* Invoking hcall */
1941 b 99f
194298: /* !(rpa hypervisor) || !(star) */
1943 mtasr r4 /* set the stab location */
194499:
1945 /* Set SDR1 (hash table pointer) */
1946 ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */
1947 lwz r3,PLATFORM(r3) /* r3 = platform flags */
1948 /* Test if bit 0 is set (LPAR bit) */
1949 andi. r3,r3,0x1
1950 bne 98f
1951 LOADADDR(r6,_SDR1) /* Only if NOT LPAR */
1952 sub r6,r6,r26
1953 ld r6,0(r6) /* get the value of _SDR1 */
1954 mtspr SDR1,r6 /* set the htab location */
195598:
1956 LOADADDR(r3,.start_here_common)
1957 SET_REG_TO_CONST(r4, MSR_KERNEL)
1958 mtspr SRR0,r3
1959 mtspr SRR1,r4
1960 rfid
1961 b . /* prevent speculative execution */
1962#endif /* CONFIG_PPC_MULTIPLATFORM */
1963
1964 /* This is where all platforms converge execution */
1965_STATIC(start_here_common)
1966 /* relocation is on at this point */
1967
1968 /* The following code sets up the SP and TOC now that we are */
1969 /* running with translation enabled. */
1970
1971 LOADADDR(r3,init_thread_union)
1972
1973 /* set up the stack */
1974 addi r1,r3,THREAD_SIZE
1975 li r0,0
1976 stdu r0,-STACK_FRAME_OVERHEAD(r1)
1977
1978	/* Apply the CPU-specific fixups (nop out sections not relevant
1979	 * to this CPU).
1980 */
1981 li r3,0
1982 bl .do_cpu_ftr_fixups
1983
1984 LOADADDR(r26, boot_cpuid)
1985 lwz r26,0(r26)
1986
1987 LOADADDR(r24, paca) /* Get base vaddr of paca array */
1988 mulli r13,r26,PACA_SIZE /* Calculate vaddr of right paca */
1989 add r13,r13,r24 /* for this processor. */
1990 mtspr SPRG3,r13
1991
1992 /* ptr to current */
1993 LOADADDR(r4,init_task)
1994 std r4,PACACURRENT(r13)
1995
1996 /* Load the TOC */
1997 ld r2,PACATOC(r13)
1998 std r1,PACAKSAVE(r13)
1999
2000 bl .setup_system
2001
2002 /* Load up the kernel context */
20035:
2004#ifdef DO_SOFT_DISABLE
2005 li r5,0
2006 stb r5,PACAPROCENABLED(r13) /* Soft Disabled */
2007 mfmsr r5
2008 ori r5,r5,MSR_EE /* Hard Enabled */
2009 mtmsrd r5
2010#endif
2011
2012 bl .start_kernel
2013
2014_GLOBAL(__setup_cpu_power3)
2015 blr
2016
2017_GLOBAL(hmt_init)
2018#ifdef CONFIG_HMT
2019 LOADADDR(r5, hmt_thread_data)
2020 mfspr r7,PVR
2021 srwi r7,r7,16
2022 cmpwi r7,0x34 /* Pulsar */
2023 beq 90f
2024 cmpwi r7,0x36 /* Icestar */
2025 beq 91f
2026 cmpwi r7,0x37 /* SStar */
2027 beq 91f
2028 b 101f
202990: mfspr r6,PIR
2030 andi. r6,r6,0x1f
2031 b 92f
203291: mfspr r6,PIR
2033 andi. r6,r6,0x3ff
203492: sldi r4,r24,3
2035 stwx r6,r5,r4
2036 bl .hmt_start_secondary
2037 b 101f
2038
2039__hmt_secondary_hold:
2040 LOADADDR(r5, hmt_thread_data)
2041 clrldi r5,r5,4
2042 li r7,0
2043 mfspr r6,PIR
2044 mfspr r8,PVR
2045 srwi r8,r8,16
2046 cmpwi r8,0x34
2047 bne 93f
2048 andi. r6,r6,0x1f
2049 b 103f
205093: andi. r6,r6,0x3f
2051
2052103: lwzx r8,r5,r7
2053 cmpw r8,r6
2054 beq 104f
2055 addi r7,r7,8
2056 b 103b
2057
2058104: addi r7,r7,4
2059 lwzx r9,r5,r7
2060 mr r24,r9
2061101:
2062#endif
2063 mr r3,r24
2064 b .pSeries_secondary_smp_init
2065
2066#ifdef CONFIG_HMT
2067_GLOBAL(hmt_start_secondary)
2068 LOADADDR(r4,__hmt_secondary_hold)
2069 clrldi r4,r4,4
2070 mtspr NIADORM, r4
2071 mfspr r4, MSRDORM
2072 li r5, -65
2073 and r4, r4, r5
2074 mtspr MSRDORM, r4
2075 lis r4,0xffef
2076 ori r4,r4,0x7403
2077 mtspr TSC, r4
2078 li r4,0x1f4
2079 mtspr TST, r4
2080 mfspr r4, HID0
2081 ori r4, r4, 0x1
2082 mtspr HID0, r4
2083 mfspr r4, CTRLF
2084 oris r4, r4, 0x40
2085 mtspr CTRLT, r4
2086 blr
2087#endif
2088
2089#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES)
2090_GLOBAL(smp_release_cpus)
2091 /* All secondary cpus are spinning on a common
2092 * spinloop, release them all now so they can start
2093 * to spin on their individual paca spinloops.
2094	 * For non-SMP kernels, the secondary cpus never
2095 * get out of the common spinloop.
2096 */
2097 li r3,1
2098 LOADADDR(r5,__secondary_hold_spinloop)
2099 std r3,0(r5)
2100 sync
2101 blr
2102#endif /* CONFIG_SMP && !CONFIG_PPC_ISERIES */
2103
2104
2105/*
2106 * We put a few things here that have to be page-aligned.
2107 * This stuff goes at the beginning of the data segment,
2108 * which is page-aligned.
2109 */
2110 .data
2111 .align 12
2112 .globl sdata
2113sdata:
2114 .globl empty_zero_page
2115empty_zero_page:
2116 .space 4096
2117
2118 .globl swapper_pg_dir
2119swapper_pg_dir:
2120 .space 4096
2121
2122 .globl ioremap_dir
2123ioremap_dir:
2124 .space 4096
2125
2126#ifdef CONFIG_SMP
2127/* 1 page segment table per cpu (max 48, cpu0 allocated at STAB0_PHYS_ADDR) */
2128 .globl stab_array
2129stab_array:
2130 .space 4096 * 48
2131#endif
2132
2133/*
2134 * This space gets a copy of optional info passed to us by the bootstrap.
2135 * It is used to pass parameters into the kernel like root=/dev/sda1, etc.
2136 */
2137 .globl cmd_line
2138cmd_line:
2139 .space COMMAND_LINE_SIZE
diff --git a/arch/ppc64/kernel/hvCall.S b/arch/ppc64/kernel/hvCall.S
new file mode 100644
index 000000000000..4c699eab1b95
--- /dev/null
+++ b/arch/ppc64/kernel/hvCall.S
@@ -0,0 +1,98 @@
1/*
2 * arch/ppc64/kernel/hvCall.S
3 *
4 *
5 * This file contains the code to perform calls to the
6 * iSeries LPAR hypervisor
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <asm/ppc_asm.h>
15#include <asm/processor.h>
16
17 .text
18
19/*
20 * Hypervisor call
21 *
22 * Invoke the iSeries hypervisor via the System Call instruction
23 * Parameters are passed to this routine in registers r3 - r10
24 *
25 * r3 contains the HV function to be called
26 * r4-r10 contain the operands to the hypervisor function
27 *
28 */
29
30_GLOBAL(HvCall)
31_GLOBAL(HvCall0)
32_GLOBAL(HvCall1)
33_GLOBAL(HvCall2)
34_GLOBAL(HvCall3)
35_GLOBAL(HvCall4)
36_GLOBAL(HvCall5)
37_GLOBAL(HvCall6)
38_GLOBAL(HvCall7)
39
40
41 mfcr r0
42 std r0,-8(r1)
43 stdu r1,-(STACK_FRAME_OVERHEAD+16)(r1)
44
45 /* r0 = 0xffffffffffffffff indicates a hypervisor call */
46
47 li r0,-1
48
49 /* Invoke the hypervisor */
50
51 sc
52
53 ld r1,0(r1)
54 ld r0,-8(r1)
55 mtcrf 0xff,r0
56
57 /* return to caller, return value in r3 */
58
59 blr
60
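For reference, a minimal sketch of invoking the entry points above from C, assuming the prototypes the iSeries headers declare and the kernel's u64 type; the function code used here is hypothetical. Per the convention documented above, r3 (the first C argument) selects the hypervisor function and r4-r10 carry the operands:

	/* Sketch only; 0x10 is a made-up function code for illustration. */
	extern u64 HvCall1(u64 function, u64 arg1);

	static inline u64 example_hv_call(u64 operand)
	{
		return HvCall1(0x10, operand);
	}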
61_GLOBAL(HvCall0Ret16)
62_GLOBAL(HvCall1Ret16)
63_GLOBAL(HvCall2Ret16)
64_GLOBAL(HvCall3Ret16)
65_GLOBAL(HvCall4Ret16)
66_GLOBAL(HvCall5Ret16)
67_GLOBAL(HvCall6Ret16)
68_GLOBAL(HvCall7Ret16)
69
70 mfcr r0
71 std r0,-8(r1)
72 std r31,-16(r1)
73 stdu r1,-(STACK_FRAME_OVERHEAD+32)(r1)
74
75 mr r31,r4
76 li r0,-1
77 mr r4,r5
78 mr r5,r6
79 mr r6,r7
80 mr r7,r8
81 mr r8,r9
82 mr r9,r10
83
84 sc
85
86 std r3,0(r31)
87 std r4,8(r31)
88
89 mr r3,r5
90
91 ld r1,0(r1)
92 ld r0,-8(r1)
93 mtcrf 0xff,r0
94 ld r31,-16(r1)
95
96 blr
97
98
diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c
new file mode 100644
index 000000000000..c72fb8ffe974
--- /dev/null
+++ b/arch/ppc64/kernel/hvconsole.c
@@ -0,0 +1,121 @@
1/*
2 * hvconsole.c
3 * Copyright (C) 2004 Hollis Blanchard, IBM Corporation
4 * Copyright (C) 2004 IBM Corporation
5 *
6 * Additional Author(s):
7 * Ryan S. Arnold <rsa@us.ibm.com>
8 *
9 * LPAR console support.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26#include <linux/kernel.h>
27#include <linux/module.h>
28#include <asm/hvcall.h>
29#include <asm/hvconsole.h>
30#include <asm/prom.h>
31
32/**
33 * hvc_get_chars - retrieve characters from firmware for the denoted vterm adapter
34 * @vtermno: The vtermno or unit_address of the adapter from which to fetch the
35 * data.
36 * @buf: The character buffer into which to put the character data fetched from
37 * firmware.
38 * @count: not used?
39 */
40int hvc_get_chars(uint32_t vtermno, char *buf, int count)
41{
42 unsigned long got;
43
44 if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got,
45 (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) {
46 /*
47 * Work around a HV bug where it gives us a null
48 * after every \r. -- paulus
49 */
50 if (got > 0) {
51 int i;
52 for (i = 1; i < got; ++i) {
53 if (buf[i] == 0 && buf[i-1] == '\r') {
54 --got;
55 if (i < got)
56 memmove(&buf[i], &buf[i+1],
57 got - i);
58 }
59 }
60 }
61 return got;
62 }
63 return 0;
64}
65
66EXPORT_SYMBOL(hvc_get_chars);
67
68/**
69 * hvc_put_chars: send characters to firmware for denoted vterm adapter
70 * @vtermno: The vtermno or unit_address of the adapter from which the data
71 * originated.
72 * @buf: The character buffer that contains the character data to send to
73 * firmware.
74 * @count: Send this number of characters.
75 */
76int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
77{
78 unsigned long *lbuf = (unsigned long *) buf;
79 long ret;
80
81 ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0],
82 lbuf[1]);
83 if (ret == H_Success)
84 return count;
85 if (ret == H_Busy)
86 return 0;
87 return -EIO;
88}
89
90EXPORT_SYMBOL(hvc_put_chars);
91
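A usage sketch for the two helpers above; the caller is hypothetical. Since the hcall carries the payload in two longs, at most 16 bytes move per call in either direction:

	/* Sketch: echo pending vterm input straight back out. */
	static void example_echo(uint32_t vtermno)
	{
		char buf[16];	/* H_GET_TERM_CHAR returns at most 16 bytes */
		int n = hvc_get_chars(vtermno, buf, sizeof(buf));

		if (n > 0)
			hvc_put_chars(vtermno, buf, n);
	}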
92/*
93 * We hope/assume that the first vty found corresponds to the first console
94 * device.
95 */
96int hvc_find_vtys(void)
97{
98 struct device_node *vty;
99 int num_found = 0;
100
101 for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL;
102 vty = of_find_node_by_name(vty, "vty")) {
103 uint32_t *vtermno;
104
105 /* We have statically defined space for only a certain number of
106 * console adapters. */
107 if (num_found >= MAX_NR_HVC_CONSOLES)
108 break;
109
110 vtermno = (uint32_t *)get_property(vty, "reg", NULL);
111 if (!vtermno)
112 continue;
113
114 if (device_is_compatible(vty, "hvterm1")) {
115 hvc_instantiate(*vtermno, num_found);
116 ++num_found;
117 }
118 }
119
120 return num_found;
121}
diff --git a/arch/ppc64/kernel/hvcserver.c b/arch/ppc64/kernel/hvcserver.c
new file mode 100644
index 000000000000..bde8f42da854
--- /dev/null
+++ b/arch/ppc64/kernel/hvcserver.c
@@ -0,0 +1,249 @@
1/*
2 * hvcserver.c
3 * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
4 *
5 * PPC64 virtual I/O console server support.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/kernel.h>
23#include <linux/list.h>
24#include <linux/module.h>
25#include <asm/hvcall.h>
26#include <asm/hvcserver.h>
27#include <asm/io.h>
28
29#define HVCS_ARCH_VERSION "1.0.0"
30
31MODULE_AUTHOR("Ryan S. Arnold <rsa@us.ibm.com>");
32MODULE_DESCRIPTION("IBM hvcs ppc64 API");
33MODULE_LICENSE("GPL");
34MODULE_VERSION(HVCS_ARCH_VERSION);
35
36/*
37 * Convert arch specific return codes into relevant errnos. The hvcs
38 * functions aren't performance sensitive, so this conversion isn't an
39 * issue.
40 */
41int hvcs_convert(long to_convert)
42{
43 switch (to_convert) {
44 case H_Success:
45 return 0;
46 case H_Parameter:
47 return -EINVAL;
48 case H_Hardware:
49 return -EIO;
50 case H_Busy:
51 case H_LongBusyOrder1msec:
52 case H_LongBusyOrder10msec:
53 case H_LongBusyOrder100msec:
54 case H_LongBusyOrder1sec:
55 case H_LongBusyOrder10sec:
56 case H_LongBusyOrder100sec:
57 return -EBUSY;
58 case H_Function: /* fall through */
59 default:
60 return -EPERM;
61 }
62}
63
64/**
65 * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info
66 * @head: list_head pointer for an allocated list of partner info structs to
67 * free.
68 *
69 * This function is used to free the partner info list that was returned by
70 * calling hvcs_get_partner_info().
71 */
72int hvcs_free_partner_info(struct list_head *head)
73{
74 struct hvcs_partner_info *pi;
75 struct list_head *element;
76
77 if (!head)
78 return -EINVAL;
79
80 while (!list_empty(head)) {
81 element = head->next;
82 pi = list_entry(element, struct hvcs_partner_info, node);
83 list_del(element);
84 kfree(pi);
85 }
86
87 return 0;
88}
89EXPORT_SYMBOL(hvcs_free_partner_info);
90
91/* Helper function for hvcs_get_partner_info */
92int hvcs_next_partner(uint32_t unit_address,
93 unsigned long last_p_partition_ID,
94 unsigned long last_p_unit_address, unsigned long *pi_buff)
95
96{
97 long retval;
98 retval = plpar_hcall_norets(H_VTERM_PARTNER_INFO, unit_address,
99 last_p_partition_ID,
100 last_p_unit_address, virt_to_phys(pi_buff));
101 return hvcs_convert(retval);
102}
103
104/**
105 * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter
106 * @unit_address: The unit_address of the vty-server adapter for which this
107 * function is fetching partner info.
108 * @head: An initialized list_head pointer to an empty list to use to return the
109 * list of partner info fetched from the hypervisor to the caller.
110 * @pi_buff: A page sized buffer pre-allocated prior to calling this function
111 * that is to be used by firmware as an iterator to keep track
112 * of the partner info retrieval.
113 *
114 * This function returns 0 on success and a negative errno on failure.
115 *
116 * The pi_buff is pre-allocated prior to calling this function because this
117 * function may be called with a spin_lock held and kmalloc of a page is not
118 * recommended as GFP_ATOMIC.
119 *
120 * The first long of this buffer is used to store a partner unit address. The
121 * second long is used to store a partner partition ID and starting at
122 * pi_buff[2] is the 79 character Converged Location Code (a different size than the
123 * unsigned longs, hence the casting mumbo jumbo you see later).
124 *
125 * Invocation of this function should always be followed by an invocation of
126 * hvcs_free_partner_info() using a pointer to the SAME list head instance
127 * that was passed as a parameter to this function.
128 */
129int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
130 unsigned long *pi_buff)
131{
132 /*
133 * Dealt with as longs because of the hcall interface even though the
134 * values are uint32_t.
135 */
136 unsigned long last_p_partition_ID;
137 unsigned long last_p_unit_address;
138 struct hvcs_partner_info *next_partner_info = NULL;
139 int more = 1;
140 int retval;
141
142 /* invalid parameters; check before the memset touches pi_buff */
143 if (!head || !pi_buff)
144 return -EINVAL;
145 memset(pi_buff, 0x00, PAGE_SIZE);
146
147 last_p_partition_ID = last_p_unit_address = ~0UL;
148 INIT_LIST_HEAD(head);
149
150 do {
151 retval = hvcs_next_partner(unit_address, last_p_partition_ID,
152 last_p_unit_address, pi_buff);
153 if (retval) {
154 /*
155 * Don't indicate that we've failed if we have
156 * any list elements.
157 */
158 if (!list_empty(head))
159 return 0;
160 return retval;
161 }
162
163 last_p_partition_ID = pi_buff[0];
164 last_p_unit_address = pi_buff[1];
165
166 /* This indicates that there are no further partners */
167 if (last_p_partition_ID == ~0UL
168 && last_p_unit_address == ~0UL)
169 break;
170
171 /* This is a very small struct and will be freed soon in
172 * hvcs_free_partner_info(). */
173 next_partner_info = kmalloc(sizeof(struct hvcs_partner_info),
174 GFP_ATOMIC);
175
176 if (!next_partner_info) {
177 printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to"
178 " allocate partner info struct.\n");
179 hvcs_free_partner_info(head);
180 return -ENOMEM;
181 }
182
183 next_partner_info->unit_address
184 = (unsigned int)last_p_unit_address;
185 next_partner_info->partition_ID
186 = (unsigned int)last_p_partition_ID;
187
188 /* copy the Null-term char too */
189 strncpy(&next_partner_info->location_code[0],
190 (char *)&pi_buff[2],
191 strlen((char *)&pi_buff[2]) + 1);
192
193 list_add_tail(&(next_partner_info->node), head);
194 next_partner_info = NULL;
195
196 } while (more);
197
198 return 0;
199}
200EXPORT_SYMBOL(hvcs_get_partner_info);
201
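A caller sketch, assuming process context so the iterator page can be allocated with __get_free_page; the function name is illustrative. Fetch the list, walk it, then free it with the same list head, as the comment above requires:

	static int example_dump_partners(uint32_t unit_address)
	{
		struct list_head head;
		struct hvcs_partner_info *pi;
		unsigned long *pi_buff = (unsigned long *)__get_free_page(GFP_KERNEL);
		int retval;

		if (!pi_buff)
			return -ENOMEM;
		retval = hvcs_get_partner_info(unit_address, &head, pi_buff);
		if (!retval)
			list_for_each_entry(pi, &head, node)
				printk("partner %X:%X at %s\n", pi->partition_ID,
					pi->unit_address, pi->location_code);
		hvcs_free_partner_info(&head);	/* same head, as required above */
		free_page((unsigned long)pi_buff);
		return retval;
	}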
202/**
203 * hvcs_register_connection - establish a connection between this vty-server and
204 * a vty.
205 * @unit_address: The unit address of the vty-server adapter that is to
206 * establish a connection.
207 * @p_partition_ID: The partition ID of the vty adapter that is to be connected.
208 * @p_unit_address: The unit address of the vty adapter to which the vty-server
209 * is to be connected.
210 *
211 * If this function is called once and -EINVAL is returned it may
212 * indicate that the partner info needs to be refreshed for the
213 * target unit address at which point the caller must invoke
214 * hvcs_get_partner_info() and then call this function again. If,
215 * for a second time, -EINVAL is returned then it indicates that
216 * there is probably already a partner connection registered to a
217 * different vty-server adapter. It is also possible that a second
218 * -EINVAL may indicate that one of the parms is not valid, for
219 * instance if the link was removed between the vty-server adapter
220 * and the vty adapter that you are trying to open. Don't shoot the
221 * messenger. Firmware implemented it this way.
222 */
223int hvcs_register_connection( uint32_t unit_address,
224 uint32_t p_partition_ID, uint32_t p_unit_address)
225{
226 long retval;
227 retval = plpar_hcall_norets(H_REGISTER_VTERM, unit_address,
228 p_partition_ID, p_unit_address);
229 return hvcs_convert(retval);
230}
231EXPORT_SYMBOL(hvcs_register_connection);
232
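A sketch of the refresh-and-retry sequence described above; the helper and its parameters are illustrative, and the caller still owns head and pi_buff:

	static int example_register(uint32_t unit, uint32_t p_part, uint32_t p_unit,
			struct list_head *head, unsigned long *pi_buff)
	{
		int rc = hvcs_register_connection(unit, p_part, p_unit);

		if (rc == -EINVAL) {
			/* partner info may be stale: refresh once, then retry */
			if (!hvcs_get_partner_info(unit, head, pi_buff))
				rc = hvcs_register_connection(unit, p_part, p_unit);
		}
		/* a second -EINVAL likely means a connection already exists */
		return rc;
	}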
233/**
234 * hvcs_free_connection - free the connection between a vty-server and vty
235 * @unit_address: The unit address of the vty-server that is to have its
236 * connection severed.
237 *
238 * This function is used to free the partner connection between a vty-server
239 * adapter and a vty adapter.
240 *
241 * If -EBUSY is returned continue to call this function until 0 is returned.
242 */
243int hvcs_free_connection(uint32_t unit_address)
244{
245 long retval;
246 retval = plpar_hcall_norets(H_FREE_VTERM, unit_address);
247 return hvcs_convert(retval);
248}
249EXPORT_SYMBOL(hvcs_free_connection);
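Per the comment above, -EBUSY is transient, so a teardown sketch (illustrative) simply retries:

	static void example_teardown(uint32_t unit_address)
	{
		while (hvcs_free_connection(unit_address) == -EBUSY)
			cpu_relax();	/* hypervisor is still draining */
	}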
diff --git a/arch/ppc64/kernel/i8259.c b/arch/ppc64/kernel/i8259.c
new file mode 100644
index 000000000000..74dcfd68fc75
--- /dev/null
+++ b/arch/ppc64/kernel/i8259.c
@@ -0,0 +1,177 @@
1/*
2 * c 2001 PPC64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#include <linux/stddef.h>
10#include <linux/init.h>
11#include <linux/sched.h>
12#include <linux/signal.h>
13#include <linux/cache.h>
14#include <linux/irq.h>
15#include <linux/interrupt.h>
16#include <asm/io.h>
17#include <asm/ppcdebug.h>
18#include "i8259.h"
19
20unsigned char cached_8259[2] = { 0xff, 0xff };
21#define cached_A1 (cached_8259[0])
22#define cached_21 (cached_8259[1])
23
24static __cacheline_aligned_in_smp DEFINE_SPINLOCK(i8259_lock);
25
26static int i8259_pic_irq_offset;
27static int i8259_present;
28
29int i8259_irq(int cpu)
30{
31 int irq;
32
33 spin_lock/*_irqsave*/(&i8259_lock/*, flags*/);
34 /*
35 * Perform an interrupt acknowledge cycle on controller 1
36 */
37 outb(0x0C, 0x20);
38 irq = inb(0x20) & 7;
39 if (irq == 2)
40 {
41 /*
42 * Interrupt is cascaded so perform interrupt
43 * acknowledge on controller 2
44 */
45 outb(0x0C, 0xA0);
46 irq = (inb(0xA0) & 7) + 8;
47 }
48 else if (irq==7)
49 {
50 /*
51 * This may be a spurious interrupt
52 *
53 * Read the interrupt status register. If the most
54 * significant bit is not set then there is no valid
55 * interrupt
56 */
57 outb(0x0b, 0x20);
58 if(~inb(0x20)&0x80) {
59 spin_unlock/*_irqrestore*/(&i8259_lock/*, flags*/);
60 return -1;
61 }
62 }
63 spin_unlock/*_irqrestore*/(&i8259_lock/*, flags*/);
64 return irq;
65}
66
67static void i8259_mask_and_ack_irq(unsigned int irq_nr)
68{
69 unsigned long flags;
70
71 spin_lock_irqsave(&i8259_lock, flags);
72 if ( irq_nr >= i8259_pic_irq_offset )
73 irq_nr -= i8259_pic_irq_offset;
74
75 if (irq_nr > 7) {
76 cached_A1 |= 1 << (irq_nr-8);
77 inb(0xA1); /* DUMMY */
78 outb(cached_A1,0xA1);
79 outb(0x20,0xA0); /* Non-specific EOI */
80 outb(0x20,0x20); /* Non-specific EOI to cascade */
81 } else {
82 cached_21 |= 1 << irq_nr;
83 inb(0x21); /* DUMMY */
84 outb(cached_21,0x21);
85 outb(0x20,0x20); /* Non-specific EOI */
86 }
87 spin_unlock_irqrestore(&i8259_lock, flags);
88}
89
90static void i8259_set_irq_mask(int irq_nr)
91{
92 outb(cached_A1,0xA1);
93 outb(cached_21,0x21);
94}
95
96static void i8259_mask_irq(unsigned int irq_nr)
97{
98 unsigned long flags;
99
100 spin_lock_irqsave(&i8259_lock, flags);
101 if ( irq_nr >= i8259_pic_irq_offset )
102 irq_nr -= i8259_pic_irq_offset;
103 if ( irq_nr < 8 )
104 cached_21 |= 1 << irq_nr;
105 else
106 cached_A1 |= 1 << (irq_nr-8);
107 i8259_set_irq_mask(irq_nr);
108 spin_unlock_irqrestore(&i8259_lock, flags);
109}
110
111static void i8259_unmask_irq(unsigned int irq_nr)
112{
113 unsigned long flags;
114
115 spin_lock_irqsave(&i8259_lock, flags);
116 if ( irq_nr >= i8259_pic_irq_offset )
117 irq_nr -= i8259_pic_irq_offset;
118 if ( irq_nr < 8 )
119 cached_21 &= ~(1 << irq_nr);
120 else
121 cached_A1 &= ~(1 << (irq_nr-8));
122 i8259_set_irq_mask(irq_nr);
123 spin_unlock_irqrestore(&i8259_lock, flags);
124}
125
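The mask and unmask paths above maintain a software copy of each PIC's mask register; the bit selection in isolation, as a sketch:

	/* Sketch: IRQs 0-7 use a master bit (port 0x21), IRQs 8-15 a slave bit (port 0xA1). */
	static unsigned char example_mask_bit(unsigned int irq_nr)
	{
		return (irq_nr < 8) ? (1 << irq_nr) : (1 << (irq_nr - 8));
	}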
126static void i8259_end_irq(unsigned int irq)
127{
128 if (!(get_irq_desc(irq)->status & (IRQ_DISABLED|IRQ_INPROGRESS)) &&
129 get_irq_desc(irq)->action)
130 i8259_unmask_irq(irq);
131}
132
133struct hw_interrupt_type i8259_pic = {
134 .typename = " i8259 ",
135 .enable = i8259_unmask_irq,
136 .disable = i8259_mask_irq,
137 .ack = i8259_mask_and_ack_irq,
138 .end = i8259_end_irq,
139};
140
141void __init i8259_init(int offset)
142{
143 unsigned long flags;
144
145 spin_lock_irqsave(&i8259_lock, flags);
146 i8259_pic_irq_offset = offset;
147 i8259_present = 1;
148 /* init master interrupt controller */
149 outb(0x11, 0x20); /* Start init sequence */
150 outb(0x00, 0x21); /* Vector base */
151 outb(0x04, 0x21); /* edge triggered, Cascade (slave) on IRQ2 */
152 outb(0x01, 0x21); /* Select 8086 mode */
153 outb(0xFF, 0x21); /* Mask all */
154 /* init slave interrupt controller */
155 outb(0x11, 0xA0); /* Start init sequence */
156 outb(0x08, 0xA1); /* Vector base */
157 outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */
158 outb(0x01, 0xA1); /* Select 8086 mode */
159 outb(0xFF, 0xA1); /* Mask all */
160 outb(cached_A1, 0xA1);
161 outb(cached_21, 0x21);
162 spin_unlock_irqrestore(&i8259_lock, flags);
163
164}
165
166static int i8259_request_cascade(void)
167{
168 if (!i8259_present)
169 return -ENODEV;
170
171 request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT,
172 "82c59 secondary cascade", NULL );
173
174 return 0;
175}
176
177arch_initcall(i8259_request_cascade);
diff --git a/arch/ppc64/kernel/i8259.h b/arch/ppc64/kernel/i8259.h
new file mode 100644
index 000000000000..f74764ba0bfa
--- /dev/null
+++ b/arch/ppc64/kernel/i8259.h
@@ -0,0 +1,17 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#ifndef _PPC_KERNEL_i8259_H
10#define _PPC_KERNEL_i8259_H
11
12extern struct hw_interrupt_type i8259_pic;
13
14extern void i8259_init(int offset);
15extern int i8259_irq(int);
16
17#endif /* _PPC_KERNEL_i8259_H */
diff --git a/arch/ppc64/kernel/iSeries_VpdInfo.c b/arch/ppc64/kernel/iSeries_VpdInfo.c
new file mode 100644
index 000000000000..a6f0ff2d0239
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_VpdInfo.c
@@ -0,0 +1,277 @@
1/************************************************************************/
2/* File iSeries_vpdInfo.c created by Allan Trautman on Fri Feb 2 2001. */
3/************************************************************************/
4/* This code gets the card location of the hardware */
5/* Copyright (C) 20yy <Allan H Trautman> <IBM Corp> */
6/* */
7/* This program is free software; you can redistribute it and/or modify */
8/* it under the terms of the GNU General Public License as published by */
9/* the Free Software Foundation; either version 2 of the License, or */
10/* (at your option) any later version. */
11/* */
12/* This program is distributed in the hope that it will be useful, */
13/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
14/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
15/* GNU General Public License for more details. */
16/* */
17/* You should have received a copy of the GNU General Public License */
18/* along with this program; if not, write to the: */
19/* Free Software Foundation, Inc., */
20/* 59 Temple Place, Suite 330, */
21/* Boston, MA 02111-1307 USA */
22/************************************************************************/
23/* Change Activity: */
24/* Created, Feb 2, 2001 */
25/* Ported to ppc64, August 20, 2001 */
26/* End Change Activity */
27/************************************************************************/
28#include <linux/config.h>
29#include <linux/init.h>
30#include <linux/module.h>
31#include <linux/pci.h>
32#include <asm/types.h>
33#include <asm/resource.h>
34
35#include <asm/iSeries/HvCallPci.h>
36#include <asm/iSeries/HvTypes.h>
37#include <asm/iSeries/mf.h>
38#include <asm/iSeries/LparData.h>
39#include <asm/iSeries/iSeries_pci.h>
40#include "pci.h"
41
42/*
43 * Size of Bus VPD data
44 */
45#define BUS_VPDSIZE 1024
46/*
47 * Bus Vpd Tags
48 */
49#define VpdEndOfDataTag 0x78
50#define VpdEndOfAreaTag 0x79
51#define VpdIdStringTag 0x82
52#define VpdVendorAreaTag 0x84
53/*
54 * Mfg Area Tags
55 */
56#define VpdFruFlag 0x4647 // "FG"
57#define VpdFruFrameId 0x4649 // "FI"
58#define VpdSlotMapFormat 0x4D46 // "MF"
59#define VpdAsmPartNumber 0x504E // "PN"
60#define VpdFruSerial 0x534E // "SN"
61#define VpdSlotMap 0x534D // "SM"
62
63/*
64 * Structures of the areas
65 */
66struct MfgVpdAreaStruct {
67 u16 Tag;
68 u8 TagLength;
69 u8 AreaData1;
70 u8 AreaData2;
71};
72typedef struct MfgVpdAreaStruct MfgArea;
73#define MFG_ENTRY_SIZE 3
74
75struct SlotMapStruct {
76 u8 AgentId;
77 u8 SecondaryAgentId;
78 u8 PhbId;
79 char CardLocation[3];
80 char Parms[8];
81 char Reserved[2];
82};
83typedef struct SlotMapStruct SlotMap;
84#define SLOT_ENTRY_SIZE 16
85
86/*
87 * Formats the device information.
88 * - Pass in pci_dev* pointer to the device.
89 * - Pass in buffer to place the data. Danger here is the buffer must
90 * be as big as the client says it is. Should be at least 128 bytes.
91 * Returns the length of the string data put in the buffer.
92 * Format:
93 * PCI: Bus 0, Device 26, Vendor 0x12AE Frame 1, Card C10 Ethernet
94 * controller
95 */
96int iSeries_Device_Information(struct pci_dev *PciDev, char *buffer,
97 int BufferSize)
98{
99 struct iSeries_Device_Node *DevNode =
100 (struct iSeries_Device_Node *)PciDev->sysdata;
101 int len;
102
103 if (DevNode == NULL)
104 return sprintf(buffer,
105 "PCI: iSeries_Device_Information DevNode is NULL");
106
107 if (BufferSize < 128)
108 return 0;
109
110 len = sprintf(buffer, "PCI: Bus%3d, Device%3d, Vendor %04X ",
111 ISERIES_BUS(DevNode), PCI_SLOT(PciDev->devfn),
112 PciDev->vendor);
113 len += sprintf(buffer + len, "Frame%3d, Card %4s ",
114 DevNode->FrameId, DevNode->CardLocation);
115#ifdef CONFIG_PCI
116 if (pci_class_name(PciDev->class >> 8) == 0)
117 len += sprintf(buffer + len, "0x%04X ",
118 (int)(PciDev->class >> 8));
119 else
120 len += sprintf(buffer + len, "%s",
121 pci_class_name(PciDev->class >> 8));
122#endif
123 return len;
124}
125
126/*
127 * Parse the Slot Area
128 */
129void iSeries_Parse_SlotArea(SlotMap *MapPtr, int MapLen,
130 struct iSeries_Device_Node *DevNode)
131{
132 int SlotMapLen = MapLen;
133 SlotMap *SlotMapPtr = MapPtr;
134
135 /*
136 * Parse Slot label until we find the one requested
137 */
138 while (SlotMapLen > 0) {
139 if (SlotMapPtr->AgentId == DevNode->AgentId ) {
140 /*
141 * If the Phb wasn't found yet, grab the first entry found.
142 */
143 if (DevNode->PhbId == 0xff)
144 DevNode->PhbId = SlotMapPtr->PhbId;
145 /* Found it, extract the data. */
146 if (SlotMapPtr->PhbId == DevNode->PhbId ) {
147 memcpy(&DevNode->CardLocation,
148 &SlotMapPtr->CardLocation, 3);
149 DevNode->CardLocation[3] = 0;
150 break;
151 }
152 }
153 /* Point to the next Slot */
154 SlotMapPtr = (SlotMap *)((char *)SlotMapPtr + SLOT_ENTRY_SIZE);
155 SlotMapLen -= SLOT_ENTRY_SIZE;
156 }
157}
158
159/*
160 * Parse the Mfg Area
161 */
162static void iSeries_Parse_MfgArea(u8 *AreaData, int AreaLen,
163 struct iSeries_Device_Node *DevNode)
164{
165 MfgArea *MfgAreaPtr = (MfgArea *)AreaData;
166 int MfgAreaLen = AreaLen;
167 u16 SlotMapFmt = 0;
168
169 /* Parse Mfg Data */
170 while (MfgAreaLen > 0) {
171 int MfgTagLen = MfgAreaPtr->TagLength;
172 /* Frame ID (FI 4649020310 ) */
173 if (MfgAreaPtr->Tag == VpdFruFrameId) /* FI */
174 DevNode->FrameId = MfgAreaPtr->AreaData1;
175 /* Slot Map Format (MF 4D46020004 ) */
176 else if (MfgAreaPtr->Tag == VpdSlotMapFormat) /* MF */
177 SlotMapFmt = (MfgAreaPtr->AreaData1 * 256)
178 + MfgAreaPtr->AreaData2;
179 /* Slot Map (SM 534D90 */
180 else if (MfgAreaPtr->Tag == VpdSlotMap) { /* SM */
181 SlotMap *SlotMapPtr;
182
183 if (SlotMapFmt == 0x1004)
184 SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr
185 + MFG_ENTRY_SIZE + 1);
186 else
187 SlotMapPtr = (SlotMap *)((char *)MfgAreaPtr
188 + MFG_ENTRY_SIZE);
189 iSeries_Parse_SlotArea(SlotMapPtr, MfgTagLen, DevNode);
190 }
191 /*
192 * Point to the next Mfg Area
193 * Use the defined size; sizeof gives the wrong answer
194 */
195 MfgAreaPtr = (MfgArea *)((char *)MfgAreaPtr + MfgTagLen
196 + MFG_ENTRY_SIZE);
197 MfgAreaLen -= (MfgTagLen + MFG_ENTRY_SIZE);
198 }
199}
200
201/*
202 * Look for "BUS". Data is not NULL terminated.
203 * PHBID of 0xFF indicates PHB was not found in VPD Data.
204 */
205static int iSeries_Parse_PhbId(u8 *AreaPtr, int AreaLength)
206{
207 u8 *PhbPtr = AreaPtr;
208 int DataLen = AreaLength;
209 char PhbId = 0xFF;
210
211 while (DataLen > 0) {
212 if ((*PhbPtr == 'B') && (*(PhbPtr + 1) == 'U')
213 && (*(PhbPtr + 2) == 'S')) {
214 PhbPtr += 3;
215 while (*PhbPtr == ' ')
216 ++PhbPtr;
217 PhbId = (*PhbPtr & 0x0F);
218 break;
219 }
220 ++PhbPtr;
221 --DataLen;
222 }
223 return PhbId;
224}
225
226/*
227 * Parse out the VPD Areas
228 */
229static void iSeries_Parse_Vpd(u8 *VpdData, int VpdDataLen,
230 struct iSeries_Device_Node *DevNode)
231{
232 u8 *TagPtr = VpdData;
233 int DataLen = VpdDataLen - 3;
234
235 while ((*TagPtr != VpdEndOfAreaTag) && (DataLen > 0)) {
236 int AreaLen = *(TagPtr + 1) + (*(TagPtr + 2) * 256);
237 u8 *AreaData = TagPtr + 3;
238
239 if (*TagPtr == VpdIdStringTag)
240 DevNode->PhbId = iSeries_Parse_PhbId(AreaData, AreaLen);
241 else if (*TagPtr == VpdVendorAreaTag)
242 iSeries_Parse_MfgArea(AreaData, AreaLen, DevNode);
243 /* Point to next Area. */
244 TagPtr = AreaData + AreaLen;
245 DataLen -= AreaLen;
246 }
247}
248
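The framing walked above is tag/length/value: a tag byte, a 16-bit length stored low byte first, then the payload; a hypothetical vendor area of 16 bytes appears as 0x84 0x10 0x00 followed by the data. A length-decode sketch:

	static int example_vpd_area_len(const u8 *tag_ptr)
	{
		return tag_ptr[1] + (tag_ptr[2] * 256);	/* low byte first */
	}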
249void iSeries_Get_Location_Code(struct iSeries_Device_Node *DevNode)
250{
251 int BusVpdLen = 0;
252 u8 *BusVpdPtr = (u8 *)kmalloc(BUS_VPDSIZE, GFP_KERNEL);
253
254 if (BusVpdPtr == NULL) {
255 printk("PCI: Bus VPD Buffer allocation failure.\n");
256 return;
257 }
258 BusVpdLen = HvCallPci_getBusVpd(ISERIES_BUS(DevNode),
259 ISERIES_HV_ADDR(BusVpdPtr),
260 BUS_VPDSIZE);
261 if (BusVpdLen == 0) {
262 kfree(BusVpdPtr);
263 printk("PCI: Bus VPD Buffer zero length.\n");
264 return;
265 }
266 /* printk("PCI: BusVpdPtr: %p, %d\n",BusVpdPtr, BusVpdLen); */
267 /* Make sure this is what I think it is */
268 if (*BusVpdPtr != VpdIdStringTag) { /* 0x82 */
269 printk("PCI: Bus VPD Buffer missing starting tag.\n");
270 kfree(BusVpdPtr);
271 return;
272 }
273 iSeries_Parse_Vpd(BusVpdPtr,BusVpdLen, DevNode);
274 sprintf(DevNode->Location, "Frame%3d, Card %-4s", DevNode->FrameId,
275 DevNode->CardLocation);
276 kfree(BusVpdPtr);
277}
diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c
new file mode 100644
index 000000000000..aa9e8fdd1a4f
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_htab.c
@@ -0,0 +1,242 @@
1/*
2 * iSeries hashtable management.
3 * Derived from pSeries_htab.c
4 *
5 * SMP scalability work:
6 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <asm/machdep.h>
14#include <asm/pgtable.h>
15#include <asm/mmu.h>
16#include <asm/mmu_context.h>
17#include <asm/iSeries/HvCallHpt.h>
18#include <asm/abs_addr.h>
19#include <linux/spinlock.h>
20
21static spinlock_t iSeries_hlocks[64] __cacheline_aligned_in_smp = { [0 ... 63] = SPIN_LOCK_UNLOCKED};
22
23/*
24 * Very primitive algorithm for picking up a lock
25 */
26static inline void iSeries_hlock(unsigned long slot)
27{
28 if (slot & 0x8)
29 slot = ~slot;
30 spin_lock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
31}
32
33static inline void iSeries_hunlock(unsigned long slot)
34{
35 if (slot & 0x8)
36 slot = ~slot;
37 spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
38}
39
40static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
41 unsigned long prpn, int secondary,
42 unsigned long hpteflags, int bolted, int large)
43{
44 long slot;
45 HPTE lhpte;
46
47 /*
48 * The hypervisor tries both primary and secondary.
49 * If we are being called to insert in the secondary,
50 * it means we have already tried both primary and secondary,
51 * so we return failure immediately.
52 */
53 if (secondary)
54 return -1;
55
56 iSeries_hlock(hpte_group);
57
58 slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
59 BUG_ON(lhpte.dw0.dw0.v);
60
61 if (slot == -1) { /* No available entry found in either group */
62 iSeries_hunlock(hpte_group);
63 return -1;
64 }
65
66 if (slot < 0) { /* MSB set means secondary group */
67 secondary = 1;
68 slot &= 0x7fffffffffffffff;
69 }
70
71 lhpte.dw1.dword1 = 0;
72 lhpte.dw1.dw1.rpn = physRpn_to_absRpn(prpn);
73 lhpte.dw1.flags.flags = hpteflags;
74
75 lhpte.dw0.dword0 = 0;
76 lhpte.dw0.dw0.avpn = va >> 23;
77 lhpte.dw0.dw0.h = secondary;
78 lhpte.dw0.dw0.bolted = bolted;
79 lhpte.dw0.dw0.v = 1;
80
81 /* Now fill in the actual HPTE */
82 HvCallHpt_addValidate(slot, secondary, &lhpte);
83
84 iSeries_hunlock(hpte_group);
85
86 return (secondary << 3) | (slot & 7);
87}
88
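The value returned above packs the group choice into bit 3; a sketch of the same encoding in isolation:

	/* Sketch: secondary = 1, slot 5 within the group encodes to 0xd. */
	static long example_encode_slot(int secondary, long slot)
	{
		return ((long)secondary << 3) | (slot & 7);
	}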
89static unsigned long iSeries_hpte_getword0(unsigned long slot)
90{
91 unsigned long dword0;
92 HPTE hpte;
93
94 HvCallHpt_get(&hpte, slot);
95 dword0 = hpte.dw0.dword0;
96
97 return dword0;
98}
99
100static long iSeries_hpte_remove(unsigned long hpte_group)
101{
102 unsigned long slot_offset;
103 int i;
104 HPTE lhpte;
105
106 /* Pick a random slot to start at */
107 slot_offset = mftb() & 0x7;
108
109 iSeries_hlock(hpte_group);
110
111 for (i = 0; i < HPTES_PER_GROUP; i++) {
112 lhpte.dw0.dword0 =
113 iSeries_hpte_getword0(hpte_group + slot_offset);
114
115 if (!lhpte.dw0.dw0.bolted) {
116 HvCallHpt_invalidateSetSwBitsGet(hpte_group +
117 slot_offset, 0, 0);
118 iSeries_hunlock(hpte_group);
119 return i;
120 }
121
122 slot_offset++;
123 slot_offset &= 0x7;
124 }
125
126 iSeries_hunlock(hpte_group);
127
128 return -1;
129}
130
131/*
132 * The HyperVisor expects the "flags" argument in this form:
133 * bits 0..59 : reserved
134 * bit 60 : N
135 * bits 61..63 : PP2,PP1,PP0
136 */
137static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
138 unsigned long va, int large, int local)
139{
140 HPTE hpte;
141 unsigned long avpn = va >> 23;
142
143 iSeries_hlock(slot);
144
145 HvCallHpt_get(&hpte, slot);
146 if ((hpte.dw0.dw0.avpn == avpn) && (hpte.dw0.dw0.v)) {
147 /*
148 * Hypervisor expects bits as NPPP, which is
149 * different from how they are mapped in our PP.
150 */
151 HvCallHpt_setPp(slot, (newpp & 0x3) | ((newpp & 0x4) << 1));
152 iSeries_hunlock(slot);
153 return 0;
154 }
155 iSeries_hunlock(slot);
156
157 return -1;
158}
159
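The bit shuffle above, in isolation and with a worked value (a sketch): bit 2 of Linux's pp moves up to the hypervisor's N position:

	/* Sketch: newpp = 0x6 (0b110) becomes 0xA (0b1010) in NPPP form. */
	static unsigned long example_pp_to_nppp(unsigned long newpp)
	{
		return (newpp & 0x3) | ((newpp & 0x4) << 1);
	}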
160/*
161 * Functions used to find the PTE for a particular virtual address.
162 * Only used during boot when bolting pages.
163 *
164 * Input : vpn : virtual page number
165 * Output: PTE index within the page table of the entry
166 * -1 on failure
167 */
168static long iSeries_hpte_find(unsigned long vpn)
169{
170 HPTE hpte;
171 long slot;
172
173 /*
174 * The HvCallHpt_findValid interface is as follows:
175 * 0xffffffffffffffff : No entry found.
176 * 0x00000000xxxxxxxx : Entry found in primary group, slot x
177 * 0x80000000xxxxxxxx : Entry found in secondary group, slot x
178 */
179 slot = HvCallHpt_findValid(&hpte, vpn);
180 if (hpte.dw0.dw0.v) {
181 if (slot < 0) {
182 slot &= 0x7fffffffffffffff;
183 slot = -slot;
184 }
185 } else
186 slot = -1;
187 return slot;
188}
189
190/*
191 * Update the page protection bits. Intended to be used to create
192 * guard pages for kernel data structures on pages which are bolted
193 * in the HPT. Assumes pages being operated on will not be stolen.
194 * Does not work on large pages.
195 *
196 * No need to lock here because we should be the only user.
197 */
198static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
199{
200 unsigned long vsid,va,vpn;
201 long slot;
202
203 vsid = get_kernel_vsid(ea);
204 va = (vsid << 28) | (ea & 0x0fffffff);
205 vpn = va >> PAGE_SHIFT;
206 slot = iSeries_hpte_find(vpn);
207 if (slot == -1)
208 panic("updateboltedpp: Could not find page to bolt\n");
209 HvCallHpt_setPp(slot, newpp);
210}
211
212static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
213 int large, int local)
214{
215 HPTE lhpte;
216 unsigned long avpn = va >> 23;
217 unsigned long flags;
218
219 local_irq_save(flags);
220
221 iSeries_hlock(slot);
222
223 lhpte.dw0.dword0 = iSeries_hpte_getword0(slot);
224
225 if ((lhpte.dw0.dw0.avpn == avpn) && lhpte.dw0.dw0.v)
226 HvCallHpt_invalidateSetSwBitsGet(slot, 0, 0);
227
228 iSeries_hunlock(slot);
229
230 local_irq_restore(flags);
231}
232
233void hpte_init_iSeries(void)
234{
235 ppc_md.hpte_invalidate = iSeries_hpte_invalidate;
236 ppc_md.hpte_updatepp = iSeries_hpte_updatepp;
237 ppc_md.hpte_updateboltedpp = iSeries_hpte_updateboltedpp;
238 ppc_md.hpte_insert = iSeries_hpte_insert;
239 ppc_md.hpte_remove = iSeries_hpte_remove;
240
241 htab_finish_init();
242}
diff --git a/arch/ppc64/kernel/iSeries_iommu.c b/arch/ppc64/kernel/iSeries_iommu.c
new file mode 100644
index 000000000000..4e1a47c8a802
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_iommu.c
@@ -0,0 +1,175 @@
1/*
2 * arch/ppc64/kernel/iSeries_iommu.c
3 *
4 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
5 *
6 * Rewrite, cleanup:
7 *
8 * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
9 *
10 * Dynamic DMA mapping support, iSeries-specific parts.
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/types.h>
29#include <linux/dma-mapping.h>
30#include <linux/list.h>
31
32#include <asm/iommu.h>
33#include <asm/machdep.h>
34#include <asm/iSeries/HvCallXm.h>
35#include <asm/iSeries/iSeries_pci.h>
36
37extern struct list_head iSeries_Global_Device_List;
38
39
40static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
41 unsigned long uaddr, enum dma_data_direction direction)
42{
43 u64 rc;
44 union tce_entry tce;
45
46 while (npages--) {
47 tce.te_word = 0;
48 tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT;
49
50 if (tbl->it_type == TCE_VB) {
51 /* Virtual Bus */
52 tce.te_bits.tb_valid = 1;
53 tce.te_bits.tb_allio = 1;
54 if (direction != DMA_TO_DEVICE)
55 tce.te_bits.tb_rdwr = 1;
56 } else {
57 /* PCI Bus */
58 tce.te_bits.tb_rdwr = 1; /* Read allowed */
59 if (direction != DMA_TO_DEVICE)
60 tce.te_bits.tb_pciwr = 1;
61 }
62
63 rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index,
64 tce.te_word);
65 if (rc)
66 panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
67 rc);
68 index++;
69 uaddr += PAGE_SIZE;
70 }
71}
72
73static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
74{
75 u64 rc;
76
77 while (npages--) {
78 rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0);
79 if (rc)
80 panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
81 rc);
82 index++;
83 }
84}
85
86
87/*
88 * This function compares the known tables to find an iommu_table
89 * that has already been built for hardware TCEs.
90 */
91static struct iommu_table *iommu_table_find(struct iommu_table * tbl)
92{
93 struct iSeries_Device_Node *dp;
94
95 list_for_each_entry(dp, &iSeries_Global_Device_List, Device_List) {
96 if ((dp->iommu_table != NULL) &&
97 (dp->iommu_table->it_type == TCE_PCI) &&
98 (dp->iommu_table->it_offset == tbl->it_offset) &&
99 (dp->iommu_table->it_index == tbl->it_index) &&
100 (dp->iommu_table->it_size == tbl->it_size))
101 return dp->iommu_table;
102 }
103 return NULL;
104}
105
106/*
107 * Call Hv with the architected data structure to get TCE table info.
108 * Put the returned data into the Linux representation of the
109 * TCE table data.
110 * The Hardware Tce table comes in three flavors.
111 * 1. TCE table shared between Buses.
112 * 2. TCE table per Bus.
113 * 3. TCE Table per IOA.
114 */
115static void iommu_table_getparms(struct iSeries_Device_Node* dn,
116 struct iommu_table* tbl)
117{
118 struct iommu_table_cb *parms;
119
120 parms = kmalloc(sizeof(*parms), GFP_KERNEL);
121 if (parms == NULL)
122 panic("PCI_DMA: TCE Table Allocation failed.");
123
124 memset(parms, 0, sizeof(*parms));
125
126 parms->itc_busno = ISERIES_BUS(dn);
127 parms->itc_slotno = dn->LogicalSlot;
128 parms->itc_virtbus = 0;
129
130 HvCallXm_getTceTableParms(ISERIES_HV_ADDR(parms));
131
132 if (parms->itc_size == 0)
133 panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
134
135 /* itc_size is in pages worth of table, it_size is in # of entries */
136 tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry);
137 tbl->it_busno = parms->itc_busno;
138 tbl->it_offset = parms->itc_offset;
139 tbl->it_index = parms->itc_index;
140 tbl->it_blocksize = 1;
141 tbl->it_type = TCE_PCI;
142
143 kfree(parms);
144}
145
146
147void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn)
148{
149 struct iommu_table *tbl;
150
151 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
152
153 iommu_table_getparms(dn, tbl);
154
155 /* Look for existing tce table */
156 dn->iommu_table = iommu_table_find(tbl);
157 if (dn->iommu_table == NULL)
158 dn->iommu_table = iommu_init_table(tbl);
159 else
160 kfree(tbl);
161}
162
163static void iommu_dev_setup_iSeries(struct pci_dev *dev) { }
164static void iommu_bus_setup_iSeries(struct pci_bus *bus) { }
165
166void iommu_init_early_iSeries(void)
167{
168 ppc_md.tce_build = tce_build_iSeries;
169 ppc_md.tce_free = tce_free_iSeries;
170
171 ppc_md.iommu_dev_setup = iommu_dev_setup_iSeries;
172 ppc_md.iommu_bus_setup = iommu_bus_setup_iSeries;
173
174 pci_iommu_init();
175}
diff --git a/arch/ppc64/kernel/iSeries_irq.c b/arch/ppc64/kernel/iSeries_irq.c
new file mode 100644
index 000000000000..f831d259dbb7
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_irq.c
@@ -0,0 +1,209 @@
1/************************************************************************/
2/* This module supports the iSeries PCI bus interrupt handling */
3/* Copyright (C) 20yy <Robert L Holtorf> <IBM Corp> */
4/* */
5/* This program is free software; you can redistribute it and/or modify */
6/* it under the terms of the GNU General Public License as published by */
7/* the Free Software Foundation; either version 2 of the License, or */
8/* (at your option) any later version. */
9/* */
10/* This program is distributed in the hope that it will be useful, */
11/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
12/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
13/* GNU General Public License for more details. */
14/* */
15/* You should have received a copy of the GNU General Public License */
16/* along with this program; if not, write to the: */
17/* Free Software Foundation, Inc., */
18/* 59 Temple Place, Suite 330, */
19/* Boston, MA 02111-1307 USA */
20/************************************************************************/
21/* Change Activity: */
22/* Created, December 13, 2000 by Wayne Holm */
23/* End Change Activity */
24/************************************************************************/
25#include <linux/pci.h>
26#include <linux/init.h>
27#include <linux/threads.h>
28#include <linux/smp.h>
29#include <linux/param.h>
30#include <linux/string.h>
31#include <linux/bootmem.h>
32#include <linux/ide.h>
33
34#include <linux/irq.h>
35#include <linux/spinlock.h>
36#include <asm/ppcdebug.h>
37
38#include <asm/iSeries/HvCallPci.h>
39#include <asm/iSeries/HvCallXm.h>
40#include <asm/iSeries/iSeries_irq.h>
41#include <asm/iSeries/XmPciLpEvent.h>
42
43static unsigned int iSeries_startup_IRQ(unsigned int irq);
44static void iSeries_shutdown_IRQ(unsigned int irq);
45static void iSeries_enable_IRQ(unsigned int irq);
46static void iSeries_disable_IRQ(unsigned int irq);
47static void iSeries_end_IRQ(unsigned int irq);
48
49static hw_irq_controller iSeries_IRQ_handler = {
50 .typename = "iSeries irq controller",
51 .startup = iSeries_startup_IRQ,
52 .shutdown = iSeries_shutdown_IRQ,
53 .enable = iSeries_enable_IRQ,
54 .disable = iSeries_disable_IRQ,
55 .end = iSeries_end_IRQ
56};
57
58/* This maps virtual irq numbers to real irqs */
59unsigned int virt_irq_to_real_map[NR_IRQS];
60
61/* The next available virtual irq number */
62/* Note: the pcnet32 driver assumes irq numbers < 2 aren't valid. :( */
63static int next_virtual_irq = 2;
64
65/* This is called by init_IRQ. set in ppc_md.init_IRQ by iSeries_setup.c */
66void __init iSeries_init_IRQ(void)
67{
68 /* Register PCI event handler and open an event path */
69 XmPciLpEvent_init();
70}
71
72/*
73 * This is called out of iSeries_scan_slot to allocate an IRQ for an EADS slot.
74 * It calculates the irq value for the slot.
75 * Note that subBusNumber is always 0 (at the moment at least).
76 */
77int __init iSeries_allocate_IRQ(HvBusNumber busNumber,
78 HvSubBusNumber subBusNumber, HvAgentId deviceId)
79{
80 unsigned int realirq, virtirq;
81 u8 idsel = (deviceId >> 4);
82 u8 function = deviceId & 7;
83
84 virtirq = next_virtual_irq++;
85 realirq = ((busNumber - 1) << 6) + ((idsel - 1) << 3) + function;
86 virt_irq_to_real_map[virtirq] = realirq;
87
88 irq_desc[virtirq].handler = &iSeries_IRQ_handler;
89 return virtirq;
90}
91
92#define REAL_IRQ_TO_BUS(irq) ((((irq) >> 6) & 0xff) + 1)
93#define REAL_IRQ_TO_IDSEL(irq) ((((irq) >> 3) & 7) + 1)
94#define REAL_IRQ_TO_FUNC(irq) ((irq) & 7)
95
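iSeries_allocate_IRQ above and these macros agree on one packing; a round-trip sketch with a worked value (illustrative):

	/* Sketch: bus 1, idsel 2, function 0 packs to real irq 0x08 and back. */
	static unsigned int example_encode_real_irq(unsigned int bus,
			unsigned int idsel, unsigned int function)
	{
		unsigned int rirq = ((bus - 1) << 6) + ((idsel - 1) << 3) + function;

		BUG_ON(REAL_IRQ_TO_BUS(rirq) != bus);
		BUG_ON(REAL_IRQ_TO_IDSEL(rirq) != idsel);
		BUG_ON(REAL_IRQ_TO_FUNC(rirq) != function);
		return rirq;
	}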
96/* This is called by iSeries_activate_IRQs */
97static unsigned int iSeries_startup_IRQ(unsigned int irq)
98{
99 u32 bus, deviceId, function, mask;
100 const u32 subBus = 0;
101 unsigned int rirq = virt_irq_to_real_map[irq];
102
103 bus = REAL_IRQ_TO_BUS(rirq);
104 function = REAL_IRQ_TO_FUNC(rirq);
105 deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function;
106
107 /* Link the IRQ number to the bridge */
108 HvCallXm_connectBusUnit(bus, subBus, deviceId, irq);
109
110 /* Unmask bridge interrupts in the FISR */
111 mask = 0x01010000 << function;
112 HvCallPci_unmaskFisr(bus, subBus, deviceId, mask);
113 iSeries_enable_IRQ(irq);
114 return 0;
115}
116
117/*
118 * This is called out of iSeries_fixup to activate interrupt
119 * generation for usable slots
120 */
121void __init iSeries_activate_IRQs()
122{
123 int irq;
124 unsigned long flags;
125
126 for_each_irq (irq) {
127 irq_desc_t *desc = get_irq_desc(irq);
128
129 if (desc && desc->handler && desc->handler->startup) {
130 spin_lock_irqsave(&desc->lock, flags);
131 desc->handler->startup(irq);
132 spin_unlock_irqrestore(&desc->lock, flags);
133 }
134 }
135}
136
137/* this is not called anywhere currently */
138static void iSeries_shutdown_IRQ(unsigned int irq)
139{
140 u32 bus, deviceId, function, mask;
141 const u32 subBus = 0;
142 unsigned int rirq = virt_irq_to_real_map[irq];
143
144 /* irq should be locked by the caller */
145 bus = REAL_IRQ_TO_BUS(rirq);
146 function = REAL_IRQ_TO_FUNC(rirq);
147 deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function;
148
149 /* Invalidate the IRQ number in the bridge */
150 HvCallXm_connectBusUnit(bus, subBus, deviceId, 0);
151
152 /* Mask bridge interrupts in the FISR */
153 mask = 0x01010000 << function;
154 HvCallPci_maskFisr(bus, subBus, deviceId, mask);
155}
156
157/*
158 * This will be called by device drivers (via disable_IRQ)
159 * to disable INTA in the bridge interrupt status register.
160 */
161static void iSeries_disable_IRQ(unsigned int irq)
162{
163 u32 bus, deviceId, function, mask;
164 const u32 subBus = 0;
165 unsigned int rirq = virt_irq_to_real_map[irq];
166
167 /* The IRQ has already been locked by the caller */
168 bus = REAL_IRQ_TO_BUS(rirq);
169 function = REAL_IRQ_TO_FUNC(rirq);
170 deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function;
171
172 /* Mask secondary INTA */
173 mask = 0x80000000;
174 HvCallPci_maskInterrupts(bus, subBus, deviceId, mask);
175 PPCDBG(PPCDBG_BUSWALK, "iSeries_disable_IRQ 0x%02X.%02X.%02X 0x%04X\n",
176 bus, subBus, deviceId, irq);
177}
178
179/*
180 * This will be called by device drivers (via enable_IRQ)
181 * to enable INTA in the bridge interrupt status register.
182 */
183static void iSeries_enable_IRQ(unsigned int irq)
184{
185 u32 bus, deviceId, function, mask;
186 const u32 subBus = 0;
187 unsigned int rirq = virt_irq_to_real_map[irq];
188
189 /* The IRQ has already been locked by the caller */
190 bus = REAL_IRQ_TO_BUS(rirq);
191 function = REAL_IRQ_TO_FUNC(rirq);
192 deviceId = (REAL_IRQ_TO_IDSEL(rirq) << 4) + function;
193
194 /* Unmask secondary INTA */
195 mask = 0x80000000;
196 HvCallPci_unmaskInterrupts(bus, subBus, deviceId, mask);
197 PPCDBG(PPCDBG_BUSWALK, "iSeries_enable_IRQ 0x%02X.%02X.%02X 0x%04X\n",
198 bus, subBus, deviceId, irq);
199}
200
201/*
202 * Need to define this so ppc_irq_dispatch_handler will NOT call
203 * enable_IRQ at the end of interrupt handling. However, this does
204 * nothing because there is not enough information provided to do
205 * the EOI HvCall. This is done by XmPciLpEvent.c
206 */
207static void iSeries_end_IRQ(unsigned int irq)
208{
209}
diff --git a/arch/ppc64/kernel/iSeries_pci.c b/arch/ppc64/kernel/iSeries_pci.c
new file mode 100644
index 000000000000..bd4c2554f1a0
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_pci.c
@@ -0,0 +1,912 @@
1/*
2 * iSeries_pci.c
3 *
4 * Copyright (C) 2001 Allan Trautman, IBM Corporation
5 *
6 * iSeries specific routines for PCI.
7 *
8 * Based on code from pci.c and iSeries_pci.c 32bit
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24#include <linux/kernel.h>
25#include <linux/list.h>
26#include <linux/string.h>
27#include <linux/init.h>
28#include <linux/module.h>
29#include <linux/ide.h>
30#include <linux/pci.h>
31
32#include <asm/io.h>
33#include <asm/irq.h>
34#include <asm/prom.h>
35#include <asm/machdep.h>
36#include <asm/pci-bridge.h>
37#include <asm/ppcdebug.h>
38#include <asm/iommu.h>
39
40#include <asm/iSeries/HvCallPci.h>
41#include <asm/iSeries/HvCallSm.h>
42#include <asm/iSeries/HvCallXm.h>
43#include <asm/iSeries/LparData.h>
44#include <asm/iSeries/iSeries_irq.h>
45#include <asm/iSeries/iSeries_pci.h>
46#include <asm/iSeries/mf.h>
47
48#include "pci.h"
49
50extern unsigned long io_page_mask;
51
52/*
53 * Forward declares of prototypes.
54 */
55static struct iSeries_Device_Node *find_Device_Node(int bus, int devfn);
56static void scan_PHB_slots(struct pci_controller *Phb);
57static void scan_EADS_bridge(HvBusNumber Bus, HvSubBusNumber SubBus, int IdSel);
58static int scan_bridge_slot(HvBusNumber Bus, struct HvCallPci_BridgeInfo *Info);
59
60LIST_HEAD(iSeries_Global_Device_List);
61
62static int DeviceCount;
63
64/* Counters and control flags. */
65static long Pci_Io_Read_Count;
66static long Pci_Io_Write_Count;
67#if 0
68static long Pci_Cfg_Read_Count;
69static long Pci_Cfg_Write_Count;
70#endif
71static long Pci_Error_Count;
72
73static int Pci_Retry_Max = 3; /* Only retry 3 times */
74static int Pci_Error_Flag = 1; /* Set Retry Error on. */
75
76static struct pci_ops iSeries_pci_ops;
77
78/*
79 * Table defines
80 * Each Entry size is 4 MB * 1024 Entries = 4GB I/O address space.
81 */
82#define IOMM_TABLE_MAX_ENTRIES 1024
83#define IOMM_TABLE_ENTRY_SIZE 0x0000000000400000UL
84#define BASE_IO_MEMORY 0xE000000000000000UL
85
86static unsigned long max_io_memory = 0xE000000000000000UL;
87static long current_iomm_table_entry;
88
89/*
90 * Lookup Tables.
91 */
92static struct iSeries_Device_Node **iomm_table;
93static u8 *iobar_table;
94
95/*
96 * Static and Global variables
97 */
98static char *pci_io_text = "iSeries PCI I/O";
99static DEFINE_SPINLOCK(iomm_table_lock);
100
101/*
102 * iomm_table_initialize
103 *
104 * Allocates and initializes the Address Translation Table and Bar
105 * Tables to get them ready for use. Must be called before any
106 * I/O space is handed out to the device BARs.
107 */
108static void iomm_table_initialize(void)
109{
110 spin_lock(&iomm_table_lock);
111 iomm_table = kmalloc(sizeof(*iomm_table) * IOMM_TABLE_MAX_ENTRIES,
112 GFP_KERNEL);
113 iobar_table = kmalloc(sizeof(*iobar_table) * IOMM_TABLE_MAX_ENTRIES,
114 GFP_KERNEL);
115 spin_unlock(&iomm_table_lock);
116 if ((iomm_table == NULL) || (iobar_table == NULL))
117 panic("PCI: I/O tables allocation failed.\n");
118}
119
120/*
121 * iomm_table_allocate_entry
122 *
123 * Adds a pci_dev entry to the address translation table
124 *
125 * - Allocates the number of entries required in the table based on BAR
126 * size.
127 * - Allocates starting at BASE_IO_MEMORY and increases.
128 * - The size is rounded up to be a multiple of entry size.
129 * - CurrentIndex is incremented to keep track of the last entry.
130 * - Builds the resource entry for allocated BARs.
131 */
132static void iomm_table_allocate_entry(struct pci_dev *dev, int bar_num)
133{
134 struct resource *bar_res = &dev->resource[bar_num];
135 long bar_size = pci_resource_len(dev, bar_num);
136
137 /*
138 * Nothing to allocate; quick exit, skip allocation.
139 */
140 if (bar_size == 0)
141 return;
142 /*
143 * Set Resource values.
144 */
145 spin_lock(&iomm_table_lock);
146 bar_res->name = pci_io_text;
147 bar_res->start =
148 IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry;
149 bar_res->start += BASE_IO_MEMORY;
150 bar_res->end = bar_res->start + bar_size - 1;
151 /*
152 * Allocate the number of table entries needed for BAR.
153 */
154 while (bar_size > 0 ) {
155 iomm_table[current_iomm_table_entry] = dev->sysdata;
156 iobar_table[current_iomm_table_entry] = bar_num;
157 bar_size -= IOMM_TABLE_ENTRY_SIZE;
158 ++current_iomm_table_entry;
159 }
160 max_io_memory = BASE_IO_MEMORY +
161 (IOMM_TABLE_ENTRY_SIZE * current_iomm_table_entry);
162 spin_unlock(&iomm_table_lock);
163}
164
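The allocator's arithmetic in worked form (a sketch): with 4 MB entries, a 6 MB BAR consumes two slots, so the next BAR starts 8 MB further into the window at BASE_IO_MEMORY:

	/* Sketch: count the table slots the loop above consumes for one BAR. */
	static unsigned long example_entries_for_bar(long bar_size)
	{
		unsigned long entries = 0;

		while (bar_size > 0) {	/* same rounding-up loop as above */
			bar_size -= IOMM_TABLE_ENTRY_SIZE;
			++entries;	/* 6 MB -> 2 entries */
		}
		return entries;
	}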
165/*
166 * allocate_device_bars
167 *
168 * - Allocates ALL pci_dev BARs and updates the resources with the
169 * BAR value. BARs with zero length are skipped.
170 * The HvCallPci_getBarParms is used to get the size of the BAR
171 * space. It calls iomm_table_allocate_entry to allocate
172 * each entry.
173 * - Loops through the BAR resources (0 - 5) and the ROM,
174 * which is resource (6).
175 */
176static void allocate_device_bars(struct pci_dev *dev)
177{
178 struct resource *bar_res;
179 int bar_num;
180
181 for (bar_num = 0; bar_num <= PCI_ROM_RESOURCE; ++bar_num) {
182 bar_res = &dev->resource[bar_num];
183 iomm_table_allocate_entry(dev, bar_num);
184 }
185}
186
187/*
188 * Log error information to system console.
189 * Filter out the device not there errors.
190 * PCI: EADs Connect Failed 0x18.58.10 Rc: 0x00xx
191 * PCI: Read Vendor Failed 0x18.58.10 Rc: 0x00xx
192 * PCI: Connect Bus Unit Failed 0x18.58.10 Rc: 0x00xx
193 */
194static void pci_Log_Error(char *Error_Text, int Bus, int SubBus,
195 int AgentId, int HvRc)
196{
197 if (HvRc == 0x0302)
198 return;
199 printk(KERN_ERR "PCI: %s Failed: 0x%02X.%02X.%02X Rc: 0x%04X",
200 Error_Text, Bus, SubBus, AgentId, HvRc);
201}
202
203/*
204 * build_device_node(HvBusNumber Bus, HvSubBusNumber SubBus, int AgentId, int Function)
205 */
206static struct iSeries_Device_Node *build_device_node(HvBusNumber Bus,
207 HvSubBusNumber SubBus, int AgentId, int Function)
208{
209 struct iSeries_Device_Node *node;
210
211 PPCDBG(PPCDBG_BUSWALK,
212 "-build_device_node 0x%02X.%02X.%02X Function: %02X\n",
213 Bus, SubBus, AgentId, Function);
214
215 node = kmalloc(sizeof(struct iSeries_Device_Node), GFP_KERNEL);
216 if (node == NULL)
217 return NULL;
218
219 memset(node, 0, sizeof(struct iSeries_Device_Node));
220 list_add_tail(&node->Device_List, &iSeries_Global_Device_List);
221#if 0
222 node->DsaAddr = ((u64)Bus << 48) + ((u64)SubBus << 40) + ((u64)0x10 << 32);
223#endif
224 node->DsaAddr.DsaAddr = 0;
225 node->DsaAddr.Dsa.busNumber = Bus;
226 node->DsaAddr.Dsa.subBusNumber = SubBus;
227 node->DsaAddr.Dsa.deviceId = 0x10;
228 node->AgentId = AgentId;
229 node->DevFn = PCI_DEVFN(ISERIES_ENCODE_DEVICE(AgentId), Function);
230 node->IoRetry = 0;
231 iSeries_Get_Location_Code(node);
232 return node;
233}
234
235/*
236 * unsigned long __init find_and_init_phbs(void)
237 *
238 * Description:
239 * This function checks for all possible system PCI host bridges that connect
240 * PCI buses. The system hypervisor is queried as to the guest partition
241 * ownership status. A pci_controller is built for any bus which is partially
242 * owned or fully owned by this guest partition.
243 */
244unsigned long __init find_and_init_phbs(void)
245{
246 struct pci_controller *phb;
247 HvBusNumber bus;
248
249 PPCDBG(PPCDBG_BUSWALK, "find_and_init_phbs Entry\n");
250
251 /* Check all possible buses. */
252 for (bus = 0; bus < 256; bus++) {
253 int ret = HvCallXm_testBus(bus);
254 if (ret == 0) {
255 printk("bus %d appears to exist\n", bus);
256
257 phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), GFP_KERNEL);
258 if (phb == NULL)
259 return -ENOMEM;
260 pci_setup_pci_controller(phb);
261
262 phb->pci_mem_offset = phb->local_number = bus;
263 phb->first_busno = bus;
264 phb->last_busno = bus;
265 phb->ops = &iSeries_pci_ops;
266
267 PPCDBG(PPCDBG_BUSWALK, "PCI:Create iSeries pci_controller(%p), Bus: %04X\n",
268 phb, bus);
269
270 /* Find and connect the devices. */
271 scan_PHB_slots(phb);
272 }
273 /*
274 * Check for Unexpected Return code, a clue that something
275 * has gone wrong.
276 */
277 else if (ret != 0x0301)
278 printk(KERN_ERR "Unexpected Return on Probe(0x%04X): 0x%04X",
279 bus, ret);
280 }
281 return 0;
282}
283
284/*
285 * iSeries_pcibios_init
286 *
287 * Chance to initialize any structures or variables before the PCI bus walk.
288 */
289void iSeries_pcibios_init(void)
290{
291 PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Entry.\n");
292 iomm_table_initialize();
293 find_and_init_phbs();
294 io_page_mask = -1;
295 PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_init Exit.\n");
296}
297
298/*
299 * iSeries_pci_final_fixup(void)
300 */
301void __init iSeries_pci_final_fixup(void)
302{
303 struct pci_dev *pdev = NULL;
304 struct iSeries_Device_Node *node;
305 char Buffer[256];
306 int DeviceCount = 0;
307
308 PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup Entry.\n");
309
310 /* Fix up at the device node and pci_dev relationship */
311 mf_display_src(0xC9000100);
312
313 printk("pcibios_final_fixup\n");
314 for_each_pci_dev(pdev) {
315 node = find_Device_Node(pdev->bus->number, pdev->devfn);
316 printk("pci dev %p (%x.%x), node %p\n", pdev,
317 pdev->bus->number, pdev->devfn, node);
318
319 if (node != NULL) {
320 ++DeviceCount;
321 pdev->sysdata = (void *)node;
322 node->PciDev = pdev;
323 PPCDBG(PPCDBG_BUSWALK,
324 "pdev 0x%p <==> DevNode 0x%p\n",
325 pdev, node);
326 allocate_device_bars(pdev);
327 iSeries_Device_Information(pdev, Buffer,
328 sizeof(Buffer));
329 printk("%d. %s\n", DeviceCount, Buffer);
330 iommu_devnode_init_iSeries(node);
331 pdev->irq = node->Irq; /* only valid when a node was found */
332 } else
333 printk("PCI: Device Tree not found for 0x%016lX\n",
334 (unsigned long)pdev);
335 }
336 iSeries_activate_IRQs();
337 mf_display_src(0xC9000200);
338}
339
340void pcibios_fixup_bus(struct pci_bus *PciBus)
341{
342 PPCDBG(PPCDBG_BUSWALK, "iSeries_pcibios_fixup_bus(0x%04X) Entry.\n",
343 PciBus->number);
344}
345
346void pcibios_fixup_resources(struct pci_dev *pdev)
347{
348 PPCDBG(PPCDBG_BUSWALK, "fixup_resources pdev %p\n", pdev);
349}
350
351/*
352 * Loop through each node function to find usable EADs bridges.
353 */
354static void scan_PHB_slots(struct pci_controller *Phb)
355{
356 struct HvCallPci_DeviceInfo *DevInfo;
357 HvBusNumber bus = Phb->local_number; /* System Bus */
358 const HvSubBusNumber SubBus = 0; /* EADs is always 0. */
359 int HvRc = 0;
360 int IdSel;
361 const int MaxAgents = 8;
362
363 DevInfo = (struct HvCallPci_DeviceInfo*)
364 kmalloc(sizeof(struct HvCallPci_DeviceInfo), GFP_KERNEL);
365 if (DevInfo == NULL)
366 return;
367
368 /*
369 * Probe for EADs Bridges
370 */
371 for (IdSel = 1; IdSel < MaxAgents; ++IdSel) {
372 HvRc = HvCallPci_getDeviceInfo(bus, SubBus, IdSel,
373 ISERIES_HV_ADDR(DevInfo),
374 sizeof(struct HvCallPci_DeviceInfo));
375 if (HvRc == 0) {
376 if (DevInfo->deviceType == HvCallPci_NodeDevice)
377 scan_EADS_bridge(bus, SubBus, IdSel);
378 else
379 printk("PCI: Invalid System Configuration(0x%02X)"
380 " for bus 0x%02x id 0x%02x.\n",
381 DevInfo->deviceType, bus, IdSel);
382 }
383 else
384 pci_Log_Error("getDeviceInfo", bus, SubBus, IdSel, HvRc);
385 }
386 kfree(DevInfo);
387}
388
389static void scan_EADS_bridge(HvBusNumber bus, HvSubBusNumber SubBus,
390 int IdSel)
391{
392 struct HvCallPci_BridgeInfo *BridgeInfo;
393 HvAgentId AgentId;
394 int Function;
395 int HvRc;
396
397 BridgeInfo = (struct HvCallPci_BridgeInfo *)
398 kmalloc(sizeof(struct HvCallPci_BridgeInfo), GFP_KERNEL);
399 if (BridgeInfo == NULL)
400 return;
401
402	/* Note: hvSubBus and irq are always 0 at this level! */
403 for (Function = 0; Function < 8; ++Function) {
404 AgentId = ISERIES_PCI_AGENTID(IdSel, Function);
405 HvRc = HvCallXm_connectBusUnit(bus, SubBus, AgentId, 0);
406 if (HvRc == 0) {
407 printk("found device at bus %d idsel %d func %d (AgentId %x)\n",
408 bus, IdSel, Function, AgentId);
409 /* Connect EADs: 0x18.00.12 = 0x00 */
410 PPCDBG(PPCDBG_BUSWALK,
411 "PCI:Connect EADs: 0x%02X.%02X.%02X\n",
412 bus, SubBus, AgentId);
413 HvRc = HvCallPci_getBusUnitInfo(bus, SubBus, AgentId,
414 ISERIES_HV_ADDR(BridgeInfo),
415 sizeof(struct HvCallPci_BridgeInfo));
416 if (HvRc == 0) {
417 printk("bridge info: type %x subbus %x maxAgents %x maxsubbus %x logslot %x\n",
418 BridgeInfo->busUnitInfo.deviceType,
419 BridgeInfo->subBusNumber,
420 BridgeInfo->maxAgents,
421 BridgeInfo->maxSubBusNumber,
422 BridgeInfo->logicalSlotNumber);
423 PPCDBG(PPCDBG_BUSWALK,
424 "PCI: BridgeInfo, Type:0x%02X, SubBus:0x%02X, MaxAgents:0x%02X, MaxSubBus: 0x%02X, LSlot: 0x%02X\n",
425 BridgeInfo->busUnitInfo.deviceType,
426 BridgeInfo->subBusNumber,
427 BridgeInfo->maxAgents,
428 BridgeInfo->maxSubBusNumber,
429 BridgeInfo->logicalSlotNumber);
430
431 if (BridgeInfo->busUnitInfo.deviceType ==
432 HvCallPci_BridgeDevice) {
433 /* Scan_Bridge_Slot...: 0x18.00.12 */
434 scan_bridge_slot(bus, BridgeInfo);
435 } else
436 printk("PCI: Invalid Bridge Configuration(0x%02X)",
437 BridgeInfo->busUnitInfo.deviceType);
438 }
439 } else if (HvRc != 0x000B)
440 pci_Log_Error("EADs Connect",
441 bus, SubBus, AgentId, HvRc);
442 }
443 kfree(BridgeInfo);
444}
445
446/*
447 * This assumes that the node slot is always on the primary bus!
448 */
449static int scan_bridge_slot(HvBusNumber Bus,
450 struct HvCallPci_BridgeInfo *BridgeInfo)
451{
452 struct iSeries_Device_Node *node;
453 HvSubBusNumber SubBus = BridgeInfo->subBusNumber;
454 u16 VendorId = 0;
455 int HvRc = 0;
456 u8 Irq = 0;
457 int IdSel = ISERIES_GET_DEVICE_FROM_SUBBUS(SubBus);
458 int Function = ISERIES_GET_FUNCTION_FROM_SUBBUS(SubBus);
459 HvAgentId EADsIdSel = ISERIES_PCI_AGENTID(IdSel, Function);
460
461 /* iSeries_allocate_IRQ.: 0x18.00.12(0xA3) */
462 Irq = iSeries_allocate_IRQ(Bus, 0, EADsIdSel);
463 PPCDBG(PPCDBG_BUSWALK,
464 "PCI:- allocate and assign IRQ 0x%02X.%02X.%02X = 0x%02X\n",
465 Bus, 0, EADsIdSel, Irq);
466
467 /*
468 * Connect all functions of any device found.
469 */
470 for (IdSel = 1; IdSel <= BridgeInfo->maxAgents; ++IdSel) {
471 for (Function = 0; Function < 8; ++Function) {
472 HvAgentId AgentId = ISERIES_PCI_AGENTID(IdSel, Function);
473 HvRc = HvCallXm_connectBusUnit(Bus, SubBus,
474 AgentId, Irq);
475 if (HvRc != 0) {
476 pci_Log_Error("Connect Bus Unit",
477 Bus, SubBus, AgentId, HvRc);
478 continue;
479 }
480
481 HvRc = HvCallPci_configLoad16(Bus, SubBus, AgentId,
482 PCI_VENDOR_ID, &VendorId);
483 if (HvRc != 0) {
484 pci_Log_Error("Read Vendor",
485 Bus, SubBus, AgentId, HvRc);
486 continue;
487 }
488 printk("read vendor ID: %x\n", VendorId);
489
490 /* FoundDevice: 0x18.28.10 = 0x12AE */
491 PPCDBG(PPCDBG_BUSWALK,
492 "PCI:- FoundDevice: 0x%02X.%02X.%02X = 0x%04X, irq %d\n",
493 Bus, SubBus, AgentId, VendorId, Irq);
494 HvRc = HvCallPci_configStore8(Bus, SubBus, AgentId,
495 PCI_INTERRUPT_LINE, Irq);
496 if (HvRc != 0)
497 pci_Log_Error("PciCfgStore Irq Failed!",
498 Bus, SubBus, AgentId, HvRc);
499
500 ++DeviceCount;
501 node = build_device_node(Bus, SubBus, EADsIdSel, Function);
502 node->Vendor = VendorId;
503 node->Irq = Irq;
504 node->LogicalSlot = BridgeInfo->logicalSlotNumber;
505
506 } /* for (Function = 0; Function < 8; ++Function) */
507	} /* for (IdSel = 1; IdSel <= BridgeInfo->maxAgents; ++IdSel) */
508 return HvRc;
509}
510
511/*
512 * I/O memory copies MUST use MMIO commands on iSeries.
513 * To do: for performance, make the hv call directly.
514 */
515void iSeries_memset_io(volatile void __iomem *dest, char c, size_t Count)
516{
517 u8 ByteValue = c;
518 long NumberOfBytes = Count;
519
520 while (NumberOfBytes > 0) {
521 iSeries_Write_Byte(ByteValue, dest++);
522		--NumberOfBytes;
523 }
524}
525EXPORT_SYMBOL(iSeries_memset_io);
526
527void iSeries_memcpy_toio(volatile void __iomem *dest, void *source, size_t count)
528{
529 char *src = source;
530 long NumberOfBytes = count;
531
532 while (NumberOfBytes > 0) {
533 iSeries_Write_Byte(*src++, dest++);
534		--NumberOfBytes;
535 }
536}
537EXPORT_SYMBOL(iSeries_memcpy_toio);
538
539void iSeries_memcpy_fromio(void *dest, const volatile void __iomem *src, size_t count)
540{
541 char *dst = dest;
542 long NumberOfBytes = count;
543
544 while (NumberOfBytes > 0) {
545 *dst++ = iSeries_Read_Byte(src++);
546		--NumberOfBytes;
547 }
548}
549EXPORT_SYMBOL(iSeries_memcpy_fromio);
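/*
 * Illustrative usage sketch, not compiled: these helpers stand in for
 * memset()/memcpy() on ioremapped iSeries space.  'bar', 'addr' and
 * 'size' are hypothetical; 'bar' would come from ioremap().
 */
#if 0
	void __iomem *bar = ioremap(addr, size);	/* addr/size assumed */
	char buf[16];

	iSeries_memset_io(bar, 0, sizeof(buf));
	iSeries_memcpy_fromio(buf, bar, sizeof(buf));
#endif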
550
551/*
552 * Look down the chain to find the matching device node.
553 */
554static struct iSeries_Device_Node *find_Device_Node(int bus, int devfn)
555{
556 struct list_head *pos;
557
558 list_for_each(pos, &iSeries_Global_Device_List) {
559 struct iSeries_Device_Node *node =
560 list_entry(pos, struct iSeries_Device_Node, Device_List);
561
562 if ((bus == ISERIES_BUS(node)) && (devfn == node->DevFn))
563 return node;
564 }
565 return NULL;
566}
567
568#if 0
569/*
570 * Returns the device node for the passed pci_dev
571 * Sanity Check Node PciDev to passed pci_dev
572 * If none is found, returns a NULL which the client must handle.
573 */
574static struct iSeries_Device_Node *get_Device_Node(struct pci_dev *pdev)
575{
576 struct iSeries_Device_Node *node;
577
578 node = pdev->sysdata;
579 if (node == NULL || node->PciDev != pdev)
580 node = find_Device_Node(pdev->bus->number, pdev->devfn);
581 return node;
582}
583#endif
584
585/*
586 * Config space read and write functions.
587 * For now at least, we look for the device node for the bus and devfn
588 * that we are asked to access. It may be possible to translate the devfn
589 * to a subbus and deviceid more directly.
590 */
591static u64 hv_cfg_read_func[4] = {
592 HvCallPciConfigLoad8, HvCallPciConfigLoad16,
593 HvCallPciConfigLoad32, HvCallPciConfigLoad32
594};
595
596static u64 hv_cfg_write_func[4] = {
597 HvCallPciConfigStore8, HvCallPciConfigStore16,
598 HvCallPciConfigStore32, HvCallPciConfigStore32
599};
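/*
 * Worked example, not compiled: config accesses arrive with size 1,
 * 2 or 4, and (size - 1) & 3 maps them onto the tables above:
 *   size 1 -> index 0 -> HvCallPciConfigLoad8/Store8
 *   size 2 -> index 1 -> HvCallPciConfigLoad16/Store16
 *   size 4 -> index 3 -> HvCallPciConfigLoad32/Store32
 * so both 32-bit slots hold the 32-bit hypervisor call.
 */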
600
601/*
602 * Read PCI config space
603 */
604static int iSeries_pci_read_config(struct pci_bus *bus, unsigned int devfn,
605 int offset, int size, u32 *val)
606{
607 struct iSeries_Device_Node *node = find_Device_Node(bus->number, devfn);
608 u64 fn;
609 struct HvCallPci_LoadReturn ret;
610
611 if (node == NULL)
612 return PCIBIOS_DEVICE_NOT_FOUND;
613 if (offset > 255) {
614 *val = ~0;
615 return PCIBIOS_BAD_REGISTER_NUMBER;
616 }
617
618 fn = hv_cfg_read_func[(size - 1) & 3];
619 HvCall3Ret16(fn, &ret, node->DsaAddr.DsaAddr, offset, 0);
620
621 if (ret.rc != 0) {
622 *val = ~0;
623 return PCIBIOS_DEVICE_NOT_FOUND; /* or something */
624 }
625
626 *val = ret.value;
627 return 0;
628}
629
630/*
631 * Write PCI config space
632 */
633
634static int iSeries_pci_write_config(struct pci_bus *bus, unsigned int devfn,
635 int offset, int size, u32 val)
636{
637 struct iSeries_Device_Node *node = find_Device_Node(bus->number, devfn);
638 u64 fn;
639 u64 ret;
640
641 if (node == NULL)
642 return PCIBIOS_DEVICE_NOT_FOUND;
643 if (offset > 255)
644 return PCIBIOS_BAD_REGISTER_NUMBER;
645
646 fn = hv_cfg_write_func[(size - 1) & 3];
647 ret = HvCall4(fn, node->DsaAddr.DsaAddr, offset, val, 0);
648
649 if (ret != 0)
650 return PCIBIOS_DEVICE_NOT_FOUND;
651
652 return 0;
653}
654
655static struct pci_ops iSeries_pci_ops = {
656 .read = iSeries_pci_read_config,
657 .write = iSeries_pci_write_config
658};
659
660/*
661 * Check Return Code
662 * -> On Failure, print and log information.
663 *	Increment the retry count; if it exceeds the max, panic the partition.
664 * -> If in retry, print and log success
665 *
666 * PCI: Device 23.90 ReadL I/O Error( 0): 0x1234
667 * PCI: Device 23.90 ReadL Retry( 1)
668 * PCI: Device 23.90 ReadL Retry Successful(1)
669 */
670static int CheckReturnCode(char *TextHdr, struct iSeries_Device_Node *DevNode,
671 u64 ret)
672{
673 if (ret != 0) {
674 ++Pci_Error_Count;
675 ++DevNode->IoRetry;
676 printk("PCI: %s: Device 0x%04X:%02X I/O Error(%2d): 0x%04X\n",
677 TextHdr, DevNode->DsaAddr.Dsa.busNumber, DevNode->DevFn,
678 DevNode->IoRetry, (int)ret);
679 /*
680		 * Bump the retry count and check whether it has been exceeded.
681		 * If so, panic the system.
682 */
683 if ((DevNode->IoRetry > Pci_Retry_Max) &&
684 (Pci_Error_Flag > 0)) {
685 mf_display_src(0xB6000103);
686 panic_timeout = 0;
687 panic("PCI: Hardware I/O Error, SRC B6000103, "
688 "Automatic Reboot Disabled.\n");
689 }
690		return -1;	/* Retry */
691 }
692	/* If a retry was in progress, reset the retry count */
693 if (DevNode->IoRetry > 0)
694 DevNode->IoRetry = 0;
695 return 0;
696}
697
698/*
699 * Translate the I/O Address into a device node, bar, and bar offset.
700 * Note: make sure the passed variables end up on the caller's stack
701 * rather than in globals.
702 */
703static inline struct iSeries_Device_Node *xlate_iomm_address(
704 const volatile void __iomem *IoAddress,
705 u64 *dsaptr, u64 *BarOffsetPtr)
706{
707 unsigned long OrigIoAddr;
708 unsigned long BaseIoAddr;
709 unsigned long TableIndex;
710 struct iSeries_Device_Node *DevNode;
711
712 OrigIoAddr = (unsigned long __force)IoAddress;
713 if ((OrigIoAddr < BASE_IO_MEMORY) || (OrigIoAddr >= max_io_memory))
714 return NULL;
715 BaseIoAddr = OrigIoAddr - BASE_IO_MEMORY;
716 TableIndex = BaseIoAddr / IOMM_TABLE_ENTRY_SIZE;
717 DevNode = iomm_table[TableIndex];
718
719 if (DevNode != NULL) {
720 int barnum = iobar_table[TableIndex];
721 *dsaptr = DevNode->DsaAddr.DsaAddr | (barnum << 24);
722 *BarOffsetPtr = BaseIoAddr % IOMM_TABLE_ENTRY_SIZE;
723 } else
724 panic("PCI: Invalid PCI IoAddress detected!\n");
725 return DevNode;
726}
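/*
 * Worked example, not compiled, with hypothetical constants: if
 * BASE_IO_MEMORY were 0xE000000000000000 and IOMM_TABLE_ENTRY_SIZE
 * were 0x400000 (one 4MB window per entry), then for an IoAddress
 * of 0xE000000000C01234:
 *   BaseIoAddr = 0x0000000000C01234
 *   TableIndex = 0xC01234 / 0x400000 = 3
 *   BarOffset  = 0xC01234 % 0x400000 = 0x1234
 */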
727
728/*
729 * Read MM I/O Instructions for the iSeries
730 * On MM I/O error, all ones are returned and iSeries_pci_IoError is called;
731 * otherwise, data is returned in big-endian format.
732 *
733 * iSeries_Read_Byte = Read Byte ( 8 bit)
734 * iSeries_Read_Word = Read Word (16 bit)
735 * iSeries_Read_Long = Read Long (32 bit)
736 */
737u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress)
738{
739 u64 BarOffset;
740 u64 dsa;
741 struct HvCallPci_LoadReturn ret;
742 struct iSeries_Device_Node *DevNode =
743 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
744
745 if (DevNode == NULL) {
746 static unsigned long last_jiffies;
747 static int num_printed;
748
749 if ((jiffies - last_jiffies) > 60 * HZ) {
750 last_jiffies = jiffies;
751 num_printed = 0;
752 }
753 if (num_printed++ < 10)
754 printk(KERN_ERR "iSeries_Read_Byte: invalid access at IO address %p\n", IoAddress);
755 return 0xff;
756 }
757 do {
758 ++Pci_Io_Read_Count;
759 HvCall3Ret16(HvCallPciBarLoad8, &ret, dsa, BarOffset, 0);
760 } while (CheckReturnCode("RDB", DevNode, ret.rc) != 0);
761
762 return (u8)ret.value;
763}
764EXPORT_SYMBOL(iSeries_Read_Byte);
765
766u16 iSeries_Read_Word(const volatile void __iomem *IoAddress)
767{
768 u64 BarOffset;
769 u64 dsa;
770 struct HvCallPci_LoadReturn ret;
771 struct iSeries_Device_Node *DevNode =
772 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
773
774 if (DevNode == NULL) {
775 static unsigned long last_jiffies;
776 static int num_printed;
777
778 if ((jiffies - last_jiffies) > 60 * HZ) {
779 last_jiffies = jiffies;
780 num_printed = 0;
781 }
782 if (num_printed++ < 10)
783 printk(KERN_ERR "iSeries_Read_Word: invalid access at IO address %p\n", IoAddress);
784 return 0xffff;
785 }
786 do {
787 ++Pci_Io_Read_Count;
788 HvCall3Ret16(HvCallPciBarLoad16, &ret, dsa,
789 BarOffset, 0);
790 } while (CheckReturnCode("RDW", DevNode, ret.rc) != 0);
791
792 return swab16((u16)ret.value);
793}
794EXPORT_SYMBOL(iSeries_Read_Word);
795
796u32 iSeries_Read_Long(const volatile void __iomem *IoAddress)
797{
798 u64 BarOffset;
799 u64 dsa;
800 struct HvCallPci_LoadReturn ret;
801 struct iSeries_Device_Node *DevNode =
802 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
803
804 if (DevNode == NULL) {
805 static unsigned long last_jiffies;
806 static int num_printed;
807
808 if ((jiffies - last_jiffies) > 60 * HZ) {
809 last_jiffies = jiffies;
810 num_printed = 0;
811 }
812 if (num_printed++ < 10)
813 printk(KERN_ERR "iSeries_Read_Long: invalid access at IO address %p\n", IoAddress);
814 return 0xffffffff;
815 }
816 do {
817 ++Pci_Io_Read_Count;
818 HvCall3Ret16(HvCallPciBarLoad32, &ret, dsa,
819 BarOffset, 0);
820 } while (CheckReturnCode("RDL", DevNode, ret.rc) != 0);
821
822 return swab32((u32)ret.value);
823}
824EXPORT_SYMBOL(iSeries_Read_Long);
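/*
 * Illustrative sketch, not compiled: PCI is little-endian and the CPU
 * big-endian, so the swab16()/swab32() calls above restore CPU byte
 * order.  A device register holding 0x12345678 would arrive as:
 */
#if 0
	u32 raw = 0x78563412;		/* value as loaded over the bus */
	u32 val = swab32(raw);		/* 0x12345678 in CPU order */
#endif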
825
826/*
827 * Write MM I/O Instructions for the iSeries
828 *
829 * iSeries_Write_Byte = Write Byte (8 bit)
830 * iSeries_Write_Word = Write Word(16 bit)
831 * iSeries_Write_Long = Write Long(32 bit)
832 */
833void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress)
834{
835 u64 BarOffset;
836 u64 dsa;
837 u64 rc;
838 struct iSeries_Device_Node *DevNode =
839 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
840
841 if (DevNode == NULL) {
842 static unsigned long last_jiffies;
843 static int num_printed;
844
845 if ((jiffies - last_jiffies) > 60 * HZ) {
846 last_jiffies = jiffies;
847 num_printed = 0;
848 }
849 if (num_printed++ < 10)
850 printk(KERN_ERR "iSeries_Write_Byte: invalid access at IO address %p\n", IoAddress);
851 return;
852 }
853 do {
854 ++Pci_Io_Write_Count;
855 rc = HvCall4(HvCallPciBarStore8, dsa, BarOffset, data, 0);
856 } while (CheckReturnCode("WWB", DevNode, rc) != 0);
857}
858EXPORT_SYMBOL(iSeries_Write_Byte);
859
860void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress)
861{
862 u64 BarOffset;
863 u64 dsa;
864 u64 rc;
865 struct iSeries_Device_Node *DevNode =
866 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
867
868 if (DevNode == NULL) {
869 static unsigned long last_jiffies;
870 static int num_printed;
871
872 if ((jiffies - last_jiffies) > 60 * HZ) {
873 last_jiffies = jiffies;
874 num_printed = 0;
875 }
876 if (num_printed++ < 10)
877 printk(KERN_ERR "iSeries_Write_Word: invalid access at IO address %p\n", IoAddress);
878 return;
879 }
880 do {
881 ++Pci_Io_Write_Count;
882 rc = HvCall4(HvCallPciBarStore16, dsa, BarOffset, swab16(data), 0);
883 } while (CheckReturnCode("WWW", DevNode, rc) != 0);
884}
885EXPORT_SYMBOL(iSeries_Write_Word);
886
887void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress)
888{
889 u64 BarOffset;
890 u64 dsa;
891 u64 rc;
892 struct iSeries_Device_Node *DevNode =
893 xlate_iomm_address(IoAddress, &dsa, &BarOffset);
894
895 if (DevNode == NULL) {
896 static unsigned long last_jiffies;
897 static int num_printed;
898
899 if ((jiffies - last_jiffies) > 60 * HZ) {
900 last_jiffies = jiffies;
901 num_printed = 0;
902 }
903 if (num_printed++ < 10)
904 printk(KERN_ERR "iSeries_Write_Long: invalid access at IO address %p\n", IoAddress);
905 return;
906 }
907 do {
908 ++Pci_Io_Write_Count;
909 rc = HvCall4(HvCallPciBarStore32, dsa, BarOffset, swab32(data), 0);
910 } while (CheckReturnCode("WWL", DevNode, rc) != 0);
911}
912EXPORT_SYMBOL(iSeries_Write_Long);
diff --git a/arch/ppc64/kernel/iSeries_pci_reset.c b/arch/ppc64/kernel/iSeries_pci_reset.c
new file mode 100644
index 000000000000..0f785e4584f7
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_pci_reset.c
@@ -0,0 +1,104 @@
1#define PCIFR(...)
2/************************************************************************/
3/* File iSeries_pci_reset.c created by Allan Trautman on Mar 21 2001. */
4/************************************************************************/
5/* This code supports the pci interface on the IBM iSeries systems. */
6/* Copyright (C) 20yy <Allan H Trautman> <IBM Corp> */
7/* */
8/* This program is free software; you can redistribute it and/or modify */
9/* it under the terms of the GNU General Public License as published by */
10/* the Free Software Foundation; either version 2 of the License, or */
11/* (at your option) any later version. */
12/* */
13/* This program is distributed in the hope that it will be useful, */
14/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
16/* GNU General Public License for more details. */
17/* */
18/* You should have received a copy of the GNU General Public License */
19/* along with this program; if not, write to the: */
20/* Free Software Foundation, Inc., */
21/* 59 Temple Place, Suite 330, */
22/* Boston, MA 02111-1307 USA */
23/************************************************************************/
24/* Change Activity: */
25/* Created, March 20, 2001 */
26/* April 30, 2001, Added return codes on functions. */
27/* September 10, 2001, Ported to ppc64. */
28/* End Change Activity */
29/************************************************************************/
30#include <linux/kernel.h>
31#include <linux/init.h>
32#include <linux/module.h>
33#include <linux/pci.h>
34#include <linux/irq.h>
35#include <linux/delay.h>
36
37#include <asm/io.h>
38#include <asm/iSeries/HvCallPci.h>
39#include <asm/iSeries/HvTypes.h>
40#include <asm/iSeries/mf.h>
41#include <asm/pci.h>
42
43#include <asm/iSeries/iSeries_pci.h>
44#include "pci.h"
45
46/*
47 * Interface to toggle the reset line
48 * Time is in .1 seconds, need for seconds.
49 */
50int iSeries_Device_ToggleReset(struct pci_dev *PciDev, int AssertTime,
51 int DelayTime)
52{
53 unsigned int AssertDelay, WaitDelay;
54 struct iSeries_Device_Node *DeviceNode =
55 (struct iSeries_Device_Node *)PciDev->sysdata;
56
57 if (DeviceNode == NULL) {
58 printk("PCI: Pci Reset Failed, Device Node not found for pci_dev %p\n",
59 PciDev);
60 return -1;
61 }
62 /*
63 * Set defaults, Assert is .5 second, Wait is 3 seconds.
64 */
65 if (AssertTime == 0)
66 AssertDelay = 500;
67 else
68 AssertDelay = AssertTime * 100;
69
70 if (DelayTime == 0)
71 WaitDelay = 3000;
72 else
73 WaitDelay = DelayTime * 100;
74
75 /*
76 * Assert reset
77 */
78 DeviceNode->ReturnCode = HvCallPci_setSlotReset(ISERIES_BUS(DeviceNode),
79 0x00, DeviceNode->AgentId, 1);
80 if (DeviceNode->ReturnCode == 0) {
81 msleep(AssertDelay); /* Sleep for the time */
82 DeviceNode->ReturnCode =
83 HvCallPci_setSlotReset(ISERIES_BUS(DeviceNode),
84 0x00, DeviceNode->AgentId, 0);
85
86 /*
87 * Wait for device to reset
88 */
89 msleep(WaitDelay);
90 }
91 if (DeviceNode->ReturnCode == 0)
92		PCIFR("Slot 0x%04X.%02X Reset\n", ISERIES_BUS(DeviceNode),
93 DeviceNode->AgentId);
94 else {
95 printk("PCI: Slot 0x%04X.%02X Reset Failed, RCode: %04X\n",
96 ISERIES_BUS(DeviceNode), DeviceNode->AgentId,
97 DeviceNode->ReturnCode);
98 PCIFR("Slot 0x%04X.%02X Reset Failed, RCode: %04X\n",
99 ISERIES_BUS(DeviceNode), DeviceNode->AgentId,
100 DeviceNode->ReturnCode);
101 }
102 return DeviceNode->ReturnCode;
103}
104EXPORT_SYMBOL(iSeries_Device_ToggleReset);
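/*
 * Illustrative usage sketch, not compiled: 'dev' is a hypothetical
 * iSeries PCI device.  Passing 0 for both times selects the defaults
 * (.5 second assert, 3 second settle); AssertTime = 5 would assert
 * for 5 * 100ms = .5 seconds.
 */
#if 0
	if (iSeries_Device_ToggleReset(dev, 0, 0) != 0)
		printk("PCI: reset of %s failed\n", pci_name(dev));
#endif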
diff --git a/arch/ppc64/kernel/iSeries_proc.c b/arch/ppc64/kernel/iSeries_proc.c
new file mode 100644
index 000000000000..0cc58ddf48de
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_proc.c
@@ -0,0 +1,162 @@
1/*
2 * iSeries_proc.c
3 * Copyright (C) 2001 Kyle A. Lucke IBM Corporation
4 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20#include <linux/init.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/param.h> /* for HZ */
24#include <asm/paca.h>
25#include <asm/processor.h>
26#include <asm/time.h>
27#include <asm/lppaca.h>
28#include <asm/iSeries/ItLpQueue.h>
29#include <asm/iSeries/HvCallXm.h>
30#include <asm/iSeries/IoHriMainStore.h>
31#include <asm/iSeries/LparData.h>
32#include <asm/iSeries/iSeries_proc.h>
33
34static int __init iseries_proc_create(void)
35{
36	struct proc_dir_entry *e = proc_mkdir("iSeries", NULL);
37 if (!e)
38 return 1;
39
40 return 0;
41}
42core_initcall(iseries_proc_create);
43
44static char *event_types[9] = {
45 "Hypervisor\t\t",
46 "Machine Facilities\t",
47 "Session Manager\t",
48 "SPD I/O\t\t",
49 "Virtual Bus\t\t",
50 "PCI I/O\t\t",
51 "RIO I/O\t\t",
52 "Virtual Lan\t\t",
53 "Virtual I/O\t\t"
54};
55
56static int proc_lpevents_show(struct seq_file *m, void *v)
57{
58 unsigned int i;
59
60 seq_printf(m, "LpEventQueue 0\n");
61 seq_printf(m, " events processed:\t%lu\n",
62 (unsigned long)xItLpQueue.xLpIntCount);
63
64 for (i = 0; i < 9; ++i)
65 seq_printf(m, " %s %10lu\n", event_types[i],
66 (unsigned long)xItLpQueue.xLpIntCountByType[i]);
67
68 seq_printf(m, "\n events processed by processor:\n");
69
70 for_each_online_cpu(i)
71 seq_printf(m, " CPU%02d %10u\n", i, paca[i].lpevent_count);
72
73 return 0;
74}
75
76static int proc_lpevents_open(struct inode *inode, struct file *file)
77{
78 return single_open(file, proc_lpevents_show, NULL);
79}
80
81static struct file_operations proc_lpevents_operations = {
82 .open = proc_lpevents_open,
83 .read = seq_read,
84 .llseek = seq_lseek,
85 .release = single_release,
86};
87
88static unsigned long startTitan = 0;
89static unsigned long startTb = 0;
90
91static int proc_titantod_show(struct seq_file *m, void *v)
92{
93 unsigned long tb0, titan_tod;
94
95 tb0 = get_tb();
96 titan_tod = HvCallXm_loadTod();
97
98 seq_printf(m, "Titan\n" );
99 seq_printf(m, " time base = %016lx\n", tb0);
100 seq_printf(m, " titan tod = %016lx\n", titan_tod);
101 seq_printf(m, " xProcFreq = %016x\n",
102 xIoHriProcessorVpd[0].xProcFreq);
103 seq_printf(m, " xTimeBaseFreq = %016x\n",
104 xIoHriProcessorVpd[0].xTimeBaseFreq);
105 seq_printf(m, " tb_ticks_per_jiffy = %lu\n", tb_ticks_per_jiffy);
106 seq_printf(m, " tb_ticks_per_usec = %lu\n", tb_ticks_per_usec);
107
108 if (!startTitan) {
109 startTitan = titan_tod;
110 startTb = tb0;
111 } else {
112 unsigned long titan_usec = (titan_tod - startTitan) >> 12;
113 unsigned long tb_ticks = (tb0 - startTb);
114 unsigned long titan_jiffies = titan_usec / (1000000/HZ);
115 unsigned long titan_jiff_usec = titan_jiffies * (1000000/HZ);
116 unsigned long titan_jiff_rem_usec = titan_usec - titan_jiff_usec;
117 unsigned long tb_jiffies = tb_ticks / tb_ticks_per_jiffy;
118 unsigned long tb_jiff_ticks = tb_jiffies * tb_ticks_per_jiffy;
119 unsigned long tb_jiff_rem_ticks = tb_ticks - tb_jiff_ticks;
120 unsigned long tb_jiff_rem_usec = tb_jiff_rem_ticks / tb_ticks_per_usec;
121 unsigned long new_tb_ticks_per_jiffy = (tb_ticks * (1000000/HZ))/titan_usec;
122
123 seq_printf(m, " titan elapsed = %lu uSec\n", titan_usec);
124 seq_printf(m, " tb elapsed = %lu ticks\n", tb_ticks);
125 seq_printf(m, " titan jiffies = %lu.%04lu \n", titan_jiffies,
126 titan_jiff_rem_usec);
127 seq_printf(m, " tb jiffies = %lu.%04lu\n", tb_jiffies,
128 tb_jiff_rem_usec);
129 seq_printf(m, " new tb_ticks_per_jiffy = %lu\n",
130 new_tb_ticks_per_jiffy);
131 }
132
133 return 0;
134}
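/*
 * Worked example, not compiled: the >> 12 above treats the titan TOD
 * as ticking 2^12 times per microsecond, so a delta of 0x1000000
 * titan ticks is 0x1000000 >> 12 = 4096 usec.
 */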
135
136static int proc_titantod_open(struct inode *inode, struct file *file)
137{
138 return single_open(file, proc_titantod_show, NULL);
139}
140
141static struct file_operations proc_titantod_operations = {
142 .open = proc_titantod_open,
143 .read = seq_read,
144 .llseek = seq_lseek,
145 .release = single_release,
146};
147
148static int __init iseries_proc_init(void)
149{
150 struct proc_dir_entry *e;
151
152 e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL);
153 if (e)
154 e->proc_fops = &proc_lpevents_operations;
155
156 e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL);
157 if (e)
158 e->proc_fops = &proc_titantod_operations;
159
160 return 0;
161}
162__initcall(iseries_proc_init);
diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c
new file mode 100644
index 000000000000..da20120f2261
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_setup.c
@@ -0,0 +1,877 @@
1/*
2 * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
3 * Copyright (c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
4 *
5 * Module name: iSeries_setup.c
6 *
7 * Description:
8 * Architecture- / platform-specific boot-time initialization code for
9 * the IBM iSeries LPAR. Adapted from original code by Grant Erickson and
10 * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek
11 * <dan@net4x.com>.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18
19#undef DEBUG
20
21#include <linux/config.h>
22#include <linux/init.h>
23#include <linux/threads.h>
24#include <linux/smp.h>
25#include <linux/param.h>
26#include <linux/string.h>
27#include <linux/bootmem.h>
28#include <linux/initrd.h>
29#include <linux/seq_file.h>
30#include <linux/kdev_t.h>
31#include <linux/major.h>
32#include <linux/root_dev.h>
33
34#include <asm/processor.h>
35#include <asm/machdep.h>
36#include <asm/page.h>
37#include <asm/mmu.h>
38#include <asm/pgtable.h>
39#include <asm/mmu_context.h>
40#include <asm/cputable.h>
41#include <asm/sections.h>
42#include <asm/iommu.h>
43
44#include <asm/time.h>
45#include "iSeries_setup.h"
46#include <asm/naca.h>
47#include <asm/paca.h>
48#include <asm/cache.h>
49#include <asm/sections.h>
50#include <asm/iSeries/LparData.h>
51#include <asm/iSeries/HvCallHpt.h>
52#include <asm/iSeries/HvLpConfig.h>
53#include <asm/iSeries/HvCallEvent.h>
54#include <asm/iSeries/HvCallSm.h>
55#include <asm/iSeries/HvCallXm.h>
56#include <asm/iSeries/ItLpQueue.h>
57#include <asm/iSeries/IoHriMainStore.h>
58#include <asm/iSeries/iSeries_proc.h>
59#include <asm/iSeries/mf.h>
60#include <asm/iSeries/HvLpEvent.h>
61#include <asm/iSeries/iSeries_irq.h>
62
63extern void hvlog(char *fmt, ...);
64
65#ifdef DEBUG
66#define DBG(fmt...) hvlog(fmt)
67#else
68#define DBG(fmt...)
69#endif
70
71/* Function Prototypes */
72extern void ppcdbg_initialize(void);
73
74static void build_iSeries_Memory_Map(void);
75static void setup_iSeries_cache_sizes(void);
76static void iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr);
77extern void iSeries_pci_final_fixup(void);
78
79/* Global Variables */
80static unsigned long procFreqHz;
81static unsigned long procFreqMhz;
82static unsigned long procFreqMhzHundreths;
83
84static unsigned long tbFreqHz;
85static unsigned long tbFreqMhz;
86static unsigned long tbFreqMhzHundreths;
87
88int piranha_simulator;
89
90extern int rd_size; /* Defined in drivers/block/rd.c */
91extern unsigned long klimit;
92extern unsigned long embedded_sysmap_start;
93extern unsigned long embedded_sysmap_end;
94
95extern unsigned long iSeries_recal_tb;
96extern unsigned long iSeries_recal_titan;
97
98static int mf_initialized;
99
100struct MemoryBlock {
101 unsigned long absStart;
102 unsigned long absEnd;
103 unsigned long logicalStart;
104 unsigned long logicalEnd;
105};
106
107/*
108 * Process the main store VPD to determine where the holes in memory
109 * are, fill in the array of block data, and return the number of
110 * physical blocks.
111 */
112static unsigned long iSeries_process_Condor_mainstore_vpd(
113 struct MemoryBlock *mb_array, unsigned long max_entries)
114{
115 unsigned long holeFirstChunk, holeSizeChunks;
116 unsigned long numMemoryBlocks = 1;
117 struct IoHriMainStoreSegment4 *msVpd =
118 (struct IoHriMainStoreSegment4 *)xMsVpd;
119 unsigned long holeStart = msVpd->nonInterleavedBlocksStartAdr;
120 unsigned long holeEnd = msVpd->nonInterleavedBlocksEndAdr;
121 unsigned long holeSize = holeEnd - holeStart;
122
123 printk("Mainstore_VPD: Condor\n");
124 /*
125 * Determine if absolute memory has any
126 * holes so that we can interpret the
127 * access map we get back from the hypervisor
128 * correctly.
129 */
130 mb_array[0].logicalStart = 0;
131 mb_array[0].logicalEnd = 0x100000000;
132 mb_array[0].absStart = 0;
133 mb_array[0].absEnd = 0x100000000;
134
135 if (holeSize) {
136 numMemoryBlocks = 2;
137 holeStart = holeStart & 0x000fffffffffffff;
138 holeStart = addr_to_chunk(holeStart);
139 holeFirstChunk = holeStart;
140 holeSize = addr_to_chunk(holeSize);
141 holeSizeChunks = holeSize;
142 printk( "Main store hole: start chunk = %0lx, size = %0lx chunks\n",
143 holeFirstChunk, holeSizeChunks );
144 mb_array[0].logicalEnd = holeFirstChunk;
145 mb_array[0].absEnd = holeFirstChunk;
146 mb_array[1].logicalStart = holeFirstChunk;
147 mb_array[1].logicalEnd = 0x100000000 - holeSizeChunks;
148 mb_array[1].absStart = holeFirstChunk + holeSizeChunks;
149 mb_array[1].absEnd = 0x100000000;
150 }
151 return numMemoryBlocks;
152}
153
154#define MaxSegmentAreas 32
155#define MaxSegmentAdrRangeBlocks 128
156#define MaxAreaRangeBlocks 4
157
158static unsigned long iSeries_process_Regatta_mainstore_vpd(
159 struct MemoryBlock *mb_array, unsigned long max_entries)
160{
161 struct IoHriMainStoreSegment5 *msVpdP =
162 (struct IoHriMainStoreSegment5 *)xMsVpd;
163 unsigned long numSegmentBlocks = 0;
164 u32 existsBits = msVpdP->msAreaExists;
165 unsigned long area_num;
166
167 printk("Mainstore_VPD: Regatta\n");
168
169 for (area_num = 0; area_num < MaxSegmentAreas; ++area_num ) {
170 unsigned long numAreaBlocks;
171 struct IoHriMainStoreArea4 *currentArea;
172
173 if (existsBits & 0x80000000) {
174 unsigned long block_num;
175
176 currentArea = &msVpdP->msAreaArray[area_num];
177 numAreaBlocks = currentArea->numAdrRangeBlocks;
178 printk("ms_vpd: processing area %2ld blocks=%ld",
179 area_num, numAreaBlocks);
180 for (block_num = 0; block_num < numAreaBlocks;
181 ++block_num ) {
182 /* Process an address range block */
183 struct MemoryBlock tempBlock;
184 unsigned long i;
185
186 tempBlock.absStart =
187 (unsigned long)currentArea->xAdrRangeBlock[block_num].blockStart;
188 tempBlock.absEnd =
189 (unsigned long)currentArea->xAdrRangeBlock[block_num].blockEnd;
190 tempBlock.logicalStart = 0;
191 tempBlock.logicalEnd = 0;
192 printk("\n block %ld absStart=%016lx absEnd=%016lx",
193 block_num, tempBlock.absStart,
194 tempBlock.absEnd);
195
196 for (i = 0; i < numSegmentBlocks; ++i) {
197 if (mb_array[i].absStart ==
198 tempBlock.absStart)
199 break;
200 }
201 if (i == numSegmentBlocks) {
202 if (numSegmentBlocks == max_entries)
203 panic("iSeries_process_mainstore_vpd: too many memory blocks");
204 mb_array[numSegmentBlocks] = tempBlock;
205 ++numSegmentBlocks;
206 } else
207 printk(" (duplicate)");
208 }
209 printk("\n");
210 }
211 existsBits <<= 1;
212 }
213 /* Now sort the blocks found into ascending sequence */
214 if (numSegmentBlocks > 1) {
215 unsigned long m, n;
216
217 for (m = 0; m < numSegmentBlocks - 1; ++m) {
218 for (n = numSegmentBlocks - 1; m < n; --n) {
219 if (mb_array[n].absStart <
220 mb_array[n-1].absStart) {
221 struct MemoryBlock tempBlock;
222
223 tempBlock = mb_array[n];
224 mb_array[n] = mb_array[n-1];
225 mb_array[n-1] = tempBlock;
226 }
227 }
228 }
229 }
230 /*
231 * Assign "logical" addresses to each block. These
232 * addresses correspond to the hypervisor "bitmap" space.
233 * Convert all addresses into units of 256K chunks.
234 */
235 {
236 unsigned long i, nextBitmapAddress;
237
238 printk("ms_vpd: %ld sorted memory blocks\n", numSegmentBlocks);
239 nextBitmapAddress = 0;
240 for (i = 0; i < numSegmentBlocks; ++i) {
241 unsigned long length = mb_array[i].absEnd -
242 mb_array[i].absStart;
243
244 mb_array[i].logicalStart = nextBitmapAddress;
245 mb_array[i].logicalEnd = nextBitmapAddress + length;
246 nextBitmapAddress += length;
247 printk(" Bitmap range: %016lx - %016lx\n"
248 " Absolute range: %016lx - %016lx\n",
249 mb_array[i].logicalStart,
250 mb_array[i].logicalEnd,
251 mb_array[i].absStart, mb_array[i].absEnd);
252 mb_array[i].absStart = addr_to_chunk(mb_array[i].absStart &
253 0x000fffffffffffff);
254 mb_array[i].absEnd = addr_to_chunk(mb_array[i].absEnd &
255 0x000fffffffffffff);
256 mb_array[i].logicalStart =
257 addr_to_chunk(mb_array[i].logicalStart);
258 mb_array[i].logicalEnd = addr_to_chunk(mb_array[i].logicalEnd);
259 }
260 }
261
262 return numSegmentBlocks;
263}
264
265static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array,
266 unsigned long max_entries)
267{
268 unsigned long i;
269 unsigned long mem_blocks = 0;
270
271 if (cpu_has_feature(CPU_FTR_SLB))
272 mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array,
273 max_entries);
274 else
275 mem_blocks = iSeries_process_Condor_mainstore_vpd(mb_array,
276 max_entries);
277
278	printk("Mainstore_VPD: numMemoryBlocks = %ld\n", mem_blocks);
279 for (i = 0; i < mem_blocks; ++i) {
280 printk("Mainstore_VPD: block %3ld logical chunks %016lx - %016lx\n"
281 " abs chunks %016lx - %016lx\n",
282 i, mb_array[i].logicalStart, mb_array[i].logicalEnd,
283 mb_array[i].absStart, mb_array[i].absEnd);
284 }
285 return mem_blocks;
286}
287
288static void __init iSeries_get_cmdline(void)
289{
290 char *p, *q;
291
292 /* copy the command line parameter from the primary VSP */
293	HvCallEvent_dmaToSp(cmd_line, 2 * 64 * 1024, 256,
294 HvLpDma_Direction_RemoteToLocal);
295
296 p = cmd_line;
297 q = cmd_line + 255;
298	while (p < q) {
299 if (!*p || *p == '\n')
300 break;
301 ++p;
302 }
303 *p = 0;
304}
305
306static void __init iSeries_init_early(void)
307{
308 extern unsigned long memory_limit;
309
310 DBG(" -> iSeries_init_early()\n");
311
312 ppcdbg_initialize();
313
314#if defined(CONFIG_BLK_DEV_INITRD)
315 /*
316 * If the init RAM disk has been configured and there is
317 * a non-zero starting address for it, set it up
318 */
319 if (naca.xRamDisk) {
320 initrd_start = (unsigned long)__va(naca.xRamDisk);
321 initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE;
322		initrd_below_start_ok = 1;	/* ramdisk in kernel space */
323 ROOT_DEV = Root_RAM0;
324 if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize)
325 rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024;
326 } else
327#endif /* CONFIG_BLK_DEV_INITRD */
328 {
329 /* ROOT_DEV = MKDEV(VIODASD_MAJOR, 1); */
330 }
331
332 iSeries_recal_tb = get_tb();
333 iSeries_recal_titan = HvCallXm_loadTod();
334
335 /*
336 * Cache sizes must be initialized before hpte_init_iSeries is called
337	 * as the latter needs them for flush_icache_range()
338 */
339 setup_iSeries_cache_sizes();
340
341 /*
342 * Initialize the hash table management pointers
343 */
344 hpte_init_iSeries();
345
346 /*
347 * Initialize the DMA/TCE management
348 */
349 iommu_init_early_iSeries();
350
351 /*
352 * Initialize the table which translate Linux physical addresses to
353 * AS/400 absolute addresses
354 */
355 build_iSeries_Memory_Map();
356
357 iSeries_get_cmdline();
358
359 /* Save unparsed command line copy for /proc/cmdline */
360 strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
361
362 /* Parse early parameters, in particular mem=x */
363 parse_early_param();
364
365 if (memory_limit) {
366 if (memory_limit < systemcfg->physicalMemorySize)
367 systemcfg->physicalMemorySize = memory_limit;
368 else {
369 printk("Ignoring mem=%lu >= ram_top.\n", memory_limit);
370 memory_limit = 0;
371 }
372 }
373
374 /* Bolt kernel mappings for all of memory (or just a bit if we've got a limit) */
375 iSeries_bolt_kernel(0, systemcfg->physicalMemorySize);
376
377 lmb_init();
378 lmb_add(0, systemcfg->physicalMemorySize);
379 lmb_analyze();
380 lmb_reserve(0, __pa(klimit));
381
382 /* Initialize machine-dependency vectors */
383#ifdef CONFIG_SMP
384 smp_init_iSeries();
385#endif
386 if (itLpNaca.xPirEnvironMode == 0)
387 piranha_simulator = 1;
388
389 /* Associate Lp Event Queue 0 with processor 0 */
390 HvCallEvent_setLpEventQueueInterruptProc(0, 0);
391
392 mf_init();
393 mf_initialized = 1;
394 mb();
395
396 /* If we were passed an initrd, set the ROOT_DEV properly if the values
397 * look sensible. If not, clear initrd reference.
398 */
399#ifdef CONFIG_BLK_DEV_INITRD
400 if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
401 initrd_end > initrd_start)
402 ROOT_DEV = Root_RAM0;
403 else
404 initrd_start = initrd_end = 0;
405#endif /* CONFIG_BLK_DEV_INITRD */
406
407 DBG(" <- iSeries_init_early()\n");
408}
409
410/*
411 * The iSeries may have very large memories ( > 128 GB ) and a partition
412 * may get memory in "chunks" that may be anywhere in the 2**52 real
413 * address space. The chunks are 256K in size. To map this to the
414 * memory model Linux expects, the AS/400 specific code builds a
415 * translation table to translate what Linux thinks are "physical"
416 * addresses to the actual real addresses. This allows us to make
417 * it appear to Linux that we have contiguous memory starting at
418 * physical address zero while in fact this could be far from the truth.
419 * To avoid confusion, I'll let the words physical and/or real address
420 * apply to the Linux addresses while I'll use "absolute address" to
421 * refer to the actual hardware real address.
422 *
423 * build_iSeries_Memory_Map gets information from the Hypervisor and
424 * looks at the Main Store VPD to determine the absolute addresses
425 * of the memory that has been assigned to our partition and builds
426 * a table used to translate Linux's physical addresses to these
427 * absolute addresses. Absolute addresses are needed when
428 * communicating with the hypervisor (e.g. to build HPT entries)
429 */
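/*
 * Illustrative sketch, not compiled: chunks are 256K (1UL << 18), so
 * the chunk/address conversions used throughout are shifts by 18;
 * the example address below is arbitrary.
 */
#if 0
	unsigned long chunk = 0x12345678UL >> 18;	/* addr_to_chunk() */
	unsigned long addr  = chunk << 18;		/* chunk_to_addr() */
#endif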
430
431static void __init build_iSeries_Memory_Map(void)
432{
433 u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize;
434 u32 nextPhysChunk;
435 u32 hptFirstChunk, hptLastChunk, hptSizeChunks, hptSizePages;
436 u32 num_ptegs;
437	u32 totalChunks, moreChunks;
438 u32 currChunk, thisChunk, absChunk;
439 u32 currDword;
440 u32 chunkBit;
441 u64 map;
442 struct MemoryBlock mb[32];
443 unsigned long numMemoryBlocks, curBlock;
444
445 /* Chunk size on iSeries is 256K bytes */
446 totalChunks = (u32)HvLpConfig_getMsChunks();
447 klimit = msChunks_alloc(klimit, totalChunks, 1UL << 18);
448
449 /*
450	 * Get the absolute address of our load area
451	 * and map it to physical address 0.
452	 * This guarantees that the load area ends up at physical 0;
453	 * otherwise, it might not be returned by PLIC as the first
454	 * chunks.
455 */
456
457 loadAreaFirstChunk = (u32)addr_to_chunk(itLpNaca.xLoadAreaAddr);
458 loadAreaSize = itLpNaca.xLoadAreaChunks;
459
460 /*
461	 * Only add the pages already mapped here;
462	 * otherwise we might add the HPT pages.
463	 * The rest of the pages of the load area
464	 * aren't in the HPT yet and can still
465	 * be assigned an arbitrary physical address.
466 */
467 if ((loadAreaSize * 64) > HvPagesToMap)
468 loadAreaSize = HvPagesToMap / 64;
469
470 loadAreaLastChunk = loadAreaFirstChunk + loadAreaSize - 1;
471
472 /*
473 * TODO Do we need to do something if the HPT is in the 64MB load area?
474 * This would be required if the itLpNaca.xLoadAreaChunks includes
475 * the HPT size
476 */
477
478 printk("Mapping load area - physical addr = 0000000000000000\n"
479 " absolute addr = %016lx\n",
480 chunk_to_addr(loadAreaFirstChunk));
481 printk("Load area size %dK\n", loadAreaSize * 256);
482
483 for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk)
484 msChunks.abs[nextPhysChunk] =
485 loadAreaFirstChunk + nextPhysChunk;
486
487 /*
488 * Get absolute address of our HPT and remember it so
489 * we won't map it to any physical address
490 */
491 hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
492 hptSizePages = (u32)HvCallHpt_getHptPages();
493 hptSizeChunks = hptSizePages >> (msChunks.chunk_shift - PAGE_SHIFT);
494 hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
495
496 printk("HPT absolute addr = %016lx, size = %dK\n",
497 chunk_to_addr(hptFirstChunk), hptSizeChunks * 256);
498
499 /* Fill in the hashed page table hash mask */
500 num_ptegs = hptSizePages *
501 (PAGE_SIZE / (sizeof(HPTE) * HPTES_PER_GROUP));
502 htab_hash_mask = num_ptegs - 1;
503
504 /*
505 * The actual hashed page table is in the hypervisor,
506 * we have no direct access
507 */
508 htab_address = NULL;
509
510 /*
511 * Determine if absolute memory has any
512 * holes so that we can interpret the
513 * access map we get back from the hypervisor
514 * correctly.
515 */
516 numMemoryBlocks = iSeries_process_mainstore_vpd(mb, 32);
517
518 /*
519 * Process the main store access map from the hypervisor
520 * to build up our physical -> absolute translation table
521 */
522 curBlock = 0;
523 currChunk = 0;
524 currDword = 0;
525 moreChunks = totalChunks;
526
527 while (moreChunks) {
528 map = HvCallSm_get64BitsOfAccessMap(itLpNaca.xLpIndex,
529 currDword);
530 thisChunk = currChunk;
531 while (map) {
532 chunkBit = map >> 63;
533 map <<= 1;
534 if (chunkBit) {
535 --moreChunks;
536 while (thisChunk >= mb[curBlock].logicalEnd) {
537 ++curBlock;
538 if (curBlock >= numMemoryBlocks)
539 panic("out of memory blocks");
540 }
541 if (thisChunk < mb[curBlock].logicalStart)
542 panic("memory block error");
543
544 absChunk = mb[curBlock].absStart +
545 (thisChunk - mb[curBlock].logicalStart);
546 if (((absChunk < hptFirstChunk) ||
547 (absChunk > hptLastChunk)) &&
548 ((absChunk < loadAreaFirstChunk) ||
549 (absChunk > loadAreaLastChunk))) {
550 msChunks.abs[nextPhysChunk] = absChunk;
551 ++nextPhysChunk;
552 }
553 }
554 ++thisChunk;
555 }
556 ++currDword;
557 currChunk += 64;
558 }
559
560 /*
561 * main store size (in chunks) is
562 * totalChunks - hptSizeChunks
563 * which should be equal to
564 * nextPhysChunk
565 */
566 systemcfg->physicalMemorySize = chunk_to_addr(nextPhysChunk);
567}
568
569/*
570 * Set up the variables that describe the cache line sizes
571 * for this machine.
572 */
573static void __init setup_iSeries_cache_sizes(void)
574{
575 unsigned int i, n;
576 unsigned int procIx = get_paca()->lppaca.dyn_hv_phys_proc_index;
577
578 systemcfg->icache_size =
579 ppc64_caches.isize = xIoHriProcessorVpd[procIx].xInstCacheSize * 1024;
580 systemcfg->icache_line_size =
581 ppc64_caches.iline_size =
582 xIoHriProcessorVpd[procIx].xInstCacheOperandSize;
583 systemcfg->dcache_size =
584 ppc64_caches.dsize =
585 xIoHriProcessorVpd[procIx].xDataL1CacheSizeKB * 1024;
586 systemcfg->dcache_line_size =
587 ppc64_caches.dline_size =
588 xIoHriProcessorVpd[procIx].xDataCacheOperandSize;
589 ppc64_caches.ilines_per_page = PAGE_SIZE / ppc64_caches.iline_size;
590 ppc64_caches.dlines_per_page = PAGE_SIZE / ppc64_caches.dline_size;
591
592 i = ppc64_caches.iline_size;
593 n = 0;
594 while ((i = (i / 2)))
595 ++n;
596 ppc64_caches.log_iline_size = n;
597
598 i = ppc64_caches.dline_size;
599 n = 0;
600 while ((i = (i / 2)))
601 ++n;
602 ppc64_caches.log_dline_size = n;
603
604 printk("D-cache line size = %d\n",
605 (unsigned int)ppc64_caches.dline_size);
606 printk("I-cache line size = %d\n",
607 (unsigned int)ppc64_caches.iline_size);
608}
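/*
 * Worked example, not compiled: the loops above compute
 * floor(log2(line size)).  A 128-byte line halves seven times before
 * reaching zero, so log_dline_size would be 7.
 */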
609
610/*
611 * Create a pte. Used during initialization only.
612 */
613static void iSeries_make_pte(unsigned long va, unsigned long pa,
614 int mode)
615{
616 HPTE local_hpte, rhpte;
617 unsigned long hash, vpn;
618 long slot;
619
620 vpn = va >> PAGE_SHIFT;
621 hash = hpt_hash(vpn, 0);
622
623 local_hpte.dw1.dword1 = pa | mode;
624 local_hpte.dw0.dword0 = 0;
625 local_hpte.dw0.dw0.avpn = va >> 23;
626 local_hpte.dw0.dw0.bolted = 1; /* bolted */
627 local_hpte.dw0.dw0.v = 1;
628
629 slot = HvCallHpt_findValid(&rhpte, vpn);
630 if (slot < 0) {
631 /* Must find space in primary group */
632 panic("hash_page: hpte already exists\n");
633 }
634 HvCallHpt_addValidate(slot, 0, (HPTE *)&local_hpte );
635}
636
637/*
638 * Bolt the kernel addr space into the HPT
639 */
640static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr)
641{
642 unsigned long pa;
643 unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
644 HPTE hpte;
645
646	for (pa = saddr; pa < eaddr; pa += PAGE_SIZE) {
647 unsigned long ea = (unsigned long)__va(pa);
648 unsigned long vsid = get_kernel_vsid(ea);
649 unsigned long va = (vsid << 28) | (pa & 0xfffffff);
650 unsigned long vpn = va >> PAGE_SHIFT;
651 unsigned long slot = HvCallHpt_findValid(&hpte, vpn);
652
653 /* Make non-kernel text non-executable */
654 if (!in_kernel_text(ea))
655 mode_rw |= HW_NO_EXEC;
656
657 if (hpte.dw0.dw0.v) {
658 /* HPTE exists, so just bolt it */
659 HvCallHpt_setSwBits(slot, 0x10, 0);
660 /* And make sure the pp bits are correct */
661 HvCallHpt_setPp(slot, PP_RWXX);
662 } else
663 /* No HPTE exists, so create a new bolted one */
664 iSeries_make_pte(va, phys_to_abs(pa), mode_rw);
665 }
666}
667
668extern unsigned long ppc_proc_freq;
669extern unsigned long ppc_tb_freq;
670
671/*
672 * Set up the LP event queue and compute processor and time base frequencies.
673 */
674static void __init iSeries_setup_arch(void)
675{
676 void *eventStack;
677 unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index;
678
679 /* Add an eye catcher and the systemcfg layout version number */
680 strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
681 systemcfg->version.major = SYSTEMCFG_MAJOR;
682 systemcfg->version.minor = SYSTEMCFG_MINOR;
683
684 /* Setup the Lp Event Queue */
685
686 /* Allocate a page for the Event Stack
687 * The hypervisor wants the absolute real address, so
688 * we subtract out the KERNELBASE and add in the
689 * absolute real address of the kernel load area
690 */
691 eventStack = alloc_bootmem_pages(LpEventStackSize);
692 memset(eventStack, 0, LpEventStackSize);
693
694 /* Invoke the hypervisor to initialize the event stack */
695 HvCallEvent_setLpEventStack(0, eventStack, LpEventStackSize);
696
697 /* Initialize fields in our Lp Event Queue */
698 xItLpQueue.xSlicEventStackPtr = (char *)eventStack;
699 xItLpQueue.xSlicCurEventPtr = (char *)eventStack;
700 xItLpQueue.xSlicLastValidEventPtr = (char *)eventStack +
701 (LpEventStackSize - LpEventMaxSize);
702 xItLpQueue.xIndex = 0;
703
704 /* Compute processor frequency */
705 procFreqHz = ((1UL << 34) * 1000000) /
706 xIoHriProcessorVpd[procIx].xProcFreq;
707 procFreqMhz = procFreqHz / 1000000;
708 procFreqMhzHundreths = (procFreqHz / 10000) - (procFreqMhz * 100);
709 ppc_proc_freq = procFreqHz;
710
711 /* Compute time base frequency */
712 tbFreqHz = ((1UL << 32) * 1000000) /
713 xIoHriProcessorVpd[procIx].xTimeBaseFreq;
714 tbFreqMhz = tbFreqHz / 1000000;
715 tbFreqMhzHundreths = (tbFreqHz / 10000) - (tbFreqMhz * 100);
716 ppc_tb_freq = tbFreqHz;
717
718 printk("Max logical processors = %d\n",
719 itVpdAreas.xSlicMaxLogicalProcs);
720 printk("Max physical processors = %d\n",
721 itVpdAreas.xSlicMaxPhysicalProcs);
722 printk("Processor frequency = %lu.%02lu\n", procFreqMhz,
723 procFreqMhzHundreths);
724 printk("Time base frequency = %lu.%02lu\n", tbFreqMhz,
725 tbFreqMhzHundreths);
726 systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR;
727 printk("Processor version = %x\n", systemcfg->processor);
728}
729
730static void iSeries_get_cpuinfo(struct seq_file *m)
731{
732 seq_printf(m, "machine\t\t: 64-bit iSeries Logical Partition\n");
733}
734
735/*
736 * Not yet implemented: we always return -2, which tells the caller
737 * to ignore this interrupt.
738 */
739static int iSeries_get_irq(struct pt_regs *regs)
740{
741 /* -2 means ignore this interrupt */
742 return -2;
743}
744
745/*
746 * Document me.
747 */
748static void iSeries_restart(char *cmd)
749{
750 mf_reboot();
751}
752
753/*
754 * Document me.
755 */
756static void iSeries_power_off(void)
757{
758 mf_power_off();
759}
760
761/*
762 * Document me.
763 */
764static void iSeries_halt(void)
765{
766 mf_power_off();
767}
768
769extern void setup_default_decr(void);
770
771/*
772 * void __init iSeries_calibrate_decr()
773 *
774 * Description:
775 * This routine retrieves the internal processor frequency from the VPD,
776 * and sets up the kernel timer decrementer based on that value.
777 *
778 */
779static void __init iSeries_calibrate_decr(void)
780{
781 unsigned long cyclesPerUsec;
782 struct div_result divres;
783
784 /* Compute decrementer (and TB) frequency in cycles/sec */
785 cyclesPerUsec = ppc_tb_freq / 1000000;
786
787 /*
788 * Set the amount to refresh the decrementer by. This
789 * is the number of decrementer ticks it takes for
790 * 1/HZ seconds.
791 */
792 tb_ticks_per_jiffy = ppc_tb_freq / HZ;
793
794#if 0
795 /* TEST CODE FOR ADJTIME */
796 tb_ticks_per_jiffy += tb_ticks_per_jiffy / 5000;
797 /* END OF TEST CODE */
798#endif
799
800 /*
801 * tb_ticks_per_sec = freq; would give better accuracy
802 * but tb_ticks_per_sec = tb_ticks_per_jiffy*HZ; assures
803 * that jiffies (and xtime) will match the time returned
804 * by do_gettimeofday.
805 */
806 tb_ticks_per_sec = tb_ticks_per_jiffy * HZ;
807 tb_ticks_per_usec = cyclesPerUsec;
808 tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
809 div128_by_32(1024 * 1024, 0, tb_ticks_per_sec, &divres);
810 tb_to_xs = divres.result_low;
811 setup_default_decr();
812}
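/*
 * Worked example, not compiled, with a hypothetical 512MHz time base
 * and HZ = 100:
 *   tb_ticks_per_jiffy = 512000000 / 100     = 5120000
 *   tb_ticks_per_usec  = 512000000 / 1000000 = 512
 */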
813
814static void __init iSeries_progress(char * st, unsigned short code)
815{
816 printk("Progress: [%04x] - %s\n", (unsigned)code, st);
817 if (!piranha_simulator && mf_initialized) {
818 if (code != 0xffff)
819 mf_display_progress(code);
820 else
821 mf_clear_src();
822 }
823}
824
825static void __init iSeries_fixup_klimit(void)
826{
827 /*
828 * Change klimit to take into account any ram disk
829 * that may be included
830 */
831 if (naca.xRamDisk)
832 klimit = KERNELBASE + (u64)naca.xRamDisk +
833 (naca.xRamDiskSize * PAGE_SIZE);
834 else {
835 /*
836 * No ram disk was included - check and see if there
837 * was an embedded system map. Change klimit to take
838 * into account any embedded system map
839 */
840 if (embedded_sysmap_end)
841 klimit = KERNELBASE + ((embedded_sysmap_end + 4095) &
842 0xfffffffffffff000);
843 }
844}
845
846static int __init iSeries_src_init(void)
847{
848 /* clear the progress line */
849 ppc_md.progress(" ", 0xffff);
850 return 0;
851}
852
853late_initcall(iSeries_src_init);
854
855void __init iSeries_early_setup(void)
856{
857 iSeries_fixup_klimit();
858
859 ppc_md.setup_arch = iSeries_setup_arch;
860 ppc_md.get_cpuinfo = iSeries_get_cpuinfo;
861 ppc_md.init_IRQ = iSeries_init_IRQ;
862 ppc_md.get_irq = iSeries_get_irq;
863	ppc_md.init_early = iSeries_init_early;
864
865 ppc_md.pcibios_fixup = iSeries_pci_final_fixup;
866
867 ppc_md.restart = iSeries_restart;
868 ppc_md.power_off = iSeries_power_off;
869 ppc_md.halt = iSeries_halt;
870
871 ppc_md.get_boot_time = iSeries_get_boot_time;
872 ppc_md.set_rtc_time = iSeries_set_rtc_time;
873 ppc_md.get_rtc_time = iSeries_get_rtc_time;
874 ppc_md.calibrate_decr = iSeries_calibrate_decr;
875 ppc_md.progress = iSeries_progress;
876}
877
diff --git a/arch/ppc64/kernel/iSeries_setup.h b/arch/ppc64/kernel/iSeries_setup.h
new file mode 100644
index 000000000000..c6eb29a245ac
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_setup.h
@@ -0,0 +1,26 @@
1/*
2 * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
3 * Copyright (c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
4 *
5 * Module name: as400_setup.h
6 *
7 * Description:
8 * Architecture- / platform-specific boot-time initialization code for
9 * the IBM AS/400 LPAR. Adapted from original code by Grant Erickson and
10 * code by Gary Thomas, Cort Dougan <cort@cs.nmt.edu>, and Dan Malek
11 * <dan@netx4.com>.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18
19#ifndef __ISERIES_SETUP_H__
20#define __ISERIES_SETUP_H__
21
22extern void iSeries_get_boot_time(struct rtc_time *tm);
23extern int iSeries_set_rtc_time(struct rtc_time *tm);
24extern void iSeries_get_rtc_time(struct rtc_time *tm);
25
26#endif /* __ISERIES_SETUP_H__ */
diff --git a/arch/ppc64/kernel/iSeries_smp.c b/arch/ppc64/kernel/iSeries_smp.c
new file mode 100644
index 000000000000..ba1f084d5462
--- /dev/null
+++ b/arch/ppc64/kernel/iSeries_smp.c
@@ -0,0 +1,151 @@
1/*
2 * SMP support for iSeries machines.
3 *
4 * Dave Engebretsen, Peter Bergner, and
5 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
6 *
7 * Plus various changes from other IBM teams...
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#undef DEBUG
16
17#include <linux/config.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/sched.h>
21#include <linux/smp.h>
22#include <linux/smp_lock.h>
23#include <linux/interrupt.h>
24#include <linux/kernel_stat.h>
25#include <linux/delay.h>
26#include <linux/init.h>
27#include <linux/spinlock.h>
28#include <linux/cache.h>
29#include <linux/err.h>
30#include <linux/sysdev.h>
31#include <linux/cpu.h>
32
33#include <asm/ptrace.h>
34#include <asm/atomic.h>
35#include <asm/irq.h>
36#include <asm/page.h>
37#include <asm/pgtable.h>
38#include <asm/io.h>
39#include <asm/smp.h>
40#include <asm/paca.h>
41#include <asm/iSeries/LparData.h>
42#include <asm/iSeries/HvCall.h>
43#include <asm/iSeries/HvCallCfg.h>
44#include <asm/time.h>
45#include <asm/ppcdebug.h>
46#include <asm/machdep.h>
47#include <asm/cputable.h>
48#include <asm/system.h>
49
50static unsigned long iSeries_smp_message[NR_CPUS];
51
52void iSeries_smp_message_recv(struct pt_regs *regs)
53{
54	int cpu = smp_processor_id();
55	int msg;
56
57	if (num_online_cpus() < 2)
58		return;
59
60	for (msg = 0; msg < 4; ++msg)
61		if (test_and_clear_bit(msg, &iSeries_smp_message[cpu]))
62			smp_message_recv(msg, regs);
63}
64
65static inline void smp_iSeries_do_message(int cpu, int msg)
66{
67 set_bit(msg, &iSeries_smp_message[cpu]);
68 HvCall_sendIPI(&(paca[cpu]));
69}
70
71static void smp_iSeries_message_pass(int target, int msg)
72{
73 int i;
74
75 if (target < NR_CPUS)
76 smp_iSeries_do_message(target, msg);
77 else {
78 for_each_online_cpu(i) {
79 if (target == MSG_ALL_BUT_SELF
80 && i == smp_processor_id())
81 continue;
82 smp_iSeries_do_message(i, msg);
83 }
84 }
85}
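/*
 * Illustrative sketch, not compiled: MSG_ALL_BUT_SELF fans the IPI
 * out to every online cpu except the caller, e.g. to ask the other
 * cpus to reschedule (PPC_MSG_RESCHEDULE assumed available here).
 */
#if 0
	smp_iSeries_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE);
#endif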
86
87static int smp_iSeries_numProcs(void)
88{
89 unsigned np, i;
90
91 np = 0;
92 for (i=0; i < NR_CPUS; ++i) {
93 if (paca[i].lppaca.dyn_proc_status < 2) {
94 cpu_set(i, cpu_possible_map);
95 cpu_set(i, cpu_present_map);
96 cpu_set(i, cpu_sibling_map[i]);
97 ++np;
98 }
99 }
100 return np;
101}
102
103static int smp_iSeries_probe(void)
104{
105 unsigned i;
106 unsigned np = 0;
107
108 for (i=0; i < NR_CPUS; ++i) {
109 if (paca[i].lppaca.dyn_proc_status < 2) {
110 /*paca[i].active = 1;*/
111 ++np;
112 }
113 }
114
115 return np;
116}
117
118static void smp_iSeries_kick_cpu(int nr)
119{
120 BUG_ON(nr < 0 || nr >= NR_CPUS);
121
122 /* Verify that our partition has a processor nr */
123 if (paca[nr].lppaca.dyn_proc_status >= 2)
124 return;
125
126 /* The processor is currently spinning, waiting
 127	 * for the cpu_start field to become non-zero.
128 * After we set cpu_start, the processor will
129 * continue on to secondary_start in iSeries_head.S
130 */
131 paca[nr].cpu_start = 1;
132}
133
134static void __devinit smp_iSeries_setup_cpu(int nr)
135{
136}
137
138static struct smp_ops_t iSeries_smp_ops = {
139 .message_pass = smp_iSeries_message_pass,
140 .probe = smp_iSeries_probe,
141 .kick_cpu = smp_iSeries_kick_cpu,
142 .setup_cpu = smp_iSeries_setup_cpu,
143};
144
145/* This is called very early. */
146void __init smp_init_iSeries(void)
147{
148 smp_ops = &iSeries_smp_ops;
149 systemcfg->processorCount = smp_iSeries_numProcs();
150}
151
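/*
 * A minimal sketch of how the message-pass protocol above is driven;
 * example_send_resched is hypothetical, and PPC_MSG_RESCHEDULE is
 * assumed from the generic ppc64 SMP message definitions. The sender
 * sets a bit in the target CPU's mask and fires an IPI; the target's
 * interrupt path calls iSeries_smp_message_recv(), which clears the
 * bit and dispatches it through the generic smp_message_recv().
 */
static void example_send_resched(int target)
{
	smp_iSeries_message_pass(target, PPC_MSG_RESCHEDULE);
}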
diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c
new file mode 100644
index 000000000000..6abc621d3ba0
--- /dev/null
+++ b/arch/ppc64/kernel/idle.c
@@ -0,0 +1,380 @@
1/*
 2 * Idle daemon for PowerPC. The idle daemon handles any action
 3 * that needs to be taken when the system becomes idle.
4 *
5 * Originally Written by Cort Dougan (cort@cs.nmt.edu)
6 *
 7 * iSeries support added by Mike Corrigan <mikejc@us.ibm.com>
8 *
9 * Additional shared processor, SMT, and firmware support
10 * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/sched.h>
20#include <linux/kernel.h>
21#include <linux/smp.h>
22#include <linux/cpu.h>
23#include <linux/module.h>
24#include <linux/sysctl.h>
25#include <linux/smp.h>
26
27#include <asm/system.h>
28#include <asm/processor.h>
29#include <asm/mmu.h>
30#include <asm/cputable.h>
31#include <asm/time.h>
32#include <asm/iSeries/HvCall.h>
33#include <asm/iSeries/ItLpQueue.h>
34#include <asm/plpar_wrappers.h>
35#include <asm/systemcfg.h>
36
37extern void power4_idle(void);
38
39static int (*idle_loop)(void);
40
41#ifdef CONFIG_PPC_ISERIES
42static unsigned long maxYieldTime = 0;
43static unsigned long minYieldTime = 0xffffffffffffffffUL;
44
45static void yield_shared_processor(void)
46{
47 unsigned long tb;
48 unsigned long yieldTime;
49
50 HvCall_setEnabledInterrupts(HvCall_MaskIPI |
51 HvCall_MaskLpEvent |
52 HvCall_MaskLpProd |
53 HvCall_MaskTimeout);
54
55 tb = get_tb();
56 /* Compute future tb value when yield should expire */
57 HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy);
58
59 yieldTime = get_tb() - tb;
60 if (yieldTime > maxYieldTime)
61 maxYieldTime = yieldTime;
62
63 if (yieldTime < minYieldTime)
64 minYieldTime = yieldTime;
65
66 /*
67 * The decrementer stops during the yield. Force a fake decrementer
68 * here and let the timer_interrupt code sort out the actual time.
69 */
70 get_paca()->lppaca.int_dword.fields.decr_int = 1;
71 process_iSeries_events();
72}
73
74static int iSeries_idle(void)
75{
76 struct paca_struct *lpaca;
77 long oldval;
78 unsigned long CTRL;
79
80 /* ensure iSeries run light will be out when idle */
81 clear_thread_flag(TIF_RUN_LIGHT);
82 CTRL = mfspr(CTRLF);
83 CTRL &= ~RUNLATCH;
84 mtspr(CTRLT, CTRL);
85
86 lpaca = get_paca();
87
88 while (1) {
89 if (lpaca->lppaca.shared_proc) {
90 if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
91 process_iSeries_events();
92 if (!need_resched())
93 yield_shared_processor();
94 } else {
95 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
96
97 if (!oldval) {
98 set_thread_flag(TIF_POLLING_NRFLAG);
99
100 while (!need_resched()) {
101 HMT_medium();
102 if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
103 process_iSeries_events();
104 HMT_low();
105 }
106
107 HMT_medium();
108 clear_thread_flag(TIF_POLLING_NRFLAG);
109 } else {
110 set_need_resched();
111 }
112 }
113
114 schedule();
115 }
116
117 return 0;
118}
119
120#else
121
122static int default_idle(void)
123{
124 long oldval;
125 unsigned int cpu = smp_processor_id();
126
127 while (1) {
128 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
129
130 if (!oldval) {
131 set_thread_flag(TIF_POLLING_NRFLAG);
132
133 while (!need_resched() && !cpu_is_offline(cpu)) {
134 barrier();
135 /*
136 * Go into low thread priority and possibly
137 * low power mode.
138 */
139 HMT_low();
140 HMT_very_low();
141 }
142
143 HMT_medium();
144 clear_thread_flag(TIF_POLLING_NRFLAG);
145 } else {
146 set_need_resched();
147 }
148
149 schedule();
150 if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
151 cpu_die();
152 }
153
154 return 0;
155}
156
157#ifdef CONFIG_PPC_PSERIES
158
159DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
160
161int dedicated_idle(void)
162{
163 long oldval;
164 struct paca_struct *lpaca = get_paca(), *ppaca;
165 unsigned long start_snooze;
166 unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
167 unsigned int cpu = smp_processor_id();
168
169 ppaca = &paca[cpu ^ 1];
170
171 while (1) {
172 /*
173 * Indicate to the HV that we are idle. Now would be
174 * a good time to find other work to dispatch.
175 */
176 lpaca->lppaca.idle = 1;
177
178 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
179 if (!oldval) {
180 set_thread_flag(TIF_POLLING_NRFLAG);
181 start_snooze = __get_tb() +
182 *smt_snooze_delay * tb_ticks_per_usec;
183 while (!need_resched() && !cpu_is_offline(cpu)) {
184 /*
185 * Go into low thread priority and possibly
186 * low power mode.
187 */
188 HMT_low();
189 HMT_very_low();
190
191 if (*smt_snooze_delay == 0 ||
192 __get_tb() < start_snooze)
193 continue;
194
195 HMT_medium();
196
197 if (!(ppaca->lppaca.idle)) {
198 local_irq_disable();
199
200 /*
201 * We are about to sleep the thread
 202					 * and so won't be polling any
203 * more.
204 */
205 clear_thread_flag(TIF_POLLING_NRFLAG);
206
207 /*
208 * SMT dynamic mode. Cede will result
209 * in this thread going dormant, if the
210 * partner thread is still doing work.
211 * Thread wakes up if partner goes idle,
212 * an interrupt is presented, or a prod
213 * occurs. Returning from the cede
214 * enables external interrupts.
215 */
216 if (!need_resched())
217 cede_processor();
218 else
219 local_irq_enable();
220 } else {
221 /*
222 * Give the HV an opportunity at the
223 * processor, since we are not doing
224 * any work.
225 */
226 poll_pending();
227 }
228 }
229
230 clear_thread_flag(TIF_POLLING_NRFLAG);
231 } else {
232 set_need_resched();
233 }
234
235 HMT_medium();
236 lpaca->lppaca.idle = 0;
237 schedule();
238 if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
239 cpu_die();
240 }
241 return 0;
242}
243
244static int shared_idle(void)
245{
246 struct paca_struct *lpaca = get_paca();
247 unsigned int cpu = smp_processor_id();
248
249 while (1) {
250 /*
251 * Indicate to the HV that we are idle. Now would be
252 * a good time to find other work to dispatch.
253 */
254 lpaca->lppaca.idle = 1;
255
256 while (!need_resched() && !cpu_is_offline(cpu)) {
257 local_irq_disable();
258
259 /*
260 * Yield the processor to the hypervisor. We return if
 261			 * an external interrupt occurs (which is delivered prior
262 * to returning here) or if a prod occurs from another
263 * processor. When returning here, external interrupts
264 * are enabled.
265 *
266 * Check need_resched() again with interrupts disabled
267 * to avoid a race.
268 */
269 if (!need_resched())
270 cede_processor();
271 else
272 local_irq_enable();
273 }
274
275 HMT_medium();
276 lpaca->lppaca.idle = 0;
277 schedule();
278 if (cpu_is_offline(smp_processor_id()) &&
279 system_state == SYSTEM_RUNNING)
280 cpu_die();
281 }
282
283 return 0;
284}
285
286#endif /* CONFIG_PPC_PSERIES */
287
288static int native_idle(void)
289{
 290	while (1) {
291 /* check CPU type here */
292 if (!need_resched())
293 power4_idle();
294 if (need_resched())
295 schedule();
296
297 if (cpu_is_offline(_smp_processor_id()) &&
298 system_state == SYSTEM_RUNNING)
299 cpu_die();
300 }
301 return 0;
302}
303
304#endif /* CONFIG_PPC_ISERIES */
305
306void cpu_idle(void)
307{
308 idle_loop();
309}
310
311int powersave_nap;
312
313#ifdef CONFIG_SYSCTL
314/*
315 * Register the sysctl to set/clear powersave_nap.
316 */
317static ctl_table powersave_nap_ctl_table[]={
318 {
319 .ctl_name = KERN_PPC_POWERSAVE_NAP,
320 .procname = "powersave-nap",
321 .data = &powersave_nap,
322 .maxlen = sizeof(int),
323 .mode = 0644,
324 .proc_handler = &proc_dointvec,
325 },
326 { 0, },
327};
328static ctl_table powersave_nap_sysctl_root[] = {
329 { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, },
330 { 0,},
331};
332
333static int __init
334register_powersave_nap_sysctl(void)
335{
336 register_sysctl_table(powersave_nap_sysctl_root, 0);
337
338 return 0;
339}
340__initcall(register_powersave_nap_sysctl);
341#endif
342
343int idle_setup(void)
344{
345 /*
346 * Move that junk to each platform specific file, eventually define
347 * a pSeries_idle for shared processor stuff
348 */
349#ifdef CONFIG_PPC_ISERIES
350 idle_loop = iSeries_idle;
351 return 1;
352#else
353 idle_loop = default_idle;
354#endif
355#ifdef CONFIG_PPC_PSERIES
356 if (systemcfg->platform & PLATFORM_PSERIES) {
357 if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
358 if (get_paca()->lppaca.shared_proc) {
359 printk(KERN_INFO "Using shared processor idle loop\n");
360 idle_loop = shared_idle;
361 } else {
362 printk(KERN_INFO "Using dedicated idle loop\n");
363 idle_loop = dedicated_idle;
364 }
365 } else {
366 printk(KERN_INFO "Using default idle loop\n");
367 idle_loop = default_idle;
368 }
369 }
370#endif /* CONFIG_PPC_PSERIES */
371#ifndef CONFIG_PPC_ISERIES
372 if (systemcfg->platform == PLATFORM_POWERMAC ||
373 systemcfg->platform == PLATFORM_MAPLE) {
374 printk(KERN_INFO "Using native/NAP idle loop\n");
375 idle_loop = native_idle;
376 }
377#endif /* CONFIG_PPC_ISERIES */
378
379 return 1;
380}
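/*
 * A minimal sketch of the contract idle_setup() relies on: each idle
 * loop is an int (*)(void) that never returns in practice, dropping
 * thread priority while polling and calling schedule() when work
 * appears. example_platform_idle is hypothetical:
 */
static int example_platform_idle(void)
{
	while (1) {
		while (!need_resched())
			HMT_low();	/* spin at low SMT priority */
		HMT_medium();
		schedule();
	}
	return 0;	/* never reached */
}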
diff --git a/arch/ppc64/kernel/idle_power4.S b/arch/ppc64/kernel/idle_power4.S
new file mode 100644
index 000000000000..97e4a2655040
--- /dev/null
+++ b/arch/ppc64/kernel/idle_power4.S
@@ -0,0 +1,79 @@
1/*
 2 * This file contains the power_save function for POWER4-class CPUs,
 3 * rewritten in assembler.
 4 *
 5 * NAP is entered only if the CPU advertises CPU_FTR_CAN_NAP and the
 6 * powersave_nap runtime variable is set; both of these conditions
 7 * are checked dynamically below, so no build-time assumption about
 8 * the CPU model is made here.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/threads.h>
18#include <asm/processor.h>
19#include <asm/page.h>
20#include <asm/cputable.h>
21#include <asm/thread_info.h>
22#include <asm/ppc_asm.h>
23#include <asm/offsets.h>
24
25#undef DEBUG
26
27 .text
28
29/*
 30 * Here is the power4_idle function. This could eventually be
 31 * split into several functions, changing the function pointer
32 * depending on the various features.
33 */
34_GLOBAL(power4_idle)
35BEGIN_FTR_SECTION
36 blr
37END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
38 /* We must dynamically check for the NAP feature as it
39 * can be cleared by CPU init after the fixups are done
40 */
41 LOADBASE(r3,cur_cpu_spec)
42 ld r4,cur_cpu_spec@l(r3)
43 ld r4,CPU_SPEC_FEATURES(r4)
44 andi. r0,r4,CPU_FTR_CAN_NAP
45 beqlr
46 /* Now check if user or arch enabled NAP mode */
47 LOADBASE(r3,powersave_nap)
48 lwz r4,powersave_nap@l(r3)
49 cmpwi 0,r4,0
50 beqlr
51
52 /* Clear MSR:EE */
53 mfmsr r7
54 li r4,0
55 ori r4,r4,MSR_EE
56 andc r0,r7,r4
57 mtmsrd r0
58
59 /* Check current_thread_info()->flags */
60 clrrdi r4,r1,THREAD_SHIFT
61 ld r4,TI_FLAGS(r4)
62 andi. r0,r4,_TIF_NEED_RESCHED
63 beq 1f
64 mtmsrd r7 /* out of line this ? */
65 blr
661:
67 /* Go to NAP now */
68BEGIN_FTR_SECTION
69 DSSALL
70 sync
71END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
72 oris r7,r7,MSR_POW@h
73 sync
74 isync
75 mtmsrd r7
76 isync
77 sync
78 blr
79
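/*
 * Note on the exit path above: the mtmsrd that sets MSR_POW is what
 * actually enters NAP. The surrounding sync/isync pairs satisfy the
 * context-synchronization requirements for MSR updates; when an
 * interrupt (typically the decrementer) wakes the CPU, execution
 * resumes after the mtmsrd and the final blr returns to the idle loop.
 */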
diff --git a/arch/ppc64/kernel/init_task.c b/arch/ppc64/kernel/init_task.c
new file mode 100644
index 000000000000..941043ae040f
--- /dev/null
+++ b/arch/ppc64/kernel/init_task.c
@@ -0,0 +1,36 @@
1#include <linux/mm.h>
2#include <linux/module.h>
3#include <linux/sched.h>
4#include <linux/init.h>
5#include <linux/init_task.h>
6#include <linux/fs.h>
7#include <linux/mqueue.h>
8#include <asm/uaccess.h>
9
10static struct fs_struct init_fs = INIT_FS;
11static struct files_struct init_files = INIT_FILES;
12static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
13static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
14struct mm_struct init_mm = INIT_MM(init_mm);
15
16EXPORT_SYMBOL(init_mm);
17
18/*
19 * Initial thread structure.
20 *
21 * We need to make sure that this is 16384-byte aligned due to the
22 * way process stacks are handled. This is done by having a special
23 * "init_task" linker map entry..
24 */
25union thread_union init_thread_union
26 __attribute__((__section__(".data.init_task"))) =
27 { INIT_THREAD_INFO(init_task) };
28
29/*
30 * Initial task structure.
31 *
32 * All other task structs will be allocated on slabs in fork.c
33 */
34struct task_struct init_task = INIT_TASK(init_task);
35
36EXPORT_SYMBOL(init_task);
diff --git a/arch/ppc64/kernel/ioctl32.c b/arch/ppc64/kernel/ioctl32.c
new file mode 100644
index 000000000000..a8005db23ec5
--- /dev/null
+++ b/arch/ppc64/kernel/ioctl32.c
@@ -0,0 +1,51 @@
1/*
2 * ioctl32.c: Conversion between 32bit and 64bit native ioctls.
3 *
4 * Based on sparc64 ioctl32.c by:
5 *
6 * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
7 * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
8 *
9 * ppc64 changes:
10 *
11 * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com)
12 * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com)
13 *
14 * These routines maintain argument size conversion between 32bit and 64bit
15 * ioctls.
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation; either version
20 * 2 of the License, or (at your option) any later version.
21 */
22
23#define INCLUDES
24#include "compat_ioctl.c"
25#include <linux/syscalls.h>
26
27#define CODE
28#include "compat_ioctl.c"
29
30#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL },
31#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
32
33#define IOCTL_TABLE_START \
34 struct ioctl_trans ioctl_start[] = {
35#define IOCTL_TABLE_END \
36 };
37
38IOCTL_TABLE_START
39#include <linux/compat_ioctl.h>
40#define DECLARES
41#include "compat_ioctl.c"
42COMPATIBLE_IOCTL(TIOCSTART)
43COMPATIBLE_IOCTL(TIOCSTOP)
44COMPATIBLE_IOCTL(TIOCSLTC)
45/* Little p (/dev/rtc, /dev/envctrl, etc.) */
46COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */
47COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
48
49IOCTL_TABLE_END
50
51int ioctl_table_size = ARRAY_SIZE(ioctl_start);
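/*
 * For reference, a sketch of what a single table entry above expands
 * to, given the macros at the top of this file (MYIOC is a made-up
 * ioctl number):
 *
 *	COMPATIBLE_IOCTL(MYIOC)
 * becomes
 *	{ MYIOC, (ioctl_trans_handler_t)sys_ioctl, NULL },
 *
 * i.e. the 32-bit ioctl is passed straight through to sys_ioctl with
 * no argument translation; only entries needing conversion get a
 * dedicated handler via HANDLE_IOCTL().
 */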
diff --git a/arch/ppc64/kernel/iomap.c b/arch/ppc64/kernel/iomap.c
new file mode 100644
index 000000000000..153cc8b0f136
--- /dev/null
+++ b/arch/ppc64/kernel/iomap.c
@@ -0,0 +1,126 @@
1/*
2 * arch/ppc64/kernel/iomap.c
3 *
4 * ppc64 "iomap" interface implementation.
5 *
6 * (C) Copyright 2004 Linus Torvalds
7 */
8#include <linux/init.h>
9#include <linux/pci.h>
10#include <linux/mm.h>
11#include <asm/io.h>
12
13/*
14 * Here comes the ppc64 implementation of the IOMAP
15 * interfaces.
16 */
17unsigned int fastcall ioread8(void __iomem *addr)
18{
19 return readb(addr);
20}
21unsigned int fastcall ioread16(void __iomem *addr)
22{
23 return readw(addr);
24}
25unsigned int fastcall ioread32(void __iomem *addr)
26{
27 return readl(addr);
28}
29EXPORT_SYMBOL(ioread8);
30EXPORT_SYMBOL(ioread16);
31EXPORT_SYMBOL(ioread32);
32
33void fastcall iowrite8(u8 val, void __iomem *addr)
34{
35 writeb(val, addr);
36}
37void fastcall iowrite16(u16 val, void __iomem *addr)
38{
39 writew(val, addr);
40}
41void fastcall iowrite32(u32 val, void __iomem *addr)
42{
43 writel(val, addr);
44}
45EXPORT_SYMBOL(iowrite8);
46EXPORT_SYMBOL(iowrite16);
47EXPORT_SYMBOL(iowrite32);
48
49/*
50 * These are the "repeat read/write" functions. Note the
51 * non-CPU byte order. We do things in "IO byteorder"
52 * here.
53 *
54 * FIXME! We could make these do EEH handling if we really
55 * wanted. Not clear if we do.
56 */
57void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
58{
59 _insb((u8 __force *) addr, dst, count);
60}
61void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
62{
63 _insw_ns((u16 __force *) addr, dst, count);
64}
65void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
66{
67 _insl_ns((u32 __force *) addr, dst, count);
68}
69EXPORT_SYMBOL(ioread8_rep);
70EXPORT_SYMBOL(ioread16_rep);
71EXPORT_SYMBOL(ioread32_rep);
72
73void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
74{
75 _outsb((u8 __force *) addr, src, count);
76}
77void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
78{
79 _outsw_ns((u16 __force *) addr, src, count);
80}
81void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
82{
83 _outsl_ns((u32 __force *) addr, src, count);
84}
85EXPORT_SYMBOL(iowrite8_rep);
86EXPORT_SYMBOL(iowrite16_rep);
87EXPORT_SYMBOL(iowrite32_rep);
88
89void __iomem *ioport_map(unsigned long port, unsigned int len)
90{
91 if (!_IO_IS_VALID(port))
92 return NULL;
93 return (void __iomem *) (port+pci_io_base);
94}
95
96void ioport_unmap(void __iomem *addr)
97{
98 /* Nothing to do */
99}
100EXPORT_SYMBOL(ioport_map);
101EXPORT_SYMBOL(ioport_unmap);
102
103void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
104{
105 unsigned long start = pci_resource_start(dev, bar);
106 unsigned long len = pci_resource_len(dev, bar);
107 unsigned long flags = pci_resource_flags(dev, bar);
108
109 if (!len)
110 return NULL;
111 if (max && len > max)
112 len = max;
113 if (flags & IORESOURCE_IO)
114 return ioport_map(start, len);
115 if (flags & IORESOURCE_MEM)
116 return ioremap(start, len);
117 /* What? */
118 return NULL;
119}
120
121void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
122{
123 /* Nothing to do */
124}
125EXPORT_SYMBOL(pci_iomap);
126EXPORT_SYMBOL(pci_iounmap);
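/*
 * A minimal usage sketch for the helpers above, assuming a PCI driver
 * mapping BAR 0; example_probe_bar0 and the register offset are
 * hypothetical:
 */
static int example_probe_bar0(struct pci_dev *dev)
{
	void __iomem *regs = pci_iomap(dev, 0, 0);	/* 0 = whole BAR */

	if (!regs)
		return -ENOMEM;
	iowrite32(1, regs + 0x10);	/* device-specific register */
	pci_iounmap(dev, regs);
	return 0;
}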
diff --git a/arch/ppc64/kernel/iommu.c b/arch/ppc64/kernel/iommu.c
new file mode 100644
index 000000000000..344164681d2c
--- /dev/null
+++ b/arch/ppc64/kernel/iommu.c
@@ -0,0 +1,567 @@
1/*
2 * arch/ppc64/kernel/iommu.c
3 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
4 *
5 * Rewrite, cleanup, new allocation schemes, virtual merging:
6 * Copyright (C) 2004 Olof Johansson, IBM Corporation
7 * and Ben. Herrenschmidt, IBM Corporation
8 *
9 * Dynamic DMA mapping support, bus-independent parts.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26
27#include <linux/config.h>
28#include <linux/init.h>
29#include <linux/types.h>
30#include <linux/slab.h>
31#include <linux/mm.h>
32#include <linux/spinlock.h>
33#include <linux/string.h>
34#include <linux/dma-mapping.h>
36#include <linux/bitops.h>
37#include <asm/io.h>
38#include <asm/prom.h>
39#include <asm/iommu.h>
40#include <asm/pci-bridge.h>
41#include <asm/machdep.h>
42
43#define DBG(...)
44
45#ifdef CONFIG_IOMMU_VMERGE
46static int novmerge = 0;
47#else
48static int novmerge = 1;
49#endif
50
51static int __init setup_iommu(char *str)
52{
53 if (!strcmp(str, "novmerge"))
54 novmerge = 1;
55 else if (!strcmp(str, "vmerge"))
56 novmerge = 0;
57 return 1;
58}
59
60__setup("iommu=", setup_iommu);
61
62static unsigned long iommu_range_alloc(struct iommu_table *tbl,
63 unsigned long npages,
64 unsigned long *handle,
65 unsigned int align_order)
66{
67 unsigned long n, end, i, start;
68 unsigned long limit;
69 int largealloc = npages > 15;
70 int pass = 0;
71 unsigned long align_mask;
72
 73	align_mask = 0xffffffffffffffffUL >> (64 - align_order);
74
75 /* This allocator was derived from x86_64's bit string search */
76
77 /* Sanity check */
 78	if (unlikely(npages == 0)) {
79 if (printk_ratelimit())
80 WARN_ON(1);
81 return DMA_ERROR_CODE;
82 }
83
84 if (handle && *handle)
85 start = *handle;
86 else
87 start = largealloc ? tbl->it_largehint : tbl->it_hint;
88
 89	/* Small allocs (15 pages or less) are limited to below it_halfpoint */
90 limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
91
92 if (largealloc && start < tbl->it_halfpoint)
93 start = tbl->it_halfpoint;
94
95 /* The case below can happen if we have a small segment appended
 96	 * to a large one, or when the previous alloc was at the very end of
97 * the available space. If so, go back to the initial start.
98 */
99 if (start >= limit)
100 start = largealloc ? tbl->it_largehint : tbl->it_hint;
101
102 again:
103
104 n = find_next_zero_bit(tbl->it_map, limit, start);
105
106 /* Align allocation */
107 n = (n + align_mask) & ~align_mask;
108
109 end = n + npages;
110
111 if (unlikely(end >= limit)) {
112 if (likely(pass < 2)) {
113 /* First failure, just rescan the half of the table.
114 * Second failure, rescan the other half of the table.
115 */
116 start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
117 limit = pass ? tbl->it_size : limit;
118 pass++;
119 goto again;
120 } else {
121 /* Third failure, give up */
122 return DMA_ERROR_CODE;
123 }
124 }
125
126 for (i = n; i < end; i++)
127 if (test_bit(i, tbl->it_map)) {
128 start = i+1;
129 goto again;
130 }
131
132 for (i = n; i < end; i++)
133 __set_bit(i, tbl->it_map);
134
135 /* Bump the hint to a new block for small allocs. */
136 if (largealloc) {
137 /* Don't bump to new block to avoid fragmentation */
138 tbl->it_largehint = end;
139 } else {
140 /* Overflow will be taken care of at the next allocation */
141 tbl->it_hint = (end + tbl->it_blocksize - 1) &
142 ~(tbl->it_blocksize - 1);
143 }
144
145 /* Update handle for SG allocations */
146 if (handle)
147 *handle = end;
148
149 return n;
150}
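/*
 * Policy note for the allocator above: allocations of more than 15
 * pages start at or above it_halfpoint, while smaller ones are first
 * tried below it (the lower 3/4 of the table with the
 * iommu_init_table() defaults); only the retry passes open up the
 * rest of the table. This keeps long-lived large mappings from
 * fragmenting the space that small, frequent mappings churn through.
 */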
151
152static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
153 unsigned int npages, enum dma_data_direction direction,
154 unsigned int align_order)
155{
156 unsigned long entry, flags;
157 dma_addr_t ret = DMA_ERROR_CODE;
158
159 spin_lock_irqsave(&(tbl->it_lock), flags);
160
161 entry = iommu_range_alloc(tbl, npages, NULL, align_order);
162
163 if (unlikely(entry == DMA_ERROR_CODE)) {
164 spin_unlock_irqrestore(&(tbl->it_lock), flags);
165 return DMA_ERROR_CODE;
166 }
167
168 entry += tbl->it_offset; /* Offset into real TCE table */
169 ret = entry << PAGE_SHIFT; /* Set the return dma address */
170
171 /* Put the TCEs in the HW table */
172 ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
173 direction);
174
175
176 /* Flush/invalidate TLB caches if necessary */
177 if (ppc_md.tce_flush)
178 ppc_md.tce_flush(tbl);
179
180 spin_unlock_irqrestore(&(tbl->it_lock), flags);
181
182 /* Make sure updates are seen by hardware */
183 mb();
184
185 return ret;
186}
187
188static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
189 unsigned int npages)
190{
191 unsigned long entry, free_entry;
192 unsigned long i;
193
194 entry = dma_addr >> PAGE_SHIFT;
195 free_entry = entry - tbl->it_offset;
196
197 if (((free_entry + npages) > tbl->it_size) ||
198 (entry < tbl->it_offset)) {
199 if (printk_ratelimit()) {
200 printk(KERN_INFO "iommu_free: invalid entry\n");
201 printk(KERN_INFO "\tentry = 0x%lx\n", entry);
202 printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
203 printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
204 printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno);
205 printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size);
206 printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset);
207 printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index);
208 WARN_ON(1);
209 }
210 return;
211 }
212
213 ppc_md.tce_free(tbl, entry, npages);
214
215 for (i = 0; i < npages; i++)
216 __clear_bit(free_entry+i, tbl->it_map);
217}
218
219static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
220 unsigned int npages)
221{
222 unsigned long flags;
223
224 spin_lock_irqsave(&(tbl->it_lock), flags);
225
226 __iommu_free(tbl, dma_addr, npages);
227
228 /* Make sure TLB cache is flushed if the HW needs it. We do
229 * not do an mb() here on purpose, it is not needed on any of
230 * the current platforms.
231 */
232 if (ppc_md.tce_flush)
233 ppc_md.tce_flush(tbl);
234
235 spin_unlock_irqrestore(&(tbl->it_lock), flags);
236}
237
238int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
239 struct scatterlist *sglist, int nelems,
240 enum dma_data_direction direction)
241{
242 dma_addr_t dma_next = 0, dma_addr;
243 unsigned long flags;
244 struct scatterlist *s, *outs, *segstart;
245 int outcount;
246 unsigned long handle;
247
248 BUG_ON(direction == DMA_NONE);
249
250 if ((nelems == 0) || !tbl)
251 return 0;
252
253 outs = s = segstart = &sglist[0];
254 outcount = 1;
255 handle = 0;
256
257 /* Init first segment length for backout at failure */
258 outs->dma_length = 0;
259
260 DBG("mapping %d elements:\n", nelems);
261
262 spin_lock_irqsave(&(tbl->it_lock), flags);
263
264 for (s = outs; nelems; nelems--, s++) {
265 unsigned long vaddr, npages, entry, slen;
266
267 slen = s->length;
268 /* Sanity check */
269 if (slen == 0) {
270 dma_next = 0;
271 continue;
272 }
273 /* Allocate iommu entries for that segment */
274 vaddr = (unsigned long)page_address(s->page) + s->offset;
275 npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
276 npages >>= PAGE_SHIFT;
277 entry = iommu_range_alloc(tbl, npages, &handle, 0);
278
279 DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
280
281 /* Handle failure */
282 if (unlikely(entry == DMA_ERROR_CODE)) {
283 if (printk_ratelimit())
284 printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
285 " npages %lx\n", tbl, vaddr, npages);
286 goto failure;
287 }
288
289 /* Convert entry to a dma_addr_t */
290 entry += tbl->it_offset;
291 dma_addr = entry << PAGE_SHIFT;
292 dma_addr |= s->offset;
293
294 DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n",
295 npages, entry, dma_addr);
296
297 /* Insert into HW table */
298 ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
299
300 /* If we are in an open segment, try merging */
301 if (segstart != s) {
302 DBG(" - trying merge...\n");
303 /* We cannot merge if:
304 * - allocated dma_addr isn't contiguous to previous allocation
305 */
306 if (novmerge || (dma_addr != dma_next)) {
307 /* Can't merge: create a new segment */
308 segstart = s;
309 outcount++; outs++;
310 DBG(" can't merge, new segment.\n");
311 } else {
312 outs->dma_length += s->length;
313 DBG(" merged, new len: %lx\n", outs->dma_length);
314 }
315 }
316
317 if (segstart == s) {
318 /* This is a new segment, fill entries */
319 DBG(" - filling new segment.\n");
320 outs->dma_address = dma_addr;
321 outs->dma_length = slen;
322 }
323
324 /* Calculate next page pointer for contiguous check */
325 dma_next = dma_addr + slen;
326
327 DBG(" - dma next is: %lx\n", dma_next);
328 }
329
330 /* Flush/invalidate TLB caches if necessary */
331 if (ppc_md.tce_flush)
332 ppc_md.tce_flush(tbl);
333
334 spin_unlock_irqrestore(&(tbl->it_lock), flags);
335
336 /* Make sure updates are seen by hardware */
337 mb();
338
339 DBG("mapped %d elements:\n", outcount);
340
 341	/* For the sake of iommu_unmap_sg, we clear out the length in the
342 * next entry of the sglist if we didn't fill the list completely
343 */
344 if (outcount < nelems) {
345 outs++;
346 outs->dma_address = DMA_ERROR_CODE;
347 outs->dma_length = 0;
348 }
349 return outcount;
350
351 failure:
352 for (s = &sglist[0]; s <= outs; s++) {
353 if (s->dma_length != 0) {
354 unsigned long vaddr, npages;
355
356 vaddr = s->dma_address & PAGE_MASK;
357 npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
358 >> PAGE_SHIFT;
359 __iommu_free(tbl, vaddr, npages);
360 }
361 }
362 spin_unlock_irqrestore(&(tbl->it_lock), flags);
363 return 0;
364}
365
366
367void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
368 int nelems, enum dma_data_direction direction)
369{
370 unsigned long flags;
371
372 BUG_ON(direction == DMA_NONE);
373
374 if (!tbl)
375 return;
376
377 spin_lock_irqsave(&(tbl->it_lock), flags);
378
379 while (nelems--) {
380 unsigned int npages;
381 dma_addr_t dma_handle = sglist->dma_address;
382
383 if (sglist->dma_length == 0)
384 break;
385 npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
386 - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
387 __iommu_free(tbl, dma_handle, npages);
388 sglist++;
389 }
390
391 /* Flush/invalidate TLBs if necessary. As for iommu_free(), we
 392	 * do not do an mb() here; the affected platforms do not need it
393 * when freeing.
394 */
395 if (ppc_md.tce_flush)
396 ppc_md.tce_flush(tbl);
397
398 spin_unlock_irqrestore(&(tbl->it_lock), flags);
399}
400
401/*
 402 * Build an iommu_table structure. This contains a bitmap which
403 * is used to manage allocation of the tce space.
404 */
405struct iommu_table *iommu_init_table(struct iommu_table *tbl)
406{
407 unsigned long sz;
408 static int welcomed = 0;
409
410 /* Set aside 1/4 of the table for large allocations. */
411 tbl->it_halfpoint = tbl->it_size * 3 / 4;
412
413 /* number of bytes needed for the bitmap */
414 sz = (tbl->it_size + 7) >> 3;
415
416 tbl->it_map = (unsigned long *)__get_free_pages(GFP_ATOMIC, get_order(sz));
417 if (!tbl->it_map)
418 panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
419
420 memset(tbl->it_map, 0, sz);
421
422 tbl->it_hint = 0;
423 tbl->it_largehint = tbl->it_halfpoint;
424 spin_lock_init(&tbl->it_lock);
425
426 if (!welcomed) {
427 printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
428 novmerge ? "disabled" : "enabled");
429 welcomed = 1;
430 }
431
432 return tbl;
433}
434
435void iommu_free_table(struct device_node *dn)
436{
437 struct iommu_table *tbl = dn->iommu_table;
438 unsigned long bitmap_sz, i;
439 unsigned int order;
440
441 if (!tbl || !tbl->it_map) {
442 printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
443 dn->full_name);
444 return;
445 }
446
447 /* verify that table contains no entries */
448 /* it_size is in entries, and we're examining 64 at a time */
449 for (i = 0; i < (tbl->it_size/64); i++) {
450 if (tbl->it_map[i] != 0) {
451 printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
452 __FUNCTION__, dn->full_name);
453 break;
454 }
455 }
456
457 /* calculate bitmap size in bytes */
458 bitmap_sz = (tbl->it_size + 7) / 8;
459
460 /* free bitmap */
461 order = get_order(bitmap_sz);
462 free_pages((unsigned long) tbl->it_map, order);
463
464 /* free table */
465 kfree(tbl);
466}
467
468/* Creates TCEs for a user provided buffer. The user buffer must be
469 * contiguous real kernel storage (not vmalloc). The address of the buffer
470 * passed here is the kernel (virtual) address of the buffer. The buffer
471 * need not be page aligned, the dma_addr_t returned will point to the same
472 * byte within the page as vaddr.
473 */
474dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
475 size_t size, enum dma_data_direction direction)
476{
477 dma_addr_t dma_handle = DMA_ERROR_CODE;
478 unsigned long uaddr;
479 unsigned int npages;
480
481 BUG_ON(direction == DMA_NONE);
482
483 uaddr = (unsigned long)vaddr;
484 npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
485 npages >>= PAGE_SHIFT;
486
487 if (tbl) {
488 dma_handle = iommu_alloc(tbl, vaddr, npages, direction, 0);
489 if (dma_handle == DMA_ERROR_CODE) {
490 if (printk_ratelimit()) {
491 printk(KERN_INFO "iommu_alloc failed, "
492 "tbl %p vaddr %p npages %d\n",
493 tbl, vaddr, npages);
494 }
495 } else
496 dma_handle |= (uaddr & ~PAGE_MASK);
497 }
498
499 return dma_handle;
500}
501
502void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
503 size_t size, enum dma_data_direction direction)
504{
505 BUG_ON(direction == DMA_NONE);
506
507 if (tbl)
508 iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
509 (dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
510}
511
512/* Allocates a contiguous real buffer and creates mappings over it.
513 * Returns the virtual address of the buffer and sets dma_handle
514 * to the dma address (mapping) of the first page.
515 */
516void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
517 dma_addr_t *dma_handle, unsigned int __nocast flag)
518{
519 void *ret = NULL;
520 dma_addr_t mapping;
521 unsigned int npages, order;
522
523 size = PAGE_ALIGN(size);
524 npages = size >> PAGE_SHIFT;
525 order = get_order(size);
526
527 /*
528 * Client asked for way too much space. This is checked later
529 * anyway. It is easier to debug here for the drivers than in
530 * the tce tables.
531 */
532 if (order >= IOMAP_MAX_ORDER) {
 533		printk("iommu_alloc_coherent: size too large: 0x%lx\n", size);
534 return NULL;
535 }
536
537 if (!tbl)
538 return NULL;
539
540 /* Alloc enough pages (and possibly more) */
541 ret = (void *)__get_free_pages(flag, order);
542 if (!ret)
543 return NULL;
544 memset(ret, 0, size);
545
546 /* Set up tces to cover the allocated range */
547 mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, order);
548 if (mapping == DMA_ERROR_CODE) {
549 free_pages((unsigned long)ret, order);
550 ret = NULL;
551 } else
552 *dma_handle = mapping;
553 return ret;
554}
555
556void iommu_free_coherent(struct iommu_table *tbl, size_t size,
557 void *vaddr, dma_addr_t dma_handle)
558{
559 unsigned int npages;
560
561 if (tbl) {
562 size = PAGE_ALIGN(size);
563 npages = size >> PAGE_SHIFT;
564 iommu_free(tbl, dma_handle, npages);
565 free_pages((unsigned long)vaddr, get_order(size));
566 }
567}
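/*
 * A minimal usage sketch for the single-mapping API above, assuming a
 * bus layer that already owns a valid iommu_table; example_dma_to_dev
 * and its arguments are hypothetical:
 */
static int example_dma_to_dev(struct iommu_table *tbl, void *buf,
			      size_t len)
{
	dma_addr_t handle;

	handle = iommu_map_single(tbl, buf, len, DMA_TO_DEVICE);
	if (handle == DMA_ERROR_CODE)
		return -ENOMEM;
	/* ... program the device with "handle", wait for completion ... */
	iommu_unmap_single(tbl, handle, len, DMA_TO_DEVICE);
	return 0;
}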
diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c
new file mode 100644
index 000000000000..4fd7f203c1e3
--- /dev/null
+++ b/arch/ppc64/kernel/irq.c
@@ -0,0 +1,519 @@
1/*
2 * arch/ppc/kernel/irq.c
3 *
4 * Derived from arch/i386/kernel/irq.c
5 * Copyright (C) 1992 Linus Torvalds
6 * Adapted from arch/i386 by Gary Thomas
7 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
8 * Updated and modified by Cort Dougan (cort@cs.nmt.edu)
9 * Copyright (C) 1996 Cort Dougan
10 * Adapted for Power Macintosh by Paul Mackerras
11 * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
12 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 * This file contains the code used by various IRQ handling routines:
20 * asking for different IRQ's should be done through these routines
21 * instead of just grabbing them. Thus setups with different IRQ numbers
22 * shouldn't result in any weird surprises, and installing new handlers
23 * should be easier.
24 */
25
26#include <linux/errno.h>
27#include <linux/module.h>
28#include <linux/threads.h>
29#include <linux/kernel_stat.h>
30#include <linux/signal.h>
31#include <linux/sched.h>
32#include <linux/ioport.h>
33#include <linux/interrupt.h>
34#include <linux/timex.h>
35#include <linux/config.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/pci.h>
39#include <linux/delay.h>
40#include <linux/irq.h>
41#include <linux/proc_fs.h>
42#include <linux/random.h>
43#include <linux/kallsyms.h>
44#include <linux/profile.h>
45#include <linux/bitops.h>
46
47#include <asm/uaccess.h>
48#include <asm/system.h>
49#include <asm/io.h>
50#include <asm/pgtable.h>
51#include <asm/irq.h>
52#include <asm/cache.h>
53#include <asm/prom.h>
54#include <asm/ptrace.h>
55#include <asm/iSeries/LparData.h>
56#include <asm/machdep.h>
57#include <asm/paca.h>
58
59#ifdef CONFIG_SMP
60extern void iSeries_smp_message_recv( struct pt_regs * );
61#endif
62
63extern irq_desc_t irq_desc[NR_IRQS];
64EXPORT_SYMBOL(irq_desc);
65
66int distribute_irqs = 1;
67int __irq_offset_value;
68int ppc_spurious_interrupts;
69unsigned long lpevent_count;
70u64 ppc64_interrupt_controller;
71
72int show_interrupts(struct seq_file *p, void *v)
73{
74 int i = *(loff_t *) v, j;
75 struct irqaction * action;
76 irq_desc_t *desc;
77 unsigned long flags;
78
79 if (i == 0) {
80 seq_printf(p, " ");
81 for (j=0; j<NR_CPUS; j++) {
82 if (cpu_online(j))
83 seq_printf(p, "CPU%d ",j);
84 }
85 seq_putc(p, '\n');
86 }
87
88 if (i < NR_IRQS) {
89 desc = get_irq_desc(i);
90 spin_lock_irqsave(&desc->lock, flags);
91 action = desc->action;
92 if (!action || !action->handler)
93 goto skip;
94 seq_printf(p, "%3d: ", i);
95#ifdef CONFIG_SMP
96 for (j = 0; j < NR_CPUS; j++) {
97 if (cpu_online(j))
98 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
99 }
100#else
101 seq_printf(p, "%10u ", kstat_irqs(i));
102#endif /* CONFIG_SMP */
103 if (desc->handler)
104 seq_printf(p, " %s ", desc->handler->typename );
105 else
106 seq_printf(p, " None ");
107 seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge ");
108 seq_printf(p, " %s",action->name);
109 for (action=action->next; action; action = action->next)
110 seq_printf(p, ", %s", action->name);
111 seq_putc(p, '\n');
112skip:
113 spin_unlock_irqrestore(&desc->lock, flags);
114 } else if (i == NR_IRQS)
115 seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts);
116 return 0;
117}
118
119#ifdef CONFIG_HOTPLUG_CPU
120void fixup_irqs(cpumask_t map)
121{
122 unsigned int irq;
123 static int warned;
124
125 for_each_irq(irq) {
126 cpumask_t mask;
127
128 if (irq_desc[irq].status & IRQ_PER_CPU)
129 continue;
130
131 cpus_and(mask, irq_affinity[irq], map);
132 if (any_online_cpu(mask) == NR_CPUS) {
133 printk("Breaking affinity for irq %i\n", irq);
134 mask = map;
135 }
136 if (irq_desc[irq].handler->set_affinity)
137 irq_desc[irq].handler->set_affinity(irq, mask);
138 else if (irq_desc[irq].action && !(warned++))
139 printk("Cannot set affinity for irq %i\n", irq);
140 }
141
142 local_irq_enable();
143 mdelay(1);
144 local_irq_disable();
145}
146#endif
147
148extern int noirqdebug;
149
150/*
151 * Eventually, this should take an array of interrupts and an array size
152 * so it can dispatch multiple interrupts.
153 */
154void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq)
155{
156 int status;
157 struct irqaction *action;
158 int cpu = smp_processor_id();
159 irq_desc_t *desc = get_irq_desc(irq);
160 irqreturn_t action_ret;
161#ifdef CONFIG_IRQSTACKS
162 struct thread_info *curtp, *irqtp;
163#endif
164
165 kstat_cpu(cpu).irqs[irq]++;
166
167 if (desc->status & IRQ_PER_CPU) {
168 /* no locking required for CPU-local interrupts: */
169 ack_irq(irq);
170 action_ret = handle_IRQ_event(irq, regs, desc->action);
171 desc->handler->end(irq);
172 return;
173 }
174
175 spin_lock(&desc->lock);
176 ack_irq(irq);
177 /*
 178	 * REPLAY is when Linux resends an IRQ that was dropped earlier
 179	 * WAITING is used by probe to mark irqs that are being tested
180 */
181 status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
182 status |= IRQ_PENDING; /* we _want_ to handle it */
183
184 /*
185 * If the IRQ is disabled for whatever reason, we cannot
186 * use the action we have.
187 */
188 action = NULL;
189 if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
190 action = desc->action;
191 if (!action || !action->handler) {
192 ppc_spurious_interrupts++;
193 printk(KERN_DEBUG "Unhandled interrupt %x, disabled\n", irq);
194 /* We can't call disable_irq here, it would deadlock */
195 if (!desc->depth)
196 desc->depth = 1;
197 desc->status |= IRQ_DISABLED;
 198			 * This is not a real spurious interrupt, we
199 * have to eoi it, so we jump to out
200 */
201 mask_irq(irq);
202 goto out;
203 }
204 status &= ~IRQ_PENDING; /* we commit to handling */
205 status |= IRQ_INPROGRESS; /* we are handling it */
206 }
207 desc->status = status;
208
209 /*
210 * If there is no IRQ handler or it was disabled, exit early.
 211	 * Since we set PENDING, if another processor is handling
 212	 * a different instance of this same irq, the other processor
 213	 * will take care of it.
214 */
215 if (unlikely(!action))
216 goto out;
217
218 /*
219 * Edge triggered interrupts need to remember
220 * pending events.
221 * This applies to any hw interrupts that allow a second
222 * instance of the same irq to arrive while we are in do_IRQ
223 * or in the handler. But the code here only handles the _second_
224 * instance of the irq, not the third or fourth. So it is mostly
225 * useful for irq hardware that does not mask cleanly in an
226 * SMP environment.
227 */
228 for (;;) {
229 spin_unlock(&desc->lock);
230
231#ifdef CONFIG_IRQSTACKS
232 /* Switch to the irq stack to handle this */
233 curtp = current_thread_info();
234 irqtp = hardirq_ctx[smp_processor_id()];
235 if (curtp != irqtp) {
236 irqtp->task = curtp->task;
237 irqtp->flags = 0;
238 action_ret = call_handle_IRQ_event(irq, regs, action, irqtp);
239 irqtp->task = NULL;
240 if (irqtp->flags)
241 set_bits(irqtp->flags, &curtp->flags);
242 } else
243#endif
244 action_ret = handle_IRQ_event(irq, regs, action);
245
246 spin_lock(&desc->lock);
247 if (!noirqdebug)
248 note_interrupt(irq, desc, action_ret);
249 if (likely(!(desc->status & IRQ_PENDING)))
250 break;
251 desc->status &= ~IRQ_PENDING;
252 }
253out:
254 desc->status &= ~IRQ_INPROGRESS;
255 /*
256 * The ->end() handler has to deal with interrupts which got
257 * disabled while the handler was running.
258 */
259 if (desc->handler) {
260 if (desc->handler->end)
261 desc->handler->end(irq);
262 else if (desc->handler->enable)
263 desc->handler->enable(irq);
264 }
265 spin_unlock(&desc->lock);
266}
267
268#ifdef CONFIG_PPC_ISERIES
269void do_IRQ(struct pt_regs *regs)
270{
271 struct paca_struct *lpaca;
272 struct ItLpQueue *lpq;
273
274 irq_enter();
275
276#ifdef CONFIG_DEBUG_STACKOVERFLOW
277 /* Debugging check for stack overflow: is there less than 2KB free? */
278 {
279 long sp;
280
281 sp = __get_SP() & (THREAD_SIZE-1);
282
283 if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
284 printk("do_IRQ: stack overflow: %ld\n",
285 sp - sizeof(struct thread_info));
286 dump_stack();
287 }
288 }
289#endif
290
291 lpaca = get_paca();
292#ifdef CONFIG_SMP
293 if (lpaca->lppaca.int_dword.fields.ipi_cnt) {
294 lpaca->lppaca.int_dword.fields.ipi_cnt = 0;
295 iSeries_smp_message_recv(regs);
296 }
297#endif /* CONFIG_SMP */
298 lpq = lpaca->lpqueue_ptr;
299 if (lpq && ItLpQueue_isLpIntPending(lpq))
300 lpevent_count += ItLpQueue_process(lpq, regs);
301
302 irq_exit();
303
304 if (lpaca->lppaca.int_dword.fields.decr_int) {
305 lpaca->lppaca.int_dword.fields.decr_int = 0;
306 /* Signal a fake decrementer interrupt */
307 timer_interrupt(regs);
308 }
309}
310
311#else /* CONFIG_PPC_ISERIES */
312
313void do_IRQ(struct pt_regs *regs)
314{
315 int irq;
316
317 irq_enter();
318
319#ifdef CONFIG_DEBUG_STACKOVERFLOW
320 /* Debugging check for stack overflow: is there less than 2KB free? */
321 {
322 long sp;
323
324 sp = __get_SP() & (THREAD_SIZE-1);
325
326 if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
327 printk("do_IRQ: stack overflow: %ld\n",
328 sp - sizeof(struct thread_info));
329 dump_stack();
330 }
331 }
332#endif
333
334 irq = ppc_md.get_irq(regs);
335
336 if (irq >= 0)
337 ppc_irq_dispatch_handler(regs, irq);
338 else
339 /* That's not SMP safe ... but who cares ? */
340 ppc_spurious_interrupts++;
341
342 irq_exit();
343}
344#endif /* CONFIG_PPC_ISERIES */
345
346void __init init_IRQ(void)
347{
348 static int once = 0;
349
350 if (once)
351 return;
352
353 once++;
354
355 ppc_md.init_IRQ();
356 irq_ctx_init();
357}
358
359#ifndef CONFIG_PPC_ISERIES
360/*
361 * Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
362 */
363
364#define UNDEFINED_IRQ 0xffffffff
365unsigned int virt_irq_to_real_map[NR_IRQS];
366
367/*
368 * Don't use virtual irqs 0, 1, 2 for devices.
369 * The pcnet32 driver considers interrupt numbers < 2 to be invalid,
370 * and 2 is the XICS IPI interrupt.
371 * We limit virtual irqs to 17 less than NR_IRQS so that when we
372 * offset them by 16 (to reserve the first 16 for ISA interrupts)
373 * we don't end up with an interrupt number >= NR_IRQS.
374 */
375#define MIN_VIRT_IRQ 3
376#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1)
377#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1)
378
379void
380virt_irq_init(void)
381{
382 int i;
383 for (i = 0; i < NR_IRQS; i++)
384 virt_irq_to_real_map[i] = UNDEFINED_IRQ;
385}
386
387/* Create a mapping for a real_irq if it doesn't already exist.
388 * Return the virtual irq as a convenience.
389 */
390int virt_irq_create_mapping(unsigned int real_irq)
391{
392 unsigned int virq, first_virq;
393 static int warned;
394
395 if (ppc64_interrupt_controller == IC_OPEN_PIC)
396 return real_irq; /* no mapping for openpic (for now) */
397
398 /* don't map interrupts < MIN_VIRT_IRQ */
399 if (real_irq < MIN_VIRT_IRQ) {
400 virt_irq_to_real_map[real_irq] = real_irq;
401 return real_irq;
402 }
403
404 /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */
405 virq = real_irq;
406 if (virq > MAX_VIRT_IRQ)
407 virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
408
409 /* search for this number or a free slot */
410 first_virq = virq;
411 while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) {
412 if (virt_irq_to_real_map[virq] == real_irq)
413 return virq;
414 if (++virq > MAX_VIRT_IRQ)
415 virq = MIN_VIRT_IRQ;
416 if (virq == first_virq)
417 goto nospace; /* oops, no free slots */
418 }
419
420 virt_irq_to_real_map[virq] = real_irq;
421 return virq;
422
423 nospace:
424 if (!warned) {
425 printk(KERN_CRIT "Interrupt table is full\n");
426 printk(KERN_CRIT "Increase NR_IRQS (currently %d) "
427 "in your kernel sources and rebuild.\n", NR_IRQS);
428 warned = 1;
429 }
430 return NO_IRQ;
431}
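/*
 * Worked example for the mapping above, assuming NR_IRQS == 512 so
 * that MAX_VIRT_IRQ == 495 and NR_VIRT_IRQS == 493: real irq 1000
 * first hashes to (1000 % 493) + 3 == 17; if slot 17 already maps a
 * different real irq, the search probes 18, 19, ... linearly
 * (wrapping back to MIN_VIRT_IRQ past MAX_VIRT_IRQ) until it finds
 * the existing entry or a free slot.
 */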
432
433/*
 434 * In most cases we will get a hit on the very first slot checked in the
435 * virt_irq_to_real_map. Only when there are a large number of
436 * IRQs will this be expensive.
437 */
438unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
439{
440 unsigned int virq;
441 unsigned int first_virq;
442
443 virq = real_irq;
444
445 if (virq > MAX_VIRT_IRQ)
446 virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ;
447
448 first_virq = virq;
449
450 do {
451 if (virt_irq_to_real_map[virq] == real_irq)
452 return virq;
453
454 virq++;
455
456 if (virq >= MAX_VIRT_IRQ)
457 virq = 0;
458
459 } while (first_virq != virq);
460
461 return NO_IRQ;
462
463}
464
465#endif /* CONFIG_PPC_ISERIES */
466
467#ifdef CONFIG_IRQSTACKS
468struct thread_info *softirq_ctx[NR_CPUS];
469struct thread_info *hardirq_ctx[NR_CPUS];
470
471void irq_ctx_init(void)
472{
473 struct thread_info *tp;
474 int i;
475
476 for_each_cpu(i) {
477 memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
478 tp = softirq_ctx[i];
479 tp->cpu = i;
480 tp->preempt_count = SOFTIRQ_OFFSET;
481
482 memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
483 tp = hardirq_ctx[i];
484 tp->cpu = i;
485 tp->preempt_count = HARDIRQ_OFFSET;
486 }
487}
488
489void do_softirq(void)
490{
491 unsigned long flags;
492 struct thread_info *curtp, *irqtp;
493
494 if (in_interrupt())
495 return;
496
497 local_irq_save(flags);
498
499 if (local_softirq_pending()) {
500 curtp = current_thread_info();
501 irqtp = softirq_ctx[smp_processor_id()];
502 irqtp->task = curtp->task;
503 call_do_softirq(irqtp);
504 irqtp->task = NULL;
505 }
506
507 local_irq_restore(flags);
508}
509EXPORT_SYMBOL(do_softirq);
510
511#endif /* CONFIG_IRQSTACKS */
512
513static int __init setup_noirqdistrib(char *str)
514{
515 distribute_irqs = 0;
516 return 1;
517}
518
519__setup("noirqdistrib", setup_noirqdistrib);
diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c
new file mode 100644
index 000000000000..103daaf73573
--- /dev/null
+++ b/arch/ppc64/kernel/kprobes.c
@@ -0,0 +1,290 @@
1/*
2 * Kernel Probes (KProbes)
3 * arch/ppc64/kernel/kprobes.c
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * Copyright (C) IBM Corporation, 2002, 2004
20 *
21 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 22 *		Probes initial implementation (includes contributions from
23 * Rusty Russell).
24 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
25 * interface to access function arguments.
26 * 2004-Nov Ananth N Mavinakayanahalli <ananth@in.ibm.com> kprobes port
27 * for PPC64
28 */
29
30#include <linux/config.h>
31#include <linux/kprobes.h>
32#include <linux/ptrace.h>
33#include <linux/spinlock.h>
34#include <linux/preempt.h>
35#include <asm/kdebug.h>
36#include <asm/sstep.h>
37
38/* kprobe_status settings */
39#define KPROBE_HIT_ACTIVE 0x00000001
40#define KPROBE_HIT_SS 0x00000002
41
42static struct kprobe *current_kprobe;
43static unsigned long kprobe_status, kprobe_saved_msr;
44static struct pt_regs jprobe_saved_regs;
45
46int arch_prepare_kprobe(struct kprobe *p)
47{
48 kprobe_opcode_t insn = *p->addr;
49
50 if (IS_MTMSRD(insn) || IS_RFID(insn))
51 /* cannot put bp on RFID/MTMSRD */
52 return 1;
53 return 0;
54}
55
56void arch_copy_kprobe(struct kprobe *p)
57{
58 memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
59}
60
61void arch_remove_kprobe(struct kprobe *p)
62{
63}
64
65static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs)
66{
67 *p->addr = p->opcode;
68 regs->nip = (unsigned long)p->addr;
69}
70
71static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
72{
73 regs->msr |= MSR_SE;
 74	/* single step inline if it is a breakpoint instruction */
75 if (p->opcode == BREAKPOINT_INSTRUCTION)
76 regs->nip = (unsigned long)p->addr;
77 else
78 regs->nip = (unsigned long)&p->ainsn.insn;
79}
80
81static inline int kprobe_handler(struct pt_regs *regs)
82{
83 struct kprobe *p;
84 int ret = 0;
85 unsigned int *addr = (unsigned int *)regs->nip;
86
87 /* Check we're not actually recursing */
88 if (kprobe_running()) {
 89		/* We *are* holding the lock here, so this is safe.
 90		 * Disarm the probe we just hit, and ignore it. */
91 p = get_kprobe(addr);
92 if (p) {
93 if (kprobe_status == KPROBE_HIT_SS) {
94 regs->msr &= ~MSR_SE;
95 regs->msr |= kprobe_saved_msr;
96 unlock_kprobes();
97 goto no_kprobe;
98 }
99 disarm_kprobe(p, regs);
100 ret = 1;
101 } else {
102 p = current_kprobe;
103 if (p->break_handler && p->break_handler(p, regs)) {
104 goto ss_probe;
105 }
106 }
 107		/* If it's not ours, it can't be a delete race (we hold the lock). */
108 goto no_kprobe;
109 }
110
111 lock_kprobes();
112 p = get_kprobe(addr);
113 if (!p) {
114 unlock_kprobes();
115 if (*addr != BREAKPOINT_INSTRUCTION) {
116 /*
117 * PowerPC has multiple variants of the "trap"
118 * instruction. If the current instruction is a
119 * trap variant, it could belong to someone else
120 */
121 kprobe_opcode_t cur_insn = *addr;
122 if (IS_TW(cur_insn) || IS_TD(cur_insn) ||
123 IS_TWI(cur_insn) || IS_TDI(cur_insn))
124 goto no_kprobe;
125 /*
126 * The breakpoint instruction was removed right
127 * after we hit it. Another cpu has removed
128 * either a probepoint or a debugger breakpoint
129 * at this address. In either case, no further
130 * handling of this interrupt is appropriate.
131 */
132 ret = 1;
133 }
134 /* Not one of ours: let kernel handle it */
135 goto no_kprobe;
136 }
137
138 kprobe_status = KPROBE_HIT_ACTIVE;
139 current_kprobe = p;
140 kprobe_saved_msr = regs->msr;
141 if (p->pre_handler && p->pre_handler(p, regs))
142 /* handler has already set things up, so skip ss setup */
143 return 1;
144
145ss_probe:
146 prepare_singlestep(p, regs);
147 kprobe_status = KPROBE_HIT_SS;
148 /*
149 * This preempt_disable() matches the preempt_enable_no_resched()
150 * in post_kprobe_handler().
151 */
152 preempt_disable();
153 return 1;
154
155no_kprobe:
156 return ret;
157}
158
159/*
160 * Called after single-stepping. p->addr is the address of the
 161 * instruction that has been replaced by the "breakpoint"
162 * instruction. To avoid the SMP problems that can occur when we
163 * temporarily put back the original opcode to single-step, we
164 * single-stepped a copy of the instruction. The address of this
165 * copy is p->ainsn.insn.
166 */
167static void resume_execution(struct kprobe *p, struct pt_regs *regs)
168{
169 int ret;
170
171 regs->nip = (unsigned long)p->addr;
172 ret = emulate_step(regs, p->ainsn.insn[0]);
173 if (ret == 0)
174 regs->nip = (unsigned long)p->addr + 4;
175
176 regs->msr &= ~MSR_SE;
177}
178
179static inline int post_kprobe_handler(struct pt_regs *regs)
180{
181 if (!kprobe_running())
182 return 0;
183
184 if (current_kprobe->post_handler)
185 current_kprobe->post_handler(current_kprobe, regs, 0);
186
187 resume_execution(current_kprobe, regs);
188 regs->msr |= kprobe_saved_msr;
189
190 unlock_kprobes();
191 preempt_enable_no_resched();
192
193 /*
194 * if somebody else is singlestepping across a probe point, msr
195 * will have SE set, in which case, continue the remaining processing
196 * of do_debug, as if this is not a probe hit.
197 */
198 if (regs->msr & MSR_SE)
199 return 0;
200
201 return 1;
202}
203
204/* Interrupts disabled, kprobe_lock held. */
205static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
206{
207 if (current_kprobe->fault_handler
208 && current_kprobe->fault_handler(current_kprobe, regs, trapnr))
209 return 1;
210
211 if (kprobe_status & KPROBE_HIT_SS) {
212 resume_execution(current_kprobe, regs);
213 regs->msr |= kprobe_saved_msr;
214
215 unlock_kprobes();
216 preempt_enable_no_resched();
217 }
218 return 0;
219}
220
221/*
 222 * Wrapper routine for handling exceptions.
223 */
224int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
225 void *data)
226{
227 struct die_args *args = (struct die_args *)data;
228 int ret = NOTIFY_DONE;
229
230 /*
231 * Interrupts are not disabled here. We need to disable
232 * preemption, because kprobe_running() uses smp_processor_id().
233 */
234 preempt_disable();
235 switch (val) {
236 case DIE_IABR_MATCH:
237 case DIE_DABR_MATCH:
238 case DIE_BPT:
239 if (kprobe_handler(args->regs))
240 ret = NOTIFY_STOP;
241 break;
242 case DIE_SSTEP:
243 if (post_kprobe_handler(args->regs))
244 ret = NOTIFY_STOP;
245 break;
246 case DIE_GPF:
247 case DIE_PAGE_FAULT:
248 if (kprobe_running() &&
249 kprobe_fault_handler(args->regs, args->trapnr))
250 ret = NOTIFY_STOP;
251 break;
252 default:
253 break;
254 }
255 preempt_enable();
256 return ret;
257}
258
259int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
260{
261 struct jprobe *jp = container_of(p, struct jprobe, kp);
262
263 memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs));
264
265 /* setup return addr to the jprobe handler routine */
266 regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
267 regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
268
269 return 1;
270}
271
272void jprobe_return(void)
273{
274 asm volatile("trap" ::: "memory");
275}
276
277void jprobe_return_end(void)
278{
 279}
280
281int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
282{
283 /*
 284	 * FIXME - we should ideally be validating that we got here because
285 * of the "trap" in jprobe_return() above, before restoring the
286 * saved regs...
287 */
288 memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs));
289 return 1;
290}
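/*
 * A minimal client sketch for the handlers above, using the generic
 * kprobes registration interface; example_pre and the probed symbol
 * are hypothetical, hence the #if 0:
 */
#if 0
static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	printk("kprobe hit at nip 0x%lx\n", regs->nip);
	return 0;	/* 0: go on to single-step the original insn */
}

static struct kprobe example_kp = {
	.addr		= (kprobe_opcode_t *)some_probed_function,
	.pre_handler	= example_pre,
};

/* register_kprobe(&example_kp) plants the breakpoint; kprobe_handler()
 * above then runs example_pre before single-stepping the displaced
 * instruction, and post_kprobe_handler() resumes execution. */
#endif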
diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c
new file mode 100644
index 000000000000..d6c6bd03d2a4
--- /dev/null
+++ b/arch/ppc64/kernel/lmb.c
@@ -0,0 +1,372 @@
1/*
2 * Procedures for interfacing to Open Firmware.
3 *
4 * Peter Bergner, IBM Corp. June 2001.
5 * Copyright (C) 2001 Peter Bergner.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/kernel.h>
15#include <linux/init.h>
16#include <linux/bitops.h>
17#include <asm/types.h>
18#include <asm/page.h>
19#include <asm/prom.h>
20#include <asm/lmb.h>
21#include <asm/abs_addr.h>
22
23struct lmb lmb;
24
25#undef DEBUG
26
27void lmb_dump_all(void)
28{
29#ifdef DEBUG
30 unsigned long i;
31 struct lmb *_lmb = &lmb;
32
33 udbg_printf("lmb_dump_all:\n");
34 udbg_printf(" memory.cnt = 0x%lx\n",
35 _lmb->memory.cnt);
36 udbg_printf(" memory.size = 0x%lx\n",
37 _lmb->memory.size);
38 for (i=0; i < _lmb->memory.cnt ;i++) {
39 udbg_printf(" memory.region[0x%x].base = 0x%lx\n",
40 i, _lmb->memory.region[i].base);
41 udbg_printf(" .physbase = 0x%lx\n",
42 _lmb->memory.region[i].physbase);
43 udbg_printf(" .size = 0x%lx\n",
44 _lmb->memory.region[i].size);
45 }
46
47 udbg_printf("\n reserved.cnt = 0x%lx\n",
48 _lmb->reserved.cnt);
49 udbg_printf(" reserved.size = 0x%lx\n",
50 _lmb->reserved.size);
51 for (i=0; i < _lmb->reserved.cnt ;i++) {
52 udbg_printf(" reserved.region[0x%x].base = 0x%lx\n",
53 i, _lmb->reserved.region[i].base);
54 udbg_printf(" .physbase = 0x%lx\n",
55 _lmb->reserved.region[i].physbase);
56 udbg_printf(" .size = 0x%lx\n",
57 _lmb->reserved.region[i].size);
58 }
59#endif /* DEBUG */
60}
61
62static unsigned long __init
63lmb_addrs_overlap(unsigned long base1, unsigned long size1,
64 unsigned long base2, unsigned long size2)
65{
66 return ((base1 < (base2+size2)) && (base2 < (base1+size1)));
67}
68
69static long __init
70lmb_addrs_adjacent(unsigned long base1, unsigned long size1,
71 unsigned long base2, unsigned long size2)
72{
73 if (base2 == base1 + size1)
74 return 1;
75 else if (base1 == base2 + size2)
76 return -1;
77
78 return 0;
79}
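
/*
 * Worked example (illustrative addresses, not from the source): take
 * region 1 = [0x1000, 0x2000) (base1 = 0x1000, size1 = 0x1000) and
 * region 2 = [0x2000, 0x3000):
 *
 *   lmb_addrs_overlap(0x1000, 0x1000, 0x2000, 0x1000) == 0
 *     (half-open ranges touch but do not intersect:
 *      base2 < base1 + size1 is false)
 *   lmb_addrs_adjacent(0x1000, 0x1000, 0x2000, 0x1000) == 1
 *     (region 2 starts exactly where region 1 ends)
 *   lmb_addrs_adjacent(0x2000, 0x1000, 0x1000, 0x1000) == -1
 *     (arguments reversed: region 1 now follows region 2)
 */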
80
81static long __init
82lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
83{
84 unsigned long base1 = rgn->region[r1].base;
85 unsigned long size1 = rgn->region[r1].size;
86 unsigned long base2 = rgn->region[r2].base;
87 unsigned long size2 = rgn->region[r2].size;
88
89 return lmb_addrs_adjacent(base1, size1, base2, size2);
90}
91
92/* Assumption: base addr of region 1 < base addr of region 2 */
93static void __init
94lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
95{
96 unsigned long i;
97
98 rgn->region[r1].size += rgn->region[r2].size;
99 for (i=r2; i < rgn->cnt-1; i++) {
100 rgn->region[i].base = rgn->region[i+1].base;
101 rgn->region[i].physbase = rgn->region[i+1].physbase;
102 rgn->region[i].size = rgn->region[i+1].size;
103 }
104 rgn->cnt--;
105}
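
/*
 * Worked example (illustrative values): if region r1 covers
 * [0x1000, 0x2000) and region r2 covers [0x2000, 0x3000), then
 * lmb_coalesce_regions(rgn, r1, r2) grows r1 to cover
 * [0x1000, 0x3000), shifts every entry above r2 down one slot and
 * decrements rgn->cnt.  Note that r1's physbase is left alone: the
 * two regions are assumed to be contiguous in the physical mapping
 * as well.
 */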
106
107/* This routine is called with relocation disabled. */
108void __init
109lmb_init(void)
110{
111 struct lmb *_lmb = &lmb;
112
113 /* Create a dummy zero size LMB which will get coalesced away later.
114 * This simplifies the lmb_add() code below...
115 */
116 _lmb->memory.region[0].base = 0;
117 _lmb->memory.region[0].size = 0;
118 _lmb->memory.cnt = 1;
119
120 /* Ditto. */
121 _lmb->reserved.region[0].base = 0;
122 _lmb->reserved.region[0].size = 0;
123 _lmb->reserved.cnt = 1;
124}
125
126/* This routine is called with relocation disabled. */
127void __init
128lmb_analyze(void)
129{
130 unsigned long i;
131 unsigned long mem_size = 0;
132 unsigned long size_mask = 0;
133 struct lmb *_lmb = &lmb;
134#ifdef CONFIG_MSCHUNKS
135 unsigned long physbase = 0;
136#endif
137
138 for (i=0; i < _lmb->memory.cnt; i++) {
139 unsigned long lmb_size;
140
141 lmb_size = _lmb->memory.region[i].size;
142
143#ifdef CONFIG_MSCHUNKS
144 _lmb->memory.region[i].physbase = physbase;
145 physbase += lmb_size;
146#else
147 _lmb->memory.region[i].physbase = _lmb->memory.region[i].base;
148#endif
149 mem_size += lmb_size;
150 size_mask |= lmb_size;
151 }
152
153 _lmb->memory.size = mem_size;
154}
155
156/* This routine is called with relocation disabled. */
157static long __init
158lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
159{
160 unsigned long i, coalesced = 0;
161 long adjacent;
162
163 /* First try and coalesce this LMB with another. */
164 for (i=0; i < rgn->cnt; i++) {
165 unsigned long rgnbase = rgn->region[i].base;
166 unsigned long rgnsize = rgn->region[i].size;
167
168 adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize);
169 if ( adjacent > 0 ) {
170 rgn->region[i].base -= size;
171 rgn->region[i].physbase -= size;
172 rgn->region[i].size += size;
173 coalesced++;
174 break;
175 }
176 else if ( adjacent < 0 ) {
177 rgn->region[i].size += size;
178 coalesced++;
179 break;
180 }
181 }
182
183 if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) {
184 lmb_coalesce_regions(rgn, i, i+1);
185 coalesced++;
186 }
187
188 if ( coalesced ) {
189 return coalesced;
190 } else if ( rgn->cnt >= MAX_LMB_REGIONS ) {
191 return -1;
192 }
193
194 /* Couldn't coalesce the LMB, so add it to the sorted table. */
195 for (i=rgn->cnt-1; i >= 0; i--) {
196 if (base < rgn->region[i].base) {
197 rgn->region[i+1].base = rgn->region[i].base;
198 rgn->region[i+1].physbase = rgn->region[i].physbase;
199 rgn->region[i+1].size = rgn->region[i].size;
200 } else {
201 rgn->region[i+1].base = base;
202 rgn->region[i+1].physbase = lmb_abs_to_phys(base);
203 rgn->region[i+1].size = size;
204 break;
205 }
206 }
207 rgn->cnt++;
208
209 return 0;
210}
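
/*
 * Worked example (illustrative values): suppose the table holds the
 * dummy entry {base 0, size 0} and {base 0x4000, size 0x1000}, and we
 * add base 0x2000, size 0x1000.  The new range is adjacent to neither
 * entry, so the coalesce loop falls through; the insertion loop then
 * copies the 0x4000 entry up one slot (0x2000 < 0x4000), stops at the
 * dummy entry (0x2000 >= 0), writes the new region into slot 1 and
 * bumps cnt to 3, keeping the table sorted by base address.
 */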
211
212/* This routine is called with relocation disabled. */
213long __init
214lmb_add(unsigned long base, unsigned long size)
215{
216 struct lmb *_lmb = &lmb;
217 struct lmb_region *_rgn = &(_lmb->memory);
218
219 /* On pSeries LPAR systems, the first LMB is our RMO region. */
220 if ( base == 0 )
221 _lmb->rmo_size = size;
222
223 return lmb_add_region(_rgn, base, size);
225}
226
227long __init
228lmb_reserve(unsigned long base, unsigned long size)
229{
230 struct lmb *_lmb = &lmb;
231 struct lmb_region *_rgn = &(_lmb->reserved);
232
233 return lmb_add_region(_rgn, base, size);
234}
235
236long __init
237lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
238{
239 unsigned long i;
240
241 for (i=0; i < rgn->cnt; i++) {
242 unsigned long rgnbase = rgn->region[i].base;
243 unsigned long rgnsize = rgn->region[i].size;
244 if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) {
245 break;
246 }
247 }
248
249 return (i < rgn->cnt) ? i : -1;
250}
251
252unsigned long __init
253lmb_alloc(unsigned long size, unsigned long align)
254{
255 return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
256}
257
258unsigned long __init
259lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
260{
261 long i, j;
262 unsigned long base = 0;
263 struct lmb *_lmb = &lmb;
264 struct lmb_region *_mem = &(_lmb->memory);
265 struct lmb_region *_rsv = &(_lmb->reserved);
266
267 for (i=_mem->cnt-1; i >= 0; i--) {
268 unsigned long lmbbase = _mem->region[i].base;
269 unsigned long lmbsize = _mem->region[i].size;
270
271 if ( max_addr == LMB_ALLOC_ANYWHERE )
272 base = _ALIGN_DOWN(lmbbase+lmbsize-size, align);
273 else if ( lmbbase < max_addr )
274 base = _ALIGN_DOWN(min(lmbbase+lmbsize,max_addr)-size, align);
275 else
276 continue;
277
278 while ( (lmbbase <= base) &&
279 ((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) {
280 base = _ALIGN_DOWN(_rsv->region[j].base-size, align);
281 }
282
283 if ( (base != 0) && (lmbbase <= base) )
284 break;
285 }
286
287 if ( i < 0 )
288 return 0;
289
290 lmb_add_region(_rsv, base, size);
291
292 return base;
293}
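
/*
 * Worked example (illustrative values): with one memory region
 * [0, 0x10000), a reserved region [0xe000, 0x10000), and a request
 * for size 0x1000 at align 0x1000, the first candidate is
 * _ALIGN_DOWN(0x10000 - 0x1000, 0x1000) = 0xf000.  That overlaps the
 * reservation, so the inner loop retries below it at
 * _ALIGN_DOWN(0xe000 - 0x1000, 0x1000) = 0xd000, which is clear;
 * 0xd000 is then reserved and returned.  Allocation is thus top-down
 * within each memory region, sliding under any reservations.
 */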
294
295unsigned long __init
296lmb_phys_mem_size(void)
297{
298 struct lmb *_lmb = &lmb;
299#ifdef CONFIG_MSCHUNKS
300 return _lmb->memory.size;
301#else
302 struct lmb_region *_mem = &(_lmb->memory);
303 unsigned long total = 0;
304 int i;
305
306 /* add all physical memory to the bootmem map */
307 for (i=0; i < _mem->cnt; i++)
308 total += _mem->region[i].size;
309 return total;
310#endif /* CONFIG_MSCHUNKS */
311}
312
313unsigned long __init
314lmb_end_of_DRAM(void)
315{
316 struct lmb *_lmb = &lmb;
317 struct lmb_region *_mem = &(_lmb->memory);
318 int idx = _mem->cnt - 1;
319
320#ifdef CONFIG_MSCHUNKS
321 return (_mem->region[idx].physbase + _mem->region[idx].size);
322#else
323 return (_mem->region[idx].base + _mem->region[idx].size);
324#endif /* CONFIG_MSCHUNKS */
327}
328
329unsigned long __init
330lmb_abs_to_phys(unsigned long aa)
331{
332 unsigned long i, pa = aa;
333 struct lmb *_lmb = &lmb;
334 struct lmb_region *_mem = &(_lmb->memory);
335
336 for (i=0; i < _mem->cnt; i++) {
337 unsigned long lmbbase = _mem->region[i].base;
338 unsigned long lmbsize = _mem->region[i].size;
339 if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) {
340 pa = _mem->region[i].physbase + (aa - lmbbase);
341 break;
342 }
343 }
344
345 return pa;
346}
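
/*
 * Worked example (illustrative values, CONFIG_MSCHUNKS-style layout):
 * if a memory region has base 0xc0000000 but physbase 0x10000000,
 * then lmb_abs_to_phys(0xc0001000) returns 0x10000000 + 0x1000 =
 * 0x10001000.  Without CONFIG_MSCHUNKS, physbase equals base (see
 * lmb_analyze() above) and the function is effectively an identity.
 */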
347
348/*
349 * Truncate the lmb list to memory_limit if it's set
350 * You must call lmb_analyze() after this.
351 */
352void __init lmb_enforce_memory_limit(void)
353{
354 extern unsigned long memory_limit;
355 unsigned long i, limit;
356 struct lmb_region *mem = &(lmb.memory);
357
358	if (!memory_limit)
359 return;
360
361 limit = memory_limit;
362 for (i = 0; i < mem->cnt; i++) {
363 if (limit > mem->region[i].size) {
364 limit -= mem->region[i].size;
365 continue;
366 }
367
368 mem->region[i].size = limit;
369 mem->cnt = i + 1;
370 break;
371 }
372}
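
/*
 * Worked example (illustrative values): with two 1GB regions and
 * memory_limit = 0x60000000 (1.5GB), the first pass subtracts the
 * first region's 0x40000000 from the limit, and the second pass
 * shrinks region 1 to the remaining 0x20000000 and truncates the
 * list at cnt = 2, leaving exactly memory_limit bytes of memory.
 */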
diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c
new file mode 100644
index 000000000000..a8fd32df848b
--- /dev/null
+++ b/arch/ppc64/kernel/lparcfg.c
@@ -0,0 +1,611 @@
1/*
2 * PowerPC64 LPAR Configuration Information Driver
3 *
4 * Dave Engebretsen engebret@us.ibm.com
5 * Copyright (c) 2003 Dave Engebretsen
6 * Will Schmidt willschm@us.ibm.com
7 * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
8 * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
9 * Nathan Lynch nathanl@austin.ibm.com
10 * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * This driver creates a proc file at /proc/ppc64/lparcfg which contains
18 * keyword - value pairs that specify the configuration of the partition.
19 */
20
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/errno.h>
25#include <linux/proc_fs.h>
26#include <linux/init.h>
27#include <linux/seq_file.h>
28#include <asm/uaccess.h>
29#include <asm/iSeries/HvLpConfig.h>
30#include <asm/lppaca.h>
31#include <asm/iSeries/LparData.h>
32#include <asm/hvcall.h>
33#include <asm/cputable.h>
34#include <asm/rtas.h>
35#include <asm/system.h>
36#include <asm/time.h>
37
38#define MODULE_VERS "1.6"
39#define MODULE_NAME "lparcfg"
40
41/* #define LPARCFG_DEBUG */
42
43/* find a better place for this function... */
44void log_plpar_hcall_return(unsigned long rc, char *tag)
45{
46 if (rc == 0) /* success, return */
47 return;
48/* check for a NULL tag? */
49 if (rc == H_Hardware)
50 printk(KERN_INFO
51 "plpar-hcall (%s) failed with hardware fault\n", tag);
52 else if (rc == H_Function)
53 printk(KERN_INFO
54 "plpar-hcall (%s) failed; function not allowed\n", tag);
55 else if (rc == H_Authority)
56 printk(KERN_INFO
57 "plpar-hcall (%s) failed; not authorized to this function\n",
58 tag);
59 else if (rc == H_Parameter)
60 printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
61 tag);
62 else
63 printk(KERN_INFO
64 "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
65 tag, rc);
66
67}
68
69static struct proc_dir_entry *proc_ppc64_lparcfg;
70#define LPARCFG_BUFF_SIZE 4096
71
72#ifdef CONFIG_PPC_ISERIES
73
74/*
75 * On legacy iSeries systems, an emulated PURR value is available from the
76 * emulated_time_base field in the paca.
77 */
78static unsigned long get_purr(void)
79{
80 unsigned long sum_purr = 0;
81 int cpu;
82 struct paca_struct *lpaca;
83
84 for_each_cpu(cpu) {
85 lpaca = paca + cpu;
86 sum_purr += lpaca->lppaca.emulated_time_base;
87
88#ifdef PURR_DEBUG
89 printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n",
90 cpu, lpaca->lppaca.emulated_time_base);
91#endif
92 }
93 return sum_purr;
94}
95
96#define lparcfg_write NULL
97
98/*
99 * Methods used to fetch LPAR data when running on an iSeries platform.
100 */
101static int lparcfg_data(struct seq_file *m, void *v)
102{
103 unsigned long pool_id, lp_index;
104 int shared, entitled_capacity, max_entitled_capacity;
105 int processors, max_processors;
106 struct paca_struct *lpaca = get_paca();
107 unsigned long purr = get_purr();
108
109 seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
110
111 shared = (int)(lpaca->lppaca_ptr->shared_proc);
112 seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n",
113 e2a(xItExtVpdPanel.mfgID[2]),
114 e2a(xItExtVpdPanel.mfgID[3]),
115 e2a(xItExtVpdPanel.systemSerial[1]),
116 e2a(xItExtVpdPanel.systemSerial[2]),
117 e2a(xItExtVpdPanel.systemSerial[3]),
118 e2a(xItExtVpdPanel.systemSerial[4]),
119 e2a(xItExtVpdPanel.systemSerial[5]));
120
121 seq_printf(m, "system_type=%c%c%c%c\n",
122 e2a(xItExtVpdPanel.machineType[0]),
123 e2a(xItExtVpdPanel.machineType[1]),
124 e2a(xItExtVpdPanel.machineType[2]),
125 e2a(xItExtVpdPanel.machineType[3]));
126
127 lp_index = HvLpConfig_getLpIndex();
128 seq_printf(m, "partition_id=%d\n", (int)lp_index);
129
130 seq_printf(m, "system_active_processors=%d\n",
131 (int)HvLpConfig_getSystemPhysicalProcessors());
132
133 seq_printf(m, "system_potential_processors=%d\n",
134 (int)HvLpConfig_getSystemPhysicalProcessors());
135
136 processors = (int)HvLpConfig_getPhysicalProcessors();
137 seq_printf(m, "partition_active_processors=%d\n", processors);
138
139 max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
140 seq_printf(m, "partition_potential_processors=%d\n", max_processors);
141
142 if (shared) {
143 entitled_capacity = HvLpConfig_getSharedProcUnits();
144 max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
145 } else {
146 entitled_capacity = processors * 100;
147 max_entitled_capacity = max_processors * 100;
148 }
149 seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
150
151 seq_printf(m, "partition_max_entitled_capacity=%d\n",
152 max_entitled_capacity);
153
154 if (shared) {
155 pool_id = HvLpConfig_getSharedPoolIndex();
156 seq_printf(m, "pool=%d\n", (int)pool_id);
157 seq_printf(m, "pool_capacity=%d\n",
158 (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
159 100));
160 seq_printf(m, "purr=%ld\n", purr);
161 }
162
163 seq_printf(m, "shared_processor_mode=%d\n", shared);
164
165 return 0;
166}
167#endif /* CONFIG_PPC_ISERIES */
168
169#ifdef CONFIG_PPC_PSERIES
170/*
171 * Methods used to fetch LPAR data when running on a pSeries platform.
172 */
173
174/*
175 * The H_GET_PPP hcall returns info in four output parms:
176 * (entitled_capacity, unallocated_capacity,
177 * aggregation, resource_capability).
178 *
179 * R4 = Entitled Processor Capacity Percentage.
180 * R5 = Unallocated Processor Capacity Percentage.
181 * R6 (AABBCCDDEEFFGGHH).
182 * XXXX - reserved (0)
183 * XXXX - reserved (0)
184 * XXXX - Group Number
185 * XXXX - Pool Number.
186 * R7 (IIJJKKLLMMNNOOPP).
187 * XX - reserved. (0)
188 * XX - bits 0-6 reserved (0); bit 7 is the Capped indicator.
189 * XX - variable processor Capacity Weight
190 * XX - Unallocated Variable Processor Capacity Weight.
191 * XXXX - Active processors in Physical Processor Pool.
192 * XXXX - Processors active on platform.
193 */
194static unsigned int h_get_ppp(unsigned long *entitled,
195 unsigned long *unallocated,
196 unsigned long *aggregation,
197 unsigned long *resource)
198{
199 unsigned long rc;
200 rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
201 aggregation, resource);
202
203 log_plpar_hcall_return(rc, "H_GET_PPP");
204
205 return rc;
206}
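
/*
 * Worked example (illustrative value) of unpacking R7 per the layout
 * above: if h_get_ppp() returned h_resource = 0x0001806400040008,
 * then
 *   capped             = (h_resource >> 6*8) & 0x01   = 1
 *   capacity weight    = (h_resource >> 5*8) & 0xFF   = 0x80 (128)
 *   unallocated weight = (h_resource >> 4*8) & 0xFF   = 0x64 (100)
 *   procs in pool      = (h_resource >> 2*8) & 0xffff = 4
 *   procs on platform  = (h_resource >> 0*8) & 0xffff = 8
 * which matches the shifts used by lparcfg_data() below.
 */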
207
208static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
209{
210 unsigned long rc;
211 unsigned long dummy;
212 rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
213
214 log_plpar_hcall_return(rc, "H_PIC");
215}
216
217/*
218 * Track the sum of the PURR across all processors.  This is used by
219 * applications to calculate usage values.
220 */
221
222static unsigned long get_purr(void)
223{
224 unsigned long sum_purr = 0;
225 int cpu;
226 struct cpu_usage *cu;
227
228 for_each_cpu(cpu) {
229 cu = &per_cpu(cpu_usage_array, cpu);
230 sum_purr += cu->current_tb;
231 }
232 return sum_purr;
233}
234
235#define SPLPAR_CHARACTERISTICS_TOKEN 20
236#define SPLPAR_MAXLENGTH 1026
237
238/*
239 * parse_system_parameter_string()
240 * Retrieve the potential_processors, max_entitled_capacity and friends
241 * through the get-system-parameter rtas call. Replace keyword strings as
242 * necessary.
243 */
244static void parse_system_parameter_string(struct seq_file *m)
245{
246 int call_status;
247
248 char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
249 if (!local_buffer) {
250 printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
251 __FILE__, __FUNCTION__, __LINE__);
252 return;
253 }
254
255 spin_lock(&rtas_data_buf_lock);
256 memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
257 call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
258 NULL,
259 SPLPAR_CHARACTERISTICS_TOKEN,
260 __pa(rtas_data_buf));
261 memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
262 spin_unlock(&rtas_data_buf_lock);
263
264 if (call_status != 0) {
265 printk(KERN_INFO
266 "%s %s Error calling get-system-parameter (0x%x)\n",
267 __FILE__, __FUNCTION__, call_status);
268 } else {
269 int splpar_strlen;
270 int idx, w_idx;
271 char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
272 if (!workbuffer) {
273 printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
274 __FILE__, __FUNCTION__, __LINE__);
275			kfree(local_buffer);
			return;
276 }
277#ifdef LPARCFG_DEBUG
278 printk(KERN_INFO "success calling get-system-parameter \n");
279#endif
280 splpar_strlen = local_buffer[0] * 16 + local_buffer[1];
281 local_buffer += 2; /* step over strlen value */
282
283 memset(workbuffer, 0, SPLPAR_MAXLENGTH);
284 w_idx = 0;
285 idx = 0;
286 while ((*local_buffer) && (idx < splpar_strlen)) {
287 workbuffer[w_idx++] = local_buffer[idx++];
288 if ((local_buffer[idx] == ',')
289 || (local_buffer[idx] == '\0')) {
290 workbuffer[w_idx] = '\0';
291 if (w_idx) {
292 /* avoid the empty string */
293 seq_printf(m, "%s\n", workbuffer);
294 }
295 memset(workbuffer, 0, SPLPAR_MAXLENGTH);
296 idx++; /* skip the comma */
297 w_idx = 0;
298 } else if (local_buffer[idx] == '=') {
299 /* code here to replace workbuffer contents
300 with different keyword strings */
301 if (0 == strcmp(workbuffer, "MaxEntCap")) {
302 strcpy(workbuffer,
303 "partition_max_entitled_capacity");
304 w_idx = strlen(workbuffer);
305 }
306 if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
307 strcpy(workbuffer,
308 "system_potential_processors");
309 w_idx = strlen(workbuffer);
310 }
311 }
312 }
313 kfree(workbuffer);
314 local_buffer -= 2; /* back up over strlen value */
315 }
316 kfree(local_buffer);
317}
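
/*
 * Illustrative sketch of the transformation above (the exact keyword
 * list is firmware-dependent): the RTAS buffer holds a two-byte
 * length followed by a comma-separated string such as
 *
 *   "MaxEntCap=400,MaxPlatProcs=4,DesEntCap=100"
 *
 * which would be emitted through the seq_file as
 *
 *   partition_max_entitled_capacity=400
 *   system_potential_processors=4
 *   DesEntCap=100
 *
 * i.e. known keywords are renamed, everything else passes through.
 */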
318
321/* Return the number of processors in the system.
322 * This function reads through the device tree and counts
323 * the virtual processors; it does not include threads.
324 */
325static int lparcfg_count_active_processors(void)
326{
327 struct device_node *cpus_dn = NULL;
328 int count = 0;
329
330 while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
331#ifdef LPARCFG_DEBUG
332 printk(KERN_ERR "cpus_dn %p \n", cpus_dn);
333#endif
334 count++;
335 }
336 return count;
337}
338
339static int lparcfg_data(struct seq_file *m, void *v)
340{
341 int partition_potential_processors;
342 int partition_active_processors;
343 struct device_node *rootdn;
344 const char *model = "";
345 const char *system_id = "";
346 unsigned int *lp_index_ptr, lp_index = 0;
347 struct device_node *rtas_node;
348 int *lrdrp;
349
350 rootdn = find_path_device("/");
351 if (rootdn) {
352 model = get_property(rootdn, "model", NULL);
353 system_id = get_property(rootdn, "system-id", NULL);
354 lp_index_ptr = (unsigned int *)
355 get_property(rootdn, "ibm,partition-no", NULL);
356 if (lp_index_ptr)
357 lp_index = *lp_index_ptr;
358 }
359
360 seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
361
362 seq_printf(m, "serial_number=%s\n", system_id);
363
364 seq_printf(m, "system_type=%s\n", model);
365
366 seq_printf(m, "partition_id=%d\n", (int)lp_index);
367
368 rtas_node = find_path_device("/rtas");
369 lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL);
370
371 if (lrdrp == NULL) {
372 partition_potential_processors = systemcfg->processorCount;
373 } else {
374 partition_potential_processors = *(lrdrp + 4);
375 }
376
377 partition_active_processors = lparcfg_count_active_processors();
378
379 if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
380 unsigned long h_entitled, h_unallocated;
381 unsigned long h_aggregation, h_resource;
382 unsigned long pool_idle_time, pool_procs;
383 unsigned long purr;
384
385 h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
386 &h_resource);
387
388 seq_printf(m, "R4=0x%lx\n", h_entitled);
389 seq_printf(m, "R5=0x%lx\n", h_unallocated);
390 seq_printf(m, "R6=0x%lx\n", h_aggregation);
391 seq_printf(m, "R7=0x%lx\n", h_resource);
392
393 purr = get_purr();
394
395 /* this call handles the ibm,get-system-parameter contents */
396 parse_system_parameter_string(m);
397
398 seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
399
400 seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
401
402 seq_printf(m, "system_active_processors=%ld\n",
403 (h_resource >> 0 * 8) & 0xffff);
404
405		/* pool-related entries are appropriate for shared configs */
406 if (paca[0].lppaca.shared_proc) {
407
408 h_pic(&pool_idle_time, &pool_procs);
409
410 seq_printf(m, "pool=%ld\n",
411 (h_aggregation >> 0 * 8) & 0xffff);
412
413 /* report pool_capacity in percentage */
414 seq_printf(m, "pool_capacity=%ld\n",
415 ((h_resource >> 2 * 8) & 0xffff) * 100);
416
417 seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
418
419 seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
420 }
421
422 seq_printf(m, "unallocated_capacity_weight=%ld\n",
423 (h_resource >> 4 * 8) & 0xFF);
424
425 seq_printf(m, "capacity_weight=%ld\n",
426 (h_resource >> 5 * 8) & 0xFF);
427
428 seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
429
430 seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
431
432 seq_printf(m, "purr=%ld\n", purr);
433
434 } else { /* non SPLPAR case */
435
436 seq_printf(m, "system_active_processors=%d\n",
437 partition_potential_processors);
438
439 seq_printf(m, "system_potential_processors=%d\n",
440 partition_potential_processors);
441
442 seq_printf(m, "partition_max_entitled_capacity=%d\n",
443 partition_potential_processors * 100);
444
445 seq_printf(m, "partition_entitled_capacity=%d\n",
446 partition_active_processors * 100);
447 }
448
449 seq_printf(m, "partition_active_processors=%d\n",
450 partition_active_processors);
451
452 seq_printf(m, "partition_potential_processors=%d\n",
453 partition_potential_processors);
454
455 seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc);
456
457 return 0;
458}
459
460/*
461 * Interface for changing system parameters (variable capacity weight
462 * and entitled capacity). Format of input is "param_name=value";
463 * anything after value is ignored. Valid parameters at this time are
464 * "partition_entitled_capacity" and "capacity_weight". We use
465 * H_SET_PPP to alter parameters.
466 *
467 * This function should be invoked only on systems with
468 * FW_FEATURE_SPLPAR.
469 */
470static ssize_t lparcfg_write(struct file *file, const char __user * buf,
471 size_t count, loff_t * off)
472{
473 char *kbuf;
474 char *tmp;
475 u64 new_entitled, *new_entitled_ptr = &new_entitled;
476 u8 new_weight, *new_weight_ptr = &new_weight;
477
478 unsigned long current_entitled; /* parameters for h_get_ppp */
479 unsigned long dummy;
480 unsigned long resource;
481 u8 current_weight;
482
483 ssize_t retval = -ENOMEM;
484
485 kbuf = kmalloc(count, GFP_KERNEL);
486 if (!kbuf)
487 goto out;
488
489 retval = -EFAULT;
490 if (copy_from_user(kbuf, buf, count))
491 goto out;
492
493 retval = -EINVAL;
494 kbuf[count - 1] = '\0';
495 tmp = strchr(kbuf, '=');
496 if (!tmp)
497 goto out;
498
499 *tmp++ = '\0';
500
501 if (!strcmp(kbuf, "partition_entitled_capacity")) {
502 char *endp;
503 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
504 if (endp == tmp)
505 goto out;
506 new_weight_ptr = &current_weight;
507 } else if (!strcmp(kbuf, "capacity_weight")) {
508 char *endp;
509 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
510 if (endp == tmp)
511 goto out;
512 new_entitled_ptr = &current_entitled;
513 } else
514 goto out;
515
516 /* Get our current parameters */
517 retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
518 if (retval) {
519 retval = -EIO;
520 goto out;
521 }
522
523 current_weight = (resource >> 5 * 8) & 0xFF;
524
525	pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
526		 __FUNCTION__, current_entitled, current_weight);
527
528	pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
529		 __FUNCTION__, *new_entitled_ptr, *new_weight_ptr);
530
531 retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
532 *new_weight_ptr);
533
534 if (retval == H_Success || retval == H_Constrained) {
535 retval = count;
536 } else if (retval == H_Busy) {
537 retval = -EBUSY;
538 } else if (retval == H_Hardware) {
539 retval = -EIO;
540 } else if (retval == H_Parameter) {
541 retval = -EINVAL;
542 } else {
543		printk(KERN_WARNING "%s: received unknown hv return code %ld\n",
544 __FUNCTION__, retval);
545 retval = -EIO;
546 }
547
548 out:
549 kfree(kbuf);
550 return retval;
551}
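
/*
 * Usage sketch (assuming a shell on a SPLPAR-capable partition):
 *
 *   echo "partition_entitled_capacity=200" > /proc/ppc64/lparcfg
 *   echo "capacity_weight=128" > /proc/ppc64/lparcfg
 *
 * Each write changes exactly one parameter; the other is first read
 * back via h_get_ppp() so that H_SET_PPP is always handed a complete
 * (entitled, weight) pair.
 */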
552
553#endif /* CONFIG_PPC_PSERIES */
554
555static int lparcfg_open(struct inode *inode, struct file *file)
556{
557 return single_open(file, lparcfg_data, NULL);
558}
559
560struct file_operations lparcfg_fops = {
561 .owner = THIS_MODULE,
562 .read = seq_read,
563 .open = lparcfg_open,
564 .release = single_release,
565};
566
567int __init lparcfg_init(void)
568{
569 struct proc_dir_entry *ent;
570 mode_t mode = S_IRUSR;
571
572 /* Allow writing if we have FW_FEATURE_SPLPAR */
573 if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
574 lparcfg_fops.write = lparcfg_write;
575 mode |= S_IWUSR;
576 }
577
578 ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
579 if (ent) {
580 ent->proc_fops = &lparcfg_fops;
581 ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL);
582 if (!ent->data) {
583 printk(KERN_ERR
584 "Failed to allocate buffer for lparcfg\n");
585 remove_proc_entry("lparcfg", ent->parent);
586 return -ENOMEM;
587 }
588 } else {
589 printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
590 return -EIO;
591 }
592
593 proc_ppc64_lparcfg = ent;
594 return 0;
595}
596
597void __exit lparcfg_cleanup(void)
598{
599 if (proc_ppc64_lparcfg) {
600 if (proc_ppc64_lparcfg->data) {
601 kfree(proc_ppc64_lparcfg->data);
602 }
603 remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
604 }
605}
606
607module_init(lparcfg_init);
608module_exit(lparcfg_cleanup);
609MODULE_DESCRIPTION("Interface for LPAR configuration data");
610MODULE_AUTHOR("Dave Engebretsen");
611MODULE_LICENSE("GPL");
diff --git a/arch/ppc64/kernel/maple_pci.c b/arch/ppc64/kernel/maple_pci.c
new file mode 100644
index 000000000000..53993999b265
--- /dev/null
+++ b/arch/ppc64/kernel/maple_pci.c
@@ -0,0 +1,521 @@
1/*
2 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
3 * IBM Corp.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10
11#define DEBUG
12
13#include <linux/kernel.h>
14#include <linux/pci.h>
15#include <linux/delay.h>
16#include <linux/string.h>
17#include <linux/init.h>
18#include <linux/bootmem.h>
19
20#include <asm/sections.h>
21#include <asm/io.h>
22#include <asm/prom.h>
23#include <asm/pci-bridge.h>
24#include <asm/machdep.h>
25#include <asm/iommu.h>
26
27#include "pci.h"
28
29#ifdef DEBUG
30#define DBG(x...) printk(x)
31#else
32#define DBG(x...)
33#endif
34
35static struct pci_controller *u3_agp, *u3_ht;
36
37static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
38{
39	for (; node != 0; node = node->sibling) {
40 int * bus_range;
41 unsigned int *class_code;
42 int len;
43
44 /* For PCI<->PCI bridges or CardBus bridges, we go down */
45 class_code = (unsigned int *) get_property(node, "class-code", NULL);
46 if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
47 (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
48 continue;
49 bus_range = (int *) get_property(node, "bus-range", &len);
50 if (bus_range != NULL && len > 2 * sizeof(int)) {
51 if (bus_range[1] > higher)
52 higher = bus_range[1];
53 }
54 higher = fixup_one_level_bus_range(node->child, higher);
55 }
56 return higher;
57}
58
59/* This routine fixes the "bus-range" property of all bridges in the
60 * system, since they tend to have their "last" member wrong on Macs.
61 *
62 * Note that the bus numbers manipulated here are OF bus numbers, they
63 * are not Linux bus numbers.
64 */
65static void __init fixup_bus_range(struct device_node *bridge)
66{
67 int * bus_range;
68 int len;
69
70 /* Lookup the "bus-range" property for the hose */
71 bus_range = (int *) get_property(bridge, "bus-range", &len);
72 if (bus_range == NULL || len < 2 * sizeof(int)) {
73 printk(KERN_WARNING "Can't get bus-range for %s\n",
74 bridge->full_name);
75 return;
76 }
77 bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]);
78}
79
80
81#define U3_AGP_CFA0(devfn, off)	\
82	((1 << (unsigned long)PCI_SLOT(devfn)) \
83	| (((unsigned long)PCI_FUNC(devfn)) << 8) \
84	| (((unsigned long)(off)) & 0xFCUL))
85
86#define U3_AGP_CFA1(bus, devfn, off) \
87 ((((unsigned long)(bus)) << 16) \
88 |(((unsigned long)(devfn)) << 8) \
89 |(((unsigned long)(off)) & 0xFCUL) \
90 |1UL)
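
/*
 * Worked example (illustrative values): for a type 0 cycle on the
 * root bus, U3_AGP_CFA0(PCI_DEVFN(11, 0), 0x40) sets the IDSEL bit
 * for slot 11 and masks the offset to a dword boundary:
 *   (1 << 11) | (0 << 8) | 0x40 = 0x840
 * For a type 1 cycle behind a bridge, U3_AGP_CFA1(0xf1, 0, 0x04)
 * encodes bus, devfn and offset and sets the low "type 1" bit:
 *   (0xf1 << 16) | (0 << 8) | 0x04 | 1 = 0xf10005
 */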
91
92static unsigned long u3_agp_cfg_access(struct pci_controller* hose,
93 u8 bus, u8 dev_fn, u8 offset)
94{
95 unsigned int caddr;
96
97 if (bus == hose->first_busno) {
98 if (dev_fn < (11 << 3))
99 return 0;
100 caddr = U3_AGP_CFA0(dev_fn, offset);
101 } else
102 caddr = U3_AGP_CFA1(bus, dev_fn, offset);
103
104	/* Uninorth will return garbage if we don't read back the value! */
105 do {
106 out_le32(hose->cfg_addr, caddr);
107 } while (in_le32(hose->cfg_addr) != caddr);
108
109 offset &= 0x07;
110 return ((unsigned long)hose->cfg_data) + offset;
111}
112
113static int u3_agp_read_config(struct pci_bus *bus, unsigned int devfn,
114 int offset, int len, u32 *val)
115{
116 struct pci_controller *hose;
117 unsigned long addr;
118
119 hose = pci_bus_to_host(bus);
120 if (hose == NULL)
121 return PCIBIOS_DEVICE_NOT_FOUND;
122
123 addr = u3_agp_cfg_access(hose, bus->number, devfn, offset);
124 if (!addr)
125 return PCIBIOS_DEVICE_NOT_FOUND;
126 /*
127 * Note: the caller has already checked that offset is
128 * suitably aligned and that len is 1, 2 or 4.
129 */
130 switch (len) {
131 case 1:
132 *val = in_8((u8 *)addr);
133 break;
134 case 2:
135 *val = in_le16((u16 *)addr);
136 break;
137 default:
138 *val = in_le32((u32 *)addr);
139 break;
140 }
141 return PCIBIOS_SUCCESSFUL;
142}
143
144static int u3_agp_write_config(struct pci_bus *bus, unsigned int devfn,
145 int offset, int len, u32 val)
146{
147 struct pci_controller *hose;
148 unsigned long addr;
149
150 hose = pci_bus_to_host(bus);
151 if (hose == NULL)
152 return PCIBIOS_DEVICE_NOT_FOUND;
153
154 addr = u3_agp_cfg_access(hose, bus->number, devfn, offset);
155 if (!addr)
156 return PCIBIOS_DEVICE_NOT_FOUND;
157 /*
158 * Note: the caller has already checked that offset is
159 * suitably aligned and that len is 1, 2 or 4.
160 */
161 switch (len) {
162 case 1:
163 out_8((u8 *)addr, val);
164 (void) in_8((u8 *)addr);
165 break;
166 case 2:
167 out_le16((u16 *)addr, val);
168 (void) in_le16((u16 *)addr);
169 break;
170 default:
171 out_le32((u32 *)addr, val);
172 (void) in_le32((u32 *)addr);
173 break;
174 }
175 return PCIBIOS_SUCCESSFUL;
176}
177
178static struct pci_ops u3_agp_pci_ops =
179{
180 u3_agp_read_config,
181 u3_agp_write_config
182};
183
184
185#define U3_HT_CFA0(devfn, off) \
186	((((unsigned long)(devfn)) << 8) | (off))
187#define U3_HT_CFA1(bus, devfn, off) \
188 (U3_HT_CFA0(devfn, off) \
189 + (((unsigned long)bus) << 16) \
190 + 0x01000000UL)
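
/*
 * Worked example (illustrative values): the HT config space is a flat
 * memory window, so the macros just linearize (bus, devfn, off).
 * U3_HT_CFA0(PCI_DEVFN(2, 0), 0x08) = (0x10 << 8) | 0x08 = 0x1008 is
 * the offset for the root bus, and U3_HT_CFA1(1, PCI_DEVFN(2, 0),
 * 0x08) = 0x1008 + (1 << 16) + 0x01000000 = 0x01011008 for bus 1;
 * u3_ht_cfg_access() adds these to the ioremapped cfg_data base.
 */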
191
192static unsigned long u3_ht_cfg_access(struct pci_controller* hose,
193 u8 bus, u8 devfn, u8 offset)
194{
195 if (bus == hose->first_busno) {
196 if (PCI_SLOT(devfn) == 0)
197 return 0;
198 return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset);
199 } else
200 return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset);
201}
202
203static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
204 int offset, int len, u32 *val)
205{
206 struct pci_controller *hose;
207 unsigned long addr;
208
209 hose = pci_bus_to_host(bus);
210 if (hose == NULL)
211 return PCIBIOS_DEVICE_NOT_FOUND;
212
213 addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
214 if (!addr)
215 return PCIBIOS_DEVICE_NOT_FOUND;
216
217 /*
218 * Note: the caller has already checked that offset is
219 * suitably aligned and that len is 1, 2 or 4.
220 */
221 switch (len) {
222 case 1:
223 *val = in_8((u8 *)addr);
224 break;
225 case 2:
226 *val = in_le16((u16 *)addr);
227 break;
228 default:
229 *val = in_le32((u32 *)addr);
230 break;
231 }
232 return PCIBIOS_SUCCESSFUL;
233}
234
235static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn,
236 int offset, int len, u32 val)
237{
238 struct pci_controller *hose;
239 unsigned long addr;
240
241 hose = pci_bus_to_host(bus);
242 if (hose == NULL)
243 return PCIBIOS_DEVICE_NOT_FOUND;
244
245 addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
246 if (!addr)
247 return PCIBIOS_DEVICE_NOT_FOUND;
248 /*
249 * Note: the caller has already checked that offset is
250 * suitably aligned and that len is 1, 2 or 4.
251 */
252 switch (len) {
253 case 1:
254 out_8((u8 *)addr, val);
255 (void) in_8((u8 *)addr);
256 break;
257 case 2:
258 out_le16((u16 *)addr, val);
259 (void) in_le16((u16 *)addr);
260 break;
261 default:
262 out_le32((u32 *)addr, val);
263 (void) in_le32((u32 *)addr);
264 break;
265 }
266 return PCIBIOS_SUCCESSFUL;
267}
268
269static struct pci_ops u3_ht_pci_ops =
270{
271 u3_ht_read_config,
272 u3_ht_write_config
273};
274
275static void __init setup_u3_agp(struct pci_controller* hose)
276{
277	/* On G5, we move AGP up to a high bus number so we don't need
278	 * to reassign bus numbers for HT. If we ever have P2P bridges
279	 * on AGP, we'll have to move pci_assign_all_busses into the
280	 * pci_controller structure so we can enable it for AGP and not
281	 * for HT children.
282	 * We hard-code the address because of the different size of
283	 * the reg address cell; we should fix that by killing struct
284	 * reg_property and using accessor functions instead.
285 */
286 hose->first_busno = 0xf0;
287 hose->last_busno = 0xff;
288 hose->ops = &u3_agp_pci_ops;
289 hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
290 hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
291
292 u3_agp = hose;
293}
294
295static void __init setup_u3_ht(struct pci_controller* hose)
296{
297 hose->ops = &u3_ht_pci_ops;
298
299	/* We hard-code the address because of the different size of
300	 * the reg address cell; we should fix that by killing struct
301	 * reg_property and using accessor functions instead.
302 */
303 hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, 0x02000000);
304
305 hose->first_busno = 0;
306 hose->last_busno = 0xef;
307
308 u3_ht = hose;
309}
310
311static int __init add_bridge(struct device_node *dev)
312{
313 int len;
314 struct pci_controller *hose;
315 char* disp_name;
316 int *bus_range;
317 int primary = 1;
318 struct property *of_prop;
319
320 DBG("Adding PCI host bridge %s\n", dev->full_name);
321
322 bus_range = (int *) get_property(dev, "bus-range", &len);
323 if (bus_range == NULL || len < 2 * sizeof(int)) {
324 printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n",
325 dev->full_name);
326 }
327
328 hose = alloc_bootmem(sizeof(struct pci_controller));
329 if (hose == NULL)
330 return -ENOMEM;
331 pci_setup_pci_controller(hose);
332
333 hose->arch_data = dev;
334 hose->first_busno = bus_range ? bus_range[0] : 0;
335 hose->last_busno = bus_range ? bus_range[1] : 0xff;
336
337 of_prop = alloc_bootmem(sizeof(struct property) +
338 sizeof(hose->global_number));
339 if (of_prop) {
340 memset(of_prop, 0, sizeof(struct property));
341 of_prop->name = "linux,pci-domain";
342 of_prop->length = sizeof(hose->global_number);
343 of_prop->value = (unsigned char *)&of_prop[1];
344 memcpy(of_prop->value, &hose->global_number, sizeof(hose->global_number));
345 prom_add_property(dev, of_prop);
346 }
347
348 disp_name = NULL;
349 if (device_is_compatible(dev, "u3-agp")) {
350 setup_u3_agp(hose);
351 disp_name = "U3-AGP";
352 primary = 0;
353 } else if (device_is_compatible(dev, "u3-ht")) {
354 setup_u3_ht(hose);
355 disp_name = "U3-HT";
356 primary = 1;
357 }
358 printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n",
359 disp_name, hose->first_busno, hose->last_busno);
360
361 /* Interpret the "ranges" property */
362 /* This also maps the I/O region and sets isa_io/mem_base */
363 pci_process_bridge_OF_ranges(hose, dev);
364 pci_setup_phb_io(hose, primary);
365
366 /* Fixup "bus-range" OF property */
367 fixup_bus_range(dev);
368
369 return 0;
370}
371
372
373void __init maple_pcibios_fixup(void)
374{
375 struct pci_dev *dev = NULL;
376
377 DBG(" -> maple_pcibios_fixup\n");
378
379 for_each_pci_dev(dev)
380 pci_read_irq_line(dev);
381
382 /* Do the mapping of the IO space */
383 phbs_remap_io();
384
385 DBG(" <- maple_pcibios_fixup\n");
386}
387
388static void __init maple_fixup_phb_resources(void)
389{
390 struct pci_controller *hose, *tmp;
391
392 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
393 unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
394 hose->io_resource.start += offset;
395 hose->io_resource.end += offset;
396 printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
397 hose->global_number,
398 hose->io_resource.start, hose->io_resource.end);
399 }
400}
401
402void __init maple_pci_init(void)
403{
404 struct device_node *np, *root;
405 struct device_node *ht = NULL;
406
407	/* Probe root PCI hosts, that is, on U3, the AGP host and the
408	 * HyperTransport host. The latter is kept around and added
409	 * last, as its resource management relies on the AGP resources
410	 * having been set up first.
411 */
412 root = of_find_node_by_path("/");
413 if (root == NULL) {
414 printk(KERN_CRIT "maple_find_bridges: can't find root of device tree\n");
415 return;
416 }
417 for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) {
418 if (np->name == NULL)
419 continue;
420 if (strcmp(np->name, "pci") == 0) {
421 if (add_bridge(np) == 0)
422 of_node_get(np);
423 }
424 if (strcmp(np->name, "ht") == 0) {
425 of_node_get(np);
426 ht = np;
427 }
428 }
429 of_node_put(root);
430
431	/* Now set up the HyperTransport host, if we found one
432 */
433 if (ht && add_bridge(ht) != 0)
434 of_node_put(ht);
435
436 /* Fixup the IO resources on our host bridges as the common code
437	 * does it only for children of the host bridges
438 */
439 maple_fixup_phb_resources();
440
441 /* Setup the linkage between OF nodes and PHBs */
442 pci_devs_phb_init();
443
444 /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We
445	 * assume there is no P2P bridge on the AGP bus, which should
446	 * hopefully be a safe assumption.
447 */
448 if (u3_agp) {
449 struct device_node *np = u3_agp->arch_data;
450 np->busno = 0xf0;
451 for (np = np->child; np; np = np->sibling)
452 np->busno = 0xf0;
453 }
454
455	/* Tell pci.c to use the common resource allocation mechanism */
456 pci_probe_only = 0;
457
458 /* Allow all IO */
459 io_page_mask = -1;
460}
461
462int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
463{
464 struct device_node *np;
465 int irq = channel ? 15 : 14;
466
467 if (pdev->vendor != PCI_VENDOR_ID_AMD ||
468 pdev->device != PCI_DEVICE_ID_AMD_8111_IDE)
469 return irq;
470
471 np = pci_device_to_OF_node(pdev);
472 if (np == NULL)
473 return irq;
474 if (np->n_intrs < 2)
475 return irq;
476 return np->intrs[channel & 0x1].line;
477}
478
479/* XXX: To remove once all firmwares are ok */
480static void fixup_maple_ide(struct pci_dev* dev)
481{
482#if 0 /* Enable this to enable IDE port 0 */
483 {
484 u8 v;
485
486 pci_read_config_byte(dev, 0x40, &v);
487 v |= 2;
488 pci_write_config_byte(dev, 0x40, v);
489 }
490#endif
491#if 0 /* fix bus master base */
492 pci_write_config_dword(dev, 0x20, 0xcc01);
493 printk("old ide resource: %lx -> %lx \n",
494 dev->resource[4].start, dev->resource[4].end);
495 dev->resource[4].start = 0xcc00;
496 dev->resource[4].end = 0xcc10;
497#endif
498#if 1 /* Enable this to fixup IDE sense/polarity of irqs in IO-APICs */
499 {
500 struct pci_dev *apicdev;
501 u32 v;
502
503 apicdev = pci_get_slot (dev->bus, PCI_DEVFN(5,0));
504 if (apicdev == NULL)
505 printk("IDE Fixup IRQ: Can't find IO-APIC !\n");
506 else {
507 pci_write_config_byte(apicdev, 0xf2, 0x10 + 2*14);
508 pci_read_config_dword(apicdev, 0xf4, &v);
509 v &= ~0x00000022;
510 pci_write_config_dword(apicdev, 0xf4, v);
511 pci_write_config_byte(apicdev, 0xf2, 0x10 + 2*15);
512 pci_read_config_dword(apicdev, 0xf4, &v);
513 v &= ~0x00000022;
514 pci_write_config_dword(apicdev, 0xf4, v);
515 pci_dev_put(apicdev);
516 }
517 }
518#endif
519}
520DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_IDE,
521 fixup_maple_ide);
diff --git a/arch/ppc64/kernel/maple_setup.c b/arch/ppc64/kernel/maple_setup.c
new file mode 100644
index 000000000000..1db6ea0f336f
--- /dev/null
+++ b/arch/ppc64/kernel/maple_setup.c
@@ -0,0 +1,240 @@
1/*
2 * arch/ppc64/kernel/maple_setup.c
3 *
4 * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
5 * IBM Corp.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 */
13
14#define DEBUG
15
16#include <linux/config.h>
17#include <linux/init.h>
18#include <linux/errno.h>
19#include <linux/sched.h>
20#include <linux/kernel.h>
21#include <linux/mm.h>
22#include <linux/stddef.h>
23#include <linux/unistd.h>
24#include <linux/ptrace.h>
25#include <linux/slab.h>
26#include <linux/user.h>
27#include <linux/a.out.h>
28#include <linux/tty.h>
29#include <linux/string.h>
30#include <linux/delay.h>
31#include <linux/ioport.h>
32#include <linux/major.h>
33#include <linux/initrd.h>
34#include <linux/vt_kern.h>
35#include <linux/console.h>
36#include <linux/ide.h>
37#include <linux/pci.h>
38#include <linux/adb.h>
39#include <linux/cuda.h>
40#include <linux/pmu.h>
41#include <linux/irq.h>
42#include <linux/seq_file.h>
43#include <linux/root_dev.h>
44#include <linux/serial.h>
45#include <linux/smp.h>
46
47#include <asm/processor.h>
48#include <asm/sections.h>
49#include <asm/prom.h>
50#include <asm/system.h>
51#include <asm/pgtable.h>
52#include <asm/bitops.h>
53#include <asm/io.h>
54#include <asm/pci-bridge.h>
55#include <asm/iommu.h>
56#include <asm/machdep.h>
57#include <asm/dma.h>
58#include <asm/cputable.h>
59#include <asm/time.h>
60#include <asm/of_device.h>
61#include <asm/lmb.h>
62
63#include "mpic.h"
64
65#ifdef DEBUG
66#define DBG(fmt...) udbg_printf(fmt)
67#else
68#define DBG(fmt...)
69#endif
70
71extern int maple_set_rtc_time(struct rtc_time *tm);
72extern void maple_get_rtc_time(struct rtc_time *tm);
73extern void maple_get_boot_time(struct rtc_time *tm);
74extern void maple_calibrate_decr(void);
75extern void maple_pci_init(void);
76extern void maple_pcibios_fixup(void);
77extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel);
78extern void generic_find_legacy_serial_ports(u64 *physport,
79 unsigned int *default_speed);
80
81
82static void maple_restart(char *cmd)
83{
84}
85
86static void maple_power_off(void)
87{
88}
89
90static void maple_halt(void)
91{
92}
93
94#ifdef CONFIG_SMP
95struct smp_ops_t maple_smp_ops = {
96 .probe = smp_mpic_probe,
97 .message_pass = smp_mpic_message_pass,
98 .kick_cpu = smp_generic_kick_cpu,
99 .setup_cpu = smp_mpic_setup_cpu,
100 .give_timebase = smp_generic_give_timebase,
101 .take_timebase = smp_generic_take_timebase,
102};
103#endif /* CONFIG_SMP */
104
105void __init maple_setup_arch(void)
106{
107 /* init to some ~sane value until calibrate_delay() runs */
108 loops_per_jiffy = 50000000;
109
110 /* Setup SMP callback */
111#ifdef CONFIG_SMP
112 smp_ops = &maple_smp_ops;
113#endif
114 /* Lookup PCI hosts */
115 maple_pci_init();
116
117#ifdef CONFIG_DUMMY_CONSOLE
118 conswitchp = &dummy_con;
119#endif
120}
121
122/*
123 * Early initialization.
124 */
125static void __init maple_init_early(void)
126{
127 unsigned int default_speed;
128 u64 physport;
129
130 DBG(" -> maple_init_early\n");
131
132 /* Initialize hash table, from now on, we can take hash faults
133 * and call ioremap
134 */
135 hpte_init_native();
136
137 /* Find the serial port */
138 generic_find_legacy_serial_ports(&physport, &default_speed);
139
140 DBG("phys port addr: %lx\n", (long)physport);
141
142 if (physport) {
143 void *comport;
144 /* Map the uart for udbg. */
145 comport = (void *)__ioremap(physport, 16, _PAGE_NO_CACHE);
146 udbg_init_uart(comport, default_speed);
147
148 ppc_md.udbg_putc = udbg_putc;
149 ppc_md.udbg_getc = udbg_getc;
150 ppc_md.udbg_getc_poll = udbg_getc_poll;
151 DBG("Hello World !\n");
152 }
153
154 /* Setup interrupt mapping options */
155 ppc64_interrupt_controller = IC_OPEN_PIC;
156
157 iommu_init_early_u3();
158
159 DBG(" <- maple_init_early\n");
160}
161
162
163static __init void maple_init_IRQ(void)
164{
165 struct device_node *root;
166 unsigned int *opprop;
167 unsigned long opic_addr;
168 struct mpic *mpic;
169 unsigned char senses[128];
170 int n;
171
172 DBG(" -> maple_init_IRQ\n");
173
174 /* XXX: Non standard, replace that with a proper openpic/mpic node
175 * in the device-tree. Find the Open PIC if present */
176 root = of_find_node_by_path("/");
177 opprop = (unsigned int *) get_property(root,
178 "platform-open-pic", NULL);
179 if (opprop == 0)
180 panic("OpenPIC not found !\n");
181
182 n = prom_n_addr_cells(root);
183 for (opic_addr = 0; n > 0; --n)
184 opic_addr = (opic_addr << 32) + *opprop++;
185 of_node_put(root);
186
187 /* Obtain sense values from device-tree */
188 prom_get_irq_senses(senses, 0, 128);
189
190 mpic = mpic_alloc(opic_addr,
191 MPIC_PRIMARY | MPIC_BIG_ENDIAN |
192 MPIC_BROKEN_U3 | MPIC_WANTS_RESET,
193 0, 0, 128, 128, senses, 128, "U3-MPIC");
194 BUG_ON(mpic == NULL);
195 mpic_init(mpic);
196
197 DBG(" <- maple_init_IRQ\n");
198}
199
200static void __init maple_progress(char *s, unsigned short hex)
201{
202 printk("*** %04x : %s\n", hex, s ? s : "");
203}
204
205
206/*
207 * Called very early, MMU is off, device-tree isn't unflattened
208 */
209static int __init maple_probe(int platform)
210{
211 if (platform != PLATFORM_MAPLE)
212 return 0;
213 /*
214 * On U3, the DART (iommu) must be allocated now since it
215 * has an impact on htab_initialize (due to the large page it
216	 * occupies having to be broken up, so that the DART itself is
217	 * not part of the cacheable linear mapping).
218 */
219 alloc_u3_dart_table();
220
221 return 1;
222}
223
224struct machdep_calls __initdata maple_md = {
225 .probe = maple_probe,
226 .setup_arch = maple_setup_arch,
227 .init_early = maple_init_early,
228 .init_IRQ = maple_init_IRQ,
229 .get_irq = mpic_get_irq,
230 .pcibios_fixup = maple_pcibios_fixup,
231 .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq,
232 .restart = maple_restart,
233 .power_off = maple_power_off,
234 .halt = maple_halt,
235 .get_boot_time = maple_get_boot_time,
236 .set_rtc_time = maple_set_rtc_time,
237 .get_rtc_time = maple_get_rtc_time,
238 .calibrate_decr = maple_calibrate_decr,
239 .progress = maple_progress,
240};
diff --git a/arch/ppc64/kernel/maple_time.c b/arch/ppc64/kernel/maple_time.c
new file mode 100644
index 000000000000..07ce7895b43d
--- /dev/null
+++ b/arch/ppc64/kernel/maple_time.c
@@ -0,0 +1,226 @@
1/*
2 * arch/ppc64/kernel/maple_time.c
3 *
4 * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
5 * IBM Corp.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 */
13
14#undef DEBUG
15
16#include <linux/config.h>
17#include <linux/errno.h>
18#include <linux/sched.h>
19#include <linux/kernel.h>
20#include <linux/param.h>
21#include <linux/string.h>
22#include <linux/mm.h>
23#include <linux/init.h>
24#include <linux/time.h>
25#include <linux/adb.h>
26#include <linux/pmu.h>
27#include <linux/interrupt.h>
28#include <linux/mc146818rtc.h>
29#include <linux/bcd.h>
30
31#include <asm/sections.h>
32#include <asm/prom.h>
33#include <asm/system.h>
34#include <asm/io.h>
35#include <asm/pgtable.h>
36#include <asm/machdep.h>
37#include <asm/time.h>
38
39#ifdef DEBUG
40#define DBG(x...) printk(x)
41#else
42#define DBG(x...)
43#endif
44
45extern void setup_default_decr(void);
46extern void GregorianDay(struct rtc_time * tm);
47
48extern unsigned long ppc_tb_freq;
49extern unsigned long ppc_proc_freq;
50static int maple_rtc_addr;
51
52static int maple_clock_read(int addr)
53{
54 outb_p(addr, maple_rtc_addr);
55 return inb_p(maple_rtc_addr+1);
56}
57
58static void maple_clock_write(unsigned long val, int addr)
59{
60 outb_p(addr, maple_rtc_addr);
61 outb_p(val, maple_rtc_addr+1);
62}
63
64void maple_get_rtc_time(struct rtc_time *tm)
65{
66 int uip, i;
67
68 /* The Linux interpretation of the CMOS clock register contents:
69 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
70 * RTC registers show the second which has precisely just started.
71 * Let's hope other operating systems interpret the RTC the same way.
72 */
73
74 /* Since the UIP flag is set for about 2.2 ms and the clock
75 * is typically written with a precision of 1 jiffy, trying
76 * to obtain a precision better than a few milliseconds is
77	 * an illusion. Only consistency is interesting; this also
78	 * allows the routine to be used for /dev/rtc without a potential
79 * 1 second kernel busy loop triggered by any reader of /dev/rtc.
80 */
81
82	for (i = 0; i < 1000000; i++) {
83 uip = maple_clock_read(RTC_FREQ_SELECT);
84 tm->tm_sec = maple_clock_read(RTC_SECONDS);
85 tm->tm_min = maple_clock_read(RTC_MINUTES);
86 tm->tm_hour = maple_clock_read(RTC_HOURS);
87 tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH);
88 tm->tm_mon = maple_clock_read(RTC_MONTH);
89 tm->tm_year = maple_clock_read(RTC_YEAR);
90 uip |= maple_clock_read(RTC_FREQ_SELECT);
91 if ((uip & RTC_UIP)==0)
92 break;
93 }
94
95 if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY)
96 || RTC_ALWAYS_BCD) {
97 BCD_TO_BIN(tm->tm_sec);
98 BCD_TO_BIN(tm->tm_min);
99 BCD_TO_BIN(tm->tm_hour);
100 BCD_TO_BIN(tm->tm_mday);
101 BCD_TO_BIN(tm->tm_mon);
102 BCD_TO_BIN(tm->tm_year);
103 }
104 if ((tm->tm_year + 1900) < 1970)
105 tm->tm_year += 100;
106
107 GregorianDay(tm);
108}
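
/*
 * Worked example (illustrative values): with the RTC in BCD mode, a
 * raw RTC_SECONDS read of 0x42 becomes 42 after BCD_TO_BIN().  The
 * year fixup then widens the two-digit year: a raw year of 4 gives
 * tm_year = 4, i.e. 1904 < 1970, so 100 is added and tm_year = 104
 * is interpreted as 2004.
 */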
109
110int maple_set_rtc_time(struct rtc_time *tm)
111{
112 unsigned char save_control, save_freq_select;
113 int sec, min, hour, mon, mday, year;
114
115 spin_lock(&rtc_lock);
116
117 save_control = maple_clock_read(RTC_CONTROL); /* tell the clock it's being set */
118
119 maple_clock_write((save_control|RTC_SET), RTC_CONTROL);
120
121 save_freq_select = maple_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */
122
123 maple_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
124
125 sec = tm->tm_sec;
126 min = tm->tm_min;
127 hour = tm->tm_hour;
128 mon = tm->tm_mon;
129 mday = tm->tm_mday;
130 year = tm->tm_year;
131
132 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
133 BIN_TO_BCD(sec);
134 BIN_TO_BCD(min);
135 BIN_TO_BCD(hour);
136 BIN_TO_BCD(mon);
137 BIN_TO_BCD(mday);
138 BIN_TO_BCD(year);
139 }
140 maple_clock_write(sec, RTC_SECONDS);
141 maple_clock_write(min, RTC_MINUTES);
142 maple_clock_write(hour, RTC_HOURS);
143 maple_clock_write(mon, RTC_MONTH);
144 maple_clock_write(mday, RTC_DAY_OF_MONTH);
145 maple_clock_write(year, RTC_YEAR);
146
147 /* The following flags have to be released exactly in this order,
148 * otherwise the DS12887 (popular MC146818A clone with integrated
149 * battery and quartz) will not reset the oscillator and will not
150 * update precisely 500 ms later. You won't find this mentioned in
151 * the Dallas Semiconductor data sheets, but who believes data
152 * sheets anyway ... -- Markus Kuhn
153 */
154 maple_clock_write(save_control, RTC_CONTROL);
155 maple_clock_write(save_freq_select, RTC_FREQ_SELECT);
156
157 spin_unlock(&rtc_lock);
158
159 return 0;
160}
161
162void __init maple_get_boot_time(struct rtc_time *tm)
163{
164 struct device_node *rtcs;
165
166 rtcs = find_compatible_devices("rtc", "pnpPNP,b00");
167 if (rtcs && rtcs->addrs) {
168 maple_rtc_addr = rtcs->addrs[0].address;
169 printk(KERN_INFO "Maple: Found RTC at 0x%x\n", maple_rtc_addr);
170 } else {
171 maple_rtc_addr = RTC_PORT(0); /* legacy address */
172 printk(KERN_INFO "Maple: No device node for RTC, assuming "
173 "legacy address (0x%x)\n", maple_rtc_addr);
174 }
175
176 maple_get_rtc_time(tm);
177}
178
179/* XXX FIXME: Some sane defaults: 125 MHz timebase, 1GHz processor */
180#define DEFAULT_TB_FREQ 125000000UL
181#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8)
182
183void __init maple_calibrate_decr(void)
184{
185 struct device_node *cpu;
186 struct div_result divres;
187 unsigned int *fp = NULL;
188
189 /*
190 * The cpu node should have a timebase-frequency property
191 * to tell us the rate at which the decrementer counts.
192 */
193 cpu = of_find_node_by_type(NULL, "cpu");
194
195 ppc_tb_freq = DEFAULT_TB_FREQ;
196 if (cpu != 0)
197 fp = (unsigned int *)get_property(cpu, "timebase-frequency", NULL);
198 if (fp != NULL)
199 ppc_tb_freq = *fp;
200 else
201 printk(KERN_ERR "WARNING: Estimating decrementer frequency (not found)\n");
202 fp = NULL;
203 ppc_proc_freq = DEFAULT_PROC_FREQ;
204 if (cpu != 0)
205 fp = (unsigned int *)get_property(cpu, "clock-frequency", NULL);
206 if (fp != NULL)
207 ppc_proc_freq = *fp;
208 else
209 printk(KERN_ERR "WARNING: Estimating processor frequency (not found)\n");
210
211 of_node_put(cpu);
212
213 printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n",
214 ppc_tb_freq/1000000, ppc_tb_freq%1000000);
215 printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n",
216 ppc_proc_freq/1000000, ppc_proc_freq%1000000);
217
218 tb_ticks_per_jiffy = ppc_tb_freq / HZ;
219 tb_ticks_per_sec = tb_ticks_per_jiffy * HZ;
220 tb_ticks_per_usec = ppc_tb_freq / 1000000;
221 tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
222 div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres);
223 tb_to_xs = divres.result_low;
224
225 setup_default_decr();
226}
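
/*
 * Worked example (illustrative, assuming the 125 MHz default and
 * HZ = 1000): tb_ticks_per_jiffy = 125000000 / 1000 = 125000,
 * tb_ticks_per_usec = 125000000 / 1000000 = 125, and the banner
 * reads "decrementer frequency = 125.000000 MHz".  Assuming
 * div128_by_32() divides the 128-bit value (high << 64 | low),
 * tb_to_xs then holds the low 64 bits of (2^20 << 64) /
 * tb_ticks_per_sec, a fixed-point factor for converting timebase
 * ticks to xsec units.
 */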
diff --git a/arch/ppc64/kernel/mf.c b/arch/ppc64/kernel/mf.c
new file mode 100644
index 000000000000..1bd52ece497c
--- /dev/null
+++ b/arch/ppc64/kernel/mf.c
@@ -0,0 +1,1239 @@
1/*
2 * mf.c
3 * Copyright (C) 2001 Troy D. Armstrong IBM Corporation
4 * Copyright (C) 2004 Stephen Rothwell IBM Corporation
5 *
6 * This module exists as an interface between a Linux secondary partition
7 * running on an iSeries and the primary partition's Virtual Service
8 * Processor (VSP) object. The VSP has final authority over powering on/off
9 * all partitions in the iSeries. It also provides miscellaneous low-level
10 * machine facility type operations.
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/types.h>
29#include <linux/errno.h>
30#include <linux/kernel.h>
31#include <linux/init.h>
32#include <linux/completion.h>
33#include <linux/delay.h>
34#include <linux/dma-mapping.h>
35#include <linux/bcd.h>
36
37#include <asm/time.h>
38#include <asm/uaccess.h>
39#include <asm/iSeries/vio.h>
40#include <asm/iSeries/mf.h>
41#include <asm/iSeries/HvLpConfig.h>
42#include <asm/iSeries/ItSpCommArea.h>
43
44/*
45 * This is the structure layout for the Machine Facilities LPAR event
46 * flows.
47 */
48struct vsp_cmd_data {
49 u64 token;
50 u16 cmd;
51 HvLpIndex lp_index;
52 u8 result_code;
53 u32 reserved;
54 union {
55 u64 state; /* GetStateOut */
56 u64 ipl_type; /* GetIplTypeOut, Function02SelectIplTypeIn */
57 u64 ipl_mode; /* GetIplModeOut, Function02SelectIplModeIn */
58 u64 page[4]; /* GetSrcHistoryIn */
59 u64 flag; /* GetAutoIplWhenPrimaryIplsOut,
60 SetAutoIplWhenPrimaryIplsIn,
61 WhiteButtonPowerOffIn,
62 Function08FastPowerOffIn,
63 IsSpcnRackPowerIncompleteOut */
64 struct {
65 u64 token;
66 u64 address_type;
67 u64 side;
68 u32 length;
69 u32 offset;
70 } kern; /* SetKernelImageIn, GetKernelImageIn,
71 SetKernelCmdLineIn, GetKernelCmdLineIn */
72 u32 length_out; /* GetKernelImageOut, GetKernelCmdLineOut */
73 u8 reserved[80];
74 } sub_data;
75};
76
77struct vsp_rsp_data {
78 struct completion com;
79 struct vsp_cmd_data *response;
80};
81
82struct alloc_data {
83 u16 size;
84 u16 type;
85 u32 count;
86 u16 reserved1;
87 u8 reserved2;
88 HvLpIndex target_lp;
89};
90
91struct ce_msg_data;
92
93typedef void (*ce_msg_comp_hdlr)(void *token, struct ce_msg_data *vsp_cmd_rsp);
94
95struct ce_msg_comp_data {
96 ce_msg_comp_hdlr handler;
97 void *token;
98};
99
100struct ce_msg_data {
101 u8 ce_msg[12];
102 char reserved[4];
103 struct ce_msg_comp_data *completion;
104};
105
106struct io_mf_lp_event {
107 struct HvLpEvent hp_lp_event;
108 u16 subtype_result_code;
109 u16 reserved1;
110 u32 reserved2;
111 union {
112 struct alloc_data alloc;
113 struct ce_msg_data ce_msg;
114 struct vsp_cmd_data vsp_cmd;
115 } data;
116};
117
118#define subtype_data(a, b, c, d) \
119 (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
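/* e.g. subtype_data('M', 'F', 'V', 'I') == 0x4d465649, the ASCII bytes "MFVI" packed big-endian */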
120
121/*
122 * All outgoing event traffic is kept on a FIFO queue. The head
123 * pointer points to the event that is currently outstanding, and
124 * all new requests are appended at the tail. Also, we keep a certain number of
125 * preallocated pending events so that we can operate very early in
126 * the boot up sequence (before kmalloc is ready).
127 */
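/* pending_event_head -> (outstanding) -> ... -> pending_event_tail; recycled entries are kept on pending_event_avail */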
128struct pending_event {
129 struct pending_event *next;
130 struct io_mf_lp_event event;
131 MFCompleteHandler hdlr;
132 char dma_data[72];
133 unsigned dma_data_length;
134 unsigned remote_address;
135};
136static spinlock_t pending_event_spinlock;
137static struct pending_event *pending_event_head;
138static struct pending_event *pending_event_tail;
139static struct pending_event *pending_event_avail;
140static struct pending_event pending_event_prealloc[16];
141
142/*
143 * Put a pending event onto the available queue, so it can get reused.
144 * Attention! You must hold the pending_event_spinlock before calling!
145 */
146static void free_pending_event(struct pending_event *ev)
147{
148 if (ev != NULL) {
149 ev->next = pending_event_avail;
150 pending_event_avail = ev;
151 }
152}
153
154/*
155 * Enqueue the outbound event onto the queue. If the queue was
156 * empty to begin with, we must also issue it via the Hypervisor
157 * interface. There is a section of code below that will touch
158 * the queue head pointer without the protection of the
159 * pending_event_spinlock. This is OK, because we know that nobody
160 * else will be modifying the head pointer when we do this.
161 */
162static int signal_event(struct pending_event *ev)
163{
164 int rc = 0;
165 unsigned long flags;
166 int go = 1;
167 struct pending_event *ev1;
168 HvLpEvent_Rc hv_rc;
169
170 /* enqueue the event */
171 if (ev != NULL) {
172 ev->next = NULL;
173 spin_lock_irqsave(&pending_event_spinlock, flags);
174 if (pending_event_head == NULL)
175 pending_event_head = ev;
176 else {
177 go = 0;
178 pending_event_tail->next = ev;
179 }
180 pending_event_tail = ev;
181 spin_unlock_irqrestore(&pending_event_spinlock, flags);
182 }
183
184 /* send the event */
185 while (go) {
186 go = 0;
187
188 /* any DMA data to send beforehand? */
189 if (pending_event_head->dma_data_length > 0)
190 HvCallEvent_dmaToSp(pending_event_head->dma_data,
191 pending_event_head->remote_address,
192 pending_event_head->dma_data_length,
193 HvLpDma_Direction_LocalToRemote);
194
195 hv_rc = HvCallEvent_signalLpEvent(
196 &pending_event_head->event.hp_lp_event);
197 if (hv_rc != HvLpEvent_Rc_Good) {
198 printk(KERN_ERR "mf.c: HvCallEvent_signalLpEvent() "
199 "failed with %d\n", (int)hv_rc);
200
201 spin_lock_irqsave(&pending_event_spinlock, flags);
202 ev1 = pending_event_head;
203 pending_event_head = pending_event_head->next;
204 if (pending_event_head != NULL)
205 go = 1;
206 spin_unlock_irqrestore(&pending_event_spinlock, flags);
207
208 if (ev1 == ev)
209 rc = -EIO;
210 else if (ev1->hdlr != NULL)
211 (*ev1->hdlr)((void *)ev1->event.hp_lp_event.xCorrelationToken, -EIO);
212
213 spin_lock_irqsave(&pending_event_spinlock, flags);
214 free_pending_event(ev1);
215 spin_unlock_irqrestore(&pending_event_spinlock, flags);
216 }
217 }
218
219 return rc;
220}
221
222/*
223 * Allocate a new pending_event structure, and initialize it.
224 */
225static struct pending_event *new_pending_event(void)
226{
227 struct pending_event *ev = NULL;
228 HvLpIndex primary_lp = HvLpConfig_getPrimaryLpIndex();
229 unsigned long flags;
230 struct HvLpEvent *hev;
231
232 spin_lock_irqsave(&pending_event_spinlock, flags);
233 if (pending_event_avail != NULL) {
234 ev = pending_event_avail;
235 pending_event_avail = pending_event_avail->next;
236 }
237 spin_unlock_irqrestore(&pending_event_spinlock, flags);
238 if (ev == NULL) {
239 ev = kmalloc(sizeof(struct pending_event), GFP_ATOMIC);
240 if (ev == NULL) {
241 printk(KERN_ERR "mf.c: unable to kmalloc %ld bytes\n",
242 sizeof(struct pending_event));
243 return NULL;
244 }
245 }
246 memset(ev, 0, sizeof(struct pending_event));
247 hev = &ev->event.hp_lp_event;
248 hev->xFlags.xValid = 1;
249 hev->xFlags.xAckType = HvLpEvent_AckType_ImmediateAck;
250 hev->xFlags.xAckInd = HvLpEvent_AckInd_DoAck;
251 hev->xFlags.xFunction = HvLpEvent_Function_Int;
252 hev->xType = HvLpEvent_Type_MachineFac;
253 hev->xSourceLp = HvLpConfig_getLpIndex();
254 hev->xTargetLp = primary_lp;
255 hev->xSizeMinus1 = sizeof(ev->event) - 1;
256 hev->xRc = HvLpEvent_Rc_Good;
257 hev->xSourceInstanceId = HvCallEvent_getSourceLpInstanceId(primary_lp,
258 HvLpEvent_Type_MachineFac);
259 hev->xTargetInstanceId = HvCallEvent_getTargetLpInstanceId(primary_lp,
260 HvLpEvent_Type_MachineFac);
261
262 return ev;
263}
264
265static int signal_vsp_instruction(struct vsp_cmd_data *vsp_cmd)
266{
267 struct pending_event *ev = new_pending_event();
268 int rc;
269 struct vsp_rsp_data response;
270
271 if (ev == NULL)
272 return -ENOMEM;
273
274 init_completion(&response.com);
275 response.response = vsp_cmd;
276 ev->event.hp_lp_event.xSubtype = 6;
277 ev->event.hp_lp_event.x.xSubtypeData =
278 subtype_data('M', 'F', 'V', 'I');
279 ev->event.data.vsp_cmd.token = (u64)&response;
280 ev->event.data.vsp_cmd.cmd = vsp_cmd->cmd;
281 ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex();
282 ev->event.data.vsp_cmd.result_code = 0xFF;
283 ev->event.data.vsp_cmd.reserved = 0;
284 memcpy(&(ev->event.data.vsp_cmd.sub_data),
285 &(vsp_cmd->sub_data), sizeof(vsp_cmd->sub_data));
286 mb();
287
288 rc = signal_event(ev);
289 if (rc == 0)
290 wait_for_completion(&response.com);
291 return rc;
292}
293
294
295/*
296 * Send a 12-byte CE message to the primary partition VSP object
297 */
298static int signal_ce_msg(char *ce_msg, struct ce_msg_comp_data *completion)
299{
300 struct pending_event *ev = new_pending_event();
301
302 if (ev == NULL)
303 return -ENOMEM;
304
305 ev->event.hp_lp_event.xSubtype = 0;
306 ev->event.hp_lp_event.x.xSubtypeData =
307 subtype_data('M', 'F', 'C', 'E');
308 memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12);
309 ev->event.data.ce_msg.completion = completion;
310 return signal_event(ev);
311}
312
313/*
314 * Send a 12-byte CE message (with no data) to the primary partition VSP object
315 */
316static int signal_ce_msg_simple(u8 ce_op, struct ce_msg_comp_data *completion)
317{
318 u8 ce_msg[12];
319
320 memset(ce_msg, 0, sizeof(ce_msg));
321 ce_msg[3] = ce_op;
322 return signal_ce_msg(ce_msg, completion);
323}
324
325/*
326 * Send a 12-byte CE message and DMA data to the primary partition VSP object
327 */
328static int dma_and_signal_ce_msg(char *ce_msg,
329 struct ce_msg_comp_data *completion, void *dma_data,
330 unsigned dma_data_length, unsigned remote_address)
331{
332 struct pending_event *ev = new_pending_event();
333
334 if (ev == NULL)
335 return -ENOMEM;
336
337 ev->event.hp_lp_event.xSubtype = 0;
338 ev->event.hp_lp_event.x.xSubtypeData =
339 subtype_data('M', 'F', 'C', 'E');
340 memcpy(ev->event.data.ce_msg.ce_msg, ce_msg, 12);
341 ev->event.data.ce_msg.completion = completion;
342 memcpy(ev->dma_data, dma_data, dma_data_length);
343 ev->dma_data_length = dma_data_length;
344 ev->remote_address = remote_address;
345 return signal_event(ev);
346}
347
348/*
349 * Initiate a nice (hopefully) shutdown of Linux. We are simply
350 * going to try to send the init process a SIGINT signal. If
351 * this fails (why would it?), we will force the partition off in a
352 * not-so-nice manner.
353 */
354static int shutdown(void)
355{
356 int rc = kill_proc(1, SIGINT, 1);
357
358 if (rc) {
359 printk(KERN_ALERT "mf.c: SIGINT to init failed (%d), "
360 "hard shutdown commencing\n", rc);
361 mf_power_off();
362 } else
363 printk(KERN_INFO "mf.c: init has been successfully notified "
364 "to proceed with shutdown\n");
365 return rc;
366}
367
368/*
369 * The primary partition VSP object is sending us a new
370 * event flow. Handle it...
371 */
372static void handle_int(struct io_mf_lp_event *event)
373{
374 struct ce_msg_data *ce_msg_data;
375 struct ce_msg_data *pce_msg_data;
376 unsigned long flags;
377 struct pending_event *pev;
378
379 /* ack the interrupt */
380 event->hp_lp_event.xRc = HvLpEvent_Rc_Good;
381 HvCallEvent_ackLpEvent(&event->hp_lp_event);
382
383 /* process interrupt */
384 switch (event->hp_lp_event.xSubtype) {
385 case 0: /* CE message */
386 ce_msg_data = &event->data.ce_msg;
387 switch (ce_msg_data->ce_msg[3]) {
388 case 0x5B: /* power control notification */
389 if ((ce_msg_data->ce_msg[5] & 0x20) != 0) {
390 printk(KERN_INFO "mf.c: Commencing partition shutdown\n");
391 if (shutdown() == 0)
392 signal_ce_msg_simple(0xDB, NULL);
393 }
394 break;
395 case 0xC0: /* get time */
396 spin_lock_irqsave(&pending_event_spinlock, flags);
397 pev = pending_event_head;
398 if (pev != NULL)
399 pending_event_head = pending_event_head->next;
400 spin_unlock_irqrestore(&pending_event_spinlock, flags);
401 if (pev == NULL)
402 break;
403 pce_msg_data = &pev->event.data.ce_msg;
404 if (pce_msg_data->ce_msg[3] != 0x40)
405 break;
406 if (pce_msg_data->completion != NULL) {
407 ce_msg_comp_hdlr handler =
408 pce_msg_data->completion->handler;
409 void *token = pce_msg_data->completion->token;
410
411 if (handler != NULL)
412 (*handler)(token, ce_msg_data);
413 }
414 spin_lock_irqsave(&pending_event_spinlock, flags);
415 free_pending_event(pev);
416 spin_unlock_irqrestore(&pending_event_spinlock, flags);
417 /* send next waiting event */
418 if (pending_event_head != NULL)
419 signal_event(NULL);
420 break;
421 }
422 break;
423 case 1: /* IT sys shutdown */
424 printk(KERN_INFO "mf.c: Commencing system shutdown\n");
425 shutdown();
426 break;
427 }
428}
429
430/*
431 * The primary partition VSP object is acknowledging the receipt
432 * of a flow we sent to them. If there are other flows queued
433 * up, we must send another one now...
434 */
435static void handle_ack(struct io_mf_lp_event *event)
436{
437 unsigned long flags;
438 struct pending_event *two = NULL;
439 unsigned long free_it = 0;
440 struct ce_msg_data *ce_msg_data;
441 struct ce_msg_data *pce_msg_data;
442 struct vsp_rsp_data *rsp;
443
444 /* handle current event */
445 if (pending_event_head == NULL) {
446 printk(KERN_ERR "mf.c: stack empty for receiving ack\n");
447 return;
448 }
449
450 switch (event->hp_lp_event.xSubtype) {
451 case 0: /* CE msg */
452 ce_msg_data = &event->data.ce_msg;
453 if (ce_msg_data->ce_msg[3] != 0x40) {
454 free_it = 1;
455 break;
456 }
457 if (ce_msg_data->ce_msg[2] == 0)
458 break;
459 free_it = 1;
460 pce_msg_data = &pending_event_head->event.data.ce_msg;
461 if (pce_msg_data->completion != NULL) {
462 ce_msg_comp_hdlr handler =
463 pce_msg_data->completion->handler;
464 void *token = pce_msg_data->completion->token;
465
466 if (handler != NULL)
467 (*handler)(token, ce_msg_data);
468 }
469 break;
470 case 4: /* allocate */
471 case 5: /* deallocate */
472 if (pending_event_head->hdlr != NULL)
473 (*pending_event_head->hdlr)((void *)event->hp_lp_event.xCorrelationToken, event->data.alloc.count);
474 free_it = 1;
475 break;
476 case 6:
477 free_it = 1;
478 rsp = (struct vsp_rsp_data *)event->data.vsp_cmd.token;
479 if (rsp == NULL) {
480 printk(KERN_ERR "mf.c: no rsp\n");
481 break;
482 }
483 if (rsp->response != NULL)
484 memcpy(rsp->response, &event->data.vsp_cmd,
485 sizeof(event->data.vsp_cmd));
486 complete(&rsp->com);
487 break;
488 }
489
490 /* remove from queue */
491 spin_lock_irqsave(&pending_event_spinlock, flags);
492 if ((pending_event_head != NULL) && (free_it == 1)) {
493 struct pending_event *oldHead = pending_event_head;
494
495 pending_event_head = pending_event_head->next;
496 two = pending_event_head;
497 free_pending_event(oldHead);
498 }
499 spin_unlock_irqrestore(&pending_event_spinlock, flags);
500
501 /* send next waiting event */
502 if (two != NULL)
503 signal_event(NULL);
504}
505
506/*
507 * This is the generic event handler we are registering with
508 * the Hypervisor. Ensure the flow is for us, and then
509 * parse it enough to know whether it is an interrupt or an
510 * acknowledge.
511 */
512static void hv_handler(struct HvLpEvent *event, struct pt_regs *regs)
513{
514 if ((event != NULL) && (event->xType == HvLpEvent_Type_MachineFac)) {
515 switch(event->xFlags.xFunction) {
516 case HvLpEvent_Function_Ack:
517 handle_ack((struct io_mf_lp_event *)event);
518 break;
519 case HvLpEvent_Function_Int:
520 handle_int((struct io_mf_lp_event *)event);
521 break;
522 default:
523 printk(KERN_ERR "mf.c: non ack/int event received\n");
524 break;
525 }
526 } else
527 printk(KERN_ERR "mf.c: alien event received\n");
528}
529
530/*
531 * Global kernel interface to allocate and seed events into the
532 * Hypervisor.
533 */
534void mf_allocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type,
535 unsigned size, unsigned count, MFCompleteHandler hdlr,
536 void *user_token)
537{
538 struct pending_event *ev = new_pending_event();
539 int rc;
540
541 if (ev == NULL) {
542 rc = -ENOMEM;
543 } else {
544 ev->event.hp_lp_event.xSubtype = 4;
545 ev->event.hp_lp_event.xCorrelationToken = (u64)user_token;
546 ev->event.hp_lp_event.x.xSubtypeData =
547 subtype_data('M', 'F', 'M', 'A');
548 ev->event.data.alloc.target_lp = target_lp;
549 ev->event.data.alloc.type = type;
550 ev->event.data.alloc.size = size;
551 ev->event.data.alloc.count = count;
552 ev->hdlr = hdlr;
553 rc = signal_event(ev);
554 }
555 if ((rc != 0) && (hdlr != NULL))
556 (*hdlr)(user_token, rc);
557}
558EXPORT_SYMBOL(mf_allocate_lp_events);
559
560/*
561 * Global kernel interface to unseed and deallocate events already in
562 * Hypervisor.
563 */
564void mf_deallocate_lp_events(HvLpIndex target_lp, HvLpEvent_Type type,
565 unsigned count, MFCompleteHandler hdlr, void *user_token)
566{
567 struct pending_event *ev = new_pending_event();
568 int rc;
569
570 if (ev == NULL)
571 rc = -ENOMEM;
572 else {
573 ev->event.hp_lp_event.xSubtype = 5;
574 ev->event.hp_lp_event.xCorrelationToken = (u64)user_token;
575 ev->event.hp_lp_event.x.xSubtypeData =
576 subtype_data('M', 'F', 'M', 'D');
577 ev->event.data.alloc.target_lp = target_lp;
578 ev->event.data.alloc.type = type;
579 ev->event.data.alloc.count = count;
580 ev->hdlr = hdlr;
581 rc = signal_event(ev);
582 }
583 if ((rc != 0) && (hdlr != NULL))
584 (*hdlr)(user_token, rc);
585}
586EXPORT_SYMBOL(mf_deallocate_lp_events);
587
588/*
589 * Global kernel interface to tell the VSP object in the primary
590 * partition to power this partition off.
591 */
592void mf_power_off(void)
593{
594 printk(KERN_INFO "mf.c: Down it goes...\n");
595 signal_ce_msg_simple(0x4d, NULL);
596 for (;;)
597 ;
598}
599
600/*
601 * Global kernel interface to tell the VSP object in the primary
602 * partition to reboot this partition.
603 */
604void mf_reboot(void)
605{
606 printk(KERN_INFO "mf.c: Preparing to bounce...\n");
607 signal_ce_msg_simple(0x4e, NULL);
608 for (;;)
609 ;
610}
611
612/*
613 * Display a single word SRC onto the VSP control panel.
614 */
615void mf_display_src(u32 word)
616{
617 u8 ce[12];
618
619 memset(ce, 0, sizeof(ce));
620 ce[3] = 0x4a;
621 ce[7] = 0x01;
622 ce[8] = word >> 24;
623 ce[9] = word >> 16;
624 ce[10] = word >> 8;
625 ce[11] = word;
626 signal_ce_msg(ce, NULL);
627}
628
629/*
630 * Display a single word SRC of the form "PROGXXXX" on the VSP control panel.
631 */
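/* e.g. mf_display_progress(0x1234) puts "PROG1234" on the panel */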
632void mf_display_progress(u16 value)
633{
634 u8 ce[12];
635 u8 src[72];
636
637 memcpy(ce, "\x00\x00\x04\x4A\x00\x00\x00\x48\x00\x00\x00\x00", 12);
638 memcpy(src, "\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00"
639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
641 "\x00\x00\x00\x00PROGxxxx ",
642 72);
643 src[6] = value >> 8;
644 src[7] = value & 255;
645 src[44] = "0123456789ABCDEF"[(value >> 12) & 15];
646 src[45] = "0123456789ABCDEF"[(value >> 8) & 15];
647 src[46] = "0123456789ABCDEF"[(value >> 4) & 15];
648 src[47] = "0123456789ABCDEF"[value & 15];
649 dma_and_signal_ce_msg(ce, NULL, src, sizeof(src), 9 * 64 * 1024);
650}
651
652/*
653 * Clear the VSP control panel. Used to "erase" an SRC that was
654 * previously displayed.
655 */
656void mf_clear_src(void)
657{
658 signal_ce_msg_simple(0x4b, NULL);
659}
660
661/*
662 * Initialization code here.
663 */
664void mf_init(void)
665{
666 int i;
667
668 /* initialize */
669 spin_lock_init(&pending_event_spinlock);
670 for (i = 0;
671 i < sizeof(pending_event_prealloc) / sizeof(*pending_event_prealloc);
672 ++i)
673 free_pending_event(&pending_event_prealloc[i]);
674 HvLpEvent_registerHandler(HvLpEvent_Type_MachineFac, &hv_handler);
675
676 /* virtual continue ack */
677 signal_ce_msg_simple(0x57, NULL);
678
679 /* initialization complete */
680 printk(KERN_NOTICE "mf.c: iSeries Linux LPAR Machine Facilities "
681 "initialized\n");
682}
683
684struct rtc_time_data {
685 struct completion com;
686 struct ce_msg_data ce_msg;
687 int rc;
688};
689
690static void get_rtc_time_complete(void *token, struct ce_msg_data *ce_msg)
691{
692 struct rtc_time_data *rtc = token;
693
694 memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg));
695 rtc->rc = 0;
696 complete(&rtc->com);
697}
698
699int mf_get_rtc(struct rtc_time *tm)
700{
701 struct ce_msg_comp_data ce_complete;
702 struct rtc_time_data rtc_data;
703 int rc;
704
705 memset(&ce_complete, 0, sizeof(ce_complete));
706 memset(&rtc_data, 0, sizeof(rtc_data));
707 init_completion(&rtc_data.com);
708 ce_complete.handler = &get_rtc_time_complete;
709 ce_complete.token = &rtc_data;
710 rc = signal_ce_msg_simple(0x40, &ce_complete);
711 if (rc)
712 return rc;
713 wait_for_completion(&rtc_data.com);
714 tm->tm_wday = 0;
715 tm->tm_yday = 0;
716 tm->tm_isdst = 0;
717 if (rtc_data.rc) {
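		/* on error, hand back a fixed fallback date (1952-06-15) */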
718 tm->tm_sec = 0;
719 tm->tm_min = 0;
720 tm->tm_hour = 0;
721 tm->tm_mday = 15;
722 tm->tm_mon = 5;
723 tm->tm_year = 52;
724 return rtc_data.rc;
725 }
726
727 if ((rtc_data.ce_msg.ce_msg[2] == 0xa9) ||
728 (rtc_data.ce_msg.ce_msg[2] == 0xaf)) {
729 /* TOD clock is not set */
730 tm->tm_sec = 1;
731 tm->tm_min = 1;
732 tm->tm_hour = 1;
733 tm->tm_mday = 10;
734 tm->tm_mon = 8;
735 tm->tm_year = 71;
736 mf_set_rtc(tm);
737 }
738 {
739 u8 *ce_msg = rtc_data.ce_msg.ce_msg;
740 u8 year = ce_msg[5];
741 u8 sec = ce_msg[6];
742 u8 min = ce_msg[7];
743 u8 hour = ce_msg[8];
744 u8 day = ce_msg[10];
745 u8 mon = ce_msg[11];
746
747 BCD_TO_BIN(sec);
748 BCD_TO_BIN(min);
749 BCD_TO_BIN(hour);
750 BCD_TO_BIN(day);
751 BCD_TO_BIN(mon);
752 BCD_TO_BIN(year);
753
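		/* two-digit BCD year: 00-69 => 2000s (tm_year >= 100), 70-99 => 1900s */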
754 if (year <= 69)
755 year += 100;
756
757 tm->tm_sec = sec;
758 tm->tm_min = min;
759 tm->tm_hour = hour;
760 tm->tm_mday = day;
761 tm->tm_mon = mon;
762 tm->tm_year = year;
763 }
764
765 return 0;
766}
767
768int mf_set_rtc(struct rtc_time *tm)
769{
770 char ce_time[12];
771 u8 day, mon, hour, min, sec, y1, y2;
772 unsigned year;
773
774 year = 1900 + tm->tm_year;
775 y1 = year / 100;
776 y2 = year % 100;
777
778 sec = tm->tm_sec;
779 min = tm->tm_min;
780 hour = tm->tm_hour;
781 day = tm->tm_mday;
782 mon = tm->tm_mon + 1;
783
784 BIN_TO_BCD(sec);
785 BIN_TO_BCD(min);
786 BIN_TO_BCD(hour);
787 BIN_TO_BCD(mon);
788 BIN_TO_BCD(day);
789 BIN_TO_BCD(y1);
790 BIN_TO_BCD(y2);
791
792 memset(ce_time, 0, sizeof(ce_time));
793 ce_time[3] = 0x41;
794 ce_time[4] = y1;
795 ce_time[5] = y2;
796 ce_time[6] = sec;
797 ce_time[7] = min;
798 ce_time[8] = hour;
799 ce_time[10] = day;
800 ce_time[11] = mon;
801
802 return signal_ce_msg(ce_time, NULL);
803}
804
805#ifdef CONFIG_PROC_FS
806
807static int proc_mf_dump_cmdline(char *page, char **start, off_t off,
808 int count, int *eof, void *data)
809{
810 int len;
811 char *p;
812 struct vsp_cmd_data vsp_cmd;
813 int rc;
814 dma_addr_t dma_addr;
815
816 /* The HV appears to return no more than 256 bytes of command line */
817 if (off >= 256)
818 return 0;
819 if ((off + count) > 256)
820 count = 256 - off;
821
822 dma_addr = dma_map_single(iSeries_vio_dev, page, off + count,
823 DMA_FROM_DEVICE);
824 if (dma_mapping_error(dma_addr))
825 return -ENOMEM;
826 memset(page, 0, off + count);
827 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
828 vsp_cmd.cmd = 33;
829 vsp_cmd.sub_data.kern.token = dma_addr;
830 vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex;
831 vsp_cmd.sub_data.kern.side = (u64)data;
832 vsp_cmd.sub_data.kern.length = off + count;
833 mb();
834 rc = signal_vsp_instruction(&vsp_cmd);
835 dma_unmap_single(iSeries_vio_dev, dma_addr, off + count,
836 DMA_FROM_DEVICE);
837 if (rc)
838 return rc;
839 if (vsp_cmd.result_code != 0)
840 return -ENOMEM;
841 p = page;
842 len = 0;
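	/* the command line ends at the first NUL (rewritten as '\n') or newline */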
843 while (len < (off + count)) {
844 if ((*p == '\0') || (*p == '\n')) {
845 if (*p == '\0')
846 *p = '\n';
847 p++;
848 len++;
849 *eof = 1;
850 break;
851 }
852 p++;
853 len++;
854 }
855
856 if (len < off) {
857 *eof = 1;
858 len = 0;
859 }
860 return len;
861}
862
863#if 0
864static int mf_getVmlinuxChunk(char *buffer, int *size, int offset, u64 side)
865{
866 struct vsp_cmd_data vsp_cmd;
867 int rc;
868 int len = *size;
869 dma_addr_t dma_addr;
870
871 dma_addr = dma_map_single(iSeries_vio_dev, buffer, len,
872 DMA_FROM_DEVICE);
873 memset(buffer, 0, len);
874 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
875 vsp_cmd.cmd = 32;
876 vsp_cmd.sub_data.kern.token = dma_addr;
877 vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex;
878 vsp_cmd.sub_data.kern.side = side;
879 vsp_cmd.sub_data.kern.offset = offset;
880 vsp_cmd.sub_data.kern.length = len;
881 mb();
882 rc = signal_vsp_instruction(&vsp_cmd);
883 if (rc == 0) {
884 if (vsp_cmd.result_code == 0)
885 *size = vsp_cmd.sub_data.length_out;
886 else
887 rc = -ENOMEM;
888 }
889
890 dma_unmap_single(iSeries_vio_dev, dma_addr, len, DMA_FROM_DEVICE);
891
892 return rc;
893}
894
895static int proc_mf_dump_vmlinux(char *page, char **start, off_t off,
896 int count, int *eof, void *data)
897{
898 int sizeToGet = count;
899
900 if (!capable(CAP_SYS_ADMIN))
901 return -EACCES;
902
903 if (mf_getVmlinuxChunk(page, &sizeToGet, off, (u64)data) == 0) {
904 if (sizeToGet != 0) {
905 *start = page + off;
906 return sizeToGet;
907 }
908 *eof = 1;
909 return 0;
910 }
911 *eof = 1;
912 return 0;
913}
914#endif
915
916static int proc_mf_dump_side(char *page, char **start, off_t off,
917 int count, int *eof, void *data)
918{
919 int len;
920 char mf_current_side = ' ';
921 struct vsp_cmd_data vsp_cmd;
922
923 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
924 vsp_cmd.cmd = 2;
925 vsp_cmd.sub_data.ipl_type = 0;
926 mb();
927
928 if (signal_vsp_instruction(&vsp_cmd) == 0) {
929 if (vsp_cmd.result_code == 0) {
930 switch (vsp_cmd.sub_data.ipl_type) {
931 case 0: mf_current_side = 'A';
932 break;
933 case 1: mf_current_side = 'B';
934 break;
935 case 2: mf_current_side = 'C';
936 break;
937 default: mf_current_side = 'D';
938 break;
939 }
940 }
941 }
942
943 len = sprintf(page, "%c\n", mf_current_side);
944
945 if (len <= (off + count))
946 *eof = 1;
947 *start = page + off;
948 len -= off;
949 if (len > count)
950 len = count;
951 if (len < 0)
952 len = 0;
953 return len;
954}
955
956static int proc_mf_change_side(struct file *file, const char __user *buffer,
957 unsigned long count, void *data)
958{
959 char side;
960 u64 newSide;
961 struct vsp_cmd_data vsp_cmd;
962
963 if (!capable(CAP_SYS_ADMIN))
964 return -EACCES;
965
966 if (count == 0)
967 return 0;
968
969 if (get_user(side, buffer))
970 return -EFAULT;
971
972 switch (side) {
973 case 'A': newSide = 0;
974 break;
975 case 'B': newSide = 1;
976 break;
977 case 'C': newSide = 2;
978 break;
979 case 'D': newSide = 3;
980 break;
981 default:
982		printk(KERN_ERR "mf.c: proc_mf_change_side: invalid side\n");
983 return -EINVAL;
984 }
985
986 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
987 vsp_cmd.sub_data.ipl_type = newSide;
988 vsp_cmd.cmd = 10;
989
990 (void)signal_vsp_instruction(&vsp_cmd);
991
992 return count;
993}
994
995#if 0
996static void mf_getSrcHistory(char *buffer, int size)
997{
998 struct IplTypeReturnStuff return_stuff;
999 struct pending_event *ev = new_pending_event();
1000 int rc = 0;
1001 char *pages[4];
1002
1003 pages[0] = kmalloc(4096, GFP_ATOMIC);
1004 pages[1] = kmalloc(4096, GFP_ATOMIC);
1005 pages[2] = kmalloc(4096, GFP_ATOMIC);
1006 pages[3] = kmalloc(4096, GFP_ATOMIC);
1007 if ((ev == NULL) || (pages[0] == NULL) || (pages[1] == NULL)
1008 || (pages[2] == NULL) || (pages[3] == NULL))
1009		return;	/* function returns void; note: ev and any pages already allocated leak here */
1010
1011 return_stuff.xType = 0;
1012 return_stuff.xRc = 0;
1013 return_stuff.xDone = 0;
1014 ev->event.hp_lp_event.xSubtype = 6;
1015 ev->event.hp_lp_event.x.xSubtypeData =
1016 subtype_data('M', 'F', 'V', 'I');
1017	ev->event.data.vsp_cmd.token = (u64)&return_stuff;	/* the old "xEvent" field no longer exists; reuse token */
1018 ev->event.data.vsp_cmd.cmd = 4;
1019 ev->event.data.vsp_cmd.lp_index = HvLpConfig_getLpIndex();
1020 ev->event.data.vsp_cmd.result_code = 0xFF;
1021 ev->event.data.vsp_cmd.reserved = 0;
1022 ev->event.data.vsp_cmd.sub_data.page[0] = ISERIES_HV_ADDR(pages[0]);
1023 ev->event.data.vsp_cmd.sub_data.page[1] = ISERIES_HV_ADDR(pages[1]);
1024 ev->event.data.vsp_cmd.sub_data.page[2] = ISERIES_HV_ADDR(pages[2]);
1025 ev->event.data.vsp_cmd.sub_data.page[3] = ISERIES_HV_ADDR(pages[3]);
1026 mb();
1027 if (signal_event(ev) != 0)
1028 return;
1029
1030 while (return_stuff.xDone != 1)
1031 udelay(10);
1032 if (return_stuff.xRc == 0)
1033 memcpy(buffer, pages[0], size);
1034 kfree(pages[0]);
1035 kfree(pages[1]);
1036 kfree(pages[2]);
1037 kfree(pages[3]);
1038}
1039#endif
1040
1041static int proc_mf_dump_src(char *page, char **start, off_t off,
1042 int count, int *eof, void *data)
1043{
1044#if 0
1045 int len;
1046
1047 mf_getSrcHistory(page, count);
1048 len = count;
1049 len -= off;
1050 if (len < count) {
1051 *eof = 1;
1052 if (len <= 0)
1053 return 0;
1054 } else
1055 len = count;
1056 *start = page + off;
1057 return len;
1058#else
1059 return 0;
1060#endif
1061}
1062
1063static int proc_mf_change_src(struct file *file, const char __user *buffer,
1064 unsigned long count, void *data)
1065{
1066 char stkbuf[10];
1067
1068 if (!capable(CAP_SYS_ADMIN))
1069 return -EACCES;
1070
1071 if ((count < 4) && (count != 1)) {
1072		printk(KERN_ERR "mf.c: invalid src\n");
1073 return -EINVAL;
1074 }
1075
1076 if (count > (sizeof(stkbuf) - 1))
1077 count = sizeof(stkbuf) - 1;
1078 if (copy_from_user(stkbuf, buffer, count))
1079 return -EFAULT;
1080
1081 if ((count == 1) && (*stkbuf == '\0'))
1082 mf_clear_src();
1083 else
1084 mf_display_src(*(u32 *)stkbuf);
1085
1086 return count;
1087}
1088
1089static int proc_mf_change_cmdline(struct file *file, const char __user *buffer,
1090 unsigned long count, void *data)
1091{
1092 struct vsp_cmd_data vsp_cmd;
1093 dma_addr_t dma_addr;
1094 char *page;
1095 int ret = -EACCES;
1096
1097 if (!capable(CAP_SYS_ADMIN))
1098 goto out;
1099
1100 dma_addr = 0;
1101 page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr,
1102 GFP_ATOMIC);
1103 ret = -ENOMEM;
1104 if (page == NULL)
1105 goto out;
1106
1107 ret = -EFAULT;
1108 if (copy_from_user(page, buffer, count))
1109 goto out_free;
1110
1111 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
1112 vsp_cmd.cmd = 31;
1113 vsp_cmd.sub_data.kern.token = dma_addr;
1114 vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex;
1115 vsp_cmd.sub_data.kern.side = (u64)data;
1116 vsp_cmd.sub_data.kern.length = count;
1117 mb();
1118 (void)signal_vsp_instruction(&vsp_cmd);
1119 ret = count;
1120
1121out_free:
1122 dma_free_coherent(iSeries_vio_dev, count, page, dma_addr);
1123out:
1124 return ret;
1125}
1126
1127static ssize_t proc_mf_change_vmlinux(struct file *file,
1128 const char __user *buf,
1129 size_t count, loff_t *ppos)
1130{
1131 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
1132 ssize_t rc;
1133 dma_addr_t dma_addr;
1134 char *page;
1135 struct vsp_cmd_data vsp_cmd;
1136
1137 rc = -EACCES;
1138 if (!capable(CAP_SYS_ADMIN))
1139 goto out;
1140
1141 dma_addr = 0;
1142 page = dma_alloc_coherent(iSeries_vio_dev, count, &dma_addr,
1143 GFP_ATOMIC);
1144 rc = -ENOMEM;
1145 if (page == NULL) {
1146 printk(KERN_ERR "mf.c: couldn't allocate memory to set vmlinux chunk\n");
1147 goto out;
1148 }
1149 rc = -EFAULT;
1150 if (copy_from_user(page, buf, count))
1151 goto out_free;
1152
1153 memset(&vsp_cmd, 0, sizeof(vsp_cmd));
1154 vsp_cmd.cmd = 30;
1155 vsp_cmd.sub_data.kern.token = dma_addr;
1156 vsp_cmd.sub_data.kern.address_type = HvLpDma_AddressType_TceIndex;
1157 vsp_cmd.sub_data.kern.side = (u64)dp->data;
1158 vsp_cmd.sub_data.kern.offset = *ppos;
1159 vsp_cmd.sub_data.kern.length = count;
1160 mb();
1161 rc = signal_vsp_instruction(&vsp_cmd);
1162 if (rc)
1163 goto out_free;
1164 rc = -ENOMEM;
1165 if (vsp_cmd.result_code != 0)
1166 goto out_free;
1167
1168 *ppos += count;
1169 rc = count;
1170out_free:
1171 dma_free_coherent(iSeries_vio_dev, count, page, dma_addr);
1172out:
1173 return rc;
1174}
1175
1176static struct file_operations proc_vmlinux_operations = {
1177 .write = proc_mf_change_vmlinux,
1178};
1179
1180static int __init mf_proc_init(void)
1181{
1182 struct proc_dir_entry *mf_proc_root;
1183 struct proc_dir_entry *ent;
1184 struct proc_dir_entry *mf;
1185 char name[2];
1186 int i;
1187
1188 mf_proc_root = proc_mkdir("iSeries/mf", NULL);
1189 if (!mf_proc_root)
1190 return 1;
1191
1192 name[1] = '\0';
1193 for (i = 0; i < 4; i++) {
1194 name[0] = 'A' + i;
1195 mf = proc_mkdir(name, mf_proc_root);
1196 if (!mf)
1197 return 1;
1198
1199 ent = create_proc_entry("cmdline", S_IFREG|S_IRUSR|S_IWUSR, mf);
1200 if (!ent)
1201 return 1;
1202 ent->nlink = 1;
1203 ent->data = (void *)(long)i;
1204 ent->read_proc = proc_mf_dump_cmdline;
1205 ent->write_proc = proc_mf_change_cmdline;
1206
1207 if (i == 3) /* no vmlinux entry for 'D' */
1208 continue;
1209
1210 ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf);
1211 if (!ent)
1212 return 1;
1213 ent->nlink = 1;
1214 ent->data = (void *)(long)i;
1215 ent->proc_fops = &proc_vmlinux_operations;
1216 }
1217
1218 ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root);
1219 if (!ent)
1220 return 1;
1221 ent->nlink = 1;
1222 ent->data = (void *)0;
1223 ent->read_proc = proc_mf_dump_side;
1224 ent->write_proc = proc_mf_change_side;
1225
1226 ent = create_proc_entry("src", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root);
1227 if (!ent)
1228 return 1;
1229 ent->nlink = 1;
1230 ent->data = (void *)0;
1231 ent->read_proc = proc_mf_dump_src;
1232 ent->write_proc = proc_mf_change_src;
1233
1234 return 0;
1235}
1236
1237__initcall(mf_proc_init);
1238
1239#endif /* CONFIG_PROC_FS */
diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S
new file mode 100644
index 000000000000..90b41f48d21c
--- /dev/null
+++ b/arch/ppc64/kernel/misc.S
@@ -0,0 +1,1234 @@
1/*
2 * arch/ppc64/kernel/misc.S
3 *
4 *
5 *
6 * This file contains miscellaneous low-level functions.
7 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
8 *
9 * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
10 * and Paul Mackerras.
11 * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
12 * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 */
20
21#include <linux/config.h>
22#include <linux/sys.h>
23#include <asm/unistd.h>
24#include <asm/errno.h>
25#include <asm/processor.h>
26#include <asm/page.h>
27#include <asm/cache.h>
28#include <asm/ppc_asm.h>
29#include <asm/offsets.h>
30#include <asm/cputable.h>
31
32 .text
33
34/*
35 * Returns (address we're running at) - (address we were linked at)
36 * for use before the text and data are mapped to KERNELBASE.
37 */
38
39_GLOBAL(reloc_offset)
40 mflr r0
41 bl 1f
421: mflr r3
43 LOADADDR(r4,1b)
44 sub r3,r4,r3
45 mtlr r0
46 blr
47
48_GLOBAL(get_msr)
49 mfmsr r3
50 blr
51
52_GLOBAL(get_dar)
53 mfdar r3
54 blr
55
56_GLOBAL(get_srr0)
57 mfsrr0 r3
58 blr
59
60_GLOBAL(get_srr1)
61 mfsrr1 r3
62 blr
63
64_GLOBAL(get_sp)
65 mr r3,r1
66 blr
67
68#ifdef CONFIG_PPC_ISERIES
69/* unsigned long local_save_flags(void) */
70_GLOBAL(local_get_flags)
71 lbz r3,PACAPROCENABLED(r13)
72 blr
73
74/* unsigned long local_irq_disable(void) */
75_GLOBAL(local_irq_disable)
76 lbz r3,PACAPROCENABLED(r13)
77 li r4,0
78 stb r4,PACAPROCENABLED(r13)
79 blr /* Done */
80
81/* void local_irq_restore(unsigned long flags) */
82_GLOBAL(local_irq_restore)
83 lbz r5,PACAPROCENABLED(r13)
84 /* Check if things are setup the way we want _already_. */
85 cmpw 0,r3,r5
86 beqlr
87 /* are we enabling interrupts? */
88 cmpdi 0,r3,0
89 stb r3,PACAPROCENABLED(r13)
90 beqlr
91 /* Check pending interrupts */
92 /* A decrementer, IPI or PMC interrupt may have occurred
93 * while we were in the hypervisor (which enables) */
94 ld r4,PACALPPACA+LPPACAANYINT(r13)
95 cmpdi r4,0
96 beqlr
97
98 /*
99 * Handle pending interrupts in interrupt context
100 */
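	/* 0x5555 is a magic pseudo system call number which the system
	 * call entry path recognizes and turns into pending-interrupt
	 * handling */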
101 li r0,0x5555
102 sc
103 blr
104#endif /* CONFIG_PPC_ISERIES */
105
106#ifdef CONFIG_IRQSTACKS
107_GLOBAL(call_do_softirq)
108 mflr r0
109 std r0,16(r1)
110 stdu r1,THREAD_SIZE-112(r3)
111 mr r1,r3
112 bl .__do_softirq
113 ld r1,0(r1)
114 ld r0,16(r1)
115 mtlr r0
116 blr
117
118_GLOBAL(call_handle_IRQ_event)
119 mflr r0
120 std r0,16(r1)
121 stdu r1,THREAD_SIZE-112(r6)
122 mr r1,r6
123 bl .handle_IRQ_event
124 ld r1,0(r1)
125 ld r0,16(r1)
126 mtlr r0
127 blr
128#endif /* CONFIG_IRQSTACKS */
129
130 /*
131 * To be called by C code which needs to do some operations with MMU
132 * disabled. Note that interrupts have to be disabled by the caller
133 * prior to calling us. The code called _MUST_ be in the RMO of course
134 * and part of the linear mapping as we don't attempt to translate the
135 * stack pointer at all. The function is called with the stack switched
136 * to this CPU's emergency stack.
137 *
138 * prototype is void *call_with_mmu_off(void *func, void *data);
139 *
140 * the called function is expected to be of the form
141 *
142 * void *called(void *data);
143 */
144_GLOBAL(call_with_mmu_off)
145 mflr r0 /* get link, save it on stackframe */
146 std r0,16(r1)
147	mr	r5,r1			/* save old stack ptr */
148 ld r1,PACAEMERGSP(r13) /* get emerg. stack */
149 subi r1,r1,STACK_FRAME_OVERHEAD
150 std r0,16(r1) /* save link on emerg. stack */
151 std r5,0(r1) /* save old stack ptr in backchain */
152 ld r3,0(r3) /* get to real function ptr (assume same TOC) */
153 bl 2f /* we need LR to return, continue at label 2 */
154
155 ld r0,16(r1) /* we return here from the call, get LR and */
156 ld r1,0(r1) /* .. old stack ptr */
157 mtspr SPRN_SRR0,r0 /* and get back to virtual mode with these */
158 mfmsr r4
159 ori r4,r4,MSR_IR|MSR_DR
160 mtspr SPRN_SRR1,r4
161 rfid
162
1632: mtspr SPRN_SRR0,r3 /* coming from above, enter real mode */
164 mr r3,r4 /* get parameter */
165 mfmsr r0
166 ori r0,r0,MSR_IR|MSR_DR
167 xori r0,r0,MSR_IR|MSR_DR
168 mtspr SPRN_SRR1,r0
169 rfid
170
171
172 .section ".toc","aw"
173PPC64_CACHES:
174 .tc ppc64_caches[TC],ppc64_caches
175 .section ".text"
176
177/*
178 * Write any modified data cache blocks out to memory
179 * and invalidate the corresponding instruction cache blocks.
180 *
181 * flush_icache_range(unsigned long start, unsigned long stop)
182 *
183 * flush all bytes from start through stop-1 inclusive
184 */
185
186_GLOBAL(__flush_icache_range)
187
188/*
189 * Flush the data cache to memory
190 *
191 * Different systems have different cache line sizes
192 * and in some cases i-cache and d-cache line sizes differ from
193 * each other.
194 */
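/* Worked example (hypothetical values): start = 0x1005, stop = 0x1100,
 * 128-byte lines: start rounds down to 0x1000, length 0x100 + 127
 * shifted right by log2(128) gives 2 lines (0x1000 and 0x1080). */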
195 ld r10,PPC64_CACHES@toc(r2)
196 lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */
197 addi r5,r7,-1
198 andc r6,r3,r5 /* round low to line bdy */
199 subf r8,r6,r4 /* compute length */
200 add r8,r8,r5 /* ensure we get enough */
201 lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */
202 srw. r8,r8,r9 /* compute line count */
203 beqlr /* nothing to do? */
204 mtctr r8
2051: dcbst 0,r6
206 add r6,r6,r7
207 bdnz 1b
208 sync
209
210/* Now invalidate the instruction cache */
211
212 lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */
213 addi r5,r7,-1
214 andc r6,r3,r5 /* round low to line bdy */
215 subf r8,r6,r4 /* compute length */
216 add r8,r8,r5
217 lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */
218 srw. r8,r8,r9 /* compute line count */
219 beqlr /* nothing to do? */
220 mtctr r8
2212: icbi 0,r6
222 add r6,r6,r7
223 bdnz 2b
224 isync
225 blr
226
227/*
228 * Like above, but only do the D-cache.
229 *
230 * flush_dcache_range(unsigned long start, unsigned long stop)
231 *
232 * flush all bytes from start to stop-1 inclusive
233 */
234_GLOBAL(flush_dcache_range)
235
236/*
237 * Flush the data cache to memory
238 *
239 * Different systems have different cache line sizes
240 */
241 ld r10,PPC64_CACHES@toc(r2)
242 lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */
243 addi r5,r7,-1
244 andc r6,r3,r5 /* round low to line bdy */
245 subf r8,r6,r4 /* compute length */
246 add r8,r8,r5 /* ensure we get enough */
247 lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */
248 srw. r8,r8,r9 /* compute line count */
249 beqlr /* nothing to do? */
250 mtctr r8
2510: dcbst 0,r6
252 add r6,r6,r7
253 bdnz 0b
254 sync
255 blr
256
257/*
258 * Like above, but works on non-mapped physical addresses.
259 * Use only for non-LPAR setups! It also assumes real mode
260 * is cacheable. Used for flushing out the DART before using
261 * it as uncacheable memory
262 *
263 * flush_dcache_phys_range(unsigned long start, unsigned long stop)
264 *
265 * flush all bytes from start to stop-1 inclusive
266 */
267_GLOBAL(flush_dcache_phys_range)
268 ld r10,PPC64_CACHES@toc(r2)
269 lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */
270 addi r5,r7,-1
271 andc r6,r3,r5 /* round low to line bdy */
272 subf r8,r6,r4 /* compute length */
273 add r8,r8,r5 /* ensure we get enough */
274 lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */
275 srw. r8,r8,r9 /* compute line count */
276 beqlr /* nothing to do? */
277 mfmsr r5 /* Disable MMU Data Relocation */
278 ori r0,r5,MSR_DR
279 xori r0,r0,MSR_DR
280 sync
281 mtmsr r0
282 sync
283 isync
284 mtctr r8
2850: dcbst 0,r6
286 add r6,r6,r7
287 bdnz 0b
288 sync
289 isync
290 mtmsr r5 /* Re-enable MMU Data Relocation */
291 sync
292 isync
293 blr
294
295_GLOBAL(flush_inval_dcache_range)
296 ld r10,PPC64_CACHES@toc(r2)
297 lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */
298 addi r5,r7,-1
299 andc r6,r3,r5 /* round low to line bdy */
300 subf r8,r6,r4 /* compute length */
301 add r8,r8,r5 /* ensure we get enough */
302 lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */
303 srw. r8,r8,r9 /* compute line count */
304 beqlr /* nothing to do? */
305 sync
306 isync
307 mtctr r8
3080: dcbf 0,r6
309 add r6,r6,r7
310 bdnz 0b
311 sync
312 isync
313 blr
314
315
316/*
317 * Flush a particular page from the data cache to RAM.
318 * Note: this is necessary because the instruction cache does *not*
319 * snoop from the data cache.
320 *
321 * void __flush_dcache_icache(void *page)
322 */
323_GLOBAL(__flush_dcache_icache)
324/*
325 * Flush the data cache to memory
326 *
327 * Different systems have different cache line sizes
328 */
329
330/* Flush the dcache */
331 ld r7,PPC64_CACHES@toc(r2)
332 clrrdi r3,r3,12 /* Page align */
333 lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */
334 lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */
335 mr r6,r3
336 mtctr r4
3370: dcbst 0,r6
338 add r6,r6,r5
339 bdnz 0b
340 sync
341
342/* Now invalidate the icache */
343
344 lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */
345 lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */
346 mtctr r4
3471: icbi 0,r3
348 add r3,r3,r5
349 bdnz 1b
350 isync
351 blr
352
353/*
354 * I/O string operations
355 *
356 * insb(port, buf, len)
357 * outsb(port, buf, len)
358 * insw(port, buf, len)
359 * outsw(port, buf, len)
360 * insl(port, buf, len)
361 * outsl(port, buf, len)
362 * insw_ns(port, buf, len)
363 * outsw_ns(port, buf, len)
364 * insl_ns(port, buf, len)
365 * outsl_ns(port, buf, len)
366 *
367 * The *_ns versions don't do byte-swapping.
368 */
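/* Note: the lhbrx/lwbrx forms below load byte-reversed, so data from
 * little-endian PCI/ISA space arrives in native big-endian order. */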
369_GLOBAL(_insb)
370 cmpwi 0,r5,0
371 mtctr r5
372 subi r4,r4,1
373 blelr-
37400: lbz r5,0(r3)
375 eieio
376 stbu r5,1(r4)
377 bdnz 00b
378 twi 0,r5,0
379 isync
380 blr
381
382_GLOBAL(_outsb)
383 cmpwi 0,r5,0
384 mtctr r5
385 subi r4,r4,1
386 blelr-
38700: lbzu r5,1(r4)
388 stb r5,0(r3)
389 bdnz 00b
390 sync
391 blr
392
393_GLOBAL(_insw)
394 cmpwi 0,r5,0
395 mtctr r5
396 subi r4,r4,2
397 blelr-
39800: lhbrx r5,0,r3
399 eieio
400 sthu r5,2(r4)
401 bdnz 00b
402 twi 0,r5,0
403 isync
404 blr
405
406_GLOBAL(_outsw)
407 cmpwi 0,r5,0
408 mtctr r5
409 subi r4,r4,2
410 blelr-
41100: lhzu r5,2(r4)
412 sthbrx r5,0,r3
413 bdnz 00b
414 sync
415 blr
416
417_GLOBAL(_insl)
418 cmpwi 0,r5,0
419 mtctr r5
420 subi r4,r4,4
421 blelr-
42200: lwbrx r5,0,r3
423 eieio
424 stwu r5,4(r4)
425 bdnz 00b
426 twi 0,r5,0
427 isync
428 blr
429
430_GLOBAL(_outsl)
431 cmpwi 0,r5,0
432 mtctr r5
433 subi r4,r4,4
434 blelr-
43500: lwzu r5,4(r4)
436 stwbrx r5,0,r3
437 bdnz 00b
438 sync
439 blr
440
441/* _GLOBAL(ide_insw) now in drivers/ide/ide-iops.c */
442_GLOBAL(_insw_ns)
443 cmpwi 0,r5,0
444 mtctr r5
445 subi r4,r4,2
446 blelr-
44700: lhz r5,0(r3)
448 eieio
449 sthu r5,2(r4)
450 bdnz 00b
451 twi 0,r5,0
452 isync
453 blr
454
455/* _GLOBAL(ide_outsw) now in drivers/ide/ide-iops.c */
456_GLOBAL(_outsw_ns)
457 cmpwi 0,r5,0
458 mtctr r5
459 subi r4,r4,2
460 blelr-
46100: lhzu r5,2(r4)
462 sth r5,0(r3)
463 bdnz 00b
464 sync
465 blr
466
467_GLOBAL(_insl_ns)
468 cmpwi 0,r5,0
469 mtctr r5
470 subi r4,r4,4
471 blelr-
47200: lwz r5,0(r3)
473 eieio
474 stwu r5,4(r4)
475 bdnz 00b
476 twi 0,r5,0
477 isync
478 blr
479
480_GLOBAL(_outsl_ns)
481 cmpwi 0,r5,0
482 mtctr r5
483 subi r4,r4,4
484 blelr-
48500: lwzu r5,4(r4)
486 stw r5,0(r3)
487 bdnz 00b
488 sync
489 blr
490
491
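/*
 * Single <-> double precision conversion helpers. Judging from the
 * register usage below: r3 = source, r4 = destination, r5 = pointer to
 * a saved FPSCR image, which is loaded beforehand and stored back
 * (updated) afterwards.
 */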
492_GLOBAL(cvt_fd)
493 lfd 0,0(r5) /* load up fpscr value */
494 mtfsf 0xff,0
495 lfs 0,0(r3)
496 stfd 0,0(r4)
497 mffs 0 /* save new fpscr value */
498 stfd 0,0(r5)
499 blr
500
501_GLOBAL(cvt_df)
502 lfd 0,0(r5) /* load up fpscr value */
503 mtfsf 0xff,0
504 lfd 0,0(r3)
505 stfs 0,0(r4)
506 mffs 0 /* save new fpscr value */
507 stfd 0,0(r5)
508 blr
509
510/*
511 * identify_cpu and calls setup_cpu
512 * In: r3 = base of the cpu_specs array
513 * r4 = address of cur_cpu_spec
514 * r5 = relocation offset
515 */
516_GLOBAL(identify_cpu)
517 mfpvr r7
5181:
519 lwz r8,CPU_SPEC_PVR_MASK(r3)
520 and r8,r8,r7
521 lwz r9,CPU_SPEC_PVR_VALUE(r3)
522 cmplw 0,r9,r8
523 beq 1f
524 addi r3,r3,CPU_SPEC_ENTRY_SIZE
525 b 1b
5261:
527 add r0,r3,r5
528 std r0,0(r4)
529 ld r4,CPU_SPEC_SETUP(r3)
530 sub r4,r4,r5
531 ld r4,0(r4)
532 sub r4,r4,r5
533 mtctr r4
534 /* Calling convention for cpu setup is r3=offset, r4=cur_cpu_spec */
535 mr r4,r3
536 mr r3,r5
537 bctr
538
539/*
540 * do_cpu_ftr_fixups - goes through the list of CPU feature fixups
541 * and writes nops over sections of code that don't apply for this cpu.
542 * r3 = data offset (not changed)
543 */
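/* Each 32-byte fixup table record is laid out as
 * { u64 mask; u64 value; u64 section_start; u64 section_end },
 * matching the -32/-24/-16/-8 loads in the loop below. */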
544_GLOBAL(do_cpu_ftr_fixups)
545 /* Get CPU 0 features */
546 LOADADDR(r6,cur_cpu_spec)
547 sub r6,r6,r3
548 ld r4,0(r6)
549 sub r4,r4,r3
550 ld r4,CPU_SPEC_FEATURES(r4)
551 /* Get the fixup table */
552 LOADADDR(r6,__start___ftr_fixup)
553 sub r6,r6,r3
554 LOADADDR(r7,__stop___ftr_fixup)
555 sub r7,r7,r3
556 /* Do the fixup */
5571: cmpld r6,r7
558 bgelr
559 addi r6,r6,32
560 ld r8,-32(r6) /* mask */
561 and r8,r8,r4
562 ld r9,-24(r6) /* value */
563 cmpld r8,r9
564 beq 1b
565 ld r8,-16(r6) /* section begin */
566 ld r9,-8(r6) /* section end */
567 subf. r9,r8,r9
568 beq 1b
569 /* write nops over the section of code */
570 /* todo: if large section, add a branch at the start of it */
571 srwi r9,r9,2
572 mtctr r9
573 sub r8,r8,r3
574 lis r0,0x60000000@h /* nop */
5753: stw r0,0(r8)
576 andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l
577 beq 2f
578 dcbst 0,r8 /* suboptimal, but simpler */
579 sync
580 icbi 0,r8
5812: addi r8,r8,4
582 bdnz 3b
583 sync /* additional sync needed on g4 */
584 isync
585 b 1b
586
587#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
588/*
589 * Do an IO access in real mode
590 */
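/* (This sets bit 0x100 in the upper half of HID4 around the access --
 * on PPC970 this is understood to make real-mode data accesses
 * cache-inhibited, so the load really reaches the device.) */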
591_GLOBAL(real_readb)
592 mfmsr r7
593 ori r0,r7,MSR_DR
594 xori r0,r0,MSR_DR
595 sync
596 mtmsrd r0
597 sync
598 isync
599 mfspr r6,SPRN_HID4
600 rldicl r5,r6,32,0
601 ori r5,r5,0x100
602 rldicl r5,r5,32,0
603 sync
604 mtspr SPRN_HID4,r5
605 isync
606 slbia
607 isync
608 lbz r3,0(r3)
609 sync
610 mtspr SPRN_HID4,r6
611 isync
612 slbia
613 isync
614 mtmsrd r7
615 sync
616 isync
617 blr
618
619 /*
620 * Do an IO access in real mode
621 */
622_GLOBAL(real_writeb)
623 mfmsr r7
624 ori r0,r7,MSR_DR
625 xori r0,r0,MSR_DR
626 sync
627 mtmsrd r0
628 sync
629 isync
630 mfspr r6,SPRN_HID4
631 rldicl r5,r6,32,0
632 ori r5,r5,0x100
633 rldicl r5,r5,32,0
634 sync
635 mtspr SPRN_HID4,r5
636 isync
637 slbia
638 isync
639 stb r3,0(r4)
640 sync
641 mtspr SPRN_HID4,r6
642 isync
643 slbia
644 isync
645 mtmsrd r7
646 sync
647 isync
648 blr
649#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
650
651/*
652 * Create a kernel thread
653 * kernel_thread(fn, arg, flags)
654 */
655_GLOBAL(kernel_thread)
656 std r29,-24(r1)
657 std r30,-16(r1)
658 stdu r1,-STACK_FRAME_OVERHEAD(r1)
659 mr r29,r3
660 mr r30,r4
661 ori r3,r5,CLONE_VM /* flags */
662 oris r3,r3,(CLONE_UNTRACED>>16)
663 li r4,0 /* new sp (unused) */
664 li r0,__NR_clone
665 sc
666 cmpdi 0,r3,0 /* parent or child? */
667 bne 1f /* return if parent */
668 li r0,0
669 stdu r0,-STACK_FRAME_OVERHEAD(r1)
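	/* r29 is an ELF ABI function descriptor: entry point at offset 0, TOC pointer at offset 8 */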
670 ld r2,8(r29)
671 ld r29,0(r29)
672 mtlr r29 /* fn addr in lr */
673 mr r3,r30 /* load arg and call fn */
674 blrl
675 li r0,__NR_exit /* exit after child exits */
676 li r3,0
677 sc
6781: addi r1,r1,STACK_FRAME_OVERHEAD
679 ld r29,-24(r1)
680 ld r30,-16(r1)
681 blr
682
683/* Why isn't this a) automatic, b) written in 'C'? */
684 .balign 8
685_GLOBAL(sys_call_table32)
686 .llong .sys_restart_syscall /* 0 */
687 .llong .sys_exit
688 .llong .ppc_fork
689 .llong .sys_read
690 .llong .sys_write
691 .llong .sys32_open /* 5 */
692 .llong .sys_close
693 .llong .sys32_waitpid
694 .llong .sys32_creat
695 .llong .sys_link
696 .llong .sys_unlink /* 10 */
697 .llong .sys32_execve
698 .llong .sys_chdir
699 .llong .compat_sys_time
700 .llong .sys_mknod
701 .llong .sys_chmod /* 15 */
702 .llong .sys_lchown
703 .llong .sys_ni_syscall /* old break syscall */
704 .llong .sys_ni_syscall /* old stat syscall */
705 .llong .ppc32_lseek
706 .llong .sys_getpid /* 20 */
707 .llong .compat_sys_mount
708 .llong .sys_oldumount
709 .llong .sys_setuid
710 .llong .sys_getuid
711 .llong .compat_sys_stime /* 25 */
712 .llong .sys32_ptrace
713 .llong .sys_alarm
714 .llong .sys_ni_syscall /* old fstat syscall */
715 .llong .sys32_pause
716 .llong .compat_sys_utime /* 30 */
717 .llong .sys_ni_syscall /* old stty syscall */
718 .llong .sys_ni_syscall /* old gtty syscall */
719 .llong .sys32_access
720 .llong .sys32_nice
721 .llong .sys_ni_syscall /* 35 - old ftime syscall */
722 .llong .sys_sync
723 .llong .sys32_kill
724 .llong .sys_rename
725 .llong .sys32_mkdir
726 .llong .sys_rmdir /* 40 */
727 .llong .sys_dup
728 .llong .sys_pipe
729 .llong .compat_sys_times
730 .llong .sys_ni_syscall /* old prof syscall */
731 .llong .sys_brk /* 45 */
732 .llong .sys_setgid
733 .llong .sys_getgid
734 .llong .sys_signal
735 .llong .sys_geteuid
736 .llong .sys_getegid /* 50 */
737 .llong .sys_acct
738 .llong .sys_umount
739 .llong .sys_ni_syscall /* old lock syscall */
740 .llong .compat_sys_ioctl
741 .llong .compat_sys_fcntl /* 55 */
742 .llong .sys_ni_syscall /* old mpx syscall */
743 .llong .sys32_setpgid
744 .llong .sys_ni_syscall /* old ulimit syscall */
745 .llong .sys32_olduname
746 .llong .sys32_umask /* 60 */
747 .llong .sys_chroot
748 .llong .sys_ustat
749 .llong .sys_dup2
750 .llong .sys_getppid
751 .llong .sys_getpgrp /* 65 */
752 .llong .sys_setsid
753 .llong .sys32_sigaction
754 .llong .sys_sgetmask
755 .llong .sys32_ssetmask
756 .llong .sys_setreuid /* 70 */
757 .llong .sys_setregid
758 .llong .ppc32_sigsuspend
759 .llong .compat_sys_sigpending
760 .llong .sys32_sethostname
761 .llong .compat_sys_setrlimit /* 75 */
762 .llong .compat_sys_old_getrlimit
763 .llong .compat_sys_getrusage
764 .llong .sys32_gettimeofday
765 .llong .sys32_settimeofday
766 .llong .sys32_getgroups /* 80 */
767 .llong .sys32_setgroups
768 .llong .sys_ni_syscall /* old select syscall */
769 .llong .sys_symlink
770 .llong .sys_ni_syscall /* old lstat syscall */
771 .llong .sys32_readlink /* 85 */
772 .llong .sys_uselib
773 .llong .sys_swapon
774 .llong .sys_reboot
775 .llong .old32_readdir
776 .llong .sys_mmap /* 90 */
777 .llong .sys_munmap
778 .llong .sys_truncate
779 .llong .sys_ftruncate
780 .llong .sys_fchmod
781 .llong .sys_fchown /* 95 */
782 .llong .sys32_getpriority
783 .llong .sys32_setpriority
784 .llong .sys_ni_syscall /* old profil syscall */
785 .llong .compat_sys_statfs
786 .llong .compat_sys_fstatfs /* 100 */
787 .llong .sys_ni_syscall /* old ioperm syscall */
788 .llong .compat_sys_socketcall
789 .llong .sys32_syslog
790 .llong .compat_sys_setitimer
791 .llong .compat_sys_getitimer /* 105 */
792 .llong .compat_sys_newstat
793 .llong .compat_sys_newlstat
794 .llong .compat_sys_newfstat
795 .llong .sys_uname
796 .llong .sys_ni_syscall /* 110 old iopl syscall */
797 .llong .sys_vhangup
798 .llong .sys_ni_syscall /* old idle syscall */
799 .llong .sys_ni_syscall /* old vm86 syscall */
800 .llong .compat_sys_wait4
801 .llong .sys_swapoff /* 115 */
802 .llong .sys32_sysinfo
803 .llong .sys32_ipc
804 .llong .sys_fsync
805 .llong .ppc32_sigreturn
806 .llong .ppc_clone /* 120 */
807 .llong .sys32_setdomainname
808 .llong .ppc64_newuname
809 .llong .sys_ni_syscall /* old modify_ldt syscall */
810 .llong .sys32_adjtimex
811 .llong .sys_mprotect /* 125 */
812 .llong .compat_sys_sigprocmask
813 .llong .sys_ni_syscall /* old create_module syscall */
814 .llong .sys_init_module
815 .llong .sys_delete_module
816 .llong .sys_ni_syscall /* 130 old get_kernel_syms syscall */
817 .llong .sys_quotactl
818 .llong .sys32_getpgid
819 .llong .sys_fchdir
820 .llong .sys_bdflush
821 .llong .sys32_sysfs /* 135 */
822 .llong .ppc64_personality
823 .llong .sys_ni_syscall /* for afs_syscall */
824 .llong .sys_setfsuid
825 .llong .sys_setfsgid
826 .llong .sys_llseek /* 140 */
827 .llong .sys32_getdents
828 .llong .ppc32_select
829 .llong .sys_flock
830 .llong .sys_msync
831 .llong .compat_sys_readv /* 145 */
832 .llong .compat_sys_writev
833 .llong .sys32_getsid
834 .llong .sys_fdatasync
835 .llong .sys32_sysctl
836 .llong .sys_mlock /* 150 */
837 .llong .sys_munlock
838 .llong .sys_mlockall
839 .llong .sys_munlockall
840 .llong .sys32_sched_setparam
841 .llong .sys32_sched_getparam /* 155 */
842 .llong .sys32_sched_setscheduler
843 .llong .sys32_sched_getscheduler
844 .llong .sys_sched_yield
845 .llong .sys32_sched_get_priority_max
846 .llong .sys32_sched_get_priority_min /* 160 */
847 .llong .sys32_sched_rr_get_interval
848 .llong .compat_sys_nanosleep
849 .llong .sys_mremap
850 .llong .sys_setresuid
851 .llong .sys_getresuid /* 165 */
852 .llong .sys_ni_syscall /* old query_module syscall */
853 .llong .sys_poll
854 .llong .compat_sys_nfsservctl
855 .llong .sys_setresgid
856 .llong .sys_getresgid /* 170 */
857 .llong .sys32_prctl
858 .llong .ppc32_rt_sigreturn
859 .llong .sys32_rt_sigaction
860 .llong .sys32_rt_sigprocmask
861 .llong .sys32_rt_sigpending /* 175 */
862 .llong .compat_sys_rt_sigtimedwait
863 .llong .sys32_rt_sigqueueinfo
864 .llong .ppc32_rt_sigsuspend
865 .llong .sys32_pread64
866 .llong .sys32_pwrite64 /* 180 */
867 .llong .sys_chown
868 .llong .sys_getcwd
869 .llong .sys_capget
870 .llong .sys_capset
871 .llong .sys32_sigaltstack /* 185 */
872 .llong .sys32_sendfile
873 .llong .sys_ni_syscall /* reserved for streams1 */
874 .llong .sys_ni_syscall /* reserved for streams2 */
875 .llong .ppc_vfork
876 .llong .compat_sys_getrlimit /* 190 */
877 .llong .sys32_readahead
878 .llong .sys32_mmap2
879 .llong .sys32_truncate64
880 .llong .sys32_ftruncate64
881 .llong .sys_stat64 /* 195 */
882 .llong .sys_lstat64
883 .llong .sys_fstat64
884 .llong .sys32_pciconfig_read
885 .llong .sys32_pciconfig_write
886 .llong .sys32_pciconfig_iobase /* 200 - pciconfig_iobase */
887 .llong .sys_ni_syscall /* reserved for MacOnLinux */
888 .llong .sys_getdents64
889 .llong .sys_pivot_root
890 .llong .compat_sys_fcntl64
891 .llong .sys_madvise /* 205 */
892 .llong .sys_mincore
893 .llong .sys_gettid
894 .llong .sys_tkill
895 .llong .sys_setxattr
896 .llong .sys_lsetxattr /* 210 */
897 .llong .sys_fsetxattr
898 .llong .sys_getxattr
899 .llong .sys_lgetxattr
900 .llong .sys_fgetxattr
901 .llong .sys_listxattr /* 215 */
902 .llong .sys_llistxattr
903 .llong .sys_flistxattr
904 .llong .sys_removexattr
905 .llong .sys_lremovexattr
906 .llong .sys_fremovexattr /* 220 */
907 .llong .compat_sys_futex
908 .llong .compat_sys_sched_setaffinity
909 .llong .compat_sys_sched_getaffinity
910 .llong .sys_ni_syscall
911 .llong .sys_ni_syscall /* 225 - reserved for tux */
912 .llong .sys32_sendfile64
913 .llong .compat_sys_io_setup
914 .llong .sys_io_destroy
915 .llong .compat_sys_io_getevents
916	.llong .compat_sys_io_submit	/* 230 */
917 .llong .sys_io_cancel
918 .llong .sys_set_tid_address
919 .llong .ppc32_fadvise64
920 .llong .sys_exit_group
921 .llong .ppc32_lookup_dcookie /* 235 */
922 .llong .sys_epoll_create
923 .llong .sys_epoll_ctl
924 .llong .sys_epoll_wait
925 .llong .sys_remap_file_pages
926 .llong .ppc32_timer_create /* 240 */
927 .llong .compat_sys_timer_settime
928 .llong .compat_sys_timer_gettime
929 .llong .sys_timer_getoverrun
930 .llong .sys_timer_delete
931 .llong .compat_sys_clock_settime /* 245 */
932 .llong .compat_sys_clock_gettime
933 .llong .compat_sys_clock_getres
934 .llong .compat_sys_clock_nanosleep
935 .llong .ppc32_swapcontext
936 .llong .sys32_tgkill /* 250 */
937 .llong .sys32_utimes
938 .llong .compat_sys_statfs64
939 .llong .compat_sys_fstatfs64
940 .llong .ppc32_fadvise64_64 /* 32bit only fadvise64_64 */
941 .llong .ppc_rtas /* 255 */
942 .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */
943 .llong .sys_ni_syscall /* 257 reserved for vserver */
944 .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */
945 .llong .compat_sys_mbind
946 .llong .compat_sys_get_mempolicy /* 260 */
947 .llong .compat_sys_set_mempolicy
948 .llong .compat_sys_mq_open
949 .llong .sys_mq_unlink
950 .llong .compat_sys_mq_timedsend
951 .llong .compat_sys_mq_timedreceive /* 265 */
952 .llong .compat_sys_mq_notify
953 .llong .compat_sys_mq_getsetattr
954 .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */
955 .llong .sys32_add_key
956	.llong .sys32_request_key		/* 270 */
957 .llong .compat_sys_keyctl
958 .llong .compat_sys_waitid
959
960 .balign 8
961_GLOBAL(sys_call_table)
962 .llong .sys_restart_syscall /* 0 */
963 .llong .sys_exit
964 .llong .ppc_fork
965 .llong .sys_read
966 .llong .sys_write
967 .llong .sys_open /* 5 */
968 .llong .sys_close
969 .llong .sys_waitpid
970 .llong .sys_creat
971 .llong .sys_link
972 .llong .sys_unlink /* 10 */
973 .llong .sys_execve
974 .llong .sys_chdir
975 .llong .sys64_time
976 .llong .sys_mknod
977 .llong .sys_chmod /* 15 */
978 .llong .sys_lchown
979 .llong .sys_ni_syscall /* old break syscall */
980 .llong .sys_ni_syscall /* old stat syscall */
981 .llong .sys_lseek
982 .llong .sys_getpid /* 20 */
983 .llong .sys_mount
984 .llong .sys_ni_syscall /* old umount syscall */
985 .llong .sys_setuid
986 .llong .sys_getuid
987 .llong .sys_stime /* 25 */
988 .llong .sys_ptrace
989 .llong .sys_alarm
990 .llong .sys_ni_syscall /* old fstat syscall */
991 .llong .sys_pause
992 .llong .sys_utime /* 30 */
993 .llong .sys_ni_syscall /* old stty syscall */
994 .llong .sys_ni_syscall /* old gtty syscall */
995 .llong .sys_access
996 .llong .sys_nice
997 .llong .sys_ni_syscall /* 35 - old ftime syscall */
998 .llong .sys_sync
999 .llong .sys_kill
1000 .llong .sys_rename
1001 .llong .sys_mkdir
1002 .llong .sys_rmdir /* 40 */
1003 .llong .sys_dup
1004 .llong .sys_pipe
1005 .llong .sys_times
1006 .llong .sys_ni_syscall /* old prof syscall */
1007 .llong .sys_brk /* 45 */
1008 .llong .sys_setgid
1009 .llong .sys_getgid
1010 .llong .sys_signal
1011 .llong .sys_geteuid
1012 .llong .sys_getegid /* 50 */
1013 .llong .sys_acct
1014 .llong .sys_umount
1015 .llong .sys_ni_syscall /* old lock syscall */
1016 .llong .sys_ioctl
1017 .llong .sys_fcntl /* 55 */
1018 .llong .sys_ni_syscall /* old mpx syscall */
1019 .llong .sys_setpgid
1020 .llong .sys_ni_syscall /* old ulimit syscall */
1021 .llong .sys_ni_syscall /* old uname syscall */
1022 .llong .sys_umask /* 60 */
1023 .llong .sys_chroot
1024 .llong .sys_ustat
1025 .llong .sys_dup2
1026 .llong .sys_getppid
1027 .llong .sys_getpgrp /* 65 */
1028 .llong .sys_setsid
1029 .llong .sys_ni_syscall
1030 .llong .sys_sgetmask
1031 .llong .sys_ssetmask
1032 .llong .sys_setreuid /* 70 */
1033 .llong .sys_setregid
1034 .llong .sys_ni_syscall
1035 .llong .sys_ni_syscall
1036 .llong .sys_sethostname
1037 .llong .sys_setrlimit /* 75 */
1038 .llong .sys_ni_syscall /* old getrlimit syscall */
1039 .llong .sys_getrusage
1040 .llong .sys_gettimeofday
1041 .llong .sys_settimeofday
1042 .llong .sys_getgroups /* 80 */
1043 .llong .sys_setgroups
1044 .llong .sys_ni_syscall /* old select syscall */
1045 .llong .sys_symlink
1046 .llong .sys_ni_syscall /* old lstat syscall */
1047 .llong .sys_readlink /* 85 */
1048 .llong .sys_uselib
1049 .llong .sys_swapon
1050 .llong .sys_reboot
1051 .llong .sys_ni_syscall /* old readdir syscall */
1052 .llong .sys_mmap /* 90 */
1053 .llong .sys_munmap
1054 .llong .sys_truncate
1055 .llong .sys_ftruncate
1056 .llong .sys_fchmod
1057 .llong .sys_fchown /* 95 */
1058 .llong .sys_getpriority
1059 .llong .sys_setpriority
1060 .llong .sys_ni_syscall /* old profil syscall holder */
1061 .llong .sys_statfs
1062 .llong .sys_fstatfs /* 100 */
1063 .llong .sys_ni_syscall /* old ioperm syscall */
1064 .llong .sys_socketcall
1065 .llong .sys_syslog
1066 .llong .sys_setitimer
1067 .llong .sys_getitimer /* 105 */
1068 .llong .sys_newstat
1069 .llong .sys_newlstat
1070 .llong .sys_newfstat
1071 .llong .sys_ni_syscall /* old uname syscall */
1072 .llong .sys_ni_syscall /* 110 old iopl syscall */
1073 .llong .sys_vhangup
1074 .llong .sys_ni_syscall /* old idle syscall */
1075 .llong .sys_ni_syscall /* old vm86 syscall */
1076 .llong .sys_wait4
1077 .llong .sys_swapoff /* 115 */
1078 .llong .sys_sysinfo
1079 .llong .sys_ipc
1080 .llong .sys_fsync
1081 .llong .sys_ni_syscall
1082 .llong .ppc_clone /* 120 */
1083 .llong .sys_setdomainname
1084 .llong .ppc64_newuname
1085 .llong .sys_ni_syscall /* old modify_ldt syscall */
1086 .llong .sys_adjtimex
1087 .llong .sys_mprotect /* 125 */
1088 .llong .sys_ni_syscall
1089 .llong .sys_ni_syscall /* old create_module syscall */
1090 .llong .sys_init_module
1091 .llong .sys_delete_module
1092 .llong .sys_ni_syscall /* 130 old get_kernel_syms syscall */
1093 .llong .sys_quotactl
1094 .llong .sys_getpgid
1095 .llong .sys_fchdir
1096 .llong .sys_bdflush
1097 .llong .sys_sysfs /* 135 */
1098 .llong .ppc64_personality
1099 .llong .sys_ni_syscall /* for afs_syscall */
1100 .llong .sys_setfsuid
1101 .llong .sys_setfsgid
1102 .llong .sys_llseek /* 140 */
1103 .llong .sys_getdents
1104 .llong .sys_select
1105 .llong .sys_flock
1106 .llong .sys_msync
1107 .llong .sys_readv /* 145 */
1108 .llong .sys_writev
1109 .llong .sys_getsid
1110 .llong .sys_fdatasync
1111 .llong .sys_sysctl
1112 .llong .sys_mlock /* 150 */
1113 .llong .sys_munlock
1114 .llong .sys_mlockall
1115 .llong .sys_munlockall
1116 .llong .sys_sched_setparam
1117 .llong .sys_sched_getparam /* 155 */
1118 .llong .sys_sched_setscheduler
1119 .llong .sys_sched_getscheduler
1120 .llong .sys_sched_yield
1121 .llong .sys_sched_get_priority_max
1122 .llong .sys_sched_get_priority_min /* 160 */
1123 .llong .sys_sched_rr_get_interval
1124 .llong .sys_nanosleep
1125 .llong .sys_mremap
1126 .llong .sys_setresuid
1127 .llong .sys_getresuid /* 165 */
1128 .llong .sys_ni_syscall /* old query_module syscall */
1129 .llong .sys_poll
1130 .llong .sys_nfsservctl
1131 .llong .sys_setresgid
1132 .llong .sys_getresgid /* 170 */
1133 .llong .sys_prctl
1134 .llong .ppc64_rt_sigreturn
1135 .llong .sys_rt_sigaction
1136 .llong .sys_rt_sigprocmask
1137 .llong .sys_rt_sigpending /* 175 */
1138 .llong .sys_rt_sigtimedwait
1139 .llong .sys_rt_sigqueueinfo
1140 .llong .ppc64_rt_sigsuspend
1141 .llong .sys_pread64
1142 .llong .sys_pwrite64 /* 180 */
1143 .llong .sys_chown
1144 .llong .sys_getcwd
1145 .llong .sys_capget
1146 .llong .sys_capset
1147 .llong .sys_sigaltstack /* 185 */
1148 .llong .sys_sendfile64
1149 .llong .sys_ni_syscall /* reserved for streams1 */
1150 .llong .sys_ni_syscall /* reserved for streams2 */
1151 .llong .ppc_vfork
1152 .llong .sys_getrlimit /* 190 */
1153 .llong .sys_readahead
1154 .llong .sys_ni_syscall /* 32bit only mmap2 */
1155 .llong .sys_ni_syscall /* 32bit only truncate64 */
1156 .llong .sys_ni_syscall /* 32bit only ftruncate64 */
1157 .llong .sys_ni_syscall /* 195 - 32bit only stat64 */
1158 .llong .sys_ni_syscall /* 32bit only lstat64 */
1159 .llong .sys_ni_syscall /* 32bit only fstat64 */
1160 .llong .sys_ni_syscall /* 32bit only pciconfig_read */
1161 .llong .sys_ni_syscall /* 32bit only pciconfig_write */
1162	.llong .sys_ni_syscall		/* 200 - 32bit only pciconfig_iobase */
1163 .llong .sys_ni_syscall /* reserved for MacOnLinux */
1164 .llong .sys_getdents64
1165 .llong .sys_pivot_root
1166 .llong .sys_ni_syscall /* 32bit only fcntl64 */
1167 .llong .sys_madvise /* 205 */
1168 .llong .sys_mincore
1169 .llong .sys_gettid
1170 .llong .sys_tkill
1171 .llong .sys_setxattr
1172 .llong .sys_lsetxattr /* 210 */
1173 .llong .sys_fsetxattr
1174 .llong .sys_getxattr
1175 .llong .sys_lgetxattr
1176 .llong .sys_fgetxattr
1177 .llong .sys_listxattr /* 215 */
1178 .llong .sys_llistxattr
1179 .llong .sys_flistxattr
1180 .llong .sys_removexattr
1181 .llong .sys_lremovexattr
1182 .llong .sys_fremovexattr /* 220 */
1183 .llong .sys_futex
1184 .llong .sys_sched_setaffinity
1185 .llong .sys_sched_getaffinity
1186 .llong .sys_ni_syscall
1187 .llong .sys_ni_syscall /* 225 - reserved for tux */
1188 .llong .sys_ni_syscall /* 32bit only sendfile64 */
1189 .llong .sys_io_setup
1190 .llong .sys_io_destroy
1191 .llong .sys_io_getevents
1192 .llong .sys_io_submit /* 230 */
1193 .llong .sys_io_cancel
1194 .llong .sys_set_tid_address
1195 .llong .sys_fadvise64
1196 .llong .sys_exit_group
1197 .llong .sys_lookup_dcookie /* 235 */
1198 .llong .sys_epoll_create
1199 .llong .sys_epoll_ctl
1200 .llong .sys_epoll_wait
1201 .llong .sys_remap_file_pages
1202 .llong .sys_timer_create /* 240 */
1203 .llong .sys_timer_settime
1204 .llong .sys_timer_gettime
1205 .llong .sys_timer_getoverrun
1206 .llong .sys_timer_delete
1207 .llong .sys_clock_settime /* 245 */
1208 .llong .sys_clock_gettime
1209 .llong .sys_clock_getres
1210 .llong .sys_clock_nanosleep
1211 .llong .ppc64_swapcontext
1212 .llong .sys_tgkill /* 250 */
1213 .llong .sys_utimes
1214 .llong .sys_statfs64
1215 .llong .sys_fstatfs64
1216 .llong .sys_ni_syscall /* 32bit only fadvise64_64 */
1217 .llong .ppc_rtas /* 255 */
1218 .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */
1219 .llong .sys_ni_syscall /* 257 reserved for vserver */
1220 .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */
1221 .llong .sys_mbind
1222 .llong .sys_get_mempolicy /* 260 */
1223 .llong .sys_set_mempolicy
1224 .llong .sys_mq_open
1225 .llong .sys_mq_unlink
1226 .llong .sys_mq_timedsend
1227 .llong .sys_mq_timedreceive /* 265 */
1228 .llong .sys_mq_notify
1229 .llong .sys_mq_getsetattr
1230 .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */
1231 .llong .sys_add_key
1232 .llong .sys_request_key /* 270 */
1233 .llong .sys_keyctl
1234 .llong .sys_waitid
diff --git a/arch/ppc64/kernel/module.c b/arch/ppc64/kernel/module.c
new file mode 100644
index 000000000000..c683bf88e690
--- /dev/null
+++ b/arch/ppc64/kernel/module.c
@@ -0,0 +1,442 @@
1/* Kernel module help for PPC64.
2 Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
18#include <linux/module.h>
19#include <linux/elf.h>
20#include <linux/moduleloader.h>
21#include <linux/err.h>
22#include <linux/vmalloc.h>
23#include <asm/module.h>
24#include <asm/uaccess.h>
25
26/* FIXME: We don't do .init separately. To do this, we'd need to have
27 a separate r2 value in the init and core section, and stub between
28 them, too.
29
30 Using a magic allocator which places modules within 32MB solves
31 this, and makes other things simpler. Anton?
32 --RR. */
33#if 0
34#define DEBUGP printk
35#else
36#define DEBUGP(fmt , ...)
37#endif
38
39/* There's actually a third entry here, but it's unused */
40struct ppc64_opd_entry
41{
42 unsigned long funcaddr;
43 unsigned long r2;
44};
45
46/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
47 the kernel itself). But on PPC64, these need to be used for every
48 jump, actually, to reset r2 (TOC+0x8000). */
49struct ppc64_stub_entry
50{
51 /* 28 byte jump instruction sequence (7 instructions) */
52 unsigned char jump[28];
53 unsigned char unused[4];
54 /* Data for the above code */
55 struct ppc64_opd_entry opd;
56};
57
58/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
59 function which may be more than 24-bits away. We could simply
60 patch the new r2 value and function pointer into the stub, but it's
61 significantly shorter to put these values at the end of the stub
62 code, and patch the stub address (32-bits relative to the TOC ptr,
63 r2) into the stub. */
64static struct ppc64_stub_entry ppc64_stub =
65{ .jump = {
66 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */
67 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */
68 /* Save current r2 value in magic place on the stack. */
69 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */
70 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */
71 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */
72 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */
73 0x4e, 0x80, 0x04, 0x20 /* bctr */
74} };
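/* How the template above resolves at run time (a descriptive note;
 * offsets follow the struct layout): addis/addi build the stub's own
 * address in r12 from r2 plus the 32-bit offset that create_stub()
 * patches in below. "ld r11,32(r12)" then loads opd.funcaddr (the
 * 28-byte jump plus 4 unused bytes put opd at offset 32), and
 * "ld r2,40(r12)" loads the callee's TOC value from opd.r2 at
 * offset 40, before bctr branches to the function.
 */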
75
76/* Count how many different 24-bit relocations (different symbol,
77 different addend) */
78static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
79{
80 unsigned int i, j, ret = 0;
81
82 /* FIXME: Only count external ones --RR */
83 /* Sure, this is order(n^2), but it's usually short, and not
84 time critical */
85 for (i = 0; i < num; i++) {
86 /* Only count 24-bit relocs, others don't need stubs */
87 if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
88 continue;
89 for (j = 0; j < i; j++) {
90 /* If this addend appeared before, it's
91 already been counted */
92 if (rela[i].r_info == rela[j].r_info
93 && rela[i].r_addend == rela[j].r_addend)
94 break;
95 }
96 if (j == i) ret++;
97 }
98 return ret;
99}
100
101void *module_alloc(unsigned long size)
102{
103 if (size == 0)
104 return NULL;
105
106 return vmalloc_exec(size);
107}
108
109/* Free memory returned from module_alloc */
110void module_free(struct module *mod, void *module_region)
111{
112 vfree(module_region);
113 /* FIXME: If module_region == mod->init_region, trim exception
114 table entries. */
115}
116
117/* Get size of potential trampolines required. */
118static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
119 const Elf64_Shdr *sechdrs)
120{
121 /* One extra reloc so it's always 0-funcaddr terminated */
122 unsigned long relocs = 1;
123 unsigned i;
124
125 /* Every relocated section... */
126 for (i = 1; i < hdr->e_shnum; i++) {
127 if (sechdrs[i].sh_type == SHT_RELA) {
128 DEBUGP("Found relocations in section %u\n", i);
129 DEBUGP("Ptr: %p. Number: %lu\n",
130 (void *)sechdrs[i].sh_addr,
131 sechdrs[i].sh_size / sizeof(Elf64_Rela));
132 relocs += count_relocs((void *)sechdrs[i].sh_addr,
133 sechdrs[i].sh_size
134 / sizeof(Elf64_Rela));
135 }
136 }
137
138 DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
139 return relocs * sizeof(struct ppc64_stub_entry);
140}
141
142static void dedotify_versions(struct modversion_info *vers,
143 unsigned long size)
144{
145 struct modversion_info *end;
146
147 for (end = (void *)vers + size; vers < end; vers++)
148 if (vers->name[0] == '.')
149 memmove(vers->name, vers->name+1, strlen(vers->name));
150}
151
152/* Undefined symbols which refer to .funcname: hack them to funcname */
153static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
154{
155 unsigned int i;
156
157 for (i = 1; i < numsyms; i++) {
158 if (syms[i].st_shndx == SHN_UNDEF) {
159 char *name = strtab + syms[i].st_name;
160 if (name[0] == '.')
161 memmove(name, name+1, strlen(name));
162 }
163 }
164}
165
166int module_frob_arch_sections(Elf64_Ehdr *hdr,
167 Elf64_Shdr *sechdrs,
168 char *secstrings,
169 struct module *me)
170{
171 unsigned int i;
172
173 /* Find .toc and .stubs sections, symtab and strtab */
174 for (i = 1; i < hdr->e_shnum; i++) {
175 char *p;
176 if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
177 me->arch.stubs_section = i;
178 else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
179 me->arch.toc_section = i;
180 else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
181 dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
182 sechdrs[i].sh_size);
183
184 /* We don't handle .init for the moment: rename to _init */
185 while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
186 p[0] = '_';
187
188 if (sechdrs[i].sh_type == SHT_SYMTAB)
189 dedotify((void *)hdr + sechdrs[i].sh_offset,
190 sechdrs[i].sh_size / sizeof(Elf64_Sym),
191 (void *)hdr
192 + sechdrs[sechdrs[i].sh_link].sh_offset);
193 }
194 if (!me->arch.stubs_section || !me->arch.toc_section) {
195 printk("%s: doesn't contain .toc or .stubs.\n", me->name);
196 return -ENOEXEC;
197 }
198
199 /* Override the stubs size */
200 sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
201 return 0;
202}
203
204int apply_relocate(Elf64_Shdr *sechdrs,
205 const char *strtab,
206 unsigned int symindex,
207 unsigned int relsec,
208 struct module *me)
209{
210 printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name);
211 return -ENOEXEC;
212}
213
214/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
215 gives the value maximum span in an instruction which uses a signed
216 offset) */
217static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
218{
219 return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
220}
221
222/* Both low and high 16 bits are added as SIGNED additions, so if low
223 16 bits has high bit set, high 16 bits must be adjusted. These
224 macros do that (stolen from binutils). */
225#define PPC_LO(v) ((v) & 0xffff)
226#define PPC_HI(v) (((v) >> 16) & 0xffff)
227#define PPC_HA(v) PPC_HI ((v) + 0x8000)
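/* A worked example with a made-up value: for v = 0x1234abcd,
 * PPC_LO(v) = 0xabcd, which the addi sign-extends to -0x5433, so a
 * plain high half of 0x1234 would fall short. PPC_HA compensates:
 * PPC_HI(0x1234abcd + 0x8000) = 0x1235, and
 * (0x1235 << 16) + (sign-extended 0xabcd) = 0x1234abcd again.
 */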
228
229/* Patch stub to reference function and correct r2 value. */
230static inline int create_stub(Elf64_Shdr *sechdrs,
231 struct ppc64_stub_entry *entry,
232 struct ppc64_opd_entry *opd,
233 struct module *me)
234{
235 Elf64_Half *loc1, *loc2;
236 long reladdr;
237
238 *entry = ppc64_stub;
239
240 loc1 = (Elf64_Half *)&entry->jump[2];
241 loc2 = (Elf64_Half *)&entry->jump[6];
242
243 /* Stub uses address relative to r2. */
244 reladdr = (unsigned long)entry - my_r2(sechdrs, me);
245 if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
246		printk("%s: Address %p of stub out of range of %p.\n",
247		       me->name, (void *)reladdr, (void *)my_r2(sechdrs, me));
248 return 0;
249 }
250 DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
251
252 *loc1 = PPC_HA(reladdr);
253 *loc2 = PPC_LO(reladdr);
254 entry->opd.funcaddr = opd->funcaddr;
255 entry->opd.r2 = opd->r2;
256 return 1;
257}
258
259/* Create stub to jump to function described in this OPD: we need the
260 stub to set up the TOC ptr (r2) for the function. */
261static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
262 unsigned long opdaddr,
263 struct module *me)
264{
265 struct ppc64_stub_entry *stubs;
266 struct ppc64_opd_entry *opd = (void *)opdaddr;
267 unsigned int i, num_stubs;
268
269 num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
270
271 /* Find this stub, or if that fails, the next avail. entry */
272 stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
273 for (i = 0; stubs[i].opd.funcaddr; i++) {
274 BUG_ON(i >= num_stubs);
275
276 if (stubs[i].opd.funcaddr == opd->funcaddr)
277 return (unsigned long)&stubs[i];
278 }
279
280 if (!create_stub(sechdrs, &stubs[i], opd, me))
281 return 0;
282
283 return (unsigned long)&stubs[i];
284}
285
286/* We expect a noop next: if it is, replace it with instruction to
287 restore r2. */
288static int restore_r2(u32 *instruction, struct module *me)
289{
290 if (*instruction != 0x60000000) {
291 printk("%s: Expect noop after relocate, got %08x\n",
292 me->name, *instruction);
293 return 0;
294 }
295 *instruction = 0xe8410028; /* ld r2,40(r1) */
296 return 1;
297}
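/* For illustration (a sketch, not code emitted by this file), a
 * module call site before and after relocation looks like:
 *
 *	bl .external_func	->	bl <stub>
 *	nop  (0x60000000)	->	ld r2,40(r1)  (0xe8410028)
 *
 * The stub saved the caller's r2 at 40(r1), so the rewritten nop
 * restores it once the callee returns.
 */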
298
299int apply_relocate_add(Elf64_Shdr *sechdrs,
300 const char *strtab,
301 unsigned int symindex,
302 unsigned int relsec,
303 struct module *me)
304{
305 unsigned int i;
306 Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
307 Elf64_Sym *sym;
308 unsigned long *location;
309 unsigned long value;
310
311 DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
312 sechdrs[relsec].sh_info);
313 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
314 /* This is where to make the change */
315 location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
316 + rela[i].r_offset;
317 /* This is the symbol it is referring to */
318 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
319 + ELF64_R_SYM(rela[i].r_info);
320
321 DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
322 location, (long)ELF64_R_TYPE(rela[i].r_info),
323 strtab + sym->st_name, (unsigned long)sym->st_value,
324 (long)rela[i].r_addend);
325
326 /* `Everything is relative'. */
327 value = sym->st_value + rela[i].r_addend;
328
329 switch (ELF64_R_TYPE(rela[i].r_info)) {
330 case R_PPC64_ADDR32:
331 /* Simply set it */
332 *(u32 *)location = value;
333 break;
334
335 case R_PPC64_ADDR64:
336 /* Simply set it */
337 *(unsigned long *)location = value;
338 break;
339
340 case R_PPC64_TOC:
341 *(unsigned long *)location = my_r2(sechdrs, me);
342 break;
343
344 case R_PPC64_TOC16_DS:
345			/* Subtract TOC pointer */
346 value -= my_r2(sechdrs, me);
347 if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
348 printk("%s: bad TOC16_DS relocation (%lu)\n",
349 me->name, value);
350 return -ENOEXEC;
351 }
352 *((uint16_t *) location)
353 = (*((uint16_t *) location) & ~0xfffc)
354 | (value & 0xfffc);
355 break;
356
357 case R_PPC_REL24:
358 /* FIXME: Handle weak symbols here --RR */
359 if (sym->st_shndx == SHN_UNDEF) {
360 /* External: go via stub */
361 value = stub_for_addr(sechdrs, value, me);
362 if (!value)
363 return -ENOENT;
364 if (!restore_r2((u32 *)location + 1, me))
365 return -ENOEXEC;
366 }
367
368 /* Convert value to relative */
369 value -= (unsigned long)location;
370 if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
371 printk("%s: REL24 %li out of range!\n",
372 me->name, (long int)value);
373 return -ENOEXEC;
374 }
375
376 /* Only replace bits 2 through 26 */
377 *(uint32_t *)location
378 = (*(uint32_t *)location & ~0x03fffffc)
379 | (value & 0x03fffffc);
380 break;
381
382 default:
383 printk("%s: Unknown ADD relocation: %lu\n",
384 me->name,
385 (unsigned long)ELF64_R_TYPE(rela[i].r_info));
386 return -ENOEXEC;
387 }
388 }
389
390 return 0;
391}
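/* A note on the REL24 range check above: adding 0x2000000 maps the
 * signed window -0x2000000..0x1ffffff onto 0..0x3ffffff, so a single
 * unsigned comparison rejects any branch displacement outside the
 * +/-32MB that the 24-bit branch field (shifted left two bits) can
 * encode.
 */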
392
393LIST_HEAD(module_bug_list);
394
395int module_finalize(const Elf_Ehdr *hdr,
396 const Elf_Shdr *sechdrs, struct module *me)
397{
398 char *secstrings;
399 unsigned int i;
400
401 me->arch.bug_table = NULL;
402 me->arch.num_bugs = 0;
403
404 /* Find the __bug_table section, if present */
405 secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
406 for (i = 1; i < hdr->e_shnum; i++) {
407 if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
408 continue;
409 me->arch.bug_table = (void *) sechdrs[i].sh_addr;
410 me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
411 break;
412 }
413
414 /*
415 * Strictly speaking this should have a spinlock to protect against
416 * traversals, but since we only traverse on BUG()s, a spinlock
417 * could potentially lead to deadlock and thus be counter-productive.
418 */
419 list_add(&me->arch.bug_list, &module_bug_list);
420
421 return 0;
422}
423
424void module_arch_cleanup(struct module *mod)
425{
426 list_del(&mod->arch.bug_list);
427}
428
429struct bug_entry *module_find_bug(unsigned long bugaddr)
430{
431 struct mod_arch_specific *mod;
432 unsigned int i;
433 struct bug_entry *bug;
434
435 list_for_each_entry(mod, &module_bug_list, bug_list) {
436 bug = mod->bug_table;
437 for (i = 0; i < mod->num_bugs; ++i, ++bug)
438 if (bugaddr == bug->bug_addr)
439 return bug;
440 }
441 return NULL;
442}
diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c
new file mode 100644
index 000000000000..593ea5b82afa
--- /dev/null
+++ b/arch/ppc64/kernel/mpic.c
@@ -0,0 +1,859 @@
1/*
2 * arch/ppc64/kernel/mpic.c
3 *
4 * Driver for interrupt controllers following the OpenPIC standard, the
5 * common implementation being IBM's MPIC. This driver can also deal
6 * with various broken implementations of this HW.
7 *
8 * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
9 *
10 * This file is subject to the terms and conditions of the GNU General Public
11 * License. See the file COPYING in the main directory of this archive
12 * for more details.
13 */
14
15#undef DEBUG
16
17#include <linux/config.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/init.h>
21#include <linux/irq.h>
22#include <linux/smp.h>
23#include <linux/interrupt.h>
24#include <linux/bootmem.h>
25#include <linux/spinlock.h>
26#include <linux/pci.h>
27
28#include <asm/ptrace.h>
29#include <asm/signal.h>
30#include <asm/io.h>
31#include <asm/pgtable.h>
32#include <asm/irq.h>
33#include <asm/machdep.h>
34
35#include "mpic.h"
36
37#ifdef DEBUG
38#define DBG(fmt...) printk(fmt)
39#else
40#define DBG(fmt...)
41#endif
42
43static struct mpic *mpics;
44static struct mpic *mpic_primary;
45static DEFINE_SPINLOCK(mpic_lock);
46
47
48/*
49 * Register accessor functions
50 */
51
52
53static inline u32 _mpic_read(unsigned int be, volatile u32 __iomem *base,
54 unsigned int reg)
55{
56 if (be)
57 return in_be32(base + (reg >> 2));
58 else
59 return in_le32(base + (reg >> 2));
60}
61
62static inline void _mpic_write(unsigned int be, volatile u32 __iomem *base,
63 unsigned int reg, u32 value)
64{
65 if (be)
66 out_be32(base + (reg >> 2), value);
67 else
68 out_le32(base + (reg >> 2), value);
69}
70
71static inline u32 _mpic_ipi_read(struct mpic *mpic, unsigned int ipi)
72{
73 unsigned int be = (mpic->flags & MPIC_BIG_ENDIAN) != 0;
74 unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10);
75
76 if (mpic->flags & MPIC_BROKEN_IPI)
77 be = !be;
78 return _mpic_read(be, mpic->gregs, offset);
79}
80
81static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 value)
82{
83 unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10);
84
85 _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->gregs, offset, value);
86}
87
88static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
89{
90 unsigned int cpu = 0;
91
92 if (mpic->flags & MPIC_PRIMARY)
93 cpu = hard_smp_processor_id();
94
95 return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg);
96}
97
98static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value)
99{
100 unsigned int cpu = 0;
101
102 if (mpic->flags & MPIC_PRIMARY)
103 cpu = hard_smp_processor_id();
104
105 _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg, value);
106}
107
108static inline u32 _mpic_irq_read(struct mpic *mpic, unsigned int src_no, unsigned int reg)
109{
110 unsigned int isu = src_no >> mpic->isu_shift;
111 unsigned int idx = src_no & mpic->isu_mask;
112
113 return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu],
114 reg + (idx * MPIC_IRQ_STRIDE));
115}
116
117static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
118 unsigned int reg, u32 value)
119{
120 unsigned int isu = src_no >> mpic->isu_shift;
121 unsigned int idx = src_no & mpic->isu_mask;
122
123 _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu],
124 reg + (idx * MPIC_IRQ_STRIDE), value);
125}
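/* Example of the ISU decomposition with made-up sizes: for
 * isu_size = 16, mpic_alloc() below computes isu_shift = 4 and
 * isu_mask = 0xf, so source 37 lives in isus[37 >> 4] = isus[2] at
 * index 37 & 0xf = 5 within that ISU's register block.
 */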
126
127#define mpic_read(b,r) _mpic_read(mpic->flags & MPIC_BIG_ENDIAN,(b),(r))
128#define mpic_write(b,r,v) _mpic_write(mpic->flags & MPIC_BIG_ENDIAN,(b),(r),(v))
129#define mpic_ipi_read(i) _mpic_ipi_read(mpic,(i))
130#define mpic_ipi_write(i,v) _mpic_ipi_write(mpic,(i),(v))
131#define mpic_cpu_read(i) _mpic_cpu_read(mpic,(i))
132#define mpic_cpu_write(i,v) _mpic_cpu_write(mpic,(i),(v))
133#define mpic_irq_read(s,r) _mpic_irq_read(mpic,(s),(r))
134#define mpic_irq_write(s,r,v) _mpic_irq_write(mpic,(s),(r),(v))
135
136
137/*
138 * Low level utility functions
139 */
140
141
142
143/* Check if we have one of those nice broken MPICs with a flipped endian on
144 * reads from IPI registers
145 */
146static void __init mpic_test_broken_ipi(struct mpic *mpic)
147{
148 u32 r;
149
150 mpic_write(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0, MPIC_VECPRI_MASK);
151 r = mpic_read(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0);
152
153 if (r == le32_to_cpu(MPIC_VECPRI_MASK)) {
154 printk(KERN_INFO "mpic: Detected reversed IPI registers\n");
155 mpic->flags |= MPIC_BROKEN_IPI;
156 }
157}
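/* Example of what the test above catches: MPIC_VECPRI_MASK is
 * 0x80000000 and le32_to_cpu(0x80000000) is 0x00000080 on this
 * big-endian host, so a value that comes back byte-swapped matches
 * the comparison and flags the controller as MPIC_BROKEN_IPI.
 */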
158
159#ifdef CONFIG_MPIC_BROKEN_U3
160
161/* Test if an interrupt is sourced from HyperTransport (used on broken U3s)
162 * to force the edge setting on the MPIC and do the ack workaround.
163 */
164static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source_no)
165{
166 if (source_no >= 128 || !mpic->fixups)
167 return 0;
168 return mpic->fixups[source_no].base != NULL;
169}
170
171static inline void mpic_apic_end_irq(struct mpic *mpic, unsigned int source_no)
172{
173 struct mpic_irq_fixup *fixup = &mpic->fixups[source_no];
174 u32 tmp;
175
176 spin_lock(&mpic->fixup_lock);
177 writeb(0x11 + 2 * fixup->irq, fixup->base);
178 tmp = readl(fixup->base + 2);
179 writel(tmp | 0x80000000ul, fixup->base + 2);
180 /* config writes shouldn't be posted but let's be safe ... */
181 (void)readl(fixup->base + 2);
182 spin_unlock(&mpic->fixup_lock);
183}
184
185
186static void __init mpic_amd8111_read_irq(struct mpic *mpic, u8 __iomem *devbase)
187{
188 int i, irq;
189 u32 tmp;
190
191 printk(KERN_INFO "mpic: - Workarounds on AMD 8111 @ %p\n", devbase);
192
193 for (i=0; i < 24; i++) {
194 writeb(0x10 + 2*i, devbase + 0xf2);
195 tmp = readl(devbase + 0xf4);
196 if ((tmp & 0x1) || !(tmp & 0x20))
197 continue;
198 irq = (tmp >> 16) & 0xff;
199 mpic->fixups[irq].irq = i;
200 mpic->fixups[irq].base = devbase + 0xf2;
201 }
202}
203
204static void __init mpic_amd8131_read_irq(struct mpic *mpic, u8 __iomem *devbase)
205{
206 int i, irq;
207 u32 tmp;
208
209 printk(KERN_INFO "mpic: - Workarounds on AMD 8131 @ %p\n", devbase);
210
211 for (i=0; i < 4; i++) {
212 writeb(0x10 + 2*i, devbase + 0xba);
213 tmp = readl(devbase + 0xbc);
214 if ((tmp & 0x1) || !(tmp & 0x20))
215 continue;
216 irq = (tmp >> 16) & 0xff;
217 mpic->fixups[irq].irq = i;
218 mpic->fixups[irq].base = devbase + 0xba;
219 }
220}
221
222static void __init mpic_scan_ioapics(struct mpic *mpic)
223{
224 unsigned int devfn;
225 u8 __iomem *cfgspace;
226
227 printk(KERN_INFO "mpic: Setting up IO-APICs workarounds for U3\n");
228
229 /* Allocate fixups array */
230 mpic->fixups = alloc_bootmem(128 * sizeof(struct mpic_irq_fixup));
231 BUG_ON(mpic->fixups == NULL);
232 memset(mpic->fixups, 0, 128 * sizeof(struct mpic_irq_fixup));
233
234 /* Init spinlock */
235 spin_lock_init(&mpic->fixup_lock);
236
237 /* Map u3 config space. We assume all IO-APICs are on the primary bus
238 * and slot will never be above "0xf" so we only need to map 32k
239 */
240 cfgspace = (unsigned char __iomem *)ioremap(0xf2000000, 0x8000);
241 BUG_ON(cfgspace == NULL);
242
243	/* Now we scan all slots. We do a very quick scan: we read only the
244	 * header type, vendor ID and device ID, which is plenty
245	 */
246 for (devfn = 0; devfn < PCI_DEVFN(0x10,0); devfn ++) {
247 u8 __iomem *devbase = cfgspace + (devfn << 8);
248 u8 hdr_type = readb(devbase + PCI_HEADER_TYPE);
249 u32 l = readl(devbase + PCI_VENDOR_ID);
250 u16 vendor_id, device_id;
251 int multifunc = 0;
252
253 DBG("devfn %x, l: %x\n", devfn, l);
254
255 /* If no device, skip */
256 if (l == 0xffffffff || l == 0x00000000 ||
257 l == 0x0000ffff || l == 0xffff0000)
258 goto next;
259
260		/* Check if it's a multifunction device (only really checked
261		 * on function 0 though)
262		 */
263 multifunc = !!(hdr_type & 0x80);
264 vendor_id = l & 0xffff;
265 device_id = (l >> 16) & 0xffff;
266
267 /* If a known device, go to fixup setup code */
268 if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7460)
269 mpic_amd8111_read_irq(mpic, devbase);
270 if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7450)
271 mpic_amd8131_read_irq(mpic, devbase);
272 next:
273 /* next device, if function 0 */
274 if ((PCI_FUNC(devfn) == 0) && !multifunc)
275 devfn += 7;
276 }
277}
278
279#endif /* CONFIG_MPIC_BROKEN_U3 */
280
281
282/* Find an mpic associated with a given linux interrupt */
283static struct mpic *mpic_find(unsigned int irq, unsigned int *is_ipi)
284{
285 struct mpic *mpic = mpics;
286
287 while(mpic) {
288 /* search IPIs first since they may override the main interrupts */
289 if (irq >= mpic->ipi_offset && irq < (mpic->ipi_offset + 4)) {
290 if (is_ipi)
291 *is_ipi = 1;
292 return mpic;
293 }
294 if (irq >= mpic->irq_offset &&
295 irq < (mpic->irq_offset + mpic->irq_count)) {
296 if (is_ipi)
297 *is_ipi = 0;
298 return mpic;
299 }
300		mpic = mpic->next;
301 }
302 return NULL;
303}
304
305/* Convert a cpu mask from logical to physical cpu numbers. */
306static inline u32 mpic_physmask(u32 cpumask)
307{
308 int i;
309 u32 mask = 0;
310
311 for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1)
312 mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
313 return mask;
314}
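/* Worked example with a hypothetical numbering: if logical CPUs 0
 * and 2 have hard ids 0 and 4, a cpumask of 0b101 becomes
 * (1 << 0) | (1 << 4) = 0x11.
 */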
315
316#ifdef CONFIG_SMP
317/* Get the mpic structure from the IPI number */
318static inline struct mpic * mpic_from_ipi(unsigned int ipi)
319{
320 return container_of(irq_desc[ipi].handler, struct mpic, hc_ipi);
321}
322#endif
323
324/* Get the mpic structure from the irq number */
325static inline struct mpic * mpic_from_irq(unsigned int irq)
326{
327 return container_of(irq_desc[irq].handler, struct mpic, hc_irq);
328}
329
330/* Send an EOI */
331static inline void mpic_eoi(struct mpic *mpic)
332{
333 mpic_cpu_write(MPIC_CPU_EOI, 0);
334 (void)mpic_cpu_read(MPIC_CPU_WHOAMI);
335}
336
337#ifdef CONFIG_SMP
338static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
339{
340 struct mpic *mpic = dev_id;
341
342 smp_message_recv(irq - mpic->ipi_offset, regs);
343 return IRQ_HANDLED;
344}
345#endif /* CONFIG_SMP */
346
347/*
348 * Linux descriptor level callbacks
349 */
350
351
352static void mpic_enable_irq(unsigned int irq)
353{
354 unsigned int loops = 100000;
355 struct mpic *mpic = mpic_from_irq(irq);
356 unsigned int src = irq - mpic->irq_offset;
357
358 DBG("%s: enable_irq: %d (src %d)\n", mpic->name, irq, src);
359
360 mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
361 mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & ~MPIC_VECPRI_MASK);
362
363	/* make sure the unmask gets to the controller before we return to user */
364 do {
365 if (!loops--) {
366 printk(KERN_ERR "mpic_enable_irq timeout\n");
367 break;
368 }
369 } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK);
370}
371
372static void mpic_disable_irq(unsigned int irq)
373{
374 unsigned int loops = 100000;
375 struct mpic *mpic = mpic_from_irq(irq);
376 unsigned int src = irq - mpic->irq_offset;
377
378 DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src);
379
380 mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
381 mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | MPIC_VECPRI_MASK);
382
383 /* make sure mask gets to controller before we return to user */
384 do {
385 if (!loops--) {
386			printk(KERN_ERR "mpic_disable_irq timeout\n");
387 break;
388 }
389 } while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK));
390}
391
392static void mpic_end_irq(unsigned int irq)
393{
394 struct mpic *mpic = mpic_from_irq(irq);
395
396 DBG("%s: end_irq: %d\n", mpic->name, irq);
397
398 /* We always EOI on end_irq() even for edge interrupts since that
399	 * should only lower the priority; the MPIC should have properly
400 * latched another edge interrupt coming in anyway
401 */
402
403#ifdef CONFIG_MPIC_BROKEN_U3
404 if (mpic->flags & MPIC_BROKEN_U3) {
405 unsigned int src = irq - mpic->irq_offset;
406 if (mpic_is_ht_interrupt(mpic, src))
407 mpic_apic_end_irq(mpic, src);
408 }
409#endif /* CONFIG_MPIC_BROKEN_U3 */
410
411 mpic_eoi(mpic);
412}
413
414#ifdef CONFIG_SMP
415
416static void mpic_enable_ipi(unsigned int irq)
417{
418 struct mpic *mpic = mpic_from_ipi(irq);
419 unsigned int src = irq - mpic->ipi_offset;
420
421 DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src);
422 mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
423}
424
425static void mpic_disable_ipi(unsigned int irq)
426{
427 /* NEVER disable an IPI... that's just plain wrong! */
428}
429
430static void mpic_end_ipi(unsigned int irq)
431{
432 struct mpic *mpic = mpic_from_ipi(irq);
433
434 /*
435 * IPIs are marked IRQ_PER_CPU. This has the side effect of
436 * preventing the IRQ_PENDING/IRQ_INPROGRESS logic from
437 * applying to them. We EOI them late to avoid re-entering.
438	 * We mark IPIs with SA_INTERRUPT as they must run with
439 * irqs disabled.
440 */
441 mpic_eoi(mpic);
442}
443
444#endif /* CONFIG_SMP */
445
446static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
447{
448 struct mpic *mpic = mpic_from_irq(irq);
449
450 cpumask_t tmp;
451
452 cpus_and(tmp, cpumask, cpu_online_map);
453
454 mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION,
455 mpic_physmask(cpus_addr(tmp)[0]));
456}
457
458
459/*
460 * Exported functions
461 */
462
463
464struct mpic * __init mpic_alloc(unsigned long phys_addr,
465 unsigned int flags,
466 unsigned int isu_size,
467 unsigned int irq_offset,
468 unsigned int irq_count,
469 unsigned int ipi_offset,
470 unsigned char *senses,
471 unsigned int senses_count,
472 const char *name)
473{
474 struct mpic *mpic;
475 u32 reg;
476 const char *vers;
477 int i;
478
479 mpic = alloc_bootmem(sizeof(struct mpic));
480 if (mpic == NULL)
481 return NULL;
482
483 memset(mpic, 0, sizeof(struct mpic));
484 mpic->name = name;
485
486 mpic->hc_irq.typename = name;
487 mpic->hc_irq.enable = mpic_enable_irq;
488 mpic->hc_irq.disable = mpic_disable_irq;
489 mpic->hc_irq.end = mpic_end_irq;
490 if (flags & MPIC_PRIMARY)
491 mpic->hc_irq.set_affinity = mpic_set_affinity;
492#ifdef CONFIG_SMP
493 mpic->hc_ipi.typename = name;
494 mpic->hc_ipi.enable = mpic_enable_ipi;
495 mpic->hc_ipi.disable = mpic_disable_ipi;
496 mpic->hc_ipi.end = mpic_end_ipi;
497#endif /* CONFIG_SMP */
498
499 mpic->flags = flags;
500 mpic->isu_size = isu_size;
501 mpic->irq_offset = irq_offset;
502 mpic->irq_count = irq_count;
503 mpic->ipi_offset = ipi_offset;
504 mpic->num_sources = 0; /* so far */
505 mpic->senses = senses;
506 mpic->senses_count = senses_count;
507
508 /* Map the global registers */
509 mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000);
510	mpic->tmregs = mpic->gregs + ((MPIC_TIMER_BASE - MPIC_GREG_BASE) >> 2);
511 BUG_ON(mpic->gregs == NULL);
512
513 /* Reset */
514 if (flags & MPIC_WANTS_RESET) {
515 mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0,
516 mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
517 | MPIC_GREG_GCONF_RESET);
518 while( mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
519 & MPIC_GREG_GCONF_RESET)
520 mb();
521 }
522
523 /* Read feature register, calculate num CPUs and, for non-ISU
524 * MPICs, num sources as well. On ISU MPICs, sources are counted
525 * as ISUs are added
526 */
527 reg = mpic_read(mpic->gregs, MPIC_GREG_FEATURE_0);
528 mpic->num_cpus = ((reg & MPIC_GREG_FEATURE_LAST_CPU_MASK)
529 >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1;
530 if (isu_size == 0)
531 mpic->num_sources = ((reg & MPIC_GREG_FEATURE_LAST_SRC_MASK)
532 >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1;
533
534 /* Map the per-CPU registers */
535 for (i = 0; i < mpic->num_cpus; i++) {
536 mpic->cpuregs[i] = ioremap(phys_addr + MPIC_CPU_BASE +
537 i * MPIC_CPU_STRIDE, 0x1000);
538 BUG_ON(mpic->cpuregs[i] == NULL);
539 }
540
541 /* Initialize main ISU if none provided */
542 if (mpic->isu_size == 0) {
543 mpic->isu_size = mpic->num_sources;
544 mpic->isus[0] = ioremap(phys_addr + MPIC_IRQ_BASE,
545 MPIC_IRQ_STRIDE * mpic->isu_size);
546 BUG_ON(mpic->isus[0] == NULL);
547 }
548 mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
549 mpic->isu_mask = (1 << mpic->isu_shift) - 1;
550
551 /* Display version */
552 switch (reg & MPIC_GREG_FEATURE_VERSION_MASK) {
553 case 1:
554 vers = "1.0";
555 break;
556 case 2:
557 vers = "1.2";
558 break;
559 case 3:
560 vers = "1.3";
561 break;
562 default:
563 vers = "<unknown>";
564 break;
565 }
566 printk(KERN_INFO "mpic: Setting up MPIC \"%s\" version %s at %lx, max %d CPUs\n",
567 name, vers, phys_addr, mpic->num_cpus);
568 printk(KERN_INFO "mpic: ISU size: %d, shift: %d, mask: %x\n", mpic->isu_size,
569 mpic->isu_shift, mpic->isu_mask);
570
571 mpic->next = mpics;
572 mpics = mpic;
573
574 if (flags & MPIC_PRIMARY)
575 mpic_primary = mpic;
576
577 return mpic;
578}
579
580void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
581 unsigned long phys_addr)
582{
583 unsigned int isu_first = isu_num * mpic->isu_size;
584
585 BUG_ON(isu_num >= MPIC_MAX_ISU);
586
587 mpic->isus[isu_num] = ioremap(phys_addr, MPIC_IRQ_STRIDE * mpic->isu_size);
588 if ((isu_first + mpic->isu_size) > mpic->num_sources)
589 mpic->num_sources = isu_first + mpic->isu_size;
590}
591
592void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler,
593 void *data)
594{
595 struct mpic *mpic = mpic_find(irq, NULL);
596 unsigned long flags;
597
598	/* Synchronization here is a bit dodgy, so don't try to replace the
599	 * cascade handler on the fly too often ... but normally it's set up at boot.
600 */
601 spin_lock_irqsave(&mpic_lock, flags);
602 if (mpic->cascade)
603 mpic_disable_irq(mpic->cascade_vec + mpic->irq_offset);
604 mpic->cascade = NULL;
605 wmb();
606 mpic->cascade_vec = irq - mpic->irq_offset;
607 mpic->cascade_data = data;
608 wmb();
609 mpic->cascade = handler;
610 mpic_enable_irq(irq);
611 spin_unlock_irqrestore(&mpic_lock, flags);
612}
613
614void __init mpic_init(struct mpic *mpic)
615{
616 int i;
617
618 BUG_ON(mpic->num_sources == 0);
619
620 printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources);
621
622 /* Set current processor priority to max */
623 mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf);
624
625 /* Initialize timers: just disable them all */
626 for (i = 0; i < 4; i++) {
627 mpic_write(mpic->tmregs,
628 i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0);
629 mpic_write(mpic->tmregs,
630 i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI,
631 MPIC_VECPRI_MASK |
632 (MPIC_VEC_TIMER_0 + i));
633 }
634
635 /* Initialize IPIs to our reserved vectors and mark them disabled for now */
636 mpic_test_broken_ipi(mpic);
637 for (i = 0; i < 4; i++) {
638 mpic_ipi_write(i,
639 MPIC_VECPRI_MASK |
640 (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
641 (MPIC_VEC_IPI_0 + i));
642#ifdef CONFIG_SMP
643 if (!(mpic->flags & MPIC_PRIMARY))
644 continue;
645 irq_desc[mpic->ipi_offset+i].status |= IRQ_PER_CPU;
646 irq_desc[mpic->ipi_offset+i].handler = &mpic->hc_ipi;
647
648#endif /* CONFIG_SMP */
649 }
650
651 /* Initialize interrupt sources */
652 if (mpic->irq_count == 0)
653 mpic->irq_count = mpic->num_sources;
654
655#ifdef CONFIG_MPIC_BROKEN_U3
656 /* Do the ioapic fixups on U3 broken mpic */
657 DBG("MPIC flags: %x\n", mpic->flags);
658 if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY))
659 mpic_scan_ioapics(mpic);
660#endif /* CONFIG_MPIC_BROKEN_U3 */
661
662 for (i = 0; i < mpic->num_sources; i++) {
663 /* start with vector = source number, and masked */
664 u32 vecpri = MPIC_VECPRI_MASK | i | (8 << MPIC_VECPRI_PRIORITY_SHIFT);
665 int level = 0;
666
667 /* if it's an IPI, we skip it */
668		if ((mpic->irq_offset + i) >= mpic->ipi_offset &&
669		    (mpic->irq_offset + i) < (mpic->ipi_offset + 4))
670 continue;
671
672 /* do senses munging */
673 if (mpic->senses && i < mpic->senses_count) {
674 if (mpic->senses[i] & IRQ_SENSE_LEVEL)
675 vecpri |= MPIC_VECPRI_SENSE_LEVEL;
676 if (mpic->senses[i] & IRQ_POLARITY_POSITIVE)
677 vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
678 } else
679 vecpri |= MPIC_VECPRI_SENSE_LEVEL;
680
681		/* remember if it was a level interrupt */
682 level = (vecpri & MPIC_VECPRI_SENSE_LEVEL);
683
684 /* deal with broken U3 */
685 if (mpic->flags & MPIC_BROKEN_U3) {
686#ifdef CONFIG_MPIC_BROKEN_U3
687 if (mpic_is_ht_interrupt(mpic, i)) {
688 vecpri &= ~(MPIC_VECPRI_SENSE_MASK |
689 MPIC_VECPRI_POLARITY_MASK);
690 vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
691 }
692#else
693 printk(KERN_ERR "mpic: BROKEN_U3 set, but CONFIG doesn't match\n");
694#endif
695 }
696
697 DBG("setup source %d, vecpri: %08x, level: %d\n", i, vecpri,
698 (level != 0));
699
700 /* init hw */
701 mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri);
702 mpic_irq_write(i, MPIC_IRQ_DESTINATION,
703 1 << get_hard_smp_processor_id(boot_cpuid));
704
705 /* init linux descriptors */
706 if (i < mpic->irq_count) {
707 irq_desc[mpic->irq_offset+i].status = level ? IRQ_LEVEL : 0;
708 irq_desc[mpic->irq_offset+i].handler = &mpic->hc_irq;
709 }
710 }
711
712	/* Init spurious vector */
713 mpic_write(mpic->gregs, MPIC_GREG_SPURIOUS, MPIC_VEC_SPURRIOUS);
714
715 /* Disable 8259 passthrough */
716 mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0,
717 mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
718 | MPIC_GREG_GCONF_8259_PTHROU_DIS);
719
720 /* Set current processor priority to 0 */
721 mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0);
722}
723
724
725
726void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
727{
728 int is_ipi;
729 struct mpic *mpic = mpic_find(irq, &is_ipi);
730 unsigned long flags;
731 u32 reg;
732
733 spin_lock_irqsave(&mpic_lock, flags);
734 if (is_ipi) {
735 reg = mpic_ipi_read(irq - mpic->ipi_offset) & MPIC_VECPRI_PRIORITY_MASK;
736 mpic_ipi_write(irq - mpic->ipi_offset,
737 reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
738 } else {
739 reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI)
740 & MPIC_VECPRI_PRIORITY_MASK;
741 mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI,
742 reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
743 }
744 spin_unlock_irqrestore(&mpic_lock, flags);
745}
746
747unsigned int mpic_irq_get_priority(unsigned int irq)
748{
749 int is_ipi;
750 struct mpic *mpic = mpic_find(irq, &is_ipi);
751 unsigned long flags;
752 u32 reg;
753
754 spin_lock_irqsave(&mpic_lock, flags);
755 if (is_ipi)
756 reg = mpic_ipi_read(irq - mpic->ipi_offset);
757 else
758 reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI);
759 spin_unlock_irqrestore(&mpic_lock, flags);
760 return (reg & MPIC_VECPRI_PRIORITY_MASK) >> MPIC_VECPRI_PRIORITY_SHIFT;
761}
762
763void mpic_setup_this_cpu(void)
764{
765#ifdef CONFIG_SMP
766 struct mpic *mpic = mpic_primary;
767 unsigned long flags;
768 u32 msk = 1 << hard_smp_processor_id();
769 unsigned int i;
770
771 BUG_ON(mpic == NULL);
772
773 DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
774
775 spin_lock_irqsave(&mpic_lock, flags);
776
777 /* let the mpic know we want intrs. default affinity is 0xffffffff
778 * until changed via /proc. That's how it's done on x86. If we want
779 * it differently, then we should make sure we also change the default
780 * values of irq_affinity in irq.c.
781 */
782 if (distribute_irqs) {
783 for (i = 0; i < mpic->num_sources ; i++)
784 mpic_irq_write(i, MPIC_IRQ_DESTINATION,
785 mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk);
786 }
787
788 /* Set current processor priority to 0 */
789 mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0);
790
791 spin_unlock_irqrestore(&mpic_lock, flags);
792#endif /* CONFIG_SMP */
793}
794
795void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask)
796{
797 struct mpic *mpic = mpic_primary;
798
799 BUG_ON(mpic == NULL);
800
801 DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no);
802
803 mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10,
804 mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0]));
805}
806
807int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs)
808{
809 u32 irq;
810
811 irq = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK;
812 DBG("%s: get_one_irq(): %d\n", mpic->name, irq);
813
814 if (mpic->cascade && irq == mpic->cascade_vec) {
815 DBG("%s: cascading ...\n", mpic->name);
816 irq = mpic->cascade(regs, mpic->cascade_data);
817 mpic_eoi(mpic);
818 return irq;
819 }
820 if (unlikely(irq == MPIC_VEC_SPURRIOUS))
821 return -1;
822 if (irq < MPIC_VEC_IPI_0)
823 return irq + mpic->irq_offset;
824 DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
825 return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset;
826}
827
828int mpic_get_irq(struct pt_regs *regs)
829{
830 struct mpic *mpic = mpic_primary;
831
832 BUG_ON(mpic == NULL);
833
834 return mpic_get_one_irq(mpic, regs);
835}
836
837
838#ifdef CONFIG_SMP
839void mpic_request_ipis(void)
840{
841 struct mpic *mpic = mpic_primary;
842
843 BUG_ON(mpic == NULL);
844
845	printk("requesting IPIs ...\n");
846
847 /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
848 request_irq(mpic->ipi_offset+0, mpic_ipi_action, SA_INTERRUPT,
849 "IPI0 (call function)", mpic);
850 request_irq(mpic->ipi_offset+1, mpic_ipi_action, SA_INTERRUPT,
851 "IPI1 (reschedule)", mpic);
852 request_irq(mpic->ipi_offset+2, mpic_ipi_action, SA_INTERRUPT,
853 "IPI2 (unused)", mpic);
854 request_irq(mpic->ipi_offset+3, mpic_ipi_action, SA_INTERRUPT,
855 "IPI3 (debugger break)", mpic);
856
857	printk("IPIs requested...\n");
858}
859#endif /* CONFIG_SMP */
diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h
new file mode 100644
index 000000000000..571b3c99e062
--- /dev/null
+++ b/arch/ppc64/kernel/mpic.h
@@ -0,0 +1,267 @@
1#include <linux/irq.h>
2
3/*
4 * Global registers
5 */
6
7#define MPIC_GREG_BASE 0x01000
8
9#define MPIC_GREG_FEATURE_0 0x00000
10#define MPIC_GREG_FEATURE_LAST_SRC_MASK 0x07ff0000
11#define MPIC_GREG_FEATURE_LAST_SRC_SHIFT 16
12#define MPIC_GREG_FEATURE_LAST_CPU_MASK 0x00001f00
13#define MPIC_GREG_FEATURE_LAST_CPU_SHIFT 8
14#define MPIC_GREG_FEATURE_VERSION_MASK 0xff
15#define MPIC_GREG_FEATURE_1 0x00010
16#define MPIC_GREG_GLOBAL_CONF_0 0x00020
17#define MPIC_GREG_GCONF_RESET 0x80000000
18#define MPIC_GREG_GCONF_8259_PTHROU_DIS 0x20000000
19#define MPIC_GREG_GCONF_BASE_MASK 0x000fffff
20#define MPIC_GREG_GLOBAL_CONF_1 0x00030
21#define MPIC_GREG_VENDOR_0 0x00040
22#define MPIC_GREG_VENDOR_1 0x00050
23#define MPIC_GREG_VENDOR_2 0x00060
24#define MPIC_GREG_VENDOR_3 0x00070
25#define MPIC_GREG_VENDOR_ID 0x00080
26#define MPIC_GREG_VENDOR_ID_STEPPING_MASK 0x00ff0000
27#define MPIC_GREG_VENDOR_ID_STEPPING_SHIFT 16
28#define MPIC_GREG_VENDOR_ID_DEVICE_ID_MASK 0x0000ff00
29#define MPIC_GREG_VENDOR_ID_DEVICE_ID_SHIFT 8
30#define MPIC_GREG_VENDOR_ID_VENDOR_ID_MASK 0x000000ff
31#define MPIC_GREG_PROCESSOR_INIT 0x00090
32#define MPIC_GREG_IPI_VECTOR_PRI_0 0x000a0
33#define MPIC_GREG_IPI_VECTOR_PRI_1 0x000b0
34#define MPIC_GREG_IPI_VECTOR_PRI_2 0x000c0
35#define MPIC_GREG_IPI_VECTOR_PRI_3 0x000d0
36#define MPIC_GREG_SPURIOUS 0x000e0
37#define MPIC_GREG_TIMER_FREQ 0x000f0
38
39/*
40 *
41 * Timer registers
42 */
43#define MPIC_TIMER_BASE 0x01100
44#define MPIC_TIMER_STRIDE 0x40
45
46#define MPIC_TIMER_CURRENT_CNT 0x00000
47#define MPIC_TIMER_BASE_CNT 0x00010
48#define MPIC_TIMER_VECTOR_PRI 0x00020
49#define MPIC_TIMER_DESTINATION 0x00030
50
51/*
52 * Per-Processor registers
53 */
54
55#define MPIC_CPU_THISBASE 0x00000
56#define MPIC_CPU_BASE 0x20000
57#define MPIC_CPU_STRIDE 0x01000
58
59#define MPIC_CPU_IPI_DISPATCH_0 0x00040
60#define MPIC_CPU_IPI_DISPATCH_1 0x00050
61#define MPIC_CPU_IPI_DISPATCH_2 0x00060
62#define MPIC_CPU_IPI_DISPATCH_3 0x00070
63#define MPIC_CPU_CURRENT_TASK_PRI 0x00080
64#define MPIC_CPU_TASKPRI_MASK 0x0000000f
65#define MPIC_CPU_WHOAMI 0x00090
66#define MPIC_CPU_WHOAMI_MASK 0x0000001f
67#define MPIC_CPU_INTACK 0x000a0
68#define MPIC_CPU_EOI 0x000b0
69
70/*
71 * Per-source registers
72 */
73
74#define MPIC_IRQ_BASE 0x10000
75#define MPIC_IRQ_STRIDE 0x00020
76#define MPIC_IRQ_VECTOR_PRI 0x00000
77#define MPIC_VECPRI_MASK 0x80000000
78#define MPIC_VECPRI_ACTIVITY 0x40000000 /* Read Only */
79#define MPIC_VECPRI_PRIORITY_MASK 0x000f0000
80#define MPIC_VECPRI_PRIORITY_SHIFT 16
81#define MPIC_VECPRI_VECTOR_MASK 0x000007ff
82#define MPIC_VECPRI_POLARITY_POSITIVE 0x00800000
83#define MPIC_VECPRI_POLARITY_NEGATIVE 0x00000000
84#define MPIC_VECPRI_POLARITY_MASK 0x00800000
85#define MPIC_VECPRI_SENSE_LEVEL 0x00400000
86#define MPIC_VECPRI_SENSE_EDGE 0x00000000
87#define MPIC_VECPRI_SENSE_MASK 0x00400000
88#define MPIC_IRQ_DESTINATION 0x00010
89
90#define MPIC_MAX_IRQ_SOURCES 2048
91#define MPIC_MAX_CPUS 32
92#define MPIC_MAX_ISU 32
93
94/*
95 * Special vector numbers (internal use only)
96 */
97#define MPIC_VEC_SPURRIOUS 255
98#define MPIC_VEC_IPI_3 254
99#define MPIC_VEC_IPI_2 253
100#define MPIC_VEC_IPI_1 252
101#define MPIC_VEC_IPI_0 251
102
103/* unused */
104#define MPIC_VEC_TIMER_3 250
105#define MPIC_VEC_TIMER_2 249
106#define MPIC_VEC_TIMER_1 248
107#define MPIC_VEC_TIMER_0 247
108
109/* Type definition of the cascade handler */
110typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data);
111
112#ifdef CONFIG_MPIC_BROKEN_U3
113/* Fixup table entry */
114struct mpic_irq_fixup
115{
116 u8 __iomem *base;
117 unsigned int irq;
118};
119#endif /* CONFIG_MPIC_BROKEN_U3 */
120
121
122/* The instance data of a given MPIC */
123struct mpic
124{
125 /* The "linux" controller struct */
126 hw_irq_controller hc_irq;
127#ifdef CONFIG_SMP
128 hw_irq_controller hc_ipi;
129#endif
130 const char *name;
131 /* Flags */
132 unsigned int flags;
133 /* How many irq sources in a given ISU */
134 unsigned int isu_size;
135 unsigned int isu_shift;
136 unsigned int isu_mask;
137 /* Offset of irq vector numbers */
138 unsigned int irq_offset;
139 unsigned int irq_count;
140 /* Offset of ipi vector numbers */
141 unsigned int ipi_offset;
142 /* Number of sources */
143 unsigned int num_sources;
144 /* Number of CPUs */
145 unsigned int num_cpus;
146 /* cascade handler */
147 mpic_cascade_t cascade;
148 void *cascade_data;
149 unsigned int cascade_vec;
150 /* senses array */
151 unsigned char *senses;
152 unsigned int senses_count;
153
154#ifdef CONFIG_MPIC_BROKEN_U3
155 /* The fixup table */
156 struct mpic_irq_fixup *fixups;
157 spinlock_t fixup_lock;
158#endif
159
160 /* The various ioremap'ed bases */
161 volatile u32 __iomem *gregs;
162 volatile u32 __iomem *tmregs;
163 volatile u32 __iomem *cpuregs[MPIC_MAX_CPUS];
164 volatile u32 __iomem *isus[MPIC_MAX_ISU];
165
166 /* link */
167 struct mpic *next;
168};
169
170/* This is the primary controller, only that one has IPIs and
171 * has affinity control. A non-primary MPIC always uses CPU0
172 * registers only
173 */
174#define MPIC_PRIMARY 0x00000001
175/* Set this for a big-endian MPIC */
176#define MPIC_BIG_ENDIAN 0x00000002
177/* Broken U3 MPIC */
178#define MPIC_BROKEN_U3 0x00000004
179/* Broken IPI registers (autodetected) */
180#define MPIC_BROKEN_IPI 0x00000008
181/* MPIC wants a reset */
182#define MPIC_WANTS_RESET 0x00000010
183
184/* Allocate the controller structure and setup the linux irq descs
185 * for the range of interrupts passed in. No HW initialization is
186 * actually performed.
187 *
188 * @phys_addr: physical base address of the MPIC
189 * @flags: flags, see constants above
190 * @isu_size: number of interrupts in an ISU. Use 0 to use a
191 * standard ISU-less setup (aka powermac)
192 * @irq_offset: first irq number to assign to this mpic
193 * @irq_count: number of irqs to use with this mpic IRQ sources. Pass 0
194 * to match the number of sources
195 * @ipi_offset: first irq number to assign to this mpic IPI sources,
196 * used only on primary mpic
197 * @senses: array of sense values
198 * @senses_num: number of entries in the array
199 *
200 * Note about the sense array. If none is passed, all interrupts are
201 * set up to be level negative unless MPIC_BROKEN_U3 is set in which
202 * case they are edge positive (and the array is ignored anyway).
203 * The values in the array start at the first source of the MPIC,
204 * that is senses[0] corresponds to linux irq "irq_offset".
205 */
206extern struct mpic *mpic_alloc(unsigned long phys_addr,
207 unsigned int flags,
208 unsigned int isu_size,
209 unsigned int irq_offset,
210 unsigned int irq_count,
211 unsigned int ipi_offset,
212 unsigned char *senses,
213 unsigned int senses_num,
214 const char *name);
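/* A minimal usage sketch (all values hypothetical, not from any real
 * board): an ISU-less primary controller with 128 sources at linux
 * irqs 0..127 and IPIs at 128..131 could be brought up with
 *
 *	mpic = mpic_alloc(0x80040000,
 *			  MPIC_PRIMARY | MPIC_BIG_ENDIAN | MPIC_WANTS_RESET,
 *			  0, 0, 128, 128, NULL, 0, "MPIC");
 *	mpic_init(mpic);
 */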
215
216/* Assign ISUs, to call before mpic_init()
217 *
218 * @mpic: controller structure as returned by mpic_alloc()
219 * @isu_num: ISU number
220 * @phys_addr: physical address of the ISU
221 */
222extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
223 unsigned long phys_addr);
224
225/* Initialize the controller. After this has been called, none of the above
226 * should be called again for this mpic
227 */
228extern void mpic_init(struct mpic *mpic);
229
230/* Setup a cascade. Currently, only one cascade is supported this
231 * way, though you can always do a normal request_irq() to add
232 * further cascades yourself. You should call this _after_ having
233 * added all the ISUs
234 *
235 * @irq_no: "linux" irq number of the cascade (that is offset'ed vector)
236 * @handler: cascade handler function
237 */
238extern void mpic_setup_cascade(unsigned int irq_no, mpic_cascade_t handler,
239 void *data);
240
241/*
242 * All of the following functions must only be used after the
243 * ISUs have been assigned and the controller fully initialized
244 * with mpic_init()
245 */
246
247
248/* Change/Read the priority of an interrupt. Default is 8 for irqs and
249 * 10 for IPIs. You can call this on both IPIs and IRQ numbers, but the
250 * IPI number is then the offset'ed one (the linux irq number mapped to the IPI)
251 */
252extern void mpic_irq_set_priority(unsigned int irq, unsigned int pri);
253extern unsigned int mpic_irq_get_priority(unsigned int irq);
254
255/* Setup a non-boot CPU */
256extern void mpic_setup_this_cpu(void);
257
258/* Request IPIs on primary mpic */
259extern void mpic_request_ipis(void);
260
261/* Send an IPI (non-offset'ed number 0..3) */
262extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask);
263
264/* Fetch interrupt from a given mpic */
265extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs);
266/* This one gets to the primary mpic */
267extern int mpic_get_irq(struct pt_regs *regs);
diff --git a/arch/ppc64/kernel/nvram.c b/arch/ppc64/kernel/nvram.c
new file mode 100644
index 000000000000..b9069c2d1933
--- /dev/null
+++ b/arch/ppc64/kernel/nvram.c
@@ -0,0 +1,746 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * /dev/nvram driver for PPC64
10 *
11 * This perhaps should live in drivers/char
12 *
13 * TODO: Split the /dev/nvram part (that one can use
14 * drivers/char/generic_nvram.c) from the arch & partition
15 * parsing code.
16 */
17
18#include <linux/module.h>
19
20#include <linux/types.h>
21#include <linux/errno.h>
22#include <linux/fs.h>
23#include <linux/miscdevice.h>
24#include <linux/fcntl.h>
25#include <linux/nvram.h>
26#include <linux/init.h>
27#include <linux/slab.h>
28#include <linux/spinlock.h>
29#include <asm/uaccess.h>
30#include <asm/nvram.h>
31#include <asm/rtas.h>
32#include <asm/prom.h>
33#include <asm/machdep.h>
34#include <asm/systemcfg.h>
35
36#undef DEBUG_NVRAM
37
38static int nvram_scan_partitions(void);
39static int nvram_setup_partition(void);
40static int nvram_create_os_partition(void);
41static int nvram_remove_os_partition(void);
42
43static struct nvram_partition * nvram_part;
44static long nvram_error_log_index = -1;
45static long nvram_error_log_size = 0;
46
47int no_logging = 1; /* Until we initialize everything,
48 * make sure we don't try logging
49 * anything */
50
51extern volatile int error_log_cnt;
52
53struct err_log_info {
54 int error_type;
55 unsigned int seq_num;
56};
57
58static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
59{
60 int size;
61
62 if (ppc_md.nvram_size == NULL)
63 return -ENODEV;
64 size = ppc_md.nvram_size();
65
66 switch (origin) {
67 case 1:
68 offset += file->f_pos;
69 break;
70 case 2:
71 offset += size;
72 break;
73 }
74 if (offset < 0)
75 return -EINVAL;
76 file->f_pos = offset;
77 return file->f_pos;
78}
79
80
81static ssize_t dev_nvram_read(struct file *file, char __user *buf,
82 size_t count, loff_t *ppos)
83{
84 ssize_t len;
85 char *tmp_buffer;
86 int size;
87
88 if (ppc_md.nvram_size == NULL)
89 return -ENODEV;
90 size = ppc_md.nvram_size();
91
92 if (!access_ok(VERIFY_WRITE, buf, count))
93 return -EFAULT;
94 if (*ppos >= size)
95 return 0;
96 if (count > size)
97 count = size;
98
99 tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
100 if (!tmp_buffer) {
101 printk(KERN_ERR "dev_read_nvram: kmalloc failed\n");
102 return -ENOMEM;
103 }
104
105 len = ppc_md.nvram_read(tmp_buffer, count, ppos);
106 if ((long)len <= 0) {
107 kfree(tmp_buffer);
108 return len;
109 }
110
111 if (copy_to_user(buf, tmp_buffer, len)) {
112 kfree(tmp_buffer);
113 return -EFAULT;
114 }
115
116 kfree(tmp_buffer);
117 return len;
118
119}
120
121static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
122 size_t count, loff_t *ppos)
123{
124 ssize_t len;
125 char * tmp_buffer;
126 int size;
127
128 if (ppc_md.nvram_size == NULL)
129 return -ENODEV;
130 size = ppc_md.nvram_size();
131
132 if (!access_ok(VERIFY_READ, buf, count))
133 return -EFAULT;
134 if (*ppos >= size)
135 return 0;
136 if (count > size)
137 count = size;
138
139 tmp_buffer = (char *) kmalloc(count, GFP_KERNEL);
140 if (!tmp_buffer) {
141 printk(KERN_ERR "dev_nvram_write: kmalloc failed\n");
142 return -ENOMEM;
143 }
144
145 if (copy_from_user(tmp_buffer, buf, count)) {
146 kfree(tmp_buffer);
147 return -EFAULT;
148 }
149
150 len = ppc_md.nvram_write(tmp_buffer, count, ppos);
151 if ((long)len <= 0) {
152 kfree(tmp_buffer);
153 return len;
154 }
155
156 kfree(tmp_buffer);
157 return len;
158}
159
160static int dev_nvram_ioctl(struct inode *inode, struct file *file,
161 unsigned int cmd, unsigned long arg)
162{
163 switch(cmd) {
164#ifdef CONFIG_PPC_PMAC
165 case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
166 printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
167 case IOC_NVRAM_GET_OFFSET: {
168 int part, offset;
169
170 if (systemcfg->platform != PLATFORM_POWERMAC)
171 return -EINVAL;
172 if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
173 return -EFAULT;
174 if (part < pmac_nvram_OF || part > pmac_nvram_NR)
175 return -EINVAL;
176 offset = pmac_get_partition(part);
177 if (offset < 0)
178 return offset;
179 if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
180 return -EFAULT;
181 return 0;
182 }
183#endif /* CONFIG_PPC_PMAC */
184 }
185 return -EINVAL;
186}
187
188struct file_operations nvram_fops = {
189 .owner = THIS_MODULE,
190 .llseek = dev_nvram_llseek,
191 .read = dev_nvram_read,
192 .write = dev_nvram_write,
193 .ioctl = dev_nvram_ioctl,
194};
195
196static struct miscdevice nvram_dev = {
197 NVRAM_MINOR,
198 "nvram",
199 &nvram_fops
200};
201
202
203#ifdef DEBUG_NVRAM
204static void nvram_print_partitions(char * label)
205{
206 struct list_head * p;
207 struct nvram_partition * tmp_part;
208
209 printk(KERN_WARNING "--------%s---------\n", label);
210 printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
211 list_for_each(p, &nvram_part->partition) {
212 tmp_part = list_entry(p, struct nvram_partition, partition);
213 printk(KERN_WARNING "%d \t%02x\t%02x\t%d\t%s\n",
214 tmp_part->index, tmp_part->header.signature,
215 tmp_part->header.checksum, tmp_part->header.length,
216 tmp_part->header.name);
217 }
218}
219#endif
220
221
222static int nvram_write_header(struct nvram_partition * part)
223{
224 loff_t tmp_index;
225 int rc;
226
227 tmp_index = part->index;
228 rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index);
229
230 return rc;
231}
232
233
234static unsigned char nvram_checksum(struct nvram_header *p)
235{
236 unsigned int c_sum, c_sum2;
237 unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */
238 c_sum = p->signature + p->length + sp[0] + sp[1] + sp[2] + sp[3] + sp[4] + sp[5];
239
240 /* The sum may have spilled into the 3rd byte. Fold it back. */
241 c_sum = ((c_sum & 0xffff) + (c_sum >> 16)) & 0xffff;
242 /* The sum cannot exceed 2 bytes. Fold it into a checksum */
243 c_sum2 = (c_sum >> 8) + (c_sum << 8);
244 c_sum = ((c_sum + c_sum2) >> 8) & 0xff;
245 return c_sum;
246}
247
248
249/*
250 * Find an nvram partition, sig can be 0 for any
251 * partition or name can be NULL for any name, else
252 * tries to match both
253 */
254struct nvram_partition *nvram_find_partition(int sig, const char *name)
255{
256 struct nvram_partition * part;
257 struct list_head * p;
258
259 list_for_each(p, &nvram_part->partition) {
260 part = list_entry(p, struct nvram_partition, partition);
261
262 if (sig && part->header.signature != sig)
263 continue;
264 if (name && 0 != strncmp(name, part->header.name, 12))
265 continue;
266 return part;
267 }
268 return NULL;
269}
270EXPORT_SYMBOL(nvram_find_partition);
271
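A brief usage sketch of this lookup; NVRAM_SIG_OS and the "ppc64,linux"
name both appear in the partition-creation code below:

	/* Find the Linux OS partition created by this driver, if any. */
	struct nvram_partition *part;

	part = nvram_find_partition(NVRAM_SIG_OS, "ppc64,linux");
	if (part)
		printk(KERN_DEBUG "OS partition at nvram offset %d, "
		       "%d blocks long\n", part->index,
		       part->header.length);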
272
273static int nvram_remove_os_partition(void)
274{
275 struct list_head *i;
276 struct list_head *j;
277 struct nvram_partition * part;
278 struct nvram_partition * cur_part;
279 int rc;
280
281 list_for_each(i, &nvram_part->partition) {
282 part = list_entry(i, struct nvram_partition, partition);
283 if (part->header.signature != NVRAM_SIG_OS)
284 continue;
285
286 /* Make os partition a free partition */
287 part->header.signature = NVRAM_SIG_FREE;
288 sprintf(part->header.name, "wwwwwwwwwwww");
289 part->header.checksum = nvram_checksum(&part->header);
290
291 /* Merge contiguous free partitions backwards */
292 list_for_each_prev(j, &part->partition) {
293 cur_part = list_entry(j, struct nvram_partition, partition);
294 if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
295 break;
296 }
297
298 part->header.length += cur_part->header.length;
299 part->header.checksum = nvram_checksum(&part->header);
300 part->index = cur_part->index;
301
302 list_del(&cur_part->partition);
303 kfree(cur_part);
304 j = &part->partition; /* fixup our loop */
305 }
306
307 /* Merge contiguous free partitions forwards */
308 list_for_each(j, &part->partition) {
309 cur_part = list_entry(j, struct nvram_partition, partition);
310 if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
311 break;
312 }
313
314 part->header.length += cur_part->header.length;
315 part->header.checksum = nvram_checksum(&part->header);
316
317 list_del(&cur_part->partition);
318 kfree(cur_part);
319 j = &part->partition; /* fixup our loop */
320 }
321
322 rc = nvram_write_header(part);
323 if (rc <= 0) {
324 printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc);
325 return rc;
326 }
327
328 }
329
330 return 0;
331}
332
333/* nvram_create_os_partition
334 *
335 * Create an OS (Linux) partition to buffer error logs.
336 * Will create the partition in the first free space
337 * found that is large enough.
338 */
339static int nvram_create_os_partition(void)
340{
341 struct list_head * p;
342 struct nvram_partition * part;
343 struct nvram_partition * new_part = NULL;
344 struct nvram_partition * free_part = NULL;
345 int seq_init[2] = { 0, 0 };
346 loff_t tmp_index;
347 long size = 0;
348 int rc;
349
350	/* Find a free partition that will give us the maximum needed size.
351	   If we can't find one, fall back to the minimum size needed */
352 list_for_each(p, &nvram_part->partition) {
353 part = list_entry(p, struct nvram_partition, partition);
354 if (part->header.signature != NVRAM_SIG_FREE)
355 continue;
356
357 if (part->header.length >= NVRAM_MAX_REQ) {
358 size = NVRAM_MAX_REQ;
359 free_part = part;
360 break;
361 }
362 if (!size && part->header.length >= NVRAM_MIN_REQ) {
363 size = NVRAM_MIN_REQ;
364 free_part = part;
365 }
366 }
367 if (!size) {
368 return -ENOSPC;
369 }
370
371 /* Create our OS partition */
372 new_part = (struct nvram_partition *)
373 kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
374 if (!new_part) {
375 printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n");
376 return -ENOMEM;
377 }
378
379 new_part->index = free_part->index;
380 new_part->header.signature = NVRAM_SIG_OS;
381 new_part->header.length = size;
382 sprintf(new_part->header.name, "ppc64,linux");
383 new_part->header.checksum = nvram_checksum(&new_part->header);
384
385 rc = nvram_write_header(new_part);
386 if (rc <= 0) {
387		printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
388		       "failed (%d)\n", rc);
389 return rc;
390 }
391
392	/* make sure to zero-initialize the sequence number and the
393	   error type logged */
394 tmp_index = new_part->index + NVRAM_HEADER_LEN;
395 rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index);
396 if (rc <= 0) {
397 printk(KERN_ERR "nvram_create_os_partition: nvram_write failed (%d)\n", rc);
398 return rc;
399 }
400
401 nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN;
402	nvram_error_log_size = ((new_part->header.length - 1) *
403 NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
404
405 list_add_tail(&new_part->partition, &free_part->partition);
406
407 if (free_part->header.length <= size) {
408 list_del(&free_part->partition);
409 kfree(free_part);
410 return 0;
411 }
412
413 /* Adjust the partition we stole the space from */
414 free_part->index += size * NVRAM_BLOCK_LEN;
415 free_part->header.length -= size;
416 free_part->header.checksum = nvram_checksum(&free_part->header);
417
418 rc = nvram_write_header(free_part);
419 if (rc <= 0) {
420 printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
421 "failed (%d)\n", rc);
422 return rc;
423 }
424
425 return 0;
426}
427
428
429/* nvram_setup_partition
430 *
431 * This will setup the partition we need for buffering the
432 * error logs and cleanup partitions if needed.
433 *
434 * The general strategy is the following:
435 * 1.) If there is a ppc64,linux partition large enough, then use it.
436 * 2.) If there is not a ppc64,linux partition large enough, search
437 * for a free partition that is large enough.
438 * 3.) If there is not a free partition large enough, remove
439 * _all_ OS partitions and consolidate the space.
440 * 4.) Will first try getting a chunk that will satisfy the maximum
441 * error log size (NVRAM_MAX_REQ).
442 * 5.) If the max chunk cannot be allocated, then try finding a chunk
443 * that will satisfy the minimum needed (NVRAM_MIN_REQ).
444 */
445static int nvram_setup_partition(void)
446{
447 struct list_head * p;
448 struct nvram_partition * part;
449 int rc;
450
451 /* For now, we don't do any of this on pmac, until I
452	 * have figured out if it's worth killing some unused stuff
453	 * in our nvram, as Apple-defined partitions use pretty much
454 * all of the space
455 */
456 if (systemcfg->platform == PLATFORM_POWERMAC)
457 return -ENOSPC;
458
459	/* See if we have an OS partition that meets our needs;
460	   we will try getting the max we need. If not, we'll delete
461	   partitions and try again. */
462 list_for_each(p, &nvram_part->partition) {
463 part = list_entry(p, struct nvram_partition, partition);
464 if (part->header.signature != NVRAM_SIG_OS)
465 continue;
466
467 if (strcmp(part->header.name, "ppc64,linux"))
468 continue;
469
470 if (part->header.length >= NVRAM_MIN_REQ) {
471 /* found our partition */
472 nvram_error_log_index = part->index + NVRAM_HEADER_LEN;
473 nvram_error_log_size = ((part->header.length - 1) *
474 NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
475 return 0;
476 }
477 }
478
479 /* try creating a partition with the free space we have */
480 rc = nvram_create_os_partition();
481 if (!rc) {
482 return 0;
483 }
484
485 /* need to free up some space */
486 rc = nvram_remove_os_partition();
487 if (rc) {
488 return rc;
489 }
490
491 /* create a partition in this new space */
492 rc = nvram_create_os_partition();
493 if (rc) {
494 printk(KERN_ERR "nvram_create_os_partition: Could not find a "
495 "NVRAM partition large enough\n");
496 return rc;
497 }
498
499 return 0;
500}
501
502
503static int nvram_scan_partitions(void)
504{
505 loff_t cur_index = 0;
506 struct nvram_header phead;
507 struct nvram_partition * tmp_part;
508 unsigned char c_sum;
509 char * header;
510 int total_size;
511 int err;
512
513 if (ppc_md.nvram_size == NULL)
514 return -ENODEV;
515 total_size = ppc_md.nvram_size();
516
517 header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL);
518 if (!header) {
519 printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n");
520 return -ENOMEM;
521 }
522
523 while (cur_index < total_size) {
524
525 err = ppc_md.nvram_read(header, NVRAM_HEADER_LEN, &cur_index);
526 if (err != NVRAM_HEADER_LEN) {
527 printk(KERN_ERR "nvram_scan_partitions: Error parsing "
528 "nvram partitions\n");
529 goto out;
530 }
531
532 cur_index -= NVRAM_HEADER_LEN; /* nvram_read will advance us */
533
534 memcpy(&phead, header, NVRAM_HEADER_LEN);
535
536 err = 0;
537 c_sum = nvram_checksum(&phead);
538 if (c_sum != phead.checksum) {
539 printk(KERN_WARNING "WARNING: nvram partition checksum"
540 " was %02x, should be %02x!\n",
541 phead.checksum, c_sum);
542 printk(KERN_WARNING "Terminating nvram partition scan\n");
543 goto out;
544 }
545 if (!phead.length) {
546 printk(KERN_WARNING "WARNING: nvram corruption "
547 "detected: 0-length partition\n");
548 goto out;
549 }
550 tmp_part = (struct nvram_partition *)
551 kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
552 err = -ENOMEM;
553 if (!tmp_part) {
554 printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
555 goto out;
556 }
557
558 memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN);
559 tmp_part->index = cur_index;
560 list_add_tail(&tmp_part->partition, &nvram_part->partition);
561
562 cur_index += phead.length * NVRAM_BLOCK_LEN;
563 }
564 err = 0;
565
566 out:
567 kfree(header);
568 return err;
569}
570
571static int __init nvram_init(void)
572{
573 int error;
574 int rc;
575
576 if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
577 return -ENODEV;
578
579 rc = misc_register(&nvram_dev);
580 if (rc != 0) {
581 printk(KERN_ERR "nvram_init: failed to register device\n");
582 return rc;
583 }
584
585 /* initialize our anchor for the nvram partition list */
586 nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
587 if (!nvram_part) {
588 printk(KERN_ERR "nvram_init: Failed kmalloc\n");
589 return -ENOMEM;
590 }
591 INIT_LIST_HEAD(&nvram_part->partition);
592
593 /* Get all the NVRAM partitions */
594 error = nvram_scan_partitions();
595 if (error) {
596 printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n");
597 return error;
598 }
599
600 if(nvram_setup_partition())
601 printk(KERN_WARNING "nvram_init: Could not find nvram partition"
602 " for nvram buffered error logging.\n");
603
604#ifdef DEBUG_NVRAM
605 nvram_print_partitions("NVRAM Partitions");
606#endif
607
608 return rc;
609}
610
611void __exit nvram_cleanup(void)
612{
613 misc_deregister( &nvram_dev );
614}
615
616
617#ifdef CONFIG_PPC_PSERIES
618
619/* nvram_write_error_log
620 *
621 * We need to buffer the error logs into nvram to ensure that we have
622 * the failure information to decode. If we have a severe error there
623 * is no way to guarantee that the OS or the machine is in a state to
624 * get back to user land and write the error to disk. For example, if
625 * the SCSI device driver causes a Machine Check by writing to a bad
626 * IO address, there is no way of guaranteeing that the device driver
627 * is in any state in which it would also be able to write the error
628 * data captured to disk, thus we buffer it in NVRAM for analysis on
629 * the next boot.
630 *
631 * In NVRAM the partition containing the error log buffer will look like:
632 * Header (in bytes):
633 * +-----------+----------+--------+------------+------------------+
634 * | signature | checksum | length | name | data |
635 * |0 |1 |2 3|4 15|16 length-1|
636 * +-----------+----------+--------+------------+------------------+
637 *
638 * The 'data' section would look like (in bytes):
639 * +--------------+------------+-----------------------------------+
640 * | event_logged | sequence # | error log |
641 * |0 3|4 7|8 nvram_error_log_size-1|
642 * +--------------+------------+-----------------------------------+
643 *
644 * event_logged: 0 if event has not been logged to syslog, 1 if it has
645 * sequence #: The unique sequence # for each event. (until it wraps)
646 * error log: The error log from event_scan
647 */
648int nvram_write_error_log(char * buff, int length, unsigned int err_type)
649{
650 int rc;
651 loff_t tmp_index;
652 struct err_log_info info;
653
654 if (no_logging) {
655 return -EPERM;
656 }
657
658 if (nvram_error_log_index == -1) {
659 return -ESPIPE;
660 }
661
662 if (length > nvram_error_log_size) {
663 length = nvram_error_log_size;
664 }
665
666 info.error_type = err_type;
667 info.seq_num = error_log_cnt;
668
669 tmp_index = nvram_error_log_index;
670
671 rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
672 if (rc <= 0) {
673 printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
674 return rc;
675 }
676
677 rc = ppc_md.nvram_write(buff, length, &tmp_index);
678 if (rc <= 0) {
679 printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
680 return rc;
681 }
682
683 return 0;
684}
685
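A sketch of how a caller might drive this buffer; the buffer size is an
assumption for illustration, and the error-type value would come from the
event being captured:

	/* Hypothetical caller: stash a captured event in NVRAM, then mark
	 * it as safely logged once it has reached syslog. */
	char log_buf[1024];		/* assumed capture buffer */
	unsigned int err_type = 0;	/* assumed error-type value */

	if (nvram_write_error_log(log_buf, sizeof(log_buf), err_type) == 0) {
		/* ...later, once event_scan has pushed it to syslog... */
		nvram_clear_error_log();
	}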
686/* nvram_read_error_log
687 *
688 * Reads the error log from nvram, at most 'length' bytes
689 */
690int nvram_read_error_log(char * buff, int length, unsigned int * err_type)
691{
692 int rc;
693 loff_t tmp_index;
694 struct err_log_info info;
695
696 if (nvram_error_log_index == -1)
697 return -1;
698
699 if (length > nvram_error_log_size)
700 length = nvram_error_log_size;
701
702 tmp_index = nvram_error_log_index;
703
704 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
705 if (rc <= 0) {
706 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
707 return rc;
708 }
709
710 rc = ppc_md.nvram_read(buff, length, &tmp_index);
711 if (rc <= 0) {
712 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
713 return rc;
714 }
715
716 error_log_cnt = info.seq_num;
717 *err_type = info.error_type;
718
719 return 0;
720}
721
722/* This doesn't actually zero anything, but it sets the event_logged
723 * word to indicate that this event is safely in syslog.
724 */
725int nvram_clear_error_log(void)
726{
727 loff_t tmp_index;
728 int clear_word = ERR_FLAG_ALREADY_LOGGED;
729 int rc;
730
731 tmp_index = nvram_error_log_index;
732
733 rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
734 if (rc <= 0) {
735 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
736 return rc;
737 }
738
739 return 0;
740}
741
742#endif /* CONFIG_PPC_PSERIES */
743
744module_init(nvram_init);
745module_exit(nvram_cleanup);
746MODULE_LICENSE("GPL");
diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c
new file mode 100644
index 000000000000..b27a75f1b986
--- /dev/null
+++ b/arch/ppc64/kernel/of_device.c
@@ -0,0 +1,272 @@
1#include <linux/config.h>
2#include <linux/string.h>
3#include <linux/kernel.h>
4#include <linux/init.h>
5#include <linux/module.h>
6#include <asm/errno.h>
7#include <asm/of_device.h>
8
9/**
10 * of_match_device - Tell if an of_device structure has a matching
11 * of_match structure
12 * @ids: array of of device match structures to search in
13 * @dev: the of device structure to match against
14 *
15 * Used by a driver to check whether an of_device present in the
16 * system is in its list of supported devices.
17 */
18const struct of_match * of_match_device(const struct of_match *matches,
19 const struct of_device *dev)
20{
21 if (!dev->node)
22 return NULL;
23 while (matches->name || matches->type || matches->compatible) {
24 int match = 1;
25 if (matches->name && matches->name != OF_ANY_MATCH)
26 match &= dev->node->name
27 && !strcmp(matches->name, dev->node->name);
28 if (matches->type && matches->type != OF_ANY_MATCH)
29 match &= dev->node->type
30 && !strcmp(matches->type, dev->node->type);
31 if (matches->compatible && matches->compatible != OF_ANY_MATCH)
32 match &= device_is_compatible(dev->node,
33 matches->compatible);
34 if (match)
35 return matches;
36 matches++;
37 }
38 return NULL;
39}
40
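For reference, a sketch of the match-table shape this function walks; the
names are hypothetical, and the table ends with an entry whose name, type
and compatible are all NULL:

	/* Hypothetical table: match any node named "mac-io", or any node
	 * claiming compatibility with "example,device". */
	static const struct of_match example_match[] = {
		{ .name = "mac-io", .type = OF_ANY_MATCH,
		  .compatible = OF_ANY_MATCH },
		{ .name = OF_ANY_MATCH, .type = OF_ANY_MATCH,
		  .compatible = "example,device" },
		{ .name = NULL, .type = NULL, .compatible = NULL },
	};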
41static int of_platform_bus_match(struct device *dev, struct device_driver *drv)
42{
43 struct of_device * of_dev = to_of_device(dev);
44 struct of_platform_driver * of_drv = to_of_platform_driver(drv);
45 const struct of_match * matches = of_drv->match_table;
46
47 if (!matches)
48 return 0;
49
50 return of_match_device(matches, of_dev) != NULL;
51}
52
53struct of_device *of_dev_get(struct of_device *dev)
54{
55 struct device *tmp;
56
57 if (!dev)
58 return NULL;
59 tmp = get_device(&dev->dev);
60 if (tmp)
61 return to_of_device(tmp);
62 else
63 return NULL;
64}
65
66void of_dev_put(struct of_device *dev)
67{
68 if (dev)
69 put_device(&dev->dev);
70}
71
72
73static int of_device_probe(struct device *dev)
74{
75 int error = -ENODEV;
76 struct of_platform_driver *drv;
77 struct of_device *of_dev;
78 const struct of_match *match;
79
80 drv = to_of_platform_driver(dev->driver);
81 of_dev = to_of_device(dev);
82
83 if (!drv->probe)
84 return error;
85
86 of_dev_get(of_dev);
87
88 match = of_match_device(drv->match_table, of_dev);
89 if (match)
90 error = drv->probe(of_dev, match);
91 if (error)
92 of_dev_put(of_dev);
93
94 return error;
95}
96
97static int of_device_remove(struct device *dev)
98{
99 struct of_device * of_dev = to_of_device(dev);
100 struct of_platform_driver * drv = to_of_platform_driver(dev->driver);
101
102 if (dev->driver && drv->remove)
103 drv->remove(of_dev);
104 return 0;
105}
106
107static int of_device_suspend(struct device *dev, u32 state)
108{
109 struct of_device * of_dev = to_of_device(dev);
110 struct of_platform_driver * drv = to_of_platform_driver(dev->driver);
111 int error = 0;
112
113 if (dev->driver && drv->suspend)
114 error = drv->suspend(of_dev, state);
115 return error;
116}
117
118static int of_device_resume(struct device * dev)
119{
120 struct of_device * of_dev = to_of_device(dev);
121 struct of_platform_driver * drv = to_of_platform_driver(dev->driver);
122 int error = 0;
123
124 if (dev->driver && drv->resume)
125 error = drv->resume(of_dev);
126 return error;
127}
128
129struct bus_type of_platform_bus_type = {
130 .name = "of_platform",
131 .match = of_platform_bus_match,
132 .suspend = of_device_suspend,
133 .resume = of_device_resume,
134};
135
136static int __init of_bus_driver_init(void)
137{
138 return bus_register(&of_platform_bus_type);
139}
140
141postcore_initcall(of_bus_driver_init);
142
143int of_register_driver(struct of_platform_driver *drv)
144{
145 int count = 0;
146
147 /* initialize common driver fields */
148 drv->driver.name = drv->name;
149 drv->driver.bus = &of_platform_bus_type;
150 drv->driver.probe = of_device_probe;
151 drv->driver.remove = of_device_remove;
152
153 /* register with core */
154 count = driver_register(&drv->driver);
155 return count ? count : 1;
156}
157
158void of_unregister_driver(struct of_platform_driver *drv)
159{
160 driver_unregister(&drv->driver);
161}
162
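Tying the pieces together, a hypothetical registration against this bus;
the driver name, probe body and match table are illustrative only:

	/* Hypothetical driver bound through of_platform_bus_match() above. */
	static int example_probe(struct of_device *dev,
				 const struct of_match *match)
	{
		return 0;	/* claim the device */
	}

	static struct of_platform_driver example_driver = {
		.name		= "example",
		.match_table	= example_match, /* e.g. the table sketched earlier */
		.probe		= example_probe,
	};

	/* from the driver's init code: of_register_driver(&example_driver); */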
163
164static ssize_t dev_show_devspec(struct device *dev, char *buf)
165{
166 struct of_device *ofdev;
167
168 ofdev = to_of_device(dev);
169 return sprintf(buf, "%s", ofdev->node->full_name);
170}
171
172static DEVICE_ATTR(devspec, S_IRUGO, dev_show_devspec, NULL);
173
174/**
175 * of_release_dev - free an of device structure when all users of it are finished.
176 * @dev: device that's been disconnected
177 *
178 * Will be called only by the device core when all users of this of device are
179 * done.
180 */
181void of_release_dev(struct device *dev)
182{
183 struct of_device *ofdev;
184
185 ofdev = to_of_device(dev);
186 kfree(ofdev);
187}
188
189int of_device_register(struct of_device *ofdev)
190{
191 int rc;
192 struct of_device **odprop;
193
194 BUG_ON(ofdev->node == NULL);
195
196 odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL);
197 if (!odprop) {
198 struct property *new_prop;
199
200 new_prop = kmalloc(sizeof(struct property) + sizeof(struct of_device *),
201 GFP_KERNEL);
202 if (new_prop == NULL)
203 return -ENOMEM;
204 new_prop->name = "linux,device";
205		new_prop->length = sizeof(struct of_device *);
206 new_prop->value = (unsigned char *)&new_prop[1];
207 odprop = (struct of_device **)new_prop->value;
208 *odprop = NULL;
209 prom_add_property(ofdev->node, new_prop);
210 }
211 *odprop = ofdev;
212
213 rc = device_register(&ofdev->dev);
214 if (rc)
215 return rc;
216
217 device_create_file(&ofdev->dev, &dev_attr_devspec);
218
219 return 0;
220}
221
222void of_device_unregister(struct of_device *ofdev)
223{
224 struct of_device **odprop;
225
226 device_remove_file(&ofdev->dev, &dev_attr_devspec);
227
228 odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL);
229 if (odprop)
230 *odprop = NULL;
231
232 device_unregister(&ofdev->dev);
233}
234
235struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id)
236{
237 struct of_device *dev;
238 u32 *reg;
239
240 dev = kmalloc(sizeof(*dev), GFP_KERNEL);
241 if (!dev)
242 return NULL;
243 memset(dev, 0, sizeof(*dev));
244
245 dev->node = np;
246 dev->dma_mask = 0xffffffffUL;
247 dev->dev.dma_mask = &dev->dma_mask;
248 dev->dev.parent = NULL;
249 dev->dev.bus = &of_platform_bus_type;
250 dev->dev.release = of_release_dev;
251
252 reg = (u32 *)get_property(np, "reg", NULL);
253 strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE);
254
255 if (of_device_register(dev) != 0) {
256 kfree(dev);
257 return NULL;
258 }
259
260 return dev;
261}
262
263EXPORT_SYMBOL(of_match_device);
264EXPORT_SYMBOL(of_platform_bus_type);
265EXPORT_SYMBOL(of_register_driver);
266EXPORT_SYMBOL(of_unregister_driver);
267EXPORT_SYMBOL(of_device_register);
268EXPORT_SYMBOL(of_device_unregister);
269EXPORT_SYMBOL(of_dev_get);
270EXPORT_SYMBOL(of_dev_put);
271EXPORT_SYMBOL(of_platform_device_create);
272EXPORT_SYMBOL(of_release_dev);
diff --git a/arch/ppc64/kernel/pSeries_hvCall.S b/arch/ppc64/kernel/pSeries_hvCall.S
new file mode 100644
index 000000000000..0715d3038019
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_hvCall.S
@@ -0,0 +1,123 @@
1/*
2 * arch/ppc64/kernel/pSeries_hvCall.S
3 *
4 * This file contains the generic code to perform a call to the
5 * pSeries LPAR hypervisor.
6 * NOTE: this file will go away when we move to inline this work.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <asm/hvcall.h>
14#include <asm/processor.h>
15#include <asm/ppc_asm.h>
16
17#define STK_PARM(i) (48 + ((i)-3)*8)
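
A note on the macro, assuming the standard ppc64 ELF ABI stack layout (my
annotation, not from this file):

/* C arguments arrive in r3..r10, with the ninth and later spilled to the
 * caller's stack. STK_PARM(rN) is register rN's home slot: 48 bytes of
 * fixed frame header (back chain, CR, LR, reserved words, TOC), then one
 * doubleword per argument register, so STK_PARM(r11) = 112 reaches arg8
 * of plpar_hcall_8arg_2ret below.
 */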
18
19 .text
20
21/* long plpar_hcall(unsigned long opcode, R3
22 unsigned long arg1, R4
23 unsigned long arg2, R5
24 unsigned long arg3, R6
25 unsigned long arg4, R7
26 unsigned long *out1, R8
27 unsigned long *out2, R9
28 unsigned long *out3); R10
29 */
30_GLOBAL(plpar_hcall)
31 mfcr r0
32
33 std r8,STK_PARM(r8)(r1) /* Save out ptrs */
34 std r9,STK_PARM(r9)(r1)
35 std r10,STK_PARM(r10)(r1)
36
37 stw r0,8(r1)
38
39 HVSC /* invoke the hypervisor */
40
41 lwz r0,8(r1)
42
43 ld r8,STK_PARM(r8)(r1) /* Fetch r4-r6 ret args */
44 ld r9,STK_PARM(r9)(r1)
45 ld r10,STK_PARM(r10)(r1)
46 std r4,0(r8)
47 std r5,0(r9)
48 std r6,0(r10)
49
50 mtcrf 0xff,r0
51 blr /* return r3 = status */
52
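A C-side sketch of calling the routine above; H_ENTER and H_Success come
from asm/hvcall.h, and the argument pattern mirrors the HPTE-insert path
in pSeries_lpar.c later in this commit (input values here are assumed):

	/* Opcode goes in r3, four inputs in r4-r7; three outputs come back
	 * through the pointers passed in r8-r10, with status in r3. */
	unsigned long flags = 0, hpte_group = 0;	/* assumed inputs */
	unsigned long dw0 = 0, dw1 = 0;			/* assumed HPTE words */
	unsigned long slot, dummy0, dummy1;
	long rc;

	rc = plpar_hcall(H_ENTER, flags, hpte_group, dw0, dw1,
			 &slot, &dummy0, &dummy1);
	if (rc != H_Success)
		/* hypervisor refused; see pSeries_lpar_hpte_insert() */;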
53
54/* Simple interface with no output values (other than status) */
55_GLOBAL(plpar_hcall_norets)
56 mfcr r0
57 stw r0,8(r1)
58
59 HVSC /* invoke the hypervisor */
60
61 lwz r0,8(r1)
62 mtcrf 0xff,r0
63 blr /* return r3 = status */
64
65
66/* long plpar_hcall_8arg_2ret(unsigned long opcode, R3
67 unsigned long arg1, R4
68 unsigned long arg2, R5
69 unsigned long arg3, R6
70 unsigned long arg4, R7
71 unsigned long arg5, R8
72 unsigned long arg6, R9
73 unsigned long arg7, R10
74 unsigned long arg8, 112(R1)
75 unsigned long *out1); 120(R1)
76 */
77_GLOBAL(plpar_hcall_8arg_2ret)
78 mfcr r0
79 ld r11,STK_PARM(r11)(r1) /* put arg8 in R11 */
80 stw r0,8(r1)
81
82 HVSC /* invoke the hypervisor */
83
84 lwz r0,8(r1)
85 ld r10,STK_PARM(r12)(r1) /* Fetch r4 ret arg */
86 std r4,0(r10)
87 mtcrf 0xff,r0
88 blr /* return r3 = status */
89
90
91/* long plpar_hcall_4out(unsigned long opcode, R3
92 unsigned long arg1, R4
93 unsigned long arg2, R5
94 unsigned long arg3, R6
95 unsigned long arg4, R7
96 unsigned long *out1, R8
97 unsigned long *out2, R9
98 unsigned long *out3, R10
99 unsigned long *out4); 112(R1)
100 */
101_GLOBAL(plpar_hcall_4out)
102 mfcr r0
103 stw r0,8(r1)
104
105 std r8,STK_PARM(r8)(r1) /* Save out ptrs */
106 std r9,STK_PARM(r9)(r1)
107 std r10,STK_PARM(r10)(r1)
108
109 HVSC /* invoke the hypervisor */
110
111 lwz r0,8(r1)
112
113 ld r8,STK_PARM(r8)(r1) /* Fetch r4-r7 ret args */
114 ld r9,STK_PARM(r9)(r1)
115 ld r10,STK_PARM(r10)(r1)
116 ld r11,STK_PARM(r11)(r1)
117 std r4,0(r8)
118 std r5,0(r9)
119 std r6,0(r10)
120 std r7,0(r11)
121
122 mtcrf 0xff,r0
123 blr /* return r3 = status */
diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c
new file mode 100644
index 000000000000..69130522a87e
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_iommu.c
@@ -0,0 +1,570 @@
1/*
2 * arch/ppc64/kernel/pSeries_iommu.c
3 *
4 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
5 *
6 * Rewrite, cleanup:
7 *
8 * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
9 *
10 * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/config.h>
29#include <linux/init.h>
30#include <linux/types.h>
31#include <linux/slab.h>
32#include <linux/mm.h>
33#include <linux/spinlock.h>
34#include <linux/string.h>
35#include <linux/pci.h>
36#include <linux/dma-mapping.h>
37#include <asm/io.h>
38#include <asm/prom.h>
39#include <asm/rtas.h>
40#include <asm/ppcdebug.h>
41#include <asm/iommu.h>
42#include <asm/pci-bridge.h>
43#include <asm/machdep.h>
44#include <asm/abs_addr.h>
45#include <asm/plpar_wrappers.h>
46#include <asm/pSeries_reconfig.h>
47#include <asm/systemcfg.h>
48#include "pci.h"
49
50#define DBG(fmt...)
51
52extern int is_python(struct device_node *);
53
54static void tce_build_pSeries(struct iommu_table *tbl, long index,
55 long npages, unsigned long uaddr,
56 enum dma_data_direction direction)
57{
58 union tce_entry t;
59 union tce_entry *tp;
60
61 t.te_word = 0;
62	t.te_rdwr = 1;		/* Read allowed */
63
64 if (direction != DMA_TO_DEVICE)
65 t.te_pciwr = 1;
66
67 tp = ((union tce_entry *)tbl->it_base) + index;
68
69 while (npages--) {
70 /* can't move this out since we might cross LMB boundary */
71 t.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT;
72
73 tp->te_word = t.te_word;
74
75 uaddr += PAGE_SIZE;
76 tp++;
77 }
78}
79
80
81static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
82{
83 union tce_entry t;
84 union tce_entry *tp;
85
86 t.te_word = 0;
87 tp = ((union tce_entry *)tbl->it_base) + index;
88
89 while (npages--) {
90 tp->te_word = t.te_word;
91
92 tp++;
93 }
94}
95
96
97static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
98 long npages, unsigned long uaddr,
99 enum dma_data_direction direction)
100{
101 u64 rc;
102 union tce_entry tce;
103
104 tce.te_word = 0;
105 tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT;
106 tce.te_rdwr = 1;
107 if (direction != DMA_TO_DEVICE)
108 tce.te_pciwr = 1;
109
110 while (npages--) {
111 rc = plpar_tce_put((u64)tbl->it_index,
112 (u64)tcenum << 12,
113 tce.te_word );
114
115 if (rc && printk_ratelimit()) {
116 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
117 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
118 printk("\ttcenum = 0x%lx\n", (u64)tcenum);
119 printk("\ttce val = 0x%lx\n", tce.te_word );
120 show_stack(current, (unsigned long *)__get_SP());
121 }
122
123 tcenum++;
124 tce.te_rpn++;
125 }
126}
127
128static DEFINE_PER_CPU(void *, tce_page) = NULL;
129
130static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
131 long npages, unsigned long uaddr,
132 enum dma_data_direction direction)
133{
134 u64 rc;
135 union tce_entry tce, *tcep;
136 long l, limit;
137
138 if (npages == 1)
139 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
140 direction);
141
142 tcep = __get_cpu_var(tce_page);
143
144 /* This is safe to do since interrupts are off when we're called
145 * from iommu_alloc{,_sg}()
146 */
147 if (!tcep) {
148 tcep = (void *)__get_free_page(GFP_ATOMIC);
149 /* If allocation fails, fall back to the loop implementation */
150 if (!tcep)
151 return tce_build_pSeriesLP(tbl, tcenum, npages,
152 uaddr, direction);
153 __get_cpu_var(tce_page) = tcep;
154 }
155
156 tce.te_word = 0;
157 tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT;
158 tce.te_rdwr = 1;
159 if (direction != DMA_TO_DEVICE)
160 tce.te_pciwr = 1;
161
162 /* We can map max one pageful of TCEs at a time */
163 do {
164 /*
165 * Set up the page with TCE data, looping through and setting
166 * the values.
167 */
168 limit = min_t(long, npages, PAGE_SIZE/sizeof(union tce_entry));
169
170 for (l = 0; l < limit; l++) {
171 tcep[l] = tce;
172 tce.te_rpn++;
173 }
174
175 rc = plpar_tce_put_indirect((u64)tbl->it_index,
176 (u64)tcenum << 12,
177 (u64)virt_to_abs(tcep),
178 limit);
179
180 npages -= limit;
181 tcenum += limit;
182 } while (npages > 0 && !rc);
183
184 if (rc && printk_ratelimit()) {
185 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
186 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
187 printk("\tnpages = 0x%lx\n", (u64)npages);
188 printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word);
189 show_stack(current, (unsigned long *)__get_SP());
190 }
191}
192
193static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
194{
195 u64 rc;
196 union tce_entry tce;
197
198 tce.te_word = 0;
199
200 while (npages--) {
201 rc = plpar_tce_put((u64)tbl->it_index,
202 (u64)tcenum << 12,
203 tce.te_word);
204
205 if (rc && printk_ratelimit()) {
206 printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
207 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
208 printk("\ttcenum = 0x%lx\n", (u64)tcenum);
209 printk("\ttce val = 0x%lx\n", tce.te_word );
210 show_stack(current, (unsigned long *)__get_SP());
211 }
212
213 tcenum++;
214 }
215}
216
217
218static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
219{
220 u64 rc;
221 union tce_entry tce;
222
223 tce.te_word = 0;
224
225 rc = plpar_tce_stuff((u64)tbl->it_index,
226 (u64)tcenum << 12,
227 tce.te_word,
228 npages);
229
230 if (rc && printk_ratelimit()) {
231 printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
232 printk("\trc = %ld\n", rc);
233 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
234 printk("\tnpages = 0x%lx\n", (u64)npages);
235 printk("\ttce val = 0x%lx\n", tce.te_word );
236 show_stack(current, (unsigned long *)__get_SP());
237 }
238}
239
240static void iommu_table_setparms(struct pci_controller *phb,
241 struct device_node *dn,
242 struct iommu_table *tbl)
243{
244 struct device_node *node;
245 unsigned long *basep;
246 unsigned int *sizep;
247
248 node = (struct device_node *)phb->arch_data;
249
250 basep = (unsigned long *)get_property(node, "linux,tce-base", NULL);
251 sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL);
252 if (basep == NULL || sizep == NULL) {
253 printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has "
254 "missing tce entries !\n", dn->full_name);
255 return;
256 }
257
258 tbl->it_base = (unsigned long)__va(*basep);
259 memset((void *)tbl->it_base, 0, *sizep);
260
261 tbl->it_busno = phb->bus->number;
262
263 /* Units of tce entries */
264 tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT;
265
266 /* Test if we are going over 2GB of DMA space */
267 if (phb->dma_window_base_cur + phb->dma_window_size > (1L << 31))
268 panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
269
270 phb->dma_window_base_cur += phb->dma_window_size;
271
272 /* Set the tce table size - measured in entries */
273 tbl->it_size = phb->dma_window_size >> PAGE_SHIFT;
274
275 tbl->it_index = 0;
276 tbl->it_blocksize = 16;
277 tbl->it_type = TCE_PCI;
278}
279
280/*
281 * iommu_table_setparms_lpar
282 *
283 * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
284 *
285 * ToDo: properly interpret the ibm,dma-window property. The definition is:
286 * logical-bus-number (1 word)
287 * phys-address (#address-cells words)
288 * size (#cell-size words)
289 *
290 * Currently we hard code these sizes (more or less).
291 */
292static void iommu_table_setparms_lpar(struct pci_controller *phb,
293 struct device_node *dn,
294 struct iommu_table *tbl,
295 unsigned int *dma_window)
296{
297 tbl->it_busno = dn->bussubno;
298
299 /* TODO: Parse field size properties properly. */
300 tbl->it_size = (((unsigned long)dma_window[4] << 32) |
301 (unsigned long)dma_window[5]) >> PAGE_SHIFT;
302 tbl->it_offset = (((unsigned long)dma_window[2] << 32) |
303 (unsigned long)dma_window[3]) >> PAGE_SHIFT;
304 tbl->it_base = 0;
305 tbl->it_index = dma_window[0];
306 tbl->it_blocksize = 16;
307 tbl->it_type = TCE_PCI;
308}
309
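An annotated reading of the hard-coded cell indices above; the layout
implies #address-cells = 3 and #size-cells = 2 on the node carrying
ibm,dma-window (my inference, not stated in the source):

	/* dma_window[] cells (u32 each):
	 *   [0]       logical-bus-number           -> tbl->it_index
	 *   [1]       phys-address, highest cell      (ignored here)
	 *   [2],[3]   phys-address, low 64 bits    -> tbl->it_offset (pages)
	 *   [4],[5]   size, 64 bits                -> tbl->it_size   (pages)
	 */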
310static void iommu_bus_setup_pSeries(struct pci_bus *bus)
311{
312 struct device_node *dn, *pdn;
313 struct iommu_table *tbl;
314
315 DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self);
316
317 /* For each (root) bus, we carve up the available DMA space in 256MB
318 * pieces. Since each piece is used by one (sub) bus/device, that would
319 * give a maximum of 7 devices per PHB. In most cases, this is plenty.
320 *
321 * The exception is on Python PHBs (pre-POWER4). Here we don't have EADS
322 * bridges below the PHB to allocate the sectioned tables to, so instead
323 * we allocate a 1GB table at the PHB level.
324 */
325
326 dn = pci_bus_to_OF_node(bus);
327
328 if (!bus->self) {
329 /* Root bus */
330 if (is_python(dn)) {
331 unsigned int *iohole;
332
333 DBG("Python root bus %s\n", bus->name);
334
335 iohole = (unsigned int *)get_property(dn, "io-hole", 0);
336
337 if (iohole) {
338 /* On first bus we need to leave room for the
339 * ISA address space. Just skip the first 256MB
340				 * altogether. This leaves 768MB for the window.
341 */
342 DBG("PHB has io-hole, reserving 256MB\n");
343 dn->phb->dma_window_size = 3 << 28;
344 dn->phb->dma_window_base_cur = 1 << 28;
345 } else {
346 /* 1GB window by default */
347 dn->phb->dma_window_size = 1 << 30;
348 dn->phb->dma_window_base_cur = 0;
349 }
350
351 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
352
353 iommu_table_setparms(dn->phb, dn, tbl);
354 dn->iommu_table = iommu_init_table(tbl);
355 } else {
356 /* Do a 128MB table at root. This is used for the IDE
357 * controller on some SMP-mode POWER4 machines. It
358 * doesn't hurt to allocate it on other machines
359 * -- it'll just be unused since new tables are
360 * allocated on the EADS level.
361 *
362 * Allocate at offset 128MB to avoid having to deal
363 * with ISA holes; 128MB table for IDE is plenty.
364 */
365 dn->phb->dma_window_size = 1 << 27;
366 dn->phb->dma_window_base_cur = 1 << 27;
367
368 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
369
370 iommu_table_setparms(dn->phb, dn, tbl);
371 dn->iommu_table = iommu_init_table(tbl);
372
373 /* All child buses have 256MB tables */
374 dn->phb->dma_window_size = 1 << 28;
375 }
376 } else {
377 pdn = pci_bus_to_OF_node(bus->parent);
378
379 if (!bus->parent->self && !is_python(pdn)) {
380 struct iommu_table *tbl;
381 /* First child and not python means this is the EADS
382 * level. Allocate new table for this slot with 256MB
383 * window.
384 */
385
386 tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
387
388 iommu_table_setparms(dn->phb, dn, tbl);
389
390 dn->iommu_table = iommu_init_table(tbl);
391 } else {
392 /* Lower than first child or under python, use parent table */
393 dn->iommu_table = pdn->iommu_table;
394 }
395 }
396}
397
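For reference, the shift arithmetic used by the function above (my
annotation, not from the source):

	/* 1 << 27 = 128MB, 1 << 28 = 256MB, 3 << 28 = 768MB, 1 << 30 = 1GB.
	 * The io-hole case therefore starts the window at 256MB and sizes
	 * it at 768MB, staying under the 2GB check in iommu_table_setparms().
	 */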
398
399static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus)
400{
401 struct iommu_table *tbl;
402 struct device_node *dn, *pdn;
403 unsigned int *dma_window = NULL;
404
405 DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self);
406
407 dn = pci_bus_to_OF_node(bus);
408
409 /* Find nearest ibm,dma-window, walking up the device tree */
410 for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
411 dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL);
412 if (dma_window != NULL)
413 break;
414 }
415
416 if (dma_window == NULL) {
417 DBG("iommu_bus_setup_pSeriesLP: bus %s seems to have no ibm,dma-window property\n", dn->full_name);
418 return;
419 }
420
421 if (!pdn->iommu_table) {
422 /* Bussubno hasn't been copied yet.
423 * Do it now because iommu_table_setparms_lpar needs it.
424 */
425 pdn->bussubno = bus->number;
426
427 tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
428 GFP_KERNEL);
429
430 iommu_table_setparms_lpar(pdn->phb, pdn, tbl, dma_window);
431
432 pdn->iommu_table = iommu_init_table(tbl);
433 }
434
435 if (pdn != dn)
436 dn->iommu_table = pdn->iommu_table;
437}
438
439
440static void iommu_dev_setup_pSeries(struct pci_dev *dev)
441{
442 struct device_node *dn, *mydn;
443
444 DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, dev->pretty_name);
445 /* Now copy the iommu_table ptr from the bus device down to the
446 * pci device_node. This means get_iommu_table() won't need to search
447 * up the device tree to find it.
448 */
449 mydn = dn = pci_device_to_OF_node(dev);
450
451 while (dn && dn->iommu_table == NULL)
452 dn = dn->parent;
453
454 if (dn) {
455 mydn->iommu_table = dn->iommu_table;
456 } else {
457 DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, dev->pretty_name);
458 }
459}
460
461static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
462{
463 int err = NOTIFY_OK;
464 struct device_node *np = node;
465
466 switch (action) {
467 case PSERIES_RECONFIG_REMOVE:
468 if (np->iommu_table &&
469 get_property(np, "ibm,dma-window", NULL))
470 iommu_free_table(np);
471 break;
472 default:
473 err = NOTIFY_DONE;
474 break;
475 }
476 return err;
477}
478
479static struct notifier_block iommu_reconfig_nb = {
480 .notifier_call = iommu_reconfig_notifier,
481};
482
483static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev)
484{
485 struct device_node *pdn, *dn;
486 struct iommu_table *tbl;
487 int *dma_window = NULL;
488
489 DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, dev->pretty_name);
490
491 /* dev setup for LPAR is a little tricky, since the device tree might
492	 * contain the dma-window properties per-device and not necessarily
493 * for the bus. So we need to search upwards in the tree until we
494 * either hit a dma-window property, OR find a parent with a table
495 * already allocated.
496 */
497 dn = pci_device_to_OF_node(dev);
498
499 for (pdn = dn; pdn && !pdn->iommu_table; pdn = pdn->parent) {
500 dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL);
501 if (dma_window)
502 break;
503 }
504
505	/* Check for parent == NULL so we don't try to set up the empty EADS
506 * slots on POWER4 machines.
507 */
508 if (dma_window == NULL || pdn->parent == NULL) {
509 /* Fall back to regular (non-LPAR) dev setup */
510 DBG("No dma window for device, falling back to regular setup\n");
511 iommu_dev_setup_pSeries(dev);
512 return;
513 } else {
514 DBG("Found DMA window, allocating table\n");
515 }
516
517 if (!pdn->iommu_table) {
518 /* iommu_table_setparms_lpar needs bussubno. */
519 pdn->bussubno = pdn->phb->bus->number;
520
521 tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
522 GFP_KERNEL);
523
524 iommu_table_setparms_lpar(pdn->phb, pdn, tbl, dma_window);
525
526 pdn->iommu_table = iommu_init_table(tbl);
527 }
528
529 if (pdn != dn)
530 dn->iommu_table = pdn->iommu_table;
531}
532
533static void iommu_bus_setup_null(struct pci_bus *b) { }
534static void iommu_dev_setup_null(struct pci_dev *d) { }
535
536/* These are called very early. */
537void iommu_init_early_pSeries(void)
538{
539 if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) {
540 /* Direct I/O, IOMMU off */
541 ppc_md.iommu_dev_setup = iommu_dev_setup_null;
542 ppc_md.iommu_bus_setup = iommu_bus_setup_null;
543 pci_direct_iommu_init();
544
545 return;
546 }
547
548 if (systemcfg->platform & PLATFORM_LPAR) {
549 if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) {
550 ppc_md.tce_build = tce_buildmulti_pSeriesLP;
551 ppc_md.tce_free = tce_freemulti_pSeriesLP;
552 } else {
553 ppc_md.tce_build = tce_build_pSeriesLP;
554 ppc_md.tce_free = tce_free_pSeriesLP;
555 }
556 ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP;
557 ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP;
558 } else {
559 ppc_md.tce_build = tce_build_pSeries;
560 ppc_md.tce_free = tce_free_pSeries;
561 ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries;
562 ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries;
563 }
564
565
566 pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
567
568 pci_iommu_init();
569}
570
diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c
new file mode 100644
index 000000000000..6534812db437
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_lpar.c
@@ -0,0 +1,531 @@
1/*
2 * pSeries_lpar.c
3 * Copyright (C) 2001 Todd Inglett, IBM Corporation
4 *
5 * pSeries LPAR support.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#define DEBUG
23
24#include <linux/config.h>
25#include <linux/kernel.h>
26#include <linux/dma-mapping.h>
27#include <asm/processor.h>
28#include <asm/mmu.h>
29#include <asm/page.h>
30#include <asm/pgtable.h>
31#include <asm/machdep.h>
32#include <asm/abs_addr.h>
33#include <asm/mmu_context.h>
34#include <asm/ppcdebug.h>
35#include <asm/iommu.h>
36#include <asm/tlbflush.h>
37#include <asm/tlb.h>
38#include <asm/prom.h>
39#include <asm/abs_addr.h>
40#include <asm/cputable.h>
41#include <asm/plpar_wrappers.h>
42
43#ifdef DEBUG
44#define DBG(fmt...) udbg_printf(fmt)
45#else
46#define DBG(fmt...)
47#endif
48
49/* in pSeries_hvCall.S */
50EXPORT_SYMBOL(plpar_hcall);
51EXPORT_SYMBOL(plpar_hcall_4out);
52EXPORT_SYMBOL(plpar_hcall_norets);
53EXPORT_SYMBOL(plpar_hcall_8arg_2ret);
54
55extern void fw_feature_init(void);
56extern void pSeries_find_serial_port(void);
57
58
59int vtermno; /* virtual terminal# for udbg */
60
61#define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
62static void udbg_hvsi_putc(unsigned char c)
63{
64	/* packet's seqno isn't used anyway */
65 uint8_t packet[] __ALIGNED__ = { 0xff, 5, 0, 0, c };
66 int rc;
67
68 if (c == '\n')
69 udbg_hvsi_putc('\r');
70
71 do {
72 rc = plpar_put_term_char(vtermno, sizeof(packet), packet);
73 } while (rc == H_Busy);
74}
75
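The hard-coded bytes above decode as follows (my reading, consistent with
the 4-byte header stripped in udbg_hvsi_getc_poll() below):

/* HVSI data packet:
 *   byte 0:    0xff, the data-packet type
 *   byte 1:    5, total packet length including this header
 *   bytes 2-3: sequence number (unused here, left as 0)
 *   byte 4:    the payload character
 */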
76static long hvsi_udbg_buf_len;
77static uint8_t hvsi_udbg_buf[256];
78
79static int udbg_hvsi_getc_poll(void)
80{
81 unsigned char ch;
82 int rc, i;
83
84 if (hvsi_udbg_buf_len == 0) {
85 rc = plpar_get_term_char(vtermno, &hvsi_udbg_buf_len, hvsi_udbg_buf);
86 if (rc != H_Success || hvsi_udbg_buf[0] != 0xff) {
87 /* bad read or non-data packet */
88 hvsi_udbg_buf_len = 0;
89 } else {
90 /* remove the packet header */
91 for (i = 4; i < hvsi_udbg_buf_len; i++)
92 hvsi_udbg_buf[i-4] = hvsi_udbg_buf[i];
93 hvsi_udbg_buf_len -= 4;
94 }
95 }
96
97 if (hvsi_udbg_buf_len <= 0 || hvsi_udbg_buf_len > 256) {
98 /* no data ready */
99 hvsi_udbg_buf_len = 0;
100 return -1;
101 }
102
103 ch = hvsi_udbg_buf[0];
104 /* shift remaining data down */
105 for (i = 1; i < hvsi_udbg_buf_len; i++) {
106 hvsi_udbg_buf[i-1] = hvsi_udbg_buf[i];
107 }
108 hvsi_udbg_buf_len--;
109
110 return ch;
111}
112
113static unsigned char udbg_hvsi_getc(void)
114{
115 int ch;
116 for (;;) {
117 ch = udbg_hvsi_getc_poll();
118 if (ch == -1) {
119 /* This shouldn't be needed...but... */
120 volatile unsigned long delay;
121 for (delay=0; delay < 2000000; delay++)
122 ;
123 } else {
124 return ch;
125 }
126 }
127}
128
129static void udbg_putcLP(unsigned char c)
130{
131 char buf[16];
132 unsigned long rc;
133
134 if (c == '\n')
135 udbg_putcLP('\r');
136
137 buf[0] = c;
138 do {
139 rc = plpar_put_term_char(vtermno, 1, buf);
140 } while(rc == H_Busy);
141}
142
143/* Buffered chars getc */
144static long inbuflen;
145static long inbuf[2]; /* must be 2 longs */
146
147static int udbg_getc_pollLP(void)
148{
149 /* The interface is tricky because it may return up to 16 chars.
150 * We save them statically for future calls to udbg_getc().
151 */
152 char ch, *buf = (char *)inbuf;
153 int i;
154 long rc;
155 if (inbuflen == 0) {
156 /* get some more chars. */
157 inbuflen = 0;
158 rc = plpar_get_term_char(vtermno, &inbuflen, buf);
159 if (rc != H_Success)
160 inbuflen = 0; /* otherwise inbuflen is garbage */
161 }
162 if (inbuflen <= 0 || inbuflen > 16) {
163 /* Catch error case as well as other oddities (corruption) */
164 inbuflen = 0;
165 return -1;
166 }
167 ch = buf[0];
168 for (i = 1; i < inbuflen; i++) /* shuffle them down. */
169 buf[i-1] = buf[i];
170 inbuflen--;
171 return ch;
172}
173
174static unsigned char udbg_getcLP(void)
175{
176 int ch;
177 for (;;) {
178 ch = udbg_getc_pollLP();
179 if (ch == -1) {
180 /* This shouldn't be needed...but... */
181 volatile unsigned long delay;
182 for (delay=0; delay < 2000000; delay++)
183 ;
184 } else {
185 return ch;
186 }
187 }
188}
189
190/* call this from early_init() for a working debug console on
191 * vterm capable LPAR machines
192 */
193void udbg_init_debug_lpar(void)
194{
195 vtermno = 0;
196 ppc_md.udbg_putc = udbg_putcLP;
197 ppc_md.udbg_getc = udbg_getcLP;
198 ppc_md.udbg_getc_poll = udbg_getc_pollLP;
199}
200
201/* returns 0 if couldn't find or use /chosen/stdout as console */
202int find_udbg_vterm(void)
203{
204 struct device_node *stdout_node;
205 u32 *termno;
206 char *name;
207 int found = 0;
208
209 /* find the boot console from /chosen/stdout */
210 if (!of_chosen)
211 return 0;
212 name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
213 if (name == NULL)
214 return 0;
215 stdout_node = of_find_node_by_path(name);
216 if (!stdout_node)
217 return 0;
218
219 /* now we have the stdout node; figure out what type of device it is. */
220 name = (char *)get_property(stdout_node, "name", NULL);
221 if (!name) {
222 printk(KERN_WARNING "stdout node missing 'name' property!\n");
223 goto out;
224 }
225
226 if (strncmp(name, "vty", 3) == 0) {
227 if (device_is_compatible(stdout_node, "hvterm1")) {
228 termno = (u32 *)get_property(stdout_node, "reg", NULL);
229 if (termno) {
230 vtermno = termno[0];
231 ppc_md.udbg_putc = udbg_putcLP;
232 ppc_md.udbg_getc = udbg_getcLP;
233 ppc_md.udbg_getc_poll = udbg_getc_pollLP;
234 found = 1;
235 }
236 } else if (device_is_compatible(stdout_node, "hvterm-protocol")) {
237 termno = (u32 *)get_property(stdout_node, "reg", NULL);
238 if (termno) {
239 vtermno = termno[0];
240 ppc_md.udbg_putc = udbg_hvsi_putc;
241 ppc_md.udbg_getc = udbg_hvsi_getc;
242 ppc_md.udbg_getc_poll = udbg_hvsi_getc_poll;
243 found = 1;
244 }
245 }
246	} else if (strncmp(name, "serial", 6) == 0) {
247 /* XXX fix ISA serial console */
248 printk(KERN_WARNING "serial stdout on LPAR ('%s')! "
249 "can't print udbg messages\n",
250 stdout_node->full_name);
251 } else {
252 printk(KERN_WARNING "don't know how to print to stdout '%s'\n",
253 stdout_node->full_name);
254 }
255
256out:
257 of_node_put(stdout_node);
258 return found;
259}
260
261void vpa_init(int cpu)
262{
263 int hwcpu = get_hard_smp_processor_id(cpu);
264 unsigned long vpa = (unsigned long)&(paca[cpu].lppaca);
265 long ret;
266 unsigned long flags;
267
268 /* Register the Virtual Processor Area (VPA) */
269 flags = 1UL << (63 - 18);
270 ret = register_vpa(flags, hwcpu, __pa(vpa));
271
272 if (ret)
273 printk(KERN_ERR "WARNING: vpa_init: VPA registration for "
274 "cpu %d (hw %d) of area %lx returns %ld\n",
275 cpu, hwcpu, __pa(vpa), ret);
276}
277
278long pSeries_lpar_hpte_insert(unsigned long hpte_group,
279 unsigned long va, unsigned long prpn,
280 int secondary, unsigned long hpteflags,
281 int bolted, int large)
282{
283 unsigned long arpn = physRpn_to_absRpn(prpn);
284 unsigned long lpar_rc;
285 unsigned long flags;
286 unsigned long slot;
287 HPTE lhpte;
288 unsigned long dummy0, dummy1;
289
290 /* Fill in the local HPTE with absolute rpn, avpn and flags */
291 lhpte.dw1.dword1 = 0;
292 lhpte.dw1.dw1.rpn = arpn;
293 lhpte.dw1.flags.flags = hpteflags;
294
295 lhpte.dw0.dword0 = 0;
296 lhpte.dw0.dw0.avpn = va >> 23;
297 lhpte.dw0.dw0.h = secondary;
298 lhpte.dw0.dw0.bolted = bolted;
299 lhpte.dw0.dw0.v = 1;
300
301 if (large) {
302 lhpte.dw0.dw0.l = 1;
303 lhpte.dw0.dw0.avpn &= ~0x1UL;
304 }
305
306 /* Now fill in the actual HPTE */
307 /* Set CEC cookie to 0 */
308 /* Zero page = 0 */
309 /* I-cache Invalidate = 0 */
310 /* I-cache synchronize = 0 */
311 /* Exact = 0 */
312 flags = 0;
313
314 /* XXX why is this here? - Anton */
315 if (hpteflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
316 lhpte.dw1.flags.flags &= ~_PAGE_COHERENT;
317
318 lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, lhpte.dw0.dword0,
319 lhpte.dw1.dword1, &slot, &dummy0, &dummy1);
320
321 if (unlikely(lpar_rc == H_PTEG_Full))
322 return -1;
323
324 /*
325	 * Since we try to ioremap PHBs we don't own, the pte insert
326 * will fail. However we must catch the failure in hash_page
327 * or we will loop forever, so return -2 in this case.
328 */
329 if (unlikely(lpar_rc != H_Success))
330 return -2;
331
332 /* Because of iSeries, we have to pass down the secondary
333 * bucket bit here as well
334 */
335 return (slot & 7) | (secondary << 3);
336}
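/*
 * Illustrative sketch: the return value above packs the slot within the
 * 8-entry PTE group into the low three bits and the secondary-hash flag
 * into bit 3.  These hypothetical decoders only restate that encoding;
 * the names are not kernel APIs.
 */
#if 0
static inline int hpte_slot_in_group(long rc)
{
	return rc & 7;			/* slot 0..7 within the PTE group */
}

static inline int hpte_used_secondary(long rc)
{
	return (rc >> 3) & 1;		/* 1 if the secondary bucket was used */
}
#endif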
337
338static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);
339
340static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
341{
342 unsigned long slot_offset;
343 unsigned long lpar_rc;
344 int i;
345 unsigned long dummy1, dummy2;
346
347 /* pick a random slot to start at */
348 slot_offset = mftb() & 0x7;
349
350 for (i = 0; i < HPTES_PER_GROUP; i++) {
351
352 /* don't remove a bolted entry */
353 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
354 (0x1UL << 4), &dummy1, &dummy2);
355
356 if (lpar_rc == H_Success)
357 return i;
358
359 BUG_ON(lpar_rc != H_Not_Found);
360
361 slot_offset++;
362 slot_offset &= 0x7;
363 }
364
365 return -1;
366}
367
368static void pSeries_lpar_hptab_clear(void)
369{
370 unsigned long size_bytes = 1UL << ppc64_pft_size;
371 unsigned long hpte_count = size_bytes >> 4;
372 unsigned long dummy1, dummy2;
373	unsigned long i;
374
375 /* TODO: Use bulk call */
376 for (i = 0; i < hpte_count; i++)
377 plpar_pte_remove(0, i, 0, &dummy1, &dummy2);
378}
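/*
 * Illustrative arithmetic: each HPTE is 16 bytes, hence the ">> 4"
 * above.  With ppc64_pft_size == 28, for example, the table is
 * 1UL << 28 == 256MB and holds 16M entries.  A sketch of the same
 * computation (the helper name is illustrative only):
 */
#if 0
static unsigned long hpte_count_for_pft_size(int pft_size)
{
	unsigned long size_bytes = 1UL << pft_size;	/* 1 << 28 == 256MB */

	return size_bytes >> 4;		/* 16-byte HPTEs: 256MB -> 16M entries */
}
#endif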
379
380/*
381 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
382 * the low 3 bits of flags happen to line up. So no transform is needed.
383 * We can probably optimize here and assume the high bits of newpp are
384 * already zero. For now I am paranoid.
385 */
386static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp,
387 unsigned long va, int large, int local)
388{
389 unsigned long lpar_rc;
390 unsigned long flags = (newpp & 7) | H_AVPN;
391 unsigned long avpn = va >> 23;
392
393 if (large)
394 avpn &= ~0x1UL;
395
396 lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7));
397
398 if (lpar_rc == H_Not_Found)
399 return -1;
400
401 BUG_ON(lpar_rc != H_Success);
402
403 return 0;
404}
405
406static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
407{
408 unsigned long dword0;
409 unsigned long lpar_rc;
410 unsigned long dummy_word1;
411 unsigned long flags;
412
413 /* Read 1 pte at a time */
414 /* Do not need RPN to logical page translation */
415 /* No cross CEC PFT access */
416 flags = 0;
417
418 lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);
419
420 BUG_ON(lpar_rc != H_Success);
421
422 return dword0;
423}
424
425static long pSeries_lpar_hpte_find(unsigned long vpn)
426{
427 unsigned long hash;
428 unsigned long i, j;
429 long slot;
430 union {
431 unsigned long dword0;
432 Hpte_dword0 dw0;
433 } hpte_dw0;
434 Hpte_dword0 dw0;
435
436 hash = hpt_hash(vpn, 0);
437
438 for (j = 0; j < 2; j++) {
439 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
440 for (i = 0; i < HPTES_PER_GROUP; i++) {
441 hpte_dw0.dword0 = pSeries_lpar_hpte_getword0(slot);
442 dw0 = hpte_dw0.dw0;
443
444 if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
445 (dw0.h == j)) {
446 /* HPTE matches */
447 if (j)
448 slot = -slot;
449 return slot;
450 }
451 ++slot;
452 }
453 hash = ~hash;
454 }
455
456 return -1;
457}
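/*
 * Illustrative sketch: the secondary hash is the one's complement of the
 * primary hash, and a match in the secondary bucket is flagged by
 * negating the returned slot.  A hypothetical caller would decode it as
 * below (note that slot 0 of group 0 is ambiguous under this scheme, a
 * property the original encoding shares):
 */
#if 0
static void decode_found_slot(long slot, unsigned long *abs_slot, int *secondary)
{
	*secondary = slot < 0;
	*abs_slot = *secondary ? -slot : slot;
}
#endif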
458
459static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
460 unsigned long ea)
461{
462 unsigned long lpar_rc;
463 unsigned long vsid, va, vpn, flags;
464 long slot;
465
466 vsid = get_kernel_vsid(ea);
467 va = (vsid << 28) | (ea & 0x0fffffff);
468 vpn = va >> PAGE_SHIFT;
469
470 slot = pSeries_lpar_hpte_find(vpn);
471 BUG_ON(slot == -1);
472
473 flags = newpp & 7;
474 lpar_rc = plpar_pte_protect(flags, slot, 0);
475
476 BUG_ON(lpar_rc != H_Success);
477}
478
479static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
480 int large, int local)
481{
482 unsigned long avpn = va >> 23;
483 unsigned long lpar_rc;
484 unsigned long dummy1, dummy2;
485
486 if (large)
487 avpn &= ~0x1UL;
488
489 lpar_rc = plpar_pte_remove(H_AVPN, slot, (avpn << 7), &dummy1,
490 &dummy2);
491
492 if (lpar_rc == H_Not_Found)
493 return;
494
495 BUG_ON(lpar_rc != H_Success);
496}
497
498/*
499 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
500 * lock.
501 */
502void pSeries_lpar_flush_hash_range(unsigned long context, unsigned long number,
503 int local)
504{
505 int i;
506 unsigned long flags = 0;
507 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
508 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
509
510 if (lock_tlbie)
511 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
512
513 for (i = 0; i < number; i++)
514 flush_hash_page(context, batch->addr[i], batch->pte[i], local);
515
516 if (lock_tlbie)
517 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
518}
519
520void hpte_init_lpar(void)
521{
522 ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate;
523 ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp;
524 ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
525 ppc_md.hpte_insert = pSeries_lpar_hpte_insert;
526 ppc_md.hpte_remove = pSeries_lpar_hpte_remove;
527 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range;
528 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear;
529
530 htab_finish_init();
531}
diff --git a/arch/ppc64/kernel/pSeries_nvram.c b/arch/ppc64/kernel/pSeries_nvram.c
new file mode 100644
index 000000000000..18abfb1f4e24
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_nvram.c
@@ -0,0 +1,148 @@
1/*
2 * (c) 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * /dev/nvram driver for PPC64
10 *
11 * This perhaps should live in drivers/char
12 */
13
14
15#include <linux/types.h>
16#include <linux/errno.h>
17#include <linux/init.h>
18#include <linux/slab.h>
19#include <linux/spinlock.h>
20#include <asm/uaccess.h>
21#include <asm/nvram.h>
22#include <asm/rtas.h>
23#include <asm/prom.h>
24#include <asm/machdep.h>
25
26static unsigned int nvram_size;
27static int nvram_fetch, nvram_store;
28static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
29static DEFINE_SPINLOCK(nvram_lock);
30
31
32static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
33{
34 unsigned int i;
35 unsigned long len;
36 int done;
37 unsigned long flags;
38 char *p = buf;
39
40
41 if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
42 return -ENODEV;
43
44 if (*index >= nvram_size)
45 return 0;
46
47 i = *index;
48 if (i + count > nvram_size)
49 count = nvram_size - i;
50
51 spin_lock_irqsave(&nvram_lock, flags);
52
53 for (; count != 0; count -= len) {
54 len = count;
55 if (len > NVRW_CNT)
56 len = NVRW_CNT;
57
58 if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
59 len) != 0) || len != done) {
60 spin_unlock_irqrestore(&nvram_lock, flags);
61 return -EIO;
62 }
63
64 memcpy(p, nvram_buf, len);
65
66 p += len;
67 i += len;
68 }
69
70 spin_unlock_irqrestore(&nvram_lock, flags);
71
72 *index = i;
73 return p - buf;
74}
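/*
 * Illustrative usage sketch: RTAS moves at most NVRW_CNT bytes per call,
 * so pSeries_nvram_read() loops internally and advances *index past the
 * bytes it consumed.  A caller draining the whole NVRAM only needs to
 * loop on the returned byte count (this helper is hypothetical):
 */
#if 0
static ssize_t read_all_nvram(char *dest, size_t total)
{
	loff_t pos = 0;
	ssize_t n = 0, copied = 0;

	while (copied < total &&
	       (n = pSeries_nvram_read(dest + copied, total - copied, &pos)) > 0)
		copied += n;

	return (n < 0) ? n : copied;	/* propagate -EIO/-ENODEV */
}
#endif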
75
76static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
77{
78 unsigned int i;
79 unsigned long len;
80 int done;
81 unsigned long flags;
82 const char *p = buf;
83
84 if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
85 return -ENODEV;
86
87 if (*index >= nvram_size)
88 return 0;
89
90 i = *index;
91 if (i + count > nvram_size)
92 count = nvram_size - i;
93
94 spin_lock_irqsave(&nvram_lock, flags);
95
96 for (; count != 0; count -= len) {
97 len = count;
98 if (len > NVRW_CNT)
99 len = NVRW_CNT;
100
101 memcpy(nvram_buf, p, len);
102
103 if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
104 len) != 0) || len != done) {
105 spin_unlock_irqrestore(&nvram_lock, flags);
106 return -EIO;
107 }
108
109 p += len;
110 i += len;
111 }
112 spin_unlock_irqrestore(&nvram_lock, flags);
113
114 *index = i;
115 return p - buf;
116}
117
118static ssize_t pSeries_nvram_get_size(void)
119{
120 return nvram_size ? nvram_size : -ENODEV;
121}
122
123int __init pSeries_nvram_init(void)
124{
125 struct device_node *nvram;
126 unsigned int *nbytes_p, proplen;
127
128 nvram = of_find_node_by_type(NULL, "nvram");
129 if (nvram == NULL)
130 return -ENODEV;
131
132 nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen);
133 if (nbytes_p == NULL || proplen != sizeof(unsigned int))
134 return -EIO;
135
136 nvram_size = *nbytes_p;
137
138 nvram_fetch = rtas_token("nvram-fetch");
139 nvram_store = rtas_token("nvram-store");
140 printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
141 of_node_put(nvram);
142
143 ppc_md.nvram_read = pSeries_nvram_read;
144 ppc_md.nvram_write = pSeries_nvram_write;
145 ppc_md.nvram_size = pSeries_nvram_get_size;
146
147 return 0;
148}
diff --git a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c
new file mode 100644
index 000000000000..0b1cca281408
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_pci.c
@@ -0,0 +1,602 @@
1/*
2 * pSeries_pci.c
3 *
4 * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
5 * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
6 *
7 * pSeries specific routines for PCI.
8 *
9 * Based on code from pci.c and chrp_pci.c
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26#include <linux/kernel.h>
27#include <linux/threads.h>
28#include <linux/pci.h>
29#include <linux/string.h>
30#include <linux/init.h>
31#include <linux/bootmem.h>
32
33#include <asm/io.h>
34#include <asm/pgtable.h>
35#include <asm/irq.h>
36#include <asm/prom.h>
37#include <asm/machdep.h>
38#include <asm/pci-bridge.h>
39#include <asm/iommu.h>
40#include <asm/rtas.h>
41
42#include "mpic.h"
43#include "pci.h"
44
45/* RTAS tokens */
46static int read_pci_config;
47static int write_pci_config;
48static int ibm_read_pci_config;
49static int ibm_write_pci_config;
50
51static int s7a_workaround;
52
53extern struct mpic *pSeries_mpic;
54
55static int config_access_valid(struct device_node *dn, int where)
56{
57 if (where < 256)
58 return 1;
59 if (where < 4096 && dn->pci_ext_config_space)
60 return 1;
61
62 return 0;
63}
64
65static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val)
66{
67 int returnval = -1;
68 unsigned long buid, addr;
69 int ret;
70
71 if (!dn)
72 return PCIBIOS_DEVICE_NOT_FOUND;
73 if (!config_access_valid(dn, where))
74 return PCIBIOS_BAD_REGISTER_NUMBER;
75
76 addr = ((where & 0xf00) << 20) | (dn->busno << 16) |
77 (dn->devfn << 8) | (where & 0xff);
78 buid = dn->phb->buid;
79 if (buid) {
80 ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
81 addr, buid >> 32, buid & 0xffffffff, size);
82 } else {
83 ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size);
84 }
85 *val = returnval;
86
87 if (ret)
88 return PCIBIOS_DEVICE_NOT_FOUND;
89
90 if (returnval == EEH_IO_ERROR_VALUE(size)
91 && eeh_dn_check_failure (dn, NULL))
92 return PCIBIOS_DEVICE_NOT_FOUND;
93
94 return PCIBIOS_SUCCESSFUL;
95}
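/*
 * Illustrative sketch: the RTAS config address built above packs the
 * extended-register bits (8..11 of 'where') into the top nibble used by
 * ibm,read-pci-config, then the bus, devfn and low register bits.  The
 * helper below just restates that layout; its name is not a kernel API.
 */
#if 0
static unsigned long rtas_config_addr(int busno, int devfn, int where)
{
	return ((where & 0xf00) << 20) |	/* extended config space regs */
	       (busno << 16) | (devfn << 8) |
	       (where & 0xff);			/* legacy 256-byte space */
}
#endif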
96
97static int rtas_pci_read_config(struct pci_bus *bus,
98 unsigned int devfn,
99 int where, int size, u32 *val)
100{
101 struct device_node *busdn, *dn;
102
103 if (bus->self)
104 busdn = pci_device_to_OF_node(bus->self);
105 else
106 busdn = bus->sysdata; /* must be a phb */
107
108 /* Search only direct children of the bus */
109 for (dn = busdn->child; dn; dn = dn->sibling)
110 if (dn->devfn == devfn)
111 return rtas_read_config(dn, where, size, val);
112 return PCIBIOS_DEVICE_NOT_FOUND;
113}
114
115static int rtas_write_config(struct device_node *dn, int where, int size, u32 val)
116{
117 unsigned long buid, addr;
118 int ret;
119
120 if (!dn)
121 return PCIBIOS_DEVICE_NOT_FOUND;
122 if (!config_access_valid(dn, where))
123 return PCIBIOS_BAD_REGISTER_NUMBER;
124
125 addr = ((where & 0xf00) << 20) | (dn->busno << 16) |
126 (dn->devfn << 8) | (where & 0xff);
127 buid = dn->phb->buid;
128 if (buid) {
129 ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, size, (ulong) val);
130 } else {
131 ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val);
132 }
133
134 if (ret)
135 return PCIBIOS_DEVICE_NOT_FOUND;
136
137 return PCIBIOS_SUCCESSFUL;
138}
139
140static int rtas_pci_write_config(struct pci_bus *bus,
141 unsigned int devfn,
142 int where, int size, u32 val)
143{
144 struct device_node *busdn, *dn;
145
146 if (bus->self)
147 busdn = pci_device_to_OF_node(bus->self);
148 else
149 busdn = bus->sysdata; /* must be a phb */
150
151 /* Search only direct children of the bus */
152 for (dn = busdn->child; dn; dn = dn->sibling)
153 if (dn->devfn == devfn)
154 return rtas_write_config(dn, where, size, val);
155 return PCIBIOS_DEVICE_NOT_FOUND;
156}
157
158struct pci_ops rtas_pci_ops = {
159 rtas_pci_read_config,
160 rtas_pci_write_config
161};
162
163int is_python(struct device_node *dev)
164{
165 char *model = (char *)get_property(dev, "model", NULL);
166
167 if (model && strstr(model, "Python"))
168 return 1;
169
170 return 0;
171}
172
173static int get_phb_reg_prop(struct device_node *dev,
174 unsigned int addr_size_words,
175 struct reg_property64 *reg)
176{
177 unsigned int *ui_ptr = NULL, len;
178
179	/* Found a PHB, now figure out where its registers are mapped. */
180 ui_ptr = (unsigned int *)get_property(dev, "reg", &len);
181 if (ui_ptr == NULL)
182 return 1;
183
184 if (addr_size_words == 1) {
185 reg->address = ((struct reg_property32 *)ui_ptr)->address;
186 reg->size = ((struct reg_property32 *)ui_ptr)->size;
187 } else {
188 *reg = *((struct reg_property64 *)ui_ptr);
189 }
190
191 return 0;
192}
193
194static void python_countermeasures(struct device_node *dev,
195 unsigned int addr_size_words)
196{
197 struct reg_property64 reg_struct;
198 void __iomem *chip_regs;
199 volatile u32 val;
200
201 if (get_phb_reg_prop(dev, addr_size_words, &reg_struct))
202 return;
203
204 /* Python's register file is 1 MB in size. */
205 chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000);
206
207 /*
208 * Firmware doesn't always clear this bit which is critical
209 * for good performance - Anton
210 */
211
212#define PRG_CL_RESET_VALID 0x00010000
213
214 val = in_be32(chip_regs + 0xf6030);
215 if (val & PRG_CL_RESET_VALID) {
216 printk(KERN_INFO "Python workaround: ");
217 val &= ~PRG_CL_RESET_VALID;
218 out_be32(chip_regs + 0xf6030, val);
219 /*
220 * We must read it back for changes to
221 * take effect
222 */
223 val = in_be32(chip_regs + 0xf6030);
224 printk("reg0: %x\n", val);
225 }
226
227 iounmap(chip_regs);
228}
229
230void __init init_pci_config_tokens (void)
231{
232 read_pci_config = rtas_token("read-pci-config");
233 write_pci_config = rtas_token("write-pci-config");
234 ibm_read_pci_config = rtas_token("ibm,read-pci-config");
235 ibm_write_pci_config = rtas_token("ibm,write-pci-config");
236}
237
238unsigned long __devinit get_phb_buid (struct device_node *phb)
239{
240 int addr_cells;
241 unsigned int *buid_vals;
242 unsigned int len;
243 unsigned long buid;
244
245 if (ibm_read_pci_config == -1) return 0;
246
247	/* PHBs will always be children of the root node,
248	 * or so it is promised by the current firmware. */
249 if (phb->parent == NULL)
250 return 0;
251 if (phb->parent->parent)
252 return 0;
253
254 buid_vals = (unsigned int *) get_property(phb, "reg", &len);
255 if (buid_vals == NULL)
256 return 0;
257
258 addr_cells = prom_n_addr_cells(phb);
259 if (addr_cells == 1) {
260 buid = (unsigned long) buid_vals[0];
261 } else {
262 buid = (((unsigned long)buid_vals[0]) << 32UL) |
263 (((unsigned long)buid_vals[1]) & 0xffffffff);
264 }
265 return buid;
266}
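/*
 * Illustrative sketch: with two address cells, the 64-bit BUID is
 * assembled from a pair of 32-bit "reg" cells exactly as above.  The
 * helper and sample values are hypothetical:
 */
#if 0
static unsigned long buid_from_cells(unsigned int hi, unsigned int lo)
{
	/* e.g. hi == 0x9, lo == 0x1 gives buid 0x900000001 */
	return ((unsigned long)hi << 32) | lo;
}
#endif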
267
268static int phb_set_bus_ranges(struct device_node *dev,
269 struct pci_controller *phb)
270{
271 int *bus_range;
272 unsigned int len;
273
274 bus_range = (int *) get_property(dev, "bus-range", &len);
275 if (bus_range == NULL || len < 2 * sizeof(int)) {
276 return 1;
277 }
278
279 phb->first_busno = bus_range[0];
280 phb->last_busno = bus_range[1];
281
282 return 0;
283}
284
285static int __devinit setup_phb(struct device_node *dev,
286 struct pci_controller *phb,
287 unsigned int addr_size_words)
288{
289 pci_setup_pci_controller(phb);
290
291 if (is_python(dev))
292 python_countermeasures(dev, addr_size_words);
293
294 if (phb_set_bus_ranges(dev, phb))
295 return 1;
296
297 phb->arch_data = dev;
298 phb->ops = &rtas_pci_ops;
299 phb->buid = get_phb_buid(dev);
300
301 return 0;
302}
303
304static void __devinit add_linux_pci_domain(struct device_node *dev,
305 struct pci_controller *phb,
306 struct property *of_prop)
307{
308 memset(of_prop, 0, sizeof(struct property));
309 of_prop->name = "linux,pci-domain";
310 of_prop->length = sizeof(phb->global_number);
311 of_prop->value = (unsigned char *)&of_prop[1];
312 memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number));
313 prom_add_property(dev, of_prop);
314}
315
316static struct pci_controller * __init alloc_phb(struct device_node *dev,
317 unsigned int addr_size_words)
318{
319 struct pci_controller *phb;
320 struct property *of_prop;
321
322 phb = alloc_bootmem(sizeof(struct pci_controller));
323 if (phb == NULL)
324 return NULL;
325
326 of_prop = alloc_bootmem(sizeof(struct property) +
327 sizeof(phb->global_number));
328 if (!of_prop)
329 return NULL;
330
331 if (setup_phb(dev, phb, addr_size_words))
332 return NULL;
333
334 add_linux_pci_domain(dev, phb, of_prop);
335
336 return phb;
337}
338
339static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words)
340{
341 struct pci_controller *phb;
342
343 phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller),
344 GFP_KERNEL);
345 if (phb == NULL)
346 return NULL;
347
348 if (setup_phb(dev, phb, addr_size_words))
349 return NULL;
350
351 phb->is_dynamic = 1;
352
353 /* TODO: linux,pci-domain? */
354
355 return phb;
356}
357
358unsigned long __init find_and_init_phbs(void)
359{
360 struct device_node *node;
361 struct pci_controller *phb;
362 unsigned int root_size_cells = 0;
363 unsigned int index;
364 unsigned int *opprop = NULL;
365 struct device_node *root = of_find_node_by_path("/");
366
367 if (ppc64_interrupt_controller == IC_OPEN_PIC) {
368 opprop = (unsigned int *)get_property(root,
369 "platform-open-pic", NULL);
370 }
371
372 root_size_cells = prom_n_size_cells(root);
373
374 index = 0;
375
376 for (node = of_get_next_child(root, NULL);
377 node != NULL;
378 node = of_get_next_child(root, node)) {
379 if (node->type == NULL || strcmp(node->type, "pci") != 0)
380 continue;
381
382 phb = alloc_phb(node, root_size_cells);
383 if (!phb)
384 continue;
385
386 pci_process_bridge_OF_ranges(phb, node);
387 pci_setup_phb_io(phb, index == 0);
388
389 if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
390 int addr = root_size_cells * (index + 2) - 1;
391 mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
392 }
393
394 index++;
395 }
396
397 of_node_put(root);
398 pci_devs_phb_init();
399
400 /*
401 * pci_probe_only and pci_assign_all_buses can be set via properties
402 * in chosen.
403 */
404 if (of_chosen) {
405 int *prop;
406
407 prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
408 NULL);
409 if (prop)
410 pci_probe_only = *prop;
411
412 prop = (int *)get_property(of_chosen,
413 "linux,pci-assign-all-buses", NULL);
414 if (prop)
415 pci_assign_all_buses = *prop;
416 }
417
418 return 0;
419}
420
421struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn)
422{
423 struct device_node *root = of_find_node_by_path("/");
424 unsigned int root_size_cells = 0;
425 struct pci_controller *phb;
426 struct pci_bus *bus;
427 int primary;
428
429 root_size_cells = prom_n_size_cells(root);
430
431 primary = list_empty(&hose_list);
432 phb = alloc_phb_dynamic(dn, root_size_cells);
433 if (!phb)
434 return NULL;
435
436 pci_process_bridge_OF_ranges(phb, dn);
437
438 pci_setup_phb_io_dynamic(phb, primary);
439 of_node_put(root);
440
441 pci_devs_phb_init_dynamic(phb);
442 phb->last_busno = 0xff;
443 bus = pci_scan_bus(phb->first_busno, phb->ops, phb->arch_data);
444 phb->bus = bus;
445 phb->last_busno = bus->subordinate;
446
447 return phb;
448}
449EXPORT_SYMBOL(init_phb_dynamic);
450
451#if 0
452void pcibios_name_device(struct pci_dev *dev)
453{
454 struct device_node *dn;
455
456 /*
457 * Add IBM loc code (slot) as a prefix to the device names for service
458 */
459 dn = pci_device_to_OF_node(dev);
460 if (dn) {
461 char *loc_code = get_property(dn, "ibm,loc-code", 0);
462 if (loc_code) {
463 int loc_len = strlen(loc_code);
464 if (loc_len < sizeof(dev->dev.name)) {
465 memmove(dev->dev.name+loc_len+1, dev->dev.name,
466 sizeof(dev->dev.name)-loc_len-1);
467 memcpy(dev->dev.name, loc_code, loc_len);
468 dev->dev.name[loc_len] = ' ';
469 dev->dev.name[sizeof(dev->dev.name)-1] = '\0';
470 }
471 }
472 }
473}
474DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
475#endif
476
477static void check_s7a(void)
478{
479 struct device_node *root;
480 char *model;
481
482 root = of_find_node_by_path("/");
483 if (root) {
484 model = get_property(root, "model", NULL);
485 if (model && !strcmp(model, "IBM,7013-S7A"))
486 s7a_workaround = 1;
487 of_node_put(root);
488 }
489}
490
491/* RPA-specific bits for removing PHBs */
492int pcibios_remove_root_bus(struct pci_controller *phb)
493{
494 struct pci_bus *b = phb->bus;
495 struct resource *res;
496 int rc, i;
497
498 res = b->resource[0];
499 if (!res->flags) {
500 printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__,
501 b->name);
502 return 1;
503 }
504
505 rc = unmap_bus_range(b);
506 if (rc) {
507 printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
508 __FUNCTION__, b->name);
509 return 1;
510 }
511
512 if (release_resource(res)) {
513 printk(KERN_ERR "%s: failed to release IO on bus %s\n",
514 __FUNCTION__, b->name);
515 return 1;
516 }
517
518 for (i = 1; i < 3; ++i) {
519 res = b->resource[i];
520		if (!res->flags && i == 1) {
521 printk(KERN_ERR "%s: no MEM resource for PHB %s\n",
522 __FUNCTION__, b->name);
523 return 1;
524 }
525 if (res->flags && release_resource(res)) {
526 printk(KERN_ERR
527		       "%s: failed to release mem resource %d on bus %s\n",
528 __FUNCTION__, i, b->name);
529 return 1;
530 }
531 }
532
533 list_del(&phb->list_node);
534 if (phb->is_dynamic)
535 kfree(phb);
536
537 return 0;
538}
539EXPORT_SYMBOL(pcibios_remove_root_bus);
540
541static void __init pSeries_request_regions(void)
542{
543 if (!isa_io_base)
544 return;
545
546 request_region(0x20,0x20,"pic1");
547 request_region(0xa0,0x20,"pic2");
548 request_region(0x00,0x20,"dma1");
549 request_region(0x40,0x20,"timer");
550 request_region(0x80,0x10,"dma page reg");
551 request_region(0xc0,0x20,"dma2");
552}
553
554void __init pSeries_final_fixup(void)
555{
556 struct pci_dev *dev = NULL;
557
558 check_s7a();
559
560 for_each_pci_dev(dev) {
561 pci_read_irq_line(dev);
562 if (s7a_workaround) {
563 if (dev->irq > 16) {
564 dev->irq -= 3;
565 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
566 }
567 }
568 }
569
570 phbs_remap_io();
571 pSeries_request_regions();
572
573 pci_addr_cache_build();
574}
575
576/*
577 * Assume the winbond 82c105 is the IDE controller on a
578 * p610. We should probably be more careful in case
579 * someone tries to plug in a similar adapter.
580 */
581static void fixup_winbond_82c105(struct pci_dev* dev)
582{
583 int i;
584 unsigned int reg;
585
586 if (!(systemcfg->platform & PLATFORM_PSERIES))
587 return;
588
589 printk("Using INTC for W82c105 IDE controller.\n");
590 pci_read_config_dword(dev, 0x40, &reg);
591 /* Enable LEGIRQ to use INTC instead of ISA interrupts */
592 pci_write_config_dword(dev, 0x40, reg | (1<<11));
593
594 for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) {
595 /* zap the 2nd function of the winbond chip */
596 if (dev->resource[i].flags & IORESOURCE_IO
597 && dev->bus->number == 0 && dev->devfn == 0x81)
598 dev->resource[i].flags &= ~IORESOURCE_IO;
599 }
600}
601DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
602 fixup_winbond_82c105);
diff --git a/arch/ppc64/kernel/pSeries_reconfig.c b/arch/ppc64/kernel/pSeries_reconfig.c
new file mode 100644
index 000000000000..cb5443f2e49b
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_reconfig.c
@@ -0,0 +1,434 @@
1/*
2 * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI
3 * Hotplug and Dynamic Logical Partitioning on RPA platforms).
4 *
5 * Copyright (C) 2005 Nathan Lynch
6 * Copyright (C) 2005 IBM Corporation
7 *
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version
11 * 2 as published by the Free Software Foundation.
12 */
13
14#include <linux/kernel.h>
15#include <linux/kref.h>
16#include <linux/notifier.h>
17#include <linux/proc_fs.h>
18
19#include <asm/prom.h>
20#include <asm/pSeries_reconfig.h>
21#include <asm/uaccess.h>
22
23
24
25/*
26 * Routines for "runtime" addition and removal of device tree nodes.
27 */
28#ifdef CONFIG_PROC_DEVICETREE
29/*
30 * Add a node to /proc/device-tree.
31 */
32static void add_node_proc_entries(struct device_node *np)
33{
34 struct proc_dir_entry *ent;
35
36 ent = proc_mkdir(strrchr(np->full_name, '/') + 1, np->parent->pde);
37 if (ent)
38 proc_device_tree_add_node(np, ent);
39}
40
41static void remove_node_proc_entries(struct device_node *np)
42{
43 struct property *pp = np->properties;
44 struct device_node *parent = np->parent;
45
46 while (pp) {
47 remove_proc_entry(pp->name, np->pde);
48 pp = pp->next;
49 }
50
51 /* Assuming that symlinks have the same parent directory as
52 * np->pde.
53 */
54 if (np->name_link)
55 remove_proc_entry(np->name_link->name, parent->pde);
56 if (np->addr_link)
57 remove_proc_entry(np->addr_link->name, parent->pde);
58 if (np->pde)
59 remove_proc_entry(np->pde->name, parent->pde);
60}
61#else /* !CONFIG_PROC_DEVICETREE */
62static void add_node_proc_entries(struct device_node *np)
63{
64 return;
65}
66
67static void remove_node_proc_entries(struct device_node *np)
68{
69 return;
70}
71#endif /* CONFIG_PROC_DEVICETREE */
72
73/**
74 * derive_parent - basically like dirname(1)
75 * @path: the full_name of a node to be added to the tree
76 *
77 * Returns the node which should be the parent of the node
78 * described by path. E.g., for path = "/foo/bar", returns
79 * the node with full_name = "/foo".
80 */
81static struct device_node *derive_parent(const char *path)
82{
83 struct device_node *parent = NULL;
84 char *parent_path = "/";
85 size_t parent_path_len = strrchr(path, '/') - path + 1;
86
87 /* reject if path is "/" */
88 if (!strcmp(path, "/"))
89 return ERR_PTR(-EINVAL);
90
91 if (strrchr(path, '/') != path) {
92 parent_path = kmalloc(parent_path_len, GFP_KERNEL);
93 if (!parent_path)
94 return ERR_PTR(-ENOMEM);
95 strlcpy(parent_path, path, parent_path_len);
96 }
97 parent = of_find_node_by_path(parent_path);
98 if (!parent)
99 return ERR_PTR(-EINVAL);
100 if (strcmp(parent_path, "/"))
101 kfree(parent_path);
102 return parent;
103}
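/*
 * Illustrative usage: derive_parent() behaves like dirname(1) on
 * device-tree paths, so "/cpus/PowerPC,POWER5@0" yields the node at
 * "/cpus" and a top-level path such as "/memory@0" yields the root
 * node.  The path names here are examples only:
 */
#if 0
static void derive_parent_example(void)
{
	struct device_node *parent = derive_parent("/cpus/PowerPC,POWER5@0");

	if (!IS_ERR(parent)) {
		/* parent->full_name is "/cpus"; drop the reference we got */
		of_node_put(parent);
	}
}
#endif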
104
105static struct notifier_block *pSeries_reconfig_chain;
106
107int pSeries_reconfig_notifier_register(struct notifier_block *nb)
108{
109 return notifier_chain_register(&pSeries_reconfig_chain, nb);
110}
111
112void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
113{
114 notifier_chain_unregister(&pSeries_reconfig_chain, nb);
115}
116
117static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
118{
119 struct device_node *np;
120 int err = -ENOMEM;
121
122 np = kcalloc(1, sizeof(*np), GFP_KERNEL);
123 if (!np)
124 goto out_err;
125
126 np->full_name = kmalloc(strlen(path) + 1, GFP_KERNEL);
127 if (!np->full_name)
128 goto out_err;
129
130 strcpy(np->full_name, path);
131
132 np->properties = proplist;
133 OF_MARK_DYNAMIC(np);
134 kref_init(&np->kref);
135
136 np->parent = derive_parent(path);
137 if (IS_ERR(np->parent)) {
138 err = PTR_ERR(np->parent);
139 goto out_err;
140 }
141
142 err = notifier_call_chain(&pSeries_reconfig_chain,
143 PSERIES_RECONFIG_ADD, np);
144 if (err == NOTIFY_BAD) {
145 printk(KERN_ERR "Failed to add device node %s\n", path);
146 err = -ENOMEM; /* For now, safe to assume kmalloc failure */
147 goto out_err;
148 }
149
150 of_attach_node(np);
151
152 add_node_proc_entries(np);
153
154 of_node_put(np->parent);
155
156 return 0;
157
158out_err:
159 if (np) {
160 of_node_put(np->parent);
161 kfree(np->full_name);
162 kfree(np);
163 }
164 return err;
165}
166
167static int pSeries_reconfig_remove_node(struct device_node *np)
168{
169 struct device_node *parent, *child;
170
171 parent = of_get_parent(np);
172 if (!parent)
173 return -EINVAL;
174
175 if ((child = of_get_next_child(np, NULL))) {
176 of_node_put(child);
177 return -EBUSY;
178 }
179
180 remove_node_proc_entries(np);
181
182 notifier_call_chain(&pSeries_reconfig_chain,
183 PSERIES_RECONFIG_REMOVE, np);
184 of_detach_node(np);
185
186 of_node_put(parent);
187 of_node_put(np); /* Must decrement the refcount */
188 return 0;
189}
190
191/*
192 * /proc/ppc64/ofdt - yucky binary interface for adding and removing
193 * OF device nodes. Should be deprecated as soon as we get an
194 * in-kernel wrapper for the RTAS ibm,configure-connector call.
195 */
196
197static void release_prop_list(const struct property *prop)
198{
199 struct property *next;
200 for (; prop; prop = next) {
201 next = prop->next;
202 kfree(prop->name);
203 kfree(prop->value);
204 kfree(prop);
205 }
206
207}
208
209/**
210 * parse_next_property - process the next property from raw input buffer
211 * @buf: input buffer, must be nul-terminated
212 * @end: end of the input buffer + 1, for validation
213 * @name: return value; set to property name in buf
214 * @length: return value; set to length of value
215 * @value: return value; set to the property value in buf
216 *
217 * Note that the caller must make copies of the name and value returned,
218 * this function does no allocation or copying of the data. Return value
219 * is set to the next name in buf, or NULL on error.
220 */
221static char * parse_next_property(char *buf, char *end, char **name, int *length,
222 unsigned char **value)
223{
224 char *tmp;
225
226 *name = buf;
227
228 tmp = strchr(buf, ' ');
229 if (!tmp) {
230 printk(KERN_ERR "property parse failed in %s at line %d\n",
231 __FUNCTION__, __LINE__);
232 return NULL;
233 }
234 *tmp = '\0';
235
236 if (++tmp >= end) {
237 printk(KERN_ERR "property parse failed in %s at line %d\n",
238 __FUNCTION__, __LINE__);
239 return NULL;
240 }
241
242 /* now we're on the length */
243 *length = -1;
244 *length = simple_strtoul(tmp, &tmp, 10);
245 if (*length == -1) {
246 printk(KERN_ERR "property parse failed in %s at line %d\n",
247 __FUNCTION__, __LINE__);
248 return NULL;
249 }
250 if (*tmp != ' ' || ++tmp >= end) {
251 printk(KERN_ERR "property parse failed in %s at line %d\n",
252 __FUNCTION__, __LINE__);
253 return NULL;
254 }
255
256 /* now we're on the value */
257 *value = tmp;
258 tmp += *length;
259 if (tmp > end) {
260 printk(KERN_ERR "property parse failed in %s at line %d\n",
261 __FUNCTION__, __LINE__);
262 return NULL;
263 }
264 else if (tmp < end && *tmp != ' ' && *tmp != '\0') {
265 printk(KERN_ERR "property parse failed in %s at line %d\n",
266 __FUNCTION__, __LINE__);
267 return NULL;
268 }
269 tmp++;
270
271 /* and now we should be on the next name, or the end */
272 return tmp;
273}
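/*
 * Illustrative sketch of the buffer format this parser consumes: after
 * the "add_node <path>" prefix handled by ofdt_write()/do_add_node(),
 * properties appear as space-separated "<name> <decimal length> <raw
 * value bytes>" groups.  The property name and value below are made up:
 */
#if 0
static int build_ofdt_example(char *buf, size_t len)
{
	static const char hdr[] = "add_node /example-node reg 4 ";
	static const unsigned char val[4] = { 0, 0, 0, 1 };

	if (len < sizeof(hdr) - 1 + sizeof(val))
		return -1;
	memcpy(buf, hdr, sizeof(hdr) - 1);		 /* command, path, name, length */
	memcpy(buf + sizeof(hdr) - 1, val, sizeof(val)); /* 4 raw value bytes */
	return sizeof(hdr) - 1 + sizeof(val);
}
#endif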
274
275static struct property *new_property(const char *name, const int length,
276 const unsigned char *value, struct property *last)
277{
278 struct property *new = kmalloc(sizeof(*new), GFP_KERNEL);
279
280 if (!new)
281 return NULL;
282 memset(new, 0, sizeof(*new));
283
284 if (!(new->name = kmalloc(strlen(name) + 1, GFP_KERNEL)))
285 goto cleanup;
286 if (!(new->value = kmalloc(length + 1, GFP_KERNEL)))
287 goto cleanup;
288
289 strcpy(new->name, name);
290 memcpy(new->value, value, length);
291 *(((char *)new->value) + length) = 0;
292 new->length = length;
293 new->next = last;
294 return new;
295
296cleanup:
297 if (new->name)
298 kfree(new->name);
299 if (new->value)
300 kfree(new->value);
301 kfree(new);
302 return NULL;
303}
304
305static int do_add_node(char *buf, size_t bufsize)
306{
307 char *path, *end, *name;
308 struct device_node *np;
309 struct property *prop = NULL;
310 unsigned char* value;
311 int length, rv = 0;
312
313 end = buf + bufsize;
314 path = buf;
315 buf = strchr(buf, ' ');
316 if (!buf)
317 return -EINVAL;
318 *buf = '\0';
319 buf++;
320
321 if ((np = of_find_node_by_path(path))) {
322 of_node_put(np);
323 return -EINVAL;
324 }
325
326 /* rv = build_prop_list(tmp, bufsize - (tmp - buf), &proplist); */
327 while (buf < end &&
328 (buf = parse_next_property(buf, end, &name, &length, &value))) {
329 struct property *last = prop;
330
331 prop = new_property(name, length, value, last);
332 if (!prop) {
333 rv = -ENOMEM;
334 prop = last;
335 goto out;
336 }
337 }
338 if (!buf) {
339 rv = -EINVAL;
340 goto out;
341 }
342
343 rv = pSeries_reconfig_add_node(path, prop);
344
345out:
346 if (rv)
347 release_prop_list(prop);
348 return rv;
349}
350
351static int do_remove_node(char *buf)
352{
353 struct device_node *node;
354 int rv = -ENODEV;
355
356 if ((node = of_find_node_by_path(buf)))
357 rv = pSeries_reconfig_remove_node(node);
358
359 of_node_put(node);
360 return rv;
361}
362
363/**
364 * ofdt_write - perform operations on the Open Firmware device tree
365 *
366 * @file: not used
367 * @buf: command and arguments
368 * @count: size of the command buffer
369 * @off: not used
370 *
371 * Operations supported at this time are addition and removal of
372 * whole nodes along with their properties. Operations on individual
373 * properties are not implemented (yet).
374 */
375static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count,
376 loff_t *off)
377{
378 int rv = 0;
379 char *kbuf;
380 char *tmp;
381
382 if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) {
383 rv = -ENOMEM;
384 goto out;
385 }
386 if (copy_from_user(kbuf, buf, count)) {
387 rv = -EFAULT;
388 goto out;
389 }
390
391 kbuf[count] = '\0';
392
393 tmp = strchr(kbuf, ' ');
394 if (!tmp) {
395 rv = -EINVAL;
396 goto out;
397 }
398 *tmp = '\0';
399 tmp++;
400
401 if (!strcmp(kbuf, "add_node"))
402 rv = do_add_node(tmp, count - (tmp - kbuf));
403 else if (!strcmp(kbuf, "remove_node"))
404 rv = do_remove_node(tmp);
405 else
406 rv = -EINVAL;
407out:
408 kfree(kbuf);
409 return rv ? rv : count;
410}
411
412static struct file_operations ofdt_fops = {
413 .write = ofdt_write
414};
415
416/* create /proc/ppc64/ofdt write-only by root */
417static int proc_ppc64_create_ofdt(void)
418{
419 struct proc_dir_entry *ent;
420
421 if (!(systemcfg->platform & PLATFORM_PSERIES))
422 return 0;
423
424 ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL);
425 if (ent) {
426 ent->nlink = 1;
427 ent->data = NULL;
428 ent->size = 0;
429 ent->proc_fops = &ofdt_fops;
430 }
431
432 return 0;
433}
434__initcall(proc_ppc64_create_ofdt);
diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c
new file mode 100644
index 000000000000..06536de51257
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_setup.c
@@ -0,0 +1,612 @@
1/*
2 * linux/arch/ppc/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 * Adapted from 'alpha' version by Gary Thomas
6 * Modified by Cort Dougan (cort@cs.nmt.edu)
7 * Modified by PPC64 Team, IBM Corp
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15/*
16 * bootup setup stuff..
17 */
18
19#undef DEBUG
20
21#include <linux/config.h>
22#include <linux/errno.h>
23#include <linux/sched.h>
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/stddef.h>
27#include <linux/unistd.h>
28#include <linux/slab.h>
29#include <linux/user.h>
30#include <linux/a.out.h>
31#include <linux/tty.h>
32#include <linux/major.h>
33#include <linux/interrupt.h>
34#include <linux/reboot.h>
35#include <linux/init.h>
36#include <linux/ioport.h>
37#include <linux/console.h>
38#include <linux/pci.h>
39#include <linux/version.h>
40#include <linux/adb.h>
41#include <linux/module.h>
42#include <linux/delay.h>
43#include <linux/irq.h>
44#include <linux/seq_file.h>
45#include <linux/root_dev.h>
46
47#include <asm/mmu.h>
48#include <asm/processor.h>
49#include <asm/io.h>
50#include <asm/pgtable.h>
51#include <asm/prom.h>
52#include <asm/rtas.h>
53#include <asm/pci-bridge.h>
54#include <asm/iommu.h>
55#include <asm/dma.h>
56#include <asm/machdep.h>
57#include <asm/irq.h>
58#include <asm/time.h>
59#include <asm/nvram.h>
60#include <asm/plpar_wrappers.h>
61#include <asm/xics.h>
62#include <asm/cputable.h>
63
64#include "i8259.h"
65#include "mpic.h"
66#include "pci.h"
67
68#ifdef DEBUG
69#define DBG(fmt...) udbg_printf(fmt)
70#else
71#define DBG(fmt...)
72#endif
73
74extern void pSeries_final_fixup(void);
75
76extern void pSeries_get_boot_time(struct rtc_time *rtc_time);
77extern void pSeries_get_rtc_time(struct rtc_time *rtc_time);
78extern int pSeries_set_rtc_time(struct rtc_time *rtc_time);
79extern int find_udbg_vterm(void);
80extern void system_reset_fwnmi(void); /* from head.S */
81extern void machine_check_fwnmi(void); /* from head.S */
82extern void generic_find_legacy_serial_ports(u64 *physport,
83 unsigned int *default_speed);
84
85int fwnmi_active; /* TRUE if an FWNMI handler is present */
86
87extern unsigned long ppc_proc_freq;
88extern unsigned long ppc_tb_freq;
89
90extern void pSeries_system_reset_exception(struct pt_regs *regs);
91extern int pSeries_machine_check_exception(struct pt_regs *regs);
92
93static volatile void __iomem * chrp_int_ack_special;
94struct mpic *pSeries_mpic;
95
96void pSeries_get_cpuinfo(struct seq_file *m)
97{
98 struct device_node *root;
99 const char *model = "";
100
101 root = of_find_node_by_path("/");
102 if (root)
103 model = get_property(root, "model", NULL);
104 seq_printf(m, "machine\t\t: CHRP %s\n", model);
105 of_node_put(root);
106}
107
108/* Initialize firmware-assisted non-maskable interrupts (FWNMI) if
109 * the firmware supports this feature: register the system-reset and
110 * machine-check handlers from head.S with RTAS.
111 */
112static void __init fwnmi_init(void)
113{
114 int ret;
115 int ibm_nmi_register = rtas_token("ibm,nmi-register");
116 if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
117 return;
118 ret = rtas_call(ibm_nmi_register, 2, 1, NULL,
119 __pa((unsigned long)system_reset_fwnmi),
120 __pa((unsigned long)machine_check_fwnmi));
121 if (ret == 0)
122 fwnmi_active = 1;
123}
124
125static int pSeries_irq_cascade(struct pt_regs *regs, void *data)
126{
127 if (chrp_int_ack_special)
128 return readb(chrp_int_ack_special);
129 else
130 return i8259_irq(smp_processor_id());
131}
132
133static void __init pSeries_init_mpic(void)
134{
135 unsigned int *addrp;
136 struct device_node *np;
137 int i;
138
139 /* All ISUs are setup, complete initialization */
140 mpic_init(pSeries_mpic);
141
142 /* Check what kind of cascade ACK we have */
143 if (!(np = of_find_node_by_name(NULL, "pci"))
144 || !(addrp = (unsigned int *)
145 get_property(np, "8259-interrupt-acknowledge", NULL)))
146 printk(KERN_ERR "Cannot find pci to get ack address\n");
147 else
148 chrp_int_ack_special = ioremap(addrp[prom_n_addr_cells(np)-1], 1);
149 of_node_put(np);
150
151 /* Setup the legacy interrupts & controller */
152 for (i = 0; i < NUM_ISA_INTERRUPTS; i++)
153 irq_desc[i].handler = &i8259_pic;
154 i8259_init(0);
155
156 /* Hook cascade to mpic */
157 mpic_setup_cascade(NUM_ISA_INTERRUPTS, pSeries_irq_cascade, NULL);
158}
159
160static void __init pSeries_setup_mpic(void)
161{
162 unsigned int *opprop;
163 unsigned long openpic_addr = 0;
164 unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS];
165 struct device_node *root;
166 int irq_count;
167
168 /* Find the Open PIC if present */
169 root = of_find_node_by_path("/");
170 opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL);
171 if (opprop != 0) {
172 int n = prom_n_addr_cells(root);
173
174 for (openpic_addr = 0; n > 0; --n)
175 openpic_addr = (openpic_addr << 32) + *opprop++;
176 printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
177 }
178 of_node_put(root);
179
180 BUG_ON(openpic_addr == 0);
181
182 /* Get the sense values from OF */
183 prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS);
184
185 /* Setup the openpic driver */
186 irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */
187 pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY,
188 16, 16, irq_count, /* isu size, irq offset, irq count */
189 NR_IRQS - 4, /* ipi offset */
190 senses, irq_count, /* sense & sense size */
191 " MPIC ");
192}
193
194static void __init pSeries_setup_arch(void)
195{
196 /* Fixup ppc_md depending on the type of interrupt controller */
197 if (ppc64_interrupt_controller == IC_OPEN_PIC) {
198 ppc_md.init_IRQ = pSeries_init_mpic;
199 ppc_md.get_irq = mpic_get_irq;
200 /* Allocate the mpic now, so that find_and_init_phbs() can
201 * fill the ISUs */
202 pSeries_setup_mpic();
203 } else {
204 ppc_md.init_IRQ = xics_init_IRQ;
205 ppc_md.get_irq = xics_get_irq;
206 }
207
208#ifdef CONFIG_SMP
209 smp_init_pSeries();
210#endif
211 /* openpic global configuration register (64-bit format). */
212 /* openpic Interrupt Source Unit pointer (64-bit format). */
213 /* python0 facility area (mmio) (64-bit format) REAL address. */
214
215 /* init to some ~sane value until calibrate_delay() runs */
216 loops_per_jiffy = 50000000;
217
218 if (ROOT_DEV == 0) {
219 printk("No ramdisk, default root is /dev/sda2\n");
220 ROOT_DEV = Root_SDA2;
221 }
222
223 fwnmi_init();
224
225 /* Find and initialize PCI host bridges */
226 init_pci_config_tokens();
227 eeh_init();
228 find_and_init_phbs();
229
230#ifdef CONFIG_DUMMY_CONSOLE
231 conswitchp = &dummy_con;
232#endif
233
234 pSeries_nvram_init();
235
236 if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
237 vpa_init(boot_cpuid);
238}
239
240static int __init pSeries_init_panel(void)
241{
242 /* Manually leave the kernel version on the panel. */
243 ppc_md.progress("Linux ppc64\n", 0);
244 ppc_md.progress(UTS_RELEASE, 0);
245
246 return 0;
247}
248arch_initcall(pSeries_init_panel);
249
250
251/* Build up the firmware_features bitmask field
252 * using contents of device-tree/ibm,hypertas-functions.
253 * Ultimately this functionality may be moved into prom.c prom_init().
254 */
255void __init fw_feature_init(void)
256{
257 struct device_node * dn;
258 char * hypertas;
259 unsigned int len;
260
261 DBG(" -> fw_feature_init()\n");
262
263 cur_cpu_spec->firmware_features = 0;
264 dn = of_find_node_by_path("/rtas");
265 if (dn == NULL) {
266		printk(KERN_ERR "WARNING! Cannot find RTAS in device-tree!\n");
267 goto no_rtas;
268 }
269
270 hypertas = get_property(dn, "ibm,hypertas-functions", &len);
271 if (hypertas) {
272		while (len > 0) {
273			int i, hypertas_len;
274			/* check value against table of strings */
275			for (i = 0; i < FIRMWARE_MAX_FEATURES; i++) {
276				if (firmware_features_table[i].name &&
277				    strcmp(firmware_features_table[i].name, hypertas) == 0) {
278					/* we have a match */
279					cur_cpu_spec->firmware_features |=
280						firmware_features_table[i].val;
281					break;
282				}
283			}
284			hypertas_len = strlen(hypertas);
285			len -= hypertas_len + 1;
286			hypertas += hypertas_len + 1;
287 }
288 }
289
290 of_node_put(dn);
291 no_rtas:
292 printk(KERN_INFO "firmware_features = 0x%lx\n",
293 cur_cpu_spec->firmware_features);
294
295 DBG(" <- fw_feature_init()\n");
296}
297
298
299static void __init pSeries_discover_pic(void)
300{
301 struct device_node *np;
302 char *typep;
303
304 /*
305 * Setup interrupt mapping options that are needed for finish_device_tree
306 * to properly parse the OF interrupt tree & do the virtual irq mapping
307 */
308 __irq_offset_value = NUM_ISA_INTERRUPTS;
309 ppc64_interrupt_controller = IC_INVALID;
310 for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) {
311 typep = (char *)get_property(np, "compatible", NULL);
312		if (typep && strstr(typep, "open-pic"))
313			ppc64_interrupt_controller = IC_OPEN_PIC;
314		else if (typep && strstr(typep, "ppc-xicp"))
315 ppc64_interrupt_controller = IC_PPC_XIC;
316 else
317 printk("pSeries_discover_pic: failed to recognize"
318 " interrupt-controller\n");
319 break;
320 }
321}
322
323static void pSeries_mach_cpu_die(void)
324{
325 local_irq_disable();
326 idle_task_exit();
327	/* Some hardware requires clearing the CPPR, while other hardware does
328	 * not; it is safe either way.
329 */
330 pSeriesLP_cppr_info(0, 0);
331 rtas_stop_self();
332 /* Should never get here... */
333 BUG();
334 for(;;);
335}
336
337
338/*
339 * Early initialization. Relocation is on but do not reference unbolted pages
340 */
341static void __init pSeries_init_early(void)
342{
343 void *comport;
344 int iommu_off = 0;
345 unsigned int default_speed;
346 u64 physport;
347
348 DBG(" -> pSeries_init_early()\n");
349
350 fw_feature_init();
351
352 if (systemcfg->platform & PLATFORM_LPAR)
353 hpte_init_lpar();
354 else {
355 hpte_init_native();
356 iommu_off = (of_chosen &&
357 get_property(of_chosen, "linux,iommu-off", NULL));
358 }
359
360 generic_find_legacy_serial_ports(&physport, &default_speed);
361
362 if (systemcfg->platform & PLATFORM_LPAR)
363 find_udbg_vterm();
364 else if (physport) {
365 /* Map the uart for udbg. */
366 comport = (void *)__ioremap(physport, 16, _PAGE_NO_CACHE);
367 udbg_init_uart(comport, default_speed);
368
369 ppc_md.udbg_putc = udbg_putc;
370 ppc_md.udbg_getc = udbg_getc;
371 ppc_md.udbg_getc_poll = udbg_getc_poll;
372 DBG("Hello World !\n");
373 }
374
375
376 iommu_init_early_pSeries();
377
378 pSeries_discover_pic();
379
380 DBG(" <- pSeries_init_early()\n");
381}
382
383
384static void pSeries_progress(char *s, unsigned short hex)
385{
386 struct device_node *root;
387 int width, *p;
388 char *os;
389 static int display_character, set_indicator;
390 static int max_width;
391 static DEFINE_SPINLOCK(progress_lock);
392 static int pending_newline = 0; /* did last write end with unprinted newline? */
393
394 if (!rtas.base)
395 return;
396
397 if (max_width == 0) {
398 if ((root = find_path_device("/rtas")) &&
399		    (p = (int *)get_property(root,
400 "ibm,display-line-length",
401 NULL)))
402 max_width = *p;
403 else
404 max_width = 0x10;
405 display_character = rtas_token("display-character");
406 set_indicator = rtas_token("set-indicator");
407 }
408
409 if (display_character == RTAS_UNKNOWN_SERVICE) {
410 /* use hex display if available */
411 if (set_indicator != RTAS_UNKNOWN_SERVICE)
412 rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex);
413 return;
414 }
415
416 spin_lock(&progress_lock);
417
418 /*
419 * Last write ended with newline, but we didn't print it since
420 * it would just clear the bottom line of output. Print it now
421 * instead.
422 *
423 * If no newline is pending, print a CR to start output at the
424 * beginning of the line.
425 */
426 if (pending_newline) {
427 rtas_call(display_character, 1, 1, NULL, '\r');
428 rtas_call(display_character, 1, 1, NULL, '\n');
429 pending_newline = 0;
430 } else {
431 rtas_call(display_character, 1, 1, NULL, '\r');
432 }
433
434 width = max_width;
435 os = s;
436 while (*os) {
437 if (*os == '\n' || *os == '\r') {
438 /* Blank to end of line. */
439 while (width-- > 0)
440 rtas_call(display_character, 1, 1, NULL, ' ');
441
442 /* If newline is the last character, save it
443 * until next call to avoid bumping up the
444 * display output.
445 */
446 if (*os == '\n' && !os[1]) {
447 pending_newline = 1;
448 spin_unlock(&progress_lock);
449 return;
450 }
451
452 /* RTAS wants CR-LF, not just LF */
453
454 if (*os == '\n') {
455 rtas_call(display_character, 1, 1, NULL, '\r');
456 rtas_call(display_character, 1, 1, NULL, '\n');
457 } else {
458 /* CR might be used to re-draw a line, so we'll
459 * leave it alone and not add LF.
460 */
461 rtas_call(display_character, 1, 1, NULL, *os);
462 }
463
464 width = max_width;
465 } else {
466 width--;
467 rtas_call(display_character, 1, 1, NULL, *os);
468 }
469
470 os++;
471
472		/* if we have run off the end of the line, skip the rest of it */
473 if (width <= 0)
474 while ((*os != 0) && (*os != '\n') && (*os != '\r'))
475 os++;
476 }
477
478 /* Blank to end of line. */
479 while (width-- > 0)
480 rtas_call(display_character, 1, 1, NULL, ' ');
481
482 spin_unlock(&progress_lock);
483}
484
485extern void setup_default_decr(void);
486
487/* Some sane defaults: 125 MHz timebase, 1GHz processor */
488#define DEFAULT_TB_FREQ 125000000UL
489#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8)
490
491static void __init pSeries_calibrate_decr(void)
492{
493 struct device_node *cpu;
494 struct div_result divres;
495 unsigned int *fp;
496 int node_found;
497
498 /*
499 * The cpu node should have a timebase-frequency property
500 * to tell us the rate at which the decrementer counts.
501 */
502 cpu = of_find_node_by_type(NULL, "cpu");
503
504 ppc_tb_freq = DEFAULT_TB_FREQ; /* hardcoded default */
505 node_found = 0;
506 if (cpu != 0) {
507 fp = (unsigned int *)get_property(cpu, "timebase-frequency",
508 NULL);
509 if (fp != 0) {
510 node_found = 1;
511 ppc_tb_freq = *fp;
512 }
513 }
514 if (!node_found)
515 printk(KERN_ERR "WARNING: Estimating decrementer frequency "
516 "(not found)\n");
517
518 ppc_proc_freq = DEFAULT_PROC_FREQ;
519 node_found = 0;
520 if (cpu != 0) {
521 fp = (unsigned int *)get_property(cpu, "clock-frequency",
522 NULL);
523 if (fp != 0) {
524 node_found = 1;
525 ppc_proc_freq = *fp;
526 }
527 }
528 if (!node_found)
529 printk(KERN_ERR "WARNING: Estimating processor frequency "
530 "(not found)\n");
531
532 of_node_put(cpu);
533
534 printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n",
535 ppc_tb_freq/1000000, ppc_tb_freq%1000000);
536 printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n",
537 ppc_proc_freq/1000000, ppc_proc_freq%1000000);
538
539 tb_ticks_per_jiffy = ppc_tb_freq / HZ;
540 tb_ticks_per_sec = tb_ticks_per_jiffy * HZ;
541 tb_ticks_per_usec = ppc_tb_freq / 1000000;
542 tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
543 div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres);
544 tb_to_xs = divres.result_low;
545
546 setup_default_decr();
547}
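/*
 * Illustrative arithmetic, assuming the 125MHz default timebase and
 * HZ == 1000 (both assumptions, shown only to make the formulas above
 * concrete):
 */
#if 0
static void timebase_example(void)
{
	unsigned long tb_freq = 125000000UL;		/* DEFAULT_TB_FREQ */
	unsigned long per_jiffy = tb_freq / 1000;	/* 125000 ticks per jiffy */
	unsigned long per_usec = tb_freq / 1000000;	/* 125 ticks per usec */

	(void)per_jiffy;
	(void)per_usec;
}
#endif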
548
549static int pSeries_check_legacy_ioport(unsigned int baseport)
550{
551 struct device_node *np;
552
553#define I8042_DATA_REG 0x60
554#define FDC_BASE 0x3f0
555
556
557 switch(baseport) {
558 case I8042_DATA_REG:
559 np = of_find_node_by_type(NULL, "8042");
560 if (np == NULL)
561 return -ENODEV;
562 of_node_put(np);
563 break;
564 case FDC_BASE:
565 np = of_find_node_by_type(NULL, "fdc");
566 if (np == NULL)
567 return -ENODEV;
568 of_node_put(np);
569 break;
570 }
571 return 0;
572}
573
574/*
575 * Called very early, MMU is off, device-tree isn't unflattened
576 */
577extern struct machdep_calls pSeries_md;
578
579static int __init pSeries_probe(int platform)
580{
581 if (platform != PLATFORM_PSERIES &&
582 platform != PLATFORM_PSERIES_LPAR)
583 return 0;
584
585 /* if we have some ppc_md fixups for LPAR to do, do
586 * it here ...
587 */
588
589 return 1;
590}
591
592struct machdep_calls __initdata pSeries_md = {
593 .probe = pSeries_probe,
594 .setup_arch = pSeries_setup_arch,
595 .init_early = pSeries_init_early,
596 .get_cpuinfo = pSeries_get_cpuinfo,
597 .log_error = pSeries_log_error,
598 .pcibios_fixup = pSeries_final_fixup,
599 .restart = rtas_restart,
600 .power_off = rtas_power_off,
601 .halt = rtas_halt,
602 .panic = rtas_os_term,
603 .cpu_die = pSeries_mach_cpu_die,
604 .get_boot_time = pSeries_get_boot_time,
605 .get_rtc_time = pSeries_get_rtc_time,
606 .set_rtc_time = pSeries_set_rtc_time,
607 .calibrate_decr = pSeries_calibrate_decr,
608 .progress = pSeries_progress,
609 .check_legacy_ioport = pSeries_check_legacy_ioport,
610 .system_reset_exception = pSeries_system_reset_exception,
611 .machine_check_exception = pSeries_machine_check_exception,
612};
diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c
new file mode 100644
index 000000000000..c60d8cb2b84d
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_smp.c
@@ -0,0 +1,451 @@
1/*
2 * SMP support for pSeries machines.
3 *
4 * Dave Engebretsen, Peter Bergner, and
5 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
6 *
7 * Plus various changes from other IBM teams...
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#undef DEBUG
16
17#include <linux/config.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/sched.h>
21#include <linux/smp.h>
22#include <linux/interrupt.h>
23#include <linux/delay.h>
24#include <linux/init.h>
25#include <linux/spinlock.h>
26#include <linux/cache.h>
27#include <linux/err.h>
28#include <linux/sysdev.h>
29#include <linux/cpu.h>
30
31#include <asm/ptrace.h>
32#include <asm/atomic.h>
33#include <asm/irq.h>
34#include <asm/page.h>
35#include <asm/pgtable.h>
36#include <asm/io.h>
37#include <asm/prom.h>
38#include <asm/smp.h>
39#include <asm/paca.h>
40#include <asm/time.h>
41#include <asm/machdep.h>
42#include <asm/xics.h>
43#include <asm/cputable.h>
44#include <asm/system.h>
45#include <asm/rtas.h>
46#include <asm/plpar_wrappers.h>
47#include <asm/pSeries_reconfig.h>
48
49#include "mpic.h"
50
51#ifdef DEBUG
52#define DBG(fmt...) udbg_printf(fmt)
53#else
54#define DBG(fmt...)
55#endif
56
57/*
58 * The primary thread of each non-boot processor is recorded here before
59 * smp init.
60 */
61static cpumask_t of_spin_map;
62
63extern void pSeries_secondary_smp_init(unsigned long);
64
65#ifdef CONFIG_HOTPLUG_CPU
66
67/* Get state of physical CPU.
68 * Return codes:
69 * 0 - The processor is in the RTAS stopped state
70 * 1 - stop-self is in progress
71 * 2 - The processor is not in the RTAS stopped state
72 * -1 - Hardware Error
73 * -2 - Hardware Busy, Try again later.
74 */
75static int query_cpu_stopped(unsigned int pcpu)
76{
77 int cpu_status;
78 int status, qcss_tok;
79
80 qcss_tok = rtas_token("query-cpu-stopped-state");
81 if (qcss_tok == RTAS_UNKNOWN_SERVICE)
82 return -1;
83 status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
84 if (status != 0) {
85 printk(KERN_ERR
86 "RTAS query-cpu-stopped-state failed: %i\n", status);
87 return status;
88 }
89
90 return cpu_status;
91}
92
93int pSeries_cpu_disable(void)
94{
95 systemcfg->processorCount--;
96
97	/* fix boot_cpuid here */
98 if (smp_processor_id() == boot_cpuid)
99 boot_cpuid = any_online_cpu(cpu_online_map);
100
101 /* FIXME: abstract this to not be platform specific later on */
102 xics_migrate_irqs_away();
103 return 0;
104}
105
106void pSeries_cpu_die(unsigned int cpu)
107{
108 int tries;
109 int cpu_status;
110 unsigned int pcpu = get_hard_smp_processor_id(cpu);
111
112 for (tries = 0; tries < 25; tries++) {
113 cpu_status = query_cpu_stopped(pcpu);
114 if (cpu_status == 0 || cpu_status == -1)
115 break;
116 msleep(200);
117 }
118 if (cpu_status != 0) {
119 printk("Querying DEAD? cpu %i (%i) shows %i\n",
120 cpu, pcpu, cpu_status);
121 }
122
123	/* Isolation and deallocation are definitely done by
124	 * drslot_chrp_cpu.  If they were not, they would be
125 * done here. Change isolate state to Isolate and
126 * change allocation-state to Unusable.
127 */
128 paca[cpu].cpu_start = 0;
129}
130
131/*
132 * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle
133 * here is that a cpu device node may represent up to two logical cpus
134 * in the SMT case. We must honor the assumption in other code that
135 * the logical ids for sibling SMT threads x and y are adjacent, such
136 * that x^1 == y and y^1 == x.
137 */
138static int pSeries_add_processor(struct device_node *np)
139{
140 unsigned int cpu;
141 cpumask_t candidate_map, tmp = CPU_MASK_NONE;
142 int err = -ENOSPC, len, nthreads, i;
143 u32 *intserv;
144
145 intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
146 if (!intserv)
147 return 0;
148
149 nthreads = len / sizeof(u32);
150 for (i = 0; i < nthreads; i++)
151 cpu_set(i, tmp);
152
153 lock_cpu_hotplug();
154
155 BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map));
156
157 /* Get a bitmap of unoccupied slots. */
158 cpus_xor(candidate_map, cpu_possible_map, cpu_present_map);
159 if (cpus_empty(candidate_map)) {
160 /* If we get here, it most likely means that NR_CPUS is
161 * less than the partition's max processors setting.
162 */
163 printk(KERN_ERR "Cannot add cpu %s; this system configuration"
164 " supports %d logical cpus.\n", np->full_name,
165 cpus_weight(cpu_possible_map));
166 goto out_unlock;
167 }
168
169 while (!cpus_empty(tmp))
170 if (cpus_subset(tmp, candidate_map))
171 /* Found a range where we can insert the new cpu(s) */
172 break;
173 else
174 cpus_shift_left(tmp, tmp, nthreads);
175
176 if (cpus_empty(tmp)) {
177 printk(KERN_ERR "Unable to find space in cpu_present_map for"
178 " processor %s with %d thread(s)\n", np->name,
179 nthreads);
180 goto out_unlock;
181 }
182
183 for_each_cpu_mask(cpu, tmp) {
184 BUG_ON(cpu_isset(cpu, cpu_present_map));
185 cpu_set(cpu, cpu_present_map);
186 set_hard_smp_processor_id(cpu, *intserv++);
187 }
188 err = 0;
189out_unlock:
190 unlock_cpu_hotplug();
191 return err;
192}
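/*
 * Editorial sketch, not part of the original file: the xor-sibling
 * convention documented above means that with two SMT threads the
 * logical ids pair up as (0,1), (2,3), (4,5), ...; the shifting loop
 * only ever places a new core at an nthreads-aligned slot, so cpu ^ 1
 * always names the other thread of the same core:
 */
#if 0	/* illustration only */
static inline unsigned int smt_sibling(unsigned int cpu)
{
	return cpu ^ 1;		/* x^1 == y and y^1 == x */
}
#endif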
193
194/*
195 * Update the present map for a cpu node which is going away, and set
196 * the hard id in the paca(s) to -1 to be consistent with boot time
197 * convention for non-present cpus.
198 */
199static void pSeries_remove_processor(struct device_node *np)
200{
201 unsigned int cpu;
202 int len, nthreads, i;
203 u32 *intserv;
204
205 intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
206 if (!intserv)
207 return;
208
209 nthreads = len / sizeof(u32);
210
211 lock_cpu_hotplug();
212 for (i = 0; i < nthreads; i++) {
213 for_each_present_cpu(cpu) {
214 if (get_hard_smp_processor_id(cpu) != intserv[i])
215 continue;
216 BUG_ON(cpu_online(cpu));
217 cpu_clear(cpu, cpu_present_map);
218 set_hard_smp_processor_id(cpu, -1);
219 break;
220 }
221 if (cpu == NR_CPUS)
222 printk(KERN_WARNING "Could not find cpu to remove "
223 "with physical id 0x%x\n", intserv[i]);
224 }
225 unlock_cpu_hotplug();
226}
227
228static int pSeries_smp_notifier(struct notifier_block *nb, unsigned long action, void *node)
229{
230 int err = NOTIFY_OK;
231
232 switch (action) {
233 case PSERIES_RECONFIG_ADD:
234 if (pSeries_add_processor(node))
235 err = NOTIFY_BAD;
236 break;
237 case PSERIES_RECONFIG_REMOVE:
238 pSeries_remove_processor(node);
239 break;
240 default:
241 err = NOTIFY_DONE;
242 break;
243 }
244 return err;
245}
246
247static struct notifier_block pSeries_smp_nb = {
248 .notifier_call = pSeries_smp_notifier,
249};
250
251#endif /* CONFIG_HOTPLUG_CPU */
252
253/**
254 * smp_startup_cpu() - start the given cpu
255 *
256 * At boot time, there is nothing to do for primary threads which were
257 * started from Open Firmware. For anything else, call RTAS with the
258 * appropriate start location.
259 *
260 * Returns:
261 * 0 - failure
262 * 1 - success
263 */
264static inline int __devinit smp_startup_cpu(unsigned int lcpu)
265{
266 int status;
267 unsigned long start_here = __pa((u32)*((unsigned long *)
268 pSeries_secondary_smp_init));
269 unsigned int pcpu;
270
271 if (cpu_isset(lcpu, of_spin_map))
272 /* Already started by OF and sitting in spin loop */
273 return 1;
274
275 pcpu = get_hard_smp_processor_id(lcpu);
276
277	/* Fix up atomic count: it exited inside an IRQ handler. */
278 paca[lcpu].__current->thread_info->preempt_count = 0;
279
280 status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
281 pcpu, start_here, lcpu);
282 if (status != 0) {
283 printk(KERN_ERR "start-cpu failed: %i\n", status);
284 return 0;
285 }
286 return 1;
287}
288
289static inline void smp_xics_do_message(int cpu, int msg)
290{
291 set_bit(msg, &xics_ipi_message[cpu].value);
292 mb();
293 xics_cause_IPI(cpu);
294}
295
296static void smp_xics_message_pass(int target, int msg)
297{
298 unsigned int i;
299
300 if (target < NR_CPUS) {
301 smp_xics_do_message(target, msg);
302 } else {
303 for_each_online_cpu(i) {
304 if (target == MSG_ALL_BUT_SELF
305 && i == smp_processor_id())
306 continue;
307 smp_xics_do_message(i, msg);
308 }
309 }
310}
311
312static int __init smp_xics_probe(void)
313{
314 xics_request_IPIs();
315
316 return cpus_weight(cpu_possible_map);
317}
318
319static void __devinit smp_xics_setup_cpu(int cpu)
320{
321 if (cpu != boot_cpuid)
322 xics_setup_cpu();
323
324 if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
325 vpa_init(cpu);
326
327 cpu_clear(cpu, of_spin_map);
328
329 /*
330 * Put the calling processor into the GIQ. This is really only
331 * necessary from a secondary thread as the OF start-cpu interface
332 * performs this function for us on primary threads.
333 */
334 rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
335 (1UL << interrupt_server_size) - 1 - default_distrib_server, 1);
336}
337
338static DEFINE_SPINLOCK(timebase_lock);
339static unsigned long timebase = 0;
340
341static void __devinit pSeries_give_timebase(void)
342{
343 spin_lock(&timebase_lock);
344 rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
345 timebase = get_tb();
346 spin_unlock(&timebase_lock);
347
348 while (timebase)
349 barrier();
350 rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
351}
352
353static void __devinit pSeries_take_timebase(void)
354{
355 while (!timebase)
356 barrier();
357 spin_lock(&timebase_lock);
358 set_tb(timebase >> 32, timebase & 0xffffffff);
359 timebase = 0;
360 spin_unlock(&timebase_lock);
361}
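/*
 * Editorial note, not part of the original file: give/take form a
 * small rendezvous around the shared `timebase` variable while RTAS
 * keeps the timebase frozen:
 *
 *	giver				taker
 *	-----				-----
 *	freeze-time-base
 *	timebase = get_tb()
 *					while (!timebase) barrier();
 *					set_tb(hi, lo)
 *					timebase = 0
 *	while (timebase) barrier();
 *	thaw-time-base
 */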
362
363static void __devinit smp_pSeries_kick_cpu(int nr)
364{
365 BUG_ON(nr < 0 || nr >= NR_CPUS);
366
367 if (!smp_startup_cpu(nr))
368 return;
369
370 /*
371 * The processor is currently spinning, waiting for the
372	 * cpu_start field to become non-zero. After we set cpu_start,
373	 * the processor will continue on to secondary_start.
374 */
375 paca[nr].cpu_start = 1;
376}
377
378static int smp_pSeries_cpu_bootable(unsigned int nr)
379{
380 /* Special case - we inhibit secondary thread startup
381 * during boot if the user requests it. Odd-numbered
382 * cpus are assumed to be secondary threads.
383 */
384 if (system_state < SYSTEM_RUNNING &&
385 cur_cpu_spec->cpu_features & CPU_FTR_SMT &&
386 !smt_enabled_at_boot && nr % 2 != 0)
387 return 0;
388
389 return 1;
390}
391
392static struct smp_ops_t pSeries_mpic_smp_ops = {
393 .message_pass = smp_mpic_message_pass,
394 .probe = smp_mpic_probe,
395 .kick_cpu = smp_pSeries_kick_cpu,
396 .setup_cpu = smp_mpic_setup_cpu,
397};
398
399static struct smp_ops_t pSeries_xics_smp_ops = {
400 .message_pass = smp_xics_message_pass,
401 .probe = smp_xics_probe,
402 .kick_cpu = smp_pSeries_kick_cpu,
403 .setup_cpu = smp_xics_setup_cpu,
404 .cpu_bootable = smp_pSeries_cpu_bootable,
405};
406
407/* This is called very early */
408void __init smp_init_pSeries(void)
409{
410 int i;
411
412 DBG(" -> smp_init_pSeries()\n");
413
414 if (ppc64_interrupt_controller == IC_OPEN_PIC)
415 smp_ops = &pSeries_mpic_smp_ops;
416 else
417 smp_ops = &pSeries_xics_smp_ops;
418
419#ifdef CONFIG_HOTPLUG_CPU
420 smp_ops->cpu_disable = pSeries_cpu_disable;
421 smp_ops->cpu_die = pSeries_cpu_die;
422
423 /* Processors can be added/removed only on LPAR */
424 if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
425 pSeries_reconfig_notifier_register(&pSeries_smp_nb);
426#endif
427
428 /* Mark threads which are still spinning in hold loops. */
429 if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
430 for_each_present_cpu(i) {
431 if (i % 2 == 0)
432 /*
433 * Even-numbered logical cpus correspond to
434 * primary threads.
435 */
436 cpu_set(i, of_spin_map);
437 }
438 else
439 of_spin_map = cpu_present_map;
440
441 cpu_clear(boot_cpuid, of_spin_map);
442
443 /* Non-lpar has additional take/give timebase */
444 if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
445 smp_ops->give_timebase = pSeries_give_timebase;
446 smp_ops->take_timebase = pSeries_take_timebase;
447 }
448
449 DBG(" <- smp_init_pSeries()\n");
450}
451
diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c
new file mode 100644
index 000000000000..a3e0975c26c1
--- /dev/null
+++ b/arch/ppc64/kernel/pacaData.c
@@ -0,0 +1,224 @@
1/*
2 * (C) 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10#include <linux/config.h>
11#include <linux/types.h>
12#include <linux/threads.h>
13#include <linux/module.h>
14
15#include <asm/processor.h>
16#include <asm/ptrace.h>
17#include <asm/page.h>
18
19#include <asm/lppaca.h>
20#include <asm/iSeries/ItLpQueue.h>
21#include <asm/paca.h>
22
23static union {
24 struct systemcfg data;
25 u8 page[PAGE_SIZE];
26} systemcfg_store __page_aligned;
27struct systemcfg *systemcfg = &systemcfg_store.data;
28EXPORT_SYMBOL(systemcfg);
29
30
31/* This symbol is provided by the linker - let it fill in the paca
32 * field correctly */
33extern unsigned long __toc_start;
34
35/* The Paca is an array with one entry per processor. Each contains an
36 * lppaca, which contains the information shared between the
37 * hypervisor and Linux. Each also contains an ItLpRegSave area which
38 * is used by the hypervisor to save registers.
39 * On systems with hardware multi-threading, there are two threads
40 * per processor. The Paca array must contain an entry for each thread.
41 * The VPD Areas will give a max logical processors = 2 * max physical
42 * processors. The processor VPD array needs one entry per physical
43 * processor (not thread).
44 */
45#ifdef CONFIG_PPC_ISERIES
46#define EXTRA_INITS(number, lpq) \
47 .lppaca_ptr = &paca[number].lppaca, \
48 .lpqueue_ptr = (lpq), /* &xItLpQueue, */ \
49 .reg_save_ptr = &paca[number].reg_save, \
50 .reg_save = { \
51 .xDesc = 0xd397d9e2, /* "LpRS" */ \
52 .xSize = sizeof(struct ItLpRegSave) \
53 },
54#else
55#define EXTRA_INITS(number, lpq)
56#endif
57
58#define PACAINITDATA(number,start,lpq,asrr,asrv) \
59{ \
60 .lock_token = 0x8000, \
61 .paca_index = (number), /* Paca Index */ \
62 .default_decr = 0x00ff0000, /* Initial Decr */ \
63 .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \
64 .stab_real = (asrr), /* Real pointer to segment table */ \
65 .stab_addr = (asrv), /* Virt pointer to segment table */ \
66 .cpu_start = (start), /* Processor start */ \
67 .hw_cpu_id = 0xffff, \
68 .lppaca = { \
69 .desc = 0xd397d781, /* "LpPa" */ \
70 .size = sizeof(struct lppaca), \
71 .dyn_proc_status = 2, \
72 .decr_val = 0x00ff0000, \
73 .fpregs_in_use = 1, \
74 .end_of_quantum = 0xfffffffffffffffful, \
75 .slb_count = 64, \
76 }, \
77 EXTRA_INITS((number), (lpq)) \
78}
79
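/*
 * Editorial note, not part of the original file: entry 0 below is
 * initialised with start == 1 (the boot cpu runs from the outset),
 * while every other entry uses start == 0; a secondary is released
 * later when smp_pSeries_kick_cpu() in pSeries_smp.c sets
 * paca[nr].cpu_start = 1.
 */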
80struct paca_struct paca[] = {
81#ifdef CONFIG_PPC_ISERIES
82 PACAINITDATA( 0, 1, &xItLpQueue, 0, STAB0_VIRT_ADDR),
83#else
84 PACAINITDATA( 0, 1, NULL, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR),
85#endif
86#if NR_CPUS > 1
87 PACAINITDATA( 1, 0, NULL, 0, 0),
88 PACAINITDATA( 2, 0, NULL, 0, 0),
89 PACAINITDATA( 3, 0, NULL, 0, 0),
90#if NR_CPUS > 4
91 PACAINITDATA( 4, 0, NULL, 0, 0),
92 PACAINITDATA( 5, 0, NULL, 0, 0),
93 PACAINITDATA( 6, 0, NULL, 0, 0),
94 PACAINITDATA( 7, 0, NULL, 0, 0),
95#if NR_CPUS > 8
96 PACAINITDATA( 8, 0, NULL, 0, 0),
97 PACAINITDATA( 9, 0, NULL, 0, 0),
98 PACAINITDATA(10, 0, NULL, 0, 0),
99 PACAINITDATA(11, 0, NULL, 0, 0),
100 PACAINITDATA(12, 0, NULL, 0, 0),
101 PACAINITDATA(13, 0, NULL, 0, 0),
102 PACAINITDATA(14, 0, NULL, 0, 0),
103 PACAINITDATA(15, 0, NULL, 0, 0),
104 PACAINITDATA(16, 0, NULL, 0, 0),
105 PACAINITDATA(17, 0, NULL, 0, 0),
106 PACAINITDATA(18, 0, NULL, 0, 0),
107 PACAINITDATA(19, 0, NULL, 0, 0),
108 PACAINITDATA(20, 0, NULL, 0, 0),
109 PACAINITDATA(21, 0, NULL, 0, 0),
110 PACAINITDATA(22, 0, NULL, 0, 0),
111 PACAINITDATA(23, 0, NULL, 0, 0),
112 PACAINITDATA(24, 0, NULL, 0, 0),
113 PACAINITDATA(25, 0, NULL, 0, 0),
114 PACAINITDATA(26, 0, NULL, 0, 0),
115 PACAINITDATA(27, 0, NULL, 0, 0),
116 PACAINITDATA(28, 0, NULL, 0, 0),
117 PACAINITDATA(29, 0, NULL, 0, 0),
118 PACAINITDATA(30, 0, NULL, 0, 0),
119 PACAINITDATA(31, 0, NULL, 0, 0),
120#if NR_CPUS > 32
121 PACAINITDATA(32, 0, NULL, 0, 0),
122 PACAINITDATA(33, 0, NULL, 0, 0),
123 PACAINITDATA(34, 0, NULL, 0, 0),
124 PACAINITDATA(35, 0, NULL, 0, 0),
125 PACAINITDATA(36, 0, NULL, 0, 0),
126 PACAINITDATA(37, 0, NULL, 0, 0),
127 PACAINITDATA(38, 0, NULL, 0, 0),
128 PACAINITDATA(39, 0, NULL, 0, 0),
129 PACAINITDATA(40, 0, NULL, 0, 0),
130 PACAINITDATA(41, 0, NULL, 0, 0),
131 PACAINITDATA(42, 0, NULL, 0, 0),
132 PACAINITDATA(43, 0, NULL, 0, 0),
133 PACAINITDATA(44, 0, NULL, 0, 0),
134 PACAINITDATA(45, 0, NULL, 0, 0),
135 PACAINITDATA(46, 0, NULL, 0, 0),
136 PACAINITDATA(47, 0, NULL, 0, 0),
137 PACAINITDATA(48, 0, NULL, 0, 0),
138 PACAINITDATA(49, 0, NULL, 0, 0),
139 PACAINITDATA(50, 0, NULL, 0, 0),
140 PACAINITDATA(51, 0, NULL, 0, 0),
141 PACAINITDATA(52, 0, NULL, 0, 0),
142 PACAINITDATA(53, 0, NULL, 0, 0),
143 PACAINITDATA(54, 0, NULL, 0, 0),
144 PACAINITDATA(55, 0, NULL, 0, 0),
145 PACAINITDATA(56, 0, NULL, 0, 0),
146 PACAINITDATA(57, 0, NULL, 0, 0),
147 PACAINITDATA(58, 0, NULL, 0, 0),
148 PACAINITDATA(59, 0, NULL, 0, 0),
149 PACAINITDATA(60, 0, NULL, 0, 0),
150 PACAINITDATA(61, 0, NULL, 0, 0),
151 PACAINITDATA(62, 0, NULL, 0, 0),
152 PACAINITDATA(63, 0, NULL, 0, 0),
153#if NR_CPUS > 64
154 PACAINITDATA(64, 0, NULL, 0, 0),
155 PACAINITDATA(65, 0, NULL, 0, 0),
156 PACAINITDATA(66, 0, NULL, 0, 0),
157 PACAINITDATA(67, 0, NULL, 0, 0),
158 PACAINITDATA(68, 0, NULL, 0, 0),
159 PACAINITDATA(69, 0, NULL, 0, 0),
160 PACAINITDATA(70, 0, NULL, 0, 0),
161 PACAINITDATA(71, 0, NULL, 0, 0),
162 PACAINITDATA(72, 0, NULL, 0, 0),
163 PACAINITDATA(73, 0, NULL, 0, 0),
164 PACAINITDATA(74, 0, NULL, 0, 0),
165 PACAINITDATA(75, 0, NULL, 0, 0),
166 PACAINITDATA(76, 0, NULL, 0, 0),
167 PACAINITDATA(77, 0, NULL, 0, 0),
168 PACAINITDATA(78, 0, NULL, 0, 0),
169 PACAINITDATA(79, 0, NULL, 0, 0),
170 PACAINITDATA(80, 0, NULL, 0, 0),
171 PACAINITDATA(81, 0, NULL, 0, 0),
172 PACAINITDATA(82, 0, NULL, 0, 0),
173 PACAINITDATA(83, 0, NULL, 0, 0),
174 PACAINITDATA(84, 0, NULL, 0, 0),
175 PACAINITDATA(85, 0, NULL, 0, 0),
176 PACAINITDATA(86, 0, NULL, 0, 0),
177 PACAINITDATA(87, 0, NULL, 0, 0),
178 PACAINITDATA(88, 0, NULL, 0, 0),
179 PACAINITDATA(89, 0, NULL, 0, 0),
180 PACAINITDATA(90, 0, NULL, 0, 0),
181 PACAINITDATA(91, 0, NULL, 0, 0),
182 PACAINITDATA(92, 0, NULL, 0, 0),
183 PACAINITDATA(93, 0, NULL, 0, 0),
184 PACAINITDATA(94, 0, NULL, 0, 0),
185 PACAINITDATA(95, 0, NULL, 0, 0),
186 PACAINITDATA(96, 0, NULL, 0, 0),
187 PACAINITDATA(97, 0, NULL, 0, 0),
188 PACAINITDATA(98, 0, NULL, 0, 0),
189 PACAINITDATA(99, 0, NULL, 0, 0),
190 PACAINITDATA(100, 0, NULL, 0, 0),
191 PACAINITDATA(101, 0, NULL, 0, 0),
192 PACAINITDATA(102, 0, NULL, 0, 0),
193 PACAINITDATA(103, 0, NULL, 0, 0),
194 PACAINITDATA(104, 0, NULL, 0, 0),
195 PACAINITDATA(105, 0, NULL, 0, 0),
196 PACAINITDATA(106, 0, NULL, 0, 0),
197 PACAINITDATA(107, 0, NULL, 0, 0),
198 PACAINITDATA(108, 0, NULL, 0, 0),
199 PACAINITDATA(109, 0, NULL, 0, 0),
200 PACAINITDATA(110, 0, NULL, 0, 0),
201 PACAINITDATA(111, 0, NULL, 0, 0),
202 PACAINITDATA(112, 0, NULL, 0, 0),
203 PACAINITDATA(113, 0, NULL, 0, 0),
204 PACAINITDATA(114, 0, NULL, 0, 0),
205 PACAINITDATA(115, 0, NULL, 0, 0),
206 PACAINITDATA(116, 0, NULL, 0, 0),
207 PACAINITDATA(117, 0, NULL, 0, 0),
208 PACAINITDATA(118, 0, NULL, 0, 0),
209 PACAINITDATA(119, 0, NULL, 0, 0),
210 PACAINITDATA(120, 0, NULL, 0, 0),
211 PACAINITDATA(121, 0, NULL, 0, 0),
212 PACAINITDATA(122, 0, NULL, 0, 0),
213 PACAINITDATA(123, 0, NULL, 0, 0),
214 PACAINITDATA(124, 0, NULL, 0, 0),
215 PACAINITDATA(125, 0, NULL, 0, 0),
216 PACAINITDATA(126, 0, NULL, 0, 0),
217 PACAINITDATA(127, 0, NULL, 0, 0),
218#endif
219#endif
220#endif
221#endif
222#endif
223};
224EXPORT_SYMBOL(paca);
diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c
new file mode 100644
index 000000000000..fdd8f7869a68
--- /dev/null
+++ b/arch/ppc64/kernel/pci.c
@@ -0,0 +1,942 @@
1/*
2 * Port for PPC64 David Engebretsen, IBM Corp.
3 * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
4 *
5 * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
6 * Rework, based on alpha PCI code.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#undef DEBUG
15
16#include <linux/config.h>
17#include <linux/kernel.h>
18#include <linux/pci.h>
19#include <linux/string.h>
20#include <linux/init.h>
21#include <linux/bootmem.h>
22#include <linux/mm.h>
23#include <linux/list.h>
24
25#include <asm/processor.h>
26#include <asm/io.h>
27#include <asm/prom.h>
28#include <asm/pci-bridge.h>
29#include <asm/byteorder.h>
30#include <asm/irq.h>
31#include <asm/machdep.h>
32#include <asm/udbg.h>
33
34#include "pci.h"
35
36#ifdef DEBUG
37#define DBG(fmt...) udbg_printf(fmt)
38#else
39#define DBG(fmt...)
40#endif
41
42unsigned long pci_probe_only = 1;
43unsigned long pci_assign_all_buses = 0;
44
45/*
46 * Mask of legal IO pages under MAX_ISA_PORT. This is to ensure we
47 * don't touch devices we don't have access to.
48 */
49unsigned long io_page_mask;
50
51EXPORT_SYMBOL(io_page_mask);
52
53
54unsigned int pcibios_assign_all_busses(void)
55{
56 return pci_assign_all_buses;
57}
58
59/* pci_io_base -- the base address from which IO BARs are offset.
60 * This is the lowest I/O base address (so bar values are always positive),
61 * and it *must* be the start of ISA space if an ISA bus exists because
62 * ISA drivers use hard coded offsets. If no ISA bus exists a dummy
63 * page is mapped and isa_io_limit prevents access to it.
64 */
65unsigned long isa_io_base; /* NULL if no ISA bus */
66EXPORT_SYMBOL(isa_io_base);
67unsigned long pci_io_base;
68EXPORT_SYMBOL(pci_io_base);
69
70void iSeries_pcibios_init(void);
71
72LIST_HEAD(hose_list);
73
74struct dma_mapping_ops pci_dma_ops;
75EXPORT_SYMBOL(pci_dma_ops);
76
77int global_phb_number; /* Global phb counter */
78
79/* Cached ISA bridge dev. */
80struct pci_dev *ppc64_isabridge_dev = NULL;
81
82static void fixup_broken_pcnet32(struct pci_dev* dev)
83{
84	if ((dev->class >> 8) == PCI_CLASS_NETWORK_ETHERNET) {
85 dev->vendor = PCI_VENDOR_ID_AMD;
86 pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
87 pci_name_device(dev);
88 }
89}
90DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
91
92void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
93 struct resource *res)
94{
95 unsigned long offset = 0;
96 struct pci_controller *hose = pci_bus_to_host(dev->bus);
97
98 if (!hose)
99 return;
100
101 if (res->flags & IORESOURCE_IO)
102 offset = (unsigned long)hose->io_base_virt - pci_io_base;
103
104 if (res->flags & IORESOURCE_MEM)
105 offset = hose->pci_mem_offset;
106
107 region->start = res->start - offset;
108 region->end = res->end - offset;
109}
110
111#ifdef CONFIG_HOTPLUG
112EXPORT_SYMBOL(pcibios_resource_to_bus);
113#endif
114
115/*
116 * We need to avoid collisions with `mirrored' VGA ports
117 * and other strange ISA hardware, so we always want the
118 * addresses to be allocated in the 0x000-0x0ff region
119 * modulo 0x400.
120 *
121 * Why? Because some silly external IO cards only decode
122 * the low 10 bits of the IO address. The 0x00-0xff region
123 * is reserved for motherboard devices that decode all 16
124 * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
125 * but we want to try to avoid allocating at 0x2900-0x2bff
126 * which might be mirrored at 0x0100-0x03ff.
127 */
128void pcibios_align_resource(void *data, struct resource *res,
129 unsigned long size, unsigned long align)
130{
131 struct pci_dev *dev = data;
132 struct pci_controller *hose = pci_bus_to_host(dev->bus);
133 unsigned long start = res->start;
134 unsigned long alignto;
135
136 if (res->flags & IORESOURCE_IO) {
137 unsigned long offset = (unsigned long)hose->io_base_virt -
138 pci_io_base;
139 /* Make sure we start at our min on all hoses */
140 if (start - offset < PCIBIOS_MIN_IO)
141 start = PCIBIOS_MIN_IO + offset;
142
143 /*
144 * Put everything into 0x00-0xff region modulo 0x400
145 */
146 if (start & 0x300)
147 start = (start + 0x3ff) & ~0x3ff;
148
149 } else if (res->flags & IORESOURCE_MEM) {
150 /* Make sure we start at our min on all hoses */
151 if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
152 start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
153
154 /* Align to multiple of size of minimum base. */
155 alignto = max(0x1000UL, align);
156 start = ALIGN(start, alignto);
157 }
158
159 res->start = start;
160}
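/*
 * Editorial example, not part of the original file: a tentative I/O
 * start of 0x2950 has bits in 0x300 set, i.e. it falls in a window
 * that a 10-bit decoder could mirror onto 0x100-0x3ff, so the code
 * above rounds it up to the next 0x400 boundary:
 * (0x2950 + 0x3ff) & ~0x3ff == 0x2c00.
 */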
161
162static DEFINE_SPINLOCK(hose_spinlock);
163
164/*
165 * Initialize the common variables of a pci_controller (phb).
166 */
167void __devinit pci_setup_pci_controller(struct pci_controller *hose)
168{
169 memset(hose, 0, sizeof(struct pci_controller));
170
171 spin_lock(&hose_spinlock);
172 hose->global_number = global_phb_number++;
173 list_add_tail(&hose->list_node, &hose_list);
174 spin_unlock(&hose_spinlock);
175}
176
177static void __init pcibios_claim_one_bus(struct pci_bus *b)
178{
179 struct pci_dev *dev;
180 struct pci_bus *child_bus;
181
182 list_for_each_entry(dev, &b->devices, bus_list) {
183 int i;
184
185 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
186 struct resource *r = &dev->resource[i];
187
188 if (r->parent || !r->start || !r->flags)
189 continue;
190 pci_claim_resource(dev, i);
191 }
192 }
193
194 list_for_each_entry(child_bus, &b->children, node)
195 pcibios_claim_one_bus(child_bus);
196}
197
198#ifndef CONFIG_PPC_ISERIES
199static void __init pcibios_claim_of_setup(void)
200{
201 struct pci_bus *b;
202
203 list_for_each_entry(b, &pci_root_buses, node)
204 pcibios_claim_one_bus(b);
205}
206#endif
207
208static int __init pcibios_init(void)
209{
210 struct pci_controller *hose, *tmp;
211 struct pci_bus *bus;
212
213	/* For now, override phys_mem_access_prot. If we need to, we may
214	 * later move that initialization into each ppc_md.
215 */
216 ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
217
218#ifdef CONFIG_PPC_ISERIES
219 iSeries_pcibios_init();
220#endif
221
222 printk("PCI: Probing PCI hardware\n");
223
224 /* Scan all of the recorded PCI controllers. */
225 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
226 hose->last_busno = 0xff;
227 bus = pci_scan_bus(hose->first_busno, hose->ops,
228 hose->arch_data);
229 hose->bus = bus;
230 hose->last_busno = bus->subordinate;
231 }
232
233#ifndef CONFIG_PPC_ISERIES
234 if (pci_probe_only)
235 pcibios_claim_of_setup();
236 else
237 /* FIXME: `else' will be removed when
238 pci_assign_unassigned_resources() is able to work
239 correctly with [partially] allocated PCI tree. */
240 pci_assign_unassigned_resources();
241#endif /* !CONFIG_PPC_ISERIES */
242
243 /* Call machine dependent final fixup */
244 if (ppc_md.pcibios_fixup)
245 ppc_md.pcibios_fixup();
246
247 /* Cache the location of the ISA bridge (if we have one) */
248 ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
249 if (ppc64_isabridge_dev != NULL)
250 printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
251
252 printk("PCI: Probing PCI hardware done\n");
253
254 return 0;
255}
256
257subsys_initcall(pcibios_init);
258
259char __init *pcibios_setup(char *str)
260{
261 return str;
262}
263
264int pcibios_enable_device(struct pci_dev *dev, int mask)
265{
266 u16 cmd, oldcmd;
267 int i;
268
269 pci_read_config_word(dev, PCI_COMMAND, &cmd);
270 oldcmd = cmd;
271
272 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
273 struct resource *res = &dev->resource[i];
274
275 /* Only set up the requested stuff */
276 if (!(mask & (1<<i)))
277 continue;
278
279 if (res->flags & IORESOURCE_IO)
280 cmd |= PCI_COMMAND_IO;
281 if (res->flags & IORESOURCE_MEM)
282 cmd |= PCI_COMMAND_MEMORY;
283 }
284
285 if (cmd != oldcmd) {
286 printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
287 pci_name(dev), cmd);
288 /* Enable the appropriate bits in the PCI command register. */
289 pci_write_config_word(dev, PCI_COMMAND, cmd);
290 }
291 return 0;
292}
293
294/*
295 * Return the domain number for this bus.
296 */
297int pci_domain_nr(struct pci_bus *bus)
298{
299#ifdef CONFIG_PPC_ISERIES
300 return 0;
301#else
302 struct pci_controller *hose = pci_bus_to_host(bus);
303
304 return hose->global_number;
305#endif
306}
307
308EXPORT_SYMBOL(pci_domain_nr);
309
310/* Decide whether to display the domain number in /proc */
311int pci_proc_domain(struct pci_bus *bus)
312{
313#ifdef CONFIG_PPC_ISERIES
314 return 0;
315#else
316 struct pci_controller *hose = pci_bus_to_host(bus);
317 return hose->buid;
318#endif
319}
320
321/*
322 * Platform support for /proc/bus/pci/X/Y mmap()s,
323 * modelled on the sparc64 implementation by Dave Miller.
324 * -- paulus.
325 */
326
327/*
328 * Adjust vm_pgoff of VMA such that it is the physical page offset
329 * corresponding to the 32-bit pci bus offset for DEV requested by the user.
330 *
331 * Basically, the user finds the base address for the device they wish
332 * to mmap. They read the 32-bit value from the config space base register,
333 * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
334 * offset parameter of mmap on /proc/bus/pci/XXX for that device.
335 *
336 * Returns negative error code on failure, zero on success.
337 */
338static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
339 unsigned long *offset,
340 enum pci_mmap_state mmap_state)
341{
342 struct pci_controller *hose = pci_bus_to_host(dev->bus);
343 unsigned long io_offset = 0;
344 int i, res_bit;
345
346	if (hose == NULL)
347 return NULL; /* should never happen */
348
349 /* If memory, add on the PCI bridge address offset */
350 if (mmap_state == pci_mmap_mem) {
351 *offset += hose->pci_mem_offset;
352 res_bit = IORESOURCE_MEM;
353 } else {
354 io_offset = (unsigned long)hose->io_base_virt;
355 *offset += io_offset;
356 res_bit = IORESOURCE_IO;
357 }
358
359 /*
360 * Check that the offset requested corresponds to one of the
361 * resources of the device.
362 */
363 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
364 struct resource *rp = &dev->resource[i];
365 int flags = rp->flags;
366
367 /* treat ROM as memory (should be already) */
368 if (i == PCI_ROM_RESOURCE)
369 flags |= IORESOURCE_MEM;
370
371 /* Active and same type? */
372 if ((flags & res_bit) == 0)
373 continue;
374
375 /* In the range of this resource? */
376 if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
377 continue;
378
379 /* found it! construct the final physical address */
380 if (mmap_state == pci_mmap_io)
381 *offset += hose->io_base_phys - io_offset;
382 return rp;
383 }
384
385 return NULL;
386}
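/*
 * Editorial sketch, not part of the original file: from user space the
 * protocol described above is roughly the following (the device path
 * and the bar_base/page_offset values are hypothetical):
 */
#if 0	/* illustration only */
	int fd = open("/proc/bus/pci/00/02.0", O_RDWR);
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
		       fd, bar_base + page_offset);
#endif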
387
388/*
389 * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
390 * device mapping.
391 */
392static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
393 pgprot_t protection,
394 enum pci_mmap_state mmap_state,
395 int write_combine)
396{
397 unsigned long prot = pgprot_val(protection);
398
399 /* Write combine is always 0 on non-memory space mappings. On
400 * memory space, if the user didn't pass 1, we check for a
401 * "prefetchable" resource. This is a bit hackish, but we use
402	 * this to work around the inability of /sysfs to provide a write
403	 * combine bit.
404 */
405 if (mmap_state != pci_mmap_mem)
406 write_combine = 0;
407 else if (write_combine == 0) {
408 if (rp->flags & IORESOURCE_PREFETCH)
409 write_combine = 1;
410 }
411
412 /* XXX would be nice to have a way to ask for write-through */
413 prot |= _PAGE_NO_CACHE;
414 if (write_combine)
415 prot &= ~_PAGE_GUARDED;
416 else
417 prot |= _PAGE_GUARDED;
418
419 printk("PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start,
420 prot);
421
422 return __pgprot(prot);
423}
424
425/*
426 * This one is used by /dev/mem and fbdev who have no clue about the
427 * PCI device; it tries to find the PCI device first and then calls
428 * the above routine.
429 */
430pgprot_t pci_phys_mem_access_prot(struct file *file,
431 unsigned long offset,
432 unsigned long size,
433 pgprot_t protection)
434{
435 struct pci_dev *pdev = NULL;
436 struct resource *found = NULL;
437 unsigned long prot = pgprot_val(protection);
438 int i;
439
440 if (page_is_ram(offset >> PAGE_SHIFT))
441 return prot;
442
443 prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
444
445 for_each_pci_dev(pdev) {
446 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
447 struct resource *rp = &pdev->resource[i];
448 int flags = rp->flags;
449
450 /* Active and same type? */
451 if ((flags & IORESOURCE_MEM) == 0)
452 continue;
453 /* In the range of this resource? */
454 if (offset < (rp->start & PAGE_MASK) ||
455 offset > rp->end)
456 continue;
457 found = rp;
458 break;
459 }
460 if (found)
461 break;
462 }
463 if (found) {
464 if (found->flags & IORESOURCE_PREFETCH)
465 prot &= ~_PAGE_GUARDED;
466 pci_dev_put(pdev);
467 }
468
469 DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
470
471 return __pgprot(prot);
472}
473
474
475/*
476 * Perform the actual remap of the pages for a PCI device mapping, as
477 * appropriate for this architecture. The region in the process to map
478 * is described by vm_start and vm_end members of VMA, the base physical
479 * address is found in vm_pgoff.
480 * The pci device structure is provided so that architectures may make mapping
481 * decisions on a per-device or per-bus basis.
482 *
483 * Returns a negative error code on failure, zero on success.
484 */
485int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
486 enum pci_mmap_state mmap_state,
487 int write_combine)
488{
489 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
490 struct resource *rp;
491 int ret;
492
493 rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
494 if (rp == NULL)
495 return -EINVAL;
496
497 vma->vm_pgoff = offset >> PAGE_SHIFT;
498 vma->vm_flags |= VM_SHM | VM_LOCKED | VM_IO;
499 vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
500 vma->vm_page_prot,
501 mmap_state, write_combine);
502
503 ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
504 vma->vm_end - vma->vm_start, vma->vm_page_prot);
505
506 return ret;
507}
508
509#ifdef CONFIG_PPC_MULTIPLATFORM
510static ssize_t pci_show_devspec(struct device *dev, char *buf)
511{
512 struct pci_dev *pdev;
513 struct device_node *np;
514
515	pdev = to_pci_dev(dev);
516 np = pci_device_to_OF_node(pdev);
517 if (np == NULL || np->full_name == NULL)
518 return 0;
519 return sprintf(buf, "%s", np->full_name);
520}
521static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
522#endif /* CONFIG_PPC_MULTIPLATFORM */
523
524void pcibios_add_platform_entries(struct pci_dev *pdev)
525{
526#ifdef CONFIG_PPC_MULTIPLATFORM
527 device_create_file(&pdev->dev, &dev_attr_devspec);
528#endif /* CONFIG_PPC_MULTIPLATFORM */
529}
530
531#ifdef CONFIG_PPC_MULTIPLATFORM
532
533#define ISA_SPACE_MASK 0x1
534#define ISA_SPACE_IO 0x1
535
536static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
537 unsigned long phb_io_base_phys,
538 void __iomem * phb_io_base_virt)
539{
540 struct isa_range *range;
541 unsigned long pci_addr;
542 unsigned int isa_addr;
543 unsigned int size;
544 int rlen = 0;
545
546 range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
547 if (range == NULL || (rlen < sizeof(struct isa_range))) {
548		printk(KERN_ERR "no ISA ranges or unexpected isa range size, "
549 "mapping 64k\n");
550 __ioremap_explicit(phb_io_base_phys, (unsigned long)phb_io_base_virt,
551 0x10000, _PAGE_NO_CACHE);
552 return;
553 }
554
555 /* From "ISA Binding to 1275"
556 * The ranges property is laid out as an array of elements,
557 * each of which comprises:
558 * cells 0 - 1: an ISA address
559 * cells 2 - 4: a PCI address
560 * (size depending on dev->n_addr_cells)
561 * cell 5: the size of the range
562 */
563	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) == ISA_SPACE_IO) {
564 isa_addr = range->isa_addr.a_lo;
565 pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
566 range->pci_addr.a_lo;
567
568 /* Assume these are both zero */
569 if ((pci_addr != 0) || (isa_addr != 0)) {
570 printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
571 __FUNCTION__);
572 return;
573 }
574
575 size = PAGE_ALIGN(range->size);
576
577 __ioremap_explicit(phb_io_base_phys,
578 (unsigned long) phb_io_base_virt,
579 size, _PAGE_NO_CACHE);
580 }
581}
582
583void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
584 struct device_node *dev)
585{
586 unsigned int *ranges;
587 unsigned long size;
588 int rlen = 0;
589 int memno = 0;
590 struct resource *res;
591 int np, na = prom_n_addr_cells(dev);
592 unsigned long pci_addr, cpu_phys_addr;
593
594 np = na + 5;
595
596 /* From "PCI Binding to 1275"
597 * The ranges property is laid out as an array of elements,
598 * each of which comprises:
599 * cells 0 - 2: a PCI address
600 * cells 3 or 3+4: a CPU physical address
601 * (size depending on dev->n_addr_cells)
602 * cells 4+5 or 5+6: the size of the range
603 */
604 rlen = 0;
605 hose->io_base_phys = 0;
606 ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
607 while ((rlen -= np * sizeof(unsigned int)) >= 0) {
608 res = NULL;
609 pci_addr = (unsigned long)ranges[1] << 32 | ranges[2];
610
611 cpu_phys_addr = ranges[3];
612 if (na == 2)
613 cpu_phys_addr = cpu_phys_addr << 32 | ranges[4];
614
615 size = (unsigned long)ranges[na+3] << 32 | ranges[na+4];
616 if (size == 0)
617 continue;
618 switch ((ranges[0] >> 24) & 0x3) {
619 case 1: /* I/O space */
620 hose->io_base_phys = cpu_phys_addr;
621 hose->pci_io_size = size;
622
623 res = &hose->io_resource;
624 res->flags = IORESOURCE_IO;
625 res->start = pci_addr;
626 DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number,
627 res->start, res->start + size - 1);
628 break;
629 case 2: /* memory space */
630 memno = 0;
631 while (memno < 3 && hose->mem_resources[memno].flags)
632 ++memno;
633
634 if (memno == 0)
635 hose->pci_mem_offset = cpu_phys_addr - pci_addr;
636 if (memno < 3) {
637 res = &hose->mem_resources[memno];
638 res->flags = IORESOURCE_MEM;
639 res->start = cpu_phys_addr;
640 DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number,
641 res->start, res->start + size - 1);
642 }
643 break;
644 }
645 if (res != NULL) {
646 res->name = dev->full_name;
647 res->end = res->start + size - 1;
648 res->parent = NULL;
649 res->sibling = NULL;
650 res->child = NULL;
651 }
652 ranges += np;
653 }
654}
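/*
 * Editorial example, not part of the original file: with na == 2 one
 * ranges element occupies np == 7 cells, e.g. (values hypothetical)
 *
 *	02000000 00000000 80000000  00000000 c0000000  00000000 10000000
 *	\___ PCI address (3) ____/  \_ CPU phys (2) _/ \___ size (2) __/
 *
 * which the loop above decodes as a 256MB memory-space window
 * ((ranges[0] >> 24) & 0x3 == 2) mapping PCI 0x80000000 to CPU
 * physical 0xc0000000.
 */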
655
656void __init pci_setup_phb_io(struct pci_controller *hose, int primary)
657{
658 unsigned long size = hose->pci_io_size;
659 unsigned long io_virt_offset;
660 struct resource *res;
661 struct device_node *isa_dn;
662
663 hose->io_base_virt = reserve_phb_iospace(size);
664 DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
665 hose->global_number, hose->io_base_phys,
666 (unsigned long) hose->io_base_virt);
667
668 if (primary) {
669 pci_io_base = (unsigned long)hose->io_base_virt;
670 isa_dn = of_find_node_by_type(NULL, "isa");
671 if (isa_dn) {
672 isa_io_base = pci_io_base;
673 pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
674 hose->io_base_virt);
675 of_node_put(isa_dn);
676 /* Allow all IO */
677 io_page_mask = -1;
678 }
679 }
680
681 io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
682 res = &hose->io_resource;
683 res->start += io_virt_offset;
684 res->end += io_virt_offset;
685}
686
687void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
688 int primary)
689{
690 unsigned long size = hose->pci_io_size;
691 unsigned long io_virt_offset;
692 struct resource *res;
693
694 hose->io_base_virt = __ioremap(hose->io_base_phys, size,
695 _PAGE_NO_CACHE);
696 DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
697 hose->global_number, hose->io_base_phys,
698 (unsigned long) hose->io_base_virt);
699
700 if (primary)
701 pci_io_base = (unsigned long)hose->io_base_virt;
702
703 io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
704 res = &hose->io_resource;
705 res->start += io_virt_offset;
706 res->end += io_virt_offset;
707}
708
709
710static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
711 unsigned long *start_virt, unsigned long *size)
712{
713 struct pci_controller *hose = pci_bus_to_host(bus);
714 struct pci_bus_region region;
715 struct resource *res;
716
717 if (bus->self) {
718 res = bus->resource[0];
719 pcibios_resource_to_bus(bus->self, &region, res);
720 *start_phys = hose->io_base_phys + region.start;
721 *start_virt = (unsigned long) hose->io_base_virt +
722 region.start;
723 if (region.end > region.start)
724 *size = region.end - region.start + 1;
725 else {
726 printk("%s(): unexpected region 0x%lx->0x%lx\n",
727 __FUNCTION__, region.start, region.end);
728 return 1;
729 }
730
731 } else {
732 /* Root Bus */
733 res = &hose->io_resource;
734 *start_phys = hose->io_base_phys;
735 *start_virt = (unsigned long) hose->io_base_virt;
736 if (res->end > res->start)
737 *size = res->end - res->start + 1;
738 else {
739 printk("%s(): unexpected region 0x%lx->0x%lx\n",
740 __FUNCTION__, res->start, res->end);
741 return 1;
742 }
743 }
744
745 return 0;
746}
747
748int unmap_bus_range(struct pci_bus *bus)
749{
750 unsigned long start_phys;
751 unsigned long start_virt;
752 unsigned long size;
753
754 if (!bus) {
755 printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
756 return 1;
757 }
758
759 if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
760 return 1;
761 if (iounmap_explicit((void __iomem *) start_virt, size))
762 return 1;
763
764 return 0;
765}
766EXPORT_SYMBOL(unmap_bus_range);
767
768int remap_bus_range(struct pci_bus *bus)
769{
770 unsigned long start_phys;
771 unsigned long start_virt;
772 unsigned long size;
773
774 if (!bus) {
775 printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
776 return 1;
777 }
778
779
780 if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
781 return 1;
782	printk(KERN_DEBUG "mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
783 if (__ioremap_explicit(start_phys, start_virt, size, _PAGE_NO_CACHE))
784 return 1;
785
786 return 0;
787}
788EXPORT_SYMBOL(remap_bus_range);
789
790void phbs_remap_io(void)
791{
792 struct pci_controller *hose, *tmp;
793
794 list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
795 remap_bus_range(hose->bus);
796}
797
798/*
799 * ppc64 can have multifunction devices that do not respond to function 0.
800 * In this case we must scan all functions.
801 */
802int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
803{
804 struct device_node *busdn, *dn;
805
806 if (bus->self)
807 busdn = pci_device_to_OF_node(bus->self);
808 else
809 busdn = bus->sysdata; /* must be a phb */
810
811 if (busdn == NULL)
812 return 0;
813
814 /*
815	 * Check to see if any of the 8 functions are in the
816	 * device tree. If they are, then we need to scan all the
817 * functions of this slot.
818 */
819 for (dn = busdn->child; dn; dn = dn->sibling)
820 if ((dn->devfn >> 3) == (devfn >> 3))
821 return 1;
822
823 return 0;
824}
825
826
827void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
828 struct pci_bus *bus)
829{
830 /* Update device resources. */
831 struct pci_controller *hose = pci_bus_to_host(bus);
832 int i;
833
834 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
835 if (dev->resource[i].flags & IORESOURCE_IO) {
836 unsigned long offset = (unsigned long)hose->io_base_virt
837 - pci_io_base;
838 unsigned long start, end, mask;
839
840 start = dev->resource[i].start += offset;
841 end = dev->resource[i].end += offset;
842
843 /* Need to allow IO access to pages that are in the
844 ISA range */
845 if (start < MAX_ISA_PORT) {
846 if (end > MAX_ISA_PORT)
847 end = MAX_ISA_PORT;
848
849 start >>= PAGE_SHIFT;
850 end >>= PAGE_SHIFT;
851
852 /* get the range of pages for the map */
853 mask = ((1 << (end+1))-1) ^ ((1 << start)-1);
854 io_page_mask |= mask;
855 }
856 }
857 else if (dev->resource[i].flags & IORESOURCE_MEM) {
858 dev->resource[i].start += hose->pci_mem_offset;
859 dev->resource[i].end += hose->pci_mem_offset;
860 }
861 }
862}
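/*
 * Editorial example, not part of the original file: if an ISA-range
 * resource spans pages 2 through 4, the mask expression above yields
 * ((1 << 5) - 1) ^ ((1 << 2) - 1) == 0x1f ^ 0x03 == 0x1c, setting
 * bits 2..4 in io_page_mask.
 */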
863EXPORT_SYMBOL(pcibios_fixup_device_resources);
864
865void __devinit pcibios_fixup_bus(struct pci_bus *bus)
866{
867 struct pci_controller *hose = pci_bus_to_host(bus);
868 struct pci_dev *dev = bus->self;
869 struct resource *res;
870 int i;
871
872 if (!dev) {
873 /* Root bus. */
874
875 hose->bus = bus;
876 bus->resource[0] = res = &hose->io_resource;
877
878 if (res->flags && request_resource(&ioport_resource, res))
879 printk(KERN_ERR "Failed to request IO on "
880 "PCI domain %d\n", pci_domain_nr(bus));
881
882 for (i = 0; i < 3; ++i) {
883 res = &hose->mem_resources[i];
884 bus->resource[i+1] = res;
885 if (res->flags && request_resource(&iomem_resource, res))
886 printk(KERN_ERR "Failed to request MEM on "
887 "PCI domain %d\n",
888 pci_domain_nr(bus));
889 }
890 } else if (pci_probe_only &&
891 (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
892 /* This is a subordinate bridge */
893
894 pci_read_bridge_bases(bus);
895 pcibios_fixup_device_resources(dev, bus);
896 }
897
898 ppc_md.iommu_bus_setup(bus);
899
900 list_for_each_entry(dev, &bus->devices, bus_list)
901 ppc_md.iommu_dev_setup(dev);
902
903 if (!pci_probe_only)
904 return;
905
906 list_for_each_entry(dev, &bus->devices, bus_list) {
907 if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
908 pcibios_fixup_device_resources(dev, bus);
909 }
910}
911EXPORT_SYMBOL(pcibios_fixup_bus);
912
913/*
914 * Reads the interrupt pin to determine if the interrupt is used by the card.
915 * If the interrupt is used, then gets the interrupt line from
916 * Open Firmware and sets it in the pci_dev and the PCI config space.
917 */
918int pci_read_irq_line(struct pci_dev *pci_dev)
919{
920 u8 intpin;
921 struct device_node *node;
922
923 pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
924 if (intpin == 0)
925 return 0;
926
927 node = pci_device_to_OF_node(pci_dev);
928 if (node == NULL)
929 return -1;
930
931 if (node->n_intrs == 0)
932 return -1;
933
934 pci_dev->irq = node->intrs[0].line;
935
936 pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
937
938 return 0;
939}
940EXPORT_SYMBOL(pci_read_irq_line);
941
942#endif /* CONFIG_PPC_MULTIPLATFORM */
diff --git a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h
new file mode 100644
index 000000000000..0fd7d849aa77
--- /dev/null
+++ b/arch/ppc64/kernel/pci.h
@@ -0,0 +1,51 @@
1/*
2 * (C) 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#ifndef __PPC_KERNEL_PCI_H__
10#define __PPC_KERNEL_PCI_H__
11
12#include <linux/pci.h>
13#include <asm/pci-bridge.h>
14
15extern unsigned long isa_io_base;
16
17extern void pci_setup_pci_controller(struct pci_controller *hose);
18extern void pci_setup_phb_io(struct pci_controller *hose, int primary);
19extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary);
20
21
22extern struct list_head hose_list;
23extern int global_phb_number;
24
25extern unsigned long find_and_init_phbs(void);
26
27extern struct pci_dev *ppc64_isabridge_dev; /* may be NULL if no ISA bus */
28
29/* PCI device_node operations */
30struct device_node;
31typedef void *(*traverse_func)(struct device_node *me, void *data);
32void *traverse_pci_devices(struct device_node *start, traverse_func pre,
33 void *data);
34
35void pci_devs_phb_init(void);
36void pci_devs_phb_init_dynamic(struct pci_controller *phb);
37struct device_node *fetch_dev_dn(struct pci_dev *dev);
38
39/* PCI address cache management routines */
40void pci_addr_cache_insert_device(struct pci_dev *dev);
41void pci_addr_cache_remove_device(struct pci_dev *dev);
42
43/* From pSeries_pci.h */
44void init_pci_config_tokens(void);
45unsigned long get_phb_buid(struct device_node *);
46
47extern unsigned long pci_probe_only;
48extern unsigned long pci_assign_all_buses;
49extern int pci_read_irq_line(struct pci_dev *pci_dev);
50
51#endif /* __PPC_KERNEL_PCI_H__ */
diff --git a/arch/ppc64/kernel/pci_direct_iommu.c b/arch/ppc64/kernel/pci_direct_iommu.c
new file mode 100644
index 000000000000..b8f7f58824f4
--- /dev/null
+++ b/arch/ppc64/kernel/pci_direct_iommu.c
@@ -0,0 +1,95 @@
1/*
2 * Support for DMA from PCI devices to main memory on
3 * machines without an iommu or with directly addressable
4 * RAM (typically a pmac with 2GB of RAM or less)
5 *
6 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/delay.h>
17#include <linux/string.h>
18#include <linux/init.h>
19#include <linux/bootmem.h>
20#include <linux/mm.h>
21#include <linux/dma-mapping.h>
22
23#include <asm/sections.h>
24#include <asm/io.h>
25#include <asm/prom.h>
26#include <asm/pci-bridge.h>
27#include <asm/machdep.h>
28#include <asm/pmac_feature.h>
29#include <asm/abs_addr.h>
30
31#include "pci.h"
32
33static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
34 dma_addr_t *dma_handle, unsigned int __nocast flag)
35{
36 void *ret;
37
38 ret = (void *)__get_free_pages(flag, get_order(size));
39 if (ret != NULL) {
40 memset(ret, 0, size);
41 *dma_handle = virt_to_abs(ret);
42 }
43 return ret;
44}
45
46static void pci_direct_free_coherent(struct device *hwdev, size_t size,
47 void *vaddr, dma_addr_t dma_handle)
48{
49 free_pages((unsigned long)vaddr, get_order(size));
50}
51
52static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr,
53 size_t size, enum dma_data_direction direction)
54{
55 return virt_to_abs(ptr);
56}
57
58static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
59 size_t size, enum dma_data_direction direction)
60{
61}
62
63static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg,
64 int nents, enum dma_data_direction direction)
65{
66 int i;
67
68 for (i = 0; i < nents; i++, sg++) {
69 sg->dma_address = page_to_phys(sg->page) + sg->offset;
70 sg->dma_length = sg->length;
71 }
72
73 return nents;
74}
75
76static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg,
77 int nents, enum dma_data_direction direction)
78{
79}
80
81static int pci_direct_dma_supported(struct device *dev, u64 mask)
82{
83 return mask < 0x100000000ull;
84}
85
86void __init pci_direct_iommu_init(void)
87{
88 pci_dma_ops.alloc_coherent = pci_direct_alloc_coherent;
89 pci_dma_ops.free_coherent = pci_direct_free_coherent;
90 pci_dma_ops.map_single = pci_direct_map_single;
91 pci_dma_ops.unmap_single = pci_direct_unmap_single;
92 pci_dma_ops.map_sg = pci_direct_map_sg;
93 pci_dma_ops.unmap_sg = pci_direct_unmap_sg;
94 pci_dma_ops.dma_supported = pci_direct_dma_supported;
95}
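/*
 * Editorial note, not part of the original file: this populates the
 * same global pci_dma_ops table that pci_iommu_init() in pci_iommu.c
 * fills on iommu-equipped machines; platform setup is expected to
 * call exactly one of the two initialisers.
 */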
diff --git a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c
new file mode 100644
index 000000000000..ec345462afc3
--- /dev/null
+++ b/arch/ppc64/kernel/pci_dn.c
@@ -0,0 +1,198 @@
1/*
2 * pci_dn.c
3 *
4 * Copyright (C) 2001 Todd Inglett, IBM Corporation
5 *
6 * PCI manipulation via device_nodes.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22#include <linux/kernel.h>
23#include <linux/pci.h>
24#include <linux/string.h>
25#include <linux/init.h>
26
27#include <asm/io.h>
28#include <asm/prom.h>
29#include <asm/pci-bridge.h>
30#include <asm/pSeries_reconfig.h>
31
32#include "pci.h"
33
34/*
35 * traverse_func that initializes the PCI fields of the device node.
36 * NOTE: this *must* be done before read/write config to the device.
37 */
38static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
39{
40 struct pci_controller *phb = data;
41 int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
42 u32 *regs;
43
44 dn->phb = phb;
45 regs = (u32 *)get_property(dn, "reg", NULL);
46 if (regs) {
47 /* First register entry is addr (00BBSS00) */
48 dn->busno = (regs[0] >> 16) & 0xff;
49 dn->devfn = (regs[0] >> 8) & 0xff;
50 }
51
52 dn->pci_ext_config_space = (type && *type == 1);
53 return NULL;
54}
55
56/*
57 * Traverse a device tree, stopping at each PCI device in the tree.
58 * This is done depth first. As each node is processed, a "pre"
59 * function is called and the children are processed recursively.
60 *
61 * The "pre" func returns a value. If non-NULL is returned from
62 * the "pre" func, the traversal stops and this value is returned.
63 * This return value is useful when using traverse as a method of
64 * finding a device.
65 *
66 * NOTE: we do not run the func for devices that do not appear to
67 * be PCI, except for the start node, which we assume to be PCI
68 * (this is good because the start node is often a phb which may
69 * be missing PCI properties).
70 * We use the class-code as an indicator. If we run into
71 * one of these nodes we also assume its siblings are non-pci for
72 * performance.
73 */
74void *traverse_pci_devices(struct device_node *start, traverse_func pre,
75 void *data)
76{
77 struct device_node *dn, *nextdn;
78 void *ret;
79
80	/* We started with a phb, iterate over all its children */
81 for (dn = start->child; dn; dn = nextdn) {
82 u32 *classp, class;
83
84 nextdn = NULL;
85 classp = (u32 *)get_property(dn, "class-code", NULL);
86 class = classp ? *classp : 0;
87
88 if (pre && ((ret = pre(dn, data)) != NULL))
89 return ret;
90
91 /* If we are a PCI bridge, go down */
92 if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
93 (class >> 8) == PCI_CLASS_BRIDGE_CARDBUS))
94 /* Depth first...do children */
95 nextdn = dn->child;
96 else if (dn->sibling)
97 /* ok, try next sibling instead. */
98 nextdn = dn->sibling;
99 if (!nextdn) {
100 /* Walk up to next valid sibling. */
101 do {
102 dn = dn->parent;
103 if (dn == start)
104 return NULL;
105 } while (dn->sibling == NULL);
106 nextdn = dn->sibling;
107 }
108 }
109 return NULL;
110}
111
112void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
113{
114 struct device_node * dn = (struct device_node *) phb->arch_data;
115
116 /* PHB nodes themselves must not match */
117 dn->devfn = dn->busno = -1;
118 dn->phb = phb;
119
120 /* Update dn->phb ptrs for new phb and children devices */
121 traverse_pci_devices(dn, update_dn_pci_info, phb);
122}
123
124/*
125 * Traversal func that looks for a <busno,devfn> value.
126 * If found, the device_node is returned (thus terminating the traversal).
127 */
128static void *is_devfn_node(struct device_node *dn, void *data)
129{
130 int busno = ((unsigned long)data >> 8) & 0xff;
131 int devfn = ((unsigned long)data) & 0xff;
132
133 return ((devfn == dn->devfn) && (busno == dn->busno)) ? dn : NULL;
134}
135
136/*
137 * This is the "slow" path for looking up a device_node from a
138 * pci_dev. It will hunt for the device under its parent's
139 * phb and then update sysdata for a future fastpath.
140 *
141 * It may also do fixups on the actual device since this happens
142 * on the first read/write.
143 *
144 * Note that it also must deal with devices that don't exist.
145 * In this case it may probe for real hardware ("just in case")
146 * and add a device_node to the device tree if necessary.
147 *
148 */
149struct device_node *fetch_dev_dn(struct pci_dev *dev)
150{
151 struct device_node *orig_dn = dev->sysdata;
152 struct pci_controller *phb = orig_dn->phb; /* assume same phb as orig_dn */
153 struct device_node *phb_dn;
154 struct device_node *dn;
155 unsigned long searchval = (dev->bus->number << 8) | dev->devfn;
156
157 phb_dn = phb->arch_data;
158 dn = traverse_pci_devices(phb_dn, is_devfn_node, (void *)searchval);
159 if (dn)
160 dev->sysdata = dn;
161 return dn;
162}
163EXPORT_SYMBOL(fetch_dev_dn);
164
165static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
166{
167 struct device_node *np = node;
168 int err = NOTIFY_OK;
169
170 switch (action) {
171 case PSERIES_RECONFIG_ADD:
172 update_dn_pci_info(np, np->parent->phb);
173 break;
174 default:
175 err = NOTIFY_DONE;
176 break;
177 }
178 return err;
179}
180
181static struct notifier_block pci_dn_reconfig_nb = {
182 .notifier_call = pci_dn_reconfig_notifier,
183};
184
185/*
186 * Actually initialize the phbs.
187 * The buswalk on these phbs has not happened yet.
188 */
189void __init pci_devs_phb_init(void)
190{
191 struct pci_controller *phb, *tmp;
192
193 /* This must be done first so the device nodes have valid pci info! */
194 list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
195 pci_devs_phb_init_dynamic(phb);
196
197 pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
198}
diff --git a/arch/ppc64/kernel/pci_iommu.c b/arch/ppc64/kernel/pci_iommu.c
new file mode 100644
index 000000000000..ef0a62b916be
--- /dev/null
+++ b/arch/ppc64/kernel/pci_iommu.c
@@ -0,0 +1,139 @@
1/*
2 * arch/ppc64/kernel/pci_iommu.c
3 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
4 *
5 * Rewrite, cleanup, new allocation schemes:
6 * Copyright (C) 2004 Olof Johansson, IBM Corporation
7 *
8 * Dynamic DMA mapping support, platform-independent parts.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25
26#include <linux/config.h>
27#include <linux/init.h>
28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/mm.h>
31#include <linux/spinlock.h>
32#include <linux/string.h>
33#include <linux/pci.h>
34#include <linux/dma-mapping.h>
35#include <asm/io.h>
36#include <asm/prom.h>
37#include <asm/iommu.h>
38#include <asm/pci-bridge.h>
39#include <asm/machdep.h>
40#include "pci.h"
41
42#ifdef CONFIG_PPC_ISERIES
43#include <asm/iSeries/iSeries_pci.h>
44#endif /* CONFIG_PPC_ISERIES */
45
46/*
47 * We can use ->sysdata directly and avoid the extra work in
48 * pci_device_to_OF_node since ->sysdata will have been initialised
49 * in the iommu init code for all devices.
50 */
51#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata))
52
53static inline struct iommu_table *devnode_table(struct device *dev)
54{
55 struct pci_dev *pdev;
56
57 if (!dev) {
58 pdev = ppc64_isabridge_dev;
59 if (!pdev)
60 return NULL;
61 } else
62 pdev = to_pci_dev(dev);
63
64#ifdef CONFIG_PPC_ISERIES
65 return ISERIES_DEVNODE(pdev)->iommu_table;
66#endif /* CONFIG_PPC_ISERIES */
67
68#ifdef CONFIG_PPC_MULTIPLATFORM
69 return PCI_GET_DN(pdev)->iommu_table;
70#endif /* CONFIG_PPC_MULTIPLATFORM */
71}
72
73
74/* Allocates a contiguous real buffer and creates mappings over it.
75 * Returns the virtual address of the buffer and sets dma_handle
76 * to the dma address (mapping) of the first page.
77 */
78static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size,
79 dma_addr_t *dma_handle, unsigned int __nocast flag)
80{
81 return iommu_alloc_coherent(devnode_table(hwdev), size, dma_handle,
82 flag);
83}
84
85static void pci_iommu_free_coherent(struct device *hwdev, size_t size,
86 void *vaddr, dma_addr_t dma_handle)
87{
88 iommu_free_coherent(devnode_table(hwdev), size, vaddr, dma_handle);
89}
90
91/* Creates TCEs for a user provided buffer. The user buffer must be
92 * contiguous real kernel storage (not vmalloc). The address of the buffer
93 * passed here is the kernel (virtual) address of the buffer. The buffer
94 * need not be page aligned, the dma_addr_t returned will point to the same
95 * byte within the page as vaddr.
96 */
97static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr,
98 size_t size, enum dma_data_direction direction)
99{
100 return iommu_map_single(devnode_table(hwdev), vaddr, size, direction);
101}
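
Editor's note: the mapping semantics above can be illustrated with a minimal driver-side sketch (not part of this patch; "dev" and the buffer are hypothetical, and the generic DMA API shown routes into pci_iommu_map_single() via pci_dma_ops on this platform):

	/* Hypothetical fragment: map a kmalloc'ed buffer for a device
	 * write, then unmap it once the DMA has completed. */
	void *buf = kmalloc(512, GFP_KERNEL);
	dma_addr_t handle;

	handle = dma_map_single(dev, buf, 512, DMA_TO_DEVICE);
	/* ... program the device with "handle", wait for completion ... */
	dma_unmap_single(dev, handle, 512, DMA_TO_DEVICE);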
102
103
104static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle,
105 size_t size, enum dma_data_direction direction)
106{
107 iommu_unmap_single(devnode_table(hwdev), dma_handle, size, direction);
108}
109
110
111static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist,
112 int nelems, enum dma_data_direction direction)
113{
114 return iommu_map_sg(pdev, devnode_table(pdev), sglist,
115 nelems, direction);
116}
117
118static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist,
119 int nelems, enum dma_data_direction direction)
120{
121 iommu_unmap_sg(devnode_table(pdev), sglist, nelems, direction);
122}
123
124/* We support DMA to/from any memory page via the iommu */
125static int pci_iommu_dma_supported(struct device *dev, u64 mask)
126{
127 return 1;
128}
129
130void pci_iommu_init(void)
131{
132 pci_dma_ops.alloc_coherent = pci_iommu_alloc_coherent;
133 pci_dma_ops.free_coherent = pci_iommu_free_coherent;
134 pci_dma_ops.map_single = pci_iommu_map_single;
135 pci_dma_ops.unmap_single = pci_iommu_unmap_single;
136 pci_dma_ops.map_sg = pci_iommu_map_sg;
137 pci_dma_ops.unmap_sg = pci_iommu_unmap_sg;
138 pci_dma_ops.dma_supported = pci_iommu_dma_supported;
139}
diff --git a/arch/ppc64/kernel/pmac.h b/arch/ppc64/kernel/pmac.h
new file mode 100644
index 000000000000..40e1c5030f74
--- /dev/null
+++ b/arch/ppc64/kernel/pmac.h
@@ -0,0 +1,31 @@
1#ifndef __PMAC_H__
2#define __PMAC_H__
3
4#include <linux/pci.h>
5#include <linux/ide.h>
6
7/*
8 * Declarations for the various functions exported by the
9 * pmac_* files. Mostly for use by pmac_setup
10 */
11
12extern void pmac_get_boot_time(struct rtc_time *tm);
13extern void pmac_get_rtc_time(struct rtc_time *tm);
14extern int pmac_set_rtc_time(struct rtc_time *tm);
15extern void pmac_read_rtc_time(void);
16extern void pmac_calibrate_decr(void);
17
18extern void pmac_pcibios_fixup(void);
19extern void pmac_pci_init(void);
20extern void pmac_setup_pci_dma(void);
21extern void pmac_check_ht_link(void);
22
23extern void pmac_setup_smp(void);
24
25extern unsigned long pmac_ide_get_base(int index);
26extern void pmac_ide_init_hwif_ports(hw_regs_t *hw,
27 unsigned long data_port, unsigned long ctrl_port, int *irq);
28
29extern void pmac_nvram_init(void);
30
31#endif /* __PMAC_H__ */
diff --git a/arch/ppc64/kernel/pmac_feature.c b/arch/ppc64/kernel/pmac_feature.c
new file mode 100644
index 000000000000..7f1062d222c9
--- /dev/null
+++ b/arch/ppc64/kernel/pmac_feature.c
@@ -0,0 +1,676 @@
1/*
2 * arch/ppc/platforms/pmac_feature.c
3 *
4 * Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au)
5 * Ben. Herrenschmidt (benh@kernel.crashing.org)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * TODO:
13 *
14 * - Replace mdelay with some schedule loop if possible
15 * - Shorten some obfuscated delays on some routines (like modem
16 * power)
17 * - Refcount some clocks (see darwin)
18 * - Split split split...
19 *
20 */
21#include <linux/config.h>
22#include <linux/types.h>
23#include <linux/init.h>
24#include <linux/delay.h>
25#include <linux/kernel.h>
26#include <linux/sched.h>
27#include <linux/spinlock.h>
28#include <linux/adb.h>
29#include <linux/pmu.h>
30#include <linux/ioport.h>
31#include <linux/pci.h>
32#include <asm/sections.h>
33#include <asm/errno.h>
34#include <asm/keylargo.h>
35#include <asm/uninorth.h>
36#include <asm/io.h>
37#include <asm/prom.h>
38#include <asm/machdep.h>
39#include <asm/pmac_feature.h>
40#include <asm/dbdma.h>
41#include <asm/pci-bridge.h>
42#include <asm/pmac_low_i2c.h>
43
44#undef DEBUG_FEATURE
45
46#ifdef DEBUG_FEATURE
47#define DBG(fmt...) printk(KERN_DEBUG fmt)
48#else
49#define DBG(fmt...)
50#endif
51
52/*
53 * We use a single global lock to protect accesses. Each driver has
54 * to take care of its own locking
55 */
56static DEFINE_SPINLOCK(feature_lock __pmacdata);
57
58#define LOCK(flags) spin_lock_irqsave(&feature_lock, flags);
59#define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags);
60
61
62/*
63 * Instance of some macio stuffs
64 */
65struct macio_chip macio_chips[MAX_MACIO_CHIPS] __pmacdata;
66
67struct macio_chip* __pmac
68macio_find(struct device_node* child, int type)
69{
70 while(child) {
71 int i;
72
73 for (i=0; i < MAX_MACIO_CHIPS && macio_chips[i].of_node; i++)
74 if (child == macio_chips[i].of_node &&
75 (!type || macio_chips[i].type == type))
76 return &macio_chips[i];
77 child = child->parent;
78 }
79 return NULL;
80}
81
82static const char* macio_names[] __pmacdata =
83{
84 "Unknown",
85 "Grand Central",
86 "OHare",
87 "OHareII",
88 "Heathrow",
89 "Gatwick",
90 "Paddington",
91 "Keylargo",
92 "Pangea",
93 "Intrepid",
94 "K2"
95};
96
97
98
99/*
100 * Uninorth reg. access. Note that Uni-N regs are big endian
101 */
102
103#define UN_REG(r) (uninorth_base + ((r) >> 2))
104#define UN_IN(r) (in_be32(UN_REG(r)))
105#define UN_OUT(r,v) (out_be32(UN_REG(r), (v)))
106#define UN_BIS(r,v) (UN_OUT((r), UN_IN(r) | (v)))
107#define UN_BIC(r,v) (UN_OUT((r), UN_IN(r) & ~(v)))
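	/* Editor's note: UN_BIS/UN_BIC are read-modify-write helpers that
	 * set or clear bits; UN_REG scales the byte offset "r" down to a
	 * u32 index since uninorth_base is a u32 pointer. */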
108
109static struct device_node* uninorth_node __pmacdata;
110static u32* uninorth_base __pmacdata;
111static u32 uninorth_rev __pmacdata;
112static void *u3_ht;
113
114extern struct device_node *k2_skiplist[2];
115
116/*
117 * For each motherboard family, we have a table of functions pointers
118 * that handle the various features.
119 */
120
121typedef long (*feature_call)(struct device_node* node, long param, long value);
122
123struct feature_table_entry {
124 unsigned int selector;
125 feature_call function;
126};
127
128struct pmac_mb_def
129{
130 const char* model_string;
131 const char* model_name;
132 int model_id;
133 struct feature_table_entry* features;
134 unsigned long board_flags;
135};
136static struct pmac_mb_def pmac_mb __pmacdata;
137
138/*
139 * Here are the chip specific feature functions
140 */
141
142
143static long __pmac g5_read_gpio(struct device_node* node, long param, long value)
144{
145 struct macio_chip* macio = &macio_chips[0];
146
147 return MACIO_IN8(param);
148}
149
150
151static long __pmac g5_write_gpio(struct device_node* node, long param, long value)
152{
153 struct macio_chip* macio = &macio_chips[0];
154
155 MACIO_OUT8(param, (u8)(value & 0xff));
156 return 0;
157}
158
159static long __pmac g5_gmac_enable(struct device_node* node, long param, long value)
160{
161 struct macio_chip* macio = &macio_chips[0];
162 unsigned long flags;
163
164 if (node == NULL)
165 return -ENODEV;
166
167 LOCK(flags);
168 if (value) {
169 MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE);
170 mb();
171 k2_skiplist[0] = NULL;
172 } else {
173 k2_skiplist[0] = node;
174 mb();
175 MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE);
176 }
177
178 UNLOCK(flags);
179 mdelay(1);
180
181 return 0;
182}
183
184static long __pmac g5_fw_enable(struct device_node* node, long param, long value)
185{
186 struct macio_chip* macio = &macio_chips[0];
187 unsigned long flags;
188
189 if (node == NULL)
190 return -ENODEV;
191
192 LOCK(flags);
193 if (value) {
194 MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE);
195 mb();
196 k2_skiplist[1] = NULL;
197 } else {
198 k2_skiplist[1] = node;
199 mb();
200 MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE);
201 }
202
203 UNLOCK(flags);
204 mdelay(1);
205
206 return 0;
207}
208
209static long __pmac g5_mpic_enable(struct device_node* node, long param, long value)
210{
211 unsigned long flags;
212
213 if (node->parent == NULL || strcmp(node->parent->name, "u3"))
214 return 0;
215
216 LOCK(flags);
217 UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE);
218 UNLOCK(flags);
219
220 return 0;
221}
222
223static long __pmac g5_eth_phy_reset(struct device_node* node, long param, long value)
224{
225 struct macio_chip* macio = &macio_chips[0];
226 struct device_node *phy;
227 int need_reset;
228
229 /*
230 * We must not reset the combo PHYs, only the BCM5221 found in
231 * the iMac G5.
232 */
233 phy = of_get_next_child(node, NULL);
234 if (!phy)
235 return -ENODEV;
236 need_reset = device_is_compatible(phy, "B5221");
237 of_node_put(phy);
238 if (!need_reset)
239 return 0;
240
241 /* PHY reset is GPIO 29, not in device-tree unfortunately */
242 MACIO_OUT8(K2_GPIO_EXTINT_0 + 29,
243 KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
244 /* Thankfully, this is now always called at a time when we can
245 * schedule by sungem.
246 */
247 msleep(10);
248 MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, 0);
249
250 return 0;
251}
252
253#ifdef CONFIG_SMP
254static long __pmac g5_reset_cpu(struct device_node* node, long param, long value)
255{
256 unsigned int reset_io = 0;
257 unsigned long flags;
258 struct macio_chip* macio;
259 struct device_node* np;
260
261 macio = &macio_chips[0];
262 if (macio->type != macio_keylargo2)
263 return -ENODEV;
264
265 np = find_path_device("/cpus");
266 if (np == NULL)
267 return -ENODEV;
268 for (np = np->child; np != NULL; np = np->sibling) {
269 u32* num = (u32 *)get_property(np, "reg", NULL);
270 u32* rst = (u32 *)get_property(np, "soft-reset", NULL);
271 if (num == NULL || rst == NULL)
272 continue;
273 if (param == *num) {
274 reset_io = *rst;
275 break;
276 }
277 }
278 if (np == NULL || reset_io == 0)
279 return -ENODEV;
280
281 LOCK(flags);
282 MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE);
283 (void)MACIO_IN8(reset_io);
284 udelay(1);
285 MACIO_OUT8(reset_io, 0);
286 (void)MACIO_IN8(reset_io);
287 UNLOCK(flags);
288
289 return 0;
290}
291#endif /* CONFIG_SMP */
292
293/*
294 * This can be called from pmac_smp so isn't static
295 *
296 * This takes the second CPU off the bus on dual CPU machines
297 * running UP
298 */
299void __pmac g5_phy_disable_cpu1(void)
300{
301 UN_OUT(U3_API_PHY_CONFIG_1, 0);
302}
303
304static long __pmac generic_get_mb_info(struct device_node* node, long param, long value)
305{
306 switch(param) {
307 case PMAC_MB_INFO_MODEL:
308 return pmac_mb.model_id;
309 case PMAC_MB_INFO_FLAGS:
310 return pmac_mb.board_flags;
311 case PMAC_MB_INFO_NAME:
312 /* hack hack hack... but should work */
313 *((const char **)value) = pmac_mb.model_name;
314 return 0;
315 }
316 return -EINVAL;
317}
318
319
320/*
321 * Table definitions
322 */
323
324/* Used on any machine
325 */
326static struct feature_table_entry any_features[] __pmacdata = {
327 { PMAC_FTR_GET_MB_INFO, generic_get_mb_info },
328 { 0, NULL }
329};
330
331/* G5 features
332 */
333static struct feature_table_entry g5_features[] __pmacdata = {
334 { PMAC_FTR_GMAC_ENABLE, g5_gmac_enable },
335 { PMAC_FTR_1394_ENABLE, g5_fw_enable },
336 { PMAC_FTR_ENABLE_MPIC, g5_mpic_enable },
337 { PMAC_FTR_READ_GPIO, g5_read_gpio },
338 { PMAC_FTR_WRITE_GPIO, g5_write_gpio },
339 { PMAC_FTR_GMAC_PHY_RESET, g5_eth_phy_reset },
340#ifdef CONFIG_SMP
341 { PMAC_FTR_RESET_CPU, g5_reset_cpu },
342#endif /* CONFIG_SMP */
343 { 0, NULL }
344};
345
346static struct pmac_mb_def pmac_mb_defs[] __pmacdata = {
347 { "PowerMac7,2", "PowerMac G5",
348 PMAC_TYPE_POWERMAC_G5, g5_features,
349 0,
350 },
351 { "PowerMac7,3", "PowerMac G5",
352 PMAC_TYPE_POWERMAC_G5, g5_features,
353 0,
354 },
355 { "PowerMac8,1", "iMac G5",
356 PMAC_TYPE_IMAC_G5, g5_features,
357 0,
358 },
359 { "PowerMac9,1", "PowerMac G5",
360 PMAC_TYPE_POWERMAC_G5_U3L, g5_features,
361 0,
362 },
363 { "RackMac3,1", "XServe G5",
364 PMAC_TYPE_XSERVE_G5, g5_features,
365 0,
366 },
367};
368
369/*
370 * The toplevel feature_call callback
371 */
372long __pmac pmac_do_feature_call(unsigned int selector, ...)
373{
374 struct device_node* node;
375 long param, value;
376 int i;
377 feature_call func = NULL;
378 va_list args;
379
380 if (pmac_mb.features)
381 for (i=0; pmac_mb.features[i].function; i++)
382 if (pmac_mb.features[i].selector == selector) {
383 func = pmac_mb.features[i].function;
384 break;
385 }
386 if (!func)
387 for (i=0; any_features[i].function; i++)
388 if (any_features[i].selector == selector) {
389 func = any_features[i].function;
390 break;
391 }
392 if (!func)
393 return -ENODEV;
394
395 va_start(args, selector);
396 node = (struct device_node*)va_arg(args, void*);
397 param = va_arg(args, long);
398 value = va_arg(args, long);
399 va_end(args);
400
401 return func(node, param, value);
402}
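
Editor's note: a sketch of how a client drives this varargs interface; every selector is followed by the fixed (node, param, value) triple. The selectors and GPIO offset below appear in this file, while "gmac_node" is an illustrative variable:

	/* Hypothetical callers: read a GPIO, then reset a GMAC PHY */
	long gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL,
					 K2_GPIO_EXTINT_0 + 29, 0);
	pmac_do_feature_call(PMAC_FTR_GMAC_PHY_RESET, gmac_node, 0, 0);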
403
404static int __init probe_motherboard(void)
405{
406 int i;
407 struct macio_chip* macio = &macio_chips[0];
408 const char* model = NULL;
409 struct device_node *dt;
410
411 /* Lookup known motherboard type in device-tree. First try an
412 * exact match on the "model" property, then try a "compatible"
413 * match if none is found.
414 */
415 dt = find_devices("device-tree");
416 if (dt != NULL)
417 model = (const char *) get_property(dt, "model", NULL);
418 for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) {
419 if (strcmp(model, pmac_mb_defs[i].model_string) == 0) {
420 pmac_mb = pmac_mb_defs[i];
421 goto found;
422 }
423 }
424 for(i=0; i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) {
425 if (machine_is_compatible(pmac_mb_defs[i].model_string)) {
426 pmac_mb = pmac_mb_defs[i];
427 goto found;
428 }
429 }
430
431 /* Fallback to selection depending on mac-io chip type */
432 switch(macio->type) {
433 case macio_keylargo2:
434 pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2;
435 pmac_mb.model_name = "Unknown K2-based";
436 pmac_mb.features = g5_features;
437 break;
438 default:
439 return -ENODEV;
440 }
441found:
442 /* Check for "mobile" machine */
443 if (model && (strncmp(model, "PowerBook", 9) == 0
444 || strncmp(model, "iBook", 5) == 0))
445 pmac_mb.board_flags |= PMAC_MB_MOBILE;
446
447
448 printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name);
449 return 0;
450}
451
452/* Initialize the Core99 UniNorth host bridge and memory controller
453 */
454static void __init probe_uninorth(void)
455{
456 uninorth_node = of_find_node_by_name(NULL, "u3");
457 if (uninorth_node && uninorth_node->n_addrs > 0) {
458 /* Small hack until I figure out if parsing in prom.c is correct. I should
459 * get rid of that pre-parsed junk anyway
460 */
461 unsigned long address = uninorth_node->addrs[0].address;
462 uninorth_base = ioremap(address, 0x40000);
463 uninorth_rev = in_be32(UN_REG(UNI_N_VERSION));
464 u3_ht = ioremap(address + U3_HT_CONFIG_BASE, 0x1000);
465 } else
466 uninorth_node = NULL;
467
468 if (!uninorth_node)
469 return;
470
471 printk(KERN_INFO "Found U3 memory controller & host bridge, revision: %d\n",
472 uninorth_rev);
473 printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base);
474
475}
476
477static void __init probe_one_macio(const char* name, const char* compat, int type)
478{
479 struct device_node* node;
480 int i;
481 volatile u32* base;
482 u32* revp;
483
484 node = find_devices(name);
485 if (!node || !node->n_addrs)
486 return;
487 if (compat)
488 do {
489 if (device_is_compatible(node, compat))
490 break;
491 node = node->next;
492 } while (node);
493 if (!node)
494 return;
495 for(i=0; i<MAX_MACIO_CHIPS; i++) {
496 if (!macio_chips[i].of_node)
497 break;
498 if (macio_chips[i].of_node == node)
499 return;
500 }
501 if (i >= MAX_MACIO_CHIPS) {
502 printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n");
503 printk(KERN_ERR "pmac_feature: %s skipped\n", node->full_name);
504 return;
505 }
506 base = (volatile u32*)ioremap(node->addrs[0].address, node->addrs[0].size);
507 if (!base) {
508 printk(KERN_ERR "pmac_feature: Can't map mac-io chip !\n");
509 return;
510 }
511 if (type == macio_keylargo) {
512 u32* did = (u32 *)get_property(node, "device-id", NULL);
513 if (*did == 0x00000025)
514 type = macio_pangea;
515 if (*did == 0x0000003e)
516 type = macio_intrepid;
517 }
518 macio_chips[i].of_node = node;
519 macio_chips[i].type = type;
520 macio_chips[i].base = base;
521 macio_chips[i].flags = MACIO_FLAG_SCCA_ON | MACIO_FLAG_SCCB_ON;
522 macio_chips[i].name = macio_names[type];
523 revp = (u32 *)get_property(node, "revision-id", NULL);
524 if (revp)
525 macio_chips[i].rev = *revp;
526 printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n",
527 macio_names[type], macio_chips[i].rev, macio_chips[i].base);
528}
529
530static int __init
531probe_macios(void)
532{
533 probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2);
534
535 macio_chips[0].lbus.index = 0;
536 macio_chips[1].lbus.index = 1;
537
538 return (macio_chips[0].of_node == NULL) ? -ENODEV : 0;
539}
540
541static void __init
542set_initial_features(void)
543{
544 struct device_node *np;
545
546 if (macio_chips[0].type == macio_keylargo2) {
547#ifndef CONFIG_SMP
548 /* On dual-CPU machines running a UP kernel, we have the second CPU eating
549 * bus cycles. We need to take it off the bus. This is done
550 * from pmac_smp for SMP kernels running on one CPU
551 */
552 np = of_find_node_by_type(NULL, "cpu");
553 if (np != NULL)
554 np = of_find_node_by_type(np, "cpu");
555 if (np != NULL) {
556 g5_phy_disable_cpu1();
557 of_node_put(np);
558 }
559#endif /* CONFIG_SMP */
560 /* Enable GMAC for now for PCI probing. It will be disabled
561 * later on after PCI probe
562 */
563 np = of_find_node_by_name(NULL, "ethernet");
564 while(np) {
565 if (device_is_compatible(np, "K2-GMAC"))
566 g5_gmac_enable(np, 0, 1);
567 np = of_find_node_by_name(np, "ethernet");
568 }
569
570 /* Enable FW before PCI probe. Will be disabled later on
571 * Note: We should have a better way to check that we are
572 * dealing with uninorth internal cell and not a PCI cell
573 * on the external PCI. The code below works though.
574 */
575 np = of_find_node_by_name(NULL, "firewire");
576 while(np) {
577 if (device_is_compatible(np, "pci106b,5811")) {
578 macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
579 g5_fw_enable(np, 0, 1);
580 }
581 np = of_find_node_by_name(np, "firewire");
582 }
583 }
584}
585
586void __init
587pmac_feature_init(void)
588{
589 /* Detect the UniNorth memory controller */
590 probe_uninorth();
591
592 /* Probe mac-io controllers */
593 if (probe_macios()) {
594 printk(KERN_WARNING "No mac-io chip found\n");
595 return;
596 }
597
598 /* Setup low-level i2c stuffs */
599 pmac_init_low_i2c();
600
601 /* Probe machine type */
602 if (probe_motherboard())
603 printk(KERN_WARNING "Unknown PowerMac !\n");
604
605 /* Set some initial features (turn off some chips that will
606 * be later turned on)
607 */
608 set_initial_features();
609}
610
611int __init pmac_feature_late_init(void)
612{
613#if 0
614 struct device_node* np;
615
616 /* Request some resources late */
617 if (uninorth_node)
618 request_OF_resource(uninorth_node, 0, NULL);
619 np = find_devices("hammerhead");
620 if (np)
621 request_OF_resource(np, 0, NULL);
622 np = find_devices("interrupt-controller");
623 if (np)
624 request_OF_resource(np, 0, NULL);
625#endif
626 return 0;
627}
628
629device_initcall(pmac_feature_late_init);
630
631#if 0
632static void dump_HT_speeds(char *name, u32 cfg, u32 frq)
633{
634 int freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 };
635 int bits[8] = { 8,16,0,32,2,4,0,0 };
636 int freq = (frq >> 8) & 0xf;
637
638 if (freqs[freq] == 0)
639 printk("%s: Unknown HT link frequency %x\n", name, freq);
640 else
641 printk("%s: %d MHz on main link, (%d in / %d out) bits width\n",
642 name, freqs[freq],
643 bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]);
644}
645#endif
646
647void __init pmac_check_ht_link(void)
648{
649#if 0 /* Disabled for now */
650 u32 ufreq, freq, ucfg, cfg;
651 struct device_node *pcix_node;
652 u8 px_bus, px_devfn;
653 struct pci_controller *px_hose;
654
655 (void)in_be32(u3_ht + U3_HT_LINK_COMMAND);
656 ucfg = cfg = in_be32(u3_ht + U3_HT_LINK_CONFIG);
657 ufreq = freq = in_be32(u3_ht + U3_HT_LINK_FREQ);
658 dump_HT_speeds("U3 HyperTransport", cfg, freq);
659
660 pcix_node = of_find_compatible_node(NULL, "pci", "pci-x");
661 if (pcix_node == NULL) {
662 printk("No PCI-X bridge found\n");
663 return;
664 }
665 px_hose = pcix_node->phb;
666 px_bus = pcix_node->busno;
667 px_devfn = pcix_node->devfn;
668
669 early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg);
670 early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq);
671 dump_HT_speeds("PCI-X HT Uplink", cfg, freq);
672 early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg);
673 early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq);
674 dump_HT_speeds("PCI-X HT Downlink", cfg, freq);
675#endif
676}
diff --git a/arch/ppc64/kernel/pmac_low_i2c.c b/arch/ppc64/kernel/pmac_low_i2c.c
new file mode 100644
index 000000000000..f3f39e8e337a
--- /dev/null
+++ b/arch/ppc64/kernel/pmac_low_i2c.c
@@ -0,0 +1,523 @@
1/*
2 * arch/ppc/platforms/pmac_low_i2c.c
3 *
4 * Copyright (C) 2003 Ben. Herrenschmidt (benh@kernel.crashing.org)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * This file contains some low-level i2c access routines that
12 * need to be used by various bits of the PowerMac platform code
13 * at times where the real asynchronous & interrupt driven driver
14 * cannot be used. The API borrows some semantics from the darwin
15 * driver in order to ease the implementation of the platform
16 * properties parser
17 */
18
19#undef DEBUG
20
21#include <linux/config.h>
22#include <linux/types.h>
23#include <linux/sched.h>
24#include <linux/init.h>
25#include <linux/module.h>
26#include <linux/adb.h>
27#include <linux/pmu.h>
28#include <asm/keylargo.h>
29#include <asm/uninorth.h>
30#include <asm/io.h>
31#include <asm/prom.h>
32#include <asm/machdep.h>
33#include <asm/pmac_low_i2c.h>
34
35#define MAX_LOW_I2C_HOST 4
36
37#ifdef DEBUG
38#define DBG(x...) do {\
39 printk(KERN_DEBUG "KW:" x); \
40 } while(0)
41#else
42#define DBG(x...)
43#endif
44
45struct low_i2c_host;
46
47typedef int (*low_i2c_func_t)(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len);
48
49struct low_i2c_host
50{
51 struct device_node *np; /* OF device node */
52 struct semaphore mutex; /* Access mutex for use by i2c-keywest */
53 low_i2c_func_t func; /* Access function */
54 unsigned int is_open : 1; /* Poor man's access control */
55 int mode; /* Current mode */
56 int channel; /* Current channel */
57 int num_channels; /* Number of channels */
58 void __iomem *base; /* For keywest-i2c, base address */
59 int bsteps; /* And register stepping */
60 int speed; /* And speed */
61};
62
63static struct low_i2c_host low_i2c_hosts[MAX_LOW_I2C_HOST];
64
65/* No locking is necessary on allocation, we are running way before
66 * anything can race with us
67 */
68static struct low_i2c_host *find_low_i2c_host(struct device_node *np)
69{
70 int i;
71
72 for (i = 0; i < MAX_LOW_I2C_HOST; i++)
73 if (low_i2c_hosts[i].np == np)
74 return &low_i2c_hosts[i];
75 return NULL;
76}
77
78/*
79 *
80 * i2c-keywest implementation (UniNorth, U2, U3, Keylargo's)
81 *
82 */
83
84/*
85 * Keywest i2c definitions borrowed from drivers/i2c/i2c-keywest.h,
86 * should be moved somewhere in include/asm-ppc/
87 */
88/* Register indices */
89typedef enum {
90 reg_mode = 0,
91 reg_control,
92 reg_status,
93 reg_isr,
94 reg_ier,
95 reg_addr,
96 reg_subaddr,
97 reg_data
98} reg_t;
99
100
101/* Mode register */
102#define KW_I2C_MODE_100KHZ 0x00
103#define KW_I2C_MODE_50KHZ 0x01
104#define KW_I2C_MODE_25KHZ 0x02
105#define KW_I2C_MODE_DUMB 0x00
106#define KW_I2C_MODE_STANDARD 0x04
107#define KW_I2C_MODE_STANDARDSUB 0x08
108#define KW_I2C_MODE_COMBINED 0x0C
109#define KW_I2C_MODE_MODE_MASK 0x0C
110#define KW_I2C_MODE_CHAN_MASK 0xF0
111
112/* Control register */
113#define KW_I2C_CTL_AAK 0x01
114#define KW_I2C_CTL_XADDR 0x02
115#define KW_I2C_CTL_STOP 0x04
116#define KW_I2C_CTL_START 0x08
117
118/* Status register */
119#define KW_I2C_STAT_BUSY 0x01
120#define KW_I2C_STAT_LAST_AAK 0x02
121#define KW_I2C_STAT_LAST_RW 0x04
122#define KW_I2C_STAT_SDA 0x08
123#define KW_I2C_STAT_SCL 0x10
124
125/* IER & ISR registers */
126#define KW_I2C_IRQ_DATA 0x01
127#define KW_I2C_IRQ_ADDR 0x02
128#define KW_I2C_IRQ_STOP 0x04
129#define KW_I2C_IRQ_START 0x08
130#define KW_I2C_IRQ_MASK 0x0F
131
132/* State machine states */
133enum {
134 state_idle,
135 state_addr,
136 state_read,
137 state_write,
138 state_stop,
139 state_dead
140};
141
142#define WRONG_STATE(name) do {\
143 printk(KERN_DEBUG "KW: wrong state. Got %s, state: %s (isr: %02x)\n", \
144 name, __kw_state_names[state], isr); \
145 } while(0)
146
147static const char *__kw_state_names[] = {
148 "state_idle",
149 "state_addr",
150 "state_read",
151 "state_write",
152 "state_stop",
153 "state_dead"
154};
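
Editor's summary, for orientation: the transitions these states go through in kw_handle_interrupt() below.

	/*
	 * state_addr  --IRQ_ADDR, ack--> state_read or state_write
	 * state_addr  --IRQ_ADDR, nak--> state_stop (rc = -ENODEV)
	 * state_read  --IRQ_DATA, last byte--> state_stop
	 * state_write --IRQ_DATA, last byte--> state_stop
	 * state_stop  --IRQ_STOP--> state_idle
	 * Any unexpected interrupt forces a STOP and reports -EIO.
	 */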
155
156static inline u8 __kw_read_reg(struct low_i2c_host *host, reg_t reg)
157{
158 return readb(host->base + (((unsigned int)reg) << host->bsteps));
159}
160
161static inline void __kw_write_reg(struct low_i2c_host *host, reg_t reg, u8 val)
162{
163 writeb(val, host->base + (((unsigned)reg) << host->bsteps));
164 (void)__kw_read_reg(host, reg_subaddr);
165}
166
167#define kw_write_reg(reg, val) __kw_write_reg(host, reg, val)
168#define kw_read_reg(reg) __kw_read_reg(host, reg)
169
170
171/* Don't schedule, the g5 fan controller is too
172 * timing sensitive
173 */
174static u8 kw_wait_interrupt(struct low_i2c_host* host)
175{
176 int i, j;
177 u8 isr;
178
179 for (i = 0; i < 100000; i++) {
180 isr = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK;
181 if (isr != 0)
182 return isr;
183
184 /* This code is used with the timebase frozen, we cannot rely
185 * on udelay ! For now, just use a bogus loop
186 */
187 for (j = 1; j < 10000; j++)
188 mb();
189 }
190 return isr;
191}
192
193static int kw_handle_interrupt(struct low_i2c_host *host, int state, int rw, int *rc, u8 **data, int *len, u8 isr)
194{
195 u8 ack;
196
197 DBG("kw_handle_interrupt(%s, isr: %x)\n", __kw_state_names[state], isr);
198
199 if (isr == 0) {
200 if (state != state_stop) {
201 DBG("KW: Timeout !\n");
202 *rc = -EIO;
203 goto stop;
204 }
205 if (state == state_stop) {
206 ack = kw_read_reg(reg_status);
207 if (!(ack & KW_I2C_STAT_BUSY)) {
208 state = state_idle;
209 kw_write_reg(reg_ier, 0x00);
210 }
211 }
212 return state;
213 }
214
215 if (isr & KW_I2C_IRQ_ADDR) {
216 ack = kw_read_reg(reg_status);
217 if (state != state_addr) {
218 kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR);
219 WRONG_STATE("KW_I2C_IRQ_ADDR");
220 *rc = -EIO;
221 goto stop;
222 }
223 if ((ack & KW_I2C_STAT_LAST_AAK) == 0) {
224 *rc = -ENODEV;
225 DBG("KW: NAK on address\n");
226 return state_stop;
227 } else {
228 if (rw) {
229 state = state_read;
230 if (*len > 1)
231 kw_write_reg(reg_control, KW_I2C_CTL_AAK);
232 } else {
233 state = state_write;
234 kw_write_reg(reg_data, **data);
235 (*data)++; (*len)--;
236 }
237 }
238 kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR);
239 }
240
241 if (isr & KW_I2C_IRQ_DATA) {
242 if (state == state_read) {
243 **data = kw_read_reg(reg_data);
244 (*data)++; (*len)--;
245 kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
246 if ((*len) == 0)
247 state = state_stop;
248 else if ((*len) == 1)
249 kw_write_reg(reg_control, 0);
250 } else if (state == state_write) {
251 ack = kw_read_reg(reg_status);
252 if ((ack & KW_I2C_STAT_LAST_AAK) == 0) {
253 DBG("KW: nack on data write\n");
254 *rc = -EIO;
255 goto stop;
256 } else if (*len) {
257 kw_write_reg(reg_data, **data);
258 (*data)++; (*len)--;
259 } else {
260 kw_write_reg(reg_control, KW_I2C_CTL_STOP);
261 state = state_stop;
262 *rc = 0;
263 }
264 kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
265 } else {
266 kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
267 WRONG_STATE("KW_I2C_IRQ_DATA");
268 if (state != state_stop) {
269 *rc = -EIO;
270 goto stop;
271 }
272 }
273 }
274
275 if (isr & KW_I2C_IRQ_STOP) {
276 kw_write_reg(reg_isr, KW_I2C_IRQ_STOP);
277 if (state != state_stop) {
278 WRONG_STATE("KW_I2C_IRQ_STOP");
279 *rc = -EIO;
280 }
281 return state_idle;
282 }
283
284 if (isr & KW_I2C_IRQ_START)
285 kw_write_reg(reg_isr, KW_I2C_IRQ_START);
286
287 return state;
288
289 stop:
290 kw_write_reg(reg_control, KW_I2C_CTL_STOP);
291 return state_stop;
292}
293
294static int keywest_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 subaddr, u8 *data, int len)
295{
296 u8 mode_reg = host->speed;
297 int state = state_addr;
298 int rc = 0;
299
300 /* Setup mode & subaddress if any */
301 switch(host->mode) {
302 case pmac_low_i2c_mode_dumb:
303 printk(KERN_ERR "low_i2c: Dumb mode not supported !\n");
304 return -EINVAL;
305 case pmac_low_i2c_mode_std:
306 mode_reg |= KW_I2C_MODE_STANDARD;
307 break;
308 case pmac_low_i2c_mode_stdsub:
309 mode_reg |= KW_I2C_MODE_STANDARDSUB;
310 break;
311 case pmac_low_i2c_mode_combined:
312 mode_reg |= KW_I2C_MODE_COMBINED;
313 break;
314 }
315
316 /* Setup channel & clear pending irqs */
317 kw_write_reg(reg_isr, kw_read_reg(reg_isr));
318 kw_write_reg(reg_mode, mode_reg | (host->channel << 4));
319 kw_write_reg(reg_status, 0);
320
321 /* Set up address and r/w bit */
322 kw_write_reg(reg_addr, addr);
323
324 /* Set up the sub address */
325 if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB
326 || (mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_COMBINED)
327 kw_write_reg(reg_subaddr, subaddr);
328
329 /* Start sending address & disable interrupt*/
330 kw_write_reg(reg_ier, 0 /*KW_I2C_IRQ_MASK*/);
331 kw_write_reg(reg_control, KW_I2C_CTL_XADDR);
332
333 /* State machine, to turn into an interrupt handler */
334 while(state != state_idle) {
335 u8 isr = kw_wait_interrupt(host);
336 state = kw_handle_interrupt(host, state, addr & 1, &rc, &data, &len, isr);
337 }
338
339 return rc;
340}
341
342static void keywest_low_i2c_add(struct device_node *np)
343{
344 struct low_i2c_host *host = find_low_i2c_host(NULL);
345 u32 *psteps, *prate, steps, aoffset = 0;
346 struct device_node *parent;
347
348 if (host == NULL) {
349 printk(KERN_ERR "low_i2c: Can't allocate host for %s\n",
350 np->full_name);
351 return;
352 }
353 memset(host, 0, sizeof(*host));
354
355 init_MUTEX(&host->mutex);
356 host->np = of_node_get(np);
357 psteps = (u32 *)get_property(np, "AAPL,address-step", NULL);
358 steps = psteps ? (*psteps) : 0x10;
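	/* Editor's note: bsteps ends up as log2 of the address step, so
	 * the common 0x10 step gives bsteps = 4 (registers 16 bytes
	 * apart), matching the "reg << bsteps" scaling in __kw_read_reg(). */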
359 for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++)
360 steps >>= 1;
361 parent = of_get_parent(np);
362 host->num_channels = 1;
363 if (parent && parent->name[0] == 'u') {
364 host->num_channels = 2;
365 aoffset = 3;
366 }
367 /* Select interface rate */
368 host->speed = KW_I2C_MODE_100KHZ;
369 prate = (u32 *)get_property(np, "AAPL,i2c-rate", NULL);
370 if (prate) switch(*prate) {
371 case 100:
372 host->speed = KW_I2C_MODE_100KHZ;
373 break;
374 case 50:
375 host->speed = KW_I2C_MODE_50KHZ;
376 break;
377 case 25:
378 host->speed = KW_I2C_MODE_25KHZ;
379 break;
380 }
381
382 host->mode = pmac_low_i2c_mode_std;
383 host->base = ioremap(np->addrs[0].address + aoffset,
384 np->addrs[0].size);
385 host->func = keywest_low_i2c_func;
386}
387
388/*
389 *
390 * PMU implementation
391 *
392 */
393
394
395#ifdef CONFIG_ADB_PMU
396
397static int pmu_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len)
398{
399 // TODO
400 return -ENODEV;
401}
402
403static void pmu_low_i2c_add(struct device_node *np)
404{
405 struct low_i2c_host *host = find_low_i2c_host(NULL);
406
407 if (host == NULL) {
408 printk(KERN_ERR "low_i2c: Can't allocate host for %s\n",
409 np->full_name);
410 return;
411 }
412 memset(host, 0, sizeof(*host));
413
414 init_MUTEX(&host->mutex);
415 host->np = of_node_get(np);
416 host->num_channels = 3;
417 host->mode = pmac_low_i2c_mode_std;
418 host->func = pmu_low_i2c_func;
419}
420
421#endif /* CONFIG_ADB_PMU */
422
423void __init pmac_init_low_i2c(void)
424{
425 struct device_node *np;
426
427 /* Probe keywest-i2c busses */
428 np = of_find_compatible_node(NULL, "i2c", "keywest-i2c");
429 while(np) {
430 keywest_low_i2c_add(np);
431 np = of_find_compatible_node(np, "i2c", "keywest-i2c");
432 }
433
434#ifdef CONFIG_ADB_PMU
435 /* Probe PMU busses */
436 np = of_find_node_by_name(NULL, "via-pmu");
437 if (np)
438 pmu_low_i2c_add(np);
439#endif /* CONFIG_ADB_PMU */
440
441 /* TODO: Add CUDA support as well */
442}
443
444int pmac_low_i2c_lock(struct device_node *np)
445{
446 struct low_i2c_host *host = find_low_i2c_host(np);
447
448 if (!host)
449 return -ENODEV;
450 down(&host->mutex);
451 return 0;
452}
453EXPORT_SYMBOL(pmac_low_i2c_lock);
454
455int pmac_low_i2c_unlock(struct device_node *np)
456{
457 struct low_i2c_host *host = find_low_i2c_host(np);
458
459 if (!host)
460 return -ENODEV;
461 up(&host->mutex);
462 return 0;
463}
464EXPORT_SYMBOL(pmac_low_i2c_unlock);
465
466
467int pmac_low_i2c_open(struct device_node *np, int channel)
468{
469 struct low_i2c_host *host = find_low_i2c_host(np);
470
471 if (!host)
472 return -ENODEV;
473
474 if (channel >= host->num_channels)
475 return -EINVAL;
476
477 down(&host->mutex);
478 host->is_open = 1;
479 host->channel = channel;
480
481 return 0;
482}
483EXPORT_SYMBOL(pmac_low_i2c_open);
484
485int pmac_low_i2c_close(struct device_node *np)
486{
487 struct low_i2c_host *host = find_low_i2c_host(np);
488
489 if (!host)
490 return -ENODEV;
491
492 host->is_open = 0;
493 up(&host->mutex);
494
495 return 0;
496}
497EXPORT_SYMBOL(pmac_low_i2c_close);
498
499int pmac_low_i2c_setmode(struct device_node *np, int mode)
500{
501 struct low_i2c_host *host = find_low_i2c_host(np);
502
503 if (!host)
504 return -ENODEV;
505 WARN_ON(!host->is_open);
506 host->mode = mode;
507
508 return 0;
509}
510EXPORT_SYMBOL(pmac_low_i2c_setmode);
511
512int pmac_low_i2c_xfer(struct device_node *np, u8 addrdir, u8 subaddr, u8 *data, int len)
513{
514 struct low_i2c_host *host = find_low_i2c_host(np);
515
516 if (!host)
517 return -ENODEV;
518 WARN_ON(!host->is_open);
519
520 return host->func(host, addrdir, subaddr, data, len);
521}
522EXPORT_SYMBOL(pmac_low_i2c_xfer);
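
Editor's note: a sketch of the intended calling sequence; the node pointer, channel, address and sub-register are illustrative. pmac_low_i2c_open() takes the host mutex itself, so no separate lock call is needed around a transfer:

	/* Hypothetical client: read one byte from sub-register 0x01 of
	 * the device at I2C address 0x92 on channel 0 of bus "np". The
	 * low bit of addrdir selects the direction (1 = read). */
	u8 data;

	if (pmac_low_i2c_open(np, 0) == 0) {
		pmac_low_i2c_setmode(np, pmac_low_i2c_mode_combined);
		pmac_low_i2c_xfer(np, 0x92 | 1, 0x01, &data, 1);
		pmac_low_i2c_close(np);
	}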
523
diff --git a/arch/ppc64/kernel/pmac_nvram.c b/arch/ppc64/kernel/pmac_nvram.c
new file mode 100644
index 000000000000..e32a902236e3
--- /dev/null
+++ b/arch/ppc64/kernel/pmac_nvram.c
@@ -0,0 +1,495 @@
1/*
2 * arch/ppc/platforms/pmac_nvram.c
3 *
4 * Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Todo: - add support for the OF persistent properties
12 */
13#include <linux/config.h>
14#include <linux/module.h>
15#include <linux/kernel.h>
16#include <linux/stddef.h>
17#include <linux/string.h>
18#include <linux/init.h>
19#include <linux/slab.h>
20#include <linux/delay.h>
21#include <linux/errno.h>
22#include <linux/bootmem.h>
23#include <linux/completion.h>
24#include <linux/spinlock.h>
25#include <asm/sections.h>
26#include <asm/io.h>
27#include <asm/system.h>
28#include <asm/prom.h>
29#include <asm/machdep.h>
30#include <asm/nvram.h>
31
32#define DEBUG
33
34#ifdef DEBUG
35#define DBG(x...) printk(x)
36#else
37#define DBG(x...)
38#endif
39
40#define NVRAM_SIZE 0x2000 /* 8kB of non-volatile RAM */
41
42#define CORE99_SIGNATURE 0x5a
43#define CORE99_ADLER_START 0x14
44
45/* On Core99, nvram is either a Sharp, a Micron or an AMD flash */
46#define SM_FLASH_STATUS_DONE 0x80
47#define SM_FLASH_STATUS_ERR 0x38
48
49#define SM_FLASH_CMD_ERASE_CONFIRM 0xd0
50#define SM_FLASH_CMD_ERASE_SETUP 0x20
51#define SM_FLASH_CMD_RESET 0xff
52#define SM_FLASH_CMD_WRITE_SETUP 0x40
53#define SM_FLASH_CMD_CLEAR_STATUS 0x50
54#define SM_FLASH_CMD_READ_STATUS 0x70
55
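	/* Editor's summary of the Sharp/Micron protocol used below: writes
	 * are WRITE_SETUP, then the data byte, then READ_STATUS polling
	 * until DONE; erases are ERASE_SETUP + ERASE_CONFIRM followed by
	 * the same status poll. */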
56/* CHRP NVRAM header */
57struct chrp_header {
58 u8 signature;
59 u8 cksum;
60 u16 len;
61 char name[12];
62 u8 data[0];
63};
64
65struct core99_header {
66 struct chrp_header hdr;
67 u32 adler;
68 u32 generation;
69 u32 reserved[2];
70};
71
72/*
73 * Read and write the non-volatile RAM on PowerMacs and CHRP machines.
74 */
75static volatile unsigned char *nvram_data;
76static int core99_bank = 0;
77// XXX Turn that into a sem
78static DEFINE_SPINLOCK(nv_lock);
79
80extern int system_running;
81
82static int (*core99_write_bank)(int bank, u8* datas);
83static int (*core99_erase_bank)(int bank);
84
85static char *nvram_image __pmacdata;
86
87
88static ssize_t __pmac core99_nvram_read(char *buf, size_t count, loff_t *index)
89{
90 int i;
91
92 if (nvram_image == NULL)
93 return -ENODEV;
94 if (*index > NVRAM_SIZE)
95 return 0;
96
97 i = *index;
98 if (i + count > NVRAM_SIZE)
99 count = NVRAM_SIZE - i;
100
101 memcpy(buf, &nvram_image[i], count);
102 *index = i + count;
103 return count;
104}
105
106static ssize_t __pmac core99_nvram_write(char *buf, size_t count, loff_t *index)
107{
108 int i;
109
110 if (nvram_image == NULL)
111 return -ENODEV;
112 if (*index > NVRAM_SIZE)
113 return 0;
114
115 i = *index;
116 if (i + count > NVRAM_SIZE)
117 count = NVRAM_SIZE - i;
118
119 memcpy(&nvram_image[i], buf, count);
120 *index = i + count;
121 return count;
122}
123
124static ssize_t __pmac core99_nvram_size(void)
125{
126 if (nvram_image == NULL)
127 return -ENODEV;
128 return NVRAM_SIZE;
129}
130
131static u8 __pmac chrp_checksum(struct chrp_header* hdr)
132{
133 u8 *ptr;
134 u16 sum = hdr->signature;
135 for (ptr = (u8 *)&hdr->len; ptr < hdr->data; ptr++)
136 sum += *ptr;
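	/* Editor's note: the loop below folds the 16-bit sum down to a
	 * single byte, e.g. 0x1FE -> 0x01 + 0xFE = 0xFF. */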
137 while (sum > 0xFF)
138 sum = (sum & 0xFF) + (sum>>8);
139 return sum;
140}
141
142static u32 __pmac core99_calc_adler(u8 *buffer)
143{
144 int cnt;
145 u32 low, high;
146
147 buffer += CORE99_ADLER_START;
148 low = 1;
149 high = 0;
150 for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) {
151 if ((cnt % 5000) == 0) {
152 low %= 65521UL;
153 high %= 65521UL;
154 }
155 low += buffer[cnt];
156 high += low;
157 }
158 low %= 65521UL;
159 high %= 65521UL;
160
161 return (high << 16) | low;
162}
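
Editor's note: the routine above is the standard Adler-32 checksum, computed over everything past the header fields. As a reference:

	/*
	 * Adler-32 over bytes d[0..n-1]:
	 *   low   = (1 + d[0] + ... + d[n-1])          mod 65521
	 *   high  = (sum of every intermediate "low")  mod 65521
	 *   adler = (high << 16) | low
	 * The periodic reduction every 5000 bytes only keeps the 32-bit
	 * accumulators from overflowing; it does not change the result.
	 */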
163
164static u32 __pmac core99_check(u8* datas)
165{
166 struct core99_header* hdr99 = (struct core99_header*)datas;
167
168 if (hdr99->hdr.signature != CORE99_SIGNATURE) {
169 DBG("Invalid signature\n");
170 return 0;
171 }
172 if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) {
173 DBG("Invalid checksum\n");
174 return 0;
175 }
176 if (hdr99->adler != core99_calc_adler(datas)) {
177 DBG("Invalid adler\n");
178 return 0;
179 }
180 return hdr99->generation;
181}
182
183static int __pmac sm_erase_bank(int bank)
184{
185 int stat, i;
186 unsigned long timeout;
187
188 u8* base = (u8 *)nvram_data + bank*NVRAM_SIZE;
189
190 DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank);
191
192 out_8(base, SM_FLASH_CMD_ERASE_SETUP);
193 out_8(base, SM_FLASH_CMD_ERASE_CONFIRM);
194 timeout = 0;
195 do {
196 if (++timeout > 1000000) {
197 printk(KERN_ERR "nvram: Sharp/Miron flash erase timeout !\n");
198 break;
199 }
200 out_8(base, SM_FLASH_CMD_READ_STATUS);
201 stat = in_8(base);
202 } while (!(stat & SM_FLASH_STATUS_DONE));
203
204 out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
205 out_8(base, SM_FLASH_CMD_RESET);
206
207 for (i=0; i<NVRAM_SIZE; i++)
208 if (base[i] != 0xff) {
209 printk(KERN_ERR "nvram: Sharp/Micron flash erase failed !\n");
210 return -ENXIO;
211 }
212 return 0;
213}
214
215static int __pmac sm_write_bank(int bank, u8* datas)
216{
217 int i, stat = 0;
218 unsigned long timeout;
219
220 u8* base = (u8 *)nvram_data + bank*NVRAM_SIZE;
221
222 DBG("nvram: Sharp/Micron Writing bank %d...\n", bank);
223
224 for (i=0; i<NVRAM_SIZE; i++) {
225 out_8(base+i, SM_FLASH_CMD_WRITE_SETUP);
226 udelay(1);
227 out_8(base+i, datas[i]);
228 timeout = 0;
229 do {
230 if (++timeout > 1000000) {
231 printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n");
232 break;
233 }
234 out_8(base, SM_FLASH_CMD_READ_STATUS);
235 stat = in_8(base);
236 } while (!(stat & SM_FLASH_STATUS_DONE));
237 if (!(stat & SM_FLASH_STATUS_DONE))
238 break;
239 }
240 out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
241 out_8(base, SM_FLASH_CMD_RESET);
242 for (i=0; i<NVRAM_SIZE; i++)
243 if (base[i] != datas[i]) {
244 printk(KERN_ERR "nvram: Sharp/Micron flash write failed !\n");
245 return -ENXIO;
246 }
247 return 0;
248}
249
250static int __pmac amd_erase_bank(int bank)
251{
252 int i, stat = 0;
253 unsigned long timeout;
254
255 u8* base = (u8 *)nvram_data + bank*NVRAM_SIZE;
256
257 DBG("nvram: AMD Erasing bank %d...\n", bank);
258
259 /* Unlock 1 */
260 out_8(base+0x555, 0xaa);
261 udelay(1);
262 /* Unlock 2 */
263 out_8(base+0x2aa, 0x55);
264 udelay(1);
265
266 /* Sector-Erase */
267 out_8(base+0x555, 0x80);
268 udelay(1);
269 out_8(base+0x555, 0xaa);
270 udelay(1);
271 out_8(base+0x2aa, 0x55);
272 udelay(1);
273 out_8(base, 0x30);
274 udelay(1);
275
276 timeout = 0;
277 do {
278 if (++timeout > 1000000) {
279 printk(KERN_ERR "nvram: AMD flash erase timeout !\n");
280 break;
281 }
282 stat = in_8(base) ^ in_8(base);
283 } while (stat != 0);
284
285 /* Reset */
286 out_8(base, 0xf0);
287 udelay(1);
288
289 for (i=0; i<NVRAM_SIZE; i++)
290 if (base[i] != 0xff) {
291 printk(KERN_ERR "nvram: AMD flash erase failed !\n");
292 return -ENXIO;
293 }
294 return 0;
295}
296
297static int __pmac amd_write_bank(int bank, u8* datas)
298{
299 int i, stat = 0;
300 unsigned long timeout;
301
302 u8* base = (u8 *)nvram_data + bank*NVRAM_SIZE;
303
304 DBG("nvram: AMD Writing bank %d...\n", bank);
305
306 for (i=0; i<NVRAM_SIZE; i++) {
307 /* Unlock 1 */
308 out_8(base+0x555, 0xaa);
309 udelay(1);
310 /* Unlock 2 */
311 out_8(base+0x2aa, 0x55);
312 udelay(1);
313
314 /* Write single word */
315 out_8(base+0x555, 0xa0);
316 udelay(1);
317 out_8(base+i, datas[i]);
318
319 timeout = 0;
320 do {
321 if (++timeout > 1000000) {
322 printk(KERN_ERR "nvram: AMD flash write timeout !\n");
323 break;
324 }
325 stat = in_8(base) ^ in_8(base);
326 } while (stat != 0);
327 if (stat != 0)
328 break;
329 }
330
331 /* Reset */
332 out_8(base, 0xf0);
333 udelay(1);
334
335 for (i=0; i<NVRAM_SIZE; i++)
336 if (base[i] != datas[i]) {
337 printk(KERN_ERR "nvram: AMD flash write failed !\n");
338 return -ENXIO;
339 }
340 return 0;
341}
342
343
344static int __pmac core99_nvram_sync(void)
345{
346 struct core99_header* hdr99;
347 unsigned long flags;
348
349 spin_lock_irqsave(&nv_lock, flags);
350 if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE,
351 NVRAM_SIZE))
352 goto bail;
353
354 DBG("Updating nvram...\n");
355
356 hdr99 = (struct core99_header*)nvram_image;
357 hdr99->generation++;
358 hdr99->hdr.signature = CORE99_SIGNATURE;
359 hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr);
360 hdr99->adler = core99_calc_adler(nvram_image);
361 core99_bank = core99_bank ? 0 : 1;
362 if (core99_erase_bank)
363 if (core99_erase_bank(core99_bank)) {
364 printk("nvram: Error erasing bank %d\n", core99_bank);
365 goto bail;
366 }
367 if (core99_write_bank)
368 if (core99_write_bank(core99_bank, nvram_image))
369 printk("nvram: Error writing bank %d\n", core99_bank);
370 bail:
371 spin_unlock_irqrestore(&nv_lock, flags);
372
373 return 0;
374}
375
376int __init pmac_nvram_init(void)
377{
378 struct device_node *dp;
379 u32 gen_bank0, gen_bank1;
380 int i;
381
382 dp = find_devices("nvram");
383 if (dp == NULL) {
384 printk(KERN_ERR "Can't find NVRAM device\n");
385 return -ENODEV;
386 }
387 if (!device_is_compatible(dp, "nvram,flash")) {
388 printk(KERN_ERR "Incompatible type of NVRAM\n");
389 return -ENXIO;
390 }
391
392 nvram_image = alloc_bootmem(NVRAM_SIZE);
393 if (nvram_image == NULL) {
394 printk(KERN_ERR "nvram: can't allocate ram image\n");
395 return -ENOMEM;
396 }
397 nvram_data = ioremap(dp->addrs[0].address, NVRAM_SIZE*2);
398
399 DBG("nvram: Checking bank 0...\n");
400
401 gen_bank0 = core99_check((u8 *)nvram_data);
402 gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE);
403 core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0;
404
405 DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1);
406 DBG("nvram: Active bank is: %d\n", core99_bank);
407
408 for (i=0; i<NVRAM_SIZE; i++)
409 nvram_image[i] = nvram_data[i + core99_bank*NVRAM_SIZE];
410
411 ppc_md.nvram_read = core99_nvram_read;
412 ppc_md.nvram_write = core99_nvram_write;
413 ppc_md.nvram_size = core99_nvram_size;
414 ppc_md.nvram_sync = core99_nvram_sync;
415
416 /*
417 * Maybe we could be smarter here though making an exclusive list
418 * of known flash chips is a bit nasty as older OF didn't provide us
419 * with a useful "compatible" entry. A solution would be to really
420 * identify the chip using flash id commands and base ourselves on
421 * a list of known chips IDs
422 */
423 if (device_is_compatible(dp, "amd-0137")) {
424 core99_erase_bank = amd_erase_bank;
425 core99_write_bank = amd_write_bank;
426 } else {
427 core99_erase_bank = sm_erase_bank;
428 core99_write_bank = sm_write_bank;
429 }
430
431 return 0;
432}
433
434int __pmac pmac_get_partition(int partition)
435{
436 struct nvram_partition *part;
437 const char *name;
438 int sig;
439
440 switch(partition) {
441 case pmac_nvram_OF:
442 name = "common";
443 sig = NVRAM_SIG_SYS;
444 break;
445 case pmac_nvram_XPRAM:
446 name = "APL,MacOS75";
447 sig = NVRAM_SIG_OS;
448 break;
449 case pmac_nvram_NR:
450 default:
451 /* Oldworld stuff */
452 return -ENODEV;
453 }
454
455 part = nvram_find_partition(sig, name);
456 if (part == NULL)
457 return 0;
458
459 return part->index;
460}
461
462u8 __pmac pmac_xpram_read(int xpaddr)
463{
464 int offset = pmac_get_partition(pmac_nvram_XPRAM);
465 loff_t index;
466 u8 buf;
467 ssize_t count;
468
469 if (offset < 0 || xpaddr < 0 || xpaddr >= 0x100)
470 return 0xff;
471 index = offset + xpaddr;
472
473 count = ppc_md.nvram_read(&buf, 1, &index);
474 if (count != 1)
475 return 0xff;
476 return buf;
477}
478
479void __pmac pmac_xpram_write(int xpaddr, u8 data)
480{
481 int offset = pmac_get_partition(pmac_nvram_XPRAM);
482 loff_t index;
483 u8 buf;
484
485 if (offset < 0 || xpaddr < 0 || xpaddr >= 0x100)
486 return;
487 index = offset + xpaddr;
488 buf = data;
489
490 ppc_md.nvram_write(&buf, 1, &index);
491}
492
493EXPORT_SYMBOL(pmac_get_partition);
494EXPORT_SYMBOL(pmac_xpram_read);
495EXPORT_SYMBOL(pmac_xpram_write);
diff --git a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c
new file mode 100644
index 000000000000..71fe911ad183
--- /dev/null
+++ b/arch/ppc64/kernel/pmac_pci.c
@@ -0,0 +1,793 @@
1/*
2 * Support for PCI bridges found on Power Macintoshes.
3 * At present the "bandit" and "chaos" bridges are supported.
4 * Fortunately you access configuration space in the same
5 * way with either bridge.
6 *
7 * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
8 * Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/kernel.h>
17#include <linux/pci.h>
18#include <linux/delay.h>
19#include <linux/string.h>
20#include <linux/init.h>
21#include <linux/bootmem.h>
22
23#include <asm/sections.h>
24#include <asm/io.h>
25#include <asm/prom.h>
26#include <asm/pci-bridge.h>
27#include <asm/machdep.h>
28#include <asm/pmac_feature.h>
29#include <asm/iommu.h>
30
31#include "pci.h"
32#include "pmac.h"
33
34#define DEBUG
35
36#ifdef DEBUG
37#define DBG(x...) printk(x)
38#else
39#define DBG(x...)
40#endif
41
42/* XXX Could be per-controller, but I don't think we risk anything by
43 * assuming we won't have both UniNorth and Bandit */
44static int has_uninorth;
45static struct pci_controller *u3_agp;
46struct device_node *k2_skiplist[2];
47
48static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
49{
50 for (; node != 0;node = node->sibling) {
51 int * bus_range;
52 unsigned int *class_code;
53 int len;
54
55 /* For PCI<->PCI bridges or CardBus bridges, we go down */
56 class_code = (unsigned int *) get_property(node, "class-code", NULL);
57 if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
58 (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
59 continue;
60 bus_range = (int *) get_property(node, "bus-range", &len);
61 if (bus_range != NULL && len >= 2 * sizeof(int)) {
62 if (bus_range[1] > higher)
63 higher = bus_range[1];
64 }
65 higher = fixup_one_level_bus_range(node->child, higher);
66 }
67 return higher;
68}
69
70/* This routine fixes the "bus-range" property of all bridges in the
71 * system since they tend to have their "last" member wrong on macs
72 *
73 * Note that the bus numbers manipulated here are OF bus numbers, they
74 * are not Linux bus numbers.
75 */
76static void __init fixup_bus_range(struct device_node *bridge)
77{
78 int * bus_range;
79 int len;
80
81 /* Lookup the "bus-range" property for the hose */
82 bus_range = (int *) get_property(bridge, "bus-range", &len);
83 if (bus_range == NULL || len < 2 * sizeof(int)) {
84 printk(KERN_WARNING "Can't get bus-range for %s\n",
85 bridge->full_name);
86 return;
87 }
88 bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]);
89}
90
91/*
92 * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers.
93 *
94 * The "Bandit" version is present in all early PCI PowerMacs,
95 * and up to the first ones using Grackle. Some machines may
96 * have 2 bandit controllers (2 PCI busses).
97 *
98 * "Chaos" is used in some "Bandit"-type machines as a bridge
99 * for the separate display bus. It is accessed the same
100 * way as bandit, but cannot be probed for devices. It therefore
101 * has its own config access functions.
102 *
103 * The "UniNorth" version is present in all Core99 machines
104 * (iBook, G4, new IMacs, and all the recent Apple machines).
105 * It contains 3 controllers in one ASIC.
106 *
107 * The U3 is the bridge used on G5 machines. It contains an
108 * AGP bus, which is handled by the old UniNorth access routines,
109 * and a HyperTransport bus which uses its own set of access
110 * functions.
111 */
112
113#define MACRISC_CFA0(devfn, off) \
114 ((1 << (unsigned long)PCI_SLOT(dev_fn)) \
115 | (((unsigned long)PCI_FUNC(dev_fn)) << 8) \
116 | (((unsigned long)(off)) & 0xFCUL))
117
118#define MACRISC_CFA1(bus, devfn, off) \
119 ((((unsigned long)(bus)) << 16) \
120 |(((unsigned long)(devfn)) << 8) \
121 |(((unsigned long)(off)) & 0xFCUL) \
122 |1UL)
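
Editor's note: a worked example of the two encodings, assuming slot 13, function 0 and register offset 0x10:

	/*
	 * Type 0 (device on the hose's root bus), devfn = PCI_DEVFN(13, 0):
	 *   MACRISC_CFA0: (1 << 13) | (0 << 8) | 0x10        = 0x00002010
	 * Type 1 (device behind a bridge, here bus 2, devfn 0x68):
	 *   MACRISC_CFA1: (2 << 16) | (0x68 << 8) | 0x10 | 1 = 0x00026811
	 */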
123
124static unsigned long __pmac macrisc_cfg_access(struct pci_controller* hose,
125 u8 bus, u8 dev_fn, u8 offset)
126{
127 unsigned int caddr;
128
129 if (bus == hose->first_busno) {
130 if (dev_fn < (11 << 3))
131 return 0;
132 caddr = MACRISC_CFA0(dev_fn, offset);
133 } else
134 caddr = MACRISC_CFA1(bus, dev_fn, offset);
135
136 /* Uninorth will return garbage if we don't read back the value ! */
137 do {
138 out_le32(hose->cfg_addr, caddr);
139 } while (in_le32(hose->cfg_addr) != caddr);
140
141 offset &= has_uninorth ? 0x07 : 0x03;
142 return ((unsigned long)hose->cfg_data) + offset;
143}
144
145static int __pmac macrisc_read_config(struct pci_bus *bus, unsigned int devfn,
146 int offset, int len, u32 *val)
147{
148 struct pci_controller *hose;
149 unsigned long addr;
150
151 hose = pci_bus_to_host(bus);
152 if (hose == NULL)
153 return PCIBIOS_DEVICE_NOT_FOUND;
154
155 addr = macrisc_cfg_access(hose, bus->number, devfn, offset);
156 if (!addr)
157 return PCIBIOS_DEVICE_NOT_FOUND;
158 /*
159 * Note: the caller has already checked that offset is
160 * suitably aligned and that len is 1, 2 or 4.
161 */
162 switch (len) {
163 case 1:
164 *val = in_8((u8 *)addr);
165 break;
166 case 2:
167 *val = in_le16((u16 *)addr);
168 break;
169 default:
170 *val = in_le32((u32 *)addr);
171 break;
172 }
173 return PCIBIOS_SUCCESSFUL;
174}
175
176static int __pmac macrisc_write_config(struct pci_bus *bus, unsigned int devfn,
177 int offset, int len, u32 val)
178{
179 struct pci_controller *hose;
180 unsigned long addr;
181
182 hose = pci_bus_to_host(bus);
183 if (hose == NULL)
184 return PCIBIOS_DEVICE_NOT_FOUND;
185
186 addr = macrisc_cfg_access(hose, bus->number, devfn, offset);
187 if (!addr)
188 return PCIBIOS_DEVICE_NOT_FOUND;
189 /*
190 * Note: the caller has already checked that offset is
191 * suitably aligned and that len is 1, 2 or 4.
192 */
193 switch (len) {
194 case 1:
195 out_8((u8 *)addr, val);
196 (void) in_8((u8 *)addr);
197 break;
198 case 2:
199 out_le16((u16 *)addr, val);
200 (void) in_le16((u16 *)addr);
201 break;
202 default:
203 out_le32((u32 *)addr, val);
204 (void) in_le32((u32 *)addr);
205 break;
206 }
207 return PCIBIOS_SUCCESSFUL;
208}
209
210static struct pci_ops macrisc_pci_ops =
211{
212 macrisc_read_config,
213 macrisc_write_config
214};
215
216/*
217 * These versions of U3 HyperTransport config space access ops do not
218 * implement self-view of the HT host yet
219 */
220
221/*
222 * This function deals with some "special cases" devices.
223 *
224 * 0 -> No special case
225 * 1 -> Skip the device but act as if the access was successfull
226 * (return 0xff's on reads, eventually, cache config space
227 * accesses in a later version)
228 * -1 -> Hide the device (unsuccessful acess)
229 */
230static int u3_ht_skip_device(struct pci_controller *hose,
231 struct pci_bus *bus, unsigned int devfn)
232{
233 struct device_node *busdn, *dn;
234 int i;
235
236 /* We only allow config cycles to devices that are in OF device-tree
237 * as we are apparently having some weird things going on with some
238 * revs of K2 on recent G5s
239 */
240 if (bus->self)
241 busdn = pci_device_to_OF_node(bus->self);
242 else
243 busdn = hose->arch_data;
244 for (dn = busdn->child; dn; dn = dn->sibling)
245 if (dn->devfn == devfn)
246 break;
247 if (dn == NULL)
248 return -1;
249
250 /*
251 * When a device in K2 is powered down, we die on config
252 * cycle accesses. Fix that here.
253 */
254 for (i=0; i<2; i++)
255 if (k2_skiplist[i] == dn)
256 return 1;
257
258 return 0;
259}
260
261#define U3_HT_CFA0(devfn, off) \
262 ((((unsigned long)(devfn)) << 8) | (off))
263#define U3_HT_CFA1(bus, devfn, off) \
264 (U3_HT_CFA0(devfn, off) \
265 + (((unsigned long)bus) << 16) \
266 + 0x01000000UL)
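
Editor's note: the same example for the HyperTransport encodings, assuming devfn 0x08 (slot 1, function 0) and offset 0x04:

	/*
	 * U3_HT_CFA0:         (0x08 << 8) | 0x04              = 0x00000804
	 * U3_HT_CFA1 (bus 3): 0x0804 + (3 << 16) + 0x01000000 = 0x01030804
	 */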
267
268static unsigned long __pmac u3_ht_cfg_access(struct pci_controller* hose,
269 u8 bus, u8 devfn, u8 offset)
270{
271 if (bus == hose->first_busno) {
272 /* For now, we don't self probe U3 HT bridge */
273 if (PCI_SLOT(devfn) == 0)
274 return 0;
275 return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset);
276 } else
277 return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset);
278}
279
280static int __pmac u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
281 int offset, int len, u32 *val)
282{
283 struct pci_controller *hose;
284 unsigned long addr;
285
286
287 hose = pci_bus_to_host(bus);
288 if (hose == NULL)
289 return PCIBIOS_DEVICE_NOT_FOUND;
290
291 addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
292 if (!addr)
293 return PCIBIOS_DEVICE_NOT_FOUND;
294
295 switch (u3_ht_skip_device(hose, bus, devfn)) {
296 case 0:
297 break;
298 case 1:
299 switch (len) {
300 case 1:
301 *val = 0xff; break;
302 case 2:
303 *val = 0xffff; break;
304 default:
305 *val = 0xfffffffful; break;
306 }
307 return PCIBIOS_SUCCESSFUL;
308 default:
309 return PCIBIOS_DEVICE_NOT_FOUND;
310 }
311
312 /*
313 * Note: the caller has already checked that offset is
314 * suitably aligned and that len is 1, 2 or 4.
315 */
316 switch (len) {
317 case 1:
318 *val = in_8((u8 *)addr);
319 break;
320 case 2:
321 *val = in_le16((u16 *)addr);
322 break;
323 default:
324 *val = in_le32((u32 *)addr);
325 break;
326 }
327 return PCIBIOS_SUCCESSFUL;
328}
329
330static int __pmac u3_ht_write_config(struct pci_bus *bus, unsigned int devfn,
331 int offset, int len, u32 val)
332{
333 struct pci_controller *hose;
334 unsigned long addr;
335
336 hose = pci_bus_to_host(bus);
337 if (hose == NULL)
338 return PCIBIOS_DEVICE_NOT_FOUND;
339
340 addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
341 if (!addr)
342 return PCIBIOS_DEVICE_NOT_FOUND;
343
344 switch (u3_ht_skip_device(hose, bus, devfn)) {
345 case 0:
346 break;
347 case 1:
348 return PCIBIOS_SUCCESSFUL;
349 default:
350 return PCIBIOS_DEVICE_NOT_FOUND;
351 }
352
353 /*
354 * Note: the caller has already checked that offset is
355 * suitably aligned and that len is 1, 2 or 4.
356 */
357 switch (len) {
358 case 1:
359 out_8((u8 *)addr, val);
360 (void) in_8((u8 *)addr);
361 break;
362 case 2:
363 out_le16((u16 *)addr, val);
364 (void) in_le16((u16 *)addr);
365 break;
366 default:
367 out_le32((u32 *)addr, val);
368 (void) in_le32((u32 *)addr);
369 break;
370 }
371 return PCIBIOS_SUCCESSFUL;
372}
373
374static struct pci_ops u3_ht_pci_ops =
375{
376 u3_ht_read_config,
377 u3_ht_write_config
378};
379
380static void __init setup_u3_agp(struct pci_controller* hose)
381{
382 /* On G5, we move AGP up to high bus number so we don't need
383 * to reassign bus numbers for HT. If we ever have P2P bridges
384 * on AGP, we'll have to move pci_assign_all_busses to the
385 * pci_controller structure so we enable it for AGP and not for
386 * HT children.
387 * We hard code the address because of the different size of
388 * the reg address cell; we shall fix that by killing struct
389 * reg_property and using some accessor functions instead
390 */
391 hose->first_busno = 0xf0;
392 hose->last_busno = 0xff;
393 has_uninorth = 1;
394 hose->ops = &macrisc_pci_ops;
395 hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
396 hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
397
398 u3_agp = hose;
399}
400
401static void __init setup_u3_ht(struct pci_controller* hose)
402{
403 struct device_node *np = (struct device_node *)hose->arch_data;
404 int i, cur;
405
406 hose->ops = &u3_ht_pci_ops;
407
408 /* We hard code the address because of the different size of
409 * the reg address cell; we shall fix that by killing struct
410 * reg_property and using some accessor functions instead
411 */
412 hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, 0x02000000);
413
414 /*
415 * The /ht node doesn't expose a "ranges" property, so we "remove" regions that
416 * have been allocated to AGP. So far, this version of the code doesn't assign
417 * any of the 0xfxxxxxxx "fine" memory regions to /ht.
418 * We need to fix that sooner or later, by either parsing all child "ranges"
419 * properties or figuring out the U3 address space decoding logic and
420 * then reading its configuration register (if any).
421 */
422 hose->io_base_phys = 0xf4000000;
423 hose->io_base_virt = ioremap(hose->io_base_phys, 0x00400000);
424 isa_io_base = pci_io_base = (unsigned long) hose->io_base_virt;
425 hose->io_resource.name = np->full_name;
426 hose->io_resource.start = 0;
427 hose->io_resource.end = 0x003fffff;
428 hose->io_resource.flags = IORESOURCE_IO;
429 hose->pci_mem_offset = 0;
430 hose->first_busno = 0;
431 hose->last_busno = 0xef;
432 hose->mem_resources[0].name = np->full_name;
433 hose->mem_resources[0].start = 0x80000000;
434 hose->mem_resources[0].end = 0xefffffff;
435 hose->mem_resources[0].flags = IORESOURCE_MEM;
436
437 if (u3_agp == NULL) {
438 DBG("U3 has no AGP, using full resource range\n");
439 return;
440 }
441
442 /* We "remove" the AGP resources from the resources allocated to HT, that
443 * is we create "holes". However, that code makes assumptions that so far
444 * happen to be true (cross fingers...), typically that resources in the
445 * AGP node are properly ordered
446 */
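	/*
	 * Hypothetical example of the carving below (values made up for
	 * illustration): if HT starts out with 0x80000000-0xefffffff and
	 * AGP owns 0x90000000-0x9fffffff, neither edge matches, so a hole
	 * is cut and HT ends up with mem_resources[0] covering
	 * 0x80000000-0x8fffffff and mem_resources[1] covering
	 * 0xa0000000-0xefffffff.
	 */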
447 cur = 0;
448 for (i=0; i<3; i++) {
449 struct resource *res = &u3_agp->mem_resources[i];
450 if (res->flags != IORESOURCE_MEM)
451 continue;
452 /* We don't care about "fine" resources */
453 if (res->start >= 0xf0000000)
454 continue;
455 /* Check if it's just a matter of "shrinking" us in one direction */
456 if (hose->mem_resources[cur].start == res->start) {
457 DBG("U3/HT: shrink start of %d, %08lx -> %08lx\n",
458 cur, hose->mem_resources[cur].start, res->end + 1);
459 hose->mem_resources[cur].start = res->end + 1;
460 continue;
461 }
462 if (hose->mem_resources[cur].end == res->end) {
463 DBG("U3/HT: shrink end of %d, %08lx -> %08lx\n",
464 cur, hose->mem_resources[cur].end, res->start - 1);
465 hose->mem_resources[cur].end = res->start - 1;
466 continue;
467 }
468 /* No, it's not the case, we need a hole */
469 if (cur == 2) {
470 /* not enough resources for a hole, we drop part of the range */
471 printk(KERN_WARNING "Running out of resources for /ht host !\n");
472 hose->mem_resources[cur].end = res->start - 1;
473 continue;
474 }
475 cur++;
476 DBG("U3/HT: hole, %d end at %08lx, %d start at %08lx\n",
477 cur-1, res->start - 1, cur, res->end + 1);
478 hose->mem_resources[cur].name = np->full_name;
479 hose->mem_resources[cur].flags = IORESOURCE_MEM;
480 hose->mem_resources[cur].start = res->end + 1;
481 hose->mem_resources[cur].end = hose->mem_resources[cur-1].end;
482 hose->mem_resources[cur-1].end = res->start - 1;
483 }
484}
485
486static void __init pmac_process_bridge_OF_ranges(struct pci_controller *hose,
487 struct device_node *dev, int primary)
488{
489 static unsigned int static_lc_ranges[2024];
490 unsigned int *dt_ranges, *lc_ranges, *ranges, *prev;
491 unsigned int size;
492 int rlen = 0, orig_rlen;
493 int memno = 0;
494 struct resource *res;
495 int np, na = prom_n_addr_cells(dev);
496
497 np = na + 5;
498
499 /* First we try to merge ranges to fix a problem with some pmacs
500 * that can have more than 3 ranges, fortunately using contiguous
501 * addresses -- BenH
502 */
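	/*
	 * Sketch of the merge loop below, assuming na == 2 (so each entry
	 * is np == 7 cells): the two adjacent entries
	 *	{ 0x02000000, 0, 0x80000000, 0, 0x80000000, 0, 0x10000000 }
	 *	{ 0x02000000, 0, 0x90000000, 0, 0x90000000, 0, 0x10000000 }
	 * are contiguous in both PCI and CPU space, so the first entry's
	 * size cell grows to 0x20000000 and the second is marked dead by
	 * zeroing its cell 0.
	 */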
503 dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
504 if (!dt_ranges)
505 return;
506 /* lc_ranges = alloc_bootmem(rlen);*/
507 lc_ranges = static_lc_ranges;
508 if (!lc_ranges)
509 return; /* what can we do here ? */
510 memcpy(lc_ranges, dt_ranges, rlen);
511 orig_rlen = rlen;
512
513 /* Let's work on a copy of the "ranges" property instead of damaging
514 * the device-tree image in memory
515 */
516 ranges = lc_ranges;
517 prev = NULL;
518 while ((rlen -= np * sizeof(unsigned int)) >= 0) {
519 if (prev) {
520 if (prev[0] == ranges[0] && prev[1] == ranges[1] &&
521 (prev[2] + prev[na+4]) == ranges[2] &&
522 (prev[na+2] + prev[na+4]) == ranges[na+2]) {
523 prev[na+4] += ranges[na+4];
524 ranges[0] = 0;
525 ranges += np;
526 continue;
527 }
528 }
529 prev = ranges;
530 ranges += np;
531 }
532
533 /*
534 * The ranges property is laid out as an array of elements,
535 * each of which comprises:
536 * cells 0 - 2: a PCI address
537 * cell 3 or cells 3-4: a CPU physical address
538 * (width depending on dev->n_addr_cells)
539 * cells 4-5 or 5-6: the size of the range
540 */
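	/*
	 * Example decode (illustrative only), again assuming na == 2: the
	 * entry
	 *	{ 0x02000000, 0, 0x80000000, 0, 0x80000000, 0, 0x70000000 }
	 * has 0x02 in the top byte of cell 0, i.e. 32-bit memory space,
	 * and maps PCI 0x80000000 to CPU physical 0x80000000
	 * (ranges[na+2]) for 0x70000000 bytes (ranges[na+4]).
	 */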
541 ranges = lc_ranges;
542 rlen = orig_rlen;
543 while (ranges && (rlen -= np * sizeof(unsigned int)) >= 0) {
544 res = NULL;
545 size = ranges[na+4];
546 switch (ranges[0] >> 24) {
547 case 1: /* I/O space */
548 if (ranges[2] != 0)
549 break;
550 hose->io_base_phys = ranges[na+2];
551 /* limit I/O space to 16MB */
552 if (size > 0x01000000)
553 size = 0x01000000;
554 hose->io_base_virt = ioremap(ranges[na+2], size);
555 if (primary)
556 isa_io_base = (unsigned long) hose->io_base_virt;
557 res = &hose->io_resource;
558 res->flags = IORESOURCE_IO;
559 res->start = ranges[2];
560 break;
561 case 2: /* memory space */
562 memno = 0;
563 if (ranges[1] == 0 && ranges[2] == 0
564 && ranges[na+4] <= (16 << 20)) {
565 /* 1st 16MB, i.e. ISA memory area */
566#if 0
567 if (primary)
568 isa_mem_base = ranges[na+2];
569#endif
570 memno = 1;
571 }
572 while (memno < 3 && hose->mem_resources[memno].flags)
573 ++memno;
574 if (memno == 0)
575 hose->pci_mem_offset = ranges[na+2] - ranges[2];
576 if (memno < 3) {
577 res = &hose->mem_resources[memno];
578 res->flags = IORESOURCE_MEM;
579 res->start = ranges[na+2];
580 }
581 break;
582 }
583 if (res != NULL) {
584 res->name = dev->full_name;
585 res->end = res->start + size - 1;
586 res->parent = NULL;
587 res->sibling = NULL;
588 res->child = NULL;
589 }
590 ranges += np;
591 }
592}
593
594/*
595 * We assume that if we have a G3 powermac, we have one bridge called
596 * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise,
597 * if we have one or more bandit or chaos bridges, we don't have a MPC106.
598 */
599static int __init add_bridge(struct device_node *dev)
600{
601 int len;
602 struct pci_controller *hose;
603 char* disp_name;
604 int *bus_range;
605 int primary = 1;
606 struct property *of_prop;
607
608 DBG("Adding PCI host bridge %s\n", dev->full_name);
609
610 bus_range = (int *) get_property(dev, "bus-range", &len);
611 if (bus_range == NULL || len < 2 * sizeof(int)) {
612 printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n",
613 dev->full_name);
614 }
615
616 hose = alloc_bootmem(sizeof(struct pci_controller));
617 if (hose == NULL)
618 return -ENOMEM;
619 pci_setup_pci_controller(hose);
620
621 hose->arch_data = dev;
622 hose->first_busno = bus_range ? bus_range[0] : 0;
623 hose->last_busno = bus_range ? bus_range[1] : 0xff;
624
625 of_prop = alloc_bootmem(sizeof(struct property) +
626 sizeof(hose->global_number));
627 if (of_prop) {
628 memset(of_prop, 0, sizeof(struct property));
629 of_prop->name = "linux,pci-domain";
630 of_prop->length = sizeof(hose->global_number);
631 of_prop->value = (unsigned char *)&of_prop[1];
632 memcpy(of_prop->value, &hose->global_number, sizeof(hose->global_number));
633 prom_add_property(dev, of_prop);
634 }
635
636 disp_name = NULL;
637 if (device_is_compatible(dev, "u3-agp")) {
638 setup_u3_agp(hose);
639 disp_name = "U3-AGP";
640 primary = 0;
641 } else if (device_is_compatible(dev, "u3-ht")) {
642 setup_u3_ht(hose);
643 disp_name = "U3-HT";
644 primary = 1;
645 }
646 printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n",
647 disp_name, hose->first_busno, hose->last_busno);
648
649 /* Interpret the "ranges" property */
650 /* This also maps the I/O region and sets isa_io/mem_base */
651 pmac_process_bridge_OF_ranges(hose, dev, primary);
652
653 /* Fixup "bus-range" OF property */
654 fixup_bus_range(dev);
655
656 return 0;
657}
658
659/*
660 * We use our own read_irq_line here because PCI_INTERRUPT_PIN is
661 * crap on some Apple ASICs. We unconditionally use the Open Firmware
662 * interrupt number as this is always right.
663 */
664static int pmac_pci_read_irq_line(struct pci_dev *pci_dev)
665{
666 struct device_node *node;
667
668 node = pci_device_to_OF_node(pci_dev);
669 if (node == NULL)
670 return -1;
671 if (node->n_intrs == 0)
672 return -1;
673 pci_dev->irq = node->intrs[0].line;
674 pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
675
676 return 0;
677}
678
679void __init pmac_pcibios_fixup(void)
680{
681 struct pci_dev *dev = NULL;
682
683 for_each_pci_dev(dev)
684 pmac_pci_read_irq_line(dev);
685}
686
687static void __init pmac_fixup_phb_resources(void)
688{
689 struct pci_controller *hose, *tmp;
690
691 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
692 unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
693 hose->io_resource.start += offset;
694 hose->io_resource.end += offset;
695 printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
696 hose->global_number,
697 hose->io_resource.start, hose->io_resource.end);
698 }
699}
700
701void __init pmac_pci_init(void)
702{
703 struct device_node *np, *root;
704 struct device_node *ht = NULL;
705
706 /* Probe root PCI hosts, that is, on U3, the AGP host and the
707 * HyperTransport host. The latter is kept around and added
708 * last, as its resource management relies on the AGP resources
709 * having been set up first
710 */
711 root = of_find_node_by_path("/");
712 if (root == NULL) {
713 printk(KERN_CRIT "pmac_find_bridges: can't find root of device tree\n");
714 return;
715 }
716 for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) {
717 if (np->name == NULL)
718 continue;
719 if (strcmp(np->name, "pci") == 0) {
720 if (add_bridge(np) == 0)
721 of_node_get(np);
722 }
723 if (strcmp(np->name, "ht") == 0) {
724 of_node_get(np);
725 ht = np;
726 }
727 }
728 of_node_put(root);
729
730 /* Now setup the HyperTransport host if we found any
731 */
732 if (ht && add_bridge(ht) != 0)
733 of_node_put(ht);
734
735 /* Fixup the IO resources on our host bridges as the common code
736 * does it only for children of the host bridges
737 */
738 pmac_fixup_phb_resources();
739
740 /* Setup the linkage between OF nodes and PHBs */
741 pci_devs_phb_init();
742
743 /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We
744 * assume there is no P2P bridge on the AGP bus, which should be a
745 * safe assumption, hopefully.
746 */
747 if (u3_agp) {
748 struct device_node *np = u3_agp->arch_data;
749 np->busno = 0xf0;
750 for (np = np->child; np; np = np->sibling)
751 np->busno = 0xf0;
752 }
753
754 pmac_check_ht_link();
755
756 /* Tell pci.c to not use the common resource allocation mechanism */
757 pci_probe_only = 1;
758
759 /* Allow all IO */
760 io_page_mask = -1;
761}
762
763/*
764 * Disable the second function on K2-SATA (it's broken)
765 * and disable the IO BARs on the first one
766 */
767static void fixup_k2_sata(struct pci_dev* dev)
768{
769 int i;
770 u16 cmd;
771
772 if (PCI_FUNC(dev->devfn) > 0) {
773 pci_read_config_word(dev, PCI_COMMAND, &cmd);
774 cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
775 pci_write_config_word(dev, PCI_COMMAND, cmd);
776 for (i = 0; i < 6; i++) {
777 dev->resource[i].start = dev->resource[i].end = 0;
778 dev->resource[i].flags = 0;
779 pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0);
780 }
781 } else {
782 pci_read_config_word(dev, PCI_COMMAND, &cmd);
783 cmd &= ~PCI_COMMAND_IO;
784 pci_write_config_word(dev, PCI_COMMAND, cmd);
785 for (i = 0; i < 5; i++) {
786 dev->resource[i].start = dev->resource[i].end = 0;
787 dev->resource[i].flags = 0;
788 pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0);
789 }
790 }
791}
792DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, 0x0240, fixup_k2_sata);
793
diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c
new file mode 100644
index 000000000000..6cf03d387b91
--- /dev/null
+++ b/arch/ppc64/kernel/pmac_setup.c
@@ -0,0 +1,511 @@
1/*
2 * arch/ppc/platforms/setup.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Adapted for Power Macintosh by Paul Mackerras
8 * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
9 *
10 * Derived from "arch/alpha/kernel/setup.c"
11 * Copyright (C) 1995 Linus Torvalds
12 *
13 * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org)
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version.
19 *
20 */
21
22/*
23 * bootup setup stuff..
24 */
25
26#undef DEBUG
27
28#include <linux/config.h>
29#include <linux/init.h>
30#include <linux/errno.h>
31#include <linux/sched.h>
32#include <linux/kernel.h>
33#include <linux/mm.h>
34#include <linux/stddef.h>
35#include <linux/unistd.h>
36#include <linux/ptrace.h>
37#include <linux/slab.h>
38#include <linux/user.h>
39#include <linux/a.out.h>
40#include <linux/tty.h>
41#include <linux/string.h>
42#include <linux/delay.h>
43#include <linux/ioport.h>
44#include <linux/major.h>
45#include <linux/initrd.h>
46#include <linux/vt_kern.h>
47#include <linux/console.h>
48#include <linux/ide.h>
49#include <linux/pci.h>
50#include <linux/adb.h>
51#include <linux/cuda.h>
52#include <linux/pmu.h>
53#include <linux/irq.h>
54#include <linux/seq_file.h>
55#include <linux/root_dev.h>
56#include <linux/bitops.h>
57
58#include <asm/processor.h>
59#include <asm/sections.h>
60#include <asm/prom.h>
61#include <asm/system.h>
62#include <asm/io.h>
63#include <asm/pci-bridge.h>
64#include <asm/iommu.h>
65#include <asm/machdep.h>
66#include <asm/dma.h>
67#include <asm/btext.h>
68#include <asm/cputable.h>
69#include <asm/pmac_feature.h>
70#include <asm/time.h>
71#include <asm/of_device.h>
72#include <asm/lmb.h>
73#include <asm/smu.h>
74
75#include "pmac.h"
76#include "mpic.h"
77
78#ifdef DEBUG
79#define DBG(fmt...) udbg_printf(fmt)
80#else
81#define DBG(fmt...)
82#endif
83
84static int current_root_goodness = -1;
85#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */
86
87extern int powersave_nap;
88int sccdbg;
89
90sys_ctrler_t sys_ctrler;
91EXPORT_SYMBOL(sys_ctrler);
92
93#ifdef CONFIG_PMAC_SMU
94unsigned long smu_cmdbuf_abs;
95EXPORT_SYMBOL(smu_cmdbuf_abs);
96#endif
97
98extern void udbg_init_scc(struct device_node *np);
99
100void __pmac pmac_show_cpuinfo(struct seq_file *m)
101{
102 struct device_node *np;
103 char *pp;
104 int plen;
105 char* mbname;
106 int mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL,
107 PMAC_MB_INFO_MODEL, 0);
108 unsigned int mbflags = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL,
109 PMAC_MB_INFO_FLAGS, 0);
110
111 if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME,
112 (long)&mbname) != 0)
113 mbname = "Unknown";
114
115 /* find motherboard type */
116 seq_printf(m, "machine\t\t: ");
117 np = find_devices("device-tree");
118 if (np != NULL) {
119 pp = (char *) get_property(np, "model", NULL);
120 if (pp != NULL)
121 seq_printf(m, "%s\n", pp);
122 else
123 seq_printf(m, "PowerMac\n");
124 pp = (char *) get_property(np, "compatible", &plen);
125 if (pp != NULL) {
126 seq_printf(m, "motherboard\t:");
127 while (plen > 0) {
128 int l = strlen(pp) + 1;
129 seq_printf(m, " %s", pp);
130 plen -= l;
131 pp += l;
132 }
133 seq_printf(m, "\n");
134 }
135 } else
136 seq_printf(m, "PowerMac\n");
137
138 /* print parsed model */
139 seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname);
140 seq_printf(m, "pmac flags\t: %08x\n", mbflags);
141
142 /* Indicate newworld */
143 seq_printf(m, "pmac-generation\t: NewWorld\n");
144}
145
146
147void __init pmac_setup_arch(void)
148{
149 /* init to some ~sane value until calibrate_delay() runs */
150 loops_per_jiffy = 50000000;
151
152 /* Probe motherboard chipset */
153 pmac_feature_init();
154#if 0
155 /* Lock-enable the SCC channel used for debug */
156 if (sccdbg) {
157 np = of_find_node_by_name(NULL, "escc");
158 if (np)
159 pmac_call_feature(PMAC_FTR_SCC_ENABLE, np,
160 PMAC_SCC_ASYNC | PMAC_SCC_FLAG_XMON, 1);
161 }
162#endif
163 /* We can NAP */
164 powersave_nap = 1;
165
166#ifdef CONFIG_ADB_PMU
167 /* Initialize the PMU if any */
168 find_via_pmu();
169#endif
170#ifdef CONFIG_PMAC_SMU
171 /* Initialize the SMU if any */
172 smu_init();
173#endif
174
175 /* Init NVRAM access */
176 pmac_nvram_init();
177
178 /* Setup SMP callback */
179#ifdef CONFIG_SMP
180 pmac_setup_smp();
181#endif
182
183 /* Lookup PCI hosts */
184 pmac_pci_init();
185
186#ifdef CONFIG_DUMMY_CONSOLE
187 conswitchp = &dummy_con;
188#endif
189}
190
191#ifdef CONFIG_SCSI
192void note_scsi_host(struct device_node *node, void *host)
193{
194 /* Obsolete */
195}
196#endif
197
198
199static int initializing = 1;
200
201static int pmac_late_init(void)
202{
203 initializing = 0;
204 return 0;
205}
206
207late_initcall(pmac_late_init);
208
209/* can't be __init - can be called whenever a disk is first accessed */
210void __pmac note_bootable_part(dev_t dev, int part, int goodness)
211{
212 extern dev_t boot_dev;
213 char *p;
214
215 if (!initializing)
216 return;
217 if ((goodness <= current_root_goodness) &&
218 ROOT_DEV != DEFAULT_ROOT_DEVICE)
219 return;
220 p = strstr(saved_command_line, "root=");
221 if (p != NULL && (p == saved_command_line || p[-1] == ' '))
222 return;
223
224 if (!boot_dev || dev == boot_dev) {
225 ROOT_DEV = dev + part;
226 boot_dev = 0;
227 current_root_goodness = goodness;
228 }
229}
230
231void __pmac pmac_restart(char *cmd)
232{
233 switch(sys_ctrler) {
234#ifdef CONFIG_ADB_PMU
235 case SYS_CTRLER_PMU:
236 pmu_restart();
237 break;
238#endif
239
240#ifdef CONFIG_PMAC_SMU
241 case SYS_CTRLER_SMU:
242 smu_restart();
243 break;
244#endif
245 default:
246 ;
247 }
248}
249
250void __pmac pmac_power_off(void)
251{
252 switch(sys_ctrler) {
253#ifdef CONFIG_ADB_PMU
254 case SYS_CTRLER_PMU:
255 pmu_shutdown();
256 break;
257#endif
258#ifdef CONFIG_PMAC_SMU
259 case SYS_CTRLER_SMU:
260 smu_shutdown();
261 break;
262#endif
263 default:
264 ;
265 }
266}
267
268void __pmac pmac_halt(void)
269{
270 pmac_power_off();
271}
272
273#ifdef CONFIG_BOOTX_TEXT
274static int dummy_getc_poll(void)
275{
276 return -1;
277}
278
279static unsigned char dummy_getc(void)
280{
281 return 0;
282}
283
284static void btext_putc(unsigned char c)
285{
286 btext_drawchar(c);
287}
288
289static void __init init_boot_display(void)
290{
291 char *name;
292 struct device_node *np = NULL;
293 int rc = -ENODEV;
294
295 printk("trying to initialize btext ...\n");
296
297 name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
298 if (name != NULL) {
299 np = of_find_node_by_path(name);
300 if (np != NULL) {
301 if (strcmp(np->type, "display") != 0) {
302 printk("boot stdout isn't a display !\n");
303 of_node_put(np);
304 np = NULL;
305 }
306 }
307 }
308 if (np)
309 rc = btext_initialize(np);
310 if (rc == 0)
311 return;
312
313 for (np = NULL; (np = of_find_node_by_type(np, "display"));) {
314 if (get_property(np, "linux,opened", NULL)) {
315 printk("trying %s ...\n", np->full_name);
316 rc = btext_initialize(np);
317 printk("result: %d\n", rc);
318 }
319 if (rc == 0)
320 return;
321 }
322}
323#endif /* CONFIG_BOOTX_TEXT */
324
325/*
326 * Early initialization.
327 */
328void __init pmac_init_early(void)
329{
330 DBG(" -> pmac_init_early\n");
331
332 /* Initialize hash table, from now on, we can take hash faults
333 * and call ioremap
334 */
335 hpte_init_native();
336
337 /* Init SCC */
338 if (strstr(cmd_line, "sccdbg")) {
339 sccdbg = 1;
340 udbg_init_scc(NULL);
341	} else {
344#ifdef CONFIG_BOOTX_TEXT
345 init_boot_display();
346
347 ppc_md.udbg_putc = btext_putc;
348 ppc_md.udbg_getc = dummy_getc;
349 ppc_md.udbg_getc_poll = dummy_getc_poll;
350#endif /* CONFIG_BOOTX_TEXT */
351 }
352
353 /* Setup interrupt mapping options */
354 ppc64_interrupt_controller = IC_OPEN_PIC;
355
356 iommu_init_early_u3();
357
358 DBG(" <- pmac_init_early\n");
359}
360
361static int pmac_u3_cascade(struct pt_regs *regs, void *data)
362{
363 return mpic_get_one_irq((struct mpic *)data, regs);
364}
365
366static __init void pmac_init_IRQ(void)
367{
368 struct device_node *irqctrler = NULL;
369 struct device_node *irqctrler2 = NULL;
370 struct device_node *np = NULL;
371 struct mpic *mpic1, *mpic2;
372
373 /* We first try to detect Apple's new Core99 chipset, since mac-io
374 * is quite different on those machines and contains an IBM MPIC2.
375 */
376 while ((np = of_find_node_by_type(np, "open-pic")) != NULL) {
377 struct device_node *parent = of_get_parent(np);
378 if (parent && !strcmp(parent->name, "u3"))
379 irqctrler2 = of_node_get(np);
380 else
381 irqctrler = of_node_get(np);
382 of_node_put(parent);
383 }
384 if (irqctrler != NULL && irqctrler->n_addrs > 0) {
385 unsigned char senses[128];
386
387 printk(KERN_INFO "PowerMac using OpenPIC irq controller at 0x%08x\n",
388 (unsigned int)irqctrler->addrs[0].address);
389
390 prom_get_irq_senses(senses, 0, 128);
391 mpic1 = mpic_alloc(irqctrler->addrs[0].address,
392 MPIC_PRIMARY | MPIC_WANTS_RESET,
393 0, 0, 128, 256, senses, 128, " K2-MPIC ");
394 BUG_ON(mpic1 == NULL);
395 mpic_init(mpic1);
396
397 if (irqctrler2 != NULL && irqctrler2->n_intrs > 0 &&
398 irqctrler2->n_addrs > 0) {
399 printk(KERN_INFO "Slave OpenPIC at 0x%08x hooked on IRQ %d\n",
400 (u32)irqctrler2->addrs[0].address,
401 irqctrler2->intrs[0].line);
402
403 pmac_call_feature(PMAC_FTR_ENABLE_MPIC, irqctrler2, 0, 0);
404 prom_get_irq_senses(senses, 128, 128 + 128);
405
406 /* We don't need to set MPIC_BROKEN_U3 here since we don't have
407 * hypertransport interrupts routed to it
408 */
409 mpic2 = mpic_alloc(irqctrler2->addrs[0].address,
410 MPIC_BIG_ENDIAN | MPIC_WANTS_RESET,
411 0, 128, 128, 0, senses, 128, " U3-MPIC ");
412 BUG_ON(mpic2 == NULL);
413 mpic_init(mpic2);
414 mpic_setup_cascade(irqctrler2->intrs[0].line,
415 pmac_u3_cascade, mpic2);
416 }
417 }
418 of_node_put(irqctrler);
419 of_node_put(irqctrler2);
420}
421
422static void __init pmac_progress(char *s, unsigned short hex)
423{
424 if (sccdbg) {
425 udbg_puts(s);
426 udbg_puts("\n");
427 }
428#ifdef CONFIG_BOOTX_TEXT
429 else if (boot_text_mapped) {
430 btext_drawstring(s);
431 btext_drawstring("\n");
432 }
433#endif /* CONFIG_BOOTX_TEXT */
434}
435
436/*
437 * pmac has no legacy IO, anything calling this function has to
438 * fail or bad things will happen
439 */
440static int pmac_check_legacy_ioport(unsigned int baseport)
441{
442 return -ENODEV;
443}
444
445static int __init pmac_declare_of_platform_devices(void)
446{
447 struct device_node *np;
448
449 np = find_devices("u3");
450 if (np) {
451 for (np = np->child; np != NULL; np = np->sibling)
452 if (strncmp(np->name, "i2c", 3) == 0) {
453 of_platform_device_create(np, "u3-i2c");
454 break;
455 }
456 }
457
458 return 0;
459}
460
461device_initcall(pmac_declare_of_platform_devices);
462
463/*
464 * Called very early, MMU is off, device-tree isn't unflattened
465 */
466static int __init pmac_probe(int platform)
467{
468 if (platform != PLATFORM_POWERMAC)
469 return 0;
470 /*
471 * On U3, the DART (iommu) must be allocated now since it
472 * has an impact on htab_initialize (due to the large page it
473 * occupies having to be broken up so the DART itself is not
474 * part of the cacheable linear mapping)
475 */
476 alloc_u3_dart_table();
477
478#ifdef CONFIG_PMAC_SMU
479 /*
480 * SMU-based G5s need some memory below 2GB; at least the current
481 * driver needs that. We have to allocate it now. We allocate 4k
482 * (1 small page) for now.
483 */
484 smu_cmdbuf_abs = lmb_alloc_base(4096, 4096, 0x80000000UL);
485#endif /* CONFIG_PMAC_SMU */
486
487 return 1;
488}
489
490struct machdep_calls __initdata pmac_md = {
491#ifdef CONFIG_HOTPLUG_CPU
492 .cpu_die = generic_mach_cpu_die,
493#endif
494 .probe = pmac_probe,
495 .setup_arch = pmac_setup_arch,
496 .init_early = pmac_init_early,
497 .get_cpuinfo = pmac_show_cpuinfo,
498 .init_IRQ = pmac_init_IRQ,
499 .get_irq = mpic_get_irq,
500 .pcibios_fixup = pmac_pcibios_fixup,
501 .restart = pmac_restart,
502 .power_off = pmac_power_off,
503 .halt = pmac_halt,
504 .get_boot_time = pmac_get_boot_time,
505 .set_rtc_time = pmac_set_rtc_time,
506 .get_rtc_time = pmac_get_rtc_time,
507 .calibrate_decr = pmac_calibrate_decr,
508 .feature_call = pmac_do_feature_call,
509 .progress = pmac_progress,
510 .check_legacy_ioport = pmac_check_legacy_ioport
511};
diff --git a/arch/ppc64/kernel/pmac_smp.c b/arch/ppc64/kernel/pmac_smp.c
new file mode 100644
index 000000000000..c27588ede2fe
--- /dev/null
+++ b/arch/ppc64/kernel/pmac_smp.c
@@ -0,0 +1,316 @@
1/*
2 * SMP support for power macintosh.
3 *
4 * We support both the old "powersurge" SMP architecture
5 * and the current Core99 (G4 PowerMac) machines.
6 *
7 * Note that we don't support the very first rev. of
8 * Apple/DayStar 2-CPU board, the one with the funky
9 * watchdog. Hopefully, none of these should be there except
10 * maybe internally to Apple. I should probably still add some
11 * code to detect this card though and disable SMP. --BenH.
12 *
13 * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net)
14 * and Ben Herrenschmidt <benh@kernel.crashing.org>.
15 *
16 * Support for DayStar quad CPU cards
17 * Copyright (C) XLR8, Inc. 1994-2000
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version
22 * 2 of the License, or (at your option) any later version.
23 */
24
25#undef DEBUG
26
27#include <linux/config.h>
28#include <linux/kernel.h>
29#include <linux/sched.h>
30#include <linux/smp.h>
31#include <linux/smp_lock.h>
32#include <linux/interrupt.h>
33#include <linux/kernel_stat.h>
34#include <linux/init.h>
35#include <linux/spinlock.h>
36#include <linux/errno.h>
37#include <linux/irq.h>
38
39#include <asm/ptrace.h>
40#include <asm/atomic.h>
41#include <asm/irq.h>
42#include <asm/page.h>
43#include <asm/pgtable.h>
44#include <asm/sections.h>
45#include <asm/io.h>
46#include <asm/prom.h>
47#include <asm/smp.h>
48#include <asm/machdep.h>
49#include <asm/pmac_feature.h>
50#include <asm/time.h>
51#include <asm/cacheflush.h>
52#include <asm/keylargo.h>
53#include <asm/pmac_low_i2c.h>
54
55#include "mpic.h"
56
57#ifdef DEBUG
58#define DBG(fmt...) udbg_printf(fmt)
59#else
60#define DBG(fmt...)
61#endif
62
63extern void pmac_secondary_start_1(void);
64extern void pmac_secondary_start_2(void);
65extern void pmac_secondary_start_3(void);
66
67extern struct smp_ops_t *smp_ops;
68
69static void (*pmac_tb_freeze)(int freeze);
70static struct device_node *pmac_tb_clock_chip_host;
71static DEFINE_SPINLOCK(timebase_lock);
72static unsigned long timebase;
73
74static void smp_core99_cypress_tb_freeze(int freeze)
75{
76 u8 data;
77 int rc;
78
79 /* Strangely, the device-tree says address is 0xd2, but darwin
80 * accesses 0xd0 ...
81 */
82 pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined);
83 rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host,
84 0xd0 | pmac_low_i2c_read,
85 0x81, &data, 1);
86 if (rc != 0)
87 goto bail;
88
89 data = (data & 0xf3) | (freeze ? 0x00 : 0x0c);
90
91 pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub);
92 rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host,
93 0xd0 | pmac_low_i2c_write,
94 0x81, &data, 1);
95
96 bail:
97 if (rc != 0) {
98 printk("Cypress Timebase %s rc: %d\n",
99 freeze ? "freeze" : "unfreeze", rc);
100 panic("Timebase freeze failed !\n");
101 }
102}
103
104static void smp_core99_pulsar_tb_freeze(int freeze)
105{
106 u8 data;
107 int rc;
108
109 /* Strangely, the device-tree says address is 0xd2, but darwin
110 * accesses 0xd0 ...
111 */
112 pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined);
113 rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host,
114 0xd4 | pmac_low_i2c_read,
115 0x2e, &data, 1);
116 if (rc != 0)
117 goto bail;
118
119 data = (data & 0x88) | (freeze ? 0x11 : 0x22);
120
121 pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub);
122 rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host,
123 0xd4 | pmac_low_i2c_write,
124 0x2e, &data, 1);
125 bail:
126 if (rc != 0) {
127 printk(KERN_ERR "Pulsar Timebase %s rc: %d\n",
128 freeze ? "freeze" : "unfreeze", rc);
129 panic("Timebase freeze failed !\n");
130 }
131}
132
133
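/*
 * Rough picture of the timebase hand-off implemented by the two functions
 * below: the CPU giving the timebase freezes the TB clock over i2c,
 * samples get_tb() into the shared 'timebase' word, then spins until the
 * taker has consumed it; the taker spins until 'timebase' goes non-zero,
 * loads it with set_tb() and writes zero back, after which the giver
 * unfreezes the clock. 'timebase' thus acts as a crude mailbox, with
 * timebase_lock guarding the updates.
 */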
134static void smp_core99_give_timebase(void)
135{
136 /* Open i2c bus for synchronous access */
137 if (pmac_low_i2c_open(pmac_tb_clock_chip_host, 0))
138 panic("Can't open i2c for TB sync !\n");
139
140 spin_lock(&timebase_lock);
141 (*pmac_tb_freeze)(1);
142 mb();
143 timebase = get_tb();
144 spin_unlock(&timebase_lock);
145
146 while (timebase)
147 barrier();
148
149 spin_lock(&timebase_lock);
150 (*pmac_tb_freeze)(0);
151 spin_unlock(&timebase_lock);
152
153 /* Close i2c bus */
154 pmac_low_i2c_close(pmac_tb_clock_chip_host);
155}
156
157
158static void __devinit smp_core99_take_timebase(void)
159{
160 while (!timebase)
161 barrier();
162 spin_lock(&timebase_lock);
163 set_tb(timebase >> 32, timebase & 0xffffffff);
164 timebase = 0;
165 spin_unlock(&timebase_lock);
166}
167
168
169static int __init smp_core99_probe(void)
170{
171 struct device_node *cpus;
172 struct device_node *cc;
173 int ncpus = 0;
174
175 /* Maybe use systemconfiguration here ? */
176 if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345);
177
178 /* Count CPUs in the device-tree */
179 for (cpus = NULL; (cpus = of_find_node_by_type(cpus, "cpu")) != NULL;)
180 ++ncpus;
181
182 printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus);
183
184 /* Nothing more to do if less than 2 of them */
185 if (ncpus <= 1)
186 return 1;
187
188 /* Look for the clock chip */
189 for (cc = NULL; (cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL;) {
190 struct device_node *p = of_get_parent(cc);
191 u32 *reg;
192 int ok;
193 ok = p && device_is_compatible(p, "uni-n-i2c");
194 if (!ok)
195 goto next;
196 reg = (u32 *)get_property(cc, "reg", NULL);
197 if (reg == NULL)
198 goto next;
199 switch (*reg) {
200 case 0xd2:
201 pmac_tb_freeze = smp_core99_cypress_tb_freeze;
202 printk(KERN_INFO "Timebase clock is Cypress chip\n");
203 break;
204 case 0xd4:
205 pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
206 printk(KERN_INFO "Timebase clock is Pulsar chip\n");
207 break;
208 }
209 if (pmac_tb_freeze != NULL) {
210 pmac_tb_clock_chip_host = p;
211 smp_ops->give_timebase = smp_core99_give_timebase;
212 smp_ops->take_timebase = smp_core99_take_timebase;
213 break;
214 }
215 next:
216 of_node_put(p);
217 }
218
219 mpic_request_ipis();
220
221 return ncpus;
222}
223
224static void __init smp_core99_kick_cpu(int nr)
225{
226 int save_vector, j;
227 unsigned long new_vector;
228 unsigned long flags;
229 volatile unsigned int *vector
230 = ((volatile unsigned int *)(KERNELBASE+0x100));
231
232 if (nr < 1 || nr > 3)
233 return;
234 if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346);
235
236 local_irq_save(flags);
237 local_irq_disable();
238
239 /* Save reset vector */
240 save_vector = *vector;
241
242 /* Setup fake reset vector that does
243 * b .pmac_secondary_start - KERNELBASE
244 */
245 switch(nr) {
246 case 1:
247 new_vector = (unsigned long)pmac_secondary_start_1;
248 break;
249 case 2:
250 new_vector = (unsigned long)pmac_secondary_start_2;
251 break;
252 case 3:
253 default:
254 new_vector = (unsigned long)pmac_secondary_start_3;
255 break;
256 }
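	/* The constant below should decode as a PowerPC I-form branch:
	 * opcode 18 (0x48000000) with the AA bit set (+2), i.e. "ba",
	 * branching to the absolute address (new_vector - KERNELBASE)
	 * carried in the LI field.
	 */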
257 *vector = 0x48000002 + (new_vector - KERNELBASE);
258
259 /* flush data cache and inval instruction cache */
260 flush_icache_range((unsigned long) vector, (unsigned long) vector + 4);
261
262 /* Put some life in our friend */
263 pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0);
264 paca[nr].cpu_start = 1;
265
266 /* FIXME: We wait a bit for the CPU to take the exception, I should
267 * instead wait for the entry code to set something for me. Well,
268 * ideally, all that crap will be done in prom.c and the CPU left
269 * in a RAM-based wait loop like CHRP.
270 */
271 for (j = 1; j < 1000000; j++)
272 mb();
273
274 /* Restore our exception vector */
275 *vector = save_vector;
276 flush_icache_range((unsigned long) vector, (unsigned long) vector + 4);
277
278 local_irq_restore(flags);
279 if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
280}
281
282static void __init smp_core99_setup_cpu(int cpu_nr)
283{
284 /* Setup MPIC */
285 mpic_setup_this_cpu();
286
287 if (cpu_nr == 0) {
288 extern void g5_phy_disable_cpu1(void);
289
290 /* If we didn't start the second CPU, we must take
291 * it off the bus
292 */
293 if (num_online_cpus() < 2)
294 g5_phy_disable_cpu1();
295 if (ppc_md.progress) ppc_md.progress("smp_core99_setup_cpu 0 done", 0x349);
296 }
297}
298
299struct smp_ops_t core99_smp_ops __pmacdata = {
300 .message_pass = smp_mpic_message_pass,
301 .probe = smp_core99_probe,
302 .kick_cpu = smp_core99_kick_cpu,
303 .setup_cpu = smp_core99_setup_cpu,
304 .give_timebase = smp_generic_give_timebase,
305 .take_timebase = smp_generic_take_timebase,
306};
307
308void __init pmac_setup_smp(void)
309{
310 smp_ops = &core99_smp_ops;
311#ifdef CONFIG_HOTPLUG_CPU
312 smp_ops->cpu_enable = generic_cpu_enable;
313 smp_ops->cpu_disable = generic_cpu_disable;
314 smp_ops->cpu_die = generic_cpu_die;
315#endif
316}
diff --git a/arch/ppc64/kernel/pmac_time.c b/arch/ppc64/kernel/pmac_time.c
new file mode 100644
index 000000000000..f24827581dd7
--- /dev/null
+++ b/arch/ppc64/kernel/pmac_time.c
@@ -0,0 +1,201 @@
1/*
2 * Support for periodic interrupts (100 per second) and for getting
3 * the current time from the RTC on Power Macintoshes.
4 *
5 * We use the decrementer register for our periodic interrupts.
6 *
7 * Paul Mackerras August 1996.
8 * Copyright (C) 1996 Paul Mackerras.
9 * Copyright (C) 2003-2005 Benjamin Herrenschmidt.
10 *
11 */
12#include <linux/config.h>
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/kernel.h>
16#include <linux/param.h>
17#include <linux/string.h>
18#include <linux/mm.h>
19#include <linux/init.h>
20#include <linux/time.h>
21#include <linux/adb.h>
22#include <linux/pmu.h>
23#include <linux/interrupt.h>
24
25#include <asm/sections.h>
26#include <asm/prom.h>
27#include <asm/system.h>
28#include <asm/io.h>
29#include <asm/pgtable.h>
30#include <asm/machdep.h>
31#include <asm/time.h>
32#include <asm/nvram.h>
33#include <asm/smu.h>
34
35#undef DEBUG
36
37#ifdef DEBUG
38#define DBG(x...) printk(x)
39#else
40#define DBG(x...)
41#endif
42
43extern void setup_default_decr(void);
44
45extern unsigned long ppc_tb_freq;
46extern unsigned long ppc_proc_freq;
47
48/* Apparently the RTC stores seconds since 1 Jan 1904 */
49#define RTC_OFFSET 2082844800
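/*
 * Sanity check of the constant: 1904..1969 spans 66 years including 17
 * leap years (1904, 1908, ..., 1968), i.e. 66 * 365 + 17 = 24107 days,
 * and 24107 * 86400 = 2082844800 seconds.
 */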
50
51/*
52 * Calibrate the decrementer frequency with the VIA timer 1.
53 */
54#define VIA_TIMER_FREQ_6 4700000 /* timer 1 frequency * 6 */
55
56extern struct timezone sys_tz;
57extern void to_tm(int tim, struct rtc_time * tm);
58
59void __pmac pmac_get_rtc_time(struct rtc_time *tm)
60{
61 switch(sys_ctrler) {
62#ifdef CONFIG_ADB_PMU
63 case SYS_CTRLER_PMU: {
64 /* TODO: Move that to a function in the PMU driver */
65 struct adb_request req;
66 unsigned int now;
67
68 if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
69 return;
70 pmu_wait_complete(&req);
71 if (req.reply_len != 4)
72 printk(KERN_ERR "pmac_get_rtc_time: PMU returned a %d"
73 " bytes reply\n", req.reply_len);
74 now = (req.reply[0] << 24) + (req.reply[1] << 16)
75 + (req.reply[2] << 8) + req.reply[3];
76 DBG("get: %u -> %u\n", (int)now, (int)(now - RTC_OFFSET));
77 now -= RTC_OFFSET;
78
79 to_tm(now, tm);
80 tm->tm_year -= 1900;
81 tm->tm_mon -= 1;
82
83 DBG("-> tm_mday: %d, tm_mon: %d, tm_year: %d, %d:%02d:%02d\n",
84 tm->tm_mday, tm->tm_mon, tm->tm_year,
85 tm->tm_hour, tm->tm_min, tm->tm_sec);
86 break;
87 }
88#endif /* CONFIG_ADB_PMU */
89
90#ifdef CONFIG_PMAC_SMU
91 case SYS_CTRLER_SMU:
92 smu_get_rtc_time(tm);
93 break;
94#endif /* CONFIG_PMAC_SMU */
95 default:
96 ;
97 }
98}
99
100int __pmac pmac_set_rtc_time(struct rtc_time *tm)
101{
102 switch(sys_ctrler) {
103#ifdef CONFIG_ADB_PMU
104 case SYS_CTRLER_PMU: {
105 /* TODO: Move that to a function in the PMU driver */
106 struct adb_request req;
107 unsigned int nowtime;
108
109 DBG("set: tm_mday: %d, tm_mon: %d, tm_year: %d,"
110 " %d:%02d:%02d\n",
111 tm->tm_mday, tm->tm_mon, tm->tm_year,
112 tm->tm_hour, tm->tm_min, tm->tm_sec);
113
114 nowtime = mktime(tm->tm_year + 1900, tm->tm_mon + 1,
115 tm->tm_mday, tm->tm_hour, tm->tm_min,
116 tm->tm_sec);
117
118 DBG("-> %u -> %u\n", (int)nowtime,
119 (int)(nowtime + RTC_OFFSET));
120 nowtime += RTC_OFFSET;
121
122 if (pmu_request(&req, NULL, 5, PMU_SET_RTC,
123 nowtime >> 24, nowtime >> 16,
124 nowtime >> 8, nowtime) < 0)
125 return -ENXIO;
126 pmu_wait_complete(&req);
127 if (req.reply_len != 0)
128 printk(KERN_ERR "pmac_set_rtc_time: PMU returned a %d"
129 " bytes reply\n", req.reply_len);
130 return 0;
131 }
132#endif /* CONFIG_ADB_PMU */
133
134#ifdef CONFIG_PMAC_SMU
135 case SYS_CTRLER_SMU:
136 return smu_set_rtc_time(tm);
137#endif /* CONFIG_PMAC_SMU */
138 default:
139 return -ENODEV;
140 }
141}
142
143void __init pmac_get_boot_time(struct rtc_time *tm)
144{
145 pmac_get_rtc_time(tm);
146
147#ifdef disabled__CONFIG_NVRAM
148 s32 delta = 0;
149 int dst;
150
151 delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16;
152 delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8;
153 delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb);
154 if (delta & 0x00800000UL)
155 delta |= 0xFF000000UL;
156 dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0);
157 printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60,
158 dst ? "on" : "off");
159#endif
160}
161
162/*
163 * Query the OF and get the decr frequency.
164 * This was taken from the pmac time_init() when merging the prep/pmac
165 * time functions.
166 */
167void __init pmac_calibrate_decr(void)
168{
169 struct device_node *cpu;
170 unsigned int freq, *fp;
171 struct div_result divres;
172
173 /*
174 * The cpu node should have a timebase-frequency property
175 * to tell us the rate at which the decrementer counts.
176 */
177 cpu = find_type_devices("cpu");
178 if (cpu == 0)
179 panic("can't find cpu node in time_init");
180 fp = (unsigned int *) get_property(cpu, "timebase-frequency", NULL);
181 if (fp == 0)
182 panic("can't get cpu timebase frequency");
183 freq = *fp;
184 printk("time_init: decrementer frequency = %u.%.6u MHz\n",
185 freq/1000000, freq%1000000);
186 tb_ticks_per_jiffy = freq / HZ;
187 tb_ticks_per_sec = tb_ticks_per_jiffy * HZ;
188 tb_ticks_per_usec = freq / 1000000;
189 tb_to_us = mulhwu_scale_factor(freq, 1000000);
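	/* The next two lines compute tb_to_xs = 2^84 / tb_ticks_per_sec,
	 * assuming div128_by_32() divides the 128-bit value
	 * (high << 64 | low) by a 32-bit divisor: with that scale,
	 * (ticks * tb_to_xs) >> 64 gives elapsed time in units of
	 * 2^-20 seconds ("xsecs").
	 */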
190 div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres);
191 tb_to_xs = divres.result_low;
192 ppc_tb_freq = freq;
193
194 fp = (unsigned int *)get_property(cpu, "clock-frequency", NULL);
195 if (fp == 0)
196 panic("can't get cpu processor frequency");
197 ppc_proc_freq = *fp;
198
199 setup_default_decr();
200}
201
diff --git a/arch/ppc64/kernel/pmc.c b/arch/ppc64/kernel/pmc.c
new file mode 100644
index 000000000000..67be773f9c00
--- /dev/null
+++ b/arch/ppc64/kernel/pmc.c
@@ -0,0 +1,67 @@
1/*
2 * linux/arch/ppc64/kernel/pmc.c
3 *
4 * Copyright (C) 2004 David Gibson, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/config.h>
13#include <linux/errno.h>
14#include <linux/spinlock.h>
15#include <linux/module.h>
16
17#include <asm/processor.h>
18#include <asm/pmc.h>
19
20/* Ensure exceptions are disabled */
21static void dummy_perf(struct pt_regs *regs)
22{
23 unsigned int mmcr0 = mfspr(SPRN_MMCR0);
24
25 mmcr0 &= ~(MMCR0_PMXE|MMCR0_PMAO);
26 mtspr(SPRN_MMCR0, mmcr0);
27}
28
29static spinlock_t pmc_owner_lock = SPIN_LOCK_UNLOCKED;
30static void *pmc_owner_caller; /* mostly for debugging */
31perf_irq_t perf_irq = dummy_perf;
32
33int reserve_pmc_hardware(perf_irq_t new_perf_irq)
34{
35 int err = 0;
36
37 spin_lock(&pmc_owner_lock);
38
39 if (pmc_owner_caller) {
40 printk(KERN_WARNING "reserve_pmc_hardware: "
41 "PMC hardware busy (reserved by caller %p)\n",
42 pmc_owner_caller);
43 err = -EBUSY;
44 goto out;
45 }
46
47 pmc_owner_caller = __builtin_return_address(0);
48 perf_irq = new_perf_irq ? : dummy_perf;
49
50 out:
51 spin_unlock(&pmc_owner_lock);
52 return err;
53}
54EXPORT_SYMBOL_GPL(reserve_pmc_hardware);
55
56void release_pmc_hardware(void)
57{
58 spin_lock(&pmc_owner_lock);
59
60 WARN_ON(!pmc_owner_caller);
61
62 pmc_owner_caller = NULL;
63 perf_irq = dummy_perf;
64
65 spin_unlock(&pmc_owner_lock);
66}
67EXPORT_SYMBOL_GPL(release_pmc_hardware);
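/*
 * Hypothetical usage sketch (not part of this file): a profiling driver
 * would bracket its use of the performance counters with
 *
 *	if (reserve_pmc_hardware(my_pmc_handler) == 0) {
 *		... program the PMCs, handle interrupts ...
 *		release_pmc_hardware();
 *	}
 *
 * where my_pmc_handler is a perf_irq_t callback of the driver's own.
 */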
diff --git a/arch/ppc64/kernel/ppc_ksyms.c b/arch/ppc64/kernel/ppc_ksyms.c
new file mode 100644
index 000000000000..6ced63a3439f
--- /dev/null
+++ b/arch/ppc64/kernel/ppc_ksyms.c
@@ -0,0 +1,95 @@
1/*
2 * c 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9#include <linux/config.h>
10#include <linux/module.h>
11#include <linux/string.h>
12#include <linux/console.h>
13#include <net/checksum.h>
14
15#include <asm/processor.h>
16#include <asm/uaccess.h>
17#include <asm/io.h>
18#include <asm/system.h>
19#include <asm/hw_irq.h>
20#include <asm/abs_addr.h>
21#include <asm/cacheflush.h>
22#include <asm/iSeries/HvCallSc.h>
23
24EXPORT_SYMBOL(strcpy);
25EXPORT_SYMBOL(strncpy);
26EXPORT_SYMBOL(strcat);
27EXPORT_SYMBOL(strncat);
28EXPORT_SYMBOL(strchr);
29EXPORT_SYMBOL(strrchr);
30EXPORT_SYMBOL(strpbrk);
31EXPORT_SYMBOL(strstr);
32EXPORT_SYMBOL(strlen);
33EXPORT_SYMBOL(strnlen);
34EXPORT_SYMBOL(strcmp);
35EXPORT_SYMBOL(strncmp);
36
37EXPORT_SYMBOL(csum_partial);
38EXPORT_SYMBOL(csum_partial_copy_generic);
39EXPORT_SYMBOL(ip_fast_csum);
40EXPORT_SYMBOL(csum_tcpudp_magic);
41
42EXPORT_SYMBOL(__copy_tofrom_user);
43EXPORT_SYMBOL(__clear_user);
44EXPORT_SYMBOL(__strncpy_from_user);
45EXPORT_SYMBOL(__strnlen_user);
46
47EXPORT_SYMBOL(reloc_offset);
48
49#ifdef CONFIG_PPC_ISERIES
50EXPORT_SYMBOL(HvCall0);
51EXPORT_SYMBOL(HvCall1);
52EXPORT_SYMBOL(HvCall2);
53EXPORT_SYMBOL(HvCall3);
54EXPORT_SYMBOL(HvCall4);
55EXPORT_SYMBOL(HvCall5);
56EXPORT_SYMBOL(HvCall6);
57EXPORT_SYMBOL(HvCall7);
58#endif
59
60EXPORT_SYMBOL(_insb);
61EXPORT_SYMBOL(_outsb);
62EXPORT_SYMBOL(_insw);
63EXPORT_SYMBOL(_outsw);
64EXPORT_SYMBOL(_insl);
65EXPORT_SYMBOL(_outsl);
66EXPORT_SYMBOL(_insw_ns);
67EXPORT_SYMBOL(_outsw_ns);
68EXPORT_SYMBOL(_insl_ns);
69EXPORT_SYMBOL(_outsl_ns);
70
71EXPORT_SYMBOL(kernel_thread);
72
73EXPORT_SYMBOL(giveup_fpu);
74#ifdef CONFIG_ALTIVEC
75EXPORT_SYMBOL(giveup_altivec);
76#endif
77EXPORT_SYMBOL(flush_icache_range);
78
79#ifdef CONFIG_SMP
80#ifdef CONFIG_PPC_ISERIES
81EXPORT_SYMBOL(local_get_flags);
82EXPORT_SYMBOL(local_irq_disable);
83EXPORT_SYMBOL(local_irq_restore);
84#endif
85#endif
86
87EXPORT_SYMBOL(memcpy);
88EXPORT_SYMBOL(memset);
89EXPORT_SYMBOL(memmove);
90EXPORT_SYMBOL(memscan);
91EXPORT_SYMBOL(memcmp);
92EXPORT_SYMBOL(memchr);
93
94EXPORT_SYMBOL(timer_interrupt);
95EXPORT_SYMBOL(console_drivers);
diff --git a/arch/ppc64/kernel/proc_ppc64.c b/arch/ppc64/kernel/proc_ppc64.c
new file mode 100644
index 000000000000..0914b0669b05
--- /dev/null
+++ b/arch/ppc64/kernel/proc_ppc64.c
@@ -0,0 +1,128 @@
1/*
2 * arch/ppc64/kernel/proc_ppc64.c
3 *
4 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21#include <linux/config.h>
22#include <linux/init.h>
23#include <linux/mm.h>
24#include <linux/proc_fs.h>
25#include <linux/slab.h>
26#include <linux/kernel.h>
27
28#include <asm/systemcfg.h>
29#include <asm/rtas.h>
30#include <asm/uaccess.h>
31#include <asm/prom.h>
32
33static loff_t page_map_seek( struct file *file, loff_t off, int whence);
34static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
35 loff_t *ppos);
36static int page_map_mmap( struct file *file, struct vm_area_struct *vma );
37
38static struct file_operations page_map_fops = {
39 .llseek = page_map_seek,
40 .read = page_map_read,
41 .mmap = page_map_mmap
42};
43
44/*
45 * Create the ppc64 and ppc64/rtas directories early, so that code
46 * in drivers can assume they have already been created.
47 */
48static int __init proc_ppc64_create(void)
49{
50 struct proc_dir_entry *root;
51
52 root = proc_mkdir("ppc64", NULL);
53 if (!root)
54 return 1;
55
56 if (!(systemcfg->platform & PLATFORM_PSERIES))
57 return 0;
58
59 if (!proc_mkdir("rtas", root))
60 return 1;
61
62 if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
63 return 1;
64
65 return 0;
66}
67core_initcall(proc_ppc64_create);
68
69static int __init proc_ppc64_init(void)
70{
71 struct proc_dir_entry *pde;
72
73 pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
74 if (!pde)
75 return 1;
76 pde->nlink = 1;
77 pde->data = systemcfg;
78 pde->size = PAGE_SIZE;
79 pde->proc_fops = &page_map_fops;
80
81 return 0;
82}
83__initcall(proc_ppc64_init);
84
85static loff_t page_map_seek( struct file *file, loff_t off, int whence)
86{
87 loff_t new;
88 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
89
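	/* whence follows lseek(2): 0 = SEEK_SET, 1 = SEEK_CUR, 2 = SEEK_END */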
90 switch(whence) {
91 case 0:
92 new = off;
93 break;
94 case 1:
95 new = file->f_pos + off;
96 break;
97 case 2:
98 new = dp->size + off;
99 break;
100 default:
101 return -EINVAL;
102 }
103 if ( new < 0 || new > dp->size )
104 return -EINVAL;
105 return (file->f_pos = new);
106}
107
108static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
109 loff_t *ppos)
110{
111 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
112 return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
113}
114
115static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
116{
117 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
118
119 vma->vm_flags |= VM_SHM | VM_LOCKED;
120
121 if ((vma->vm_end - vma->vm_start) > dp->size)
122 return -EINVAL;
123
124 remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
125 dp->size, vma->vm_page_prot);
126 return 0;
127}
128
diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c
new file mode 100644
index 000000000000..8b0686122738
--- /dev/null
+++ b/arch/ppc64/kernel/process.c
@@ -0,0 +1,688 @@
1/*
2 * linux/arch/ppc64/kernel/process.c
3 *
4 * Derived from "arch/i386/kernel/process.c"
5 * Copyright (C) 1995 Linus Torvalds
6 *
7 * Updated and modified by Cort Dougan (cort@cs.nmt.edu) and
8 * Paul Mackerras (paulus@cs.anu.edu.au)
9 *
10 * PowerPC version
11 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18
19#include <linux/config.h>
20#include <linux/module.h>
21#include <linux/errno.h>
22#include <linux/sched.h>
23#include <linux/kernel.h>
24#include <linux/mm.h>
25#include <linux/smp.h>
26#include <linux/smp_lock.h>
27#include <linux/stddef.h>
28#include <linux/unistd.h>
29#include <linux/slab.h>
30#include <linux/user.h>
31#include <linux/elf.h>
32#include <linux/init.h>
33#include <linux/init_task.h>
34#include <linux/prctl.h>
35#include <linux/ptrace.h>
36#include <linux/kallsyms.h>
37#include <linux/interrupt.h>
38#include <linux/utsname.h>
39
40#include <asm/pgtable.h>
41#include <asm/uaccess.h>
42#include <asm/system.h>
43#include <asm/io.h>
44#include <asm/processor.h>
45#include <asm/mmu.h>
46#include <asm/mmu_context.h>
47#include <asm/prom.h>
48#include <asm/ppcdebug.h>
49#include <asm/machdep.h>
50#include <asm/iSeries/HvCallHpt.h>
51#include <asm/cputable.h>
52#include <asm/sections.h>
53#include <asm/tlbflush.h>
54#include <asm/time.h>
55
56#ifndef CONFIG_SMP
57struct task_struct *last_task_used_math = NULL;
58struct task_struct *last_task_used_altivec = NULL;
59#endif
60
61struct mm_struct ioremap_mm = {
62 .pgd = ioremap_dir,
63 .mm_users = ATOMIC_INIT(2),
64 .mm_count = ATOMIC_INIT(1),
65 .cpu_vm_mask = CPU_MASK_ALL,
66 .page_table_lock = SPIN_LOCK_UNLOCKED,
67};
68
69/*
70 * Make sure the floating-point register state in the
71 * thread_struct is up to date for task tsk.
72 */
73void flush_fp_to_thread(struct task_struct *tsk)
74{
75 if (tsk->thread.regs) {
76 /*
77 * We need to disable preemption here because if we didn't,
78 * another process could get scheduled after the regs->msr
79 * test but before we have finished saving the FP registers
80 * to the thread_struct. That process could take over the
81 * FPU, and then when we get scheduled again we would store
82 * bogus values for the remaining FP registers.
83 */
84 preempt_disable();
85 if (tsk->thread.regs->msr & MSR_FP) {
86#ifdef CONFIG_SMP
87 /*
88 * This should only ever be called for current or
89 * for a stopped child process. Since we save away
90 * the FP register state on context switch on SMP,
91 * there is something wrong if a stopped child appears
92 * to still have its FP state in the CPU registers.
93 */
94 BUG_ON(tsk != current);
95#endif
96 giveup_fpu(current);
97 }
98 preempt_enable();
99 }
100}
101
102void enable_kernel_fp(void)
103{
104 WARN_ON(preemptible());
105
106#ifdef CONFIG_SMP
107 if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
108 giveup_fpu(current);
109 else
110 giveup_fpu(NULL); /* just enables FP for kernel */
111#else
112 giveup_fpu(last_task_used_math);
113#endif /* CONFIG_SMP */
114}
115EXPORT_SYMBOL(enable_kernel_fp);
116
117int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
118{
119 if (!tsk->thread.regs)
120 return 0;
121 flush_fp_to_thread(current);
122
123 memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs));
124
125 return 1;
126}
127
128#ifdef CONFIG_ALTIVEC
129
130void enable_kernel_altivec(void)
131{
132 WARN_ON(preemptible());
133
134#ifdef CONFIG_SMP
135 if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
136 giveup_altivec(current);
137 else
138 giveup_altivec(NULL); /* just enables AltiVec for kernel */
139#else
140 giveup_altivec(last_task_used_altivec);
141#endif /* CONFIG_SMP */
142}
143EXPORT_SYMBOL(enable_kernel_altivec);
144
145/*
146 * Make sure the VMX/Altivec register state in the
147 * thread_struct is up to date for task tsk.
148 */
149void flush_altivec_to_thread(struct task_struct *tsk)
150{
151 if (tsk->thread.regs) {
152 preempt_disable();
153 if (tsk->thread.regs->msr & MSR_VEC) {
154#ifdef CONFIG_SMP
155 BUG_ON(tsk != current);
156#endif
157 giveup_altivec(current);
158 }
159 preempt_enable();
160 }
161}
162
163int dump_task_altivec(struct pt_regs *regs, elf_vrregset_t *vrregs)
164{
165 flush_altivec_to_thread(current);
166 memcpy(vrregs, &current->thread.vr[0], sizeof(*vrregs));
167 return 1;
168}
169
170#endif /* CONFIG_ALTIVEC */
171
172DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
173
174struct task_struct *__switch_to(struct task_struct *prev,
175 struct task_struct *new)
176{
177 struct thread_struct *new_thread, *old_thread;
178 unsigned long flags;
179 struct task_struct *last;
180
181#ifdef CONFIG_SMP
182 /* avoid complexity of lazy save/restore of fpu
183 * by just saving it every time we switch out if
184 * this task used the fpu during the last quantum.
185 *
186 * If it tries to use the fpu again, it'll trap and
187 * reload its fp regs. So we don't have to do a restore
188 * every switch, just a save.
189 * -- Cort
190 */
191 if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
192 giveup_fpu(prev);
193#ifdef CONFIG_ALTIVEC
194 if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
195 giveup_altivec(prev);
196#endif /* CONFIG_ALTIVEC */
197#endif /* CONFIG_SMP */
198
199#if defined(CONFIG_ALTIVEC) && !defined(CONFIG_SMP)
200 /* Avoid the trap. On smp this never happens since
201 * we don't set last_task_used_altivec -- Cort
202 */
203 if (new->thread.regs && last_task_used_altivec == new)
204 new->thread.regs->msr |= MSR_VEC;
205#endif /* CONFIG_ALTIVEC */
206
207 flush_tlb_pending();
208
209 new_thread = &new->thread;
210 old_thread = &current->thread;
211
212/* Collect PURR utilization data per process and per processor */
213/* (the PURR is simply a processor-utilization time base) */
214
215#if defined(CONFIG_PPC_PSERIES)
216 if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
217 struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
218 unsigned long start_tb, current_tb;
219 start_tb = old_thread->start_tb;
220 cu->current_tb = current_tb = mfspr(SPRN_PURR);
221 old_thread->accum_tb += (current_tb - start_tb);
222 new_thread->start_tb = current_tb;
223 }
224#endif
225
226
227 local_irq_save(flags);
228 last = _switch(old_thread, new_thread);
229
230 local_irq_restore(flags);
231
232 return last;
233}
234
235static int instructions_to_print = 16;
236
237static void show_instructions(struct pt_regs *regs)
238{
239 int i;
240 unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
241 sizeof(int));
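	/* With instructions_to_print == 16 this starts the dump 12
	 * instructions (3/4 of the window) before regs->nip, so the
	 * faulting instruction lands near the end of the dump.
	 */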
242
243 printk("Instruction dump:");
244
245 for (i = 0; i < instructions_to_print; i++) {
246 int instr;
247
248 if (!(i % 8))
249 printk("\n");
250
251 if (((REGION_ID(pc) != KERNEL_REGION_ID) &&
252 (REGION_ID(pc) != VMALLOC_REGION_ID)) ||
253 __get_user(instr, (unsigned int *)pc)) {
254 printk("XXXXXXXX ");
255 } else {
256 if (regs->nip == pc)
257 printk("<%08x> ", instr);
258 else
259 printk("%08x ", instr);
260 }
261
262 pc += sizeof(int);
263 }
264
265 printk("\n");
266}
267
268void show_regs(struct pt_regs * regs)
269{
270 int i;
271 unsigned long trap;
272
273 printk("NIP: %016lX XER: %08X LR: %016lX CTR: %016lX\n",
274 regs->nip, (unsigned int)regs->xer, regs->link, regs->ctr);
275 printk("REGS: %p TRAP: %04lx %s (%s)\n",
276 regs, regs->trap, print_tainted(), system_utsname.release);
277 printk("MSR: %016lx EE: %01x PR: %01x FP: %01x ME: %01x "
278 "IR/DR: %01x%01x CR: %08X\n",
279 regs->msr, regs->msr & MSR_EE ? 1 : 0, regs->msr & MSR_PR ? 1 : 0,
280 regs->msr & MSR_FP ? 1 : 0, regs->msr & MSR_ME ? 1 : 0,
281 regs->msr & MSR_IR ? 1 : 0,
282 regs->msr & MSR_DR ? 1 : 0,
283 (unsigned int)regs->ccr);
284 trap = TRAP(regs);
285 printk("DAR: %016lx DSISR: %016lx\n", regs->dar, regs->dsisr);
286 printk("TASK: %p[%d] '%s' THREAD: %p",
287 current, current->pid, current->comm, current->thread_info);
288
289#ifdef CONFIG_SMP
290 printk(" CPU: %d", smp_processor_id());
291#endif /* CONFIG_SMP */
292
293 for (i = 0; i < 32; i++) {
294 if ((i % 4) == 0) {
295 printk("\n" KERN_INFO "GPR%02d: ", i);
296 }
297
298 printk("%016lX ", regs->gpr[i]);
299 if (i == 13 && !FULL_REGS(regs))
300 break;
301 }
302 printk("\n");
303 /*
304 * Look up NIP late so we have the best chance of getting the
305 * above info out without failing
306 */
307 printk("NIP [%016lx] ", regs->nip);
308 print_symbol("%s\n", regs->nip);
309 printk("LR [%016lx] ", regs->link);
310 print_symbol("%s\n", regs->link);
311 show_stack(current, (unsigned long *)regs->gpr[1]);
312 if (!user_mode(regs))
313 show_instructions(regs);
314}
315
316void exit_thread(void)
317{
318#ifndef CONFIG_SMP
319 if (last_task_used_math == current)
320 last_task_used_math = NULL;
321#ifdef CONFIG_ALTIVEC
322 if (last_task_used_altivec == current)
323 last_task_used_altivec = NULL;
324#endif /* CONFIG_ALTIVEC */
325#endif /* CONFIG_SMP */
326}
327
328void flush_thread(void)
329{
330 struct thread_info *t = current_thread_info();
331
332 if (t->flags & _TIF_ABI_PENDING)
333 t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
334
335#ifndef CONFIG_SMP
336 if (last_task_used_math == current)
337 last_task_used_math = NULL;
338#ifdef CONFIG_ALTIVEC
339 if (last_task_used_altivec == current)
340 last_task_used_altivec = NULL;
341#endif /* CONFIG_ALTIVEC */
342#endif /* CONFIG_SMP */
343}
344
345void
346release_thread(struct task_struct *t)
347{
348}
349
350
351/*
352 * This gets called before we allocate a new thread and copy
353 * the current task into it.
354 */
355void prepare_to_copy(struct task_struct *tsk)
356{
357 flush_fp_to_thread(current);
358 flush_altivec_to_thread(current);
359}
360
361/*
362 * Copy a thread..
363 */
364int
365copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
366 unsigned long unused, struct task_struct *p, struct pt_regs *regs)
367{
368 struct pt_regs *childregs, *kregs;
369 extern void ret_from_fork(void);
370 unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE;
371
372 /* Copy registers */
373 sp -= sizeof(struct pt_regs);
374 childregs = (struct pt_regs *) sp;
375 *childregs = *regs;
376 if ((childregs->msr & MSR_PR) == 0) {
377 /* for kernel thread, set stackptr in new task */
378 childregs->gpr[1] = sp + sizeof(struct pt_regs);
379 p->thread.regs = NULL; /* no user register state */
380 clear_ti_thread_flag(p->thread_info, TIF_32BIT);
381#ifdef CONFIG_PPC_ISERIES
382 set_ti_thread_flag(p->thread_info, TIF_RUN_LIGHT);
383#endif
384 } else {
385 childregs->gpr[1] = usp;
386 p->thread.regs = childregs;
387 if (clone_flags & CLONE_SETTLS) {
388 if (test_thread_flag(TIF_32BIT))
389 childregs->gpr[2] = childregs->gpr[6];
390 else
391 childregs->gpr[13] = childregs->gpr[6];
392 }
393 }
394 childregs->gpr[3] = 0; /* Result from fork() */
395 sp -= STACK_FRAME_OVERHEAD;
396
397 /*
398 * The way this works is that at some point in the future
399 * some task will call _switch to switch to the new task.
400 * That will pop off the stack frame created below and start
401 * the new task running at ret_from_fork. The new task will
402 * do some house keeping and then return from the fork or clone
403 * system call, using the stack frame created above.
404 */
405 sp -= sizeof(struct pt_regs);
406 kregs = (struct pt_regs *) sp;
407 sp -= STACK_FRAME_OVERHEAD;
408 p->thread.ksp = sp;
409 if (cpu_has_feature(CPU_FTR_SLB)) {
410 unsigned long sp_vsid = get_kernel_vsid(sp);
411
412 sp_vsid <<= SLB_VSID_SHIFT;
413 sp_vsid |= SLB_VSID_KERNEL;
414 if (cpu_has_feature(CPU_FTR_16M_PAGE))
415 sp_vsid |= SLB_VSID_L;
416
417 p->thread.ksp_vsid = sp_vsid;
418 }
419
420 /*
421 * The PPC64 ABI makes use of a TOC to contain function
422 * pointers. The symbol ret_from_fork below actually names a
423 * function descriptor; its first entry is the address of the
424 * actual function.
425 */
426 kregs->nip = *((unsigned long *)ret_from_fork);
427
428 return 0;
429}
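
/*
 * Illustration (standalone, not part of this file): an ELFv1 ppc64
 * function descriptor is three doublewords -- entry address, TOC
 * pointer, environment -- so "*((unsigned long *)ret_from_fork)"
 * above reads the entry address. The values below are made up.
 */
#include <stdio.h>

struct func_desc {
	unsigned long entry;	/* address of the actual code */
	unsigned long toc;	/* TOC (r2) value for the callee */
	unsigned long env;	/* environment pointer, usually 0 */
};

int main(void)
{
	struct func_desc fd = { 0x1000, 0x2000, 0 };
	/* same dereference pattern copy_thread applies to ret_from_fork */
	unsigned long nip = *(unsigned long *)&fd;

	printf("nip=%#lx toc=%#lx\n", nip, fd.toc);
	return 0;
}
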
430
431/*
432 * Set up a thread for executing a new program
433 */
434void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp)
435{
436 unsigned long entry, toc, load_addr = regs->gpr[2];
437
438 /* fdptr is a relocated pointer to the function descriptor for
439 * the elf _start routine. The first entry in the function
440 * descriptor is the entry address of _start and the second
441 * entry is the TOC value we need to use.
442 */
443 set_fs(USER_DS);
444 __get_user(entry, (unsigned long __user *)fdptr);
445 __get_user(toc, (unsigned long __user *)fdptr+1);
446
447 /* Check whether the e_entry function descriptor entries
448 * need to be relocated before we can use them.
449 */
450 if (load_addr != 0) {
451 entry += load_addr;
452 toc += load_addr;
453 }
454
455 /*
456 * If we exec out of a kernel thread then thread.regs will not be
457 * set. Do it now.
458 */
459 if (!current->thread.regs) {
460 unsigned long childregs = (unsigned long)current->thread_info +
461 THREAD_SIZE;
462 childregs -= sizeof(struct pt_regs);
463 current->thread.regs = (struct pt_regs *)childregs;
464 }
465
466 regs->nip = entry;
467 regs->gpr[1] = sp;
468 regs->gpr[2] = toc;
469 regs->msr = MSR_USER64;
470#ifndef CONFIG_SMP
471 if (last_task_used_math == current)
472 last_task_used_math = NULL;
473#endif /* CONFIG_SMP */
474 memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
475 current->thread.fpscr = 0;
476#ifdef CONFIG_ALTIVEC
477#ifndef CONFIG_SMP
478 if (last_task_used_altivec == current)
479 last_task_used_altivec = NULL;
480#endif /* CONFIG_SMP */
481 memset(current->thread.vr, 0, sizeof(current->thread.vr));
482 current->thread.vscr.u[0] = 0;
483 current->thread.vscr.u[1] = 0;
484 current->thread.vscr.u[2] = 0;
485 current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
486 current->thread.vrsave = 0;
487 current->thread.used_vr = 0;
488#endif /* CONFIG_ALTIVEC */
489}
490EXPORT_SYMBOL(start_thread);
491
492int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
493{
494 struct pt_regs *regs = tsk->thread.regs;
495
496 if (val > PR_FP_EXC_PRECISE)
497 return -EINVAL;
498 tsk->thread.fpexc_mode = __pack_fe01(val);
499 if (regs != NULL && (regs->msr & MSR_FP) != 0)
500 regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
501 | tsk->thread.fpexc_mode;
502 return 0;
503}
504
505int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
506{
507 unsigned int val;
508
509 val = __unpack_fe01(tsk->thread.fpexc_mode);
510 return put_user(val, (unsigned int __user *) adr);
511}
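
/*
 * Note (not part of this file): the two MSR bits that
 * __pack_fe01/__unpack_fe01 manage select the PowerPC floating-point
 * exception mode, which is why set_fpexc_mode rejects values above
 * PR_FP_EXC_PRECISE:
 *
 *	FE0 FE1	 mode
 *	 0   0	 exceptions disabled
 *	 0   1	 imprecise nonrecoverable
 *	 1   0	 imprecise recoverable
 *	 1   1	 precise
 */
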
512
513int sys_clone(unsigned long clone_flags, unsigned long p2, unsigned long p3,
514 unsigned long p4, unsigned long p5, unsigned long p6,
515 struct pt_regs *regs)
516{
517 unsigned long parent_tidptr = 0;
518 unsigned long child_tidptr = 0;
519
520 if (p2 == 0)
521 p2 = regs->gpr[1]; /* stack pointer for child */
522
523 if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
524 CLONE_CHILD_CLEARTID)) {
525 parent_tidptr = p3;
526 child_tidptr = p5;
527 if (test_thread_flag(TIF_32BIT)) {
528 parent_tidptr &= 0xffffffff;
529 child_tidptr &= 0xffffffff;
530 }
531 }
532
533 return do_fork(clone_flags, p2, regs, 0,
534 (int __user *)parent_tidptr, (int __user *)child_tidptr);
535}
536
537int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3,
538 unsigned long p4, unsigned long p5, unsigned long p6,
539 struct pt_regs *regs)
540{
541 return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL);
542}
543
544int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3,
545 unsigned long p4, unsigned long p5, unsigned long p6,
546 struct pt_regs *regs)
547{
548 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], regs, 0,
549 NULL, NULL);
550}
551
552int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
553 unsigned long a3, unsigned long a4, unsigned long a5,
554 struct pt_regs *regs)
555{
556 int error;
557 char * filename;
558
559 filename = getname((char __user *) a0);
560 error = PTR_ERR(filename);
561 if (IS_ERR(filename))
562 goto out;
563 flush_fp_to_thread(current);
564 flush_altivec_to_thread(current);
565 error = do_execve(filename, (char __user * __user *) a1,
566 (char __user * __user *) a2, regs);
567
568 if (error == 0) {
569 task_lock(current);
570 current->ptrace &= ~PT_DTRACE;
571 task_unlock(current);
572 }
573 putname(filename);
574
575out:
576 return error;
577}
578
579static int kstack_depth_to_print = 64;
580
581static int validate_sp(unsigned long sp, struct task_struct *p,
582 unsigned long nbytes)
583{
584 unsigned long stack_page = (unsigned long)p->thread_info;
585
586 if (sp >= stack_page + sizeof(struct thread_struct)
587 && sp <= stack_page + THREAD_SIZE - nbytes)
588 return 1;
589
590#ifdef CONFIG_IRQSTACKS
591 stack_page = (unsigned long) hardirq_ctx[task_cpu(p)];
592 if (sp >= stack_page + sizeof(struct thread_struct)
593 && sp <= stack_page + THREAD_SIZE - nbytes)
594 return 1;
595
596 stack_page = (unsigned long) softirq_ctx[task_cpu(p)];
597 if (sp >= stack_page + sizeof(struct thread_struct)
598 && sp <= stack_page + THREAD_SIZE - nbytes)
599 return 1;
600#endif
601
602 return 0;
603}
604
605unsigned long get_wchan(struct task_struct *p)
606{
607 unsigned long ip, sp;
608 int count = 0;
609
610 if (!p || p == current || p->state == TASK_RUNNING)
611 return 0;
612
613 sp = p->thread.ksp;
614 if (!validate_sp(sp, p, 112))
615 return 0;
616
617 do {
618 sp = *(unsigned long *)sp;
619 if (!validate_sp(sp, p, 112))
620 return 0;
621 if (count > 0) {
622 ip = *(unsigned long *)(sp + 16);
623 if (!in_sched_functions(ip))
624 return ip;
625 }
626 } while (count++ < 16);
627 return 0;
628}
629EXPORT_SYMBOL(get_wchan);
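
/*
 * Sketch (not part of this file): the ppc64 ABI stack frame header
 * that get_wchan and show_stack rely on -- back chain at offset 0
 * and the saved LR at offset 16, hence the "sp + 16" read above.
 */
#if 0	/* illustration only */
struct stack_frame_head {
	unsigned long back_chain;	/* sp[0]: caller's stack pointer */
	unsigned long cr_save;		/* sp[1]: saved condition register */
	unsigned long lr_save;		/* sp[2]: saved return address */
};
#endif
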
630
631void show_stack(struct task_struct *p, unsigned long *_sp)
632{
633 unsigned long ip, newsp, lr;
634 int count = 0;
635 unsigned long sp = (unsigned long)_sp;
636 int firstframe = 1;
637
638 if (sp == 0) {
639 if (p) {
640 sp = p->thread.ksp;
641 } else {
642 sp = __get_SP();
643 p = current;
644 }
645 }
646
647 lr = 0;
648 printk("Call Trace:\n");
649 do {
650 if (!validate_sp(sp, p, 112))
651 return;
652
653 _sp = (unsigned long *) sp;
654 newsp = _sp[0];
655 ip = _sp[2];
656 if (!firstframe || ip != lr) {
657 printk("[%016lx] [%016lx] ", sp, ip);
658 print_symbol("%s", ip);
659 if (firstframe)
660 printk(" (unreliable)");
661 printk("\n");
662 }
663 firstframe = 0;
664
665 /*
666 * See if this is an exception frame.
667 * We look for the "regshere" marker in the current frame.
668 */
669 if (validate_sp(sp, p, sizeof(struct pt_regs) + 400)
670 && _sp[12] == 0x7265677368657265ul) {
671 struct pt_regs *regs = (struct pt_regs *)
672 (sp + STACK_FRAME_OVERHEAD);
673 printk("--- Exception: %lx", regs->trap);
674 print_symbol(" at %s\n", regs->nip);
675 lr = regs->link;
676 print_symbol(" LR = %s\n", lr);
677 firstframe = 1;
678 }
679
680 sp = newsp;
681 } while (count++ < kstack_depth_to_print);
682}
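
/*
 * Illustration (standalone, not part of this file): the magic value
 * 0x7265677368657265 tested above is just the ASCII bytes of
 * "regshere", planted next to the saved pt_regs by the exception
 * entry code:
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned long marker = 0x7265677368657265ul;
	char buf[9] = { 0 };

	memcpy(buf, &marker, 8);
	/* "regshere" on big-endian ppc64; byte-reversed elsewhere */
	printf("%s\n", buf);
	return 0;
}
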
683
684void dump_stack(void)
685{
686 show_stack(current, (unsigned long *)__get_SP());
687}
688EXPORT_SYMBOL(dump_stack);
diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c
new file mode 100644
index 000000000000..01739d5c47c7
--- /dev/null
+++ b/arch/ppc64/kernel/prom.c
@@ -0,0 +1,1820 @@
1/*
2 *
3 *
4 * Procedures for interfacing to Open Firmware.
5 *
6 * Paul Mackerras August 1996.
7 * Copyright (C) 1996 Paul Mackerras.
8 *
9 * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
10 * {engebret|bergner}@us.ibm.com
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#undef DEBUG
19
20#include <stdarg.h>
21#include <linux/config.h>
22#include <linux/kernel.h>
23#include <linux/string.h>
24#include <linux/init.h>
25#include <linux/version.h>
26#include <linux/threads.h>
27#include <linux/spinlock.h>
28#include <linux/types.h>
29#include <linux/pci.h>
30#include <linux/stringify.h>
31#include <linux/delay.h>
32#include <linux/initrd.h>
33#include <linux/bitops.h>
34#include <linux/module.h>
35
36#include <asm/prom.h>
37#include <asm/rtas.h>
38#include <asm/lmb.h>
39#include <asm/abs_addr.h>
40#include <asm/page.h>
41#include <asm/processor.h>
42#include <asm/irq.h>
43#include <asm/io.h>
44#include <asm/smp.h>
45#include <asm/system.h>
46#include <asm/mmu.h>
47#include <asm/pgtable.h>
48#include <asm/pci.h>
49#include <asm/iommu.h>
50#include <asm/bootinfo.h>
51#include <asm/ppcdebug.h>
52#include <asm/btext.h>
53#include <asm/sections.h>
54#include <asm/machdep.h>
55#include <asm/pSeries_reconfig.h>
56
57#ifdef DEBUG
58#define DBG(fmt...) udbg_printf(fmt)
59#else
60#define DBG(fmt...)
61#endif
62
63struct pci_reg_property {
64 struct pci_address addr;
65 u32 size_hi;
66 u32 size_lo;
67};
68
69struct isa_reg_property {
70 u32 space;
71 u32 address;
72 u32 size;
73};
74
75
76typedef int interpret_func(struct device_node *, unsigned long *,
77 int, int, int);
78
79extern struct rtas_t rtas;
80extern struct lmb lmb;
81extern unsigned long klimit;
82
83static int __initdata dt_root_addr_cells;
84static int __initdata dt_root_size_cells;
85static int __initdata iommu_is_off;
86int __initdata iommu_force_on;
87typedef u32 cell_t;
88
89#if 0
90static struct boot_param_header *initial_boot_params __initdata;
91#else
92struct boot_param_header *initial_boot_params;
93#endif
94
95static struct device_node *allnodes = NULL;
96
97/* use when traversing the tree through the allnext, child, sibling,
98 * or parent members of struct device_node.
99 */
100static DEFINE_RWLOCK(devtree_lock);
101
102/* export that to outside world */
103struct device_node *of_chosen;
104
105/*
106 * Wrapper for allocating memory for various data that needs to be
107 * attached to device nodes as they are processed at boot or when
108 * added to the device tree later (e.g. DLPAR). At boot there is
109 * already a region reserved so we just increment *mem_start by size;
110 * otherwise we call kmalloc.
111 */
112static void * prom_alloc(unsigned long size, unsigned long *mem_start)
113{
114 unsigned long tmp;
115
116 if (!mem_start)
117 return kmalloc(size, GFP_KERNEL);
118
119 tmp = *mem_start;
120 *mem_start += size;
121 return (void *)tmp;
122}
123
124/*
125 * Find the device_node with a given phandle.
126 */
127static struct device_node * find_phandle(phandle ph)
128{
129 struct device_node *np;
130
131 for (np = allnodes; np != 0; np = np->allnext)
132 if (np->linux_phandle == ph)
133 return np;
134 return NULL;
135}
136
137/*
138 * Find the interrupt parent of a node.
139 */
140static struct device_node * __devinit intr_parent(struct device_node *p)
141{
142 phandle *parp;
143
144 parp = (phandle *) get_property(p, "interrupt-parent", NULL);
145 if (parp == NULL)
146 return p->parent;
147 return find_phandle(*parp);
148}
149
150/*
151 * Find out the size of each entry of the interrupts property
152 * for a node.
153 */
154int __devinit prom_n_intr_cells(struct device_node *np)
155{
156 struct device_node *p;
157 unsigned int *icp;
158
159 for (p = np; (p = intr_parent(p)) != NULL; ) {
160 icp = (unsigned int *)
161 get_property(p, "#interrupt-cells", NULL);
162 if (icp != NULL)
163 return *icp;
164 if (get_property(p, "interrupt-controller", NULL) != NULL
165 || get_property(p, "interrupt-map", NULL) != NULL) {
166 printk("oops, node %s doesn't have #interrupt-cells\n",
167 p->full_name);
168 return 1;
169 }
170 }
171#ifdef DEBUG_IRQ
172 printk("prom_n_intr_cells failed for %s\n", np->full_name);
173#endif
174 return 1;
175}
176
177/*
178 * Map an interrupt from a device up to the platform interrupt
179 * descriptor.
180 */
181static int __devinit map_interrupt(unsigned int **irq, struct device_node **ictrler,
182 struct device_node *np, unsigned int *ints,
183 int nintrc)
184{
185 struct device_node *p, *ipar;
186 unsigned int *imap, *imask, *ip;
187 int i, imaplen, match;
188 int newintrc = 0, newaddrc = 0;
189 unsigned int *reg;
190 int naddrc;
191
192 reg = (unsigned int *) get_property(np, "reg", NULL);
193 naddrc = prom_n_addr_cells(np);
194 p = intr_parent(np);
195 while (p != NULL) {
196 if (get_property(p, "interrupt-controller", NULL) != NULL)
197 /* this node is an interrupt controller, stop here */
198 break;
199 imap = (unsigned int *)
200 get_property(p, "interrupt-map", &imaplen);
201 if (imap == NULL) {
202 p = intr_parent(p);
203 continue;
204 }
205 imask = (unsigned int *)
206 get_property(p, "interrupt-map-mask", NULL);
207 if (imask == NULL) {
208 printk("oops, %s has interrupt-map but no mask\n",
209 p->full_name);
210 return 0;
211 }
212 imaplen /= sizeof(unsigned int);
213 match = 0;
214 ipar = NULL;
215 while (imaplen > 0 && !match) {
216 /* check the child-interrupt field */
217 match = 1;
218 for (i = 0; i < naddrc && match; ++i)
219 match = ((reg[i] ^ imap[i]) & imask[i]) == 0;
220 for (; i < naddrc + nintrc && match; ++i)
221 match = ((ints[i-naddrc] ^ imap[i]) & imask[i]) == 0;
222 imap += naddrc + nintrc;
223 imaplen -= naddrc + nintrc;
224 /* grab the interrupt parent */
225 ipar = find_phandle((phandle) *imap++);
226 --imaplen;
227 if (ipar == NULL) {
228 printk("oops, no int parent %x in map of %s\n",
229 imap[-1], p->full_name);
230 return 0;
231 }
232 /* find the parent's # addr and intr cells */
233 ip = (unsigned int *)
234 get_property(ipar, "#interrupt-cells", NULL);
235 if (ip == NULL) {
236 printk("oops, no #interrupt-cells on %s\n",
237 ipar->full_name);
238 return 0;
239 }
240 newintrc = *ip;
241 ip = (unsigned int *)
242 get_property(ipar, "#address-cells", NULL);
243 newaddrc = (ip == NULL)? 0: *ip;
244 imap += newaddrc + newintrc;
245 imaplen -= newaddrc + newintrc;
246 }
247 if (imaplen < 0) {
248 printk("oops, error decoding int-map on %s, len=%d\n",
249 p->full_name, imaplen);
250 return 0;
251 }
252 if (!match) {
253#ifdef DEBUG_IRQ
254 printk("oops, no match in %s int-map for %s\n",
255 p->full_name, np->full_name);
256#endif
257 return 0;
258 }
259 p = ipar;
260 naddrc = newaddrc;
261 nintrc = newintrc;
262 ints = imap - nintrc;
263 reg = ints - naddrc;
264 }
265 if (p == NULL) {
266#ifdef DEBUG_IRQ
267 printk("hmmm, int tree for %s doesn't have ctrler\n",
268 np->full_name);
269#endif
270 return 0;
271 }
272 *irq = ints;
273 *ictrler = p;
274 return nintrc;
275}
276
277static int __devinit finish_node_interrupts(struct device_node *np,
278 unsigned long *mem_start,
279 int measure_only)
280{
281 unsigned int *ints;
282 int intlen, intrcells, intrcount;
283 int i, j, n;
284 unsigned int *irq, virq;
285 struct device_node *ic;
286
287 ints = (unsigned int *) get_property(np, "interrupts", &intlen);
288 if (ints == NULL)
289 return 0;
290 intrcells = prom_n_intr_cells(np);
291 intlen /= intrcells * sizeof(unsigned int);
292
293 np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start);
294 if (!np->intrs)
295 return -ENOMEM;
296
297 if (measure_only)
298 return 0;
299
300 intrcount = 0;
301 for (i = 0; i < intlen; ++i, ints += intrcells) {
302 n = map_interrupt(&irq, &ic, np, ints, intrcells);
303 if (n <= 0)
304 continue;
305
306 /* don't map IRQ numbers under a cascaded 8259 controller */
307 if (ic && device_is_compatible(ic, "chrp,iic")) {
308 np->intrs[intrcount].line = irq[0];
309 } else {
310 virq = virt_irq_create_mapping(irq[0]);
311 if (virq == NO_IRQ) {
312 printk(KERN_CRIT "Could not allocate interrupt"
313 " number for %s\n", np->full_name);
314 continue;
315 }
316 np->intrs[intrcount].line = irq_offset_up(virq);
317 }
318
319 /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
320 if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) {
321 char *name = get_property(ic->parent, "name", NULL);
322 if (name && !strcmp(name, "u3"))
323 np->intrs[intrcount].line += 128;
324 }
325 np->intrs[intrcount].sense = 1;
326 if (n > 1)
327 np->intrs[intrcount].sense = irq[1];
328 if (n > 2) {
329 printk("hmmm, got %d intr cells for %s:", n,
330 np->full_name);
331 for (j = 0; j < n; ++j)
332 printk(" %d", irq[j]);
333 printk("\n");
334 }
335 ++intrcount;
336 }
337 np->n_intrs = intrcount;
338
339 return 0;
340}
341
342static int __devinit interpret_pci_props(struct device_node *np,
343 unsigned long *mem_start,
344 int naddrc, int nsizec,
345 int measure_only)
346{
347 struct address_range *adr;
348 struct pci_reg_property *pci_addrs;
349 int i, l, n_addrs;
350
351 pci_addrs = (struct pci_reg_property *)
352 get_property(np, "assigned-addresses", &l);
353 if (!pci_addrs)
354 return 0;
355
356 n_addrs = l / sizeof(*pci_addrs);
357
358 adr = prom_alloc(n_addrs * sizeof(*adr), mem_start);
359 if (!adr)
360 return -ENOMEM;
361
362 if (measure_only)
363 return 0;
364
365 np->addrs = adr;
366 np->n_addrs = n_addrs;
367
368 for (i = 0; i < n_addrs; i++) {
369 adr[i].space = pci_addrs[i].addr.a_hi;
370 adr[i].address = pci_addrs[i].addr.a_lo |
371 ((u64)pci_addrs[i].addr.a_mid << 32);
372 adr[i].size = pci_addrs[i].size_lo;
373 }
374
375 return 0;
376}
377
378static int __init interpret_dbdma_props(struct device_node *np,
379 unsigned long *mem_start,
380 int naddrc, int nsizec,
381 int measure_only)
382{
383 struct reg_property32 *rp;
384 struct address_range *adr;
385 unsigned long base_address;
386 int i, l;
387 struct device_node *db;
388
389 base_address = 0;
390 if (!measure_only) {
391 for (db = np->parent; db != NULL; db = db->parent) {
392 if (!strcmp(db->type, "dbdma") && db->n_addrs != 0) {
393 base_address = db->addrs[0].address;
394 break;
395 }
396 }
397 }
398
399 rp = (struct reg_property32 *) get_property(np, "reg", &l);
400 if (rp != 0 && l >= sizeof(struct reg_property32)) {
401 i = 0;
402 adr = (struct address_range *) (*mem_start);
403 while ((l -= sizeof(struct reg_property32)) >= 0) {
404 if (!measure_only) {
405 adr[i].space = 2;
406 adr[i].address = rp[i].address + base_address;
407 adr[i].size = rp[i].size;
408 }
409 ++i;
410 }
411 np->addrs = adr;
412 np->n_addrs = i;
413 (*mem_start) += i * sizeof(struct address_range);
414 }
415
416 return 0;
417}
418
419static int __init interpret_macio_props(struct device_node *np,
420 unsigned long *mem_start,
421 int naddrc, int nsizec,
422 int measure_only)
423{
424 struct reg_property32 *rp;
425 struct address_range *adr;
426 unsigned long base_address;
427 int i, l;
428 struct device_node *db;
429
430 base_address = 0;
431 if (!measure_only) {
432 for (db = np->parent; db != NULL; db = db->parent) {
433 if (!strcmp(db->type, "mac-io") && db->n_addrs != 0) {
434 base_address = db->addrs[0].address;
435 break;
436 }
437 }
438 }
439
440 rp = (struct reg_property32 *) get_property(np, "reg", &l);
441 if (rp != 0 && l >= sizeof(struct reg_property32)) {
442 i = 0;
443 adr = (struct address_range *) (*mem_start);
444 while ((l -= sizeof(struct reg_property32)) >= 0) {
445 if (!measure_only) {
446 adr[i].space = 2;
447 adr[i].address = rp[i].address + base_address;
448 adr[i].size = rp[i].size;
449 }
450 ++i;
451 }
452 np->addrs = adr;
453 np->n_addrs = i;
454 (*mem_start) += i * sizeof(struct address_range);
455 }
456
457 return 0;
458}
459
460static int __init interpret_isa_props(struct device_node *np,
461 unsigned long *mem_start,
462 int naddrc, int nsizec,
463 int measure_only)
464{
465 struct isa_reg_property *rp;
466 struct address_range *adr;
467 int i, l;
468
469 rp = (struct isa_reg_property *) get_property(np, "reg", &l);
470 if (rp != 0 && l >= sizeof(struct isa_reg_property)) {
471 i = 0;
472 adr = (struct address_range *) (*mem_start);
473 while ((l -= sizeof(struct isa_reg_property)) >= 0) {
474 if (!measure_only) {
475 adr[i].space = rp[i].space;
476 adr[i].address = rp[i].address;
477 adr[i].size = rp[i].size;
478 }
479 ++i;
480 }
481 np->addrs = adr;
482 np->n_addrs = i;
483 (*mem_start) += i * sizeof(struct address_range);
484 }
485
486 return 0;
487}
488
489static int __init interpret_root_props(struct device_node *np,
490 unsigned long *mem_start,
491 int naddrc, int nsizec,
492 int measure_only)
493{
494 struct address_range *adr;
495 int i, l;
496 unsigned int *rp;
497 int rpsize = (naddrc + nsizec) * sizeof(unsigned int);
498
499 rp = (unsigned int *) get_property(np, "reg", &l);
500 if (rp != 0 && l >= rpsize) {
501 i = 0;
502 adr = (struct address_range *) (*mem_start);
503 while ((l -= rpsize) >= 0) {
504 if (!measure_only) {
505 adr[i].space = 0;
506 adr[i].address = rp[naddrc - 1];
507 adr[i].size = rp[naddrc + nsizec - 1];
508 }
509 ++i;
510 rp += naddrc + nsizec;
511 }
512 np->addrs = adr;
513 np->n_addrs = i;
514 (*mem_start) += i * sizeof(struct address_range);
515 }
516
517 return 0;
518}
519
520static int __devinit finish_node(struct device_node *np,
521 unsigned long *mem_start,
522 interpret_func *ifunc,
523 int naddrc, int nsizec,
524 int measure_only)
525{
526 struct device_node *child;
527 int *ip, rc = 0;
528
529 /* get the device addresses and interrupts */
530 if (ifunc != NULL)
531 rc = ifunc(np, mem_start, naddrc, nsizec, measure_only);
532 if (rc)
533 goto out;
534
535 rc = finish_node_interrupts(np, mem_start, measure_only);
536 if (rc)
537 goto out;
538
539 /* Look for #address-cells and #size-cells properties. */
540 ip = (int *) get_property(np, "#address-cells", NULL);
541 if (ip != NULL)
542 naddrc = *ip;
543 ip = (int *) get_property(np, "#size-cells", NULL);
544 if (ip != NULL)
545 nsizec = *ip;
546
547 /* the f50 sets the name to 'display' and 'compatible' to what we
548 * expect for the name -- Cort
549 */
550 if (!strcmp(np->name, "display"))
551 np->name = get_property(np, "compatible", NULL);
552
553 if (!strcmp(np->name, "device-tree") || np->parent == NULL)
554 ifunc = interpret_root_props;
555 else if (np->type == 0)
556 ifunc = NULL;
557 else if (!strcmp(np->type, "pci") || !strcmp(np->type, "vci"))
558 ifunc = interpret_pci_props;
559 else if (!strcmp(np->type, "dbdma"))
560 ifunc = interpret_dbdma_props;
561 else if (!strcmp(np->type, "mac-io") || ifunc == interpret_macio_props)
562 ifunc = interpret_macio_props;
563 else if (!strcmp(np->type, "isa"))
564 ifunc = interpret_isa_props;
565 else if (!strcmp(np->name, "uni-n") || !strcmp(np->name, "u3"))
566 ifunc = interpret_root_props;
567 else if (!((ifunc == interpret_dbdma_props
568 || ifunc == interpret_macio_props)
569 && (!strcmp(np->type, "escc")
570 || !strcmp(np->type, "media-bay"))))
571 ifunc = NULL;
572
573 for (child = np->child; child != NULL; child = child->sibling) {
574 rc = finish_node(child, mem_start, ifunc,
575 naddrc, nsizec, measure_only);
576 if (rc)
577 goto out;
578 }
579out:
580 return rc;
581}
582
583/**
584 * finish_device_tree is called once things are running normally
585 * (i.e. with text and data mapped to the address they were linked at).
586 * It traverses the device tree and fills in some of the additional
587 * fields in each node like {n_}addrs and {n_}intrs; the virtual
588 * interrupt mapping is also initialized at this point.
589 */
590void __init finish_device_tree(void)
591{
592 unsigned long start, end, size = 0;
593
594 DBG(" -> finish_device_tree\n");
595
596 if (ppc64_interrupt_controller == IC_INVALID) {
597 DBG("failed to configure interrupt controller type\n");
598 panic("failed to configure interrupt controller type\n");
599 }
600
601 /* Initialize virtual IRQ map */
602 virt_irq_init();
603
604 /*
605 * Finish device-tree (pre-parsing some properties etc.)
606 * We do this in 2 passes. One with "measure_only" set, which
607 * will only measure the amount of memory needed, then we can
608 * allocate that memory, and call finish_node again. However,
609 * we must be careful as most routines will fail nowadays when
610 * prom_alloc() returns 0, so we must make sure our first pass
611 * doesn't start at 0. We pre-initialize size to 16 for that
612 * reason and then remove those additional 16 bytes.
613 */
614 size = 16;
615 finish_node(allnodes, &size, NULL, 0, 0, 1);
616 size -= 16;
617 end = start = (unsigned long)abs_to_virt(lmb_alloc(size, 128));
618 finish_node(allnodes, &end, NULL, 0, 0, 0);
619 BUG_ON(end != start + size);
620
621 DBG(" <- finish_device_tree\n");
622}
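
/*
 * Sketch (standalone, not part of this file): the measure-then-fill
 * pattern finish_device_tree uses, reduced to a bump allocator.
 * Pass 1 only advances a counter; pass 2 hands out memory from one
 * allocation of exactly the measured size:
 */
#include <stdio.h>
#include <stdlib.h>

static void *bump(unsigned long size, unsigned long *mem)
{
	void *p = (void *)*mem;
	*mem += size;
	return p;
}

int main(void)
{
	unsigned long size = 16, start, end;	/* 16: non-zero sentinel */

	bump(24, &size);			/* pass 1: measure */
	bump(40, &size);
	size -= 16;

	end = start = (unsigned long)malloc(size);
	bump(24, &end);				/* pass 2: fill */
	bump(40, &end);
	printf("size=%lu ok=%d\n", size, end == start + size);
	free((void *)start);
	return 0;
}
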
623
624#ifdef DEBUG
625#define printk udbg_printf
626#endif
627
628static inline char *find_flat_dt_string(u32 offset)
629{
630 return ((char *)initial_boot_params) + initial_boot_params->off_dt_strings
631 + offset;
632}
633
634/**
635 * This function is used to scan the flattened device-tree; it is
636 * used to extract memory information at boot before we can
637 * unflatten the tree.
638 */
639static int __init scan_flat_dt(int (*it)(unsigned long node,
640 const char *full_path, void *data),
641 void *data)
642{
643 unsigned long p = ((unsigned long)initial_boot_params) +
644 initial_boot_params->off_dt_struct;
645 int rc = 0;
646
647 do {
648 u32 tag = *((u32 *)p);
649 char *pathp;
650
651 p += 4;
652 if (tag == OF_DT_END_NODE)
653 continue;
654 if (tag == OF_DT_END)
655 break;
656 if (tag == OF_DT_PROP) {
657 u32 sz = *((u32 *)p);
658 p += 8;
659 p = _ALIGN(p, sz >= 8 ? 8 : 4);
660 p += sz;
661 p = _ALIGN(p, 4);
662 continue;
663 }
664 if (tag != OF_DT_BEGIN_NODE) {
665 printk(KERN_WARNING "Invalid tag %x scanning flattened"
666 " device tree !\n", tag);
667 return -EINVAL;
668 }
669 pathp = (char *)p;
670 p = _ALIGN(p + strlen(pathp) + 1, 4);
671 rc = it(p, pathp, data);
672 if (rc != 0)
673 break;
674 } while(1);
675
676 return rc;
677}
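
/*
 * Note (not part of this file): the token stream scan_flat_dt walks,
 * as implied by the code above (every word is 32 bits):
 *
 *	OF_DT_BEGIN_NODE  <full path string, NUL-padded to 4 bytes>
 *	  OF_DT_PROP  <value size>  <name offset into string block>
 *		      <value bytes; 8-byte aligned when size >= 8>
 *	  ... child nodes nest as further BEGIN/END pairs ...
 *	OF_DT_END_NODE
 *	OF_DT_END
 */
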
678
679/**
680 * This function can be used within a scan_flat_dt callback to get
681 * access to properties.
682 */
683static void* __init get_flat_dt_prop(unsigned long node, const char *name,
684 unsigned long *size)
685{
686 unsigned long p = node;
687
688 do {
689 u32 tag = *((u32 *)p);
690 u32 sz, noff;
691 const char *nstr;
692
693 p += 4;
694 if (tag != OF_DT_PROP)
695 return NULL;
696
697 sz = *((u32 *)p);
698 noff = *((u32 *)(p + 4));
699 p += 8;
700 p = _ALIGN(p, sz >= 8 ? 8 : 4);
701
702 nstr = find_flat_dt_string(noff);
703 if (nstr == NULL) {
704 printk(KERN_WARNING "Can't find property index name!\n");
705 return NULL;
706 }
707 if (strcmp(name, nstr) == 0) {
708 if (size)
709 *size = sz;
710 return (void *)p;
711 }
712 p += sz;
713 p = _ALIGN(p, 4);
714 } while(1);
715}
716
717static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
718 unsigned long align)
719{
720 void *res;
721
722 *mem = _ALIGN(*mem, align);
723 res = (void *)*mem;
724 *mem += size;
725
726 return res;
727}
728
729static unsigned long __init unflatten_dt_node(unsigned long mem,
730 unsigned long *p,
731 struct device_node *dad,
732 struct device_node ***allnextpp)
733{
734 struct device_node *np;
735 struct property *pp, **prev_pp = NULL;
736 char *pathp;
737 u32 tag;
738 unsigned int l;
739
740 tag = *((u32 *)(*p));
741 if (tag != OF_DT_BEGIN_NODE) {
742 printk("Weird tag at start of node: %x\n", tag);
743 return mem;
744 }
745 *p += 4;
746 pathp = (char *)*p;
747 l = strlen(pathp) + 1;
748 *p = _ALIGN(*p + l, 4);
749
750 np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + l,
751 __alignof__(struct device_node));
752 if (allnextpp) {
753 memset(np, 0, sizeof(*np));
754 np->full_name = ((char*)np) + sizeof(struct device_node);
755 memcpy(np->full_name, pathp, l);
756 prev_pp = &np->properties;
757 **allnextpp = np;
758 *allnextpp = &np->allnext;
759 if (dad != NULL) {
760 np->parent = dad;
761 /* we temporarily use the `next' field as `last_child'. */
762 if (dad->next == 0)
763 dad->child = np;
764 else
765 dad->next->sibling = np;
766 dad->next = np;
767 }
768 kref_init(&np->kref);
769 }
770 while(1) {
771 u32 sz, noff;
772 char *pname;
773
774 tag = *((u32 *)(*p));
775 if (tag != OF_DT_PROP)
776 break;
777 *p += 4;
778 sz = *((u32 *)(*p));
779 noff = *((u32 *)((*p) + 4));
780 *p = _ALIGN((*p) + 8, sz >= 8 ? 8 : 4);
781
782 pname = find_flat_dt_string(noff);
783 if (pname == NULL) {
784 printk("Can't find property name in list !\n");
785 break;
786 }
787 l = strlen(pname) + 1;
788 pp = unflatten_dt_alloc(&mem, sizeof(struct property),
789 __alignof__(struct property));
790 if (allnextpp) {
791 if (strcmp(pname, "linux,phandle") == 0) {
792 np->node = *((u32 *)*p);
793 if (np->linux_phandle == 0)
794 np->linux_phandle = np->node;
795 }
796 if (strcmp(pname, "ibm,phandle") == 0)
797 np->linux_phandle = *((u32 *)*p);
798 pp->name = pname;
799 pp->length = sz;
800 pp->value = (void *)*p;
801 *prev_pp = pp;
802 prev_pp = &pp->next;
803 }
804 *p = _ALIGN((*p) + sz, 4);
805 }
806 if (allnextpp) {
807 *prev_pp = NULL;
808 np->name = get_property(np, "name", NULL);
809 np->type = get_property(np, "device_type", NULL);
810
811 if (!np->name)
812 np->name = "<NULL>";
813 if (!np->type)
814 np->type = "<NULL>";
815 }
816 while (tag == OF_DT_BEGIN_NODE) {
817 mem = unflatten_dt_node(mem, p, np, allnextpp);
818 tag = *((u32 *)(*p));
819 }
820 if (tag != OF_DT_END_NODE) {
821 printk("Weird tag at start of node: %x\n", tag);
822 return mem;
823 }
824 *p += 4;
825 return mem;
826}
827
828
829/**
830 * unflattens the device-tree passed by the firmware, creating the
831 * tree of struct device_node. It also fills the "name" and "type"
832 * pointers of the nodes so the normal device-tree walking functions
833 * can be used (this used to be done by finish_device_tree)
834 */
835void __init unflatten_device_tree(void)
836{
837 unsigned long start, mem, size;
838 struct device_node **allnextp = &allnodes;
839 char *p;
840 int l = 0;
841
842 DBG(" -> unflatten_device_tree()\n");
843
844 /* First pass, scan for size */
845 start = ((unsigned long)initial_boot_params) +
846 initial_boot_params->off_dt_struct;
847 size = unflatten_dt_node(0, &start, NULL, NULL);
848
849 DBG(" size is %lx, allocating...\n", size);
850
851 /* Allocate memory for the expanded device tree */
852 mem = (unsigned long)abs_to_virt(lmb_alloc(size,
853 __alignof__(struct device_node)));
854 DBG(" unflattening...\n", mem);
855
856 /* Second pass, do actual unflattening */
857 start = ((unsigned long)initial_boot_params) +
858 initial_boot_params->off_dt_struct;
859 unflatten_dt_node(mem, &start, NULL, &allnextp);
860 if (*((u32 *)start) != OF_DT_END)
861 printk(KERN_WARNING "Weird tag at end of tree: %x\n", *((u32 *)start));
862 *allnextp = NULL;
863
864 /* Get pointer to OF "/chosen" node for use everywhere */
865 of_chosen = of_find_node_by_path("/chosen");
866
867 /* Retrieve command line */
868 if (of_chosen != NULL) {
869 p = (char *)get_property(of_chosen, "bootargs", &l);
870 if (p != NULL && l > 0)
871 strlcpy(cmd_line, p, min(l, COMMAND_LINE_SIZE));
872 }
873#ifdef CONFIG_CMDLINE
874 if (l == 0 || (l == 1 && (*p) == 0))
875 strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
876#endif /* CONFIG_CMDLINE */
877
878 DBG("Command line is: %s\n", cmd_line);
879
880 DBG(" <- unflatten_device_tree()\n");
881}
882
883
884static int __init early_init_dt_scan_cpus(unsigned long node,
885 const char *full_path, void *data)
886{
887 char *type = get_flat_dt_prop(node, "device_type", NULL);
888
889 /* We are scanning "cpu" nodes only */
890 if (type == NULL || strcmp(type, "cpu") != 0)
891 return 0;
892
893 /* On LPAR, look for the first ibm,pft-size property for the hash table size
894 */
895 if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
896 u32 *pft_size;
897 pft_size = (u32 *)get_flat_dt_prop(node, "ibm,pft-size", NULL);
898 if (pft_size != NULL) {
899 /* pft_size[0] is the NUMA CEC cookie */
900 ppc64_pft_size = pft_size[1];
901 }
902 }
903
904 if (initial_boot_params && initial_boot_params->version >= 2) {
905 /* version 2 of the kexec param format adds the phys cpuid
906 * of booted proc.
907 */
908 boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
909 boot_cpuid = 0;
910 } else {
911 /* Check if it's the boot cpu; if so, set its hw index in the paca now */
912 if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) {
913 u32 *prop = get_flat_dt_prop(node, "reg", NULL);
914 set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop);
915 boot_cpuid_phys = get_hard_smp_processor_id(0);
916 }
917 }
918
919 return 0;
920}
921
922static int __init early_init_dt_scan_chosen(unsigned long node,
923 const char *full_path, void *data)
924{
925 u32 *prop;
926 u64 *prop64;
927 extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end;
928
929 if (strcmp(full_path, "/chosen") != 0)
930 return 0;
931
932 /* get platform type */
933 prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL);
934 if (prop == NULL)
935 return 0;
936 systemcfg->platform = *prop;
937
938 /* check if iommu is forced on or off */
939 if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
940 iommu_is_off = 1;
941 if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
942 iommu_force_on = 1;
943
944 prop64 = (u64*)get_flat_dt_prop(node, "linux,memory-limit", NULL);
945 if (prop64)
946 memory_limit = *prop64;
947
948 prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
949 if (prop64)
950 tce_alloc_start = *prop64;
951
952 prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
953 if (prop64)
954 tce_alloc_end = *prop64;
955
956#ifdef CONFIG_PPC_RTAS
957 /* To help early debugging via the front panel, we retrieve a minimal
958 * set of RTAS info now if available
959 */
960 {
961 u64 *basep, *entryp;
962
963 basep = (u64*)get_flat_dt_prop(node, "linux,rtas-base", NULL);
964 entryp = (u64*)get_flat_dt_prop(node, "linux,rtas-entry", NULL);
965 prop = (u32*)get_flat_dt_prop(node, "linux,rtas-size", NULL);
966 if (basep && entryp && prop) {
967 rtas.base = *basep;
968 rtas.entry = *entryp;
969 rtas.size = *prop;
970 }
971 }
972#endif /* CONFIG_PPC_RTAS */
973
974 /* break now */
975 return 1;
976}
977
978static int __init early_init_dt_scan_root(unsigned long node,
979 const char *full_path, void *data)
980{
981 u32 *prop;
982
983 if (strcmp(full_path, "/") != 0)
984 return 0;
985
986 prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL);
987 dt_root_size_cells = (prop == NULL) ? 1 : *prop;
988
989 prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL);
990 dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
991
992 /* break now */
993 return 1;
994}
995
996static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
997{
998 cell_t *p = *cellp;
999 unsigned long r = 0;
1000
1001 /* Ignore more than 2 cells */
1002 while (s > 2) {
1003 p++;
1004 s--;
1005 }
1006 while (s) {
1007 r <<= 32;
1008 r |= *(p++);
1009 s--;
1010 }
1011
1012 *cellp = p;
1013 return r;
1014}
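
/*
 * Worked example (standalone, not part of this file): with
 * dt_root_addr_cells == 2, the cells { 0x00000001, 0x80000000 }
 * decode to 0x180000000. Simplified replica (assumes a 64-bit
 * unsigned long and omits the >2-cell truncation):
 */
#include <stdio.h>

typedef unsigned int cell_t;

static unsigned long next_cell(int s, cell_t **cellp)
{
	cell_t *p = *cellp;
	unsigned long r = 0;

	while (s--)
		r = (r << 32) | *p++;
	*cellp = p;
	return r;
}

int main(void)
{
	cell_t reg[] = { 0x00000001, 0x80000000 };
	cell_t *p = reg;

	printf("%#lx\n", next_cell(2, &p));	/* prints 0x180000000 */
	return 0;
}
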
1015
1016
1017static int __init early_init_dt_scan_memory(unsigned long node,
1018 const char *full_path, void *data)
1019{
1020 char *type = get_flat_dt_prop(node, "device_type", NULL);
1021 cell_t *reg, *endp;
1022 unsigned long l;
1023
1024 /* We are scanning "memory" nodes only */
1025 if (type == NULL || strcmp(type, "memory") != 0)
1026 return 0;
1027
1028 reg = (cell_t *)get_flat_dt_prop(node, "reg", &l);
1029 if (reg == NULL)
1030 return 0;
1031
1032 endp = reg + (l / sizeof(cell_t));
1033
1034 DBG("memory scan node %s ...\n", full_path);
1035 while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
1036 unsigned long base, size;
1037
1038 base = dt_mem_next_cell(dt_root_addr_cells, &reg);
1039 size = dt_mem_next_cell(dt_root_size_cells, &reg);
1040
1041 if (size == 0)
1042 continue;
1043 DBG(" - %lx , %lx\n", base, size);
1044 if (iommu_is_off) {
1045 if (base >= 0x80000000ul)
1046 continue;
1047 if ((base + size) > 0x80000000ul)
1048 size = 0x80000000ul - base;
1049 }
1050 lmb_add(base, size);
1051 }
1052 return 0;
1053}
1054
1055static void __init early_reserve_mem(void)
1056{
1057 u64 base, size;
1058 u64 *reserve_map = (u64 *)(((unsigned long)initial_boot_params) +
1059 initial_boot_params->off_mem_rsvmap);
1060 while (1) {
1061 base = *(reserve_map++);
1062 size = *(reserve_map++);
1063 if (size == 0)
1064 break;
1065 DBG("reserving: %lx -> %lx\n", base, size);
1066 lmb_reserve(base, size);
1067 }
1068
1069#if 0
1070 DBG("memory reserved, lmbs :\n");
1071 lmb_dump_all();
1072#endif
1073}
1074
1075void __init early_init_devtree(void *params)
1076{
1077 DBG(" -> early_init_devtree()\n");
1078
1079 /* Setup flat device-tree pointer */
1080 initial_boot_params = params;
1081
1082 /* By default, hash size is not set */
1083 ppc64_pft_size = 0;
1084
1085 /* Retrieve various information from the /chosen node of the
1086 * device-tree, including the platform type, initrd location and
1087 * size, TCE reserve, and more ...
1088 */
1089 scan_flat_dt(early_init_dt_scan_chosen, NULL);
1090
1091 /* Scan memory nodes and rebuild LMBs */
1092 lmb_init();
1093 scan_flat_dt(early_init_dt_scan_root, NULL);
1094 scan_flat_dt(early_init_dt_scan_memory, NULL);
1095 lmb_enforce_memory_limit();
1096 lmb_analyze();
1097 systemcfg->physicalMemorySize = lmb_phys_mem_size();
1098 lmb_reserve(0, __pa(klimit));
1099
1100 DBG("Phys. mem: %lx\n", systemcfg->physicalMemorySize);
1101
1102 /* Reserve LMB regions used by kernel, initrd, dt, etc... */
1103 early_reserve_mem();
1104
1105 DBG("Scanning CPUs ...\n");
1106
1107 /* Retrieve hash table size from flattened tree */
1108 scan_flat_dt(early_init_dt_scan_cpus, NULL);
1109
1110 /* If hash size wasn't obtained above, we calculate it now based on
1111 * the total RAM size
1112 */
1113 if (ppc64_pft_size == 0) {
1114 unsigned long rnd_mem_size, pteg_count;
1115
1116 /* round mem_size up to next power of 2 */
1117 rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize);
1118 if (rnd_mem_size < systemcfg->physicalMemorySize)
1119 rnd_mem_size <<= 1;
1120
1121 /* # pages / 2 */
1122 pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11);
1123
1124 ppc64_pft_size = __ilog2(pteg_count << 7);
1125 }
1126
1127 DBG("Hash pftSize: %x\n", (int)ppc64_pft_size);
1128 DBG(" <- early_init_devtree()\n");
1129}
1130
1131#undef printk
1132
1133int
1134prom_n_addr_cells(struct device_node* np)
1135{
1136 int* ip;
1137 do {
1138 if (np->parent)
1139 np = np->parent;
1140 ip = (int *) get_property(np, "#address-cells", NULL);
1141 if (ip != NULL)
1142 return *ip;
1143 } while (np->parent);
1144 /* No #address-cells property for the root node, default to 1 */
1145 return 1;
1146}
1147
1148int
1149prom_n_size_cells(struct device_node* np)
1150{
1151 int* ip;
1152 do {
1153 if (np->parent)
1154 np = np->parent;
1155 ip = (int *) get_property(np, "#size-cells", NULL);
1156 if (ip != NULL)
1157 return *ip;
1158 } while (np->parent);
1159 /* No #size-cells property for the root node, default to 1 */
1160 return 1;
1161}
1162
1163/**
1164 * Work out the sense (active-low level / active-high edge)
1165 * of each interrupt from the device tree.
1166 */
1167void __init prom_get_irq_senses(unsigned char *senses, int off, int max)
1168{
1169 struct device_node *np;
1170 int i, j;
1171
1172 /* default to level-triggered */
1173 memset(senses, 1, max - off);
1174
1175 for (np = allnodes; np != 0; np = np->allnext) {
1176 for (j = 0; j < np->n_intrs; j++) {
1177 i = np->intrs[j].line;
1178 if (i >= off && i < max)
1179 senses[i-off] = np->intrs[j].sense ?
1180 IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE :
1181 IRQ_SENSE_EDGE | IRQ_POLARITY_POSITIVE;
1182 }
1183 }
1184}
1185
1186/**
1187 * Construct and return a list of the device_nodes with a given name.
1188 */
1189struct device_node *
1190find_devices(const char *name)
1191{
1192 struct device_node *head, **prevp, *np;
1193
1194 prevp = &head;
1195 for (np = allnodes; np != 0; np = np->allnext) {
1196 if (np->name != 0 && strcasecmp(np->name, name) == 0) {
1197 *prevp = np;
1198 prevp = &np->next;
1199 }
1200 }
1201 *prevp = NULL;
1202 return head;
1203}
1204EXPORT_SYMBOL(find_devices);
1205
1206/**
1207 * Construct and return a list of the device_nodes with a given type.
1208 */
1209struct device_node *
1210find_type_devices(const char *type)
1211{
1212 struct device_node *head, **prevp, *np;
1213
1214 prevp = &head;
1215 for (np = allnodes; np != 0; np = np->allnext) {
1216 if (np->type != 0 && strcasecmp(np->type, type) == 0) {
1217 *prevp = np;
1218 prevp = &np->next;
1219 }
1220 }
1221 *prevp = NULL;
1222 return head;
1223}
1224EXPORT_SYMBOL(find_type_devices);
1225
1226/**
1227 * Returns all nodes linked together
1228 */
1229struct device_node *
1230find_all_nodes(void)
1231{
1232 struct device_node *head, **prevp, *np;
1233
1234 prevp = &head;
1235 for (np = allnodes; np != 0; np = np->allnext) {
1236 *prevp = np;
1237 prevp = &np->next;
1238 }
1239 *prevp = NULL;
1240 return head;
1241}
1242EXPORT_SYMBOL(find_all_nodes);
1243
1244/** Checks if the given "compat" string matches one of the strings in
1245 * the device's "compatible" property
1246 */
1247int
1248device_is_compatible(struct device_node *device, const char *compat)
1249{
1250 const char* cp;
1251 int cplen, l;
1252
1253 cp = (char *) get_property(device, "compatible", &cplen);
1254 if (cp == NULL)
1255 return 0;
1256 while (cplen > 0) {
1257 if (strncasecmp(cp, compat, strlen(compat)) == 0)
1258 return 1;
1259 l = strlen(cp) + 1;
1260 cp += l;
1261 cplen -= l;
1262 }
1263
1264 return 0;
1265}
1266EXPORT_SYMBOL(device_is_compatible);
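
/*
 * Usage note (not part of this file): the strncasecmp above makes
 * each comparison a case-insensitive prefix test, so compat
 * "chrp,iic" would also match a hypothetical entry such as
 * "chrp,iic-extended". Typical call, as used earlier in this file:
 *
 *	if (device_is_compatible(ic, "chrp,iic"))
 *		... treat as a cascaded 8259 ...
 */
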
1267
1268
1269/**
1270 * Indicates whether the root node has a given value in its
1271 * compatible property.
1272 */
1273int
1274machine_is_compatible(const char *compat)
1275{
1276 struct device_node *root;
1277 int rc = 0;
1278
1279 root = of_find_node_by_path("/");
1280 if (root) {
1281 rc = device_is_compatible(root, compat);
1282 of_node_put(root);
1283 }
1284 return rc;
1285}
1286EXPORT_SYMBOL(machine_is_compatible);
1287
1288/**
1289 * Construct and return a list of the device_nodes with a given type
1290 * and compatible property.
1291 */
1292struct device_node *
1293find_compatible_devices(const char *type, const char *compat)
1294{
1295 struct device_node *head, **prevp, *np;
1296
1297 prevp = &head;
1298 for (np = allnodes; np != 0; np = np->allnext) {
1299 if (type != NULL
1300 && !(np->type != 0 && strcasecmp(np->type, type) == 0))
1301 continue;
1302 if (device_is_compatible(np, compat)) {
1303 *prevp = np;
1304 prevp = &np->next;
1305 }
1306 }
1307 *prevp = NULL;
1308 return head;
1309}
1310EXPORT_SYMBOL(find_compatible_devices);
1311
1312/**
1313 * Find the device_node with a given full_name.
1314 */
1315struct device_node *
1316find_path_device(const char *path)
1317{
1318 struct device_node *np;
1319
1320 for (np = allnodes; np != 0; np = np->allnext)
1321 if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0)
1322 return np;
1323 return NULL;
1324}
1325EXPORT_SYMBOL(find_path_device);
1326
1327/*******
1328 *
1329 * New implementation of the OF "find" APIs, return a refcounted
1330 * object, call of_node_put() when done. The device tree and list
1331 * are protected by a rw_lock.
1332 *
1333 * Note that property management will need some locking as well,
1334 * this isn't dealt with yet.
1335 *
1336 *******/
1337
1338/**
1339 * of_find_node_by_name - Find a node by its "name" property
1340 * @from: The node to start searching from or NULL, the node
1341 * you pass will not be searched, only the next one
1342 * will; typically, you pass what the previous call
1343 * returned. of_node_put() will be called on it
1344 * @name: The name string to match against
1345 *
1346 * Returns a node pointer with refcount incremented, use
1347 * of_node_put() on it when done.
1348 */
1349struct device_node *of_find_node_by_name(struct device_node *from,
1350 const char *name)
1351{
1352 struct device_node *np;
1353
1354 read_lock(&devtree_lock);
1355 np = from ? from->allnext : allnodes;
1356 for (; np != 0; np = np->allnext)
1357 if (np->name != 0 && strcasecmp(np->name, name) == 0
1358 && of_node_get(np))
1359 break;
1360 if (from)
1361 of_node_put(from);
1362 read_unlock(&devtree_lock);
1363 return np;
1364}
1365EXPORT_SYMBOL(of_find_node_by_name);
1366
1367/**
1368 * of_find_node_by_type - Find a node by its "device_type" property
1369 * @from: The node to start searching from or NULL, the node
1370 * you pass will not be searched, only the next one
1371 * will; typically, you pass what the previous call
1372 * returned. of_node_put() will be called on it
1373 * @name: The type string to match against
1374 *
1375 * Returns a node pointer with refcount incremented, use
1376 * of_node_put() on it when done.
1377 */
1378struct device_node *of_find_node_by_type(struct device_node *from,
1379 const char *type)
1380{
1381 struct device_node *np;
1382
1383 read_lock(&devtree_lock);
1384 np = from ? from->allnext : allnodes;
1385 for (; np != 0; np = np->allnext)
1386 if (np->type != 0 && strcasecmp(np->type, type) == 0
1387 && of_node_get(np))
1388 break;
1389 if (from)
1390 of_node_put(from);
1391 read_unlock(&devtree_lock);
1392 return np;
1393}
1394EXPORT_SYMBOL(of_find_node_by_type);
1395
1396/**
1397 * of_find_compatible_node - Find a node based on type and one of the
1398 * tokens in its "compatible" property
1399 * @from: The node to start searching from or NULL, the node
1400 * you pass will not be searched, only the next one
1401 * will; typically, you pass what the previous call
1402 * returned. of_node_put() will be called on it
1403 * @type: The type string to match "device_type" or NULL to ignore
1404 * @compatible: The string to match to one of the tokens in the device
1405 * "compatible" list.
1406 *
1407 * Returns a node pointer with refcount incremented, use
1408 * of_node_put() on it when done.
1409 */
1410struct device_node *of_find_compatible_node(struct device_node *from,
1411 const char *type, const char *compatible)
1412{
1413 struct device_node *np;
1414
1415 read_lock(&devtree_lock);
1416 np = from ? from->allnext : allnodes;
1417 for (; np != 0; np = np->allnext) {
1418 if (type != NULL
1419 && !(np->type != 0 && strcasecmp(np->type, type) == 0))
1420 continue;
1421 if (device_is_compatible(np, compatible) && of_node_get(np))
1422 break;
1423 }
1424 if (from)
1425 of_node_put(from);
1426 read_unlock(&devtree_lock);
1427 return np;
1428}
1429EXPORT_SYMBOL(of_find_compatible_node);
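
/*
 * Sketch (not part of this file): the intended iteration idiom for
 * these refcounted finders. Passing the previous result back in
 * releases it, so a full scan leaks no references:
 *
 *	struct device_node *np = NULL;
 *
 *	while ((np = of_find_node_by_type(np, "cpu")) != NULL) {
 *		... use np; the next call drops this reference ...
 *	}
 */
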
1430
1431/**
1432 * of_find_node_by_path - Find a node matching a full OF path
1433 * @path: The full path to match
1434 *
1435 * Returns a node pointer with refcount incremented, use
1436 * of_node_put() on it when done.
1437 */
1438struct device_node *of_find_node_by_path(const char *path)
1439{
1440 struct device_node *np = allnodes;
1441
1442 read_lock(&devtree_lock);
1443 for (; np != 0; np = np->allnext)
1444 if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0
1445 && of_node_get(np))
1446 break;
1447 read_unlock(&devtree_lock);
1448 return np;
1449}
1450EXPORT_SYMBOL(of_find_node_by_path);
1451
1452/**
1453 * of_find_node_by_phandle - Find a node given a phandle
1454 * @handle: phandle of the node to find
1455 *
1456 * Returns a node pointer with refcount incremented, use
1457 * of_node_put() on it when done.
1458 */
1459struct device_node *of_find_node_by_phandle(phandle handle)
1460{
1461 struct device_node *np;
1462
1463 read_lock(&devtree_lock);
1464 for (np = allnodes; np != 0; np = np->allnext)
1465 if (np->linux_phandle == handle)
1466 break;
1467 if (np)
1468 of_node_get(np);
1469 read_unlock(&devtree_lock);
1470 return np;
1471}
1472EXPORT_SYMBOL(of_find_node_by_phandle);
1473
1474/**
1475 * of_find_all_nodes - Get next node in global list
1476 * @prev: Previous node or NULL to start iteration
1477 * of_node_put() will be called on it
1478 *
1479 * Returns a node pointer with refcount incremented, use
1480 * of_node_put() on it when done.
1481 */
1482struct device_node *of_find_all_nodes(struct device_node *prev)
1483{
1484 struct device_node *np;
1485
1486 read_lock(&devtree_lock);
1487 np = prev ? prev->allnext : allnodes;
1488 for (; np != 0; np = np->allnext)
1489 if (of_node_get(np))
1490 break;
1491 if (prev)
1492 of_node_put(prev);
1493 read_unlock(&devtree_lock);
1494 return np;
1495}
1496EXPORT_SYMBOL(of_find_all_nodes);
1497
1498/**
1499 * of_get_parent - Get a node's parent if any
1500 * @node: Node to get parent
1501 *
1502 * Returns a node pointer with refcount incremented, use
1503 * of_node_put() on it when done.
1504 */
1505struct device_node *of_get_parent(const struct device_node *node)
1506{
1507 struct device_node *np;
1508
1509 if (!node)
1510 return NULL;
1511
1512 read_lock(&devtree_lock);
1513 np = of_node_get(node->parent);
1514 read_unlock(&devtree_lock);
1515 return np;
1516}
1517EXPORT_SYMBOL(of_get_parent);
1518
1519/**
1520 * of_get_next_child - Iterate over a node's children
1521 * @node: parent node
1522 * @prev: previous child of the parent node, or NULL to get first
1523 *
1524 * Returns a node pointer with refcount incremented, use
1525 * of_node_put() on it when done.
1526 */
1527struct device_node *of_get_next_child(const struct device_node *node,
1528 struct device_node *prev)
1529{
1530 struct device_node *next;
1531
1532 read_lock(&devtree_lock);
1533 next = prev ? prev->sibling : node->child;
1534 for (; next != 0; next = next->sibling)
1535 if (of_node_get(next))
1536 break;
1537 if (prev)
1538 of_node_put(prev);
1539 read_unlock(&devtree_lock);
1540 return next;
1541}
1542EXPORT_SYMBOL(of_get_next_child);
1543
1544/**
1545 * of_node_get - Increment refcount of a node
1546 * @node: Node to inc refcount, NULL is supported to
1547 * simplify writing of callers
1548 *
1549 * Returns node.
1550 */
1551struct device_node *of_node_get(struct device_node *node)
1552{
1553 if (node)
1554 kref_get(&node->kref);
1555 return node;
1556}
1557EXPORT_SYMBOL(of_node_get);
1558
1559static inline struct device_node * kref_to_device_node(struct kref *kref)
1560{
1561 return container_of(kref, struct device_node, kref);
1562}
1563
1564/**
1565 * of_node_release - release a dynamically allocated node
1566 * @kref: kref element of the node to be released
1567 *
1568 * In of_node_put() this function is passed to kref_put()
1569 * as the destructor.
1570 */
1571static void of_node_release(struct kref *kref)
1572{
1573 struct device_node *node = kref_to_device_node(kref);
1574 struct property *prop = node->properties;
1575
1576 if (!OF_IS_DYNAMIC(node))
1577 return;
1578 while (prop) {
1579 struct property *next = prop->next;
1580 kfree(prop->name);
1581 kfree(prop->value);
1582 kfree(prop);
1583 prop = next;
1584 }
1585 kfree(node->intrs);
1586 kfree(node->addrs);
1587 kfree(node->full_name);
1588 kfree(node);
1589}
1590
1591/**
1592 * of_node_put - Decrement refcount of a node
1593 * @node: Node to dec refcount, NULL is supported to
1594 * simplify writing of callers
1595 *
1596 */
1597void of_node_put(struct device_node *node)
1598{
1599 if (node)
1600 kref_put(&node->kref, of_node_release);
1601}
1602EXPORT_SYMBOL(of_node_put);
1603
1604/*
1605 * Fix up the uninitialized fields in a new device node:
1606 * name, type, n_addrs, addrs, n_intrs, intrs, and pci-specific fields
1607 *
1608 * A lot of boot-time code is duplicated here, because functions such
1609 * as finish_node_interrupts, interpret_pci_props, etc. cannot use the
1610 * slab allocator.
1611 *
1612 * This should probably be split up into smaller chunks.
1613 */
1614
1615static int of_finish_dynamic_node(struct device_node *node,
1616 unsigned long *unused1, int unused2,
1617 int unused3, int unused4)
1618{
1619 struct device_node *parent = of_get_parent(node);
1620 int err = 0;
1621 phandle *ibm_phandle;
1622
1623 node->name = get_property(node, "name", NULL);
1624 node->type = get_property(node, "device_type", NULL);
1625
1626 if (!parent) {
1627 err = -ENODEV;
1628 goto out;
1629 }
1630
1631 /* We don't support that function on PowerMac, at least
1632 * not yet
1633 */
1634 if (systemcfg->platform == PLATFORM_POWERMAC)
1635 { err = -ENODEV; goto out; } /* don't leak the parent reference */
1636
1637 /* fix up new node's linux_phandle field */
1638 if ((ibm_phandle = (phandle *)get_property(node, "ibm,phandle", NULL)))
1639 node->linux_phandle = *ibm_phandle;
1640
1641out:
1642 of_node_put(parent);
1643 return err;
1644}
1645
1646/*
1647 * Plug a device node into the tree and global list.
1648 */
1649void of_attach_node(struct device_node *np)
1650{
1651 write_lock(&devtree_lock);
1652 np->sibling = np->parent->child;
1653 np->allnext = allnodes;
1654 np->parent->child = np;
1655 allnodes = np;
1656 write_unlock(&devtree_lock);
1657}
1658
1659/*
1660 * "Unplug" a node from the device tree. The caller must hold
1661 * a reference to the node. The memory associated with the node
1662 * is not freed until its refcount goes to zero.
1663 */
1664void of_detach_node(const struct device_node *np)
1665{
1666 struct device_node *parent;
1667
1668 write_lock(&devtree_lock);
1669
1670 parent = np->parent;
1671
1672 if (allnodes == np)
1673 allnodes = np->allnext;
1674 else {
1675 struct device_node *prev;
1676 for (prev = allnodes;
1677 prev->allnext != np;
1678 prev = prev->allnext)
1679 ;
1680 prev->allnext = np->allnext;
1681 }
1682
1683 if (parent->child == np)
1684 parent->child = np->sibling;
1685 else {
1686 struct device_node *prevsib;
1687 for (prevsib = np->parent->child;
1688 prevsib->sibling != np;
1689 prevsib = prevsib->sibling)
1690 ;
1691 prevsib->sibling = np->sibling;
1692 }
1693
1694 write_unlock(&devtree_lock);
1695}
1696
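#if 0
/*
 * Editor's illustrative sketch, not part of the original file: the
 * expected lifecycle around the attach/detach helpers above for a
 * dynamically allocated (OF_IS_DYNAMIC) node.
 */
static void example_attach_detach(struct device_node *np)
{
	/* np->parent must already be set before attaching */
	of_attach_node(np);

	/* ... the node is now visible in the tree and global list ... */

	of_node_get(np);	/* the caller must hold a reference to detach */
	of_detach_node(np);
	of_node_put(np);	/* memory is freed once the refcount reaches 0 */
}
#endif
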
1697static int prom_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
1698{
1699 int err;
1700
1701 switch (action) {
1702 case PSERIES_RECONFIG_ADD:
1703 err = finish_node(node, NULL, of_finish_dynamic_node, 0, 0, 0);
1704 if (err < 0) {
1705 printk(KERN_ERR "finish_node returned %d\n", err);
1706 err = NOTIFY_BAD;
1707 }
1708 break;
1709 default:
1710 err = NOTIFY_DONE;
1711 break;
1712 }
1713 return err;
1714}
1715
1716static struct notifier_block prom_reconfig_nb = {
1717 .notifier_call = prom_reconfig_notifier,
1718 .priority = 10, /* This one needs to run first */
1719};
1720
1721static int __init prom_reconfig_setup(void)
1722{
1723 return pSeries_reconfig_notifier_register(&prom_reconfig_nb);
1724}
1725__initcall(prom_reconfig_setup);
1726
1727/*
1728 * Find a property with a given name for a given node
1729 * and return the value.
1730 */
1731unsigned char *
1732get_property(struct device_node *np, const char *name, int *lenp)
1733{
1734 struct property *pp;
1735
1736 for (pp = np->properties; pp != 0; pp = pp->next)
1737 if (strcmp(pp->name, name) == 0) {
1738 if (lenp != 0)
1739 *lenp = pp->length;
1740 return pp->value;
1741 }
1742 return NULL;
1743}
1744EXPORT_SYMBOL(get_property);
1745
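#if 0
/*
 * Editor's illustrative sketch, not part of the original file: typical
 * use of get_property() above. The returned pointer aliases the node's
 * property storage, so callers must not modify or kfree() it.
 */
static void example_get_property(struct device_node *np)
{
	int len;
	unsigned char *val = get_property(np, "device_type", &len);

	if (val != NULL)
		printk(KERN_DEBUG "device_type (%d bytes): %s\n", len, val);
}
#endif
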
1746/*
1747 * Add a property to a node
1748 */
1749void
1750prom_add_property(struct device_node* np, struct property* prop)
1751{
1752 struct property **next = &np->properties;
1753
1754 prop->next = NULL;
1755 while (*next)
1756 next = &(*next)->next;
1757 *next = prop;
1758}
1759
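#if 0
/*
 * Editor's illustrative sketch, not part of the original file: adding
 * a property with prom_add_property() above. Nothing is copied, so the
 * property and its name/value must stay allocated for the lifetime of
 * the node ("linux,example" is a made-up property name).
 */
static void example_add_property(struct device_node *np, struct property *prop)
{
	prop->name = "linux,example";
	prop->value = (unsigned char *)"1";
	prop->length = 2;	/* value length includes the trailing NUL */
	prom_add_property(np, prop);
}
#endif
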
1760#if 0
1761void
1762print_properties(struct device_node *np)
1763{
1764 struct property *pp;
1765 char *cp;
1766 int i, n;
1767
1768 for (pp = np->properties; pp != 0; pp = pp->next) {
1769 printk(KERN_INFO "%s", pp->name);
1770 for (i = strlen(pp->name); i < 16; ++i)
1771 printk(" ");
1772 cp = (char *) pp->value;
1773 for (i = pp->length; i > 0; --i, ++cp)
1774 if ((i > 1 && (*cp < 0x20 || *cp > 0x7e))
1775 || (i == 1 && *cp != 0))
1776 break;
1777 if (i == 0 && pp->length > 1) {
1778 /* looks like a string */
1779 printk(" %s\n", (char *) pp->value);
1780 } else {
1781 /* dump it in hex */
1782 n = pp->length;
1783 if (n > 64)
1784 n = 64;
1785 if (pp->length % 4 == 0) {
1786 unsigned int *p = (unsigned int *) pp->value;
1787
1788 n /= 4;
1789 for (i = 0; i < n; ++i) {
1790 if (i != 0 && (i % 4) == 0)
1791 printk("\n ");
1792 printk(" %08x", *p++);
1793 }
1794 } else {
1795 unsigned char *bp = pp->value;
1796
1797 for (i = 0; i < n; ++i) {
1798 if (i != 0 && (i % 16) == 0)
1799 printk("\n ");
1800 printk(" %02x", *bp++);
1801 }
1802 }
1803 printk("\n");
1804 if (pp->length > 64)
1805 printk(" ... (length = %d)\n",
1806 pp->length);
1807 }
1808 }
1809}
1810#endif
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c
new file mode 100644
index 000000000000..8dffa9ae2623
--- /dev/null
+++ b/arch/ppc64/kernel/prom_init.c
@@ -0,0 +1,1838 @@
1/*
2 *
3 *
4 * Procedures for interfacing to Open Firmware.
5 *
6 * Paul Mackerras August 1996.
7 * Copyright (C) 1996 Paul Mackerras.
8 *
9 * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
10 * {engebret|bergner}@us.ibm.com
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#undef DEBUG_PROM
19
20#include <stdarg.h>
21#include <linux/config.h>
22#include <linux/kernel.h>
23#include <linux/string.h>
24#include <linux/init.h>
25#include <linux/version.h>
26#include <linux/threads.h>
27#include <linux/spinlock.h>
28#include <linux/types.h>
29#include <linux/pci.h>
30#include <linux/proc_fs.h>
31#include <linux/stringify.h>
32#include <linux/delay.h>
33#include <linux/initrd.h>
34#include <linux/bitops.h>
35#include <asm/prom.h>
36#include <asm/rtas.h>
37#include <asm/abs_addr.h>
38#include <asm/page.h>
39#include <asm/processor.h>
40#include <asm/irq.h>
41#include <asm/io.h>
42#include <asm/smp.h>
43#include <asm/system.h>
44#include <asm/mmu.h>
45#include <asm/pgtable.h>
46#include <asm/pci.h>
47#include <asm/iommu.h>
48#include <asm/bootinfo.h>
49#include <asm/ppcdebug.h>
50#include <asm/btext.h>
51#include <asm/sections.h>
52#include <asm/machdep.h>
53
54#ifdef CONFIG_LOGO_LINUX_CLUT224
55#include <linux/linux_logo.h>
56extern const struct linux_logo logo_linux_clut224;
57#endif
58
59/*
60 * Properties whose value is longer than this get excluded from our
61 * copy of the device tree. This value does need to be big enough to
62 * ensure that we don't lose things like the interrupt-map property
63 * on a PCI-PCI bridge.
64 */
65#define MAX_PROPERTY_LENGTH (1UL * 1024 * 1024)
66
67/*
68 * Eventually bump that one up
69 */
70#define DEVTREE_CHUNK_SIZE 0x100000
71
72/*
73 * This is the size of the local memory reserve map that gets copied
74 * into the boot params passed to the kernel. That size is totally
75 * flexible as the kernel just reads the list until it encounters an
76 * entry with size 0, so it can be changed without breaking binary
77 * compatibility
78 */
79#define MEM_RESERVE_MAP_SIZE 8
80
81/*
82 * prom_init() is called very early on, before the kernel text
83 * and data have been mapped to KERNELBASE. At this point the code
84 * is running at whatever address it has been loaded at, so
85 * references to extern and static variables must be relocated
86 * explicitly. The procedure reloc_offset() returns the address
87 * we're currently running at minus the address we were linked at.
88 * (Note that strings count as static variables.)
89 *
90 * Because OF may have mapped I/O devices into the area starting at
91 * KERNELBASE, particularly on CHRP machines, we can't safely call
92 * OF once the kernel has been mapped to KERNELBASE. Therefore all
93 * OF calls should be done within prom_init(), and prom_init()
94 * and all routines called within it must be careful to relocate
95 * references as necessary.
96 *
97 * Note that the bss is cleared *after* prom_init runs, so we have
98 * to make sure that any static or extern variables it accesses
99 * are put in the data segment.
100 */
101
102
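#if 0
/*
 * Editor's illustrative sketch, not part of the original file: the
 * relocation idiom described above. RELOC()/PTRRELOC() (defined in
 * headers elsewhere) rebase a static variable's linked address by the
 * local `offset', so the access works before we run at KERNELBASE.
 */
static int __initdata example_flag;

static void __init example_reloc_usage(void)
{
	unsigned long offset = reloc_offset();

	RELOC(example_flag) = 1;	/* not: example_flag = 1 */
}
#endif
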
103#define PROM_BUG() do { \
104 prom_printf("kernel BUG at %s line 0x%x!\n", \
105 RELOC(__FILE__), __LINE__); \
106 __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
107} while (0)
108
109#ifdef DEBUG_PROM
110#define prom_debug(x...) prom_printf(x)
111#else
112#define prom_debug(x...)
113#endif
114
115
116typedef u32 prom_arg_t;
117
118struct prom_args {
119 u32 service;
120 u32 nargs;
121 u32 nret;
122 prom_arg_t args[10];
123 prom_arg_t *rets; /* Pointer to return values in args[16]. */
124};
125
126struct prom_t {
127 unsigned long entry;
128 ihandle root;
129 ihandle chosen;
130 int cpu;
131 ihandle stdout;
132 ihandle disp_node;
133 struct prom_args args;
134 unsigned long version;
135 unsigned long root_size_cells;
136 unsigned long root_addr_cells;
137};
138
139struct pci_reg_property {
140 struct pci_address addr;
141 u32 size_hi;
142 u32 size_lo;
143};
144
145struct mem_map_entry {
146 u64 base;
147 u64 size;
148};
149
150typedef u32 cell_t;
151
152extern void __start(unsigned long r3, unsigned long r4, unsigned long r5);
153
154extern void enter_prom(struct prom_args *args, unsigned long entry);
155extern void copy_and_flush(unsigned long dest, unsigned long src,
156 unsigned long size, unsigned long offset);
157
158extern unsigned long klimit;
159
160/* prom structure */
161static struct prom_t __initdata prom;
162
163#define PROM_SCRATCH_SIZE 256
164
165static char __initdata of_stdout_device[256];
166static char __initdata prom_scratch[PROM_SCRATCH_SIZE];
167
168static unsigned long __initdata dt_header_start;
169static unsigned long __initdata dt_struct_start, dt_struct_end;
170static unsigned long __initdata dt_string_start, dt_string_end;
171
172static unsigned long __initdata prom_initrd_start, prom_initrd_end;
173
174static int __initdata iommu_force_on;
175static int __initdata ppc64_iommu_off;
176static int __initdata of_platform;
177
178static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
179
180static unsigned long __initdata prom_memory_limit;
181static unsigned long __initdata prom_tce_alloc_start;
182static unsigned long __initdata prom_tce_alloc_end;
183
184static unsigned long __initdata alloc_top;
185static unsigned long __initdata alloc_top_high;
186static unsigned long __initdata alloc_bottom;
187static unsigned long __initdata rmo_top;
188static unsigned long __initdata ram_top;
189
190static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE];
191static int __initdata mem_reserve_cnt;
192
193static cell_t __initdata regbuf[1024];
194
195
196#define MAX_CPU_THREADS 2
197
198/* TO GO */
199#ifdef CONFIG_HMT
200struct {
201 unsigned int pir;
202 unsigned int threadid;
203} hmt_thread_data[NR_CPUS];
204#endif /* CONFIG_HMT */
205
206/*
207 * These are used in calls to call_prom. The 4th and following
208 * arguments to call_prom should be 32-bit values. 64-bit values
209 * are truncated to 32 bits (and fortunately don't get interpreted
210 * as two arguments).
211 */
212#define ADDR(x) (u32) ((unsigned long)(x) - offset)
213
214/* This is the one and *ONLY* place where we actually call Open
215 * Firmware from, since we need to make sure we're running in 32b
216 * mode when we do. We switch back to 64b mode upon return.
217 */
218
219#define PROM_ERROR (-1)
220
221static int __init call_prom(const char *service, int nargs, int nret, ...)
222{
223 int i;
224 unsigned long offset = reloc_offset();
225 struct prom_t *_prom = PTRRELOC(&prom);
226 va_list list;
227
228 _prom->args.service = ADDR(service);
229 _prom->args.nargs = nargs;
230 _prom->args.nret = nret;
231 _prom->args.rets = (prom_arg_t *)&(_prom->args.args[nargs]);
232
233 va_start(list, nret);
234 for (i=0; i < nargs; i++)
235 _prom->args.args[i] = va_arg(list, prom_arg_t);
236 va_end(list);
237
238 for (i=0; i < nret ;i++)
239 _prom->args.rets[i] = 0;
240
241 enter_prom(&_prom->args, _prom->entry);
242
243 return (nret > 0) ? _prom->args.rets[0] : 0;
244}
245
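#if 0
/*
 * Editor's illustrative sketch, not part of the original file: the
 * call_prom() convention above, mirroring prom_getprop() below. String
 * and pointer arguments go through ADDR() so OF sees the address we
 * are actually running at, and wide values are truncated to 32 bits.
 */
static void __init example_call_prom(phandle node)
{
	unsigned long offset = reloc_offset();
	u32 val = 0;

	/* "getprop" takes 4 arguments (node, name, buf, buflen), returns 1 */
	call_prom("getprop", 4, 1, node, ADDR("#address-cells"),
		  (u32)(unsigned long)&val, (u32)sizeof(val));
}
#endif
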
246
247static unsigned int __init prom_claim(unsigned long virt, unsigned long size,
248 unsigned long align)
249{
250 return (unsigned int)call_prom("claim", 3, 1,
251 (prom_arg_t)virt, (prom_arg_t)size,
252 (prom_arg_t)align);
253}
254
255static void __init prom_print(const char *msg)
256{
257 const char *p, *q;
258 unsigned long offset = reloc_offset();
259 struct prom_t *_prom = PTRRELOC(&prom);
260
261 if (_prom->stdout == 0)
262 return;
263
264 for (p = msg; *p != 0; p = q) {
265 for (q = p; *q != 0 && *q != '\n'; ++q)
266 ;
267 if (q > p)
268 call_prom("write", 3, 1, _prom->stdout, p, q - p);
269 if (*q == 0)
270 break;
271 ++q;
272 call_prom("write", 3, 1, _prom->stdout, ADDR("\r\n"), 2);
273 }
274}
275
276
277static void __init prom_print_hex(unsigned long val)
278{
279 unsigned long offset = reloc_offset();
280 int i, nibbles = sizeof(val)*2;
281 char buf[sizeof(val)*2+1];
282 struct prom_t *_prom = PTRRELOC(&prom);
283
284 for (i = nibbles-1; i >= 0; i--) {
285 buf[i] = (val & 0xf) + '0';
286 if (buf[i] > '9')
287 buf[i] += ('a'-'0'-10);
288 val >>= 4;
289 }
290 buf[nibbles] = '\0';
291 call_prom("write", 3, 1, _prom->stdout, buf, nibbles);
292}
293
294
295static void __init prom_printf(const char *format, ...)
296{
297 unsigned long offset = reloc_offset();
298 const char *p, *q, *s;
299 va_list args;
300 unsigned long v;
301 struct prom_t *_prom = PTRRELOC(&prom);
302
303 va_start(args, format);
304 for (p = PTRRELOC(format); *p != 0; p = q) {
305 for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q)
306 ;
307 if (q > p)
308 call_prom("write", 3, 1, _prom->stdout, p, q - p);
309 if (*q == 0)
310 break;
311 if (*q == '\n') {
312 ++q;
313 call_prom("write", 3, 1, _prom->stdout,
314 ADDR("\r\n"), 2);
315 continue;
316 }
317 ++q;
318 if (*q == 0)
319 break;
320 switch (*q) {
321 case 's':
322 ++q;
323 s = va_arg(args, const char *);
324 prom_print(s);
325 break;
326 case 'x':
327 ++q;
328 v = va_arg(args, unsigned long);
329 prom_print_hex(v);
330 break;
331 }
332 }
333}
334
335
336static void __init __attribute__((noreturn)) prom_panic(const char *reason)
337{
338 unsigned long offset = reloc_offset();
339
340 prom_print(PTRRELOC(reason));
341 /* ToDo: should put up an SRC here */
342 call_prom("exit", 0, 0);
343
344 for (;;) /* should never get here */
345 ;
346}
347
348
349static int __init prom_next_node(phandle *nodep)
350{
351 phandle node;
352
353 if ((node = *nodep) != 0
354 && (*nodep = call_prom("child", 1, 1, node)) != 0)
355 return 1;
356 if ((*nodep = call_prom("peer", 1, 1, node)) != 0)
357 return 1;
358 for (;;) {
359 if ((node = call_prom("parent", 1, 1, node)) == 0)
360 return 0;
361 if ((*nodep = call_prom("peer", 1, 1, node)) != 0)
362 return 1;
363 }
364}
365
366static int __init prom_getprop(phandle node, const char *pname,
367 void *value, size_t valuelen)
368{
369 unsigned long offset = reloc_offset();
370
371 return call_prom("getprop", 4, 1, node, ADDR(pname),
372 (u32)(unsigned long) value, (u32) valuelen);
373}
374
375static int __init prom_getproplen(phandle node, const char *pname)
376{
377 unsigned long offset = reloc_offset();
378
379 return call_prom("getproplen", 2, 1, node, ADDR(pname));
380}
381
382static int __init prom_setprop(phandle node, const char *pname,
383 void *value, size_t valuelen)
384{
385 unsigned long offset = reloc_offset();
386
387 return call_prom("setprop", 4, 1, node, ADDR(pname),
388 (u32)(unsigned long) value, (u32) valuelen);
389}
390
391/* We can't use the standard versions because of RELOC headaches. */
392#define isxdigit(c) (('0' <= (c) && (c) <= '9') \
393 || ('a' <= (c) && (c) <= 'f') \
394 || ('A' <= (c) && (c) <= 'F'))
395
396#define isdigit(c) ('0' <= (c) && (c) <= '9')
397#define islower(c) ('a' <= (c) && (c) <= 'z')
398#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c))
399
400unsigned long prom_strtoul(const char *cp, const char **endp)
401{
402 unsigned long result = 0, base = 10, value;
403
404 if (*cp == '0') {
405 base = 8;
406 cp++;
407 if (toupper(*cp) == 'X') {
408 cp++;
409 base = 16;
410 }
411 }
412
413 while (isxdigit(*cp) &&
414 (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) {
415 result = result * base + value;
416 cp++;
417 }
418
419 if (endp)
420 *endp = cp;
421
422 return result;
423}
424
425unsigned long prom_memparse(const char *ptr, const char **retptr)
426{
427 unsigned long ret = prom_strtoul(ptr, retptr);
428 int shift = 0;
429
430 /*
431 * We can't use a switch here because GCC *may* generate a
432 * jump table which won't work, because we're not running at
433 * the address we're linked at.
434 */
435 if ('G' == **retptr || 'g' == **retptr)
436 shift = 30;
437
438 if ('M' == **retptr || 'm' == **retptr)
439 shift = 20;
440
441 if ('K' == **retptr || 'k' == **retptr)
442 shift = 10;
443
444 if (shift) {
445 ret <<= shift;
446 (*retptr)++;
447 }
448
449 return ret;
450}
451
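/*
 * Editor's note, not part of the original file: prom_memparse()
 * behaviour above for the usual suffixes, e.g.:
 *
 *	prom_memparse("16", &p)    == 16		(no suffix)
 *	prom_memparse("0x10M", &p) == 0x1000000	(hex, 'M' shifts by 20)
 *	prom_memparse("2G", &p)    == 0x80000000	('G' shifts by 30)
 */
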
452/*
453 * Early parsing of the command line passed to the kernel, used for
454 * "mem=x" and the options that affect the iommu
455 */
456static void __init early_cmdline_parse(void)
457{
458 unsigned long offset = reloc_offset();
459 struct prom_t *_prom = PTRRELOC(&prom);
460 char *opt, *p;
461 int l = 0;
462
463 RELOC(prom_cmd_line[0]) = 0;
464 p = RELOC(prom_cmd_line);
465 if ((long)_prom->chosen > 0)
466 l = prom_getprop(_prom->chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
467#ifdef CONFIG_CMDLINE
468 if (l <= 0) /* dbl check, also covers getprop failure */
469 strlcpy(RELOC(prom_cmd_line),
470 RELOC(CONFIG_CMDLINE), sizeof(prom_cmd_line));
471#endif /* CONFIG_CMDLINE */
472 prom_printf("command line: %s\n", RELOC(prom_cmd_line));
473
474 opt = strstr(RELOC(prom_cmd_line), RELOC("iommu="));
475 if (opt) {
476 prom_printf("iommu opt is: %s\n", opt);
477 opt += 6;
478 while (*opt && *opt == ' ')
479 opt++;
480 if (!strncmp(opt, RELOC("off"), 3))
481 RELOC(ppc64_iommu_off) = 1;
482 else if (!strncmp(opt, RELOC("force"), 5))
483 RELOC(iommu_force_on) = 1;
484 }
485
486 opt = strstr(RELOC(prom_cmd_line), RELOC("mem="));
487 if (opt) {
488 opt += 4;
489 RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt);
490 /* Align to 16 MB == size of large page */
491 RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000);
492 }
493}
494
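/*
 * Editor's note, not part of the original file: an example of what
 * early_cmdline_parse() above extracts. Given
 *
 *	bootargs = "root=/dev/sda2 mem=512M iommu=force"
 *
 * it sets iommu_force_on = 1 and prom_memory_limit = 0x20000000
 * (512M is already 16MB aligned, so ALIGN() leaves it unchanged).
 */
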
495/*
496 * Memory allocation strategy... our layout is normally:
497 *
498 * at 14Mb or more we have vmlinux, then a gap and the initrd. In some
499 * rare cases the initrd might end up being before the kernel though. We
500 * assume this won't overwrite the final kernel at 0; we have no provision
501 * to handle that in this version, but it should hopefully never happen.
502 *
503 * alloc_top is set to the top of RMO, eventually shrunk if the TCEs overlap
504 * alloc_bottom is set to the top of kernel/initrd
505 *
506 * From there, allocations are done this way: RTAS is allocated topmost, and
507 * the device-tree is allocated from the bottom. We try to grow the device-tree
508 * allocation as we progress. If we can't, then we fail; we don't currently
509 * have a facility to restart elsewhere, but that shouldn't be necessary either.
510 *
511 * Note that calls to reserve_mem have to be done explicitly; memory allocated
512 * with either alloc_up or alloc_down isn't automatically reserved.
513 */
514
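/*
 * Editor's note, not part of the original file: the layout the strategy
 * above produces (boundaries are illustrative, not fixed addresses):
 *
 *	ram_top        ----------------  top of RAM
 *	                 TCE tables (alloc_down, highmem=1)
 *	alloc_top_high ----------------
 *	alloc_top      ----------------  top of RMO (may shrink)
 *	                 RTAS (alloc_down)
 *	                 free space
 *	                 device-tree (alloc_up)
 *	alloc_bottom   ----------------  top of kernel/initrd
 *	0              ----------------  final kernel destination
 */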
515
516/*
517 * Allocates memory in the RMO upward from the kernel/initrd
518 *
519 * When align is 0, this is a special case: it means to allocate in place
520 * at the current location of alloc_bottom, or fail (that is, basically
521 * extending the previous allocation). Used for the device-tree flattening.
522 */
523static unsigned long __init alloc_up(unsigned long size, unsigned long align)
524{
525 unsigned long offset = reloc_offset();
526 unsigned long base = _ALIGN_UP(RELOC(alloc_bottom), align);
527 unsigned long addr = 0;
528
529 prom_debug("alloc_up(%x, %x)\n", size, align);
530 if (RELOC(ram_top) == 0)
531 prom_panic("alloc_up() called with mem not initialized\n");
532
533 if (align)
534 base = _ALIGN_UP(RELOC(alloc_bottom), align);
535 else
536 base = RELOC(alloc_bottom);
537
538 for(; (base + size) <= RELOC(alloc_top);
539 base = _ALIGN_UP(base + 0x100000, align)) {
540 prom_debug(" trying: 0x%x\n\r", base);
541 addr = (unsigned long)prom_claim(base, size, 0);
542 if ((int)addr != PROM_ERROR)
543 break;
544 addr = 0;
545 if (align == 0)
546 break;
547 }
548 if (addr == 0)
549 return 0;
550 RELOC(alloc_bottom) = addr;
551
552 prom_debug(" -> %x\n", addr);
553 prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom));
554 prom_debug(" alloc_top : %x\n", RELOC(alloc_top));
555 prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high));
556 prom_debug(" rmo_top : %x\n", RELOC(rmo_top));
557 prom_debug(" ram_top : %x\n", RELOC(ram_top));
558
559 return addr;
560}
561
562/*
563 * Allocates memory downward, either from the top of RMO, or if highmem
564 * is set, from the top of RAM. Note that this one doesn't handle
565 * failures. It does claim memory if highmem is not set.
566 */
567static unsigned long __init alloc_down(unsigned long size, unsigned long align,
568 int highmem)
569{
570 unsigned long offset = reloc_offset();
571 unsigned long base, addr = 0;
572
573 prom_debug("alloc_down(%x, %x, %s)\n", size, align,
574 highmem ? RELOC("(high)") : RELOC("(low)"));
575 if (RELOC(ram_top) == 0)
576 prom_panic("alloc_down() called with mem not initialized\n");
577
578 if (highmem) {
579 /* Carve out storage for the TCE table. */
580 addr = _ALIGN_DOWN(RELOC(alloc_top_high) - size, align);
581 if (addr <= RELOC(alloc_bottom))
582 return 0;
583 else {
584 /* Will we bump into the RMO? If yes, check that we
585 * didn't overlap any existing allocations there; if we
586 * did, we are dead: we must be the first in town!
587 */
588 if (addr < RELOC(rmo_top)) {
589 /* Good, we are first */
590 if (RELOC(alloc_top) == RELOC(rmo_top))
591 RELOC(alloc_top) = RELOC(rmo_top) = addr;
592 else
593 return 0;
594 }
595 RELOC(alloc_top_high) = addr;
596 }
597 goto bail;
598 }
599
600 base = _ALIGN_DOWN(RELOC(alloc_top) - size, align);
601 for(; base > RELOC(alloc_bottom); base = _ALIGN_DOWN(base - 0x100000, align)) {
602 prom_debug(" trying: 0x%x\n\r", base);
603 addr = (unsigned long)prom_claim(base, size, 0);
604 if ((int)addr != PROM_ERROR)
605 break;
606 addr = 0;
607 }
608 if (addr == 0)
609 return 0;
610 RELOC(alloc_top) = addr;
611
612 bail:
613 prom_debug(" -> %x\n", addr);
614 prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom));
615 prom_debug(" alloc_top : %x\n", RELOC(alloc_top));
616 prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high));
617 prom_debug(" rmo_top : %x\n", RELOC(rmo_top));
618 prom_debug(" ram_top : %x\n", RELOC(ram_top));
619
620 return addr;
621}
622
623/*
624 * Parse a "reg" cell
625 */
626static unsigned long __init prom_next_cell(int s, cell_t **cellp)
627{
628 cell_t *p = *cellp;
629 unsigned long r = 0;
630
631 /* Ignore more than 2 cells */
632 while (s > 2) {
633 p++;
634 s--;
635 }
636 while (s) {
637 r <<= 32;
638 r |= *(p++);
639 s--;
640 }
641
642 *cellp = p;
643 return r;
644}
645
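#if 0
/*
 * Editor's illustrative sketch, not part of the original file: what
 * prom_next_cell() above returns for a two-cell (64-bit) value.
 */
static void __init example_next_cell(void)
{
	cell_t cells[2] = { 0x00000001, 0x80000000 };	/* made-up "reg" data */
	cell_t *p = cells;
	unsigned long v = prom_next_cell(2, &p);

	/* here v == 0x180000000 and p == cells + 2 */
}
#endif
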
646/*
647 * Very dumb function for adding to the memory reserve list, but
648 * we don't need anything smarter at this point
649 *
650 * XXX Eventually check for collisions. They should NEVER happen;
651 * if problems seem to show up, it would be a good place to start
652 * tracking them down.
653 */
654static void reserve_mem(unsigned long base, unsigned long size)
655{
656 unsigned long offset = reloc_offset();
657 unsigned long top = base + size;
658 unsigned long cnt = RELOC(mem_reserve_cnt);
659
660 if (size == 0)
661 return;
662
663 /* We need to always keep one empty entry so that we
664 * have our terminator with "size" set to 0 since we are
665 * dumb and just copy this entire array to the boot params
666 */
667 base = _ALIGN_DOWN(base, PAGE_SIZE);
668 top = _ALIGN_UP(top, PAGE_SIZE);
669 size = top - base;
670
671 if (cnt >= (MEM_RESERVE_MAP_SIZE - 1))
672 prom_panic("Memory reserve map exhausted !\n");
673 RELOC(mem_reserve_map)[cnt].base = base;
674 RELOC(mem_reserve_map)[cnt].size = size;
675 RELOC(mem_reserve_cnt) = cnt + 1;
676}
677
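/*
 * Editor's note, not part of the original file: reserve_mem() above
 * page-aligns both ends of the region, so with 4k pages a request of
 * base 0x1234, size 0x100 is recorded as base 0x1000, size 0x1000.
 */
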
678/*
679 * Initialize the memory allocation mechanism: parse "memory" nodes and
680 * thereby obtain the top of memory and of the RMO to set up our local allocator
681 */
682static void __init prom_init_mem(void)
683{
684 phandle node;
685 char *path, type[64];
686 unsigned int plen;
687 cell_t *p, *endp;
688 unsigned long offset = reloc_offset();
689 struct prom_t *_prom = PTRRELOC(&prom);
690
691 /*
692 * We iterate the memory nodes to find
693 * 1) top of RMO (first node)
694 * 2) top of memory
695 */
696 prom_debug("root_addr_cells: %x\n", (long)_prom->root_addr_cells);
697 prom_debug("root_size_cells: %x\n", (long)_prom->root_size_cells);
698
699 prom_debug("scanning memory:\n");
700 path = RELOC(prom_scratch);
701
702 for (node = 0; prom_next_node(&node); ) {
703 type[0] = 0;
704 prom_getprop(node, "device_type", type, sizeof(type));
705
706 if (strcmp(type, RELOC("memory")))
707 continue;
708
709 plen = prom_getprop(node, "reg", RELOC(regbuf), sizeof(regbuf));
710 if (plen > sizeof(regbuf)) {
711 prom_printf("memory node too large for buffer !\n");
712 plen = sizeof(regbuf);
713 }
714 p = RELOC(regbuf);
715 endp = p + (plen / sizeof(cell_t));
716
717#ifdef DEBUG_PROM
718 memset(path, 0, PROM_SCRATCH_SIZE);
719 call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
720 prom_debug(" node %s :\n", path);
721#endif /* DEBUG_PROM */
722
723 while ((endp - p) >= (_prom->root_addr_cells + _prom->root_size_cells)) {
724 unsigned long base, size;
725
726 base = prom_next_cell(_prom->root_addr_cells, &p);
727 size = prom_next_cell(_prom->root_size_cells, &p);
728
729 if (size == 0)
730 continue;
731 prom_debug(" %x %x\n", base, size);
732 if (base == 0)
733 RELOC(rmo_top) = size;
734 if ((base + size) > RELOC(ram_top))
735 RELOC(ram_top) = base + size;
736 }
737 }
738
739 RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(klimit) - offset + 0x4000);
740
741 /* Check if we have an initrd after the kernel; if we do, move our bottom
742 * point to after it
743 */
744 if (RELOC(prom_initrd_start)) {
745 if (RELOC(prom_initrd_end) > RELOC(alloc_bottom))
746 RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end));
747 }
748
749 /*
750 * If prom_memory_limit is set we reduce the upper limits *except* for
751 * alloc_top_high. This must be the real top of RAM so we can put
752 * TCE's up there.
753 */
754
755 RELOC(alloc_top_high) = RELOC(ram_top);
756
757 if (RELOC(prom_memory_limit)) {
758 if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) {
759 prom_printf("Ignoring mem=%x <= alloc_bottom.\n",
760 RELOC(prom_memory_limit));
761 RELOC(prom_memory_limit) = 0;
762 } else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) {
763 prom_printf("Ignoring mem=%x >= ram_top.\n",
764 RELOC(prom_memory_limit));
765 RELOC(prom_memory_limit) = 0;
766 } else {
767 RELOC(ram_top) = RELOC(prom_memory_limit);
768 RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit));
769 }
770 }
771
772 /*
773 * Set up our top alloc point: the top of RMO, or the top of
774 * segment 0 when running non-LPAR.
775 */
776 if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR )
777 RELOC(alloc_top) = RELOC(rmo_top);
778 else
779 RELOC(alloc_top) = RELOC(rmo_top) = min(0x40000000ul, RELOC(ram_top));
780
781 prom_printf("memory layout at init:\n");
782 prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit));
783 prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom));
784 prom_printf(" alloc_top : %x\n", RELOC(alloc_top));
785 prom_printf(" alloc_top_hi : %x\n", RELOC(alloc_top_high));
786 prom_printf(" rmo_top : %x\n", RELOC(rmo_top));
787 prom_printf(" ram_top : %x\n", RELOC(ram_top));
788}
789
790
791/*
792 * Allocate room for and instantiate RTAS
793 */
794static void __init prom_instantiate_rtas(void)
795{
796 unsigned long offset = reloc_offset();
797 struct prom_t *_prom = PTRRELOC(&prom);
798 phandle prom_rtas, rtas_node;
799 u32 base, entry = 0;
800 u32 size = 0;
801
802 prom_debug("prom_instantiate_rtas: start...\n");
803
804 prom_rtas = call_prom("finddevice", 1, 1, ADDR("/rtas"));
805 prom_debug("prom_rtas: %x\n", prom_rtas);
806 if (prom_rtas == (phandle) -1)
807 return;
808
809 prom_getprop(prom_rtas, "rtas-size", &size, sizeof(size));
810 if (size == 0)
811 return;
812
813 base = alloc_down(size, PAGE_SIZE, 0);
814 if (base == 0) {
815 prom_printf("RTAS allocation failed !\n");
816 return;
817 }
818 prom_printf("instantiating rtas at 0x%x", base);
819
820 rtas_node = call_prom("open", 1, 1, ADDR("/rtas"));
821 prom_printf("...");
822
823 if (call_prom("call-method", 3, 2,
824 ADDR("instantiate-rtas"),
825 rtas_node, base) != PROM_ERROR) {
826 entry = (long)_prom->args.rets[1];
827 }
828 if (entry == 0) {
829 prom_printf(" failed\n");
830 return;
831 }
832 prom_printf(" done\n");
833
834 reserve_mem(base, size);
835
836 prom_setprop(prom_rtas, "linux,rtas-base", &base, sizeof(base));
837 prom_setprop(prom_rtas, "linux,rtas-entry", &entry, sizeof(entry));
838
839 prom_debug("rtas base = 0x%x\n", base);
840 prom_debug("rtas entry = 0x%x\n", entry);
841 prom_debug("rtas size = 0x%x\n", (long)size);
842
843 prom_debug("prom_instantiate_rtas: end...\n");
844}
845
846
847/*
848 * Allocate room for and initialize TCE tables
849 */
850static void __init prom_initialize_tce_table(void)
851{
852 phandle node;
853 ihandle phb_node;
854 unsigned long offset = reloc_offset();
855 char compatible[64], type[64], model[64];
856 char *path = RELOC(prom_scratch);
857 u64 base, align;
858 u32 minalign, minsize;
859 u64 tce_entry, *tce_entryp;
860 u64 local_alloc_top, local_alloc_bottom;
861 u64 i;
862
863 if (RELOC(ppc64_iommu_off))
864 return;
865
866 prom_debug("starting prom_initialize_tce_table\n");
867
868 /* Cache current top of allocs so we reserve a single block */
869 local_alloc_top = RELOC(alloc_top_high);
870 local_alloc_bottom = local_alloc_top;
871
872 /* Search all nodes looking for PHBs. */
873 for (node = 0; prom_next_node(&node); ) {
874 compatible[0] = 0;
875 type[0] = 0;
876 model[0] = 0;
877 prom_getprop(node, "compatible",
878 compatible, sizeof(compatible));
879 prom_getprop(node, "device_type", type, sizeof(type));
880 prom_getprop(node, "model", model, sizeof(model));
881
882 if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL))
883 continue;
884
885 /* Keep the old logic intact to avoid regression. */
886 if (compatible[0] != 0) {
887 if ((strstr(compatible, RELOC("python")) == NULL) &&
888 (strstr(compatible, RELOC("Speedwagon")) == NULL) &&
889 (strstr(compatible, RELOC("Winnipeg")) == NULL))
890 continue;
891 } else if (model[0] != 0) {
892 if ((strstr(model, RELOC("ython")) == NULL) &&
893 (strstr(model, RELOC("peedwagon")) == NULL) &&
894 (strstr(model, RELOC("innipeg")) == NULL))
895 continue;
896 }
897
898 if (prom_getprop(node, "tce-table-minalign", &minalign,
899 sizeof(minalign)) == PROM_ERROR)
900 minalign = 0;
901 if (prom_getprop(node, "tce-table-minsize", &minsize,
902 sizeof(minsize)) == PROM_ERROR)
903 minsize = 4UL << 20;
904
905 /*
906 * Even though we read what OF wants, we just set the table
907 * size to 4 MB. This is enough to map 2GB of PCI DMA space.
908 * By doing this, we avoid the pitfalls of trying to DMA to
909 * MMIO space and the DMA alias hole.
910 *
911 * On POWER4, firmware sets the TCE region by assuming
912 * each TCE table is 8MB. Using this memory for anything
913 * else will impact performance, so we always allocate 8MB.
914 * Anton
915 */
916 if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p))
917 minsize = 8UL << 20;
918 else
919 minsize = 4UL << 20;
920
921 /* Align to the greater of the align or size */
922 align = max(minalign, minsize);
923 base = alloc_down(minsize, align, 1);
924 if (base == 0)
925 prom_panic("ERROR, cannot find space for TCE table.\n");
926 if (base < local_alloc_bottom)
927 local_alloc_bottom = base;
928
929 /* Save away the TCE table attributes for later use. */
930 prom_setprop(node, "linux,tce-base", &base, sizeof(base));
931 prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize));
932
933 /* It seems OF doesn't null-terminate the path :-( */
934 memset(path, 0, PROM_SCRATCH_SIZE); /* path is a pointer, sizeof(path) would only clear 8 bytes */
935 /* Call OF to setup the TCE hardware */
936 if (call_prom("package-to-path", 3, 1, node,
937 path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) {
938 prom_printf("package-to-path failed\n");
939 }
940
941 prom_debug("TCE table: %s\n", path);
942 prom_debug("\tnode = 0x%x\n", node);
943 prom_debug("\tbase = 0x%x\n", base);
944 prom_debug("\tsize = 0x%x\n", minsize);
945
946 /* Initialize the table to have a one-to-one mapping
947 * over the allocated size.
948 */
949 tce_entryp = (unsigned long *)base;
950 for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) {
951 tce_entry = (i << PAGE_SHIFT);
952 tce_entry |= 0x3;
953 *tce_entryp = tce_entry;
954 }
955
956 prom_printf("opening PHB %s", path);
957 phb_node = call_prom("open", 1, 1, path);
958 if ( (long)phb_node <= 0)
959 prom_printf("... failed\n");
960 else
961 prom_printf("... done\n");
962
963 call_prom("call-method", 6, 0, ADDR("set-64-bit-addressing"),
964 phb_node, -1, minsize,
965 (u32) base, (u32) (base >> 32));
966 call_prom("close", 1, 0, phb_node);
967 }
968
969 reserve_mem(local_alloc_bottom, local_alloc_top - local_alloc_bottom);
970
971 if (RELOC(prom_memory_limit)) {
972 /*
973 * We align the start to a 16MB boundary so we can map the TCE area
974 * using large pages if possible. The end should be the top of RAM
975 * so no need to align it.
976 */
977 RELOC(prom_tce_alloc_start) = _ALIGN_DOWN(local_alloc_bottom, 0x1000000);
978 RELOC(prom_tce_alloc_end) = local_alloc_top;
979 }
980
981 /* Flag the first invalid entry */
982 prom_debug("ending prom_initialize_tce_table\n");
983}
984
985/*
986 * With CHRP SMP we need to use the OF to start the other
987 * processors so we can't wait until smp_boot_cpus (the OF is
988 * trashed by then) so we have to put the processors into
989 * a holding pattern controlled by the kernel (not OF) before
990 * we destroy the OF.
991 *
992 * This uses a chunk of low memory, puts some holding pattern
993 * code there and sends the other processors off to there until
994 * smp_boot_cpus tells them to do something. The holding pattern
995 * checks that address until its cpu # is there; when it is, that
996 * cpu jumps to __secondary_start(). smp_boot_cpus() takes care
997 * of setting those values.
998 *
999 * We also use physical address 0x4 here to tell when a cpu
1000 * is in its holding pattern code.
1001 *
1002 * Fixup comment... DRENG / PPPBBB - Peter
1003 *
1004 * -- Cort
1005 */
1006static void __init prom_hold_cpus(void)
1007{
1008 unsigned long i;
1009 unsigned int reg;
1010 phandle node;
1011 unsigned long offset = reloc_offset();
1012 char type[64];
1013 int cpuid = 0;
1014 unsigned int interrupt_server[MAX_CPU_THREADS];
1015 unsigned int cpu_threads, hw_cpu_num;
1016 int propsize;
1017 extern void __secondary_hold(void);
1018 extern unsigned long __secondary_hold_spinloop;
1019 extern unsigned long __secondary_hold_acknowledge;
1020 unsigned long *spinloop
1021 = (void *)virt_to_abs(&__secondary_hold_spinloop);
1022 unsigned long *acknowledge
1023 = (void *)virt_to_abs(&__secondary_hold_acknowledge);
1024 unsigned long secondary_hold
1025 = virt_to_abs(*PTRRELOC((unsigned long *)__secondary_hold));
1026 struct prom_t *_prom = PTRRELOC(&prom);
1027
1028 prom_debug("prom_hold_cpus: start...\n");
1029 prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop);
1030 prom_debug(" 1) *spinloop = 0x%x\n", *spinloop);
1031 prom_debug(" 1) acknowledge = 0x%x\n",
1032 (unsigned long)acknowledge);
1033 prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge);
1034 prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold);
1035
1036 /* Set the common spinloop variable, so all of the secondary cpus
1037 * will block when they are awakened from their OF spinloop.
1038 * This must occur for both SMP and non SMP kernels, since OF will
1039 * be trashed when we move the kernel.
1040 */
1041 *spinloop = 0;
1042
1043#ifdef CONFIG_HMT
1044 for (i=0; i < NR_CPUS; i++) {
1045 RELOC(hmt_thread_data)[i].pir = 0xdeadbeef;
1046 }
1047#endif
1048 /* look for cpus */
1049 for (node = 0; prom_next_node(&node); ) {
1050 type[0] = 0;
1051 prom_getprop(node, "device_type", type, sizeof(type));
1052 if (strcmp(type, RELOC("cpu")) != 0)
1053 continue;
1054
1055 /* Skip non-configured cpus. */
1056 if (prom_getprop(node, "status", type, sizeof(type)) > 0)
1057 if (strcmp(type, RELOC("okay")) != 0)
1058 continue;
1059
1060 reg = -1;
1061 prom_getprop(node, "reg", &reg, sizeof(reg));
1062
1063 prom_debug("\ncpuid = 0x%x\n", cpuid);
1064 prom_debug("cpu hw idx = 0x%x\n", reg);
1065
1066 /* Init the acknowledge var which will be reset by
1067 * the secondary cpu when it awakens from its OF
1068 * spinloop.
1069 */
1070 *acknowledge = (unsigned long)-1;
1071
1072 propsize = prom_getprop(node, "ibm,ppc-interrupt-server#s",
1073 &interrupt_server,
1074 sizeof(interrupt_server));
1075 if (propsize < 0) {
1076 /* no property. old hardware has no SMT */
1077 cpu_threads = 1;
1078 interrupt_server[0] = reg; /* fake it with phys id */
1079 } else {
1080 /* We have a threaded processor */
1081 cpu_threads = propsize / sizeof(u32);
1082 if (cpu_threads > MAX_CPU_THREADS) {
1083 prom_printf("SMT: too many threads!\n"
1084 "SMT: found %x, max is %x\n",
1085 cpu_threads, MAX_CPU_THREADS);
1086 cpu_threads = 1; /* ToDo: panic? */
1087 }
1088 }
1089
1090 hw_cpu_num = interrupt_server[0];
1091 if (hw_cpu_num != _prom->cpu) {
1092 /* Primary Thread of non-boot cpu */
1093 prom_printf("%x : starting cpu hw idx %x... ", cpuid, reg);
1094 call_prom("start-cpu", 3, 0, node,
1095 secondary_hold, reg);
1096
1097 for ( i = 0 ; (i < 100000000) &&
1098 (*acknowledge == ((unsigned long)-1)); i++ )
1099 mb();
1100
1101 if (*acknowledge == reg) {
1102 prom_printf("done\n");
1103 /* We have to get every CPU out of OF,
1104 * even if we never start it. */
1105 if (cpuid >= NR_CPUS)
1106 goto next;
1107 } else {
1108 prom_printf("failed: %x\n", *acknowledge);
1109 }
1110 }
1111#ifdef CONFIG_SMP
1112 else
1113 prom_printf("%x : boot cpu %x\n", cpuid, reg);
1114#endif
1115next:
1116#ifdef CONFIG_SMP
1117 /* Init paca for secondary threads. They start later. */
1118 for (i=1; i < cpu_threads; i++) {
1119 cpuid++;
1120 if (cpuid >= NR_CPUS)
1121 continue;
1122 }
1123#endif /* CONFIG_SMP */
1124 cpuid++;
1125 }
1126#ifdef CONFIG_HMT
1127 /* Only enable HMT on processors that provide support. */
1128 if (__is_processor(PV_PULSAR) ||
1129 __is_processor(PV_ICESTAR) ||
1130 __is_processor(PV_SSTAR)) {
1131 prom_printf(" starting secondary threads\n");
1132
1133 for (i = 0; i < NR_CPUS; i += 2) {
1134 if (!cpu_online(i))
1135 continue;
1136
1137 if (i == 0) {
1138 unsigned long pir = mfspr(SPRN_PIR);
1139 if (__is_processor(PV_PULSAR)) {
1140 RELOC(hmt_thread_data)[i].pir =
1141 pir & 0x1f;
1142 } else {
1143 RELOC(hmt_thread_data)[i].pir =
1144 pir & 0x3ff;
1145 }
1146 }
1147 }
1148 } else {
1149 prom_printf("Processor is not HMT capable\n");
1150 }
1151#endif
1152
1153 if (cpuid > NR_CPUS)
1154 prom_printf("WARNING: maximum CPUs (" __stringify(NR_CPUS)
1155 ") exceeded: ignoring extras\n");
1156
1157 prom_debug("prom_hold_cpus: end...\n");
1158}
1159
1160
1161static void __init prom_init_client_services(unsigned long pp)
1162{
1163 unsigned long offset = reloc_offset();
1164 struct prom_t *_prom = PTRRELOC(&prom);
1165
1166 /* Get a handle to the prom entry point before anything else */
1167 _prom->entry = pp;
1168
1169 /* Init default value for phys size */
1170 _prom->root_size_cells = 1;
1171 _prom->root_addr_cells = 2;
1172
1173 /* get a handle for the stdout device */
1174 _prom->chosen = call_prom("finddevice", 1, 1, ADDR("/chosen"));
1175 if ((long)_prom->chosen <= 0)
1176 prom_panic("cannot find chosen"); /* msg won't be printed :( */
1177
1178 /* get device tree root */
1179 _prom->root = call_prom("finddevice", 1, 1, ADDR("/"));
1180 if ((long)_prom->root <= 0)
1181 prom_panic("cannot find device tree root"); /* msg won't be printed :( */
1182}
1183
1184static void __init prom_init_stdout(void)
1185{
1186 unsigned long offset = reloc_offset();
1187 struct prom_t *_prom = PTRRELOC(&prom);
1188 char *path = RELOC(of_stdout_device);
1189 char type[16];
1190 u32 val;
1191
1192 if (prom_getprop(_prom->chosen, "stdout", &val, sizeof(val)) <= 0)
1193 prom_panic("cannot find stdout");
1194
1195 _prom->stdout = val;
1196
1197 /* Get the full OF pathname of the stdout device */
1198 memset(path, 0, 256);
1199 call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255);
1200 val = call_prom("instance-to-package", 1, 1, _prom->stdout);
1201 prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val));
1202 prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device));
1203 prom_setprop(_prom->chosen, "linux,stdout-path",
1204 RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1);
1205
1206 /* If it's a display, note it */
1207 memset(type, 0, sizeof(type));
1208 prom_getprop(val, "device_type", type, sizeof(type));
1209 if (strcmp(type, RELOC("display")) == 0) {
1210 _prom->disp_node = val;
1211 prom_setprop(val, "linux,boot-display", NULL, 0);
1212 }
1213}
1214
1215static void __init prom_close_stdin(void)
1216{
1217 unsigned long offset = reloc_offset();
1218 struct prom_t *_prom = PTRRELOC(&prom);
1219 ihandle val;
1220
1221 if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0)
1222 call_prom("close", 1, 0, val);
1223}
1224
1225static int __init prom_find_machine_type(void)
1226{
1227 unsigned long offset = reloc_offset();
1228 struct prom_t *_prom = PTRRELOC(&prom);
1229 char compat[256];
1230 int len, i = 0;
1231 phandle rtas;
1232
1233 len = prom_getprop(_prom->root, "compatible",
1234 compat, sizeof(compat)-1);
1235 if (len > 0) {
1236 compat[len] = 0;
1237 while (i < len) {
1238 char *p = &compat[i];
1239 int sl = strlen(p);
1240 if (sl == 0)
1241 break;
1242 if (strstr(p, RELOC("Power Macintosh")) ||
1243 strstr(p, RELOC("MacRISC4")))
1244 return PLATFORM_POWERMAC;
1245 if (strstr(p, RELOC("Momentum,Maple")))
1246 return PLATFORM_MAPLE;
1247 i += sl + 1;
1248 }
1249 }
1250 /* Default to pSeries. We need to know if we are running LPAR */
1251 rtas = call_prom("finddevice", 1, 1, ADDR("/rtas"));
1252 if (rtas != (phandle) -1) {
1253 unsigned long x;
1254 x = prom_getproplen(rtas, "ibm,hypertas-functions");
1255 if (x != PROM_ERROR) {
1256 prom_printf("Hypertas detected, assuming LPAR !\n");
1257 return PLATFORM_PSERIES_LPAR;
1258 }
1259 }
1260 return PLATFORM_PSERIES;
1261}
1262
1263static int __init prom_set_color(ihandle ih, int i, int r, int g, int b)
1264{
1265 unsigned long offset = reloc_offset();
1266
1267 return call_prom("call-method", 6, 1, ADDR("color!"), ih, i, b, g, r);
1268}
1269
1270/*
1271 * If we have a display that we don't know how to drive,
1272 * we will want to try to execute OF's open method for it
1273 * later. However, OF will probably fall over if we do that
1274 * after we've taken over the MMU.
1275 * So we check whether we will need to open the display,
1276 * and if so, open it now.
1277 */
1278static void __init prom_check_displays(void)
1279{
1280 unsigned long offset = reloc_offset();
1281 struct prom_t *_prom = PTRRELOC(&prom);
1282 char type[16], *path;
1283 phandle node;
1284 ihandle ih;
1285 int i;
1286
1287 static unsigned char default_colors[] = {
1288 0x00, 0x00, 0x00,
1289 0x00, 0x00, 0xaa,
1290 0x00, 0xaa, 0x00,
1291 0x00, 0xaa, 0xaa,
1292 0xaa, 0x00, 0x00,
1293 0xaa, 0x00, 0xaa,
1294 0xaa, 0xaa, 0x00,
1295 0xaa, 0xaa, 0xaa,
1296 0x55, 0x55, 0x55,
1297 0x55, 0x55, 0xff,
1298 0x55, 0xff, 0x55,
1299 0x55, 0xff, 0xff,
1300 0xff, 0x55, 0x55,
1301 0xff, 0x55, 0xff,
1302 0xff, 0xff, 0x55,
1303 0xff, 0xff, 0xff
1304 };
1305 const unsigned char *clut;
1306
1307 prom_printf("Looking for displays\n");
1308 for (node = 0; prom_next_node(&node); ) {
1309 memset(type, 0, sizeof(type));
1310 prom_getprop(node, "device_type", type, sizeof(type));
1311 if (strcmp(type, RELOC("display")) != 0)
1312 continue;
1313
1314 /* It seems OF doesn't null-terminate the path :-( */
1315 path = RELOC(prom_scratch);
1316 memset(path, 0, PROM_SCRATCH_SIZE);
1317
1318 /*
1319 * leave some room at the end of the path for appending extra
1320 * arguments
1321 */
1322 if (call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-10) < 0)
1323 continue;
1324 prom_printf("found display : %s, opening ... ", path);
1325
1326 ih = call_prom("open", 1, 1, path);
1327 if (ih == (ihandle)0 || ih == (ihandle)-1) {
1328 prom_printf("failed\n");
1329 continue;
1330 }
1331
1332 /* Success */
1333 prom_printf("done\n");
1334 prom_setprop(node, "linux,opened", NULL, 0);
1335
1336 /*
1337 * stdout wasn't a display node, pick the first we can find
1338 * for btext
1339 */
1340 if (_prom->disp_node == 0)
1341 _prom->disp_node = node;
1342
1343 /* Set up a usable color table when the appropriate
1344 * method is available. Should update this to use set-colors */
1345 clut = RELOC(default_colors);
1346 for (i = 0; i < 32; i++, clut += 3)
1347 if (prom_set_color(ih, i, clut[0], clut[1],
1348 clut[2]) != 0)
1349 break;
1350
1351#ifdef CONFIG_LOGO_LINUX_CLUT224
1352 clut = PTRRELOC(RELOC(logo_linux_clut224.clut));
1353 for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3)
1354 if (prom_set_color(ih, i + 32, clut[0], clut[1],
1355 clut[2]) != 0)
1356 break;
1357#endif /* CONFIG_LOGO_LINUX_CLUT224 */
1358 }
1359}
1360
1361
1362/* Return (relocated) pointer to this much memory: claims more chunks if reqd. */
1363static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end,
1364 unsigned long needed, unsigned long align)
1365{
1366 unsigned long offset = reloc_offset();
1367 void *ret;
1368
1369 *mem_start = _ALIGN(*mem_start, align);
1370 while ((*mem_start + needed) > *mem_end) {
1371 unsigned long room, chunk;
1372
1373 prom_debug("Chunk exhausted, claiming more at %x...\n",
1374 RELOC(alloc_bottom));
1375 room = RELOC(alloc_top) - RELOC(alloc_bottom);
1376 if (room > DEVTREE_CHUNK_SIZE)
1377 room = DEVTREE_CHUNK_SIZE;
1378 if (room < PAGE_SIZE)
1379 prom_panic("No memory for flatten_device_tree (no room)");
1380 chunk = alloc_up(room, 0);
1381 if (chunk == 0)
1382 prom_panic("No memory for flatten_device_tree (claim failed)");
1383 *mem_end = RELOC(alloc_top);
1384 }
1385
1386 ret = (void *)*mem_start;
1387 *mem_start += needed;
1388
1389 return ret;
1390}
1391
1392#define dt_push_token(token, mem_start, mem_end) \
1393 do { *((u32 *)make_room(mem_start, mem_end, 4, 4)) = token; } while(0)
1394
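/*
 * Editor's note, not part of the original file: for each node, the
 * helpers below emit a token stream of roughly this shape (name
 * offsets point into the string block built by scan_dt_build_strings):
 *
 *	OF_DT_BEGIN_NODE
 *	  full node path, NUL-terminated, padded to 4 bytes
 *	  per property: OF_DT_PROP, value length, name offset, value bytes
 *	  ... child nodes, recursively ...
 *	OF_DT_END_NODE
 */
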
1395static unsigned long __init dt_find_string(char *str)
1396{
1397 unsigned long offset = reloc_offset();
1398 char *s, *os;
1399
1400 s = os = (char *)RELOC(dt_string_start);
1401 s += 4;
1402 while (s < (char *)RELOC(dt_string_end)) {
1403 if (strcmp(s, str) == 0)
1404 return s - os;
1405 s += strlen(s) + 1;
1406 }
1407 return 0;
1408}
1409
1410static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
1411 unsigned long *mem_end)
1412{
1413 unsigned long offset = reloc_offset();
1414 char *prev_name, *namep, *sstart;
1415 unsigned long soff;
1416 phandle child;
1417
1418 sstart = (char *)RELOC(dt_string_start);
1419
1420 /* get and store all property names */
1421 prev_name = RELOC("");
1422 for (;;) {
1423
1424 /* 32 is max len of name including nul. */
1425 namep = make_room(mem_start, mem_end, 32, 1);
1426 if (call_prom("nextprop", 3, 1, node, prev_name, namep) <= 0) {
1427 /* No more properties: unwind alloc */
1428 *mem_start = (unsigned long)namep;
1429 break;
1430 }
1431 soff = dt_find_string(namep);
1432 if (soff != 0) {
1433 *mem_start = (unsigned long)namep;
1434 namep = sstart + soff;
1435 } else {
1436 /* Trim off some if we can */
1437 *mem_start = (unsigned long)namep + strlen(namep) + 1;
1438 RELOC(dt_string_end) = *mem_start;
1439 }
1440 prev_name = namep;
1441 }
1442
1443 /* do all our children */
1444 child = call_prom("child", 1, 1, node);
1445 while (child != (phandle)0) {
1446 scan_dt_build_strings(child, mem_start, mem_end);
1447 child = call_prom("peer", 1, 1, child);
1448 }
1449}
1450
1451static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
1452 unsigned long *mem_end)
1453{
1454 int l, align;
1455 phandle child;
1456 char *namep, *prev_name, *sstart;
1457 unsigned long soff;
1458 unsigned char *valp;
1459 unsigned long offset = reloc_offset();
1460 char pname[32];
1461 char *path;
1462
1463 path = RELOC(prom_scratch);
1464
1465 dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
1466
1467 /* get the node's full name */
1468 namep = (char *)*mem_start;
1469 l = call_prom("package-to-path", 3, 1, node,
1470 namep, *mem_end - *mem_start);
1471 if (l >= 0) {
1472 /* Didn't fit? Get more room. */
1473 if (l+1 > *mem_end - *mem_start) {
1474 namep = make_room(mem_start, mem_end, l+1, 1);
1475 call_prom("package-to-path", 3, 1, node, namep, l);
1476 }
1477 namep[l] = '\0';
1478 *mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4);
1479 }
1480
1481 /* get it again for debugging */
1482 memset(path, 0, PROM_SCRATCH_SIZE);
1483 call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
1484
1485 /* get and store all properties */
1486 prev_name = RELOC("");
1487 sstart = (char *)RELOC(dt_string_start);
1488 for (;;) {
1489 if (call_prom("nextprop", 3, 1, node, prev_name, pname) <= 0)
1490 break;
1491
1492 /* find string offset */
1493 soff = dt_find_string(pname);
1494 if (soff == 0) {
1495 prom_printf("WARNING: Can't find string index for <%s>, node %s\n",
1496 pname, path);
1497 break;
1498 }
1499 prev_name = sstart + soff;
1500
1501 /* get length */
1502 l = call_prom("getproplen", 2, 1, node, pname);
1503
1504 /* sanity checks */
1505 if (l < 0)
1506 continue;
1507 if (l > MAX_PROPERTY_LENGTH) {
1508 prom_printf("WARNING: ignoring large property ");
1509 /* It seems OF doesn't null-terminate the path :-( */
1510 prom_printf("[%s] ", path);
1511 prom_printf("%s length 0x%x\n", pname, l);
1512 continue;
1513 }
1514
1515 /* push property head */
1516 dt_push_token(OF_DT_PROP, mem_start, mem_end);
1517 dt_push_token(l, mem_start, mem_end);
1518 dt_push_token(soff, mem_start, mem_end);
1519
1520 /* push property content */
1521 align = (l >= 8) ? 8 : 4;
1522 valp = make_room(mem_start, mem_end, l, align);
1523 call_prom("getprop", 4, 1, node, pname, valp, l);
1524 *mem_start = _ALIGN(*mem_start, 4);
1525 }
1526
1527 /* Add a "linux,phandle" property. */
1528 soff = dt_find_string(RELOC("linux,phandle"));
1529 if (soff == 0)
1530 prom_printf("WARNING: Can't find string index for <linux,phandle>,"
1531 " node %s\n", path);
1532 else {
1533 dt_push_token(OF_DT_PROP, mem_start, mem_end);
1534 dt_push_token(4, mem_start, mem_end);
1535 dt_push_token(soff, mem_start, mem_end);
1536 valp = make_room(mem_start, mem_end, 4, 4);
1537 *(u32 *)valp = node;
1538 }
1539
1540 /* do all our children */
1541 child = call_prom("child", 1, 1, node);
1542 while (child != (phandle)0) {
1543 scan_dt_build_struct(child, mem_start, mem_end);
1544 child = call_prom("peer", 1, 1, child);
1545 }
1546
1547 dt_push_token(OF_DT_END_NODE, mem_start, mem_end);
1548}
1549
1550static void __init flatten_device_tree(void)
1551{
1552 phandle root;
1553 unsigned long offset = reloc_offset();
1554 unsigned long mem_start, mem_end, room;
1555 struct boot_param_header *hdr;
1556 char *namep;
1557 u64 *rsvmap;
1558
1559 /*
1560 * Check how much room we have between alloc top & bottom (+/- a
1561 * few pages), crop to 1Mb (DEVTREE_CHUNK_SIZE), as this is our "chunk" size
1562 */
1563 room = RELOC(alloc_top) - RELOC(alloc_bottom) - 0x4000;
1564 if (room > DEVTREE_CHUNK_SIZE)
1565 room = DEVTREE_CHUNK_SIZE;
1566 prom_debug("starting device tree allocs at %x\n", RELOC(alloc_bottom));
1567
1568 /* Now try to claim that */
1569 mem_start = (unsigned long)alloc_up(room, PAGE_SIZE);
1570 if (mem_start == 0)
1571 prom_panic("Can't allocate initial device-tree chunk\n");
1572 mem_end = RELOC(alloc_top);
1573
1574 /* Get root of tree */
1575 root = call_prom("peer", 1, 1, (phandle)0);
1576 if (root == (phandle)0)
1577 prom_panic ("couldn't get device tree root\n");
1578
1579 /* Build header and make room for mem rsv map */
1580 mem_start = _ALIGN(mem_start, 4);
1581 hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4);
1582 RELOC(dt_header_start) = (unsigned long)hdr;
1583 rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8);
1584
1585 /* Start of strings */
1586 mem_start = PAGE_ALIGN(mem_start);
1587 RELOC(dt_string_start) = mem_start;
1588 mem_start += 4; /* hole */
1589
1590 /* Add "linux,phandle" in there, we'll need it */
1591 namep = make_room(&mem_start, &mem_end, 16, 1);
1592 strcpy(namep, RELOC("linux,phandle"));
1593 mem_start = (unsigned long)namep + strlen(namep) + 1;
1594 RELOC(dt_string_end) = mem_start;
1595
1596 /* Build string array */
1597 prom_printf("Building dt strings...\n");
1598 scan_dt_build_strings(root, &mem_start, &mem_end);
1599
1600 /* Build structure */
1601 mem_start = PAGE_ALIGN(mem_start);
1602 RELOC(dt_struct_start) = mem_start;
1603 prom_printf("Building dt structure...\n");
1604 scan_dt_build_struct(root, &mem_start, &mem_end);
1605 dt_push_token(OF_DT_END, &mem_start, &mem_end);
1606 RELOC(dt_struct_end) = PAGE_ALIGN(mem_start);
1607
1608 /* Finish header */
1609 hdr->magic = OF_DT_HEADER;
1610 hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start);
1611 hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start);
1612 hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start);
1613 hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start);
1614 hdr->version = OF_DT_VERSION;
1615 hdr->last_comp_version = 1;
1616
1617 /* Reserve the whole thing and copy the reserve map in; we
1618 * also bump mem_reserve_cnt to cause further reservations to
1619 * fail since it's too late.
1620 */
1621 reserve_mem(RELOC(dt_header_start), hdr->totalsize);
1622 memcpy(rsvmap, RELOC(mem_reserve_map), sizeof(mem_reserve_map));
1623
1624#ifdef DEBUG_PROM
1625 {
1626 int i;
1627 prom_printf("reserved memory map:\n");
1628 for (i = 0; i < RELOC(mem_reserve_cnt); i++)
1629 prom_printf(" %x - %x\n", RELOC(mem_reserve_map)[i].base,
1630 RELOC(mem_reserve_map)[i].size);
1631 }
1632#endif
1633 RELOC(mem_reserve_cnt) = MEM_RESERVE_MAP_SIZE;
1634
1635 prom_printf("Device tree strings 0x%x -> 0x%x\n",
1636 RELOC(dt_string_start), RELOC(dt_string_end));
1637 prom_printf("Device tree struct 0x%x -> 0x%x\n",
1638 RELOC(dt_struct_start), RELOC(dt_struct_end));
1639
1640}
1641
1642static void __init prom_find_boot_cpu(void)
1643{
1644 unsigned long offset = reloc_offset();
1645 struct prom_t *_prom = PTRRELOC(&prom);
1646 u32 getprop_rval;
1647 ihandle prom_cpu;
1648 phandle cpu_pkg;
1649
1650 if (prom_getprop(_prom->chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0)
1651 prom_panic("cannot find boot cpu");
1652
1653 cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu);
1654
1655 prom_setprop(cpu_pkg, "linux,boot-cpu", NULL, 0);
1656 prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval));
1657 _prom->cpu = getprop_rval;
1658
1659 prom_debug("Booting CPU hw index = 0x%x\n", _prom->cpu);
1660}
1661
1662static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
1663{
1664#ifdef CONFIG_BLK_DEV_INITRD
1665 unsigned long offset = reloc_offset();
1666 struct prom_t *_prom = PTRRELOC(&prom);
1667
1668 if ( r3 && r4 && r4 != 0xdeadbeef) {
1669 u64 val;
1670
1671 RELOC(prom_initrd_start) = (r3 >= KERNELBASE) ? __pa(r3) : r3;
1672 RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4;
1673
1674 val = (u64)RELOC(prom_initrd_start);
1675 prom_setprop(_prom->chosen, "linux,initrd-start", &val, sizeof(val));
1676 val = (u64)RELOC(prom_initrd_end);
1677 prom_setprop(_prom->chosen, "linux,initrd-end", &val, sizeof(val));
1678
1679 reserve_mem(RELOC(prom_initrd_start),
1680 RELOC(prom_initrd_end) - RELOC(prom_initrd_start));
1681
1682 prom_debug("initrd_start=0x%x\n", RELOC(prom_initrd_start));
1683 prom_debug("initrd_end=0x%x\n", RELOC(prom_initrd_end));
1684 }
1685#endif /* CONFIG_BLK_DEV_INITRD */
1686}
1687
1688/*
1689 * We enter here early on, when the Open Firmware prom is still
1690 * handling exceptions and managing the MMU hash table for us.
1691 */
1692
1693unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp,
1694 unsigned long r6, unsigned long r7)
1695{
1696 unsigned long offset = reloc_offset();
1697 struct prom_t *_prom = PTRRELOC(&prom);
1698 unsigned long phys = KERNELBASE - offset;
1699 u32 getprop_rval;
1700
1701 /*
1702 * First zero the BSS
1703 */
1704 memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
1705
1706 /*
1707 * Init interface to Open Firmware, get some node references,
1708 * like /chosen
1709 */
1710 prom_init_client_services(pp);
1711
1712 /*
1713 * Init prom stdout device
1714 */
1715 prom_init_stdout();
1716 prom_debug("klimit=0x%x\n", RELOC(klimit));
1717 prom_debug("offset=0x%x\n", offset);
1718
1719 /*
1720 * Check for an initrd
1721 */
1722 prom_check_initrd(r3, r4);
1723
1724 /*
1725 * Get default machine type. At this point, we do not differentiate
1726 * between pSeries SMP and pSeries LPAR
1727 */
1728 RELOC(of_platform) = prom_find_machine_type();
1729 getprop_rval = RELOC(of_platform);
1730 prom_setprop(_prom->chosen, "linux,platform",
1731 &getprop_rval, sizeof(getprop_rval));
1732
1733 /*
1734 * On pSeries, copy the CPU hold code
1735 */
1736 if (RELOC(of_platform) & PLATFORM_PSERIES)
1737 copy_and_flush(0, KERNELBASE - offset, 0x100, 0);
1738
1739 /*
1740 * Get memory cells format
1741 */
1742 getprop_rval = 1;
1743 prom_getprop(_prom->root, "#size-cells",
1744 &getprop_rval, sizeof(getprop_rval));
1745 _prom->root_size_cells = getprop_rval;
1746 getprop_rval = 2;
1747 prom_getprop(_prom->root, "#address-cells",
1748 &getprop_rval, sizeof(getprop_rval));
1749 _prom->root_addr_cells = getprop_rval;
1750
1751 /*
1752 * Do early parsing of command line
1753 */
1754 early_cmdline_parse();
1755
1756 /*
1757 * Initialize memory management within prom_init
1758 */
1759 prom_init_mem();
1760
1761 /*
1762 * Determine which cpu is actually running right _now_
1763 */
1764 prom_find_boot_cpu();
1765
1766 /*
1767 * Initialize display devices
1768 */
1769 prom_check_displays();
1770
1771 /*
1772 * Initialize IOMMU (TCE tables) on pSeries. Do that before anything else
1773 * that uses the allocator, we need to make sure we get the top of memory
1774 * available for us here...
1775 */
1776 if (RELOC(of_platform) == PLATFORM_PSERIES)
1777 prom_initialize_tce_table();
1778
1779 /*
1780	 * On non-powermacs, try to instantiate RTAS and put all CPUs
1781 * in spin-loops. PowerMacs don't have a working RTAS and use
1782 * a different way to spin CPUs
1783 */
1784 if (RELOC(of_platform) != PLATFORM_POWERMAC) {
1785 prom_instantiate_rtas();
1786 prom_hold_cpus();
1787 }
1788
1789 /*
1790	 * Fill in some information for the kernel to use later on
1791 */
1792 if (RELOC(ppc64_iommu_off))
1793 prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0);
1794
1795 if (RELOC(iommu_force_on))
1796 prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0);
1797
1798 if (RELOC(prom_memory_limit))
1799 prom_setprop(_prom->chosen, "linux,memory-limit",
1800 PTRRELOC(&prom_memory_limit), sizeof(RELOC(prom_memory_limit)));
1801
1802 if (RELOC(prom_tce_alloc_start)) {
1803 prom_setprop(_prom->chosen, "linux,tce-alloc-start",
1804 PTRRELOC(&prom_tce_alloc_start), sizeof(RELOC(prom_tce_alloc_start)));
1805 prom_setprop(_prom->chosen, "linux,tce-alloc-end",
1806 PTRRELOC(&prom_tce_alloc_end), sizeof(RELOC(prom_tce_alloc_end)));
1807 }
1808
1809 /*
1810 * Now finally create the flattened device-tree
1811 */
1812 prom_printf("copying OF device tree ...\n");
1813 flatten_device_tree();
1814
1815 /* in case stdin is USB and still active on IBM machines... */
1816 prom_close_stdin();
1817
1818 /*
1819 * Call OF "quiesce" method to shut down pending DMA's from
1820 * devices etc...
1821 */
1822 prom_printf("Calling quiesce ...\n");
1823 call_prom("quiesce", 0, 0);
1824
1825 /*
1826 * And finally, call the kernel passing it the flattened device
1827 * tree and NULL as r5, thus triggering the new entry point which
1828 * is common to us and kexec
1829 */
1830 prom_printf("returning from prom_init\n");
1831 prom_debug("->dt_header_start=0x%x\n", RELOC(dt_header_start));
1832 prom_debug("->phys=0x%x\n", phys);
1833
1834 __start(RELOC(dt_header_start), phys, 0);
1835
1836 return 0;
1837}
1838
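/*
 * A minimal sketch of the hand-off convention above, with hypothetical
 * helper names: a zero r5 tells the common entry point that r3 already
 * holds a flattened device tree, rather than a live Open Firmware
 * client interface pointer as on a direct OF boot.
 */
static void __start_dispatch_sketch(unsigned long r3, unsigned long r4,
				    unsigned long r5)
{
	if (r5 == 0) {
		/* r3 = physical address of the flattened device tree,
		 * r4 = physical address the kernel was loaded at */
		early_setup_from_flat_tree(r3, r4);	/* hypothetical helper */
	} else {
		/* direct OF boot: r5 is the client interface entry */
		prom_init(r3, r4, r5, 0, 0);
	}
}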
diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c
new file mode 100644
index 000000000000..354a287c67eb
--- /dev/null
+++ b/arch/ppc64/kernel/ptrace.c
@@ -0,0 +1,328 @@
1/*
2 * linux/arch/ppc64/kernel/ptrace.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Derived from "arch/m68k/kernel/ptrace.c"
8 * Copyright (C) 1994 by Hamish Macdonald
9 * Taken from linux/kernel/ptrace.c and modified for M680x0.
10 * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
11 *
12 * Modified by Cort Dougan (cort@hq.fsmlabs.com)
13 * and Paul Mackerras (paulus@linuxcare.com.au).
14 *
15 * This file is subject to the terms and conditions of the GNU General
16 * Public License. See the file README.legal in the main directory of
17 * this archive for more details.
18 */
19
20#include <linux/kernel.h>
21#include <linux/sched.h>
22#include <linux/mm.h>
23#include <linux/smp.h>
24#include <linux/smp_lock.h>
25#include <linux/errno.h>
26#include <linux/ptrace.h>
27#include <linux/user.h>
28#include <linux/security.h>
29#include <linux/audit.h>
30#include <linux/seccomp.h>
31
32#include <asm/uaccess.h>
33#include <asm/page.h>
34#include <asm/pgtable.h>
35#include <asm/system.h>
36#include <asm/ptrace-common.h>
37
38/*
39 * This does not yet catch signals sent when the child dies;
40 * that belongs in exit.c or in signal.c.
41 */
42
43/*
44 * Called by kernel/ptrace.c when detaching..
45 *
46 * Make sure single step bits etc are not set.
47 */
48void ptrace_disable(struct task_struct *child)
49{
50 /* make sure the single step bit is not set. */
51 clear_single_step(child);
52}
53
54int sys_ptrace(long request, long pid, long addr, long data)
55{
56 struct task_struct *child;
57 int ret = -EPERM;
58
59 lock_kernel();
60 if (request == PTRACE_TRACEME) {
61 /* are we already being traced? */
62 if (current->ptrace & PT_PTRACED)
63 goto out;
64 ret = security_ptrace(current->parent, current);
65 if (ret)
66 goto out;
67 /* set the ptrace bit in the process flags. */
68 current->ptrace |= PT_PTRACED;
69 ret = 0;
70 goto out;
71 }
72 ret = -ESRCH;
73 read_lock(&tasklist_lock);
74 child = find_task_by_pid(pid);
75 if (child)
76 get_task_struct(child);
77 read_unlock(&tasklist_lock);
78 if (!child)
79 goto out;
80
81 ret = -EPERM;
82 if (pid == 1) /* you may not mess with init */
83 goto out_tsk;
84
85 if (request == PTRACE_ATTACH) {
86 ret = ptrace_attach(child);
87 goto out_tsk;
88 }
89
90 ret = ptrace_check_attach(child, request == PTRACE_KILL);
91 if (ret < 0)
92 goto out_tsk;
93
94 switch (request) {
95 /* when I and D space are separate, these will need to be fixed. */
96 case PTRACE_PEEKTEXT: /* read word at location addr. */
97 case PTRACE_PEEKDATA: {
98 unsigned long tmp;
99 int copied;
100
101 copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
102 ret = -EIO;
103 if (copied != sizeof(tmp))
104 break;
105 ret = put_user(tmp,(unsigned long __user *) data);
106 break;
107 }
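/*
 * A minimal userspace sketch of the PEEKDATA path above, using the
 * glibc ptrace() wrapper (which returns the peeked word directly and
 * reports failure through errno).
 */
#include <errno.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>

long peek_word(pid_t pid, unsigned long addr)
{
	long word;

	errno = 0;
	word = ptrace(PTRACE_PEEKDATA, pid, (void *)addr, NULL);
	if (word == -1 && errno != 0)
		perror("PTRACE_PEEKDATA");
	return word;
}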
108
109 /* read the word at location addr in the USER area. */
110 case PTRACE_PEEKUSR: {
111 unsigned long index;
112 unsigned long tmp;
113
114 ret = -EIO;
115 /* convert to index and check */
116 index = (unsigned long) addr >> 3;
117 if ((addr & 7) || (index > PT_FPSCR))
118 break;
119
120 if (index < PT_FPR0) {
121 tmp = get_reg(child, (int)index);
122 } else {
123 flush_fp_to_thread(child);
124 tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0];
125 }
126 ret = put_user(tmp,(unsigned long __user *) data);
127 break;
128 }
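/*
 * A minimal sketch of the PEEKUSR indexing above: offsets are 8-byte
 * register slots, so a tracer reads GPR1 (the child's stack pointer)
 * like this; PT_R1 is assumed to come from <asm/ptrace.h>.
 */
#include <sys/ptrace.h>
#include <sys/types.h>
#include <asm/ptrace.h>

long read_child_sp(pid_t pid)
{
	return ptrace(PTRACE_PEEKUSER, pid, (void *)(PT_R1 * 8), NULL);
}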
129
130 /* If I and D space are separate, this will have to be fixed. */
131 case PTRACE_POKETEXT: /* write the word at location addr. */
132 case PTRACE_POKEDATA:
133 ret = 0;
134 if (access_process_vm(child, addr, &data, sizeof(data), 1)
135 == sizeof(data))
136 break;
137 ret = -EIO;
138 break;
139
140 /* write the word at location addr in the USER area */
141 case PTRACE_POKEUSR: {
142 unsigned long index;
143
144 ret = -EIO;
145 /* convert to index and check */
146 index = (unsigned long) addr >> 3;
147 if ((addr & 7) || (index > PT_FPSCR))
148 break;
149
150 if (index == PT_ORIG_R3)
151 break;
152 if (index < PT_FPR0) {
153 ret = put_reg(child, index, data);
154 } else {
155 flush_fp_to_thread(child);
156 ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data;
157 ret = 0;
158 }
159 break;
160 }
161
162 case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
163 case PTRACE_CONT: { /* restart after signal. */
164 ret = -EIO;
165 if ((unsigned long) data > _NSIG)
166 break;
167 if (request == PTRACE_SYSCALL)
168 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
169 else
170 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
171 child->exit_code = data;
172 /* make sure the single step bit is not set. */
173 clear_single_step(child);
174 wake_up_process(child);
175 ret = 0;
176 break;
177 }
178
179 /*
180 * make the child exit. Best I can do is send it a sigkill.
181 * perhaps it should be put in the status that it wants to
182 * exit.
183 */
184 case PTRACE_KILL: {
185 ret = 0;
186 if (child->exit_state == EXIT_ZOMBIE) /* already dead */
187 break;
188 child->exit_code = SIGKILL;
189 /* make sure the single step bit is not set. */
190 clear_single_step(child);
191 wake_up_process(child);
192 break;
193 }
194
195 case PTRACE_SINGLESTEP: { /* set the trap flag. */
196 ret = -EIO;
197 if ((unsigned long) data > _NSIG)
198 break;
199 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
200 set_single_step(child);
201 child->exit_code = data;
202 /* give it a chance to run. */
203 wake_up_process(child);
204 ret = 0;
205 break;
206 }
207
208 case PTRACE_DETACH:
209 ret = ptrace_detach(child, data);
210 break;
211
212 case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
213 int i;
214 unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
215 unsigned long __user *tmp = (unsigned long __user *)addr;
216
217 for (i = 0; i < 32; i++) {
218 ret = put_user(*reg, tmp);
219 if (ret)
220 break;
221 reg++;
222 tmp++;
223 }
224 break;
225 }
226
227 case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
228 int i;
229 unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
230 unsigned long __user *tmp = (unsigned long __user *)addr;
231
232 for (i = 0; i < 32; i++) {
233 ret = get_user(*reg, tmp);
234 if (ret)
235 break;
236 reg++;
237 tmp++;
238 }
239 break;
240 }
241
242 case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */
243 int i;
244 unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
245 unsigned long __user *tmp = (unsigned long __user *)addr;
246
247 flush_fp_to_thread(child);
248
249 for (i = 0; i < 32; i++) {
250 ret = put_user(*reg, tmp);
251 if (ret)
252 break;
253 reg++;
254 tmp++;
255 }
256 break;
257 }
258
259	case PPC_PTRACE_SETFPREGS: { /* Set FPRs 0 - 31. */
260 int i;
261 unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
262 unsigned long __user *tmp = (unsigned long __user *)addr;
263
264 flush_fp_to_thread(child);
265
266 for (i = 0; i < 32; i++) {
267 ret = get_user(*reg, tmp);
268 if (ret)
269 break;
270 reg++;
271 tmp++;
272 }
273 break;
274 }
275
276 default:
277 ret = ptrace_request(child, request, addr, data);
278 break;
279 }
280out_tsk:
281 put_task_struct(child);
282out:
283 unlock_kernel();
284 return ret;
285}
286
287static void do_syscall_trace(void)
288{
289 /* the 0x80 provides a way for the tracing parent to distinguish
290 between a syscall stop and SIGTRAP delivery */
291 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
292 ? 0x80 : 0));
293
294 /*
295 * this isn't the same as continuing with a signal, but it will do
296 * for normal use. strace only continues with a signal if the
297 * stopping signal is not SIGTRAP. -brl
298 */
299 if (current->exit_code) {
300 send_sig(current->exit_code, current, 1);
301 current->exit_code = 0;
302 }
303}
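/*
 * Tracer-side sketch of the 0x80 convention above: with
 * PTRACE_O_TRACESYSGOOD requested, a syscall stop shows up as
 * SIGTRAP|0x80 in the wait status.
 */
#include <signal.h>
#include <sys/wait.h>

static int is_syscall_stop(int status)
{
	return WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80);
}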
304
305void do_syscall_trace_enter(struct pt_regs *regs)
306{
307 if (unlikely(current->audit_context))
308 audit_syscall_entry(current, regs->gpr[0],
309 regs->gpr[3], regs->gpr[4],
310 regs->gpr[5], regs->gpr[6]);
311
312 if (test_thread_flag(TIF_SYSCALL_TRACE)
313 && (current->ptrace & PT_PTRACED))
314 do_syscall_trace();
315}
316
317void do_syscall_trace_leave(struct pt_regs *regs)
318{
319 secure_computing(regs->gpr[0]);
320
321 if (unlikely(current->audit_context))
322 audit_syscall_exit(current, regs->result);
323
324 if ((test_thread_flag(TIF_SYSCALL_TRACE)
325 || test_thread_flag(TIF_SINGLESTEP))
326 && (current->ptrace & PT_PTRACED))
327 do_syscall_trace();
328}
diff --git a/arch/ppc64/kernel/ptrace32.c b/arch/ppc64/kernel/ptrace32.c
new file mode 100644
index 000000000000..ee81b1b776cc
--- /dev/null
+++ b/arch/ppc64/kernel/ptrace32.c
@@ -0,0 +1,420 @@
1/*
2 * linux/arch/ppc64/kernel/ptrace32.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Derived from "arch/m68k/kernel/ptrace.c"
8 * Copyright (C) 1994 by Hamish Macdonald
9 * Taken from linux/kernel/ptrace.c and modified for M680x0.
10 * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
11 *
12 * Modified by Cort Dougan (cort@hq.fsmlabs.com)
13 * and Paul Mackerras (paulus@linuxcare.com.au).
14 *
15 * This file is subject to the terms and conditions of the GNU General
16 * Public License. See the file README.legal in the main directory of
17 * this archive for more details.
18 */
19
20#include <linux/kernel.h>
21#include <linux/sched.h>
22#include <linux/mm.h>
23#include <linux/smp.h>
24#include <linux/smp_lock.h>
25#include <linux/errno.h>
26#include <linux/ptrace.h>
27#include <linux/user.h>
28#include <linux/security.h>
29
30#include <asm/uaccess.h>
31#include <asm/page.h>
32#include <asm/pgtable.h>
33#include <asm/system.h>
34#include <asm/ptrace-common.h>
35
36/*
37 * This does not yet catch signals sent when the child dies;
38 * that belongs in exit.c or in signal.c.
39 */
40
41int sys32_ptrace(long request, long pid, unsigned long addr, unsigned long data)
42{
43 struct task_struct *child;
44 int ret = -EPERM;
45
46 lock_kernel();
47 if (request == PTRACE_TRACEME) {
48 /* are we already being traced? */
49 if (current->ptrace & PT_PTRACED)
50 goto out;
51 ret = security_ptrace(current->parent, current);
52 if (ret)
53 goto out;
54 /* set the ptrace bit in the process flags. */
55 current->ptrace |= PT_PTRACED;
56 ret = 0;
57 goto out;
58 }
59 ret = -ESRCH;
60 read_lock(&tasklist_lock);
61 child = find_task_by_pid(pid);
62 if (child)
63 get_task_struct(child);
64 read_unlock(&tasklist_lock);
65 if (!child)
66 goto out;
67
68 ret = -EPERM;
69 if (pid == 1) /* you may not mess with init */
70 goto out_tsk;
71
72 if (request == PTRACE_ATTACH) {
73 ret = ptrace_attach(child);
74 goto out_tsk;
75 }
76
77 ret = ptrace_check_attach(child, request == PTRACE_KILL);
78 if (ret < 0)
79 goto out_tsk;
80
81 switch (request) {
82 /* when I and D space are separate, these will need to be fixed. */
83 case PTRACE_PEEKTEXT: /* read word at location addr. */
84 case PTRACE_PEEKDATA: {
85 unsigned int tmp;
86 int copied;
87
88 copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
89 ret = -EIO;
90 if (copied != sizeof(tmp))
91 break;
92 ret = put_user(tmp, (u32 __user *)data);
93 break;
94 }
95
96 /*
97 * Read 4 bytes of the other process' storage
98 * data is a pointer specifying where the user wants the
99 * 4 bytes copied into
100 * addr is a pointer in the user's storage that contains an 8 byte
101 * address in the other process of the 4 bytes that are to be read
102 * (this is run in a 32-bit process looking at a 64-bit process)
103 * when I and D space are separate, these will need to be fixed.
104 */
105 case PPC_PTRACE_PEEKTEXT_3264:
106 case PPC_PTRACE_PEEKDATA_3264: {
107 u32 tmp;
108 int copied;
109 u32 __user * addrOthers;
110
111 ret = -EIO;
112
113 /* Get the addr in the other process that we want to read */
114 if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
115 break;
116
117 copied = access_process_vm(child, (u64)addrOthers, &tmp,
118 sizeof(tmp), 0);
119 if (copied != sizeof(tmp))
120 break;
121 ret = put_user(tmp, (u32 __user *)data);
122 break;
123 }
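/*
 * A sketch of how a 32-bit tracer drives the 3264 peek above: the
 * tracee's full 64-bit address is kept in a local u64 and 'addr'
 * points at it; the kernel writes the fetched word through 'data'.
 * PPC_PTRACE_PEEKDATA_3264 is assumed to come from <asm/ptrace.h>.
 */
#include <sys/ptrace.h>
#include <sys/types.h>

unsigned int peek32_of_64bit_child(pid_t pid, unsigned long long where)
{
	unsigned int word = 0;

	if (ptrace(PPC_PTRACE_PEEKDATA_3264, pid, &where, &word) != 0)
		word = 0;	/* error: left at 0 in this sketch */
	return word;
}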
124
125 /* Read a register (specified by ADDR) out of the "user area" */
126 case PTRACE_PEEKUSR: {
127 int index;
128 unsigned long tmp;
129
130 ret = -EIO;
131 /* convert to index and check */
132 index = (unsigned long) addr >> 2;
133 if ((addr & 3) || (index > PT_FPSCR32))
134 break;
135
136 if (index < PT_FPR0) {
137 tmp = get_reg(child, index);
138 } else {
139 flush_fp_to_thread(child);
140 /*
141 * the user space code considers the floating point
142 * to be an array of unsigned int (32 bits) - the
143 * index passed in is based on this assumption.
144 */
145 tmp = ((unsigned int *)child->thread.fpr)[index - PT_FPR0];
146 }
147 ret = put_user((unsigned int)tmp, (u32 __user *)data);
148 break;
149 }
150
151 /*
152 * Read 4 bytes out of the other process' pt_regs area
153 * data is a pointer specifying where the user wants the
154 * 4 bytes copied into
155 * addr is the offset into the other process' pt_regs structure
156 * that is to be read
157 * (this is run in a 32-bit process looking at a 64-bit process)
158 */
159 case PPC_PTRACE_PEEKUSR_3264: {
160 u32 index;
161 u32 reg32bits;
162 u64 tmp;
163 u32 numReg;
164 u32 part;
165
166 ret = -EIO;
167 /* Determine which register the user wants */
168 index = (u64)addr >> 2;
169 numReg = index / 2;
170 /* Determine which part of the register the user wants */
171 if (index % 2)
172 part = 1; /* want the 2nd half of the register (right-most). */
173 else
174 part = 0; /* want the 1st half of the register (left-most). */
175
176		/* Validate: address must be aligned and within the user area */
177 if ((addr & 3) || numReg > PT_FPSCR)
178 break;
179
180 if (numReg >= PT_FPR0) {
181 flush_fp_to_thread(child);
182 tmp = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0];
183 } else { /* register within PT_REGS struct */
184 tmp = get_reg(child, numReg);
185 }
186 reg32bits = ((u32*)&tmp)[part];
187 ret = put_user(reg32bits, (u32 __user *)data);
188 break;
189 }
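/*
 * Per the layout above, 32-bit "user area" offsets address 4-byte
 * halves of the 64-bit registers: the low (right-most) word of GPR n
 * sits at offset (n * 2 + 1) * 4, the high word at n * 2 * 4.
 */
static unsigned long gpr_low_half_offset(int gpr)
{
	return (gpr * 2 + 1) * 4;
}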
190
191 /* If I and D space are separate, this will have to be fixed. */
192 case PTRACE_POKETEXT: /* write the word at location addr. */
193 case PTRACE_POKEDATA: {
194 unsigned int tmp;
195 tmp = data;
196 ret = 0;
197 if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1)
198 == sizeof(tmp))
199 break;
200 ret = -EIO;
201 break;
202 }
203
204 /*
205 * Write 4 bytes into the other process' storage
206 * data is the 4 bytes that the user wants written
207 * addr is a pointer in the user's storage that contains an
208 * 8 byte address in the other process where the 4 bytes
209 * are to be written
210 * (this is run in a 32-bit process looking at a 64-bit process)
211 * when I and D space are separate, these will need to be fixed.
212 */
213 case PPC_PTRACE_POKETEXT_3264:
214 case PPC_PTRACE_POKEDATA_3264: {
215 u32 tmp = data;
216 u32 __user * addrOthers;
217
218 /* Get the addr in the other process that we want to write into */
219 ret = -EIO;
220 if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
221 break;
222 ret = 0;
223 if (access_process_vm(child, (u64)addrOthers, &tmp,
224 sizeof(tmp), 1) == sizeof(tmp))
225 break;
226 ret = -EIO;
227 break;
228 }
229
230 /* write the word at location addr in the USER area */
231 case PTRACE_POKEUSR: {
232 unsigned long index;
233
234 ret = -EIO;
235 /* convert to index and check */
236 index = (unsigned long) addr >> 2;
237 if ((addr & 3) || (index > PT_FPSCR32))
238 break;
239
240 if (index == PT_ORIG_R3)
241 break;
242 if (index < PT_FPR0) {
243 ret = put_reg(child, index, data);
244 } else {
245 flush_fp_to_thread(child);
246 /*
247 * the user space code considers the floating point
248 * to be an array of unsigned int (32 bits) - the
249 * index passed in is based on this assumption.
250 */
251 ((unsigned int *)child->thread.fpr)[index - PT_FPR0] = data;
252 ret = 0;
253 }
254 break;
255 }
256
257 /*
258 * Write 4 bytes into the other process' pt_regs area
259 * data is the 4 bytes that the user wants written
260 * addr is the offset into the other process' pt_regs structure
261 * that is to be written into
262 * (this is run in a 32-bit process looking at a 64-bit process)
263 */
264 case PPC_PTRACE_POKEUSR_3264: {
265 u32 index;
266 u32 numReg;
267
268 ret = -EIO;
269 /* Determine which register the user wants */
270 index = (u64)addr >> 2;
271 numReg = index / 2;
272 /*
273 * Validate the input - check to see if address is on the
274 * wrong boundary or beyond the end of the user area
275 */
276 if ((addr & 3) || (numReg > PT_FPSCR))
277 break;
278		/* Ensure it is a register we let them change */
279 if ((numReg == PT_ORIG_R3)
280 || ((numReg > PT_CCR) && (numReg < PT_FPR0)))
281 break;
282 if (numReg >= PT_FPR0) {
283 flush_fp_to_thread(child);
284 }
285 if (numReg == PT_MSR)
286 data = (data & MSR_DEBUGCHANGE)
287 | (child->thread.regs->msr & ~MSR_DEBUGCHANGE);
288 ((u32*)child->thread.regs)[index] = data;
289 ret = 0;
290 break;
291 }
292
293 case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
294 case PTRACE_CONT: { /* restart after signal. */
295 ret = -EIO;
296 if ((unsigned long) data > _NSIG)
297 break;
298 if (request == PTRACE_SYSCALL)
299 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
300 else
301 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
302 child->exit_code = data;
303 /* make sure the single step bit is not set. */
304 clear_single_step(child);
305 wake_up_process(child);
306 ret = 0;
307 break;
308 }
309
310 /*
311 * make the child exit. Best I can do is send it a sigkill.
312 * perhaps it should be put in the status that it wants to
313 * exit.
314 */
315 case PTRACE_KILL: {
316 ret = 0;
317 if (child->exit_state == EXIT_ZOMBIE) /* already dead */
318 break;
319 child->exit_code = SIGKILL;
320 /* make sure the single step bit is not set. */
321 clear_single_step(child);
322 wake_up_process(child);
323 break;
324 }
325
326 case PTRACE_SINGLESTEP: { /* set the trap flag. */
327 ret = -EIO;
328 if ((unsigned long) data > _NSIG)
329 break;
330 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
331 set_single_step(child);
332 child->exit_code = data;
333 /* give it a chance to run. */
334 wake_up_process(child);
335 ret = 0;
336 break;
337 }
338
339 case PTRACE_DETACH:
340 ret = ptrace_detach(child, data);
341 break;
342
343 case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
344 int i;
345 unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
346 unsigned int __user *tmp = (unsigned int __user *)addr;
347
348 for (i = 0; i < 32; i++) {
349 ret = put_user(*reg, tmp);
350 if (ret)
351 break;
352 reg++;
353 tmp++;
354 }
355 break;
356 }
357
358 case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
359 int i;
360 unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
361 unsigned int __user *tmp = (unsigned int __user *)addr;
362
363 for (i = 0; i < 32; i++) {
364 ret = get_user(*reg, tmp);
365 if (ret)
366 break;
367 reg++;
368 tmp++;
369 }
370 break;
371 }
372
373 case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */
374 int i;
375 unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
376 unsigned int __user *tmp = (unsigned int __user *)addr;
377
378 flush_fp_to_thread(child);
379
380 for (i = 0; i < 32; i++) {
381 ret = put_user(*reg, tmp);
382 if (ret)
383 break;
384 reg++;
385 tmp++;
386 }
387 break;
388 }
389
390	case PPC_PTRACE_SETFPREGS: { /* Set FPRs 0 - 31. */
391 int i;
392 unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
393 unsigned int __user *tmp = (unsigned int __user *)addr;
394
395 flush_fp_to_thread(child);
396
397 for (i = 0; i < 32; i++) {
398 ret = get_user(*reg, tmp);
399 if (ret)
400 break;
401 reg++;
402 tmp++;
403 }
404 break;
405 }
406
407 case PTRACE_GETEVENTMSG:
408 ret = put_user(child->ptrace_message, (unsigned int __user *) data);
409 break;
410
411 default:
412 ret = ptrace_request(child, request, addr, data);
413 break;
414 }
415out_tsk:
416 put_task_struct(child);
417out:
418 unlock_kernel();
419 return ret;
420}
diff --git a/arch/ppc64/kernel/ras.c b/arch/ppc64/kernel/ras.c
new file mode 100644
index 000000000000..1c4c796b212b
--- /dev/null
+++ b/arch/ppc64/kernel/ras.c
@@ -0,0 +1,356 @@
1/*
2 * ras.c
3 * Copyright (C) 2001 Dave Engebretsen IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20/* Change Activity:
21 * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support.
22 * End Change Activity
23 */
24
25#include <linux/errno.h>
26#include <linux/threads.h>
27#include <linux/kernel_stat.h>
28#include <linux/signal.h>
29#include <linux/sched.h>
30#include <linux/ioport.h>
31#include <linux/interrupt.h>
32#include <linux/timex.h>
33#include <linux/init.h>
34#include <linux/slab.h>
35#include <linux/pci.h>
36#include <linux/delay.h>
37#include <linux/irq.h>
38#include <linux/random.h>
39#include <linux/sysrq.h>
40#include <linux/bitops.h>
41
42#include <asm/uaccess.h>
43#include <asm/system.h>
44#include <asm/io.h>
45#include <asm/pgtable.h>
46#include <asm/irq.h>
47#include <asm/cache.h>
48#include <asm/prom.h>
49#include <asm/ptrace.h>
50#include <asm/iSeries/LparData.h>
51#include <asm/machdep.h>
52#include <asm/rtas.h>
53#include <asm/ppcdebug.h>
54
55static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
56static DEFINE_SPINLOCK(ras_log_buf_lock);
57
58char mce_data_buf[RTAS_ERROR_LOG_MAX];
59
60/* This is true if we are using the firmware NMI handler (typically LPAR) */
61extern int fwnmi_active;
62
63extern void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr);
64
65static int ras_get_sensor_state_token;
66static int ras_check_exception_token;
67
68#define EPOW_SENSOR_TOKEN 9
69#define EPOW_SENSOR_INDEX 0
70#define RAS_VECTOR_OFFSET 0x500
71
72static irqreturn_t ras_epow_interrupt(int irq, void *dev_id,
73 struct pt_regs * regs);
74static irqreturn_t ras_error_interrupt(int irq, void *dev_id,
75 struct pt_regs * regs);
76
77/* #define DEBUG */
78
79static void request_ras_irqs(struct device_node *np, char *propname,
80 irqreturn_t (*handler)(int, void *, struct pt_regs *),
81 const char *name)
82{
83 unsigned int *ireg, len, i;
84 int virq, n_intr;
85
86 ireg = (unsigned int *)get_property(np, propname, &len);
87 if (ireg == NULL)
88 return;
89 n_intr = prom_n_intr_cells(np);
90 len /= n_intr * sizeof(*ireg);
91
92 for (i = 0; i < len; i++) {
93 virq = virt_irq_create_mapping(*ireg);
94 if (virq == NO_IRQ) {
95 printk(KERN_ERR "Unable to allocate interrupt "
96 "number for %s\n", np->full_name);
97 return;
98 }
99 if (request_irq(irq_offset_up(virq), handler, 0, name, NULL)) {
100 printk(KERN_ERR "Unable to request interrupt %d for "
101 "%s\n", irq_offset_up(virq), np->full_name);
102 return;
103 }
104 ireg += n_intr;
105 }
106}
107
108/*
109 * Initialize handlers for the set of interrupts caused by hardware errors
110 * and power system events.
111 */
112static int __init init_ras_IRQ(void)
113{
114 struct device_node *np;
115
116 ras_get_sensor_state_token = rtas_token("get-sensor-state");
117 ras_check_exception_token = rtas_token("check-exception");
118
119 /* Internal Errors */
120 np = of_find_node_by_path("/event-sources/internal-errors");
121 if (np != NULL) {
122 request_ras_irqs(np, "open-pic-interrupt", ras_error_interrupt,
123 "RAS_ERROR");
124 request_ras_irqs(np, "interrupts", ras_error_interrupt,
125 "RAS_ERROR");
126 of_node_put(np);
127 }
128
129 /* EPOW Events */
130 np = of_find_node_by_path("/event-sources/epow-events");
131 if (np != NULL) {
132 request_ras_irqs(np, "open-pic-interrupt", ras_epow_interrupt,
133 "RAS_EPOW");
134 request_ras_irqs(np, "interrupts", ras_epow_interrupt,
135 "RAS_EPOW");
136 of_node_put(np);
137 }
138
139 return 1;
140}
141__initcall(init_ras_IRQ);
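/*
 * Illustrative device-tree fragment (hypothetical values) of the
 * shape init_ras_IRQ() above walks; each interrupt specifier is
 * mapped and claimed by request_ras_irqs().
 *
 *	event-sources {
 *		internal-errors {
 *			interrupts = <0x12 0x0>;
 *		};
 *		epow-events {
 *			interrupts = <0x13 0x0>;
 *		};
 *	};
 */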
142
143/*
144 * Handle power subsystem events (EPOW).
145 *
146 * Presently we just log that the event has occurred. This should be fixed
147 * to examine the type of power failure and take appropriate action where
148 * the time horizon permits something useful to be done.
149 */
150static irqreturn_t
151ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs)
152{
153 int status = 0xdeadbeef;
154 int state = 0;
155 int critical;
156
157 status = rtas_call(ras_get_sensor_state_token, 2, 2, &state,
158 EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX);
159
160 if (state > 3)
161 critical = 1; /* Time Critical */
162 else
163 critical = 0;
164
165 spin_lock(&ras_log_buf_lock);
166
167 status = rtas_call(ras_check_exception_token, 6, 1, NULL,
168 RAS_VECTOR_OFFSET,
169 virt_irq_to_real(irq_offset_down(irq)),
170 RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
171 critical, __pa(&ras_log_buf),
172 rtas_get_error_log_max());
173
174 udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n",
175 *((unsigned long *)&ras_log_buf), status, state);
176 printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n",
177 *((unsigned long *)&ras_log_buf), status, state);
178
179 /* format and print the extended information */
180 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
181
182 spin_unlock(&ras_log_buf_lock);
183 return IRQ_HANDLED;
184}
185
186/*
187 * Handle hardware error interrupts.
188 *
189 * RTAS check-exception is called to collect data on the exception. If
190 * the error is deemed recoverable, we log a warning and return.
191 * For nonrecoverable errors, an error is logged and we stop all processing
192 * as quickly as possible in order to prevent propagation of the failure.
193 */
194static irqreturn_t
195ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs)
196{
197 struct rtas_error_log *rtas_elog;
198 int status = 0xdeadbeef;
199 int fatal;
200
201 spin_lock(&ras_log_buf_lock);
202
203 status = rtas_call(ras_check_exception_token, 6, 1, NULL,
204 RAS_VECTOR_OFFSET,
205 virt_irq_to_real(irq_offset_down(irq)),
206 RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
207 __pa(&ras_log_buf),
208 rtas_get_error_log_max());
209
210 rtas_elog = (struct rtas_error_log *)ras_log_buf;
211
212 if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC))
213 fatal = 1;
214 else
215 fatal = 0;
216
217 /* format and print the extended information */
218 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
219
220 if (fatal) {
221 udbg_printf("Fatal HW Error <0x%lx 0x%x>\n",
222 *((unsigned long *)&ras_log_buf), status);
223 printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n",
224 *((unsigned long *)&ras_log_buf), status);
225
226#ifndef DEBUG
227 /* Don't actually power off when debugging so we can test
228 * without actually failing while injecting errors.
229 * Error data will not be logged to syslog.
230 */
231 ppc_md.power_off();
232#endif
233 } else {
234 udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n",
235 *((unsigned long *)&ras_log_buf), status);
236 printk(KERN_WARNING
237 "Warning: Recoverable hardware error <0x%lx 0x%x>\n",
238 *((unsigned long *)&ras_log_buf), status);
239 }
240
241 spin_unlock(&ras_log_buf_lock);
242 return IRQ_HANDLED;
243}
244
245/* Get the error information for errors coming through the
246 * FWNMI vectors. The pt_regs' r3 will be updated to reflect
247 * the actual r3 if possible, and a ptr to the error log entry
248 * will be returned if found.
249 *
250 * The mce_data_buf does not have any locks or protection around it;
251 * if a second machine check comes in, or a system reset is done
252 * before we have logged the error, then we will get corruption in the
253 * error log. This is preferable over holding off on calling
254 * ibm,nmi-interlock which would result in us checkstopping if a
255 * second machine check did come in.
256 */
257static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
258{
259 unsigned long errdata = regs->gpr[3];
260 struct rtas_error_log *errhdr = NULL;
261 unsigned long *savep;
262
263 if ((errdata >= 0x7000 && errdata < 0x7fff0) ||
264 (errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) {
265 savep = __va(errdata);
266 regs->gpr[3] = savep[0]; /* restore original r3 */
267 memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
268 memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX);
269 errhdr = (struct rtas_error_log *)mce_data_buf;
270 } else {
271 printk("FWNMI: corrupt r3\n");
272 }
273 return errhdr;
274}
275
276/* Call this when done with the data returned by FWNMI_get_errinfo.
277 * It will release the saved data area for other CPUs in the
278 * partition to receive FWNMI errors.
279 */
280static void fwnmi_release_errinfo(void)
281{
282 int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
283 if (ret != 0)
284 printk("FWNMI: nmi-interlock failed: %d\n", ret);
285}
286
287void pSeries_system_reset_exception(struct pt_regs *regs)
288{
289 if (fwnmi_active) {
290 struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
291 if (errhdr) {
292 /* XXX Should look at FWNMI information */
293 }
294 fwnmi_release_errinfo();
295 }
296}
297
298/*
299 * See if we can recover from a machine check exception.
300 * This is only called on power4 (or above) and only via
301 * the Firmware Non-Maskable Interrupts (fwnmi) handler
302 * which provides the error analysis for us.
303 *
304 * Return 1 if corrected (or delivered a signal).
305 * Return 0 if there is nothing we can do.
306 */
307static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
308{
309 int nonfatal = 0;
310
311 if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
312 /* Platform corrected itself */
313 nonfatal = 1;
314 } else if ((regs->msr & MSR_RI) &&
315 user_mode(regs) &&
316 err->severity == RTAS_SEVERITY_ERROR_SYNC &&
317 err->disposition == RTAS_DISP_NOT_RECOVERED &&
318 err->target == RTAS_TARGET_MEMORY &&
319 err->type == RTAS_TYPE_ECC_UNCORR &&
320 !(current->pid == 0 || current->pid == 1)) {
321 /* Kill off a user process with an ECC error */
322 printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
323 current->pid);
324 /* XXX something better for ECC error? */
325 _exception(SIGBUS, regs, BUS_ADRERR, regs->nip);
326 nonfatal = 1;
327 }
328
329 log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal);
330
331 return nonfatal;
332}
333
334/*
335 * Handle a machine check.
336 *
337 * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
338 * should be present. If so the handler which called us tells us if the
339 * error was recovered (never true if RI=0).
340 *
341 * On hardware prior to Power 4 these exceptions were asynchronous which
342 * means we can't tell exactly where it occurred and so we can't recover.
343 */
344int pSeries_machine_check_exception(struct pt_regs *regs)
345{
346 struct rtas_error_log *errp;
347
348 if (fwnmi_active) {
349 errp = fwnmi_get_errinfo(regs);
350 fwnmi_release_errinfo();
351 if (errp && recover_mce(regs, errp))
352 return 1;
353 }
354
355 return 0;
356}
diff --git a/arch/ppc64/kernel/rtas-proc.c b/arch/ppc64/kernel/rtas-proc.c
new file mode 100644
index 000000000000..28b1f1521f21
--- /dev/null
+++ b/arch/ppc64/kernel/rtas-proc.c
@@ -0,0 +1,807 @@
1/*
2 * arch/ppc64/kernel/rtas-proc.c
3 * Copyright (C) 2000 Tilmann Bitterberg
4 * (tilmann@bitterberg.de)
5 *
6 * RTAS (Runtime Abstraction Services) stuff
7 * Intention is to provide a clean user interface
8 * to use the RTAS.
9 *
10 * TODO:
11 * Split off a header file and maybe move it to a different
12 * location. Write Documentation on what the /proc/rtas/ entries
13 * actually do.
14 */
15
16#include <linux/errno.h>
17#include <linux/sched.h>
18#include <linux/proc_fs.h>
19#include <linux/stat.h>
20#include <linux/ctype.h>
21#include <linux/time.h>
22#include <linux/string.h>
23#include <linux/init.h>
24#include <linux/seq_file.h>
25#include <linux/bitops.h>
26
27#include <asm/uaccess.h>
28#include <asm/processor.h>
29#include <asm/io.h>
30#include <asm/prom.h>
31#include <asm/rtas.h>
32#include <asm/machdep.h> /* for ppc_md */
33#include <asm/time.h>
34#include <asm/systemcfg.h>
35
36/* Token for Sensors */
37#define KEY_SWITCH 0x0001
38#define ENCLOSURE_SWITCH 0x0002
39#define THERMAL_SENSOR 0x0003
40#define LID_STATUS 0x0004
41#define POWER_SOURCE 0x0005
42#define BATTERY_VOLTAGE 0x0006
43#define BATTERY_REMAINING 0x0007
44#define BATTERY_PERCENTAGE 0x0008
45#define EPOW_SENSOR 0x0009
46#define BATTERY_CYCLESTATE 0x000a
47#define BATTERY_CHARGING 0x000b
48
49/* IBM specific sensors */
50#define IBM_SURVEILLANCE 0x2328 /* 9000 */
51#define IBM_FANRPM 0x2329 /* 9001 */
52#define IBM_VOLTAGE 0x232a /* 9002 */
53#define IBM_DRCONNECTOR 0x232b /* 9003 */
54#define IBM_POWERSUPPLY 0x232c /* 9004 */
55
56/* Status return values */
57#define SENSOR_CRITICAL_HIGH 13
58#define SENSOR_WARNING_HIGH 12
59#define SENSOR_NORMAL 11
60#define SENSOR_WARNING_LOW 10
61#define SENSOR_CRITICAL_LOW 9
62#define SENSOR_SUCCESS 0
63#define SENSOR_HW_ERROR -1
64#define SENSOR_BUSY -2
65#define SENSOR_NOT_EXIST -3
66#define SENSOR_DR_ENTITY -9000
67
68/* Location Codes */
69#define LOC_SCSI_DEV_ADDR 'A'
70#define LOC_SCSI_DEV_LOC 'B'
71#define LOC_CPU 'C'
72#define LOC_DISKETTE 'D'
73#define LOC_ETHERNET 'E'
74#define LOC_FAN 'F'
75#define LOC_GRAPHICS 'G'
76/* reserved / not used 'H' */
77#define LOC_IO_ADAPTER 'I'
78/* reserved / not used 'J' */
79#define LOC_KEYBOARD 'K'
80#define LOC_LCD 'L'
81#define LOC_MEMORY 'M'
82#define LOC_NV_MEMORY 'N'
83#define LOC_MOUSE 'O'
84#define LOC_PLANAR 'P'
85#define LOC_OTHER_IO 'Q'
86#define LOC_PARALLEL 'R'
87#define LOC_SERIAL 'S'
88#define LOC_DEAD_RING 'T'
89#define LOC_RACKMOUNTED		'U' /* 'U' for _u_nit, i.e. rack mounted */
90#define LOC_VOLTAGE 'V'
91#define LOC_SWITCH_ADAPTER 'W'
92#define LOC_OTHER 'X'
93#define LOC_FIRMWARE 'Y'
94#define LOC_SCSI 'Z'
95
96/* Tokens for indicators */
97#define TONE_FREQUENCY 0x0001 /* 0 - 1000 (HZ)*/
98#define TONE_VOLUME 0x0002 /* 0 - 100 (%) */
99#define SYSTEM_POWER_STATE 0x0003
100#define WARNING_LIGHT 0x0004
101#define DISK_ACTIVITY_LIGHT 0x0005
102#define HEX_DISPLAY_UNIT 0x0006
103#define BATTERY_WARNING_TIME 0x0007
104#define CONDITION_CYCLE_REQUEST 0x0008
105#define SURVEILLANCE_INDICATOR 0x2328 /* 9000 */
106#define DR_ACTION 0x2329 /* 9001 */
107#define DR_INDICATOR 0x232a /* 9002 */
108/* 9003 - 9004: Vendor specific */
109/* 9006 - 9999: Vendor specific */
110
111/* other */
112#define MAX_SENSORS 17 /* I only know of 17 sensors */
113#define MAX_LINELENGTH 256
114#define SENSOR_PREFIX "ibm,sensor-"
115#define cel_to_fahr(x) ((x*9/5)+32)
116
117
118/* Globals */
119static struct rtas_sensors sensors;
120static struct device_node *rtas_node = NULL;
121static unsigned long power_on_time = 0; /* Save the time the user set */
122static char progress_led[MAX_LINELENGTH];
123
124static unsigned long rtas_tone_frequency = 1000;
125static unsigned long rtas_tone_volume = 0;
126
127/* ****************STRUCTS******************************************* */
128struct individual_sensor {
129 unsigned int token;
130 unsigned int quant;
131};
132
133struct rtas_sensors {
134 struct individual_sensor sensor[MAX_SENSORS];
135 unsigned int quant;
136};
137
138/* ****************************************************************** */
139/* Declarations */
140static int ppc_rtas_sensors_show(struct seq_file *m, void *v);
141static int ppc_rtas_clock_show(struct seq_file *m, void *v);
142static ssize_t ppc_rtas_clock_write(struct file *file,
143 const char __user *buf, size_t count, loff_t *ppos);
144static int ppc_rtas_progress_show(struct seq_file *m, void *v);
145static ssize_t ppc_rtas_progress_write(struct file *file,
146 const char __user *buf, size_t count, loff_t *ppos);
147static int ppc_rtas_poweron_show(struct seq_file *m, void *v);
148static ssize_t ppc_rtas_poweron_write(struct file *file,
149 const char __user *buf, size_t count, loff_t *ppos);
150
151static ssize_t ppc_rtas_tone_freq_write(struct file *file,
152 const char __user *buf, size_t count, loff_t *ppos);
153static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v);
154static ssize_t ppc_rtas_tone_volume_write(struct file *file,
155 const char __user *buf, size_t count, loff_t *ppos);
156static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v);
157static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v);
158
159static int sensors_open(struct inode *inode, struct file *file)
160{
161 return single_open(file, ppc_rtas_sensors_show, NULL);
162}
163
164struct file_operations ppc_rtas_sensors_operations = {
165 .open = sensors_open,
166 .read = seq_read,
167 .llseek = seq_lseek,
168 .release = single_release,
169};
170
171static int poweron_open(struct inode *inode, struct file *file)
172{
173 return single_open(file, ppc_rtas_poweron_show, NULL);
174}
175
176struct file_operations ppc_rtas_poweron_operations = {
177 .open = poweron_open,
178 .read = seq_read,
179 .llseek = seq_lseek,
180 .write = ppc_rtas_poweron_write,
181 .release = single_release,
182};
183
184static int progress_open(struct inode *inode, struct file *file)
185{
186 return single_open(file, ppc_rtas_progress_show, NULL);
187}
188
189struct file_operations ppc_rtas_progress_operations = {
190 .open = progress_open,
191 .read = seq_read,
192 .llseek = seq_lseek,
193 .write = ppc_rtas_progress_write,
194 .release = single_release,
195};
196
197static int clock_open(struct inode *inode, struct file *file)
198{
199 return single_open(file, ppc_rtas_clock_show, NULL);
200}
201
202struct file_operations ppc_rtas_clock_operations = {
203 .open = clock_open,
204 .read = seq_read,
205 .llseek = seq_lseek,
206 .write = ppc_rtas_clock_write,
207 .release = single_release,
208};
209
210static int tone_freq_open(struct inode *inode, struct file *file)
211{
212 return single_open(file, ppc_rtas_tone_freq_show, NULL);
213}
214
215struct file_operations ppc_rtas_tone_freq_operations = {
216 .open = tone_freq_open,
217 .read = seq_read,
218 .llseek = seq_lseek,
219 .write = ppc_rtas_tone_freq_write,
220 .release = single_release,
221};
222
223static int tone_volume_open(struct inode *inode, struct file *file)
224{
225 return single_open(file, ppc_rtas_tone_volume_show, NULL);
226}
227
228struct file_operations ppc_rtas_tone_volume_operations = {
229 .open = tone_volume_open,
230 .read = seq_read,
231 .llseek = seq_lseek,
232 .write = ppc_rtas_tone_volume_write,
233 .release = single_release,
234};
235
236static int rmo_buf_open(struct inode *inode, struct file *file)
237{
238 return single_open(file, ppc_rtas_rmo_buf_show, NULL);
239}
240
241struct file_operations ppc_rtas_rmo_buf_ops = {
242 .open = rmo_buf_open,
243 .read = seq_read,
244 .llseek = seq_lseek,
245 .release = single_release,
246};
247
248static int ppc_rtas_find_all_sensors(void);
249static void ppc_rtas_process_sensor(struct seq_file *m,
250 struct individual_sensor *s, int state, int error, char *loc);
251static char *ppc_rtas_process_error(int error);
252static void get_location_code(struct seq_file *m,
253 struct individual_sensor *s, char *loc);
254static void check_location_string(struct seq_file *m, char *c);
255static void check_location(struct seq_file *m, char *c);
256
257static int __init proc_rtas_init(void)
258{
259 struct proc_dir_entry *entry;
260
261 if (!(systemcfg->platform & PLATFORM_PSERIES))
262 return 1;
263
264 rtas_node = of_find_node_by_name(NULL, "rtas");
265 if (rtas_node == NULL)
266 return 1;
267
268 entry = create_proc_entry("ppc64/rtas/progress", S_IRUGO|S_IWUSR, NULL);
269 if (entry)
270 entry->proc_fops = &ppc_rtas_progress_operations;
271
272 entry = create_proc_entry("ppc64/rtas/clock", S_IRUGO|S_IWUSR, NULL);
273 if (entry)
274 entry->proc_fops = &ppc_rtas_clock_operations;
275
276 entry = create_proc_entry("ppc64/rtas/poweron", S_IWUSR|S_IRUGO, NULL);
277 if (entry)
278 entry->proc_fops = &ppc_rtas_poweron_operations;
279
280 entry = create_proc_entry("ppc64/rtas/sensors", S_IRUGO, NULL);
281 if (entry)
282 entry->proc_fops = &ppc_rtas_sensors_operations;
283
284 entry = create_proc_entry("ppc64/rtas/frequency", S_IWUSR|S_IRUGO,
285 NULL);
286 if (entry)
287 entry->proc_fops = &ppc_rtas_tone_freq_operations;
288
289 entry = create_proc_entry("ppc64/rtas/volume", S_IWUSR|S_IRUGO, NULL);
290 if (entry)
291 entry->proc_fops = &ppc_rtas_tone_volume_operations;
292
293 entry = create_proc_entry("ppc64/rtas/rmo_buffer", S_IRUSR, NULL);
294 if (entry)
295 entry->proc_fops = &ppc_rtas_rmo_buf_ops;
296
297 return 0;
298}
299
300__initcall(proc_rtas_init);
301
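/*
 * Usage sketch: the entries registered above are plain proc files, so
 * userspace can drive them with stdio; the path and value here are
 * examples only.
 */
#include <stdio.h>

int set_rtas_tone_frequency(unsigned long hz)
{
	FILE *f = fopen("/proc/ppc64/rtas/frequency", "w");

	if (!f)
		return -1;
	fprintf(f, "%lu\n", hz);
	return fclose(f);
}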
302static int parse_number(const char __user *p, size_t count, unsigned long *val)
303{
304 char buf[40];
305 char *end;
306
307 if (count > 39)
308 return -EINVAL;
309
310 if (copy_from_user(buf, p, count))
311 return -EFAULT;
312
313 buf[count] = 0;
314
315 *val = simple_strtoul(buf, &end, 10);
316 if (*end && *end != '\n')
317 return -EINVAL;
318
319 return 0;
320}
321
322/* ****************************************************************** */
323/* POWER-ON-TIME */
324/* ****************************************************************** */
325static ssize_t ppc_rtas_poweron_write(struct file *file,
326 const char __user *buf, size_t count, loff_t *ppos)
327{
328 struct rtc_time tm;
329 unsigned long nowtime;
330 int error = parse_number(buf, count, &nowtime);
331 if (error)
332 return error;
333
334 power_on_time = nowtime; /* save the time */
335
336 to_tm(nowtime, &tm);
337
338 error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL,
339 tm.tm_year, tm.tm_mon, tm.tm_mday,
340 tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */);
341 if (error)
342 printk(KERN_WARNING "error: setting poweron time returned: %s\n",
343 ppc_rtas_process_error(error));
344 return count;
345}
346/* ****************************************************************** */
347static int ppc_rtas_poweron_show(struct seq_file *m, void *v)
348{
349 if (power_on_time == 0)
350 seq_printf(m, "Power on time not set\n");
351 else
352 seq_printf(m, "%lu\n",power_on_time);
353 return 0;
354}
355
356/* ****************************************************************** */
357/* PROGRESS */
358/* ****************************************************************** */
359static ssize_t ppc_rtas_progress_write(struct file *file,
360 const char __user *buf, size_t count, loff_t *ppos)
361{
362 unsigned long hex;
363
364 if (count >= MAX_LINELENGTH)
365		count = MAX_LINELENGTH - 1;
366 if (copy_from_user(progress_led, buf, count)) { /* save the string */
367 return -EFAULT;
368 }
369 progress_led[count] = 0;
370
371	/* Let's see if the user passed a number (parsed as base 10) */
372 hex = simple_strtoul(progress_led, NULL, 10);
373
374 ppc_md.progress ((char *)progress_led, hex);
375 return count;
376
377 /* clear the line */
378 /* ppc_md.progress(" ", 0xffff);*/
379}
380/* ****************************************************************** */
381static int ppc_rtas_progress_show(struct seq_file *m, void *v)
382{
383	if (progress_led[0])
384 seq_printf(m, "%s\n", progress_led);
385 return 0;
386}
387
388/* ****************************************************************** */
389/* CLOCK */
390/* ****************************************************************** */
391static ssize_t ppc_rtas_clock_write(struct file *file,
392 const char __user *buf, size_t count, loff_t *ppos)
393{
394 struct rtc_time tm;
395 unsigned long nowtime;
396 int error = parse_number(buf, count, &nowtime);
397 if (error)
398 return error;
399
400 to_tm(nowtime, &tm);
401 error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
402 tm.tm_year, tm.tm_mon, tm.tm_mday,
403 tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
404 if (error)
405 printk(KERN_WARNING "error: setting the clock returned: %s\n",
406 ppc_rtas_process_error(error));
407 return count;
408}
409/* ****************************************************************** */
410static int ppc_rtas_clock_show(struct seq_file *m, void *v)
411{
412 int ret[8];
413 int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
414
415 if (error) {
416 printk(KERN_WARNING "error: reading the clock returned: %s\n",
417 ppc_rtas_process_error(error));
418 seq_printf(m, "0");
419 } else {
420 unsigned int year, mon, day, hour, min, sec;
421 year = ret[0]; mon = ret[1]; day = ret[2];
422 hour = ret[3]; min = ret[4]; sec = ret[5];
423 seq_printf(m, "%lu\n",
424 mktime(year, mon, day, hour, min, sec));
425 }
426 return 0;
427}
428
429/* ****************************************************************** */
430/* SENSOR STUFF */
431/* ****************************************************************** */
432static int ppc_rtas_sensors_show(struct seq_file *m, void *v)
433{
434 int i,j;
435 int state, error;
436 int get_sensor_state = rtas_token("get-sensor-state");
437
438 seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n");
439 seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n");
440 seq_printf(m, "********************************************************\n");
441
442 if (ppc_rtas_find_all_sensors() != 0) {
443 seq_printf(m, "\nNo sensors are available\n");
444 return 0;
445 }
446
447 for (i=0; i<sensors.quant; i++) {
448 struct individual_sensor *p = &sensors.sensor[i];
449 char rstr[64];
450 char *loc;
451 int llen, offs;
452
453 sprintf (rstr, SENSOR_PREFIX"%04d", p->token);
454 loc = (char *) get_property(rtas_node, rstr, &llen);
455
456 /* A sensor may have multiple instances */
457 for (j = 0, offs = 0; j <= p->quant; j++) {
458 error = rtas_call(get_sensor_state, 2, 2, &state,
459 p->token, j);
460
461 ppc_rtas_process_sensor(m, p, state, error, loc);
462 seq_putc(m, '\n');
463 if (loc) {
464 offs += strlen(loc) + 1;
465 loc += strlen(loc) + 1;
466 if (offs >= llen)
467 loc = NULL;
468 }
469 }
470 }
471 return 0;
472}
473
474/* ****************************************************************** */
475
476static int ppc_rtas_find_all_sensors(void)
477{
478 unsigned int *utmp;
479 int len, i;
480
481 utmp = (unsigned int *) get_property(rtas_node, "rtas-sensors", &len);
482 if (utmp == NULL) {
483 printk (KERN_ERR "error: could not get rtas-sensors\n");
484 return 1;
485 }
486
487	sensors.quant = len / 8;	/* two ints (token, quant) per sensor */
488
489 for (i=0; i<sensors.quant; i++) {
490 sensors.sensor[i].token = *utmp++;
491 sensors.sensor[i].quant = *utmp++;
492 }
493 return 0;
494}
495
496/* ****************************************************************** */
497/*
498 * Builds a string of what rtas returned
499 */
500static char *ppc_rtas_process_error(int error)
501{
502 switch (error) {
503 case SENSOR_CRITICAL_HIGH:
504 return "(critical high)";
505 case SENSOR_WARNING_HIGH:
506 return "(warning high)";
507 case SENSOR_NORMAL:
508 return "(normal)";
509 case SENSOR_WARNING_LOW:
510 return "(warning low)";
511 case SENSOR_CRITICAL_LOW:
512 return "(critical low)";
513 case SENSOR_SUCCESS:
514 return "(read ok)";
515 case SENSOR_HW_ERROR:
516 return "(hardware error)";
517 case SENSOR_BUSY:
518 return "(busy)";
519 case SENSOR_NOT_EXIST:
520 return "(non existent)";
521 case SENSOR_DR_ENTITY:
522 return "(dr entity removed)";
523 default:
524 return "(UNKNOWN)";
525 }
526}
527
528/* ****************************************************************** */
529/*
530 * Builds a string out of what the sensor said
531 */
532
533static void ppc_rtas_process_sensor(struct seq_file *m,
534 struct individual_sensor *s, int state, int error, char *loc)
535{
536	/* Defined return values */
537 const char * key_switch[] = { "Off\t", "Normal\t", "Secure\t",
538 "Maintenance" };
539 const char * enclosure_switch[] = { "Closed", "Open" };
540 const char * lid_status[] = { " ", "Open", "Closed" };
541 const char * power_source[] = { "AC\t", "Battery",
542 "AC & Battery" };
543 const char * battery_remaining[] = { "Very Low", "Low", "Mid", "High" };
544 const char * epow_sensor[] = {
545 "EPOW Reset", "Cooling warning", "Power warning",
546 "System shutdown", "System halt", "EPOW main enclosure",
547 "EPOW power off" };
548 const char * battery_cyclestate[] = { "None", "In progress",
549 "Requested" };
550	const char * battery_charging[] = { "Charging", "Discharging",
551 "No current flow" };
552 const char * ibm_drconnector[] = { "Empty", "Present", "Unusable",
553 "Exchange" };
554
555 int have_strings = 0;
556 int num_states = 0;
557 int temperature = 0;
558 int unknown = 0;
559
560 /* What kind of sensor do we have here? */
561
562 switch (s->token) {
563 case KEY_SWITCH:
564 seq_printf(m, "Key switch:\t");
565 num_states = sizeof(key_switch) / sizeof(char *);
566 if (state < num_states) {
567 seq_printf(m, "%s\t", key_switch[state]);
568 have_strings = 1;
569 }
570 break;
571 case ENCLOSURE_SWITCH:
572 seq_printf(m, "Enclosure switch:\t");
573 num_states = sizeof(enclosure_switch) / sizeof(char *);
574 if (state < num_states) {
575 seq_printf(m, "%s\t",
576 enclosure_switch[state]);
577 have_strings = 1;
578 }
579 break;
580 case THERMAL_SENSOR:
581 seq_printf(m, "Temp. (C/F):\t");
582 temperature = 1;
583 break;
584 case LID_STATUS:
585 seq_printf(m, "Lid status:\t");
586 num_states = sizeof(lid_status) / sizeof(char *);
587 if (state < num_states) {
588 seq_printf(m, "%s\t", lid_status[state]);
589 have_strings = 1;
590 }
591 break;
592 case POWER_SOURCE:
593 seq_printf(m, "Power source:\t");
594 num_states = sizeof(power_source) / sizeof(char *);
595 if (state < num_states) {
596 seq_printf(m, "%s\t",
597 power_source[state]);
598 have_strings = 1;
599 }
600 break;
601 case BATTERY_VOLTAGE:
602 seq_printf(m, "Battery voltage:\t");
603 break;
604 case BATTERY_REMAINING:
605 seq_printf(m, "Battery remaining:\t");
606 num_states = sizeof(battery_remaining) / sizeof(char *);
607 if (state < num_states)
608 {
609 seq_printf(m, "%s\t",
610 battery_remaining[state]);
611 have_strings = 1;
612 }
613 break;
614 case BATTERY_PERCENTAGE:
615 seq_printf(m, "Battery percentage:\t");
616 break;
617 case EPOW_SENSOR:
618 seq_printf(m, "EPOW Sensor:\t");
619 num_states = sizeof(epow_sensor) / sizeof(char *);
620 if (state < num_states) {
621 seq_printf(m, "%s\t", epow_sensor[state]);
622 have_strings = 1;
623 }
624 break;
625 case BATTERY_CYCLESTATE:
626 seq_printf(m, "Battery cyclestate:\t");
627 num_states = sizeof(battery_cyclestate) /
628 sizeof(char *);
629 if (state < num_states) {
630 seq_printf(m, "%s\t",
631 battery_cyclestate[state]);
632 have_strings = 1;
633 }
634 break;
635 case BATTERY_CHARGING:
636 seq_printf(m, "Battery Charging:\t");
637 num_states = sizeof(battery_charging) / sizeof(char *);
638 if (state < num_states) {
639 seq_printf(m, "%s\t",
640 battery_charging[state]);
641 have_strings = 1;
642 }
643 break;
644 case IBM_SURVEILLANCE:
645 seq_printf(m, "Surveillance:\t");
646 break;
647 case IBM_FANRPM:
648 seq_printf(m, "Fan (rpm):\t");
649 break;
650 case IBM_VOLTAGE:
651 seq_printf(m, "Voltage (mv):\t");
652 break;
653 case IBM_DRCONNECTOR:
654 seq_printf(m, "DR connector:\t");
655 num_states = sizeof(ibm_drconnector) / sizeof(char *);
656 if (state < num_states) {
657 seq_printf(m, "%s\t",
658 ibm_drconnector[state]);
659 have_strings = 1;
660 }
661 break;
662 case IBM_POWERSUPPLY:
663 seq_printf(m, "Powersupply:\t");
664 break;
665 default:
666 seq_printf(m, "Unknown sensor (type %d), ignoring it\n",
667 s->token);
668 unknown = 1;
669 have_strings = 1;
670 break;
671 }
672 if (have_strings == 0) {
673 if (temperature) {
674 seq_printf(m, "%4d /%4d\t", state, cel_to_fahr(state));
675 } else
676 seq_printf(m, "%10d\t", state);
677 }
678 if (unknown == 0) {
679 seq_printf(m, "%s\t", ppc_rtas_process_error(error));
680 get_location_code(m, s, loc);
681 }
682}
683
684/* ****************************************************************** */
685
686static void check_location(struct seq_file *m, char *c)
687{
688 switch (c[0]) {
689 case LOC_PLANAR:
690 seq_printf(m, "Planar #%c", c[1]);
691 break;
692 case LOC_CPU:
693 seq_printf(m, "CPU #%c", c[1]);
694 break;
695 case LOC_FAN:
696 seq_printf(m, "Fan #%c", c[1]);
697 break;
698 case LOC_RACKMOUNTED:
699 seq_printf(m, "Rack #%c", c[1]);
700 break;
701 case LOC_VOLTAGE:
702 seq_printf(m, "Voltage #%c", c[1]);
703 break;
704 case LOC_LCD:
705 seq_printf(m, "LCD #%c", c[1]);
706 break;
707 case '.':
708 seq_printf(m, "- %c", c[1]);
709 break;
710 default:
711 seq_printf(m, "Unknown location");
712 break;
713 }
714}
715
716
717/* ****************************************************************** */
718/*
719 * Format:
720 * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ]
721 * the '.' may be an abbreviation
722 */
723static void check_location_string(struct seq_file *m, char *c)
724{
725 while (*c) {
726 if (isalpha(*c) || *c == '.')
727 check_location(m, c);
728 else if (*c == '/' || *c == '-')
729 seq_printf(m, " at ");
730 c++;
731 }
732}
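/*
 * Example (hypothetical location code): "P1-F2" prints as
 * "Planar #1 at Fan #2" under the rules above.
 */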
733
734
735/* ****************************************************************** */
736
737static void get_location_code(struct seq_file *m, struct individual_sensor *s, char *loc)
738{
739 if (!loc || !*loc) {
740 seq_printf(m, "---");/* does not have a location */
741 } else {
742 check_location_string(m, loc);
743 }
744 seq_putc(m, ' ');
745}
746/* ****************************************************************** */
747/* INDICATORS - Tone Frequency */
748/* ****************************************************************** */
749static ssize_t ppc_rtas_tone_freq_write(struct file *file,
750 const char __user *buf, size_t count, loff_t *ppos)
751{
752 unsigned long freq;
753 int error = parse_number(buf, count, &freq);
754 if (error)
755 return error;
756
757 rtas_tone_frequency = freq; /* save it for later */
758 error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL,
759 TONE_FREQUENCY, 0, freq);
760 if (error)
761 printk(KERN_WARNING "error: setting tone frequency returned: %s\n",
762 ppc_rtas_process_error(error));
763 return count;
764}
765/* ****************************************************************** */
766static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v)
767{
768 seq_printf(m, "%lu\n", rtas_tone_frequency);
769 return 0;
770}
771/* ****************************************************************** */
772/* INDICATORS - Tone Volume */
773/* ****************************************************************** */
774static ssize_t ppc_rtas_tone_volume_write(struct file *file,
775 const char __user *buf, size_t count, loff_t *ppos)
776{
777 unsigned long volume;
778 int error = parse_number(buf, count, &volume);
779 if (error)
780 return error;
781
782 if (volume > 100)
783 volume = 100;
784
785 rtas_tone_volume = volume; /* save it for later */
786 error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL,
787 TONE_VOLUME, 0, volume);
788 if (error)
789 printk(KERN_WARNING "error: setting tone volume returned: %s\n",
790 ppc_rtas_process_error(error));
791 return count;
792}
793/* ****************************************************************** */
794static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v)
795{
796 seq_printf(m, "%lu\n", rtas_tone_volume);
797 return 0;
798}
799
800#define RMO_READ_BUF_MAX 30
801
802/* RTAS Userspace access */
803static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v)
804{
805 seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX);
806 return 0;
807}
diff --git a/arch/ppc64/kernel/rtas.c b/arch/ppc64/kernel/rtas.c
new file mode 100644
index 000000000000..5575603def27
--- /dev/null
+++ b/arch/ppc64/kernel/rtas.c
@@ -0,0 +1,657 @@
1/*
2 *
3 * Procedures for interfacing to the RTAS on CHRP machines.
4 *
5 * Peter Bergner, IBM March 2001.
6 * Copyright (C) 2001 IBM.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <stdarg.h>
15#include <linux/kernel.h>
16#include <linux/types.h>
17#include <linux/spinlock.h>
18#include <linux/module.h>
19#include <linux/init.h>
20
21#include <asm/prom.h>
22#include <asm/rtas.h>
23#include <asm/semaphore.h>
24#include <asm/machdep.h>
25#include <asm/page.h>
26#include <asm/param.h>
27#include <asm/system.h>
28#include <asm/abs_addr.h>
29#include <asm/udbg.h>
30#include <asm/delay.h>
31#include <asm/uaccess.h>
32#include <asm/systemcfg.h>
33
34struct flash_block_list_header rtas_firmware_flash_list = {0, NULL};
35
36struct rtas_t rtas = {
37 .lock = SPIN_LOCK_UNLOCKED
38};
39
40EXPORT_SYMBOL(rtas);
41
42char rtas_err_buf[RTAS_ERROR_LOG_MAX];
43
44DEFINE_SPINLOCK(rtas_data_buf_lock);
45char rtas_data_buf[RTAS_DATA_BUF_SIZE]__page_aligned;
46unsigned long rtas_rmo_buf;
47
48void
49call_rtas_display_status(unsigned char c)
50{
51 struct rtas_args *args = &rtas.args;
52 unsigned long s;
53
54 if (!rtas.base)
55 return;
56 spin_lock_irqsave(&rtas.lock, s);
57
58 args->token = 10;
59 args->nargs = 1;
60 args->nret = 1;
61 args->rets = (rtas_arg_t *)&(args->args[1]);
62 args->args[0] = (int)c;
63
64 enter_rtas(__pa(args));
65
66 spin_unlock_irqrestore(&rtas.lock, s);
67}
68
69void
70call_rtas_display_status_delay(unsigned char c)
71{
72 static int pending_newline = 0; /* did last write end with unprinted newline? */
73 static int width = 16;
74
75 if (c == '\n') {
76 while (width-- > 0)
77 call_rtas_display_status(' ');
78 width = 16;
79 udelay(500000);
80 pending_newline = 1;
81 } else {
82 if (pending_newline) {
83 call_rtas_display_status('\r');
84 call_rtas_display_status('\n');
85 }
86 pending_newline = 0;
87 if (width--) {
88 call_rtas_display_status(c);
89 udelay(10000);
90 }
91 }
92}
93
94int
95rtas_token(const char *service)
96{
97 int *tokp;
98 if (rtas.dev == NULL) {
99 PPCDBG(PPCDBG_RTAS,"\tNo rtas device in device-tree...\n");
100 return RTAS_UNKNOWN_SERVICE;
101 }
102 tokp = (int *) get_property(rtas.dev, service, NULL);
103 return tokp ? *tokp : RTAS_UNKNOWN_SERVICE;
104}
105
106/*
107 * Return the firmware-specified size of the error log buffer
108 * for all rtas calls that require an error buffer argument.
109 * This includes 'check-exception' and 'rtas-last-error'.
110 */
111int rtas_get_error_log_max(void)
112{
113 static int rtas_error_log_max;
114 if (rtas_error_log_max)
115 return rtas_error_log_max;
116
117 rtas_error_log_max = rtas_token ("rtas-error-log-max");
118 if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) ||
119 (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) {
120 printk (KERN_WARNING "RTAS: bad log buffer size %d\n", rtas_error_log_max);
121 rtas_error_log_max = RTAS_ERROR_LOG_MAX;
122 }
123 return rtas_error_log_max;
124}
125
126
127/** Return a copy of the detailed error text associated with the
128 * most recent failed call to rtas. Because the error text
129 * might go stale if there are any other intervening rtas calls,
130 * this routine must be called atomically with whatever produced
131 * the error (i.e. with rtas.lock still held from the previous call).
132 */
133static int
134__fetch_rtas_last_error(void)
135{
136 struct rtas_args err_args, save_args;
137 u32 bufsz;
138
139 bufsz = rtas_get_error_log_max();
140
141 err_args.token = rtas_token("rtas-last-error");
142 err_args.nargs = 2;
143 err_args.nret = 1;
144
145 err_args.args[0] = (rtas_arg_t)__pa(rtas_err_buf);
146 err_args.args[1] = bufsz;
147 err_args.args[2] = 0;
148
149 save_args = rtas.args;
150 rtas.args = err_args;
151
152 enter_rtas(__pa(&rtas.args));
153
154 err_args = rtas.args;
155 rtas.args = save_args;
156
157 return err_args.args[2];
158}
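/*
 * Illustrative call pattern (a sketch, not part of the original
 * source): the helper above must run while rtas.lock is still held
 * from the call that failed, e.g.
 *
 *	spin_lock_irqsave(&rtas.lock, s);
 *	enter_rtas(__pa(&rtas.args));
 *	if (rtas.args.rets[0] == -1)
 *		logit = (__fetch_rtas_last_error() == 0);
 *	spin_unlock_irqrestore(&rtas.lock, s);
 *
 * which is exactly the shape rtas_call() below follows.
 */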
159
160int rtas_call(int token, int nargs, int nret, int *outputs, ...)
161{
162 va_list list;
163 int i, logit = 0;
164 unsigned long s;
165 struct rtas_args *rtas_args;
166 char * buff_copy = NULL;
167 int ret;
168
169 PPCDBG(PPCDBG_RTAS, "Entering rtas_call\n");
170 PPCDBG(PPCDBG_RTAS, "\ttoken = 0x%x\n", token);
171 PPCDBG(PPCDBG_RTAS, "\tnargs = %d\n", nargs);
172 PPCDBG(PPCDBG_RTAS, "\tnret = %d\n", nret);
173 PPCDBG(PPCDBG_RTAS, "\t&outputs = 0x%lx\n", outputs);
174 if (token == RTAS_UNKNOWN_SERVICE)
175 return -1;
176
177 /* Gotta do something different here, use global lock for now... */
178 spin_lock_irqsave(&rtas.lock, s);
179 rtas_args = &rtas.args;
180
181 rtas_args->token = token;
182 rtas_args->nargs = nargs;
183 rtas_args->nret = nret;
184 rtas_args->rets = (rtas_arg_t *)&(rtas_args->args[nargs]);
185 va_start(list, outputs);
186 for (i = 0; i < nargs; ++i) {
187 rtas_args->args[i] = va_arg(list, rtas_arg_t);
188 PPCDBG(PPCDBG_RTAS, "\tnarg[%d] = 0x%x\n", i, rtas_args->args[i]);
189 }
190 va_end(list);
191
192 for (i = 0; i < nret; ++i)
193 rtas_args->rets[i] = 0;
194
195 PPCDBG(PPCDBG_RTAS, "\tentering rtas with 0x%lx\n",
196 __pa(rtas_args));
197 enter_rtas(__pa(rtas_args));
198 PPCDBG(PPCDBG_RTAS, "\treturned from rtas ...\n");
199
200 /* A -1 return code indicates that the last command couldn't
201 be completed due to a hardware error. */
202 if (rtas_args->rets[0] == -1)
203 logit = (__fetch_rtas_last_error() == 0);
204
205 ifppcdebug(PPCDBG_RTAS) {
206		for (i = 0; i < nret; i++)
207 udbg_printf("\tnret[%d] = 0x%lx\n", i, (ulong)rtas_args->rets[i]);
208 }
209
210 if (nret > 1 && outputs != NULL)
211 for (i = 0; i < nret-1; ++i)
212 outputs[i] = rtas_args->rets[i+1];
213 ret = (nret > 0)? rtas_args->rets[0]: 0;
214
215 /* Log the error in the unlikely case that there was one. */
216 if (unlikely(logit)) {
217 buff_copy = rtas_err_buf;
218 if (mem_init_done) {
219 buff_copy = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
220 if (buff_copy)
221 memcpy(buff_copy, rtas_err_buf,
222 RTAS_ERROR_LOG_MAX);
223 }
224 }
225
226 /* Gotta do something different here, use global lock for now... */
227 spin_unlock_irqrestore(&rtas.lock, s);
228
229 if (buff_copy) {
230 log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0);
231 if (mem_init_done)
232 kfree(buff_copy);
233 }
234 return ret;
235}
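/*
 * Example caller (illustrative only; the sensor number 3 and index 0
 * are made up): "get-sensor-state" takes two inputs and returns two
 * values, the status in rets[0] (the return value of rtas_call) and
 * the sensor state in rets[1], copied into the outputs array:
 *
 *	int state;
 *	int rc = rtas_call(rtas_token("get-sensor-state"),
 *			   2, 2, &state, 3, 0);
 *
 * See rtas_get_sensor() below for the full version with busy-retry
 * handling.
 */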
236
237/* Given an RTAS status code of 990n, compute the hinted delay of 10^n
238 * (last digit) milliseconds. For now we bound at n=5 (100 sec).
239 */
240unsigned int
241rtas_extended_busy_delay_time(int status)
242{
243 int order = status - 9900;
244 unsigned long ms;
245
246 if (order < 0)
247 order = 0; /* RTC depends on this for -2 clock busy */
248 else if (order > 5)
249 order = 5; /* bound */
250
251	/* Compute 10^order; the result is in milliseconds */
252 for (ms=1; order > 0; order--)
253 ms *= 10;
254
255 return ms;
256}
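/*
 * Worked example (illustrative): status 9903 gives order = 3, so the
 * loop above yields ms = 10 * 10 * 10 = 1000, i.e. a hinted delay of
 * one second; 9905 and anything above clamps to 100000 ms (100 sec).
 */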
257
258int rtas_error_rc(int rtas_rc)
259{
260 int rc;
261
262 switch (rtas_rc) {
263 case -1: /* Hardware Error */
264 rc = -EIO;
265 break;
266 case -3: /* Bad indicator/domain/etc */
267 rc = -EINVAL;
268 break;
269 case -9000: /* Isolation error */
270 rc = -EFAULT;
271 break;
272 case -9001: /* Outstanding TCE/PTE */
273 rc = -EEXIST;
274 break;
275 case -9002: /* No usable slot */
276 rc = -ENODEV;
277 break;
278 default:
279 printk(KERN_ERR "%s: unexpected RTAS error %d\n",
280 __FUNCTION__, rtas_rc);
281 rc = -ERANGE;
282 break;
283 }
284 return rc;
285}
286
287int rtas_get_power_level(int powerdomain, int *level)
288{
289 int token = rtas_token("get-power-level");
290 int rc;
291
292 if (token == RTAS_UNKNOWN_SERVICE)
293 return -ENOENT;
294
295 while ((rc = rtas_call(token, 1, 2, level, powerdomain)) == RTAS_BUSY)
296 udelay(1);
297
298 if (rc < 0)
299 return rtas_error_rc(rc);
300 return rc;
301}
302
303int rtas_set_power_level(int powerdomain, int level, int *setlevel)
304{
305 int token = rtas_token("set-power-level");
306 unsigned int wait_time;
307 int rc;
308
309 if (token == RTAS_UNKNOWN_SERVICE)
310 return -ENOENT;
311
312 while (1) {
313 rc = rtas_call(token, 2, 2, setlevel, powerdomain, level);
314 if (rc == RTAS_BUSY)
315 udelay(1);
316 else if (rtas_is_extended_busy(rc)) {
317 wait_time = rtas_extended_busy_delay_time(rc);
318 udelay(wait_time * 1000);
319 } else
320 break;
321 }
322
323 if (rc < 0)
324 return rtas_error_rc(rc);
325 return rc;
326}
327
328int rtas_get_sensor(int sensor, int index, int *state)
329{
330 int token = rtas_token("get-sensor-state");
331 unsigned int wait_time;
332 int rc;
333
334 if (token == RTAS_UNKNOWN_SERVICE)
335 return -ENOENT;
336
337 while (1) {
338 rc = rtas_call(token, 2, 2, state, sensor, index);
339 if (rc == RTAS_BUSY)
340 udelay(1);
341 else if (rtas_is_extended_busy(rc)) {
342 wait_time = rtas_extended_busy_delay_time(rc);
343 udelay(wait_time * 1000);
344 } else
345 break;
346 }
347
348 if (rc < 0)
349 return rtas_error_rc(rc);
350 return rc;
351}
352
353int rtas_set_indicator(int indicator, int index, int new_value)
354{
355 int token = rtas_token("set-indicator");
356 unsigned int wait_time;
357 int rc;
358
359 if (token == RTAS_UNKNOWN_SERVICE)
360 return -ENOENT;
361
362 while (1) {
363 rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value);
364 if (rc == RTAS_BUSY)
365 udelay(1);
366 else if (rtas_is_extended_busy(rc)) {
367 wait_time = rtas_extended_busy_delay_time(rc);
368 udelay(wait_time * 1000);
369 }
370 else
371 break;
372 }
373
374 if (rc < 0)
375 return rtas_error_rc(rc);
376 return rc;
377}
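/*
 * The three wrappers above share one busy-retry shape. A sketch of
 * the shared pattern (illustrative, not a real helper in this file):
 *
 *	do {
 *		rc = rtas_call(token, nargs, nret, outputs, ...);
 *		if (rc == RTAS_BUSY)
 *			udelay(1);
 *		else if (rtas_is_extended_busy(rc))
 *			udelay(rtas_extended_busy_delay_time(rc) * 1000);
 *	} while (rc == RTAS_BUSY || rtas_is_extended_busy(rc));
 */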
378
379#define FLASH_BLOCK_LIST_VERSION (1UL)
380static void
381rtas_flash_firmware(void)
382{
383 unsigned long image_size;
384 struct flash_block_list *f, *next, *flist;
385 unsigned long rtas_block_list;
386 int i, status, update_token;
387
388 update_token = rtas_token("ibm,update-flash-64-and-reboot");
389 if (update_token == RTAS_UNKNOWN_SERVICE) {
390 printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot is not available -- not a service partition?\n");
391 printk(KERN_ALERT "FLASH: firmware will not be flashed\n");
392 return;
393 }
394
395	/* NOTE: the "first" block list is a global var with no data
396	 * blocks, placed in the kernel data segment. We do this because
397	 * we want to ensure this block_list addr is under 4GB.
398 */
399 rtas_firmware_flash_list.num_blocks = 0;
400 flist = (struct flash_block_list *)&rtas_firmware_flash_list;
401 rtas_block_list = virt_to_abs(flist);
402 if (rtas_block_list >= 4UL*1024*1024*1024) {
403 printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n");
404 return;
405 }
406
407 printk(KERN_ALERT "FLASH: preparing saved firmware image for flash\n");
408 /* Update the block_list in place. */
409 image_size = 0;
410 for (f = flist; f; f = next) {
411 /* Translate data addrs to absolute */
412 for (i = 0; i < f->num_blocks; i++) {
413 f->blocks[i].data = (char *)virt_to_abs(f->blocks[i].data);
414 image_size += f->blocks[i].length;
415 }
416 next = f->next;
417 /* Don't translate NULL pointer for last entry */
418 if (f->next)
419 f->next = (struct flash_block_list *)virt_to_abs(f->next);
420 else
421 f->next = NULL;
422 /* make num_blocks into the version/length field */
423 f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16);
424 }
425
426 printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size);
427 printk(KERN_ALERT "FLASH: performing flash and reboot\n");
428 ppc_md.progress("Flashing \n", 0x0);
429 ppc_md.progress("Please Wait... ", 0x0);
430 printk(KERN_ALERT "FLASH: this will take several minutes. Do not power off!\n");
431 status = rtas_call(update_token, 1, 1, NULL, rtas_block_list);
432 switch (status) { /* should only get "bad" status */
433 case 0:
434 printk(KERN_ALERT "FLASH: success\n");
435 break;
436 case -1:
437		printk(KERN_ALERT "FLASH: hardware error. Firmware may not have been flashed\n");
438 break;
439 case -3:
440 printk(KERN_ALERT "FLASH: image is corrupt or not correct for this platform. Firmware not flashed\n");
441 break;
442 case -4:
443 printk(KERN_ALERT "FLASH: flash failed when partially complete. System may not reboot\n");
444 break;
445 default:
446 printk(KERN_ALERT "FLASH: unknown flash return code %d\n", status);
447 break;
448 }
449}
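/*
 * Illustrative example of the num_blocks rewrite above: a block list
 * node holding 7 data blocks becomes
 *
 *	(1UL << 56) | ((7 + 1) * 16) = 0x0100000000000080
 *
 * i.e. list format version 1 in the top byte and the byte length of
 * the node (a 16-byte header plus seven 16-byte block entries) in the
 * low-order bits.
 */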
450
451void rtas_flash_bypass_warning(void)
452{
453 printk(KERN_ALERT "FLASH: firmware flash requires a reboot\n");
454 printk(KERN_ALERT "FLASH: the firmware image will NOT be flashed\n");
455}
456
457
458void
459rtas_restart(char *cmd)
460{
461 if (rtas_firmware_flash_list.next)
462 rtas_flash_firmware();
463
464 printk("RTAS system-reboot returned %d\n",
465 rtas_call(rtas_token("system-reboot"), 0, 1, NULL));
466 for (;;);
467}
468
469void
470rtas_power_off(void)
471{
472 if (rtas_firmware_flash_list.next)
473 rtas_flash_bypass_warning();
474 /* allow power on only with power button press */
475 printk("RTAS power-off returned %d\n",
476 rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
477 for (;;);
478}
479
480void
481rtas_halt(void)
482{
483 if (rtas_firmware_flash_list.next)
484 rtas_flash_bypass_warning();
485 rtas_power_off();
486}
487
488/* Must be in the RMO region, so we place it here */
489static char rtas_os_term_buf[2048];
490
491void rtas_os_term(char *str)
492{
493 int status;
494
495 if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term"))
496 return;
497
498 snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);
499
500 do {
501 status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL,
502 __pa(rtas_os_term_buf));
503
504 if (status == RTAS_BUSY)
505 udelay(1);
506 else if (status != 0)
507 printk(KERN_EMERG "ibm,os-term call failed %d\n",
508 status);
509 } while (status == RTAS_BUSY);
510}
511
512
513asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
514{
515 struct rtas_args args;
516 unsigned long flags;
517 char * buff_copy;
518 int nargs;
519 int err_rc = 0;
520
521 if (!capable(CAP_SYS_ADMIN))
522 return -EPERM;
523
524 if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0)
525 return -EFAULT;
526
527 nargs = args.nargs;
528 if (nargs > ARRAY_SIZE(args.args)
529 || args.nret > ARRAY_SIZE(args.args)
530 || nargs + args.nret > ARRAY_SIZE(args.args))
531 return -EINVAL;
532
533 /* Copy in args. */
534 if (copy_from_user(args.args, uargs->args,
535 nargs * sizeof(rtas_arg_t)) != 0)
536 return -EFAULT;
537
538 buff_copy = kmalloc(RTAS_ERROR_LOG_MAX, GFP_KERNEL);
539
540 spin_lock_irqsave(&rtas.lock, flags);
541
542 rtas.args = args;
543 enter_rtas(__pa(&rtas.args));
544 args = rtas.args;
545
546 args.rets = &args.args[nargs];
547
548 /* A -1 return code indicates that the last command couldn't
549 be completed due to a hardware error. */
550 if (args.rets[0] == -1) {
551 err_rc = __fetch_rtas_last_error();
552 if ((err_rc == 0) && buff_copy) {
553 memcpy(buff_copy, rtas_err_buf, RTAS_ERROR_LOG_MAX);
554 }
555 }
556
557 spin_unlock_irqrestore(&rtas.lock, flags);
558
559 if (buff_copy) {
560 if ((args.rets[0] == -1) && (err_rc == 0)) {
561 log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0);
562 }
563 kfree(buff_copy);
564 }
565
566 /* Copy out args. */
567 if (copy_to_user(uargs->args + nargs,
568 args.args + nargs,
569 args.nret * sizeof(rtas_arg_t)) != 0)
570 return -EFAULT;
571
572 return 0;
573}
574
575/* This version can't take the spinlock, because it never returns */
576
577struct rtas_args rtas_stop_self_args = {
578 /* The token is initialized for real in setup_system() */
579 .token = RTAS_UNKNOWN_SERVICE,
580 .nargs = 0,
581 .nret = 1,
582 .rets = &rtas_stop_self_args.args[0],
583};
584
585void rtas_stop_self(void)
586{
587 struct rtas_args *rtas_args = &rtas_stop_self_args;
588
589 local_irq_disable();
590
591 BUG_ON(rtas_args->token == RTAS_UNKNOWN_SERVICE);
592
593 printk("cpu %u (hwid %u) Ready to die...\n",
594 smp_processor_id(), hard_smp_processor_id());
595 enter_rtas(__pa(rtas_args));
596
597 panic("Alas, I survived.\n");
598}
599
600/*
601 * Call early during boot, before mem init or bootmem, to retrieve the RTAS
602 * information from the device-tree and allocate the RMO buffer for userland
603 * accesses.
604 */
605void __init rtas_initialize(void)
606{
607	/* Get the RTAS device node and fill in our "rtas" structure with
608	 * information about it.
609 */
610 rtas.dev = of_find_node_by_name(NULL, "rtas");
611 if (rtas.dev) {
612 u32 *basep, *entryp;
613 u32 *sizep;
614
615 basep = (u32 *)get_property(rtas.dev, "linux,rtas-base", NULL);
616 sizep = (u32 *)get_property(rtas.dev, "rtas-size", NULL);
617 if (basep != NULL && sizep != NULL) {
618 rtas.base = *basep;
619 rtas.size = *sizep;
620 entryp = (u32 *)get_property(rtas.dev, "linux,rtas-entry", NULL);
621 if (entryp == NULL) /* Ugh */
622 rtas.entry = rtas.base;
623 else
624 rtas.entry = *entryp;
625 } else
626 rtas.dev = NULL;
627 }
628 /* If RTAS was found, allocate the RMO buffer for it and look for
629 * the stop-self token if any
630 */
631 if (rtas.dev) {
632 unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
633 if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
634 rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX);
635
636 rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE,
637 rtas_region);
638
639#ifdef CONFIG_HOTPLUG_CPU
640 rtas_stop_self_args.token = rtas_token("stop-self");
641#endif /* CONFIG_HOTPLUG_CPU */
642 }
643
644}
645
646
647EXPORT_SYMBOL(rtas_firmware_flash_list);
648EXPORT_SYMBOL(rtas_token);
649EXPORT_SYMBOL(rtas_call);
650EXPORT_SYMBOL(rtas_data_buf);
651EXPORT_SYMBOL(rtas_data_buf_lock);
652EXPORT_SYMBOL(rtas_extended_busy_delay_time);
653EXPORT_SYMBOL(rtas_get_sensor);
654EXPORT_SYMBOL(rtas_get_power_level);
655EXPORT_SYMBOL(rtas_set_power_level);
656EXPORT_SYMBOL(rtas_set_indicator);
657EXPORT_SYMBOL(rtas_get_error_log_max);
diff --git a/arch/ppc64/kernel/rtas_flash.c b/arch/ppc64/kernel/rtas_flash.c
new file mode 100644
index 000000000000..3213837282ca
--- /dev/null
+++ b/arch/ppc64/kernel/rtas_flash.c
@@ -0,0 +1,725 @@
1/*
2 * (c) 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * /proc/ppc64/rtas/firmware_flash interface
10 *
11 * This file implements a firmware_flash interface to pump a firmware
12 * image into the kernel. At reboot time rtas_restart() will see the
13 * firmware image and flash it as it reboots (see rtas.c).
14 */
15
16#include <linux/module.h>
17#include <linux/init.h>
18#include <linux/proc_fs.h>
19#include <asm/delay.h>
20#include <asm/uaccess.h>
21#include <asm/rtas.h>
22
23#define MODULE_VERS "1.0"
24#define MODULE_NAME "rtas_flash"
25
26#define FIRMWARE_FLASH_NAME "firmware_flash"
27#define FIRMWARE_UPDATE_NAME "firmware_update"
28#define MANAGE_FLASH_NAME "manage_flash"
29#define VALIDATE_FLASH_NAME "validate_flash"
30
31/* General RTAS Status Codes */
32#define RTAS_RC_SUCCESS 0
33#define RTAS_RC_HW_ERR -1
34#define RTAS_RC_BUSY -2
35
36/* Flash image status values */
37#define FLASH_AUTH -9002 /* RTAS Not Service Authority Partition */
38#define FLASH_NO_OP -1099 /* No operation initiated by user */
39#define FLASH_IMG_SHORT -1005 /* Flash image shorter than expected */
40#define FLASH_IMG_BAD_LEN -1004 /* Bad length value in flash list block */
41#define FLASH_IMG_NULL_DATA -1003 /* Bad data value in flash list block */
42#define FLASH_IMG_READY 0 /* Firmware img ready for flash on reboot */
43
44/* Manage image status values */
45#define MANAGE_AUTH -9002 /* RTAS Not Service Authority Partition */
46#define MANAGE_ACTIVE_ERR -9001 /* RTAS Cannot Overwrite Active Img */
47#define MANAGE_NO_OP -1099 /* No operation initiated by user */
48#define MANAGE_PARAM_ERR -3 /* RTAS Parameter Error */
49#define MANAGE_HW_ERR -1 /* RTAS Hardware Error */
50
51/* Validate image status values */
52#define VALIDATE_AUTH -9002 /* RTAS Not Service Authority Partition */
53#define VALIDATE_NO_OP -1099 /* No operation initiated by the user */
54#define VALIDATE_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */
55#define VALIDATE_READY -1001 /* Firmware image ready for validation */
56#define VALIDATE_PARAM_ERR -3 /* RTAS Parameter Error */
57#define VALIDATE_HW_ERR -1 /* RTAS Hardware Error */
58#define VALIDATE_TMP_UPDATE 0 /* Validate Return Status */
59#define VALIDATE_FLASH_AUTH 1 /* Validate Return Status */
60#define VALIDATE_INVALID_IMG 2 /* Validate Return Status */
61#define VALIDATE_CUR_UNKNOWN 3 /* Validate Return Status */
62#define VALIDATE_TMP_COMMIT_DL 4 /* Validate Return Status */
63#define VALIDATE_TMP_COMMIT 5 /* Validate Return Status */
64#define VALIDATE_TMP_UPDATE_DL 6 /* Validate Return Status */
65
66/* ibm,manage-flash-image operation tokens */
67#define RTAS_REJECT_TMP_IMG 0
68#define RTAS_COMMIT_TMP_IMG 1
69
70/* Array sizes */
71#define VALIDATE_BUF_SIZE 4096
72#define RTAS_MSG_MAXLEN 64
73
74/* Local copy of the flash block list.
75 * We only allow one open of the flash proc file and create this
76 * list as we go. This list will be put in the kernel's
77 * rtas_firmware_flash_list global var once it is fully read.
78 *
79 * For convenience as we build the list we use virtual addrs,
80 * we do not fill in the version number, and the length field
81 * is treated as the number of entries currently in the block
82 * (i.e. not a byte count). This is all fixed on release.
83 */
84
85/* Status int must be first member of struct */
86struct rtas_update_flash_t
87{
88 int status; /* Flash update status */
89 struct flash_block_list *flist; /* Local copy of flash block list */
90};
91
92/* Status int must be first member of struct */
93struct rtas_manage_flash_t
94{
95 int status; /* Returned status */
96 unsigned int op; /* Reject or commit image */
97};
98
99/* Status int must be first member of struct */
100struct rtas_validate_flash_t
101{
102 int status; /* Returned status */
103 char buf[VALIDATE_BUF_SIZE]; /* Candidate image buffer */
104 unsigned int buf_size; /* Size of image buf */
105 unsigned int update_results; /* Update results token */
106};
107
108static DEFINE_SPINLOCK(flash_file_open_lock);
109static struct proc_dir_entry *firmware_flash_pde;
110static struct proc_dir_entry *firmware_update_pde;
111static struct proc_dir_entry *validate_pde;
112static struct proc_dir_entry *manage_pde;
113
114/* Do simple sanity checks on the flash image. */
115static int flash_list_valid(struct flash_block_list *flist)
116{
117 struct flash_block_list *f;
118 int i;
119 unsigned long block_size, image_size;
120
121 /* Paranoid self test here. We also collect the image size. */
122 image_size = 0;
123 for (f = flist; f; f = f->next) {
124 for (i = 0; i < f->num_blocks; i++) {
125 if (f->blocks[i].data == NULL) {
126 return FLASH_IMG_NULL_DATA;
127 }
128 block_size = f->blocks[i].length;
129 if (block_size <= 0 || block_size > PAGE_SIZE) {
130 return FLASH_IMG_BAD_LEN;
131 }
132 image_size += block_size;
133 }
134 }
135
136 if (image_size < (256 << 10)) {
137 if (image_size < 2)
138 return FLASH_NO_OP;
139 }
140
141 printk(KERN_INFO "FLASH: flash image with %ld bytes stored for hardware flash on reboot\n", image_size);
142
143 return FLASH_IMG_READY;
144}
145
146static void free_flash_list(struct flash_block_list *f)
147{
148 struct flash_block_list *next;
149 int i;
150
151 while (f) {
152 for (i = 0; i < f->num_blocks; i++)
153 free_page((unsigned long)(f->blocks[i].data));
154 next = f->next;
155 free_page((unsigned long)f);
156 f = next;
157 }
158}
159
160static int rtas_flash_release(struct inode *inode, struct file *file)
161{
162 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
163 struct rtas_update_flash_t *uf;
164
165 uf = (struct rtas_update_flash_t *) dp->data;
166 if (uf->flist) {
167 /* File was opened in write mode for a new flash attempt */
168 /* Clear saved list */
169 if (rtas_firmware_flash_list.next) {
170 free_flash_list(rtas_firmware_flash_list.next);
171 rtas_firmware_flash_list.next = NULL;
172 }
173
174 if (uf->status != FLASH_AUTH)
175 uf->status = flash_list_valid(uf->flist);
176
177 if (uf->status == FLASH_IMG_READY)
178 rtas_firmware_flash_list.next = uf->flist;
179 else
180 free_flash_list(uf->flist);
181
182 uf->flist = NULL;
183 }
184
185 atomic_dec(&dp->count);
186 return 0;
187}
188
189static void get_flash_status_msg(int status, char *buf)
190{
191 char *msg;
192
193 switch (status) {
194 case FLASH_AUTH:
195 msg = "error: this partition does not have service authority\n";
196 break;
197 case FLASH_NO_OP:
198 msg = "info: no firmware image for flash\n";
199 break;
200 case FLASH_IMG_SHORT:
201 msg = "error: flash image short\n";
202 break;
203 case FLASH_IMG_BAD_LEN:
204 msg = "error: internal error bad length\n";
205 break;
206 case FLASH_IMG_NULL_DATA:
207 msg = "error: internal error null data\n";
208 break;
209 case FLASH_IMG_READY:
210 msg = "ready: firmware image ready for flash on reboot\n";
211 break;
212 default:
213 sprintf(buf, "error: unexpected status value %d\n", status);
214 return;
215 }
216
217 strcpy(buf, msg);
218}
219
220/* Reading the proc file will show status (not the firmware contents) */
221static ssize_t rtas_flash_read(struct file *file, char *buf,
222 size_t count, loff_t *ppos)
223{
224 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
225 struct rtas_update_flash_t *uf;
226 char msg[RTAS_MSG_MAXLEN];
227 int msglen;
228
229 uf = (struct rtas_update_flash_t *) dp->data;
230
231 if (!strcmp(dp->name, FIRMWARE_FLASH_NAME)) {
232 get_flash_status_msg(uf->status, msg);
233 } else { /* FIRMWARE_UPDATE_NAME */
234 sprintf(msg, "%d\n", uf->status);
235 }
236 msglen = strlen(msg);
237 if (msglen > count)
238 msglen = count;
239
240 if (ppos && *ppos != 0)
241 return 0; /* be cheap */
242
243 if (!access_ok(VERIFY_WRITE, buf, msglen))
244 return -EINVAL;
245
246 if (copy_to_user(buf, msg, msglen))
247 return -EFAULT;
248
249 if (ppos)
250 *ppos = msglen;
251 return msglen;
252}
253
254/* We could be much more efficient here. But to keep this function
255 * simple we allocate a page to the block list no matter how small the
256 * count is. If the system is low on memory it will be just as well
257 * that we fail....
258 */
259static ssize_t rtas_flash_write(struct file *file, const char *buffer,
260 size_t count, loff_t *off)
261{
262 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
263 struct rtas_update_flash_t *uf;
264 char *p;
265 int next_free;
266 struct flash_block_list *fl;
267
268 uf = (struct rtas_update_flash_t *) dp->data;
269
270 if (uf->status == FLASH_AUTH || count == 0)
271 return count; /* discard data */
272
273 /* In the case that the image is not ready for flashing, the memory
274 * allocated for the block list will be freed upon the release of the
275 * proc file
276 */
277 if (uf->flist == NULL) {
278 uf->flist = (struct flash_block_list *) get_zeroed_page(GFP_KERNEL);
279 if (!uf->flist)
280 return -ENOMEM;
281 }
282
283 fl = uf->flist;
284 while (fl->next)
285 fl = fl->next; /* seek to last block_list for append */
286 next_free = fl->num_blocks;
287 if (next_free == FLASH_BLOCKS_PER_NODE) {
288 /* Need to allocate another block_list */
289 fl->next = (struct flash_block_list *)get_zeroed_page(GFP_KERNEL);
290 if (!fl->next)
291 return -ENOMEM;
292 fl = fl->next;
293 next_free = 0;
294 }
295
296 if (count > PAGE_SIZE)
297 count = PAGE_SIZE;
298 p = (char *)get_zeroed_page(GFP_KERNEL);
299 if (!p)
300 return -ENOMEM;
301
302	if (copy_from_user(p, buffer, count)) {
303 free_page((unsigned long)p);
304 return -EFAULT;
305 }
306 fl->blocks[next_free].data = p;
307 fl->blocks[next_free].length = count;
308 fl->num_blocks++;
309
310 return count;
311}
312
313static int rtas_excl_open(struct inode *inode, struct file *file)
314{
315 struct proc_dir_entry *dp = PDE(inode);
316
317 /* Enforce exclusive open with use count of PDE */
318 spin_lock(&flash_file_open_lock);
319 if (atomic_read(&dp->count) > 1) {
320 spin_unlock(&flash_file_open_lock);
321 return -EBUSY;
322 }
323
324 atomic_inc(&dp->count);
325 spin_unlock(&flash_file_open_lock);
326
327 return 0;
328}
329
330static int rtas_excl_release(struct inode *inode, struct file *file)
331{
332 struct proc_dir_entry *dp = PDE(inode);
333
334 atomic_dec(&dp->count);
335
336 return 0;
337}
338
339static void manage_flash(struct rtas_manage_flash_t *args_buf)
340{
341 unsigned int wait_time;
342 s32 rc;
343
344 while (1) {
345 rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1,
346 1, NULL, args_buf->op);
347 if (rc == RTAS_RC_BUSY)
348 udelay(1);
349 else if (rtas_is_extended_busy(rc)) {
350 wait_time = rtas_extended_busy_delay_time(rc);
351 udelay(wait_time * 1000);
352 } else
353 break;
354 }
355
356 args_buf->status = rc;
357}
358
359static ssize_t manage_flash_read(struct file *file, char *buf,
360 size_t count, loff_t *ppos)
361{
362 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
363 struct rtas_manage_flash_t *args_buf;
364 char msg[RTAS_MSG_MAXLEN];
365 int msglen;
366
367 args_buf = (struct rtas_manage_flash_t *) dp->data;
368 if (args_buf == NULL)
369 return 0;
370
371 msglen = sprintf(msg, "%d\n", args_buf->status);
372 if (msglen > count)
373 msglen = count;
374
375 if (ppos && *ppos != 0)
376 return 0; /* be cheap */
377
378 if (!access_ok(VERIFY_WRITE, buf, msglen))
379 return -EINVAL;
380
381 if (copy_to_user(buf, msg, msglen))
382 return -EFAULT;
383
384 if (ppos)
385 *ppos = msglen;
386 return msglen;
387}
388
389static ssize_t manage_flash_write(struct file *file, const char *buf,
390 size_t count, loff_t *off)
391{
392 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
393 struct rtas_manage_flash_t *args_buf;
394 const char reject_str[] = "0";
395 const char commit_str[] = "1";
396 char stkbuf[10];
397 int op;
398
399 args_buf = (struct rtas_manage_flash_t *) dp->data;
400 if ((args_buf->status == MANAGE_AUTH) || (count == 0))
401 return count;
402
403 op = -1;
404 if (buf) {
405 if (count > 9) count = 9;
406 if (copy_from_user (stkbuf, buf, count)) {
407 return -EFAULT;
408 }
409 if (strncmp(stkbuf, reject_str, strlen(reject_str)) == 0)
410 op = RTAS_REJECT_TMP_IMG;
411 else if (strncmp(stkbuf, commit_str, strlen(commit_str)) == 0)
412 op = RTAS_COMMIT_TMP_IMG;
413 }
414
415 if (op == -1) /* buf is empty, or contains invalid string */
416 return -EINVAL;
417
418 args_buf->op = op;
419 manage_flash(args_buf);
420
421 return count;
422}
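/*
 * Example (illustrative; the path follows the proc entry created in
 * rtas_flash_init() below): commit or reject the temporary image
 * from userspace with
 *
 *	echo 1 > /proc/ppc64/rtas/manage_flash	(commit)
 *	echo 0 > /proc/ppc64/rtas/manage_flash	(reject)
 *
 * and read the file afterwards for the resulting status code.
 */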
423
424static void validate_flash(struct rtas_validate_flash_t *args_buf)
425{
426 int token = rtas_token("ibm,validate-flash-image");
427 unsigned int wait_time;
428 int update_results;
429 s32 rc;
430
431 rc = 0;
432	while (1) {
433 spin_lock(&rtas_data_buf_lock);
434 memcpy(rtas_data_buf, args_buf->buf, VALIDATE_BUF_SIZE);
435 rc = rtas_call(token, 2, 2, &update_results,
436 (u32) __pa(rtas_data_buf), args_buf->buf_size);
437 memcpy(args_buf->buf, rtas_data_buf, VALIDATE_BUF_SIZE);
438 spin_unlock(&rtas_data_buf_lock);
439
440 if (rc == RTAS_RC_BUSY)
441 udelay(1);
442 else if (rtas_is_extended_busy(rc)) {
443 wait_time = rtas_extended_busy_delay_time(rc);
444 udelay(wait_time * 1000);
445 } else
446 break;
447 }
448
449 args_buf->status = rc;
450 args_buf->update_results = update_results;
451}
452
453static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf,
454 char *msg)
455{
456 int n;
457
458 if (args_buf->status >= VALIDATE_TMP_UPDATE) {
459 n = sprintf(msg, "%d\n", args_buf->update_results);
460 if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) ||
461 (args_buf->update_results == VALIDATE_TMP_UPDATE))
462 n += sprintf(msg + n, "%s\n", args_buf->buf);
463 } else {
464 n = sprintf(msg, "%d\n", args_buf->status);
465 }
466 return n;
467}
468
469static ssize_t validate_flash_read(struct file *file, char *buf,
470 size_t count, loff_t *ppos)
471{
472 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
473 struct rtas_validate_flash_t *args_buf;
474 char msg[RTAS_MSG_MAXLEN];
475 int msglen;
476
477 args_buf = (struct rtas_validate_flash_t *) dp->data;
478
479 if (ppos && *ppos != 0)
480 return 0; /* be cheap */
481
482 msglen = get_validate_flash_msg(args_buf, msg);
483 if (msglen > count)
484 msglen = count;
485
486 if (!access_ok(VERIFY_WRITE, buf, msglen))
487 return -EINVAL;
488
489 if (copy_to_user(buf, msg, msglen))
490 return -EFAULT;
491
492 if (ppos)
493 *ppos = msglen;
494 return msglen;
495}
496
497static ssize_t validate_flash_write(struct file *file, const char *buf,
498 size_t count, loff_t *off)
499{
500 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
501 struct rtas_validate_flash_t *args_buf;
502 int rc;
503
504 args_buf = (struct rtas_validate_flash_t *) dp->data;
505
506 if (dp->data == NULL) {
507 dp->data = kmalloc(sizeof(struct rtas_validate_flash_t),
508 GFP_KERNEL);
509 if (dp->data == NULL)
510 return -ENOMEM;
511 }
512
513 /* We are only interested in the first 4K of the
514 * candidate image */
515 if ((*off >= VALIDATE_BUF_SIZE) ||
516 (args_buf->status == VALIDATE_AUTH)) {
517 *off += count;
518 return count;
519 }
520
521 if (*off + count >= VALIDATE_BUF_SIZE) {
522 count = VALIDATE_BUF_SIZE - *off;
523 args_buf->status = VALIDATE_READY;
524 } else {
525 args_buf->status = VALIDATE_INCOMPLETE;
526 }
527
528 if (!access_ok(VERIFY_READ, buf, count)) {
529 rc = -EFAULT;
530 goto done;
531 }
532 if (copy_from_user(args_buf->buf + *off, buf, count)) {
533 rc = -EFAULT;
534 goto done;
535 }
536
537 *off += count;
538 rc = count;
539done:
540 if (rc < 0) {
541 kfree(dp->data);
542 dp->data = NULL;
543 }
544 return rc;
545}
546
547static int validate_flash_release(struct inode *inode, struct file *file)
548{
549 struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
550 struct rtas_validate_flash_t *args_buf;
551
552 args_buf = (struct rtas_validate_flash_t *) dp->data;
553
554 if (args_buf->status == VALIDATE_READY) {
555 args_buf->buf_size = VALIDATE_BUF_SIZE;
556 validate_flash(args_buf);
557 }
558
559 /* The matching atomic_inc was in rtas_excl_open() */
560 atomic_dec(&dp->count);
561
562 return 0;
563}
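/*
 * Illustrative userspace flow (paths follow the proc entries created
 * in rtas_flash_init() below):
 *
 *	dd if=fw.img of=/proc/ppc64/rtas/validate_flash bs=4096 count=1
 *	cat /proc/ppc64/rtas/validate_flash
 *
 * Writing the first 4K marks the buffer VALIDATE_READY; the RTAS call
 * itself runs here on release, and the read then reports the
 * update_results token.
 */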
564
565static void remove_flash_pde(struct proc_dir_entry *dp)
566{
567 if (dp) {
568 if (dp->data != NULL)
569 kfree(dp->data);
570 dp->owner = NULL;
571 remove_proc_entry(dp->name, dp->parent);
572 }
573}
574
575static int initialize_flash_pde_data(const char *rtas_call_name,
576 size_t buf_size,
577 struct proc_dir_entry *dp)
578{
579 int *status;
580 int token;
581
582 dp->data = kmalloc(buf_size, GFP_KERNEL);
583 if (dp->data == NULL) {
584 remove_flash_pde(dp);
585 return -ENOMEM;
586 }
587
588 memset(dp->data, 0, buf_size);
589
590 /*
591 * This code assumes that the status int is the first member of the
592 * struct
593 */
594 status = (int *) dp->data;
595 token = rtas_token(rtas_call_name);
596 if (token == RTAS_UNKNOWN_SERVICE)
597 *status = FLASH_AUTH;
598 else
599 *status = FLASH_NO_OP;
600
601 return 0;
602}
603
604static struct proc_dir_entry *create_flash_pde(const char *filename,
605 struct file_operations *fops)
606{
607 struct proc_dir_entry *ent = NULL;
608
609 ent = create_proc_entry(filename, S_IRUSR | S_IWUSR, NULL);
610 if (ent != NULL) {
611 ent->nlink = 1;
612 ent->proc_fops = fops;
613 ent->owner = THIS_MODULE;
614 }
615
616 return ent;
617}
618
619static struct file_operations rtas_flash_operations = {
620 .read = rtas_flash_read,
621 .write = rtas_flash_write,
622 .open = rtas_excl_open,
623 .release = rtas_flash_release,
624};
625
626static struct file_operations manage_flash_operations = {
627 .read = manage_flash_read,
628 .write = manage_flash_write,
629 .open = rtas_excl_open,
630 .release = rtas_excl_release,
631};
632
633static struct file_operations validate_flash_operations = {
634 .read = validate_flash_read,
635 .write = validate_flash_write,
636 .open = rtas_excl_open,
637 .release = validate_flash_release,
638};
639
640int __init rtas_flash_init(void)
641{
642 int rc;
643
644 if (rtas_token("ibm,update-flash-64-and-reboot") ==
645 RTAS_UNKNOWN_SERVICE) {
646 printk(KERN_ERR "rtas_flash: no firmware flash support\n");
647 return 1;
648 }
649
650 firmware_flash_pde = create_flash_pde("ppc64/rtas/"
651 FIRMWARE_FLASH_NAME,
652 &rtas_flash_operations);
653 if (firmware_flash_pde == NULL) {
654 rc = -ENOMEM;
655 goto cleanup;
656 }
657
658 rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot",
659 sizeof(struct rtas_update_flash_t),
660 firmware_flash_pde);
661 if (rc != 0)
662 goto cleanup;
663
664 firmware_update_pde = create_flash_pde("ppc64/rtas/"
665 FIRMWARE_UPDATE_NAME,
666 &rtas_flash_operations);
667 if (firmware_update_pde == NULL) {
668 rc = -ENOMEM;
669 goto cleanup;
670 }
671
672 rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot",
673 sizeof(struct rtas_update_flash_t),
674 firmware_update_pde);
675 if (rc != 0)
676 goto cleanup;
677
678 validate_pde = create_flash_pde("ppc64/rtas/" VALIDATE_FLASH_NAME,
679 &validate_flash_operations);
680 if (validate_pde == NULL) {
681 rc = -ENOMEM;
682 goto cleanup;
683 }
684
685 rc = initialize_flash_pde_data("ibm,validate-flash-image",
686 sizeof(struct rtas_validate_flash_t),
687 validate_pde);
688 if (rc != 0)
689 goto cleanup;
690
691 manage_pde = create_flash_pde("ppc64/rtas/" MANAGE_FLASH_NAME,
692 &manage_flash_operations);
693 if (manage_pde == NULL) {
694 rc = -ENOMEM;
695 goto cleanup;
696 }
697
698 rc = initialize_flash_pde_data("ibm,manage-flash-image",
699 sizeof(struct rtas_manage_flash_t),
700 manage_pde);
701 if (rc != 0)
702 goto cleanup;
703
704 return 0;
705
706cleanup:
707 remove_flash_pde(firmware_flash_pde);
708 remove_flash_pde(firmware_update_pde);
709 remove_flash_pde(validate_pde);
710 remove_flash_pde(manage_pde);
711
712 return rc;
713}
714
715void __exit rtas_flash_cleanup(void)
716{
717 remove_flash_pde(firmware_flash_pde);
718 remove_flash_pde(firmware_update_pde);
719 remove_flash_pde(validate_pde);
720 remove_flash_pde(manage_pde);
721}
722
723module_init(rtas_flash_init);
724module_exit(rtas_flash_cleanup);
725MODULE_LICENSE("GPL");
diff --git a/arch/ppc64/kernel/rtasd.c b/arch/ppc64/kernel/rtasd.c
new file mode 100644
index 000000000000..ff65dc33320e
--- /dev/null
+++ b/arch/ppc64/kernel/rtasd.c
@@ -0,0 +1,527 @@
1/*
2 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Communication to userspace based on kernel/printk.c
10 */
11
12#include <linux/types.h>
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/kernel.h>
16#include <linux/poll.h>
17#include <linux/proc_fs.h>
18#include <linux/init.h>
19#include <linux/vmalloc.h>
20#include <linux/spinlock.h>
21#include <linux/cpu.h>
22
23#include <asm/uaccess.h>
24#include <asm/io.h>
25#include <asm/rtas.h>
26#include <asm/prom.h>
27#include <asm/nvram.h>
28#include <asm/atomic.h>
29#include <asm/systemcfg.h>
30
31#if 0
32#define DEBUG(A...) printk(KERN_ERR A)
33#else
34#define DEBUG(A...)
35#endif
36
37static DEFINE_SPINLOCK(rtasd_log_lock);
38
39DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);
40
41static char *rtas_log_buf;
42static unsigned long rtas_log_start;
43static unsigned long rtas_log_size;
44
45static int surveillance_timeout = -1;
46static unsigned int rtas_event_scan_rate;
47static unsigned int rtas_error_log_max;
48static unsigned int rtas_error_log_buffer_max;
49
50static int full_rtas_msgs = 0;
51
52extern int no_logging;
53
54volatile int error_log_cnt = 0;
55
56/*
57 * Since we use 32 bit RTAS, the physical address of this must be below
58 * 4G or else bad things happen. Allocate this in the kernel data and
59 * make it big enough.
60 */
61static unsigned char logdata[RTAS_ERROR_LOG_MAX];
62
63static int get_eventscan_parms(void);
64
65static char *rtas_type[] = {
66 "Unknown", "Retry", "TCE Error", "Internal Device Failure",
67 "Timeout", "Data Parity", "Address Parity", "Cache Parity",
68 "Address Invalid", "ECC Uncorrected", "ECC Corrupted",
69};
70
71static char *rtas_event_type(int type)
72{
73 if ((type > 0) && (type < 11))
74 return rtas_type[type];
75
76 switch (type) {
77 case RTAS_TYPE_EPOW:
78 return "EPOW";
79 case RTAS_TYPE_PLATFORM:
80 return "Platform Error";
81 case RTAS_TYPE_IO:
82 return "I/O Event";
83 case RTAS_TYPE_INFO:
84 return "Platform Information Event";
85 case RTAS_TYPE_DEALLOC:
86 return "Resource Deallocation Event";
87 case RTAS_TYPE_DUMP:
88 return "Dump Notification Event";
89 }
90
91 return rtas_type[0];
92}
93
94/* To see this info, grep for RTAS in /var/log/messages; each entry
95 * will be collected together between obvious begin/end lines.
96 * A unique identifier on the begin and end lines persists
97 * across reboots.
98 *
99 * format of error logs returned from RTAS:
100 * bytes (size) : contents
101 * --------------------------------------------------------
102 * 0-7 (8) : rtas_error_log
103 * 8-47 (40) : extended info
104 * 48-51 (4) : vendor id
105 * 52-1023 (vendor specific) : location code and debug data
106 */
107static void printk_log_rtas(char *buf, int len)
108{
109
110	int i, j, n = 0;
111 int perline = 16;
112 char buffer[64];
113 char * str = "RTAS event";
114
115 if (full_rtas_msgs) {
116 printk(RTAS_DEBUG "%d -------- %s begin --------\n",
117 error_log_cnt, str);
118
119 /*
120		 * Print perline bytes on each line; each line starts
121 * with RTAS and a changing number, so syslogd will
122 * print lines that are otherwise the same. Separate every
123 * 4 bytes with a space.
124 */
125 for (i = 0; i < len; i++) {
126 j = i % perline;
127 if (j == 0) {
128 memset(buffer, 0, sizeof(buffer));
129 n = sprintf(buffer, "RTAS %d:", i/perline);
130 }
131
132 if ((i % 4) == 0)
133 n += sprintf(buffer+n, " ");
134
135 n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]);
136
137 if (j == (perline-1))
138 printk(KERN_DEBUG "%s\n", buffer);
139 }
140 if ((i % perline) != 0)
141 printk(KERN_DEBUG "%s\n", buffer);
142
143 printk(RTAS_DEBUG "%d -------- %s end ----------\n",
144 error_log_cnt, str);
145 } else {
146 struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
147
148 printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
149 error_log_cnt, rtas_event_type(errlog->type),
150 errlog->severity);
151 }
152}
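/*
 * Illustrative output (made-up data; it assumes RTAS_DEBUG prefixes
 * lines with "RTAS: "): with full_rtas_msgs set, a 20-byte log would
 * print roughly as
 *
 *	RTAS: 12 -------- RTAS event begin --------
 *	RTAS 0: 04000940 00000000 24770004 00000000
 *	RTAS 1: 00000000
 *	RTAS: 12 -------- RTAS event end ----------
 */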
153
154static int log_rtas_len(char * buf)
155{
156 int len;
157 struct rtas_error_log *err;
158
159 /* rtas fixed header */
160 len = 8;
161 err = (struct rtas_error_log *)buf;
162 if (err->extended_log_length) {
163
164 /* extended header */
165 len += err->extended_log_length;
166 }
167
168 if (rtas_error_log_max == 0) {
169 get_eventscan_parms();
170 }
171 if (len > rtas_error_log_max)
172 len = rtas_error_log_max;
173
174 return len;
175}
176
177/*
178 * First write to nvram, if fatal error, that is the only
179 * place we log the info. The error will be picked up
180 * on the next reboot by rtasd. If not fatal, run the
181 * method for the type of error. Currently, only RTAS
182 * errors have methods implemented, but in the future
183 * there might be a need to store data in nvram before a
184 * call to panic().
185 *
186 * XXX We write to nvram periodically, to indicate the error has
187 * been written and sync'd, but there is a possibility
188 * that if we don't shut down correctly, a duplicate error
189 * record will be created on next reboot.
190 */
191void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
192{
193 unsigned long offset;
194 unsigned long s;
195 int len = 0;
196
197 DEBUG("logging event\n");
198 if (buf == NULL)
199 return;
200
201 spin_lock_irqsave(&rtasd_log_lock, s);
202
203 /* get length and increase count */
204 switch (err_type & ERR_TYPE_MASK) {
205 case ERR_TYPE_RTAS_LOG:
206 len = log_rtas_len(buf);
207 if (!(err_type & ERR_FLAG_BOOT))
208 error_log_cnt++;
209 break;
210 case ERR_TYPE_KERNEL_PANIC:
211 default:
212 spin_unlock_irqrestore(&rtasd_log_lock, s);
213 return;
214 }
215
216 /* Write error to NVRAM */
217 if (!no_logging && !(err_type & ERR_FLAG_BOOT))
218 nvram_write_error_log(buf, len, err_type);
219
220 /*
221 * rtas errors can occur during boot, and we do want to capture
222 * those somewhere, even if nvram isn't ready (why not?), and even
223 * if rtasd isn't ready. Put them into the boot log, at least.
224 */
225 if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG)
226 printk_log_rtas(buf, len);
227
228 /* Check to see if we need to or have stopped logging */
229 if (fatal || no_logging) {
230 no_logging = 1;
231 spin_unlock_irqrestore(&rtasd_log_lock, s);
232 return;
233 }
234
235 /* call type specific method for error */
236 switch (err_type & ERR_TYPE_MASK) {
237 case ERR_TYPE_RTAS_LOG:
238 offset = rtas_error_log_buffer_max *
239 ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK);
240
241 /* First copy over sequence number */
242 memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int));
243
244 /* Second copy over error log data */
245 offset += sizeof(int);
246 memcpy(&rtas_log_buf[offset], buf, len);
247
248 if (rtas_log_size < LOG_NUMBER)
249 rtas_log_size += 1;
250 else
251 rtas_log_start += 1;
252
253 spin_unlock_irqrestore(&rtasd_log_lock, s);
254 wake_up_interruptible(&rtas_log_wait);
255 break;
256 case ERR_TYPE_KERNEL_PANIC:
257 default:
258 spin_unlock_irqrestore(&rtasd_log_lock, s);
259 return;
260 }
261
262}
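/*
 * Illustrative numbers (LOG_NUMBER = 64 is a made-up value; the code
 * only assumes LOG_NUMBER_MASK == LOG_NUMBER - 1): with a ring of 64
 * records, the 65th event masks back to slot 0, since
 * (rtas_log_start + rtas_log_size) & LOG_NUMBER_MASK turns the
 * running index into a position in the fixed ring, each record being
 * a sequence number followed by the raw log data.
 */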
263
264
265static int rtas_log_open(struct inode * inode, struct file * file)
266{
267 return 0;
268}
269
270static int rtas_log_release(struct inode * inode, struct file * file)
271{
272 return 0;
273}
274
275/* This will check if all events are logged; if they are, we
276 * know that we can safely clear the events in NVRAM.
277 * Then we'll sit and wait for something else to log.
278 */
279static ssize_t rtas_log_read(struct file * file, char __user * buf,
280 size_t count, loff_t *ppos)
281{
282 int error;
283 char *tmp;
284 unsigned long s;
285 unsigned long offset;
286
287 if (!buf || count < rtas_error_log_buffer_max)
288 return -EINVAL;
289
290 count = rtas_error_log_buffer_max;
291
292 if (!access_ok(VERIFY_WRITE, buf, count))
293 return -EFAULT;
294
295 tmp = kmalloc(count, GFP_KERNEL);
296 if (!tmp)
297 return -ENOMEM;
298
299
300 spin_lock_irqsave(&rtasd_log_lock, s);
301 /* if it's 0, then we know we got the last one (the one in NVRAM) */
302 if (rtas_log_size == 0 && !no_logging)
303 nvram_clear_error_log();
304 spin_unlock_irqrestore(&rtasd_log_lock, s);
305
306
307 error = wait_event_interruptible(rtas_log_wait, rtas_log_size);
308 if (error)
309 goto out;
310
311 spin_lock_irqsave(&rtasd_log_lock, s);
312 offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK);
313 memcpy(tmp, &rtas_log_buf[offset], count);
314
315 rtas_log_start += 1;
316 rtas_log_size -= 1;
317 spin_unlock_irqrestore(&rtasd_log_lock, s);
318
319 error = copy_to_user(buf, tmp, count) ? -EFAULT : count;
320out:
321 kfree(tmp);
322 return error;
323}
324
325static unsigned int rtas_log_poll(struct file *file, poll_table * wait)
326{
327 poll_wait(file, &rtas_log_wait, wait);
328 if (rtas_log_size)
329 return POLLIN | POLLRDNORM;
330 return 0;
331}
332
333struct file_operations proc_rtas_log_operations = {
334 .read = rtas_log_read,
335 .poll = rtas_log_poll,
336 .open = rtas_log_open,
337 .release = rtas_log_release,
338};
339
340static int enable_surveillance(int timeout)
341{
342 int error;
343
344 error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout);
345
346 if (error == 0)
347 return 0;
348
349 if (error == -EINVAL) {
350 printk(KERN_INFO "rtasd: surveillance not supported\n");
351 return 0;
352 }
353
354 printk(KERN_ERR "rtasd: could not update surveillance\n");
355 return -1;
356}
357
358static int get_eventscan_parms(void)
359{
360 struct device_node *node;
361 int *ip;
362
363 node = of_find_node_by_path("/rtas");
364
365 ip = (int *)get_property(node, "rtas-event-scan-rate", NULL);
366 if (ip == NULL) {
367 printk(KERN_ERR "rtasd: no rtas-event-scan-rate\n");
368 of_node_put(node);
369 return -1;
370 }
371 rtas_event_scan_rate = *ip;
372 DEBUG("rtas-event-scan-rate %d\n", rtas_event_scan_rate);
373
374 /* Make room for the sequence number */
375 rtas_error_log_max = rtas_get_error_log_max();
376 rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
377
378 of_node_put(node);
379
380 return 0;
381}
382
383static void do_event_scan(int event_scan)
384{
385 int error;
386 do {
387 memset(logdata, 0, rtas_error_log_max);
388 error = rtas_call(event_scan, 4, 1, NULL,
389 RTAS_EVENT_SCAN_ALL_EVENTS, 0,
390 __pa(logdata), rtas_error_log_max);
391 if (error == -1) {
392 printk(KERN_ERR "event-scan failed\n");
393 break;
394 }
395
396 if (error == 0)
397 pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0);
398
399	} while (error == 0);
400}
401
402static void do_event_scan_all_cpus(long delay)
403{
404 int cpu;
405
406 lock_cpu_hotplug();
407 cpu = first_cpu(cpu_online_map);
408 for (;;) {
409 set_cpus_allowed(current, cpumask_of_cpu(cpu));
410 do_event_scan(rtas_token("event-scan"));
411 set_cpus_allowed(current, CPU_MASK_ALL);
412
413 /* Drop hotplug lock, and sleep for the specified delay */
414 unlock_cpu_hotplug();
415 set_current_state(TASK_INTERRUPTIBLE);
416 schedule_timeout(delay);
417 lock_cpu_hotplug();
418
419 cpu = next_cpu(cpu, cpu_online_map);
420 if (cpu == NR_CPUS)
421 break;
422 }
423 unlock_cpu_hotplug();
424}
425
426static int rtasd(void *unused)
427{
428 unsigned int err_type;
429 int event_scan = rtas_token("event-scan");
430 int rc;
431
432 daemonize("rtasd");
433
434 if (event_scan == RTAS_UNKNOWN_SERVICE || get_eventscan_parms() == -1)
435 goto error;
436
437	rtas_log_buf = vmalloc(rtas_error_log_buffer_max * LOG_NUMBER);
438 if (!rtas_log_buf) {
439 printk(KERN_ERR "rtasd: no memory\n");
440 goto error;
441 }
442
443	printk(KERN_INFO "RTAS daemon started\n");
444
445 DEBUG("will sleep for %d jiffies\n", (HZ*60/rtas_event_scan_rate) / 2);
446
447 /* See if we have any error stored in NVRAM */
448 memset(logdata, 0, rtas_error_log_max);
449
450 rc = nvram_read_error_log(logdata, rtas_error_log_max, &err_type);
451
452 /* We can use rtas_log_buf now */
453 no_logging = 0;
454
455 if (!rc) {
456 if (err_type != ERR_FLAG_ALREADY_LOGGED) {
457 pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0);
458 }
459 }
460
461 /* First pass. */
462 do_event_scan_all_cpus(HZ);
463
464 if (surveillance_timeout != -1) {
465 DEBUG("enabling surveillance\n");
466 enable_surveillance(surveillance_timeout);
467 DEBUG("surveillance enabled\n");
468 }
469
470 /* Delay should be at least one second since some
471 * machines have problems if we call event-scan too
472 * quickly. */
473 for (;;)
474 do_event_scan_all_cpus((HZ*60/rtas_event_scan_rate) / 2);
475
476error:
477 /* Should delete proc entries */
478 return -EINVAL;
479}
480
481static int __init rtas_init(void)
482{
483 struct proc_dir_entry *entry;
484
485 /* No RTAS, only warn if we are on a pSeries box */
486 if (rtas_token("event-scan") == RTAS_UNKNOWN_SERVICE) {
487 if (systemcfg->platform & PLATFORM_PSERIES)
488 printk(KERN_ERR "rtasd: no event-scan on system\n");
489 return 1;
490 }
491
492 entry = create_proc_entry("ppc64/rtas/error_log", S_IRUSR, NULL);
493 if (entry)
494 entry->proc_fops = &proc_rtas_log_operations;
495 else
496 printk(KERN_ERR "Failed to create error_log proc entry\n");
497
498 if (kernel_thread(rtasd, NULL, CLONE_FS) < 0)
499 printk(KERN_ERR "Failed to start RTAS daemon\n");
500
501 return 0;
502}
503
504static int __init surveillance_setup(char *str)
505{
506 int i;
507
508 if (get_option(&str,&i)) {
509 if (i >= 0 && i <= 255)
510 surveillance_timeout = i;
511 }
512
513 return 1;
514}
515
516static int __init rtasmsgs_setup(char *str)
517{
518 if (strcmp(str, "on") == 0)
519 full_rtas_msgs = 1;
520 else if (strcmp(str, "off") == 0)
521 full_rtas_msgs = 0;
522
523 return 1;
524}
525__initcall(rtas_init);
526__setup("surveillance=", surveillance_setup);
527__setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/ppc64/kernel/rtc.c b/arch/ppc64/kernel/rtc.c
new file mode 100644
index 000000000000..3e70b91375fc
--- /dev/null
+++ b/arch/ppc64/kernel/rtc.c
@@ -0,0 +1,440 @@
1/*
2 * Real Time Clock interface for PPC64.
3 *
4 * Based on rtc.c by Paul Gortmaker
5 *
6 * This driver allows use of the real time clock
7 * from user space. It exports the /dev/rtc
8 * interface supporting various ioctl() and also the
9 * /proc/driver/rtc pseudo-file for status information.
10 *
11 * The interface supports neither RTC interrupts nor an alarm.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 *
18 * 1.0 Mike Corrigan: IBM iSeries rtc support
19 * 1.1 Dave Engebretsen: IBM pSeries rtc support
20 */
21
22#define RTC_VERSION "1.1"
23
24#include <linux/config.h>
25#include <linux/module.h>
26#include <linux/kernel.h>
27#include <linux/types.h>
28#include <linux/miscdevice.h>
29#include <linux/ioport.h>
30#include <linux/fcntl.h>
31#include <linux/mc146818rtc.h>
32#include <linux/init.h>
33#include <linux/poll.h>
34#include <linux/proc_fs.h>
35#include <linux/spinlock.h>
36#include <linux/bcd.h>
37#include <linux/interrupt.h>
38
39#include <asm/io.h>
40#include <asm/uaccess.h>
41#include <asm/system.h>
42#include <asm/time.h>
43#include <asm/rtas.h>
44
45#include <asm/iSeries/LparData.h>
46#include <asm/iSeries/mf.h>
47#include <asm/machdep.h>
48#include <asm/iSeries/ItSpCommArea.h>
49
50extern int piranha_simulator;
51
52/*
53 * We sponge a minor off of the misc major. No need to slurp
54 * up another valuable major dev number for this. If you add
55 * an ioctl, make sure you don't conflict with SPARC's RTC
56 * ioctls.
57 */
58
59static ssize_t rtc_read(struct file *file, char __user *buf,
60 size_t count, loff_t *ppos);
61
62static int rtc_ioctl(struct inode *inode, struct file *file,
63 unsigned int cmd, unsigned long arg);
64
65static int rtc_read_proc(char *page, char **start, off_t off,
66 int count, int *eof, void *data);
67
68/*
69 * If this driver ever becomes modularised, it will be really nice
70 * to make the epoch retain its value across module reload...
71 */
72
73static unsigned long epoch = 1900; /* year corresponding to 0x00 */
74
75static const unsigned char days_in_mo[] =
76{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
77
78/*
79 * Now all the various file operations that we export.
80 */
81
82static ssize_t rtc_read(struct file *file, char __user *buf,
83 size_t count, loff_t *ppos)
84{
85 return -EIO;
86}
87
88static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
89 unsigned long arg)
90{
91 struct rtc_time wtime;
92
93 switch (cmd) {
94 case RTC_RD_TIME: /* Read the time/date from RTC */
95 {
96 memset(&wtime, 0, sizeof(struct rtc_time));
97 ppc_md.get_rtc_time(&wtime);
98 break;
99 }
100 case RTC_SET_TIME: /* Set the RTC */
101 {
102 struct rtc_time rtc_tm;
103 unsigned char mon, day, hrs, min, sec, leap_yr;
104 unsigned int yrs;
105
106 if (!capable(CAP_SYS_TIME))
107 return -EACCES;
108
109 if (copy_from_user(&rtc_tm, (struct rtc_time __user *)arg,
110 sizeof(struct rtc_time)))
111 return -EFAULT;
112
113 yrs = rtc_tm.tm_year;
114 mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */
115 day = rtc_tm.tm_mday;
116 hrs = rtc_tm.tm_hour;
117 min = rtc_tm.tm_min;
118 sec = rtc_tm.tm_sec;
119
120 if (yrs < 70)
121 return -EINVAL;
122
123		leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) || !((yrs + 1900) % 400));
124
125 if ((mon > 12) || (day == 0))
126 return -EINVAL;
127
128 if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr)))
129 return -EINVAL;
130
131 if ((hrs >= 24) || (min >= 60) || (sec >= 60))
132 return -EINVAL;
133
134 if ( yrs > 169 )
135 return -EINVAL;
136
137 ppc_md.set_rtc_time(&rtc_tm);
138
139 return 0;
140 }
141 case RTC_EPOCH_READ: /* Read the epoch. */
142 {
143 return put_user (epoch, (unsigned long __user *)arg);
144 }
145 case RTC_EPOCH_SET: /* Set the epoch. */
146 {
147 /*
148 * There were no RTC clocks before 1900.
149 */
150 if (arg < 1900)
151 return -EINVAL;
152
153 if (!capable(CAP_SYS_TIME))
154 return -EACCES;
155
156 epoch = arg;
157 return 0;
158 }
159 default:
160 return -EINVAL;
161 }
162 return copy_to_user((void __user *)arg, &wtime, sizeof wtime) ? -EFAULT : 0;
163}
164
165static int rtc_open(struct inode *inode, struct file *file)
166{
167 nonseekable_open(inode, file);
168 return 0;
169}
170
171static int rtc_release(struct inode *inode, struct file *file)
172{
173 return 0;
174}
175
176/*
177 * The various file operations we support.
178 */
179static struct file_operations rtc_fops = {
180 .owner = THIS_MODULE,
181 .llseek = no_llseek,
182 .read = rtc_read,
183 .ioctl = rtc_ioctl,
184 .open = rtc_open,
185 .release = rtc_release,
186};
187
188static struct miscdevice rtc_dev = {
189 .minor = RTC_MINOR,
190 .name = "rtc",
191 .fops = &rtc_fops
192};
193
194static int __init rtc_init(void)
195{
196 int retval;
197
198 retval = misc_register(&rtc_dev);
199 if(retval < 0)
200 return retval;
201
202#ifdef CONFIG_PROC_FS
203 if (create_proc_read_entry("driver/rtc", 0, NULL, rtc_read_proc, NULL)
204 == NULL) {
205 misc_deregister(&rtc_dev);
206 return -ENOMEM;
207 }
208#endif
209
210 printk(KERN_INFO "i/pSeries Real Time Clock Driver v" RTC_VERSION "\n");
211
212 return 0;
213}
214
215static void __exit rtc_exit (void)
216{
217 remove_proc_entry ("driver/rtc", NULL);
218 misc_deregister(&rtc_dev);
219}
220
221module_init(rtc_init);
222module_exit(rtc_exit);
223
224/*
225 * Info exported via "/proc/driver/rtc".
226 */
227
228static int rtc_proc_output (char *buf)
229{
230
231 char *p;
232 struct rtc_time tm;
233
234 p = buf;
235
236 ppc_md.get_rtc_time(&tm);
237
238 /*
239 * There is no way to tell if the luser has the RTC set for local
240	 * time or for Coordinated Universal Time (UTC/GMT). Probably local though.
241 */
242 p += sprintf(p,
243 "rtc_time\t: %02d:%02d:%02d\n"
244 "rtc_date\t: %04d-%02d-%02d\n"
245 "rtc_epoch\t: %04lu\n",
246 tm.tm_hour, tm.tm_min, tm.tm_sec,
247 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, epoch);
248
249 p += sprintf(p,
250 "DST_enable\t: no\n"
251 "BCD\t\t: yes\n"
252 "24hr\t\t: yes\n" );
253
254 return p - buf;
255}
256
257static int rtc_read_proc(char *page, char **start, off_t off,
258 int count, int *eof, void *data)
259{
260 int len = rtc_proc_output (page);
261 if (len <= off+count) *eof = 1;
262 *start = page + off;
263 len -= off;
264 if (len>count) len = count;
265 if (len<0) len = 0;
266 return len;
267}
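The start/off/count bookkeeping above follows the classic read_proc contract: generate the whole output, then return only the window the caller asked for, setting *eof once the end of the output is within reach. A standalone sketch of the same windowing arithmetic (the function name is ours, not the kernel's):

#include <stdio.h>

/* Mimic the windowing done in rtc_read_proc(): the full output is 'len'
 * bytes; the caller wants 'count' bytes starting at 'off'. */
static int window(int len, long off, int count, int *eof)
{
	if (len <= off + count)
		*eof = 1;
	len -= off;
	if (len > count)
		len = count;
	if (len < 0)
		len = 0;
	return len;
}

int main(void)
{
	int eof = 0;
	/* 100 bytes of output; the reader asks for 60 bytes at offset 80 */
	int ret = window(100, 80, 60, &eof);

	printf("returned %d, eof %d\n", ret, eof);	/* returned 20, eof 1 */
	return 0;
}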
268
269#ifdef CONFIG_PPC_ISERIES
270/*
271 * Get the RTC from the virtual service processor
272 * This requires flowing LpEvents to the primary partition
273 */
274void iSeries_get_rtc_time(struct rtc_time *rtc_tm)
275{
276 if (piranha_simulator)
277 return;
278
279 mf_get_rtc(rtc_tm);
280 rtc_tm->tm_mon--;
281}
282
283/*
284 * Set the RTC in the virtual service processor
285 * This requires flowing LpEvents to the primary partition
286 */
287int iSeries_set_rtc_time(struct rtc_time *tm)
288{
289 mf_set_rtc(tm);
290 return 0;
291}
292
293void iSeries_get_boot_time(struct rtc_time *tm)
294{
295 unsigned long time;
296 static unsigned long lastsec = 1;
297
298 u32 dataWord1 = *((u32 *)(&xSpCommArea.xBcdTimeAtIplStart));
299 u32 dataWord2 = *(((u32 *)&(xSpCommArea.xBcdTimeAtIplStart)) + 1);
300 int year = 1970;
301 int year1 = ( dataWord1 >> 24 ) & 0x000000FF;
302 int year2 = ( dataWord1 >> 16 ) & 0x000000FF;
303 int sec = ( dataWord1 >> 8 ) & 0x000000FF;
304 int min = dataWord1 & 0x000000FF;
305 int hour = ( dataWord2 >> 24 ) & 0x000000FF;
306 int day = ( dataWord2 >> 8 ) & 0x000000FF;
307 int mon = dataWord2 & 0x000000FF;
308
309 if ( piranha_simulator )
310 return;
311
312 BCD_TO_BIN(sec);
313 BCD_TO_BIN(min);
314 BCD_TO_BIN(hour);
315 BCD_TO_BIN(day);
316 BCD_TO_BIN(mon);
317 BCD_TO_BIN(year1);
318 BCD_TO_BIN(year2);
319 year = year1 * 100 + year2;
320
321 time = mktime(year, mon, day, hour, min, sec);
322 time += ( jiffies / HZ );
323
324 /* Now THIS is a nasty hack!
325 * It ensures that the first two calls get different answers.
326 * That way the loop in init_time (time.c) will not think
327 * the clock is stuck.
328 */
329 if ( lastsec ) {
330 time -= lastsec;
331 --lastsec;
332 }
333
334 to_tm(time, tm);
335 tm->tm_year -= 1900;
336 tm->tm_mon -= 1;
337}
338#endif
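The words read from xBcdTimeAtIplStart above are packed binary-coded decimal, one two-digit field per byte, which BCD_TO_BIN converts in place. A minimal standalone sketch of the same conversion (the helper name bcd2bin_u8 is ours, not the kernel's):

#include <stdio.h>

/* Decode one BCD byte: high nibble is the tens digit, low nibble the units. */
static unsigned int bcd2bin_u8(unsigned char bcd)
{
	return (bcd >> 4) * 10 + (bcd & 0x0f);
}

int main(void)
{
	/* Bytes 0x20 and 0x05 decode to 20 and 5, i.e. the year 2005. */
	unsigned char year1 = 0x20, year2 = 0x05;

	printf("year = %u\n", bcd2bin_u8(year1) * 100 + bcd2bin_u8(year2));
	return 0;
}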
339
340#ifdef CONFIG_PPC_RTAS
341#define MAX_RTC_WAIT 5000 /* 5 sec */
342#define RTAS_CLOCK_BUSY (-2)
343void pSeries_get_boot_time(struct rtc_time *rtc_tm)
344{
345 int ret[8];
346 int error, wait_time;
347 unsigned long max_wait_tb;
348
349 max_wait_tb = __get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
350 do {
351 error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
352 if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
353 wait_time = rtas_extended_busy_delay_time(error);
354 /* This is boot time so we spin. */
355 udelay(wait_time*1000);
356 error = RTAS_CLOCK_BUSY;
357 }
358 } while (error == RTAS_CLOCK_BUSY && (__get_tb() < max_wait_tb));
359
360 if (error != 0 && printk_ratelimit()) {
361 printk(KERN_WARNING "error: reading the clock failed (%d)\n",
362 error);
363 return;
364 }
365
366 rtc_tm->tm_sec = ret[5];
367 rtc_tm->tm_min = ret[4];
368 rtc_tm->tm_hour = ret[3];
369 rtc_tm->tm_mday = ret[2];
370 rtc_tm->tm_mon = ret[1] - 1;
371 rtc_tm->tm_year = ret[0] - 1900;
372}
373
374/* NOTE: get_rtc_time cannot sleep if executed in interrupt context,
375 * so if a delay would be needed to read the clock we just
376 * silently return without updating rtc_tm.
377 */
378void pSeries_get_rtc_time(struct rtc_time *rtc_tm)
379{
380 int ret[8];
381 int error, wait_time;
382 unsigned long max_wait_tb;
383
384 max_wait_tb = __get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
385 do {
386 error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
387 if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
388 if (in_interrupt() && printk_ratelimit()) {
389 printk(KERN_WARNING "error: reading clock would delay interrupt\n");
390 return; /* delay not allowed */
391 }
392 wait_time = rtas_extended_busy_delay_time(error);
393 set_current_state(TASK_INTERRUPTIBLE);
394 schedule_timeout(wait_time);
395 error = RTAS_CLOCK_BUSY;
396 }
397 } while (error == RTAS_CLOCK_BUSY && (__get_tb() < max_wait_tb));
398
399 if (error != 0 && printk_ratelimit()) {
400 printk(KERN_WARNING "error: reading the clock failed (%d)\n",
401 error);
402 return;
403 }
404
405 rtc_tm->tm_sec = ret[5];
406 rtc_tm->tm_min = ret[4];
407 rtc_tm->tm_hour = ret[3];
408 rtc_tm->tm_mday = ret[2];
409 rtc_tm->tm_mon = ret[1] - 1;
410 rtc_tm->tm_year = ret[0] - 1900;
411}
412
413int pSeries_set_rtc_time(struct rtc_time *tm)
414{
415 int error, wait_time;
416 unsigned long max_wait_tb;
417
418 max_wait_tb = __get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
419 do {
420 error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
421 tm->tm_year + 1900, tm->tm_mon + 1,
422 tm->tm_mday, tm->tm_hour, tm->tm_min,
423 tm->tm_sec, 0);
424 if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) {
425 if (in_interrupt())
426 return 1; /* probably decrementer */
427 wait_time = rtas_extended_busy_delay_time(error);
428 set_current_state(TASK_INTERRUPTIBLE);
429 schedule_timeout(wait_time);
430 error = RTAS_CLOCK_BUSY;
431 }
432 } while (error == RTAS_CLOCK_BUSY && (__get_tb() < max_wait_tb));
433
434 if (error != 0 && printk_ratelimit())
435 printk(KERN_WARNING "error: setting the clock failed (%d)\n",
436 error);
437
438 return 0;
439}
440#endif
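As a usage sketch, the /dev/rtc node registered above can be exercised from user space with the standard RTC_RD_TIME ioctl. This assumes the driver is present and the caller is allowed to open the device; error handling is minimal:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/rtc.h>

int main(void)
{
	struct rtc_time tm;
	int fd = open("/dev/rtc", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/rtc");
		return 1;
	}
	/* The driver services this via ppc_md.get_rtc_time() */
	if (ioctl(fd, RTC_RD_TIME, &tm) < 0) {
		perror("RTC_RD_TIME");
		close(fd);
		return 1;
	}
	printf("%04d-%02d-%02d %02d:%02d:%02d\n",
	       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
	       tm.tm_hour, tm.tm_min, tm.tm_sec);
	close(fd);
	return 0;
}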
diff --git a/arch/ppc64/kernel/scanlog.c b/arch/ppc64/kernel/scanlog.c
new file mode 100644
index 000000000000..189b81a41987
--- /dev/null
+++ b/arch/ppc64/kernel/scanlog.c
@@ -0,0 +1,245 @@
1/*
 2 * (c) 2001 PPC 64 Team, IBM Corp
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com>
10 *
11 * When ppc64 hardware fails the service processor dumps internal state
12 * of the system. After a reboot the operating system can access a dump
13 * of this data using this driver. A dump exists if the device-tree
14 * /chosen/ibm,scan-log-data property exists.
15 *
16 * This driver exports /proc/ppc64/rtas/scan-log-dump which can be read.
17 * The driver supports only sequential reads.
18 *
19 * The driver watches writes for the single word "reset".
20 * If received, the driver resets the scanlog so the platform can free it.
21 */
22
23#include <linux/module.h>
24#include <linux/types.h>
25#include <linux/errno.h>
26#include <linux/proc_fs.h>
27#include <linux/init.h>
28#include <asm/uaccess.h>
29#include <asm/rtas.h>
30#include <asm/prom.h>
31
32#define MODULE_VERS "1.0"
33#define MODULE_NAME "scanlog"
34
35/* Status returns from ibm,scan-log-dump */
36#define SCANLOG_COMPLETE 0
37#define SCANLOG_HWERROR -1
38#define SCANLOG_CONTINUE 1
39
40#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0)
41
42static int scanlog_debug;
43static unsigned int ibm_scan_log_dump; /* RTAS token */
44static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */
45
 46static ssize_t scanlog_read(struct file *file, char __user *buf,
47 size_t count, loff_t *ppos)
48{
49 struct inode * inode = file->f_dentry->d_inode;
50 struct proc_dir_entry *dp;
51 unsigned int *data;
52 int status;
53 unsigned long len, off;
54 unsigned int wait_time;
55
56 dp = PDE(inode);
57 data = (unsigned int *)dp->data;
58
59 if (!data) {
60 printk(KERN_ERR "scanlog: read failed no data\n");
61 return -EIO;
62 }
63
64 if (count > RTAS_DATA_BUF_SIZE)
65 count = RTAS_DATA_BUF_SIZE;
66
67 if (count < 1024) {
68 /* This is the min supported by this RTAS call. Rather
 69	 * than do all the buffering we insist that user code handle
70 * larger reads. As long as cp works... :)
71 */
72 printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count);
73 return -EINVAL;
74 }
75
76 if (!access_ok(VERIFY_WRITE, buf, count))
77 return -EFAULT;
78
79 for (;;) {
80 wait_time = HZ/2; /* default wait if no data */
81 spin_lock(&rtas_data_buf_lock);
82 memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE);
83 status = rtas_call(ibm_scan_log_dump, 2, 1, NULL,
84 (u32) __pa(rtas_data_buf), (u32) count);
85 memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE);
86 spin_unlock(&rtas_data_buf_lock);
87
88 DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n",
89 status, data[0], data[1], data[2]);
90 switch (status) {
91 case SCANLOG_COMPLETE:
92 DEBUG("hit eof\n");
93 return 0;
94 case SCANLOG_HWERROR:
95 DEBUG("hardware error reading scan log data\n");
96 return -EIO;
97 case SCANLOG_CONTINUE:
98 /* We may or may not have data yet */
99 len = data[1];
100 off = data[2];
101 if (len > 0) {
102 if (copy_to_user(buf, ((char *)data)+off, len))
103 return -EFAULT;
104 return len;
105 }
106 /* Break to sleep default time */
107 break;
108 default:
109 if (status > 9900 && status <= 9905) {
110 /* No data. RTAS is hinting at a delay required
111 * between 1-100000 milliseconds
112 */
113 int ms = 1;
114 for (; status > 9900; status--)
115 ms = ms * 10;
116 /* Use microseconds for reasonable accuracy */
117 ms *= 1000;
118 wait_time = ms / (1000000/HZ); /* round down is fine */
119 /* Fall through to sleep */
120 } else {
121 printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status);
122 return -EIO;
123 }
124 }
125 /* Apparently no data yet. Wait and try again. */
126 set_current_state(TASK_INTERRUPTIBLE);
127 schedule_timeout(wait_time);
128 }
129 /*NOTREACHED*/
130}
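The default branch above decodes RTAS extended-delay status codes: 9900 through 9905 request a wait of 10^(status - 9900) milliseconds. A standalone sketch of that decode, equivalent to the loop in scanlog_read() (the helper name is ours):

#include <stdio.h>

/* Status 9900..9905 => suggested delay of 10^(status - 9900) ms,
 * mirroring the decode loop in scanlog_read() above. */
static unsigned int rtas_busy_delay_ms(int status)
{
	unsigned int ms = 1;

	for (; status > 9900; status--)
		ms *= 10;
	return ms;
}

int main(void)
{
	printf("%u %u %u\n", rtas_busy_delay_ms(9900),
	       rtas_busy_delay_ms(9902),
	       rtas_busy_delay_ms(9905));	/* prints: 1 100 100000 */
	return 0;
}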
131
 132static ssize_t scanlog_write(struct file *file, const char __user *buf,
133 size_t count, loff_t *ppos)
134{
135 char stkbuf[20];
136 int status;
137
138 if (count > 19) count = 19;
139 if (copy_from_user (stkbuf, buf, count)) {
140 return -EFAULT;
141 }
142 stkbuf[count] = 0;
143
144 if (buf) {
145 if (strncmp(stkbuf, "reset", 5) == 0) {
146 DEBUG("reset scanlog\n");
147 status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0);
148 DEBUG("rtas returns %d\n", status);
149 } else if (strncmp(stkbuf, "debugon", 7) == 0) {
150 printk(KERN_ERR "scanlog: debug on\n");
151 scanlog_debug = 1;
152 } else if (strncmp(stkbuf, "debugoff", 8) == 0) {
153 printk(KERN_ERR "scanlog: debug off\n");
154 scanlog_debug = 0;
155 }
156 }
157 return count;
158}
159
160static int scanlog_open(struct inode * inode, struct file * file)
161{
162 struct proc_dir_entry *dp = PDE(inode);
163 unsigned int *data = (unsigned int *)dp->data;
164
165 if (!data) {
166 printk(KERN_ERR "scanlog: open failed no data\n");
167 return -EIO;
168 }
169
170 if (data[0] != 0) {
171 /* This imperfect test stops a second copy of the
172 * data (or a reset while data is being copied)
173 */
174 return -EBUSY;
175 }
176
177 data[0] = 0; /* re-init so we restart the scan */
178
179 return 0;
180}
181
182static int scanlog_release(struct inode * inode, struct file * file)
183{
184 struct proc_dir_entry *dp = PDE(inode);
185 unsigned int *data = (unsigned int *)dp->data;
186
187 if (!data) {
188 printk(KERN_ERR "scanlog: release failed no data\n");
189 return -EIO;
190 }
191 data[0] = 0;
192
193 return 0;
194}
195
196struct file_operations scanlog_fops = {
197 .owner = THIS_MODULE,
198 .read = scanlog_read,
199 .write = scanlog_write,
200 .open = scanlog_open,
201 .release = scanlog_release,
202};
203
204int __init scanlog_init(void)
205{
206 struct proc_dir_entry *ent;
207
208 ibm_scan_log_dump = rtas_token("ibm,scan-log-dump");
209 if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) {
210 printk(KERN_ERR "scan-log-dump not implemented on this system\n");
211 return -EIO;
212 }
213
214 ent = create_proc_entry("ppc64/rtas/scan-log-dump", S_IRUSR, NULL);
215 if (ent) {
216 ent->proc_fops = &scanlog_fops;
217 /* Ideally we could allocate a buffer < 4G */
218 ent->data = kmalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
219 if (!ent->data) {
220 printk(KERN_ERR "Failed to allocate a buffer\n");
221 remove_proc_entry("scan-log-dump", ent->parent);
222 return -ENOMEM;
223 }
224 ((unsigned int *)ent->data)[0] = 0;
225 } else {
 226		printk(KERN_ERR "Failed to create ppc64/rtas/scan-log-dump proc entry\n");
227 return -EIO;
228 }
229 proc_ppc64_scan_log_dump = ent;
230
231 return 0;
232}
233
234void __exit scanlog_cleanup(void)
235{
236 if (proc_ppc64_scan_log_dump) {
237 if (proc_ppc64_scan_log_dump->data)
238 kfree(proc_ppc64_scan_log_dump->data);
239 remove_proc_entry("scan-log-dump", proc_ppc64_scan_log_dump->parent);
240 }
241}
242
243module_init(scanlog_init);
244module_exit(scanlog_cleanup);
245MODULE_LICENSE("GPL");
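A user-space sketch of reading the dump through the proc file created above. It assumes the platform actually has scan log data to expose; note the buffer stays well above the 1024-byte minimum the driver enforces per read:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];	/* the driver rejects reads smaller than 1024 bytes */
	ssize_t n;
	int fd = open("/proc/ppc64/rtas/scan-log-dump", O_RDONLY);

	if (fd < 0) {
		perror("scan-log-dump");
		return 1;
	}
	/* Sequential reads only; the driver returns 0 at SCANLOG_COMPLETE */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		if (write(STDOUT_FILENO, buf, n) < 0)
			break;
	close(fd);
	return 0;
}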
diff --git a/arch/ppc64/kernel/semaphore.c b/arch/ppc64/kernel/semaphore.c
new file mode 100644
index 000000000000..a1c1db573e9c
--- /dev/null
+++ b/arch/ppc64/kernel/semaphore.c
@@ -0,0 +1,136 @@
1/*
2 *
3 *
4 * PowerPC-specific semaphore code.
5 *
6 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * April 2001 - Reworked by Paul Mackerras <paulus@samba.org>
14 * to eliminate the SMP races in the old version between the updates
15 * of `count' and `waking'. Now we use negative `count' values to
16 * indicate that some process(es) are waiting for the semaphore.
17 */
18
19#include <linux/sched.h>
20#include <linux/init.h>
21#include <linux/module.h>
22
23#include <asm/atomic.h>
24#include <asm/semaphore.h>
25#include <asm/errno.h>
26
27/*
28 * Atomically update sem->count.
29 * This does the equivalent of the following:
30 *
31 * old_count = sem->count;
32 * tmp = MAX(old_count, 0) + incr;
33 * sem->count = tmp;
34 * return old_count;
35 */
36static inline int __sem_update_count(struct semaphore *sem, int incr)
37{
38 int old_count, tmp;
39
40 __asm__ __volatile__("\n"
41"1: lwarx %0,0,%3\n"
42" srawi %1,%0,31\n"
43" andc %1,%0,%1\n"
44" add %1,%1,%4\n"
45" stwcx. %1,0,%3\n"
46" bne 1b"
47 : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
48 : "r" (&sem->count), "r" (incr), "m" (sem->count)
49 : "cc");
50
51 return old_count;
52}
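For readers not fluent in PowerPC assembly: the lwarx/stwcx. pair above is a load-reserved/store-conditional retry loop, and the srawi/andc pair clamps negative counts to zero. A portable C11 sketch of the same update, for illustration only (the kernel itself uses the asm above; the name sem_update_count is ours):

#include <stdatomic.h>
#include <stdio.h>

/* Portable illustration of __sem_update_count(): atomically performs
 * count = max(count, 0) + incr and returns the old value. */
static int sem_update_count(_Atomic int *count, int incr)
{
	int old = atomic_load(count);
	int new;

	do {
		new = (old > 0 ? old : 0) + incr;
	} while (!atomic_compare_exchange_weak(count, &old, new));

	return old;
}

int main(void)
{
	_Atomic int count = -3;	/* negative: processes are waiting */
	int old = sem_update_count(&count, 1);

	printf("old=%d now=%d\n", old, atomic_load(&count));	/* old=-3 now=1 */
	return 0;
}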
53
54void __up(struct semaphore *sem)
55{
56 /*
57 * Note that we incremented count in up() before we came here,
58 * but that was ineffective since the result was <= 0, and
59 * any negative value of count is equivalent to 0.
60 * This ends up setting count to 1, unless count is now > 0
61 * (i.e. because some other cpu has called up() in the meantime),
62 * in which case we just increment count.
63 */
64 __sem_update_count(sem, 1);
65 wake_up(&sem->wait);
66}
67EXPORT_SYMBOL(__up);
68
69/*
70 * Note that when we come in to __down or __down_interruptible,
71 * we have already decremented count, but that decrement was
72 * ineffective since the result was < 0, and any negative value
73 * of count is equivalent to 0.
74 * Thus it is only when we decrement count from some value > 0
75 * that we have actually got the semaphore.
76 */
77void __sched __down(struct semaphore *sem)
78{
79 struct task_struct *tsk = current;
80 DECLARE_WAITQUEUE(wait, tsk);
81
82 __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
83 add_wait_queue_exclusive(&sem->wait, &wait);
84
85 /*
86 * Try to get the semaphore. If the count is > 0, then we've
87 * got the semaphore; we decrement count and exit the loop.
88 * If the count is 0 or negative, we set it to -1, indicating
89 * that we are asleep, and then sleep.
90 */
91 while (__sem_update_count(sem, -1) <= 0) {
92 schedule();
93 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
94 }
95 remove_wait_queue(&sem->wait, &wait);
96 __set_task_state(tsk, TASK_RUNNING);
97
98 /*
99 * If there are any more sleepers, wake one of them up so
100 * that it can either get the semaphore, or set count to -1
101 * indicating that there are still processes sleeping.
102 */
103 wake_up(&sem->wait);
104}
105EXPORT_SYMBOL(__down);
106
107int __sched __down_interruptible(struct semaphore * sem)
108{
109 int retval = 0;
110 struct task_struct *tsk = current;
111 DECLARE_WAITQUEUE(wait, tsk);
112
113 __set_task_state(tsk, TASK_INTERRUPTIBLE);
114 add_wait_queue_exclusive(&sem->wait, &wait);
115
116 while (__sem_update_count(sem, -1) <= 0) {
117 if (signal_pending(current)) {
118 /*
119 * A signal is pending - give up trying.
120 * Set sem->count to 0 if it is negative,
121 * since we are no longer sleeping.
122 */
123 __sem_update_count(sem, 0);
124 retval = -EINTR;
125 break;
126 }
127 schedule();
128 set_task_state(tsk, TASK_INTERRUPTIBLE);
129 }
130 remove_wait_queue(&sem->wait, &wait);
131 __set_task_state(tsk, TASK_RUNNING);
132
133 wake_up(&sem->wait);
134 return retval;
135}
136EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c
new file mode 100644
index 000000000000..21c57f539c29
--- /dev/null
+++ b/arch/ppc64/kernel/setup.c
@@ -0,0 +1,1392 @@
1/*
2 *
3 * Common boot and setup code.
4 *
5 * Copyright (C) 2001 PPC64 Team, IBM Corp
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#undef DEBUG
14
15#include <linux/config.h>
16#include <linux/module.h>
17#include <linux/string.h>
18#include <linux/sched.h>
19#include <linux/init.h>
20#include <linux/kernel.h>
21#include <linux/reboot.h>
22#include <linux/delay.h>
23#include <linux/initrd.h>
24#include <linux/ide.h>
25#include <linux/seq_file.h>
26#include <linux/ioport.h>
27#include <linux/console.h>
28#include <linux/version.h>
29#include <linux/tty.h>
30#include <linux/root_dev.h>
31#include <linux/notifier.h>
32#include <linux/cpu.h>
33#include <linux/unistd.h>
34#include <linux/serial.h>
35#include <linux/serial_8250.h>
36#include <asm/io.h>
37#include <asm/prom.h>
38#include <asm/processor.h>
39#include <asm/pgtable.h>
40#include <asm/bootinfo.h>
41#include <asm/smp.h>
42#include <asm/elf.h>
43#include <asm/machdep.h>
44#include <asm/iSeries/LparData.h>
45#include <asm/paca.h>
46#include <asm/ppcdebug.h>
47#include <asm/time.h>
48#include <asm/cputable.h>
49#include <asm/sections.h>
50#include <asm/btext.h>
51#include <asm/nvram.h>
52#include <asm/setup.h>
53#include <asm/system.h>
54#include <asm/rtas.h>
55#include <asm/iommu.h>
56#include <asm/serial.h>
57#include <asm/cache.h>
58#include <asm/page.h>
59#include <asm/mmu.h>
60
61#ifdef DEBUG
62#define DBG(fmt...) udbg_printf(fmt)
63#else
64#define DBG(fmt...)
65#endif
66
67/*
 68 * Here are some early debugging facilities. You can enable one,
 69 * but your kernel will then not boot on anything else.
70 */
71
72/* This one is for use on LPAR machines that support an HVC console
73 * on vterm 0
74 */
75extern void udbg_init_debug_lpar(void);
76/* This one is for use on Apple G5 machines
77 */
78extern void udbg_init_pmac_realmode(void);
79/* That's RTAS panel debug */
80extern void call_rtas_display_status_delay(unsigned char c);
81/* Here's maple real mode debug */
82extern void udbg_init_maple_realmode(void);
83
84#define EARLY_DEBUG_INIT() do {} while(0)
85
86#if 0
87#define EARLY_DEBUG_INIT() udbg_init_debug_lpar()
88#define EARLY_DEBUG_INIT() udbg_init_maple_realmode()
89#define EARLY_DEBUG_INIT() udbg_init_pmac_realmode()
90#define EARLY_DEBUG_INIT() \
91 do { ppc_md.udbg_putc = call_rtas_display_status_delay; } while(0)
92#endif
93
94/* extern void *stab; */
95extern unsigned long klimit;
96
97extern void mm_init_ppc64(void);
98extern int idle_setup(void);
99extern void stab_initialize(unsigned long stab);
100extern void htab_initialize(void);
101extern void early_init_devtree(void *flat_dt);
102extern void unflatten_device_tree(void);
103
104extern void smp_release_cpus(void);
105
106unsigned long decr_overclock = 1;
107unsigned long decr_overclock_proc0 = 1;
108unsigned long decr_overclock_set = 0;
109unsigned long decr_overclock_proc0_set = 0;
110
111int have_of = 1;
112int boot_cpuid = 0;
113int boot_cpuid_phys = 0;
114dev_t boot_dev;
115u64 ppc64_pft_size;
116u64 ppc64_debug_switch;
117
118struct ppc64_caches ppc64_caches;
119EXPORT_SYMBOL_GPL(ppc64_caches);
120
121/*
122 * These are used in binfmt_elf.c to put aux entries on the stack
123 * for each elf executable being started.
124 */
125int dcache_bsize;
126int icache_bsize;
127int ucache_bsize;
128
129/* The main machine-dep calls structure
130 */
131struct machdep_calls ppc_md;
132EXPORT_SYMBOL(ppc_md);
133
134#ifdef CONFIG_MAGIC_SYSRQ
135unsigned long SYSRQ_KEY;
136#endif /* CONFIG_MAGIC_SYSRQ */
137
138
139static int ppc64_panic_event(struct notifier_block *, unsigned long, void *);
140static struct notifier_block ppc64_panic_block = {
141 .notifier_call = ppc64_panic_event,
142 .priority = INT_MIN /* may not return; must be done last */
143};
144
145/*
146 * Perhaps we can put the pmac screen_info[] here
147 * on pmac as well so we don't need the ifdef's.
148 * Until we get multiple-console support in here
149 * that is. -- Cort
150 * Maybe tie it to serial consoles, since this is really what
151 * these processors use on existing boards. -- Dan
152 */
153struct screen_info screen_info = {
154 .orig_x = 0,
155 .orig_y = 25,
156 .orig_video_cols = 80,
157 .orig_video_lines = 25,
158 .orig_video_isVGA = 1,
159 .orig_video_points = 16
160};
161
162/*
163 * Initialize the PPCDBG state. Called before relocation has been enabled.
164 */
165void __init ppcdbg_initialize(void)
166{
167 ppc64_debug_switch = PPC_DEBUG_DEFAULT; /* | PPCDBG_BUSWALK | */
168 /* PPCDBG_PHBINIT | PPCDBG_MM | PPCDBG_MMINIT | PPCDBG_TCEINIT | PPCDBG_TCE */;
169}
170
171/*
172 * Early boot console based on udbg
173 */
174static struct console udbg_console = {
175 .name = "udbg",
176 .write = udbg_console_write,
177 .flags = CON_PRINTBUFFER,
178 .index = -1,
179};
180static int early_console_initialized;
181
182void __init disable_early_printk(void)
183{
184 if (!early_console_initialized)
185 return;
186 unregister_console(&udbg_console);
187 early_console_initialized = 0;
188}
189
190#if defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_SMP)
191
192static int smt_enabled_cmdline;
193
194/* Look for ibm,smt-enabled OF option */
195static void check_smt_enabled(void)
196{
197 struct device_node *dn;
198 char *smt_option;
199
200 /* Allow the command line to overrule the OF option */
201 if (smt_enabled_cmdline)
202 return;
203
204 dn = of_find_node_by_path("/options");
205
206 if (dn) {
207 smt_option = (char *)get_property(dn, "ibm,smt-enabled", NULL);
208
209 if (smt_option) {
210 if (!strcmp(smt_option, "on"))
211 smt_enabled_at_boot = 1;
212 else if (!strcmp(smt_option, "off"))
213 smt_enabled_at_boot = 0;
214 }
215 }
216}
217
218/* Look for smt-enabled= cmdline option */
219static int __init early_smt_enabled(char *p)
220{
221 smt_enabled_cmdline = 1;
222
223 if (!p)
224 return 0;
225
226 if (!strcmp(p, "on") || !strcmp(p, "1"))
227 smt_enabled_at_boot = 1;
228 else if (!strcmp(p, "off") || !strcmp(p, "0"))
229 smt_enabled_at_boot = 0;
230
231 return 0;
232}
233early_param("smt-enabled", early_smt_enabled);
234
235/**
236 * setup_cpu_maps - initialize the following cpu maps:
237 * cpu_possible_map
238 * cpu_present_map
239 * cpu_sibling_map
240 *
241 * Having the possible map set up early allows us to restrict allocations
242 * of things like irqstacks to num_possible_cpus() rather than NR_CPUS.
243 *
244 * We do not initialize the online map here; cpus set their own bits in
245 * cpu_online_map as they come up.
246 *
247 * This function is valid only for Open Firmware systems. finish_device_tree
248 * must be called before using this.
249 *
250 * While we're here, we may as well set the "physical" cpu ids in the paca.
251 */
252static void __init setup_cpu_maps(void)
253{
254 struct device_node *dn = NULL;
255 int cpu = 0;
256 int swap_cpuid = 0;
257
258 check_smt_enabled();
259
260 while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) {
261 u32 *intserv;
262 int j, len = sizeof(u32), nthreads;
263
264 intserv = (u32 *)get_property(dn, "ibm,ppc-interrupt-server#s",
265 &len);
266 if (!intserv)
267 intserv = (u32 *)get_property(dn, "reg", NULL);
268
269 nthreads = len / sizeof(u32);
270
271 for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
272 cpu_set(cpu, cpu_present_map);
273 set_hard_smp_processor_id(cpu, intserv[j]);
274
275 if (intserv[j] == boot_cpuid_phys)
276 swap_cpuid = cpu;
277 cpu_set(cpu, cpu_possible_map);
278 cpu++;
279 }
280 }
281
282 /* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that
283 * boot cpu is logical 0.
284 */
285 if (boot_cpuid_phys != get_hard_smp_processor_id(0)) {
286 u32 tmp;
287 tmp = get_hard_smp_processor_id(0);
288 set_hard_smp_processor_id(0, boot_cpuid_phys);
289 set_hard_smp_processor_id(swap_cpuid, tmp);
290 }
291
292 /*
293 * On pSeries LPAR, we need to know how many cpus
294 * could possibly be added to this partition.
295 */
296 if (systemcfg->platform == PLATFORM_PSERIES_LPAR &&
297 (dn = of_find_node_by_path("/rtas"))) {
298 int num_addr_cell, num_size_cell, maxcpus;
299 unsigned int *ireg;
300
301 num_addr_cell = prom_n_addr_cells(dn);
302 num_size_cell = prom_n_size_cells(dn);
303
304 ireg = (unsigned int *)
305 get_property(dn, "ibm,lrdr-capacity", NULL);
306
307 if (!ireg)
308 goto out;
309
310 maxcpus = ireg[num_addr_cell + num_size_cell];
311
312 /* Double maxcpus for processors which have SMT capability */
313 if (cpu_has_feature(CPU_FTR_SMT))
314 maxcpus *= 2;
315
316 if (maxcpus > NR_CPUS) {
317 printk(KERN_WARNING
318 "Partition configured for %d cpus, "
319 "operating system maximum is %d.\n",
320 maxcpus, NR_CPUS);
321 maxcpus = NR_CPUS;
322 } else
323 printk(KERN_INFO "Partition configured for %d cpus.\n",
324 maxcpus);
325
326 for (cpu = 0; cpu < maxcpus; cpu++)
327 cpu_set(cpu, cpu_possible_map);
328 out:
329 of_node_put(dn);
330 }
331
332 /*
333 * Do the sibling map; assume only two threads per processor.
334 */
335 for_each_cpu(cpu) {
336 cpu_set(cpu, cpu_sibling_map[cpu]);
337 if (cpu_has_feature(CPU_FTR_SMT))
338 cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]);
339 }
340
341 systemcfg->processorCount = num_present_cpus();
342}
343#endif /* defined(CONFIG_PPC_MULTIPLATFORM) && defined(CONFIG_SMP) */
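The sibling-map loop above assumes SMT threads are numbered in adjacent pairs, so a cpu's sibling is simply cpu ^ 0x1. A trivial illustration of that pairing, assuming exactly two threads per core as the code's comment states:

#include <stdio.h>

int main(void)
{
	int cpu;

	/* With two threads per core, cpu ^ 0x1 flips the low bit:
	 * 0 <-> 1, 2 <-> 3, 4 <-> 5, ... */
	for (cpu = 0; cpu < 6; cpu++)
		printf("cpu %d sibling %d\n", cpu, cpu ^ 0x1);
	return 0;
}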
344
345
346#ifdef CONFIG_PPC_MULTIPLATFORM
347
348extern struct machdep_calls pSeries_md;
349extern struct machdep_calls pmac_md;
350extern struct machdep_calls maple_md;
351
352/* Ultimately, stuff them in an elf section like initcalls... */
353static struct machdep_calls __initdata *machines[] = {
354#ifdef CONFIG_PPC_PSERIES
355 &pSeries_md,
356#endif /* CONFIG_PPC_PSERIES */
357#ifdef CONFIG_PPC_PMAC
358 &pmac_md,
359#endif /* CONFIG_PPC_PMAC */
360#ifdef CONFIG_PPC_MAPLE
361 &maple_md,
362#endif /* CONFIG_PPC_MAPLE */
363 NULL
364};
365
366/*
367 * Early initialization entry point. This is called by head.S
368 * with MMU translation disabled. We rely on the "feature" of
369 * the CPU that ignores the top 2 bits of the address in real
370 * mode so we can access kernel globals normally provided we
371 * only toy with things in the RMO region. From here, we do
372 * some early parsing of the device-tree to setup out LMB
373 * data structures, and allocate & initialize the hash table
374 * and segment tables so we can start running with translation
375 * enabled.
376 *
377 * It is this function which will call the probe() callback of
378 * the various platform types and copy the matching one to the
379 * global ppc_md structure. Your platform can eventually do
380 * some very early initializations from the probe() routine, but
 381 * this is not recommended; be very careful as, for example, the
382 * device-tree is not accessible via normal means at this point.
383 */
384
385void __init early_setup(unsigned long dt_ptr)
386{
387 struct paca_struct *lpaca = get_paca();
388 static struct machdep_calls **mach;
389
390 /*
391 * Enable early debugging if any specified (see top of
392 * this file)
393 */
394 EARLY_DEBUG_INIT();
395
396 DBG(" -> early_setup()\n");
397
398 /*
399 * Fill the default DBG level (do we want to keep
 400	 * that old mechanism around forever?)
401 */
402 ppcdbg_initialize();
403
404 /*
405 * Do early initializations using the flattened device
 406	 * tree, like retrieving the physical memory map or
 407	 * calculating/retrieving the hash table size
408 */
409 early_init_devtree(__va(dt_ptr));
410
411 /*
412 * Iterate all ppc_md structures until we find the proper
413 * one for the current machine type
414 */
415 DBG("Probing machine type for platform %x...\n",
416 systemcfg->platform);
417
418 for (mach = machines; *mach; mach++) {
419 if ((*mach)->probe(systemcfg->platform))
420 break;
421 }
 422	/* What can we do if we didn't find one? */
423 if (*mach == NULL) {
424 DBG("No suitable machine found !\n");
425 for (;;);
426 }
427 ppc_md = **mach;
428
 429	/* our udbg callbacks got overridden by the above, let's put them
430 * back in. Ultimately, I want those things to be split from the
431 * main ppc_md
432 */
433 EARLY_DEBUG_INIT();
434
435 DBG("Found, Initializing memory management...\n");
436
437 /*
438 * Initialize stab / SLB management
439 */
440 stab_initialize(lpaca->stab_real);
441
442 /*
443 * Initialize the MMU Hash table and create the linear mapping
444 * of memory
445 */
446 htab_initialize();
447
448 DBG(" <- early_setup()\n");
449}
450
451
452/*
453 * Initialize some remaining members of the ppc64_caches and systemcfg structures
454 * (at least until we get rid of them completely). This is mostly some
455 * cache informations about the CPU that will be used by cache flush
456 * routines and/or provided to userland
457 */
458static void __init initialize_cache_info(void)
459{
460 struct device_node *np;
461 unsigned long num_cpus = 0;
462
463 DBG(" -> initialize_cache_info()\n");
464
465 for (np = NULL; (np = of_find_node_by_type(np, "cpu"));) {
466 num_cpus += 1;
467
468 /* We're assuming *all* of the CPUs have the same
469 * d-cache and i-cache sizes... -Peter
470 */
471
472 if ( num_cpus == 1 ) {
473 u32 *sizep, *lsizep;
474 u32 size, lsize;
475 const char *dc, *ic;
476
 477			/* Then read cache information */
478 if (systemcfg->platform == PLATFORM_POWERMAC) {
479 dc = "d-cache-block-size";
480 ic = "i-cache-block-size";
481 } else {
482 dc = "d-cache-line-size";
483 ic = "i-cache-line-size";
484 }
485
486 size = 0;
487 lsize = cur_cpu_spec->dcache_bsize;
488 sizep = (u32 *)get_property(np, "d-cache-size", NULL);
489 if (sizep != NULL)
490 size = *sizep;
491 lsizep = (u32 *) get_property(np, dc, NULL);
492 if (lsizep != NULL)
493 lsize = *lsizep;
494 if (sizep == 0 || lsizep == 0)
495 DBG("Argh, can't find dcache properties ! "
496 "sizep: %p, lsizep: %p\n", sizep, lsizep);
497
498 systemcfg->dcache_size = ppc64_caches.dsize = size;
499 systemcfg->dcache_line_size =
500 ppc64_caches.dline_size = lsize;
501 ppc64_caches.log_dline_size = __ilog2(lsize);
502 ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
503
504 size = 0;
505 lsize = cur_cpu_spec->icache_bsize;
506 sizep = (u32 *)get_property(np, "i-cache-size", NULL);
507 if (sizep != NULL)
508 size = *sizep;
509 lsizep = (u32 *)get_property(np, ic, NULL);
510 if (lsizep != NULL)
511 lsize = *lsizep;
512 if (sizep == 0 || lsizep == 0)
513 DBG("Argh, can't find icache properties ! "
514 "sizep: %p, lsizep: %p\n", sizep, lsizep);
515
516 systemcfg->icache_size = ppc64_caches.isize = size;
517 systemcfg->icache_line_size =
518 ppc64_caches.iline_size = lsize;
519 ppc64_caches.log_iline_size = __ilog2(lsize);
520 ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
521 }
522 }
523
524 /* Add an eye catcher and the systemcfg layout version number */
525 strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
526 systemcfg->version.major = SYSTEMCFG_MAJOR;
527 systemcfg->version.minor = SYSTEMCFG_MINOR;
528 systemcfg->processor = mfspr(SPRN_PVR);
529
530 DBG(" <- initialize_cache_info()\n");
531}
532
533static void __init check_for_initrd(void)
534{
535#ifdef CONFIG_BLK_DEV_INITRD
536 u64 *prop;
537
538 DBG(" -> check_for_initrd()\n");
539
540 prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL);
541 if (prop != NULL) {
542 initrd_start = (unsigned long)__va(*prop);
543 prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL);
544 if (prop != NULL) {
545 initrd_end = (unsigned long)__va(*prop);
546 initrd_below_start_ok = 1;
547 } else
548 initrd_start = 0;
549 }
550
551 /* If we were passed an initrd, set the ROOT_DEV properly if the values
552 * look sensible. If not, clear initrd reference.
553 */
554 if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
555 initrd_end > initrd_start)
556 ROOT_DEV = Root_RAM0;
557 else
558 initrd_start = initrd_end = 0;
559
560 if (initrd_start)
561 printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
562
563 DBG(" <- check_for_initrd()\n");
564#endif /* CONFIG_BLK_DEV_INITRD */
565}
566
567#endif /* CONFIG_PPC_MULTIPLATFORM */
568
569/*
570 * Do some initial setup of the system. The parameters are those which
571 * were passed in from the bootloader.
572 */
573void __init setup_system(void)
574{
575 DBG(" -> setup_system()\n");
576
577#ifdef CONFIG_PPC_ISERIES
578 /* pSeries systems are identified in prom.c via OF. */
579 if (itLpNaca.xLparInstalled == 1)
580 systemcfg->platform = PLATFORM_ISERIES_LPAR;
581
582 ppc_md.init_early();
583#else /* CONFIG_PPC_ISERIES */
584
585 /*
586 * Unflatten the device-tree passed by prom_init or kexec
587 */
588 unflatten_device_tree();
589
 590	 * Fill the ppc64_caches & systemcfg structures with information
 591	 * retrieved from the device-tree. This needs to be called before
 592	 * finish_device_tree() since the latter requires some of the
 593	 * information filled in here to properly parse the interrupt
 594	 * tree.
 595	 * It also sets up the cache line sizes, which allows calling
 596	 * routines like flush_icache_range (used by the hash init
597 * routines like flush_icache_range (used by the hash init
598 * later on).
599 */
600 initialize_cache_info();
601
602#ifdef CONFIG_PPC_RTAS
603 /*
604 * Initialize RTAS if available
605 */
606 rtas_initialize();
607#endif /* CONFIG_PPC_RTAS */
608
609 /*
610 * Check if we have an initrd provided via the device-tree
611 */
612 check_for_initrd();
613
614 /*
 615	 * Do some platform-specific early initializations; that includes
 616	 * setting up the hash table pointers. It also sets up some
 617	 * interrupt-mapping related options that will be used by finish_device_tree()
618 */
619 ppc_md.init_early();
620
621 /*
622 * "Finish" the device-tree, that is do the actual parsing of
623 * some of the properties like the interrupt map
624 */
625 finish_device_tree();
626
627 /*
628 * Initialize xmon
629 */
630#ifdef CONFIG_XMON_DEFAULT
631 xmon_init();
632#endif
633 /*
634 * Register early console
635 */
636 early_console_initialized = 1;
637 register_console(&udbg_console);
638
639 /* Save unparsed command line copy for /proc/cmdline */
640 strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
641
642 parse_early_param();
643#endif /* !CONFIG_PPC_ISERIES */
644
645#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES)
646 /*
647 * iSeries has already initialized the cpu maps at this point.
648 */
649 setup_cpu_maps();
650
651 /* Release secondary cpus out of their spinloops at 0x60 now that
652 * we can map physical -> logical CPU ids
653 */
654 smp_release_cpus();
655#endif /* defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES) */
656
657 printk("Starting Linux PPC64 %s\n", UTS_RELEASE);
658
659 printk("-----------------------------------------------------\n");
660 printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
661 printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch);
662 printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller);
663 printk("systemcfg = 0x%p\n", systemcfg);
664 printk("systemcfg->platform = 0x%x\n", systemcfg->platform);
665 printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount);
666 printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize);
667 printk("ppc64_caches.dcache_line_size = 0x%x\n",
668 ppc64_caches.dline_size);
669 printk("ppc64_caches.icache_line_size = 0x%x\n",
670 ppc64_caches.iline_size);
671 printk("htab_address = 0x%p\n", htab_address);
672 printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
673 printk("-----------------------------------------------------\n");
674
675 mm_init_ppc64();
676
677 DBG(" <- setup_system()\n");
678}
679
680
681void machine_restart(char *cmd)
682{
683 if (ppc_md.nvram_sync)
684 ppc_md.nvram_sync();
685 ppc_md.restart(cmd);
686}
687
688EXPORT_SYMBOL(machine_restart);
689
690void machine_power_off(void)
691{
692 if (ppc_md.nvram_sync)
693 ppc_md.nvram_sync();
694 ppc_md.power_off();
695}
696
697EXPORT_SYMBOL(machine_power_off);
698
699void machine_halt(void)
700{
701 if (ppc_md.nvram_sync)
702 ppc_md.nvram_sync();
703 ppc_md.halt();
704}
705
706EXPORT_SYMBOL(machine_halt);
707
708unsigned long ppc_proc_freq;
709unsigned long ppc_tb_freq;
710
711static int ppc64_panic_event(struct notifier_block *this,
712 unsigned long event, void *ptr)
713{
714 ppc_md.panic((char *)ptr); /* May not return */
715 return NOTIFY_DONE;
716}
717
718
719#ifdef CONFIG_SMP
720DEFINE_PER_CPU(unsigned int, pvr);
721#endif
722
723static int show_cpuinfo(struct seq_file *m, void *v)
724{
725 unsigned long cpu_id = (unsigned long)v - 1;
726 unsigned int pvr;
727 unsigned short maj;
728 unsigned short min;
729
730 if (cpu_id == NR_CPUS) {
731 seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
732
733 if (ppc_md.get_cpuinfo != NULL)
734 ppc_md.get_cpuinfo(m);
735
736 return 0;
737 }
738
739 /* We only show online cpus: disable preempt (overzealous, I
 740	 * know) to prevent the cpu from going down. */
741 preempt_disable();
742 if (!cpu_online(cpu_id)) {
743 preempt_enable();
744 return 0;
745 }
746
747#ifdef CONFIG_SMP
748 pvr = per_cpu(pvr, cpu_id);
749#else
750 pvr = mfspr(SPRN_PVR);
751#endif
752 maj = (pvr >> 8) & 0xFF;
753 min = pvr & 0xFF;
754
755 seq_printf(m, "processor\t: %lu\n", cpu_id);
756 seq_printf(m, "cpu\t\t: ");
757
758 if (cur_cpu_spec->pvr_mask)
759 seq_printf(m, "%s", cur_cpu_spec->cpu_name);
760 else
761 seq_printf(m, "unknown (%08x)", pvr);
762
763#ifdef CONFIG_ALTIVEC
764 if (cpu_has_feature(CPU_FTR_ALTIVEC))
765 seq_printf(m, ", altivec supported");
766#endif /* CONFIG_ALTIVEC */
767
768 seq_printf(m, "\n");
769
770 /*
771 * Assume here that all clock rates are the same in a
772 * smp system. -- Cort
773 */
774 seq_printf(m, "clock\t\t: %lu.%06luMHz\n", ppc_proc_freq / 1000000,
775 ppc_proc_freq % 1000000);
776
777 seq_printf(m, "revision\t: %hd.%hd\n\n", maj, min);
778
779 preempt_enable();
780 return 0;
781}
782
783static void *c_start(struct seq_file *m, loff_t *pos)
784{
785 return *pos <= NR_CPUS ? (void *)((*pos)+1) : NULL;
786}
787static void *c_next(struct seq_file *m, void *v, loff_t *pos)
788{
789 ++*pos;
790 return c_start(m, pos);
791}
792static void c_stop(struct seq_file *m, void *v)
793{
794}
795struct seq_operations cpuinfo_op = {
 796	.start = c_start,
797 .next = c_next,
798 .stop = c_stop,
799 .show = show_cpuinfo,
800};
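The +1/-1 arithmetic between c_start() and show_cpuinfo() exists because seq_file treats a NULL iterator as end-of-sequence, so position 0 must be encoded as a non-NULL token. A tiny sketch of the convention (names are ours; casting an integer through void * here is purely illustrative):

#include <stdio.h>

/* Encode position pos as a non-NULL token; NULL means "stop". */
static void *c_start_like(long pos, long ncpus)
{
	return pos <= ncpus ? (void *)(pos + 1) : NULL;
}

int main(void)
{
	void *v = c_start_like(0, 4);

	if (v)	/* decode back: token - 1 */
		printf("cpu_id = %ld\n", (long)v - 1);	/* prints 0 */
	return 0;
}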
801
802/*
803 * These three variables are used to save values passed to us by prom_init()
804 * via the device tree. The TCE variables are needed because with a memory_limit
 805 * in force we may need to explicitly map the TCE area at the top of RAM.
806 */
807unsigned long memory_limit;
808unsigned long tce_alloc_start;
809unsigned long tce_alloc_end;
810
811#ifdef CONFIG_PPC_ISERIES
812/*
813 * On iSeries we just parse the mem=X option from the command line.
814 * On pSeries it's a bit more complicated, see prom_init_mem()
815 */
816static int __init early_parsemem(char *p)
817{
818 if (!p)
819 return 0;
820
821 memory_limit = ALIGN(memparse(p, &p), PAGE_SIZE);
822
823 return 0;
824}
825early_param("mem", early_parsemem);
826#endif /* CONFIG_PPC_ISERIES */
827
828#ifdef CONFIG_PPC_MULTIPLATFORM
829static int __init set_preferred_console(void)
830{
831 struct device_node *prom_stdout = NULL;
832 char *name;
833 u32 *spd;
834 int offset = 0;
835
836 DBG(" -> set_preferred_console()\n");
837
838 /* The user has requested a console so this is already set up. */
839 if (strstr(saved_command_line, "console=")) {
840 DBG(" console was specified !\n");
841 return -EBUSY;
842 }
843
844 if (!of_chosen) {
845 DBG(" of_chosen is NULL !\n");
846 return -ENODEV;
847 }
848 /* We are getting a weird phandle from OF ... */
849 /* ... So use the full path instead */
850 name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
851 if (name == NULL) {
852 DBG(" no linux,stdout-path !\n");
853 return -ENODEV;
854 }
855 prom_stdout = of_find_node_by_path(name);
856 if (!prom_stdout) {
857 DBG(" can't find stdout package %s !\n", name);
858 return -ENODEV;
859 }
860 DBG("stdout is %s\n", prom_stdout->full_name);
861
862 name = (char *)get_property(prom_stdout, "name", NULL);
863 if (!name) {
864 DBG(" stdout package has no name !\n");
865 goto not_found;
866 }
867 spd = (u32 *)get_property(prom_stdout, "current-speed", NULL);
868
869 if (0)
870 ;
871#ifdef CONFIG_SERIAL_8250_CONSOLE
872 else if (strcmp(name, "serial") == 0) {
873 int i;
874 u32 *reg = (u32 *)get_property(prom_stdout, "reg", &i);
875 if (i > 8) {
876 switch (reg[1]) {
877 case 0x3f8:
878 offset = 0;
879 break;
880 case 0x2f8:
881 offset = 1;
882 break;
883 case 0x898:
884 offset = 2;
885 break;
886 case 0x890:
887 offset = 3;
888 break;
889 default:
 890				/* We don't recognise the serial port */
891 goto not_found;
892 }
893 }
894 }
895#endif /* CONFIG_SERIAL_8250_CONSOLE */
896#ifdef CONFIG_PPC_PSERIES
897 else if (strcmp(name, "vty") == 0) {
898 u32 *reg = (u32 *)get_property(prom_stdout, "reg", NULL);
899 char *compat = (char *)get_property(prom_stdout, "compatible", NULL);
900
901 if (reg && compat && (strcmp(compat, "hvterm-protocol") == 0)) {
902 /* Host Virtual Serial Interface */
903 int offset;
904 switch (reg[0]) {
905 case 0x30000000:
906 offset = 0;
907 break;
908 case 0x30000001:
909 offset = 1;
910 break;
911 default:
912 goto not_found;
913 }
914 of_node_put(prom_stdout);
915 DBG("Found hvsi console at offset %d\n", offset);
916 return add_preferred_console("hvsi", offset, NULL);
917 } else {
918 /* pSeries LPAR virtual console */
919 of_node_put(prom_stdout);
920 DBG("Found hvc console\n");
921 return add_preferred_console("hvc", 0, NULL);
922 }
923 }
924#endif /* CONFIG_PPC_PSERIES */
925#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE
926 else if (strcmp(name, "ch-a") == 0)
927 offset = 0;
928 else if (strcmp(name, "ch-b") == 0)
929 offset = 1;
930#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */
931 else
932 goto not_found;
933 of_node_put(prom_stdout);
934
935 DBG("Found serial console at ttyS%d\n", offset);
936
937 if (spd) {
938 static char __initdata opt[16];
939 sprintf(opt, "%d", *spd);
940 return add_preferred_console("ttyS", offset, opt);
941 } else
942 return add_preferred_console("ttyS", offset, NULL);
943
944 not_found:
945 DBG("No preferred console found !\n");
946 of_node_put(prom_stdout);
947 return -ENODEV;
948}
949console_initcall(set_preferred_console);
950#endif /* CONFIG_PPC_MULTIPLATFORM */
951
952#ifdef CONFIG_IRQSTACKS
953static void __init irqstack_early_init(void)
954{
955 unsigned int i;
956
957 /*
958 * interrupt stacks must be under 256MB, we cannot afford to take
959 * SLB misses on them.
960 */
961 for_each_cpu(i) {
962 softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
963 THREAD_SIZE, 0x10000000));
964 hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
965 THREAD_SIZE, 0x10000000));
966 }
967}
968#else
969#define irqstack_early_init()
970#endif
971
972/*
973 * Stack space used when we detect a bad kernel stack pointer, and
974 * early in SMP boots before relocation is enabled.
975 */
976static void __init emergency_stack_init(void)
977{
978 unsigned long limit;
979 unsigned int i;
980
981 /*
982 * Emergency stacks must be under 256MB, we cannot afford to take
983 * SLB misses on them. The ABI also requires them to be 128-byte
984 * aligned.
985 *
986 * Since we use these as temporary stacks during secondary CPU
987 * bringup, we need to get at them in real mode. This means they
988 * must also be within the RMO region.
989 */
990 limit = min(0x10000000UL, lmb.rmo_size);
991
992 for_each_cpu(i)
993 paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128,
994 limit)) + PAGE_SIZE;
995}
996
997/*
998 * Called from setup_arch to initialize the bitmap of available
999 * syscalls in the systemcfg page
1000 */
1001void __init setup_syscall_map(void)
1002{
1003 unsigned int i, count64 = 0, count32 = 0;
1004 extern unsigned long *sys_call_table;
1005 extern unsigned long *sys_call_table32;
1006 extern unsigned long sys_ni_syscall;
1007
1008
1009 for (i = 0; i < __NR_syscalls; i++) {
1010 if (sys_call_table[i] == sys_ni_syscall)
1011 continue;
1012 count64++;
1013 systemcfg->syscall_map_64[i >> 5] |= 0x80000000UL >> (i & 0x1f);
1014 }
1015 for (i = 0; i < __NR_syscalls; i++) {
1016 if (sys_call_table32[i] == sys_ni_syscall)
1017 continue;
1018 count32++;
1019 systemcfg->syscall_map_32[i >> 5] |= 0x80000000UL >> (i & 0x1f);
1020 }
1021 printk(KERN_INFO "Syscall map setup, %d 32 bits and %d 64 bits syscalls\n",
1022 count32, count64);
1023}
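Note the MSB-first bit numbering above: syscall i lands in 32-bit word i >> 5 under the mask 0x80000000 >> (i & 0x1f). A small sketch of the matching lookup (the function and variable names are ours):

#include <stdio.h>

/* Test a bit set the same way setup_syscall_map() sets it:
 * word i >> 5, MSB-first within the word. */
static int syscall_implemented(unsigned int *map, unsigned int i)
{
	return (map[i >> 5] & (0x80000000U >> (i & 0x1f))) != 0;
}

int main(void)
{
	unsigned int map[8] = { 0 };
	unsigned int i = 37;

	map[i >> 5] |= 0x80000000U >> (i & 0x1f);
	printf("%d %d\n", syscall_implemented(map, 37),
	       syscall_implemented(map, 38));	/* prints: 1 0 */
	return 0;
}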
1024
1025/*
1026 * Called from start_kernel, after lock_kernel has been called.
1027 * Initializes bootmem, which is used to manage page allocation until
1028 * mem_init is called.
1029 */
1030void __init setup_arch(char **cmdline_p)
1031{
1032 extern void do_init_bootmem(void);
1033
1034 ppc64_boot_msg(0x12, "Setup Arch");
1035
1036 *cmdline_p = cmd_line;
1037
1038 /*
1039 * Set cache line size based on type of cpu as a default.
1040 * Systems with OF can look in the properties on the cpu node(s)
1041 * for a possibly more accurate value.
1042 */
1043 dcache_bsize = ppc64_caches.dline_size;
1044 icache_bsize = ppc64_caches.iline_size;
1045
1046 /* reboot on panic */
1047 panic_timeout = 180;
1048
1049 if (ppc_md.panic)
1050 notifier_chain_register(&panic_notifier_list, &ppc64_panic_block);
1051
1052 init_mm.start_code = PAGE_OFFSET;
1053 init_mm.end_code = (unsigned long) _etext;
1054 init_mm.end_data = (unsigned long) _edata;
1055 init_mm.brk = klimit;
1056
1057 irqstack_early_init();
1058 emergency_stack_init();
1059
1060 /* set up the bootmem stuff with available memory */
1061 do_init_bootmem();
1062
1063 /* initialize the syscall map in systemcfg */
1064 setup_syscall_map();
1065
1066 ppc_md.setup_arch();
1067
1068 /* Select the correct idle loop for the platform. */
1069 idle_setup();
1070
1071 paging_init();
1072 ppc64_boot_msg(0x15, "Setup Done");
1073}
1074
1075
1076/* ToDo: do something useful if ppc_md is not yet setup. */
1077#define PPC64_LINUX_FUNCTION 0x0f000000
1078#define PPC64_IPL_MESSAGE 0xc0000000
1079#define PPC64_TERM_MESSAGE 0xb0000000
1080#define PPC64_ATTN_MESSAGE 0xa0000000
1081#define PPC64_DUMP_MESSAGE 0xd0000000
1082
1083static void ppc64_do_msg(unsigned int src, const char *msg)
1084{
1085 if (ppc_md.progress) {
1086 char buf[32];
1087
1088 sprintf(buf, "%08x \n", src);
1089 ppc_md.progress(buf, 0);
1090 sprintf(buf, "%-16s", msg);
1091 ppc_md.progress(buf, 0);
1092 }
1093}
1094
1095/* Print a boot progress message. */
1096void ppc64_boot_msg(unsigned int src, const char *msg)
1097{
1098 ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg);
1099 printk("[boot]%04x %s\n", src, msg);
1100}
1101
1102/* Print a termination message (print only -- does not stop the kernel) */
1103void ppc64_terminate_msg(unsigned int src, const char *msg)
1104{
1105 ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_TERM_MESSAGE|src, msg);
1106 printk("[terminate]%04x %s\n", src, msg);
1107}
1108
1109/* Print something that needs attention (device error, etc) */
1110void ppc64_attention_msg(unsigned int src, const char *msg)
1111{
1112 ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_ATTN_MESSAGE|src, msg);
1113 printk("[attention]%04x %s\n", src, msg);
1114}
1115
1116/* Print a dump progress message. */
1117void ppc64_dump_msg(unsigned int src, const char *msg)
1118{
1119 ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_DUMP_MESSAGE|src, msg);
1120 printk("[dump]%04x %s\n", src, msg);
1121}
1122
1123int set_spread_lpevents( char * str )
1124{
1125 /* The parameter is the number of processors to share in processing lp events */
1126 unsigned long i;
1127 unsigned long val = simple_strtoul( str, NULL, 0 );
1128 if ( ( val > 0 ) && ( val <= NR_CPUS ) ) {
1129 for ( i=1; i<val; ++i )
1130 paca[i].lpqueue_ptr = paca[0].lpqueue_ptr;
1131 printk("lpevent processing spread over %ld processors\n", val);
1132 }
1133 else
1134		printk("invalid spread_lpevents %ld\n", val);
1135 return 1;
1136}
1137
1138/* This should only be called on processor 0 during calibrate decr */
1139void setup_default_decr(void)
1140{
1141 struct paca_struct *lpaca = get_paca();
1142
1143 if ( decr_overclock_set && !decr_overclock_proc0_set )
1144 decr_overclock_proc0 = decr_overclock;
1145
1146 lpaca->default_decr = tb_ticks_per_jiffy / decr_overclock_proc0;
1147 lpaca->next_jiffy_update_tb = get_tb() + tb_ticks_per_jiffy;
1148}
1149
1150int set_decr_overclock_proc0( char * str )
1151{
1152 unsigned long val = simple_strtoul( str, NULL, 0 );
1153 if ( ( val >= 1 ) && ( val <= 48 ) ) {
1154 decr_overclock_proc0_set = 1;
1155 decr_overclock_proc0 = val;
1156 printk("proc 0 decrementer overclock factor of %ld\n", val);
1157 }
1158 else
1159 printk("invalid proc 0 decrementer overclock factor of %ld\n", val);
1160 return 1;
1161}
1162
1163int set_decr_overclock( char * str )
1164{
1165 unsigned long val = simple_strtoul( str, NULL, 0 );
1166 if ( ( val >= 1 ) && ( val <= 48 ) ) {
1167 decr_overclock_set = 1;
1168 decr_overclock = val;
1169 printk("decrementer overclock factor of %ld\n", val);
1170 }
1171 else
1172 printk("invalid decrementer overclock factor of %ld\n", val);
1173 return 1;
1174
1175}
1176
1177__setup("spread_lpevents=", set_spread_lpevents );
1178__setup("decr_overclock_proc0=", set_decr_overclock_proc0 );
1179__setup("decr_overclock=", set_decr_overclock );
1180
1181#ifndef CONFIG_PPC_ISERIES
1182/*
1183 * This function can be used by platforms to "find" legacy serial ports.
1184 * It works for "serial" nodes under an "isa" node, and will try to
1185 * respect the "ibm,aix-loc" property if any. It works with up to 8
1186 * ports.
1187 */
1188
1189#define MAX_LEGACY_SERIAL_PORTS 8
1190static struct plat_serial8250_port serial_ports[MAX_LEGACY_SERIAL_PORTS+1];
1191static unsigned int old_serial_count;
1192
1193void __init generic_find_legacy_serial_ports(u64 *physport,
1194 unsigned int *default_speed)
1195{
1196 struct device_node *np;
1197 u32 *sizeprop;
1198
1199 struct isa_reg_property {
1200 u32 space;
1201 u32 address;
1202 u32 size;
1203 };
1204 struct pci_reg_property {
1205 struct pci_address addr;
1206 u32 size_hi;
1207 u32 size_lo;
1208 };
1209
1210 DBG(" -> generic_find_legacy_serial_port()\n");
1211
1212 *physport = 0;
1213 if (default_speed)
1214 *default_speed = 0;
1215
1216 np = of_find_node_by_path("/");
1217 if (!np)
1218 return;
1219
1220 /* First fill our array */
1221 for (np = NULL; (np = of_find_node_by_type(np, "serial"));) {
1222 struct device_node *isa, *pci;
1223 struct isa_reg_property *reg;
1224 unsigned long phys_size, addr_size, io_base;
1225 u32 *rangesp;
1226 u32 *interrupts, *clk, *spd;
1227 char *typep;
1228 int index, rlen, rentsize;
1229
1230 /* Ok, first check if it's under an "isa" parent */
1231 isa = of_get_parent(np);
1232 if (!isa || strcmp(isa->name, "isa")) {
1233 DBG("%s: no isa parent found\n", np->full_name);
1234 continue;
1235 }
1236
1237 /* Now look for an "ibm,aix-loc" property that gives us ordering
1238 * if any...
1239 */
1240 typep = (char *)get_property(np, "ibm,aix-loc", NULL);
1241
1242 /* Get the ISA port number */
1243 reg = (struct isa_reg_property *)get_property(np, "reg", NULL);
1244 if (reg == NULL)
1245 goto next_port;
1246 /* We assume the interrupt number isn't translated ... */
1247 interrupts = (u32 *)get_property(np, "interrupts", NULL);
1248 /* get clock freq. if present */
1249 clk = (u32 *)get_property(np, "clock-frequency", NULL);
1250 /* get default speed if present */
1251 spd = (u32 *)get_property(np, "current-speed", NULL);
1252		/* Default to locating the port at the end of the array */
1253		index = old_serial_count;
1254
1255 /* If we have a location index, then use it */
1256 if (typep && *typep == 'S') {
1257 index = simple_strtol(typep+1, NULL, 0) - 1;
1258 /* if index is out of range, use end of array instead */
1259 if (index >= MAX_LEGACY_SERIAL_PORTS)
1260 index = old_serial_count;
1261			 * if our index is still out of range, that means the
1262			 * array is full; we could scan for a free slot, but it
1263			 * makes little sense to bother, so just skip the port
1264 */
1265 if (index >= MAX_LEGACY_SERIAL_PORTS)
1266 goto next_port;
1267 if (index >= old_serial_count)
1268 old_serial_count = index + 1;
1269			/* Check if there is a port that already claimed our slot */
1270 if (serial_ports[index].iobase != 0) {
1271 /* if we still have some room, move it, else override */
1272 if (old_serial_count < MAX_LEGACY_SERIAL_PORTS) {
1273 DBG("Moved legacy port %d -> %d\n", index,
1274 old_serial_count);
1275 serial_ports[old_serial_count++] =
1276 serial_ports[index];
1277 } else {
1278 DBG("Replacing legacy port %d\n", index);
1279 }
1280 }
1281 }
1282 if (index >= MAX_LEGACY_SERIAL_PORTS)
1283 goto next_port;
1284 if (index >= old_serial_count)
1285 old_serial_count = index + 1;
1286
1287 /* Now fill the entry */
1288 memset(&serial_ports[index], 0, sizeof(struct plat_serial8250_port));
1289 serial_ports[index].uartclk = clk ? *clk : BASE_BAUD * 16;
1290 serial_ports[index].iobase = reg->address;
1291 serial_ports[index].irq = interrupts ? interrupts[0] : 0;
1292 serial_ports[index].flags = ASYNC_BOOT_AUTOCONF;
1293
1294 DBG("Added legacy port, index: %d, port: %x, irq: %d, clk: %d\n",
1295 index,
1296 serial_ports[index].iobase,
1297 serial_ports[index].irq,
1298 serial_ports[index].uartclk);
1299
1300 /* Get the phys address of the IO reg for the first port */
1301 if (index != 0)
1302 goto next_port;
1303
1304 pci = of_get_parent(isa);
1305 if (!pci) {
1306 DBG("%s: no pci parent found\n", np->full_name);
1307 goto next_port;
1308 }
1309
1310 rangesp = (u32 *)get_property(pci, "ranges", &rlen);
1311 if (rangesp == NULL) {
1312 of_node_put(pci);
1313 goto next_port;
1314 }
1315 rlen /= 4;
1316
1317 /* we need the #size-cells of the PCI bridge node itself */
1318 phys_size = 1;
1319 sizeprop = (u32 *)get_property(pci, "#size-cells", NULL);
1320 if (sizeprop != NULL)
1321 phys_size = *sizeprop;
1322 /* we need the parent #addr-cells */
1323 addr_size = prom_n_addr_cells(pci);
1324 rentsize = 3 + addr_size + phys_size;
1325 io_base = 0;
1326 for (;rlen >= rentsize; rlen -= rentsize,rangesp += rentsize) {
1327 if (((rangesp[0] >> 24) & 0x3) != 1)
1328 continue; /* not IO space */
1329 io_base = rangesp[3];
1330 if (addr_size == 2)
1331 io_base = (io_base << 32) | rangesp[4];
1332 }
1333 if (io_base != 0) {
1334 *physport = io_base + reg->address;
1335 if (default_speed && spd)
1336 *default_speed = *spd;
1337 }
1338 of_node_put(pci);
1339 next_port:
1340 of_node_put(isa);
1341 }
1342
1343 DBG(" <- generic_find_legacy_serial_ports()\n");
1344}
1345
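/* For reference, a sketch of an Open Firmware "serial" node this parser
 * understands; the property values are illustrative only, not taken from
 * any particular machine:
 *
 *	serial@i3f8 {
 *		device_type = "serial";
 *		reg = <1 3f8 8>;		(ISA I/O space, port 0x3f8)
 *		interrupts = <4 3>;
 *		clock-frequency = <1c2000>;	(1843200 Hz)
 *		current-speed = <2580>;		(9600 baud)
 *		ibm,aix-loc = "S1";
 *	};
 */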
1346static struct platform_device serial_device = {
1347 .name = "serial8250",
1348 .id = 0,
1349 .dev = {
1350 .platform_data = serial_ports,
1351 },
1352};
1353
1354static int __init serial_dev_init(void)
1355{
1356 return platform_device_register(&serial_device);
1357}
1358arch_initcall(serial_dev_init);
1359
1360#endif /* CONFIG_PPC_ISERIES */
1361
1362int check_legacy_ioport(unsigned long base_port)
1363{
1364 if (ppc_md.check_legacy_ioport == NULL)
1365 return 0;
1366 return ppc_md.check_legacy_ioport(base_port);
1367}
1368EXPORT_SYMBOL(check_legacy_ioport);
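/* A usage sketch (the port number is only an example): a legacy driver
 * would probe roughly like this before touching the hardware:
 *
 *	if (check_legacy_ioport(0x3f8))
 *		return -ENODEV;
 */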
1369
1370#ifdef CONFIG_XMON
1371static int __init early_xmon(char *p)
1372{
1373 /* ensure xmon is enabled */
1374 if (p) {
1375 if (strncmp(p, "on", 2) == 0)
1376 xmon_init();
1377 if (strncmp(p, "early", 5) != 0)
1378 return 0;
1379 }
1380 xmon_init();
1381 debugger(NULL);
1382
1383 return 0;
1384}
1385early_param("xmon", early_xmon);
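/* Usage sketch: booting with "xmon=on" only enables the monitor, while
 * bare "xmon" or "xmon=early" also drops into the debugger right here
 * via debugger(NULL).
 */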
1386#endif
1387
1388void cpu_die(void)
1389{
1390 if (ppc_md.cpu_die)
1391 ppc_md.cpu_die();
1392}
diff --git a/arch/ppc64/kernel/signal.c b/arch/ppc64/kernel/signal.c
new file mode 100644
index 000000000000..a95a2b49a1d5
--- /dev/null
+++ b/arch/ppc64/kernel/signal.c
@@ -0,0 +1,575 @@
1/*
2 * linux/arch/ppc64/kernel/signal.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Derived from "arch/i386/kernel/signal.c"
8 * Copyright (C) 1991, 1992 Linus Torvalds
9 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16
17#include <linux/config.h>
18#include <linux/sched.h>
19#include <linux/mm.h>
20#include <linux/smp.h>
21#include <linux/smp_lock.h>
22#include <linux/kernel.h>
23#include <linux/signal.h>
24#include <linux/errno.h>
25#include <linux/wait.h>
26#include <linux/unistd.h>
27#include <linux/stddef.h>
28#include <linux/elf.h>
29#include <linux/ptrace.h>
30#include <linux/module.h>
31
32#include <asm/sigcontext.h>
33#include <asm/ucontext.h>
34#include <asm/uaccess.h>
35#include <asm/pgtable.h>
36#include <asm/ppcdebug.h>
37#include <asm/unistd.h>
38#include <asm/cacheflush.h>
39#include <asm/vdso.h>
40
41#define DEBUG_SIG 0
42
43#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
44
45#ifndef MIN
46#define MIN(a,b) (((a) < (b)) ? (a) : (b))
47#endif
48
49#define GP_REGS_SIZE MIN(sizeof(elf_gregset_t), sizeof(struct pt_regs))
50#define FP_REGS_SIZE sizeof(elf_fpregset_t)
51
52#define TRAMP_TRACEBACK 3
53#define TRAMP_SIZE 6
54
55/*
56 * When we have signals to deliver, we set up on the user stack,
57 * going down from the original stack pointer:
58 * 1) a rt_sigframe struct which contains the ucontext
59 * 2) a gap of __SIGNAL_FRAMESIZE bytes which acts as a dummy caller
60 * frame for the signal handler.
61 */
62
63struct rt_sigframe {
64 /* sys_rt_sigreturn requires the ucontext be the first field */
65 struct ucontext uc;
66 unsigned long _unused[2];
67 unsigned int tramp[TRAMP_SIZE];
68 struct siginfo *pinfo;
69 void *puc;
70 struct siginfo info;
71 /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */
72 char abigap[288];
73} __attribute__ ((aligned (16)));
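/* For illustration, a sketch of the resulting user stack (offsets are
 * indicative only):
 *
 *	high addr	[ interrupted frame, old GPR1 points here ]
 *			[ struct rt_sigframe: uc, tramp, info, abigap ]
 *	low addr	[ __SIGNAL_FRAMESIZE dummy caller frame ] <- new GPR1
 */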
74
75
76/*
77 * Atomically swap in the new signal mask, and wait for a signal.
78 */
79long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, int p3, int p4,
80 int p6, int p7, struct pt_regs *regs)
81{
82 sigset_t saveset, newset;
83
84 /* XXX: Don't preclude handling different sized sigset_t's. */
85 if (sigsetsize != sizeof(sigset_t))
86 return -EINVAL;
87
88 if (copy_from_user(&newset, unewset, sizeof(newset)))
89 return -EFAULT;
90 sigdelsetmask(&newset, ~_BLOCKABLE);
91
92 spin_lock_irq(&current->sighand->siglock);
93 saveset = current->blocked;
94 current->blocked = newset;
95 recalc_sigpending();
96 spin_unlock_irq(&current->sighand->siglock);
97
98 regs->result = -EINTR;
99 regs->gpr[3] = EINTR;
100 regs->ccr |= 0x10000000;
101 while (1) {
102 current->state = TASK_INTERRUPTIBLE;
103 schedule();
104 if (do_signal(&saveset, regs))
105 return 0;
106 }
107}
108
109long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5,
110 unsigned long r6, unsigned long r7, unsigned long r8,
111 struct pt_regs *regs)
112{
113 return do_sigaltstack(uss, uoss, regs->gpr[1]);
114}
115
116
117/*
118 * Set up the sigcontext for the signal frame.
119 */
120
121static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
122 int signr, sigset_t *set, unsigned long handler)
123{
124 /* When CONFIG_ALTIVEC is set, we _always_ set up v_regs even if the
125 * process has never used altivec yet (MSR_VEC is zero in pt_regs of
126 * the context). This is very important because we must ensure we
127 * don't lose the VRSAVE content that may have been set prior to
128 * the process doing its first vector operation.
129 * Userland shall check AT_HWCAP to know whether it can rely on the
130 * v_regs pointer or not.
131 */
132#ifdef CONFIG_ALTIVEC
133 elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)(((unsigned long)sc->vmx_reserve + 15) & ~0xful);
134#endif
135 long err = 0;
136
137 flush_fp_to_thread(current);
138
139 /* Make sure signal doesn't get spurious FP exceptions */
140 current->thread.fpscr = 0;
141
142#ifdef CONFIG_ALTIVEC
143 err |= __put_user(v_regs, &sc->v_regs);
144
145 /* save altivec registers */
146 if (current->thread.used_vr) {
147 flush_altivec_to_thread(current);
148 /* Copy 33 vec registers (vr0..31 and vscr) to the stack */
149 err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128));
150 /* set MSR_VEC in the MSR value in the frame to indicate that
151 * sc->v_regs contains valid data.
152 */
153 regs->msr |= MSR_VEC;
154 }
155 /* We always copy to/from vrsave, it's 0 if we don't have or don't
156 * use altivec.
157 */
158 err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
159#else /* CONFIG_ALTIVEC */
160 err |= __put_user(0, &sc->v_regs);
161#endif /* CONFIG_ALTIVEC */
162 err |= __put_user(&sc->gp_regs, &sc->regs);
163 err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
164 err |= __copy_to_user(&sc->fp_regs, &current->thread.fpr, FP_REGS_SIZE);
165 err |= __put_user(signr, &sc->signal);
166 err |= __put_user(handler, &sc->handler);
167 if (set != NULL)
168 err |= __put_user(set->sig[0], &sc->oldmask);
169
170 return err;
171}
172
173/*
174 * Restore the sigcontext from the signal frame.
175 */
176
177static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
178 struct sigcontext __user *sc)
179{
180#ifdef CONFIG_ALTIVEC
181 elf_vrreg_t __user *v_regs;
182#endif
183 unsigned long err = 0;
184 unsigned long save_r13 = 0;
185 elf_greg_t *gregs = (elf_greg_t *)regs;
186#ifdef CONFIG_ALTIVEC
187 unsigned long msr;
188#endif
189 int i;
190
191 /* If this is not a signal return, we preserve the TLS in r13 */
192 if (!sig)
193 save_r13 = regs->gpr[13];
194
195 /* copy everything before MSR */
196 err |= __copy_from_user(regs, &sc->gp_regs,
197 PT_MSR*sizeof(unsigned long));
198
199 /* skip MSR and SOFTE */
200 for (i = PT_MSR+1; i <= PT_RESULT; i++) {
201 if (i == PT_SOFTE)
202 continue;
203 err |= __get_user(gregs[i], &sc->gp_regs[i]);
204 }
205
206 if (!sig)
207 regs->gpr[13] = save_r13;
208 err |= __copy_from_user(&current->thread.fpr, &sc->fp_regs, FP_REGS_SIZE);
209 if (set != NULL)
210 err |= __get_user(set->sig[0], &sc->oldmask);
211
212#ifdef CONFIG_ALTIVEC
213 err |= __get_user(v_regs, &sc->v_regs);
214 err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
215 if (err)
216 return err;
217 /* Copy 33 vec registers (vr0..31 and vscr) from the stack */
218 if (v_regs != 0 && (msr & MSR_VEC) != 0)
219 err |= __copy_from_user(current->thread.vr, v_regs,
220 33 * sizeof(vector128));
221 else if (current->thread.used_vr)
222 memset(current->thread.vr, 0, 33 * sizeof(vector128));
223 /* Always get VRSAVE back */
224 if (v_regs != 0)
225 err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
226 else
227 current->thread.vrsave = 0;
228#endif /* CONFIG_ALTIVEC */
229
230#ifndef CONFIG_SMP
231 preempt_disable();
232 if (last_task_used_math == current)
233 last_task_used_math = NULL;
234 if (last_task_used_altivec == current)
235 last_task_used_altivec = NULL;
236 preempt_enable();
237#endif
238 /* Force reload of FP/VEC */
239 regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC);
240
241 return err;
242}
243
244/*
245 * Allocate space for the signal frame
246 */
247static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
248 size_t frame_size)
249{
250 unsigned long newsp;
251
252 /* Default to using normal stack */
253 newsp = regs->gpr[1];
254
255 if (ka->sa.sa_flags & SA_ONSTACK) {
256 if (! on_sig_stack(regs->gpr[1]))
257 newsp = (current->sas_ss_sp + current->sas_ss_size);
258 }
259
260 return (void __user *)((newsp - frame_size) & -16ul);
261}
262
263/*
264 * Setup the trampoline code on the stack
265 */
266static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
267{
268 int i;
269 long err = 0;
270
271 /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */
272 err |= __put_user(0x38210000UL | (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]);
273 /* li r0, __NR_[rt_]sigreturn| */
274 err |= __put_user(0x38000000UL | (syscall & 0xffff), &tramp[1]);
275 /* sc */
276 err |= __put_user(0x44000002UL, &tramp[2]);
277
278 /* Minimal traceback info */
279 for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
280 err |= __put_user(0, &tramp[i]);
281
282 if (!err)
283 flush_icache_range((unsigned long) &tramp[0],
284 (unsigned long) &tramp[TRAMP_SIZE]);
285
286 return err;
287}
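/* Worked example (a sketch, assuming __SIGNAL_FRAMESIZE is 128 and
 * __NR_rt_sigreturn is 172 on this platform): the words stored above
 * decode as:
 *
 *	0x38210080	addi r1,r1,128
 *	0x380000ac	li r0,172
 *	0x44000002	sc
 */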
288
289/*
290 * Restore the user process's signal mask (also used by signal32.c)
291 */
292void restore_sigmask(sigset_t *set)
293{
294 sigdelsetmask(set, ~_BLOCKABLE);
295 spin_lock_irq(&current->sighand->siglock);
296 current->blocked = *set;
297 recalc_sigpending();
298 spin_unlock_irq(&current->sighand->siglock);
299}
300
301
302/*
303 * Handle {get,set,swap}_context operations
304 */
305int sys_swapcontext(struct ucontext __user *old_ctx,
306 struct ucontext __user *new_ctx,
307 long ctx_size, long r6, long r7, long r8, struct pt_regs *regs)
308{
309 unsigned char tmp;
310 sigset_t set;
311
312 /* Context size is for future use. Right now, we only make sure
313 * we are passed something we understand
314 */
315 if (ctx_size < sizeof(struct ucontext))
316 return -EINVAL;
317
318 if (old_ctx != NULL) {
319 if (!access_ok(VERIFY_WRITE, old_ctx, sizeof(*old_ctx))
320 || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0)
321 || __copy_to_user(&old_ctx->uc_sigmask,
322 &current->blocked, sizeof(sigset_t)))
323 return -EFAULT;
324 }
325 if (new_ctx == NULL)
326 return 0;
327 if (!access_ok(VERIFY_READ, new_ctx, sizeof(*new_ctx))
328 || __get_user(tmp, (u8 __user *) new_ctx)
329 || __get_user(tmp, (u8 __user *) (new_ctx + 1) - 1))
330 return -EFAULT;
331
332 /*
333 * If we get a fault copying the context into the kernel's
334 * image of the user's registers, we can't just return -EFAULT
335 * because the user's registers will be corrupted. For instance
336 * the NIP value may have been updated but not some of the
337 * other registers. Given that we have done the access_ok
338 * and successfully read the first and last bytes of the region
339 * above, this should only happen in an out-of-memory situation
340 * or if another thread unmaps the region containing the context.
341 * We kill the task with a SIGSEGV in this situation.
342 */
343
344 if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
345 do_exit(SIGSEGV);
346 restore_sigmask(&set);
347 if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext))
348 do_exit(SIGSEGV);
349
350 /* This returns like rt_sigreturn */
351 return 0;
352}
353
354
355/*
356 * Do a signal return; undo the signal stack.
357 */
358
359int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
360 unsigned long r6, unsigned long r7, unsigned long r8,
361 struct pt_regs *regs)
362{
363 struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
364 sigset_t set;
365
366 /* Always make any pending restarted system calls return -EINTR */
367 current_thread_info()->restart_block.fn = do_no_restart_syscall;
368
369 if (!access_ok(VERIFY_READ, uc, sizeof(*uc)))
370 goto badframe;
371
372 if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
373 goto badframe;
374 restore_sigmask(&set);
375 if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext))
376 goto badframe;
377
378 /* do_sigaltstack expects a __user pointer and won't modify
379 * what's in there anyway
380 */
381 do_sigaltstack(&uc->uc_stack, NULL, regs->gpr[1]);
382
383 return regs->result;
384
385badframe:
386#if DEBUG_SIG
387 printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n",
388 regs, uc, &uc->uc_mcontext);
389#endif
390 force_sig(SIGSEGV, current);
391 return 0;
392}
393
394static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
395 sigset_t *set, struct pt_regs *regs)
396{
397 /* Handler is *really* a pointer to the function descriptor for
398 * the signal routine. The first entry in the function
399 * descriptor is the entry address of the signal handler and
400 * the second entry is the TOC value we need to use.
401 */
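/* For reference, a sketch of the 64-bit ELF ABI descriptor layout that
 * is assumed here (see func_descr_t; field names are indicative):
 *
 *	entry	- address of the handler's first instruction
 *	toc	- TOC base the handler expects in r2
 *	env	- environment pointer, unused by C code
 */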
402 func_descr_t __user *funct_desc_ptr;
403 struct rt_sigframe __user *frame;
404 unsigned long newsp = 0;
405 long err = 0;
406
407 frame = get_sigframe(ka, regs, sizeof(*frame));
408
409 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
410 goto badframe;
411
412 err |= __put_user(&frame->info, &frame->pinfo);
413 err |= __put_user(&frame->uc, &frame->puc);
414 err |= copy_siginfo_to_user(&frame->info, info);
415 if (err)
416 goto badframe;
417
418 /* Create the ucontext. */
419 err |= __put_user(0, &frame->uc.uc_flags);
420 err |= __put_user(0, &frame->uc.uc_link);
421 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
422 err |= __put_user(sas_ss_flags(regs->gpr[1]),
423 &frame->uc.uc_stack.ss_flags);
424 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
425 err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL,
426 (unsigned long)ka->sa.sa_handler);
427 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
428 if (err)
429 goto badframe;
430
431 /* Set up to return from userspace. */
432 if (vdso64_rt_sigtramp && current->thread.vdso_base) {
433 regs->link = current->thread.vdso_base + vdso64_rt_sigtramp;
434 } else {
435 err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
436 if (err)
437 goto badframe;
438 regs->link = (unsigned long) &frame->tramp[0];
439 }
440 funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;
441
442 /* Allocate a dummy caller frame for the signal handler. */
443 newsp = (unsigned long)frame - __SIGNAL_FRAMESIZE;
444 err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
445
446 /* Set up "regs" so we "return" to the signal handler. */
447 err |= get_user(regs->nip, &funct_desc_ptr->entry);
448 regs->gpr[1] = newsp;
449 err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
450 regs->gpr[3] = signr;
451 regs->result = 0;
452 if (ka->sa.sa_flags & SA_SIGINFO) {
453 err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);
454 err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc);
455 regs->gpr[6] = (unsigned long) frame;
456 } else {
457 regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext;
458 }
459 if (err)
460 goto badframe;
461
462 if (test_thread_flag(TIF_SINGLESTEP))
463 ptrace_notify(SIGTRAP);
464
465 return 1;
466
467badframe:
468#if DEBUG_SIG
469 printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n",
470 regs, frame, newsp);
471#endif
472 force_sigsegv(signr, current);
473 return 0;
474}
475
476
477/*
478 * OK, we're invoking a handler
479 */
480static int handle_signal(unsigned long sig, struct k_sigaction *ka,
481 siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
482{
483 int ret;
484
485 /* Set up Signal Frame */
486 ret = setup_rt_frame(sig, ka, info, oldset, regs);
487
488 if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
489 spin_lock_irq(&current->sighand->siglock);
490 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
491 sigaddset(&current->blocked,sig);
492 recalc_sigpending();
493 spin_unlock_irq(&current->sighand->siglock);
494 }
495
496 return ret;
497}
498
499static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
500{
501 switch ((int)regs->result) {
502 case -ERESTART_RESTARTBLOCK:
503 case -ERESTARTNOHAND:
504 /* ERESTARTNOHAND means that the syscall should only be
505 * restarted if there was no handler for the signal, and since
506 * we only get here if there is a handler, we don't restart.
507 */
508 regs->result = -EINTR;
509 break;
510 case -ERESTARTSYS:
511 /* ERESTARTSYS means to restart the syscall if there is no
512 * handler or the handler was registered with SA_RESTART
513 */
514 if (!(ka->sa.sa_flags & SA_RESTART)) {
515 regs->result = -EINTR;
516 break;
517 }
518 /* fallthrough */
519 case -ERESTARTNOINTR:
520 /* ERESTARTNOINTR means that the syscall should be
521 * called again after the signal handler returns.
522 */
523 regs->gpr[3] = regs->orig_gpr3;
524 regs->nip -= 4;
525 regs->result = 0;
526 break;
527 }
528}
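/* Worked example (a sketch): a read() interrupted by a signal returns
 * -ERESTARTSYS; if the handler was installed with SA_RESTART, we fall
 * through to the -ERESTARTNOINTR case, GPR3 is rewound to the saved
 * first argument and NIP backs up 4 bytes, so the "sc" instruction is
 * re-executed once the handler returns.
 */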
529
530/*
531 * Note that 'init' is a special process: it doesn't get signals it doesn't
532 * want to handle. Thus you cannot kill init even with a SIGKILL even by
533 * mistake.
534 */
535int do_signal(sigset_t *oldset, struct pt_regs *regs)
536{
537 siginfo_t info;
538 int signr;
539 struct k_sigaction ka;
540
541 /*
542 * If the current thread is 32 bit - invoke the
543 * 32 bit signal handling code
544 */
545 if (test_thread_flag(TIF_32BIT))
546 return do_signal32(oldset, regs);
547
548 if (!oldset)
549 oldset = &current->blocked;
550
551 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
552 if (signr > 0) {
553 /* Whee! Actually deliver the signal. */
554 if (TRAP(regs) == 0x0C00)
555 syscall_restart(regs, &ka);
556 return handle_signal(signr, &ka, &info, oldset, regs);
557 }
558
559 if (TRAP(regs) == 0x0C00) { /* System Call! */
560 if ((int)regs->result == -ERESTARTNOHAND ||
561 (int)regs->result == -ERESTARTSYS ||
562 (int)regs->result == -ERESTARTNOINTR) {
563 regs->gpr[3] = regs->orig_gpr3;
564 regs->nip -= 4; /* Back up & retry system call */
565 regs->result = 0;
566 } else if ((int)regs->result == -ERESTART_RESTARTBLOCK) {
567 regs->gpr[0] = __NR_restart_syscall;
568 regs->nip -= 4;
569 regs->result = 0;
570 }
571 }
572
573 return 0;
574}
575EXPORT_SYMBOL(do_signal);
diff --git a/arch/ppc64/kernel/signal32.c b/arch/ppc64/kernel/signal32.c
new file mode 100644
index 000000000000..b0e167db6af9
--- /dev/null
+++ b/arch/ppc64/kernel/signal32.c
@@ -0,0 +1,989 @@
1/*
2 * signal32.c: Support 32bit signal syscalls.
3 *
4 * Copyright (C) 2001 IBM
5 * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
6 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
7 *
8 * These routines maintain argument size conversion between 32bit and 64bit
9 * environment.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16
17#include <linux/config.h>
18#include <linux/sched.h>
19#include <linux/mm.h>
20#include <linux/smp.h>
21#include <linux/smp_lock.h>
22#include <linux/kernel.h>
23#include <linux/signal.h>
24#include <linux/syscalls.h>
25#include <linux/errno.h>
26#include <linux/elf.h>
27#include <linux/compat.h>
28#include <linux/ptrace.h>
29#include <asm/ppc32.h>
30#include <asm/uaccess.h>
31#include <asm/ppcdebug.h>
32#include <asm/unistd.h>
33#include <asm/cacheflush.h>
34#include <asm/vdso.h>
35
36#define DEBUG_SIG 0
37
38#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
39
40#define GP_REGS_SIZE32 min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32))
41
42/*
43 * When we have signals to deliver, we set up on the
44 * user stack, going down from the original stack pointer:
45 * a sigregs32 struct
46 * a sigcontext32 struct
47 * a gap of __SIGNAL_FRAMESIZE32 bytes
48 *
49 * Each of these things must be a multiple of 16 bytes in size.
50 *
51 */
52struct sigregs32 {
53 struct mcontext32 mctx; /* all the register values */
54 /*
55 * Programs using the rs6000/xcoff abi can save up to 19 gp
56 * regs and 18 fp regs below sp before decrementing it.
57 */
58 int abigap[56];
59};
60
61/* We use the mc_pad field for the signal return trampoline. */
62#define tramp mc_pad
63
64/*
65 * When we have rt signals to deliver, we set up on the
66 * user stack, going down from the original stack pointer:
67 * one rt_sigframe32 struct (siginfo + ucontext + ABI gap)
68 * a gap of __SIGNAL_FRAMESIZE32+16 bytes
69 * (the +16 is to get the siginfo and ucontext32 in the same
70 * positions as in older kernels).
71 *
72 * Each of these things must be a multiple of 16 bytes in size.
73 *
74 */
75struct rt_sigframe32 {
76 compat_siginfo_t info;
77 struct ucontext32 uc;
78 /*
79 * Programs using the rs6000/xcoff abi can save up to 19 gp
80 * regs and 18 fp regs below sp before decrementing it.
81 */
82 int abigap[56];
83};
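/* For illustration, a sketch of the resulting 32-bit rt stack:
 *
 *	high addr	[ interrupted frame, old GPR1 points here ]
 *			[ struct rt_sigframe32: info, uc, abigap ]
 *	low addr	[ __SIGNAL_FRAMESIZE32 + 16 byte gap ] <- new GPR1
 */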
84
85
86/*
87 * Common utility functions used by signal and context support
88 *
89 */
90
91/*
92 * Restore the user process's signal mask
93 * (implemented in signal.c)
94 */
95extern void restore_sigmask(sigset_t *set);
96
97/*
98 * Functions for flipping sigsets (thanks to the brain-dead generic
99 * implementation that makes things simple for little-endian only).
100 */
101static inline void compat_from_sigset(compat_sigset_t *compat, sigset_t *set)
102{
103 switch (_NSIG_WORDS) {
104 case 4: compat->sig[6] = set->sig[3] & 0xffffffffull ;
105 compat->sig[7] = set->sig[3] >> 32;
106 case 3: compat->sig[4] = set->sig[2] & 0xffffffffull ;
107 compat->sig[5] = set->sig[2] >> 32;
108 case 2: compat->sig[2] = set->sig[1] & 0xffffffffull ;
109 compat->sig[3] = set->sig[1] >> 32;
110 case 1: compat->sig[0] = set->sig[0] & 0xffffffffull ;
111 compat->sig[1] = set->sig[0] >> 32;
112 }
113}
114
115static inline void sigset_from_compat(sigset_t *set, compat_sigset_t *compat)
116{
117 switch (_NSIG_WORDS) {
118 case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32);
119 case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32);
120 case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32);
121 case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32);
122 }
123}
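/* Worked example (a sketch): with _NSIG_WORDS == 1, a 64-bit word
 * set->sig[0] == 0x0000000100000002ull flips into compat->sig[0] == 2
 * (low half) and compat->sig[1] == 1 (high half), and
 * sigset_from_compat() reassembles the very same value.
 */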
124
125
126/*
127 * Save the current user registers on the user stack.
128 * We only save the altivec registers if the process has used
129 * altivec instructions at some point.
130 */
131static int save_user_regs(struct pt_regs *regs, struct mcontext32 __user *frame, int sigret)
132{
133 elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
134 int i, err = 0;
135
136 /* Make sure floating point registers are stored in regs */
137 flush_fp_to_thread(current);
138
139 /* save general and floating-point registers */
140 for (i = 0; i <= PT_RESULT; i ++)
141 err |= __put_user((unsigned int)gregs[i], &frame->mc_gregs[i]);
142 err |= __copy_to_user(&frame->mc_fregs, current->thread.fpr,
143 ELF_NFPREG * sizeof(double));
144 if (err)
145 return 1;
146
147 current->thread.fpscr = 0; /* turn off all fp exceptions */
148
149#ifdef CONFIG_ALTIVEC
150 /* save altivec registers */
151 if (current->thread.used_vr) {
152 flush_altivec_to_thread(current);
153 if (__copy_to_user(&frame->mc_vregs, current->thread.vr,
154 ELF_NVRREG32 * sizeof(vector128)))
155 return 1;
156 /* set MSR_VEC in the saved MSR value to indicate that
157 frame->mc_vregs contains valid data */
158 if (__put_user(regs->msr | MSR_VEC, &frame->mc_gregs[PT_MSR]))
159 return 1;
160 }
161 /* else assert((regs->msr & MSR_VEC) == 0) */
162
163 /* We always copy to/from vrsave, it's 0 if we don't have or don't
164 * use altivec. Since VSCR only contains 32 bits saved in the least
165 * significant bits of a vector, we "cheat" and stuff VRSAVE in the
166 * most significant bits of that same vector. --BenH
167 */
168 if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32]))
169 return 1;
170#endif /* CONFIG_ALTIVEC */
171
172 if (sigret) {
173 /* Set up the sigreturn trampoline: li r0,sigret; sc */
174 if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
175 || __put_user(0x44000002UL, &frame->tramp[1]))
176 return 1;
177 flush_icache_range((unsigned long) &frame->tramp[0],
178 (unsigned long) &frame->tramp[2]);
179 }
180
181 return 0;
182}
183
184/*
185 * Restore the current user register values from the user stack,
186 * (except for MSR).
187 */
188static long restore_user_regs(struct pt_regs *regs,
189 struct mcontext32 __user *sr, int sig)
190{
191 elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
192 int i;
193 long err = 0;
194 unsigned int save_r2 = 0;
195#ifdef CONFIG_ALTIVEC
196 unsigned long msr;
197#endif
198
199 /*
200 * restore general registers but not including MSR or SOFTE. Also
201 * take care of keeping r2 (TLS) intact if not a signal
202 */
203 if (!sig)
204 save_r2 = (unsigned int)regs->gpr[2];
205 for (i = 0; i <= PT_RESULT; i++) {
206 if ((i == PT_MSR) || (i == PT_SOFTE))
207 continue;
208 err |= __get_user(gregs[i], &sr->mc_gregs[i]);
209 }
210 if (!sig)
211 regs->gpr[2] = (unsigned long) save_r2;
212 if (err)
213 return 1;
214
215 /* force the process to reload the FP registers from
216 current->thread when it next does FP instructions */
217 regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
218 if (__copy_from_user(current->thread.fpr, &sr->mc_fregs,
219 sizeof(sr->mc_fregs)))
220 return 1;
221
222#ifdef CONFIG_ALTIVEC
223 /* force the process to reload the altivec registers from
224 current->thread when it next does altivec instructions */
225 regs->msr &= ~MSR_VEC;
226 if (!__get_user(msr, &sr->mc_gregs[PT_MSR]) && (msr & MSR_VEC) != 0) {
227 /* restore altivec registers from the stack */
228 if (__copy_from_user(current->thread.vr, &sr->mc_vregs,
229 sizeof(sr->mc_vregs)))
230 return 1;
231 } else if (current->thread.used_vr)
232 memset(current->thread.vr, 0, ELF_NVRREG32 * sizeof(vector128));
233
234 /* Always get VRSAVE back */
235 if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
236 return 1;
237#endif /* CONFIG_ALTIVEC */
238
239#ifndef CONFIG_SMP
240 preempt_disable();
241 if (last_task_used_math == current)
242 last_task_used_math = NULL;
243 if (last_task_used_altivec == current)
244 last_task_used_altivec = NULL;
245 preempt_enable();
246#endif
247 return 0;
248}
249
250
251/*
252 * Start of nonRT signal support
253 *
254 * sigset_t is 32 bits for non-rt signals
255 *
256 * System Calls
257 * sigaction sys32_sigaction
258 * sigreturn sys32_sigreturn
259 * sigsuspend sys32_sigsuspend
260 *
261 *
262 * Other routines
263 * setup_frame32
264 */
265
266/*
267 * Atomically swap in the new signal mask, and wait for a signal.
268 */
269long sys32_sigsuspend(old_sigset_t mask, int p2, int p3, int p4, int p6, int p7,
270 struct pt_regs *regs)
271{
272 sigset_t saveset;
273
274 mask &= _BLOCKABLE;
275 spin_lock_irq(&current->sighand->siglock);
276 saveset = current->blocked;
277 siginitset(&current->blocked, mask);
278 recalc_sigpending();
279 spin_unlock_irq(&current->sighand->siglock);
280
281 regs->result = -EINTR;
282 regs->gpr[3] = EINTR;
283 regs->ccr |= 0x10000000;
284 while (1) {
285 current->state = TASK_INTERRUPTIBLE;
286 schedule();
287 if (do_signal32(&saveset, regs))
288 /*
289 * Returning 0 means we return to userspace via
290 * ret_from_except and thus restore all user
291 * registers from *regs. This is what we need
292 * to do when a signal has been delivered.
293 */
294 return 0;
295 }
296}
297
298long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
299 struct old_sigaction32 __user *oact)
300{
301 struct k_sigaction new_ka, old_ka;
302 int ret;
303
304 if (sig < 0)
305 sig = -sig;
306
307 if (act) {
308 compat_old_sigset_t mask;
309 compat_uptr_t handler, restorer;
310
311 if (get_user(handler, &act->sa_handler) ||
312 __get_user(restorer, &act->sa_restorer) ||
313 __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
314 __get_user(mask, &act->sa_mask))
315 return -EFAULT;
316 new_ka.sa.sa_handler = compat_ptr(handler);
317 new_ka.sa.sa_restorer = compat_ptr(restorer);
318 siginitset(&new_ka.sa.sa_mask, mask);
319 }
320
321 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
322 if (!ret && oact) {
323 if (put_user((long)old_ka.sa.sa_handler, &oact->sa_handler) ||
324 __put_user((long)old_ka.sa.sa_restorer, &oact->sa_restorer) ||
325 __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
326 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
327 return -EFAULT;
328 }
329
330 return ret;
331}
332
333
334
335/*
336 * Start of RT signal support
337 *
338 * sigset_t is 64 bits for rt signals
339 *
340 * System Calls
341 * sigaction sys32_rt_sigaction
342 * sigpending sys32_rt_sigpending
343 * sigprocmask sys32_rt_sigprocmask
344 * sigreturn sys32_rt_sigreturn
345 * sigqueueinfo sys32_rt_sigqueueinfo
346 * sigsuspend sys32_rt_sigsuspend
347 *
348 * Other routines
349 * setup_rt_frame32
350 * copy_siginfo_to_user32
351 * siginfo32to64
352 */
353
354
355long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
356 struct sigaction32 __user *oact, size_t sigsetsize)
357{
358 struct k_sigaction new_ka, old_ka;
359 int ret;
360 compat_sigset_t set32;
361
362 /* XXX: Don't preclude handling different sized sigset_t's. */
363 if (sigsetsize != sizeof(compat_sigset_t))
364 return -EINVAL;
365
366 if (act) {
367 compat_uptr_t handler;
368
369 ret = get_user(handler, &act->sa_handler);
370 new_ka.sa.sa_handler = compat_ptr(handler);
371 ret |= __copy_from_user(&set32, &act->sa_mask,
372 sizeof(compat_sigset_t));
373 sigset_from_compat(&new_ka.sa.sa_mask, &set32);
374 ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
375 if (ret)
376 return -EFAULT;
377 }
378
379 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
380 if (!ret && oact) {
381 compat_from_sigset(&set32, &old_ka.sa.sa_mask);
382 ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler);
383 ret |= __copy_to_user(&oact->sa_mask, &set32,
384 sizeof(compat_sigset_t));
385 ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
386 }
387 return ret;
388}
389
390/*
391 * Note: it is necessary to treat how as an unsigned int, with the
392 * corresponding cast to a signed int to ensure that the proper
393 * conversion (sign extension) between the register representation
394 * of a signed int (msr in 32-bit mode) and the register representation
395 * of a signed int (msr in 64-bit mode) is performed.
396 */
397long sys32_rt_sigprocmask(u32 how, compat_sigset_t __user *set,
398 compat_sigset_t __user *oset, size_t sigsetsize)
399{
400 sigset_t s;
401 sigset_t __user *up;
402 compat_sigset_t s32;
403 int ret;
404 mm_segment_t old_fs = get_fs();
405
406 if (set) {
407 if (copy_from_user (&s32, set, sizeof(compat_sigset_t)))
408 return -EFAULT;
409 sigset_from_compat(&s, &s32);
410 }
411
412 set_fs(KERNEL_DS);
413 /* This is valid because of the set_fs() */
414 up = (sigset_t __user *) &s;
415 ret = sys_rt_sigprocmask((int)how, set ? up : NULL, oset ? up : NULL,
416 sigsetsize);
417 set_fs(old_fs);
418 if (ret)
419 return ret;
420 if (oset) {
421 compat_from_sigset(&s32, &s);
422 if (copy_to_user (oset, &s32, sizeof(compat_sigset_t)))
423 return -EFAULT;
424 }
425 return 0;
426}
427
428long sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize)
429{
430 sigset_t s;
431 compat_sigset_t s32;
432 int ret;
433 mm_segment_t old_fs = get_fs();
434
435 set_fs(KERNEL_DS);
436 /* The __user pointer cast is valid because of the set_fs() */
437 ret = sys_rt_sigpending((sigset_t __user *) &s, sigsetsize);
438 set_fs(old_fs);
439 if (!ret) {
440 compat_from_sigset(&s32, &s);
441 if (copy_to_user (set, &s32, sizeof(compat_sigset_t)))
442 return -EFAULT;
443 }
444 return ret;
445}
446
447
448int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s)
449{
450 int err;
451
452 if (!access_ok (VERIFY_WRITE, d, sizeof(*d)))
453 return -EFAULT;
454
455 /* If you change siginfo_t structure, please be sure
456 * this code is fixed accordingly.
457 * It should never copy any pad contained in the structure
458 * to avoid security leaks, but must copy the generic
459 * 3 ints plus the relevant union member.
460 * This routine must convert siginfo from 64bit to 32bit as well
461 * at the same time.
462 */
463 err = __put_user(s->si_signo, &d->si_signo);
464 err |= __put_user(s->si_errno, &d->si_errno);
465 err |= __put_user((short)s->si_code, &d->si_code);
466 if (s->si_code < 0)
467 err |= __copy_to_user(&d->_sifields._pad, &s->_sifields._pad,
468 SI_PAD_SIZE32);
469 else switch(s->si_code >> 16) {
470 case __SI_CHLD >> 16:
471 err |= __put_user(s->si_pid, &d->si_pid);
472 err |= __put_user(s->si_uid, &d->si_uid);
473 err |= __put_user(s->si_utime, &d->si_utime);
474 err |= __put_user(s->si_stime, &d->si_stime);
475 err |= __put_user(s->si_status, &d->si_status);
476 break;
477 case __SI_FAULT >> 16:
478 err |= __put_user((unsigned int)(unsigned long)s->si_addr,
479 &d->si_addr);
480 break;
481 case __SI_POLL >> 16:
482 err |= __put_user(s->si_band, &d->si_band);
483 err |= __put_user(s->si_fd, &d->si_fd);
484 break;
485 case __SI_TIMER >> 16:
486 err |= __put_user(s->si_tid, &d->si_tid);
487 err |= __put_user(s->si_overrun, &d->si_overrun);
488 err |= __put_user(s->si_int, &d->si_int);
489 break;
490 case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
491 case __SI_MESGQ >> 16:
492 err |= __put_user(s->si_int, &d->si_int);
493 /* fallthrough */
494 case __SI_KILL >> 16:
495 default:
496 err |= __put_user(s->si_pid, &d->si_pid);
497 err |= __put_user(s->si_uid, &d->si_uid);
498 break;
499 }
500 return err;
501}
502
503/*
504 * Note: it is necessary to treat pid and sig as unsigned ints, with the
505 * corresponding cast to a signed int to ensure that the proper conversion
506 * (sign extension) between the register representation of a signed int
507 * (msr in 32-bit mode) and the register representation of a signed int
508 * (msr in 64-bit mode) is performed.
509 */
510long sys32_rt_sigqueueinfo(u32 pid, u32 sig, compat_siginfo_t __user *uinfo)
511{
512 siginfo_t info;
513 int ret;
514 mm_segment_t old_fs = get_fs();
515
516 if (copy_from_user (&info, uinfo, 3*sizeof(int)) ||
517 copy_from_user (info._sifields._pad, uinfo->_sifields._pad, SI_PAD_SIZE32))
518 return -EFAULT;
519 set_fs (KERNEL_DS);
520 /* The __user pointer cast is valid because of the set_fs() */
521 ret = sys_rt_sigqueueinfo((int)pid, (int)sig, (siginfo_t __user *) &info);
522 set_fs (old_fs);
523 return ret;
524}
525
526int sys32_rt_sigsuspend(compat_sigset_t __user * unewset, size_t sigsetsize, int p3,
527 int p4, int p6, int p7, struct pt_regs *regs)
528{
529 sigset_t saveset, newset;
530 compat_sigset_t s32;
531
532 /* XXX: Don't preclude handling different sized sigset_t's. */
533 if (sigsetsize != sizeof(sigset_t))
534 return -EINVAL;
535
536 if (copy_from_user(&s32, unewset, sizeof(s32)))
537 return -EFAULT;
538
539 /*
540 * Swap the 2 words of the 64-bit sigset_t (they are stored
541 * in the "wrong" endian in 32-bit user storage).
542 */
543 sigset_from_compat(&newset, &s32);
544
545 sigdelsetmask(&newset, ~_BLOCKABLE);
546 spin_lock_irq(&current->sighand->siglock);
547 saveset = current->blocked;
548 current->blocked = newset;
549 recalc_sigpending();
550 spin_unlock_irq(&current->sighand->siglock);
551
552 regs->result = -EINTR;
553 regs->gpr[3] = EINTR;
554 regs->ccr |= 0x10000000;
555 while (1) {
556 current->state = TASK_INTERRUPTIBLE;
557 schedule();
558 if (do_signal32(&saveset, regs))
559 /*
560 * Returning 0 means we return to userspace via
561 * ret_from_except and thus restore all user
562 * registers from *regs. This is what we need
563 * to do when a signal has been delivered.
564 */
565 return 0;
566 }
567}
568
569/*
570 * Start Alternate signal stack support
571 *
572 * System Calls
573 * sigaltstack sys32_sigaltstack
574 */
575
576int sys32_sigaltstack(u32 __new, u32 __old, int r5,
577 int r6, int r7, int r8, struct pt_regs *regs)
578{
579 stack_32_t __user * newstack = (stack_32_t __user *)(long) __new;
580 stack_32_t __user * oldstack = (stack_32_t __user *)(long) __old;
581 stack_t uss, uoss;
582 int ret;
583 mm_segment_t old_fs;
584 unsigned long sp;
585 compat_uptr_t ss_sp;
586
587 /*
588 * set sp to the user stack on entry to the system call
589 * the system call router sets R9 to the saved registers
590 */
591 sp = regs->gpr[1];
592
593 /* Put new stack info in local 64 bit stack struct */
594 if (newstack) {
595 if (get_user(ss_sp, &newstack->ss_sp) ||
596 __get_user(uss.ss_flags, &newstack->ss_flags) ||
597 __get_user(uss.ss_size, &newstack->ss_size))
598 return -EFAULT;
599 uss.ss_sp = compat_ptr(ss_sp);
600 }
601
602 old_fs = get_fs();
603 set_fs(KERNEL_DS);
604 /* The __user pointer casts are valid because of the set_fs() */
605 ret = do_sigaltstack(
606 newstack ? (stack_t __user *) &uss : NULL,
607 oldstack ? (stack_t __user *) &uoss : NULL,
608 sp);
609 set_fs(old_fs);
610 /* Copy the stack information to the user output buffer */
611 if (!ret && oldstack &&
612 (put_user((long)uoss.ss_sp, &oldstack->ss_sp) ||
613 __put_user(uoss.ss_flags, &oldstack->ss_flags) ||
614 __put_user(uoss.ss_size, &oldstack->ss_size)))
615 return -EFAULT;
616 return ret;
617}
618
619
620/*
621 * Set up a signal frame for a "real-time" signal handler
622 * (one which gets siginfo).
623 */
624static int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
625 siginfo_t *info, sigset_t *oldset,
626 struct pt_regs * regs, unsigned long newsp)
627{
628 struct rt_sigframe32 __user *rt_sf;
629 struct mcontext32 __user *frame;
630 unsigned long origsp = newsp;
631 compat_sigset_t c_oldset;
632
633 /* Set up Signal Frame */
634 /* Put a Real Time Context onto stack */
635 newsp -= sizeof(*rt_sf);
636 rt_sf = (struct rt_sigframe32 __user *)newsp;
637
638 /* create a stack frame for the caller of the handler */
639 newsp -= __SIGNAL_FRAMESIZE32 + 16;
640
641 if (!access_ok(VERIFY_WRITE, (void __user *)newsp, origsp - newsp))
642 goto badframe;
643
644 compat_from_sigset(&c_oldset, oldset);
645
646 /* Put the siginfo & fill in most of the ucontext */
647 if (copy_siginfo_to_user32(&rt_sf->info, info)
648 || __put_user(0, &rt_sf->uc.uc_flags)
649 || __put_user(0, &rt_sf->uc.uc_link)
650 || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp)
651 || __put_user(sas_ss_flags(regs->gpr[1]),
652 &rt_sf->uc.uc_stack.ss_flags)
653 || __put_user(current->sas_ss_size, &rt_sf->uc.uc_stack.ss_size)
654 || __put_user((u32)(u64)&rt_sf->uc.uc_mcontext, &rt_sf->uc.uc_regs)
655 || __copy_to_user(&rt_sf->uc.uc_sigmask, &c_oldset, sizeof(c_oldset)))
656 goto badframe;
657
658 /* Save user registers on the stack */
659 frame = &rt_sf->uc.uc_mcontext;
660 if (put_user(regs->gpr[1], (unsigned long __user *)newsp))
661 goto badframe;
662
663 if (vdso32_rt_sigtramp && current->thread.vdso_base) {
664 if (save_user_regs(regs, frame, 0))
665 goto badframe;
666 regs->link = current->thread.vdso_base + vdso32_rt_sigtramp;
667 } else {
668 if (save_user_regs(regs, frame, __NR_rt_sigreturn))
669 goto badframe;
670 regs->link = (unsigned long) frame->tramp;
671 }
672 regs->gpr[1] = (unsigned long) newsp;
673 regs->gpr[3] = sig;
674 regs->gpr[4] = (unsigned long) &rt_sf->info;
675 regs->gpr[5] = (unsigned long) &rt_sf->uc;
676 regs->gpr[6] = (unsigned long) rt_sf;
677 regs->nip = (unsigned long) ka->sa.sa_handler;
678 regs->trap = 0;
679 regs->result = 0;
680
681 if (test_thread_flag(TIF_SINGLESTEP))
682 ptrace_notify(SIGTRAP);
683
684 return 1;
685
686badframe:
687#if DEBUG_SIG
688 printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n",
689 regs, frame, newsp);
690#endif
691 force_sigsegv(sig, current);
692 return 0;
693}
694
695static long do_setcontext32(struct ucontext32 __user *ucp, struct pt_regs *regs, int sig)
696{
697 compat_sigset_t c_set;
698 sigset_t set;
699 u32 mcp;
700
701 if (__copy_from_user(&c_set, &ucp->uc_sigmask, sizeof(c_set))
702 || __get_user(mcp, &ucp->uc_regs))
703 return -EFAULT;
704 sigset_from_compat(&set, &c_set);
705 restore_sigmask(&set);
706 if (restore_user_regs(regs, (struct mcontext32 __user *)(u64)mcp, sig))
707 return -EFAULT;
708
709 return 0;
710}
711
712/*
713 * Handle {get,set,swap}_context operations for 32 bits processes
714 */
715
716long sys32_swapcontext(struct ucontext32 __user *old_ctx,
717 struct ucontext32 __user *new_ctx,
718 int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
719{
720 unsigned char tmp;
721 compat_sigset_t c_set;
722
723 /* Context size is for future use. Right now, we only make sure
724 * we are passed something we understand
725 */
726 if (ctx_size < sizeof(struct ucontext32))
727 return -EINVAL;
728
729 if (old_ctx != NULL) {
730 compat_from_sigset(&c_set, &current->blocked);
731 if (!access_ok(VERIFY_WRITE, old_ctx, sizeof(*old_ctx))
732 || save_user_regs(regs, &old_ctx->uc_mcontext, 0)
733 || __copy_to_user(&old_ctx->uc_sigmask, &c_set, sizeof(c_set))
734 || __put_user((u32)(u64)&old_ctx->uc_mcontext, &old_ctx->uc_regs))
735 return -EFAULT;
736 }
737 if (new_ctx == NULL)
738 return 0;
739 if (!access_ok(VERIFY_READ, new_ctx, sizeof(*new_ctx))
740 || __get_user(tmp, (u8 __user *) new_ctx)
741 || __get_user(tmp, (u8 __user *) (new_ctx + 1) - 1))
742 return -EFAULT;
743
744 /*
745 * If we get a fault copying the context into the kernel's
746 * image of the user's registers, we can't just return -EFAULT
747 * because the user's registers will be corrupted. For instance
748 * the NIP value may have been updated but not some of the
749 * other registers. Given that we have done the access_ok
750 * and successfully read the first and last bytes of the region
751 * above, this should only happen in an out-of-memory situation
752 * or if another thread unmaps the region containing the context.
753 * We kill the task with a SIGSEGV in this situation.
754 */
755 if (do_setcontext32(new_ctx, regs, 0))
756 do_exit(SIGSEGV);
757
758 return 0;
759}
760
761long sys32_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
762 struct pt_regs *regs)
763{
764 struct rt_sigframe32 __user *rt_sf;
765 int ret;
766
767
768 /* Always make any pending restarted system calls return -EINTR */
769 current_thread_info()->restart_block.fn = do_no_restart_syscall;
770
771 rt_sf = (struct rt_sigframe32 __user *)
772 (regs->gpr[1] + __SIGNAL_FRAMESIZE32 + 16);
773 if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
774 goto bad;
775 if (do_setcontext32(&rt_sf->uc, regs, 1))
776 goto bad;
777
778 /*
779 * It's not clear whether or why it is desirable to save the
780 * sigaltstack setting on signal delivery and restore it on
781 * signal return. But other architectures do this and we have
782 * always done it up until now so it is probably better not to
783 * change it. -- paulus
784 * We use the sys32_ version that does the 32/64-bit conversion
785 * and takes a userland pointer directly. What about error checking?
786 * Nobody does any...
787 */
788 sys32_sigaltstack((u32)(u64)&rt_sf->uc.uc_stack, 0, 0, 0, 0, 0, regs);
789
790 ret = regs->result;
791
792 return ret;
793
794 bad:
795 force_sig(SIGSEGV, current);
796 return 0;
797}
798
799
800/*
801 * OK, we're invoking a handler
802 */
803static int handle_signal32(unsigned long sig, struct k_sigaction *ka,
804 siginfo_t *info, sigset_t *oldset,
805 struct pt_regs * regs, unsigned long newsp)
806{
807 struct sigcontext32 __user *sc;
808 struct sigregs32 __user *frame;
809 unsigned long origsp = newsp;
810
811 /* Set up Signal Frame */
812 newsp -= sizeof(struct sigregs32);
813 frame = (struct sigregs32 __user *) newsp;
814
815 /* Put a sigcontext on the stack */
816 newsp -= sizeof(*sc);
817 sc = (struct sigcontext32 __user *) newsp;
818
819 /* create a stack frame for the caller of the handler */
820 newsp -= __SIGNAL_FRAMESIZE32;
821
822 if (!access_ok(VERIFY_WRITE, (void __user *) newsp, origsp - newsp))
823 goto badframe;
824
825#if _NSIG != 64
826#error "Please adjust handle_signal32()"
827#endif
828 if (__put_user((u32)(u64)ka->sa.sa_handler, &sc->handler)
829 || __put_user(oldset->sig[0], &sc->oldmask)
830 || __put_user((oldset->sig[0] >> 32), &sc->_unused[3])
831 || __put_user((u32)(u64)frame, &sc->regs)
832 || __put_user(sig, &sc->signal))
833 goto badframe;
834
835 if (vdso32_sigtramp && current->thread.vdso_base) {
836 if (save_user_regs(regs, &frame->mctx, 0))
837 goto badframe;
838 regs->link = current->thread.vdso_base + vdso32_sigtramp;
839 } else {
840 if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
841 goto badframe;
842 regs->link = (unsigned long) frame->mctx.tramp;
843 }
844
845 if (put_user(regs->gpr[1], (unsigned long __user *)newsp))
846 goto badframe;
847 regs->gpr[1] = (unsigned long) newsp;
848 regs->gpr[3] = sig;
849 regs->gpr[4] = (unsigned long) sc;
850 regs->nip = (unsigned long) ka->sa.sa_handler;
851 regs->trap = 0;
852 regs->result = 0;
853
854 if (test_thread_flag(TIF_SINGLESTEP))
855 ptrace_notify(SIGTRAP);
856
857 return 1;
858
859badframe:
860#if DEBUG_SIG
861 printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n",
862 regs, frame, newsp);
863#endif
864 force_sigsegv(sig, current);
865 return 0;
866}
867
868/*
869 * Do a signal return; undo the signal stack.
870 */
871long sys32_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
872 struct pt_regs *regs)
873{
874 struct sigcontext32 __user *sc;
875 struct sigcontext32 sigctx;
876 struct mcontext32 __user *sr;
877 sigset_t set;
878 int ret;
879
880 /* Always make any pending restarted system calls return -EINTR */
881 current_thread_info()->restart_block.fn = do_no_restart_syscall;
882
883 sc = (struct sigcontext32 __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE32);
884 if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
885 goto badframe;
886
887 /*
888 * Note that PPC32 puts the upper 32 bits of the sigmask in the
889 * unused part of the signal stackframe
890 */
891 set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32);
892 restore_sigmask(&set);
893
894 sr = (struct mcontext32 __user *)(u64)sigctx.regs;
895 if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
896 || restore_user_regs(regs, sr, 1))
897 goto badframe;
898
899 ret = regs->result;
900 return ret;
901
902badframe:
903 force_sig(SIGSEGV, current);
904 return 0;
905}
906
907
908
909/*
910 * Start of do_signal32 routine
911 *
912 * This routine gets control when a pending signal needs to be processed
913 * in the 32 bit target thread -
914 *
915 * It handles both rt and non-rt signals
916 */
917
918/*
919 * Note that 'init' is a special process: it doesn't get signals it doesn't
920 * want to handle. Thus you cannot kill init even with a SIGKILL even by
921 * mistake.
922 */
923
924int do_signal32(sigset_t *oldset, struct pt_regs *regs)
925{
926 siginfo_t info;
927 unsigned int frame, newsp;
928 int signr, ret;
929 struct k_sigaction ka;
930
931 if (!oldset)
932 oldset = &current->blocked;
933
934 newsp = frame = 0;
935
936 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
937
938 if (TRAP(regs) == 0x0C00 /* System Call! */
939 && regs->ccr & 0x10000000 /* error signalled */
940 && ((ret = regs->gpr[3]) == ERESTARTSYS
941 || ret == ERESTARTNOHAND || ret == ERESTARTNOINTR
942 || ret == ERESTART_RESTARTBLOCK)) {
943
944 if (signr > 0
945 && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK
946 || (ret == ERESTARTSYS
947 && !(ka.sa.sa_flags & SA_RESTART)))) {
948 /* make the system call return an EINTR error */
949 regs->result = -EINTR;
950 regs->gpr[3] = EINTR;
951 /* note that the cr0.SO bit is already set */
952 } else {
953 regs->nip -= 4; /* Back up & retry system call */
954 regs->result = 0;
955 regs->trap = 0;
956 if (ret == ERESTART_RESTARTBLOCK)
957 regs->gpr[0] = __NR_restart_syscall;
958 else
959 regs->gpr[3] = regs->orig_gpr3;
960 }
961 }
962
963 if (signr == 0)
964 return 0; /* no signals delivered */
965
966 if ((ka.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size
967 && (!on_sig_stack(regs->gpr[1])))
968 newsp = (current->sas_ss_sp + current->sas_ss_size);
969 else
970 newsp = regs->gpr[1];
971 newsp &= ~0xfUL;
972
973 /* Whee! Actually deliver the signal. */
974 if (ka.sa.sa_flags & SA_SIGINFO)
975 ret = handle_rt_signal32(signr, &ka, &info, oldset, regs, newsp);
976 else
977 ret = handle_signal32(signr, &ka, &info, oldset, regs, newsp);
978
979 if (ret && !(ka.sa.sa_flags & SA_NODEFER)) {
980 spin_lock_irq(&current->sighand->siglock);
981 sigorsets(&current->blocked, &current->blocked,
982 &ka.sa.sa_mask);
983 sigaddset(&current->blocked, signr);
984 recalc_sigpending();
985 spin_unlock_irq(&current->sighand->siglock);
986 }
987
988 return ret;
989}
diff --git a/arch/ppc64/kernel/smp-tbsync.c b/arch/ppc64/kernel/smp-tbsync.c
new file mode 100644
index 000000000000..7d8ec9996b3e
--- /dev/null
+++ b/arch/ppc64/kernel/smp-tbsync.c
@@ -0,0 +1,179 @@
1/*
2 * Smp timebase synchronization for ppc.
3 *
4 * Copyright (C) 2003 Samuel Rydh (samuel@ibrium.se)
5 *
6 */
7
8#include <linux/config.h>
9#include <linux/kernel.h>
10#include <linux/sched.h>
11#include <linux/smp.h>
12#include <linux/unistd.h>
13#include <linux/init.h>
14#include <asm/atomic.h>
15#include <asm/smp.h>
16#include <asm/time.h>
17
18#define NUM_ITER 300
19
20enum {
21 kExit=0, kSetAndTest, kTest
22};
23
24static struct {
25 volatile long tb;
26 volatile long mark;
27 volatile int cmd;
28 volatile int handshake;
29 int filler[3];
30
31 volatile int ack;
32 int filler2[7];
33
34 volatile int race_result;
35} *tbsync;
36
37static volatile int running;
38
39static void __devinit
40enter_contest( long mark, long add )
41{
42 while( (long)(mftb() - mark) < 0 )
43 tbsync->race_result = add;
44}
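/* A sketch of the scoring idea: master and slave both spin storing
 * their own value (+1 or -1) into race_result until `mark` passes on
 * their local timebase. The CPU whose timebase lags reaches `mark`
 * last, so its store survives; summing race_result over many
 * iterations tells the binary search below whether the trial offset
 * was too large or too small.
 */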
45
46void __devinit
47smp_generic_take_timebase( void )
48{
49 int cmd;
50 long tb;
51
52 local_irq_disable();
53 while( !running )
54 ;
55 rmb();
56
57 for( ;; ) {
58 tbsync->ack = 1;
59 while( !tbsync->handshake )
60 ;
61 rmb();
62
63 cmd = tbsync->cmd;
64 tb = tbsync->tb;
65 tbsync->ack = 0;
66 if( cmd == kExit )
67 return;
68
69 if( cmd == kSetAndTest ) {
70 while( tbsync->handshake )
71 ;
72 asm volatile ("mttbl %0" :: "r" (tb & 0xfffffffful) );
73 asm volatile ("mttbu %0" :: "r" (tb >> 32) );
74 } else {
75 while( tbsync->handshake )
76 ;
77 }
78 enter_contest( tbsync->mark, -1 );
79 }
80 local_irq_enable();
81}
82
83static int __devinit
84start_contest( int cmd, long offset, long num )
85{
86 int i, score=0;
87 long tb, mark;
88
89 tbsync->cmd = cmd;
90
91 local_irq_disable();
92 for( i=-3; i<num; ) {
93 tb = (long)mftb() + 400;
94 tbsync->tb = tb + offset;
95 tbsync->mark = mark = tb + 400;
96
97 wmb();
98
99 tbsync->handshake = 1;
100 while( tbsync->ack )
101 ;
102
103 while( (long)(mftb() - tb) <= 0 )
104 ;
105 tbsync->handshake = 0;
106 enter_contest( mark, 1 );
107
108 while( !tbsync->ack )
109 ;
110
111 if ((tbsync->tb ^ (long)mftb()) & 0x8000000000000000ul)
112 continue;
113 if( i++ > 0 )
114 score += tbsync->race_result;
115 }
116 local_irq_enable();
117 return score;
118}
119
120void __devinit
121smp_generic_give_timebase( void )
122{
123 int i, score, score2, old, min=0, max=5000, offset=1000;
124
125 printk("Synchronizing timebase\n");
126
127 /* if this fails then this kernel won't work anyway... */
128 tbsync = kmalloc( sizeof(*tbsync), GFP_KERNEL );
129 memset( tbsync, 0, sizeof(*tbsync) );
130 mb();
131 running = 1;
132
133 while( !tbsync->ack )
134 ;
135
136 printk("Got ack\n");
137
138 /* binary search */
139 for( old=-1 ; old != offset ; offset=(min+max)/2 ) {
140 score = start_contest( kSetAndTest, offset, NUM_ITER );
141
142 printk("score %d, offset %d\n", score, offset );
143
144 if( score > 0 )
145 max = offset;
146 else
147 min = offset;
148 old = offset;
149 }
150 score = start_contest( kSetAndTest, min, NUM_ITER );
151 score2 = start_contest( kSetAndTest, max, NUM_ITER );
152
153 printk( "Min %d (score %d), Max %d (score %d)\n", min, score, max, score2 );
154 score = abs( score );
155 score2 = abs( score2 );
156 offset = (score < score2) ? min : max;
157
158 /* guard against inaccurate mttb */
159 for( i=0; i<10; i++ ) {
160 start_contest( kSetAndTest, offset, NUM_ITER/10 );
161
162 if( (score2=start_contest(kTest, offset, NUM_ITER)) < 0 )
163 score2 = -score2;
164 if( score2 <= score || score2 < 20 )
165 break;
166 }
167 printk("Final offset: %d (%d/%d)\n", offset, score2, NUM_ITER );
168
169 /* exiting */
170 tbsync->cmd = kExit;
171 wmb();
172 tbsync->handshake = 1;
173 while( tbsync->ack )
174 ;
175 tbsync->handshake = 0;
176 kfree( tbsync );
177 tbsync = NULL;
178 running = 0;
179}
diff --git a/arch/ppc64/kernel/smp.c b/arch/ppc64/kernel/smp.c
new file mode 100644
index 000000000000..1c92da3e4525
--- /dev/null
+++ b/arch/ppc64/kernel/smp.c
@@ -0,0 +1,622 @@
1/*
2 * SMP support for ppc.
3 *
4 * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
5 * deal of code from the sparc and intel versions.
6 *
7 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
8 *
9 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
10 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#undef DEBUG
19
20#include <linux/config.h>
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/sched.h>
24#include <linux/smp.h>
25#include <linux/interrupt.h>
26#include <linux/delay.h>
27#include <linux/init.h>
28#include <linux/spinlock.h>
29#include <linux/cache.h>
30#include <linux/err.h>
31#include <linux/sysdev.h>
32#include <linux/cpu.h>
33#include <linux/notifier.h>
34
35#include <asm/ptrace.h>
36#include <asm/atomic.h>
37#include <asm/irq.h>
38#include <asm/page.h>
39#include <asm/pgtable.h>
40#include <asm/prom.h>
41#include <asm/smp.h>
42#include <asm/paca.h>
43#include <asm/time.h>
44#include <asm/machdep.h>
45#include <asm/cputable.h>
46#include <asm/system.h>
47#include <asm/abs_addr.h>
48
49#include "mpic.h"
50
51#ifdef DEBUG
52#define DBG(fmt...) udbg_printf(fmt)
53#else
54#define DBG(fmt...)
55#endif
56
57cpumask_t cpu_possible_map = CPU_MASK_NONE;
58cpumask_t cpu_online_map = CPU_MASK_NONE;
59cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
60
61EXPORT_SYMBOL(cpu_online_map);
62EXPORT_SYMBOL(cpu_possible_map);
63
64struct smp_ops_t *smp_ops;
65
66static volatile unsigned int cpu_callin_map[NR_CPUS];
67
68extern unsigned char stab_array[];
69
70void smp_call_function_interrupt(void);
71
72int smt_enabled_at_boot = 1;
73
74#ifdef CONFIG_PPC_MULTIPLATFORM
75void smp_mpic_message_pass(int target, int msg)
76{
77 /* make sure we're sending something that translates to an IPI */
78 if ( msg > 0x3 ){
79 printk("SMP %d: smp_message_pass: unknown msg %d\n",
80 smp_processor_id(), msg);
81 return;
82 }
83 switch ( target )
84 {
85 case MSG_ALL:
86 mpic_send_ipi(msg, 0xffffffff);
87 break;
88 case MSG_ALL_BUT_SELF:
89 mpic_send_ipi(msg, 0xffffffff & ~(1 << smp_processor_id()));
90 break;
91 default:
92 mpic_send_ipi(msg, 1 << target);
93 break;
94 }
95}
96
97int __init smp_mpic_probe(void)
98{
99 int nr_cpus;
100
101 DBG("smp_mpic_probe()...\n");
102
103 nr_cpus = cpus_weight(cpu_possible_map);
104
105 DBG("nr_cpus: %d\n", nr_cpus);
106
107 if (nr_cpus > 1)
108 mpic_request_ipis();
109
110 return nr_cpus;
111}
112
113void __devinit smp_mpic_setup_cpu(int cpu)
114{
115 mpic_setup_this_cpu();
116}
117
118void __devinit smp_generic_kick_cpu(int nr)
119{
120 BUG_ON(nr < 0 || nr >= NR_CPUS);
121
122 /*
123 * The processor is currently spinning, waiting for the
124	 * cpu_start field to become non-zero. After we set cpu_start,
125 * the processor will continue on to secondary_start
126 */
127 paca[nr].cpu_start = 1;
128 mb();
129}
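The comment above describes a simple publish-then-flag handshake: the boot CPU writes the bringup state, executes mb(), and only then sets cpu_start, while the secondary spins until the flag flips. Below is a minimal user-space model of the same pattern, using C11 release/acquire atomics in place of the kernel barrier (a sketch only; the real secondary spins in assembly in head.S, and all names here are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int cpu_start;   /* models paca[nr].cpu_start */
static int bringup_data;       /* models state written before the kick */

static void *secondary(void *arg)
{
	/* spin, as the comment above describes */
	while (atomic_load_explicit(&cpu_start, memory_order_acquire) == 0)
		;
	printf("secondary sees bringup_data=%d\n", bringup_data);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, secondary, NULL);
	bringup_data = 42;   /* prepare state for the new cpu */
	/* the release store plays the role of mb() in the kernel code */
	atomic_store_explicit(&cpu_start, 1, memory_order_release);
	pthread_join(t, NULL);
	return 0;
}

The release store guarantees the spinning thread cannot observe cpu_start != 0 without also observing the earlier write to bringup_data, which is exactly the ordering mb() provides in smp_generic_kick_cpu().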
130
131#endif /* CONFIG_PPC_MULTIPLATFORM */
132
133static void __init smp_space_timers(unsigned int max_cpus)
134{
135 int i;
136 unsigned long offset = tb_ticks_per_jiffy / max_cpus;
137 unsigned long previous_tb = paca[boot_cpuid].next_jiffy_update_tb;
138
139 for_each_cpu(i) {
140 if (i != boot_cpuid) {
141 paca[i].next_jiffy_update_tb =
142 previous_tb + offset;
143 previous_tb = paca[i].next_jiffy_update_tb;
144 }
145 }
146}
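To make smp_space_timers() concrete: assuming, for illustration, tb_ticks_per_jiffy = 1,200,000 and max_cpus = 4, offset is 300,000 ticks, and the secondaries receive next_jiffy_update_tb values staggered at base + 300,000, base + 600,000 and base + 900,000. The per-cpu decrementer interrupts are thereby spread evenly across one jiffy instead of every CPU taking its timer tick at the same instant.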
147
148void smp_message_recv(int msg, struct pt_regs *regs)
149{
150 switch(msg) {
151 case PPC_MSG_CALL_FUNCTION:
152 smp_call_function_interrupt();
153 break;
154 case PPC_MSG_RESCHEDULE:
155 /* XXX Do we have to do this? */
156 set_need_resched();
157 break;
158#if 0
159 case PPC_MSG_MIGRATE_TASK:
160 /* spare */
161 break;
162#endif
163#ifdef CONFIG_DEBUGGER
164 case PPC_MSG_DEBUGGER_BREAK:
165 debugger_ipi(regs);
166 break;
167#endif
168 default:
169 printk("SMP %d: smp_message_recv(): unknown msg %d\n",
170 smp_processor_id(), msg);
171 break;
172 }
173}
174
175void smp_send_reschedule(int cpu)
176{
177 smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
178}
179
180#ifdef CONFIG_DEBUGGER
181void smp_send_debugger_break(int cpu)
182{
183 smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
184}
185#endif
186
187static void stop_this_cpu(void *dummy)
188{
189 local_irq_disable();
190 while (1)
191 ;
192}
193
194void smp_send_stop(void)
195{
196 smp_call_function(stop_this_cpu, NULL, 1, 0);
197}
198
199/*
200 * Structure and data for smp_call_function(). This is designed to minimise
201 * static memory requirements. It also looks cleaner.
202 * Stolen from the i386 version.
203 */
204static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
205
206static struct call_data_struct {
207 void (*func) (void *info);
208 void *info;
209 atomic_t started;
210 atomic_t finished;
211 int wait;
212} *call_data;
213
214/* delay of at least 8 seconds on 1GHz cpu */
215#define SMP_CALL_TIMEOUT (1UL << (30 + 3))
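The timeout constant checks out against its comment: 1UL << (30 + 3) = 2^33 = 8,589,934,592 polling iterations. At an optimistic one iteration per cycle on a 1 GHz CPU that is about 8.6 seconds, hence "at least 8 seconds"; any slower loop body only lengthens the wait.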
216
217/*
218 * This function sends a 'generic call function' IPI to all other CPUs
219 * in the system.
220 *
221 * [SUMMARY] Run a function on all other CPUs.
222 * <func> The function to run. This must be fast and non-blocking.
223 * <info> An arbitrary pointer to pass to the function.
224 * <nonatomic> currently unused.
225 * <wait> If true, wait (atomically) until function has completed on other CPUs.
226 * [RETURNS] 0 on success, else a negative status code. Does not return until
227 * remote CPUs are nearly ready to execute <<func>> or have already executed it.
228 *
229 * You must not call this function with disabled interrupts or from a
230 * hardware interrupt handler or from a bottom half handler.
231 */
232int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
233 int wait)
234{
235 struct call_data_struct data;
236 int ret = -1, cpus;
237 unsigned long timeout;
238
239 /* Can deadlock when called with interrupts disabled */
240 WARN_ON(irqs_disabled());
241
242 data.func = func;
243 data.info = info;
244 atomic_set(&data.started, 0);
245 data.wait = wait;
246 if (wait)
247 atomic_set(&data.finished, 0);
248
249 spin_lock(&call_lock);
250 /* Must grab online cpu count with preempt disabled, otherwise
251 * it can change. */
252 cpus = num_online_cpus() - 1;
253 if (!cpus) {
254 ret = 0;
255 goto out;
256 }
257
258 call_data = &data;
259 wmb();
260 /* Send a message to all other CPUs and wait for them to respond */
261 smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION);
262
263 /* Wait for response */
264 timeout = SMP_CALL_TIMEOUT;
265 while (atomic_read(&data.started) != cpus) {
266 HMT_low();
267 if (--timeout == 0) {
268 printk("smp_call_function on cpu %d: other cpus not "
269 "responding (%d)\n", smp_processor_id(),
270 atomic_read(&data.started));
271 debugger(NULL);
272 goto out;
273 }
274 }
275
276 if (wait) {
277 timeout = SMP_CALL_TIMEOUT;
278 while (atomic_read(&data.finished) != cpus) {
279 HMT_low();
280 if (--timeout == 0) {
281 printk("smp_call_function on cpu %d: other "
282 "cpus not finishing (%d/%d)\n",
283 smp_processor_id(),
284 atomic_read(&data.finished),
285 atomic_read(&data.started));
286 debugger(NULL);
287 goto out;
288 }
289 }
290 }
291
292 ret = 0;
293
294out:
295 call_data = NULL;
296 HMT_medium();
297 spin_unlock(&call_lock);
298 return ret;
299}
300
301EXPORT_SYMBOL(smp_call_function);
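For orientation, here is a hypothetical caller obeying the rules in the block comment above (process context, interrupts enabled, a fast non-blocking function); bump_counter, hits and count_other_cpus are illustrative names, not part of this file:

static atomic_t hits = ATOMIC_INIT(0);

/* Runs on every other online cpu; must be fast and non-blocking. */
static void bump_counter(void *info)
{
	atomic_inc((atomic_t *)info);
}

void count_other_cpus(void)
{
	/* nonatomic=0 (unused); wait=1 blocks until every cpu has run func */
	if (smp_call_function(bump_counter, &hits, 0, 1) == 0)
		printk("%d other cpus responded\n", atomic_read(&hits));
}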
302
303void smp_call_function_interrupt(void)
304{
305 void (*func) (void *info);
306 void *info;
307 int wait;
308
309 /* call_data will be NULL if the sender timed out while
310 * waiting on us to receive the call.
311 */
312 if (!call_data)
313 return;
314
315 func = call_data->func;
316 info = call_data->info;
317 wait = call_data->wait;
318
319 if (!wait)
320 smp_mb__before_atomic_inc();
321
322 /*
323 * Notify initiating CPU that I've grabbed the data and am
324 * about to execute the function
325 */
326 atomic_inc(&call_data->started);
327 /*
328 * At this point the info structure may be out of scope unless wait==1
329 */
330 (*func)(info);
331 if (wait) {
332 smp_mb__before_atomic_inc();
333 atomic_inc(&call_data->finished);
334 }
335}
336
337extern unsigned long decr_overclock;
338extern struct gettimeofday_struct do_gtod;
339
340struct thread_info *current_set[NR_CPUS];
341
342DECLARE_PER_CPU(unsigned int, pvr);
343
344static void __devinit smp_store_cpu_info(int id)
345{
346 per_cpu(pvr, id) = mfspr(SPRN_PVR);
347}
348
349static void __init smp_create_idle(unsigned int cpu)
350{
351 struct task_struct *p;
352
353 /* create a process for the processor */
354 p = fork_idle(cpu);
355 if (IS_ERR(p))
356 panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
357 paca[cpu].__current = p;
358 current_set[cpu] = p->thread_info;
359}
360
361void __init smp_prepare_cpus(unsigned int max_cpus)
362{
363 unsigned int cpu;
364
365 DBG("smp_prepare_cpus\n");
366
367 /*
368	 * setup_cpu may need to be called on the boot cpu. We haven't
369	 * spun any cpus up, but let's be paranoid.
370 */
371 BUG_ON(boot_cpuid != smp_processor_id());
372
373 /* Fixup boot cpu */
374 smp_store_cpu_info(boot_cpuid);
375 cpu_callin_map[boot_cpuid] = 1;
376
377#ifndef CONFIG_PPC_ISERIES
378 paca[boot_cpuid].next_jiffy_update_tb = tb_last_stamp = get_tb();
379
380 /*
381 * Should update do_gtod.stamp_xsec.
382	 * For now we leave it, which means the time can be some
383	 * number of msecs off until someone does a settimeofday().
384 */
385 do_gtod.varp->tb_orig_stamp = tb_last_stamp;
386 systemcfg->tb_orig_stamp = tb_last_stamp;
387#endif
388
389 max_cpus = smp_ops->probe();
390
391 smp_space_timers(max_cpus);
392
393 for_each_cpu(cpu)
394 if (cpu != boot_cpuid)
395 smp_create_idle(cpu);
396}
397
398void __devinit smp_prepare_boot_cpu(void)
399{
400 BUG_ON(smp_processor_id() != boot_cpuid);
401
402 cpu_set(boot_cpuid, cpu_online_map);
403
404 paca[boot_cpuid].__current = current;
405 current_set[boot_cpuid] = current->thread_info;
406}
407
408#ifdef CONFIG_HOTPLUG_CPU
409/* State of each CPU during hotplug phases */
410DEFINE_PER_CPU(int, cpu_state) = { 0 };
411
412int generic_cpu_disable(void)
413{
414 unsigned int cpu = smp_processor_id();
415
416 if (cpu == boot_cpuid)
417 return -EBUSY;
418
419 systemcfg->processorCount--;
420 cpu_clear(cpu, cpu_online_map);
421 fixup_irqs(cpu_online_map);
422 return 0;
423}
424
425int generic_cpu_enable(unsigned int cpu)
426{
427 /* Do the normal bootup if we haven't
428 * already bootstrapped. */
429 if (system_state != SYSTEM_RUNNING)
430 return -ENOSYS;
431
432	/* get the target out of its holding state */
433 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
434 wmb();
435
436 while (!cpu_online(cpu))
437 cpu_relax();
438
439 fixup_irqs(cpu_online_map);
440 /* counter the irq disable in fixup_irqs */
441 local_irq_enable();
442 return 0;
443}
444
445void generic_cpu_die(unsigned int cpu)
446{
447 int i;
448
449 for (i = 0; i < 100; i++) {
450 rmb();
451 if (per_cpu(cpu_state, cpu) == CPU_DEAD)
452 return;
453 msleep(100);
454 }
455 printk(KERN_ERR "CPU%d didn't die...\n", cpu);
456}
457
458void generic_mach_cpu_die(void)
459{
460 unsigned int cpu;
461
462 local_irq_disable();
463 cpu = smp_processor_id();
464 printk(KERN_DEBUG "CPU%d offline\n", cpu);
465 __get_cpu_var(cpu_state) = CPU_DEAD;
466 wmb();
467 while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
468 cpu_relax();
469
470 flush_tlb_pending();
471 cpu_set(cpu, cpu_online_map);
472 local_irq_enable();
473}
474#endif
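Taken together, the CONFIG_HOTPLUG_CPU routines above form a two-sided handshake on cpu_state: the dying CPU publishes CPU_DEAD and parks until the enabler flips the state back to CPU_UP_PREPARE, while generic_cpu_die() polls for the CPU_DEAD mark. A small user-space model of that protocol with C11 atomics and threads (a sketch under stated assumptions; the enum stands in for the real notifier constants):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

enum { IDLE = 0, UP_PREPARE = 1, DEAD = 2 };  /* stand-ins for the real constants */
static atomic_int cpu_state;

static void *dying_cpu(void *arg)
{
	atomic_store(&cpu_state, DEAD);          /* generic_mach_cpu_die() */
	while (atomic_load(&cpu_state) != UP_PREPARE)
		;                                /* parked in the holding state */
	puts("cpu released from holding state");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, dying_cpu, NULL);
	while (atomic_load(&cpu_state) != DEAD)  /* generic_cpu_die()'s poll */
		usleep(1000);
	atomic_store(&cpu_state, UP_PREPARE);    /* generic_cpu_enable() */
	pthread_join(t, NULL);
	return 0;
}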
475
476static int __devinit cpu_enable(unsigned int cpu)
477{
478 if (smp_ops->cpu_enable)
479 return smp_ops->cpu_enable(cpu);
480
481 return -ENOSYS;
482}
483
484int __devinit __cpu_up(unsigned int cpu)
485{
486 int c;
487
488 if (!cpu_enable(cpu))
489 return 0;
490
491 if (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))
492 return -EINVAL;
493
494 paca[cpu].default_decr = tb_ticks_per_jiffy / decr_overclock;
495
496 if (!cpu_has_feature(CPU_FTR_SLB)) {
497 void *tmp;
498
499 /* maximum of 48 CPUs on machines with a segment table */
500 if (cpu >= 48)
501 BUG();
502
503 tmp = &stab_array[PAGE_SIZE * cpu];
504 memset(tmp, 0, PAGE_SIZE);
505 paca[cpu].stab_addr = (unsigned long)tmp;
506 paca[cpu].stab_real = virt_to_abs(tmp);
507 }
508
509	/* Make sure callin-map entry is 0 (can be left over from
510	 * a CPU hotplug).
511 */
512 cpu_callin_map[cpu] = 0;
513
514 /* The information for processor bringup must
515 * be written out to main store before we release
516 * the processor.
517 */
518 mb();
519
520 /* wake up cpus */
521 DBG("smp: kicking cpu %d\n", cpu);
522 smp_ops->kick_cpu(cpu);
523
524 /*
525 * wait to see if the cpu made a callin (is actually up).
526 * use this value that I found through experimentation.
527 * -- Cort
528 */
529 if (system_state < SYSTEM_RUNNING)
530 for (c = 5000; c && !cpu_callin_map[cpu]; c--)
531 udelay(100);
532#ifdef CONFIG_HOTPLUG_CPU
533 else
534 /*
535 * CPUs can take much longer to come up in the
536 * hotplug case. Wait five seconds.
537 */
538 for (c = 25; c && !cpu_callin_map[cpu]; c--) {
539 msleep(200);
540 }
541#endif
542
543 if (!cpu_callin_map[cpu]) {
544 printk("Processor %u is stuck.\n", cpu);
545 return -ENOENT;
546 }
547
548 printk("Processor %u found.\n", cpu);
549
550 if (smp_ops->give_timebase)
551 smp_ops->give_timebase();
552
553 /* Wait until cpu puts itself in the online map */
554 while (!cpu_online(cpu))
555 cpu_relax();
556
557 return 0;
558}
559
560
561/* Activate a secondary processor. */
562int __devinit start_secondary(void *unused)
563{
564 unsigned int cpu = smp_processor_id();
565
566 atomic_inc(&init_mm.mm_count);
567 current->active_mm = &init_mm;
568
569 smp_store_cpu_info(cpu);
570 set_dec(paca[cpu].default_decr);
571 cpu_callin_map[cpu] = 1;
572
573 smp_ops->setup_cpu(cpu);
574 if (smp_ops->take_timebase)
575 smp_ops->take_timebase();
576
577 spin_lock(&call_lock);
578 cpu_set(cpu, cpu_online_map);
579 spin_unlock(&call_lock);
580
581 local_irq_enable();
582
583 cpu_idle();
584 return 0;
585}
586
587int setup_profiling_timer(unsigned int multiplier)
588{
589 return 0;
590}
591
592void __init smp_cpus_done(unsigned int max_cpus)
593{
594 cpumask_t old_mask;
595
596 /* We want the setup_cpu() here to be called from CPU 0, but our
597 * init thread may have been "borrowed" by another CPU in the meantime
598	 * so we pin ourselves to CPU 0 for a short while.
599 */
600 old_mask = current->cpus_allowed;
601 set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid));
602
603 smp_ops->setup_cpu(boot_cpuid);
604
605 set_cpus_allowed(current, old_mask);
606}
607
608#ifdef CONFIG_HOTPLUG_CPU
609int __cpu_disable(void)
610{
611 if (smp_ops->cpu_disable)
612 return smp_ops->cpu_disable();
613
614 return -ENOSYS;
615}
616
617void __cpu_die(unsigned int cpu)
618{
619 if (smp_ops->cpu_die)
620 smp_ops->cpu_die(cpu);
621}
622#endif
diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c
new file mode 100644
index 000000000000..7cf7a9600025
--- /dev/null
+++ b/arch/ppc64/kernel/sys_ppc32.c
@@ -0,0 +1,1329 @@
1/*
2 * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls.
3 *
4 * Copyright (C) 2001 IBM
5 * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
6 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
7 *
8 * These routines maintain argument size conversion between 32-bit and
9 * 64-bit environments.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
15 */
16
17#include <linux/config.h>
18#include <linux/kernel.h>
19#include <linux/sched.h>
20#include <linux/fs.h>
21#include <linux/mm.h>
22#include <linux/file.h>
23#include <linux/signal.h>
24#include <linux/resource.h>
25#include <linux/times.h>
26#include <linux/utsname.h>
27#include <linux/timex.h>
28#include <linux/smp.h>
29#include <linux/smp_lock.h>
30#include <linux/sem.h>
31#include <linux/msg.h>
32#include <linux/shm.h>
33#include <linux/slab.h>
34#include <linux/uio.h>
35#include <linux/aio.h>
36#include <linux/nfs_fs.h>
37#include <linux/module.h>
38#include <linux/sunrpc/svc.h>
39#include <linux/nfsd/nfsd.h>
40#include <linux/nfsd/cache.h>
41#include <linux/nfsd/xdr.h>
42#include <linux/nfsd/syscall.h>
43#include <linux/poll.h>
44#include <linux/personality.h>
45#include <linux/stat.h>
46#include <linux/filter.h>
47#include <linux/highmem.h>
48#include <linux/highuid.h>
49#include <linux/mman.h>
50#include <linux/ipv6.h>
51#include <linux/in.h>
52#include <linux/icmpv6.h>
53#include <linux/syscalls.h>
54#include <linux/unistd.h>
55#include <linux/sysctl.h>
56#include <linux/binfmts.h>
57#include <linux/dnotify.h>
58#include <linux/security.h>
59#include <linux/compat.h>
60#include <linux/ptrace.h>
61#include <linux/aio_abi.h>
62#include <linux/elf.h>
63
64#include <net/scm.h>
65#include <net/sock.h>
66
67#include <asm/ptrace.h>
68#include <asm/types.h>
69#include <asm/ipc.h>
70#include <asm/uaccess.h>
71#include <asm/unistd.h>
72#include <asm/semaphore.h>
73#include <asm/ppcdebug.h>
74#include <asm/time.h>
75#include <asm/mmu_context.h>
76#include <asm/systemcfg.h>
77
78#include "pci.h"
79
80/* readdir & getdents */
81#define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de)))
82#define ROUND_UP(x) (((x)+sizeof(u32)-1) & ~(sizeof(u32)-1))
83
84struct old_linux_dirent32 {
85 u32 d_ino;
86 u32 d_offset;
87 unsigned short d_namlen;
88 char d_name[1];
89};
90
91struct readdir_callback32 {
92 struct old_linux_dirent32 __user * dirent;
93 int count;
94};
95
96static int fillonedir(void * __buf, const char * name, int namlen,
97 off_t offset, ino_t ino, unsigned int d_type)
98{
99 struct readdir_callback32 * buf = (struct readdir_callback32 *) __buf;
100 struct old_linux_dirent32 __user * dirent;
101
102 if (buf->count)
103 return -EINVAL;
104 buf->count++;
105 dirent = buf->dirent;
106 put_user(ino, &dirent->d_ino);
107 put_user(offset, &dirent->d_offset);
108 put_user(namlen, &dirent->d_namlen);
109 copy_to_user(dirent->d_name, name, namlen);
110 put_user(0, dirent->d_name + namlen);
111 return 0;
112}
113
114asmlinkage int old32_readdir(unsigned int fd, struct old_linux_dirent32 __user *dirent, unsigned int count)
115{
116 int error = -EBADF;
117 struct file * file;
118 struct readdir_callback32 buf;
119
120 file = fget(fd);
121 if (!file)
122 goto out;
123
124 buf.count = 0;
125 buf.dirent = dirent;
126
127 error = vfs_readdir(file, (filldir_t)fillonedir, &buf);
128 if (error < 0)
129 goto out_putf;
130 error = buf.count;
131
132out_putf:
133 fput(file);
134out:
135 return error;
136}
137
138struct linux_dirent32 {
139 u32 d_ino;
140 u32 d_off;
141 unsigned short d_reclen;
142 char d_name[1];
143};
144
145struct getdents_callback32 {
146 struct linux_dirent32 __user * current_dir;
147 struct linux_dirent32 __user * previous;
148 int count;
149 int error;
150};
151
152static int filldir(void * __buf, const char * name, int namlen, off_t offset,
153 ino_t ino, unsigned int d_type)
154{
155 struct linux_dirent32 __user * dirent;
156 struct getdents_callback32 * buf = (struct getdents_callback32 *) __buf;
157 int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 2);
158
159 buf->error = -EINVAL; /* only used if we fail.. */
160 if (reclen > buf->count)
161 return -EINVAL;
162 dirent = buf->previous;
163 if (dirent) {
164 if (__put_user(offset, &dirent->d_off))
165 goto efault;
166 }
167 dirent = buf->current_dir;
168 if (__put_user(ino, &dirent->d_ino))
169 goto efault;
170 if (__put_user(reclen, &dirent->d_reclen))
171 goto efault;
172 if (copy_to_user(dirent->d_name, name, namlen))
173 goto efault;
174 if (__put_user(0, dirent->d_name + namlen))
175 goto efault;
176 if (__put_user(d_type, (char __user *) dirent + reclen - 1))
177 goto efault;
178 buf->previous = dirent;
179 dirent = (void __user *)dirent + reclen;
180 buf->current_dir = dirent;
181 buf->count -= reclen;
182 return 0;
183efault:
184 buf->error = -EFAULT;
185 return -EFAULT;
186}
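A worked example of the record size computed at the top of filldir(): NAME_OFFSET() for struct linux_dirent32 is 4 (d_ino) + 4 (d_off) + 2 (d_reclen) = 10 bytes, so a 5-character name gives 10 + 5 + 2 = 17 (name, NUL terminator and the trailing d_type byte), and ROUND_UP(17) pads the record to 20, the next multiple of sizeof(u32). That padded tail is also where __put_user() stores d_type, at dirent + reclen - 1.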
187
188asmlinkage long sys32_getdents(unsigned int fd, struct linux_dirent32 __user *dirent,
189 unsigned int count)
190{
191 struct file * file;
192 struct linux_dirent32 __user * lastdirent;
193 struct getdents_callback32 buf;
194 int error;
195
196 error = -EFAULT;
197 if (!access_ok(VERIFY_WRITE, dirent, count))
198 goto out;
199
200 error = -EBADF;
201 file = fget(fd);
202 if (!file)
203 goto out;
204
205 buf.current_dir = dirent;
206 buf.previous = NULL;
207 buf.count = count;
208 buf.error = 0;
209
210 error = vfs_readdir(file, (filldir_t)filldir, &buf);
211 if (error < 0)
212 goto out_putf;
213 error = buf.error;
214 lastdirent = buf.previous;
215 if (lastdirent) {
216 if (put_user(file->f_pos, &lastdirent->d_off))
217 error = -EFAULT;
218 else
219 error = count - buf.count;
220 }
221
222out_putf:
223 fput(file);
224out:
225 return error;
226}
227
228asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp,
229 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
230 compat_uptr_t tvp_x)
231{
232 /* sign extend n */
233 return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x));
234}
235
236int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
237{
238 long err;
239
240 if (stat->size > MAX_NON_LFS || !new_valid_dev(stat->dev) ||
241 !new_valid_dev(stat->rdev))
242 return -EOVERFLOW;
243
244 err = access_ok(VERIFY_WRITE, statbuf, sizeof(*statbuf)) ? 0 : -EFAULT;
245 err |= __put_user(new_encode_dev(stat->dev), &statbuf->st_dev);
246 err |= __put_user(stat->ino, &statbuf->st_ino);
247 err |= __put_user(stat->mode, &statbuf->st_mode);
248 err |= __put_user(stat->nlink, &statbuf->st_nlink);
249 err |= __put_user(stat->uid, &statbuf->st_uid);
250 err |= __put_user(stat->gid, &statbuf->st_gid);
251 err |= __put_user(new_encode_dev(stat->rdev), &statbuf->st_rdev);
252 err |= __put_user(stat->size, &statbuf->st_size);
253 err |= __put_user(stat->atime.tv_sec, &statbuf->st_atime);
254 err |= __put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec);
255 err |= __put_user(stat->mtime.tv_sec, &statbuf->st_mtime);
256 err |= __put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec);
257 err |= __put_user(stat->ctime.tv_sec, &statbuf->st_ctime);
258 err |= __put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec);
259 err |= __put_user(stat->blksize, &statbuf->st_blksize);
260 err |= __put_user(stat->blocks, &statbuf->st_blocks);
261 err |= __put_user(0, &statbuf->__unused4[0]);
262 err |= __put_user(0, &statbuf->__unused4[1]);
263
264 return err;
265}
266
267/* Note: it is necessary to treat option as an unsigned int,
268 * with the corresponding cast to a signed int, to ensure that the
269 * proper conversion (sign extension) is performed between the
270 * 32-bit and 64-bit register representations of a signed int.
271 */
272asmlinkage long sys32_sysfs(u32 option, u32 arg1, u32 arg2)
273{
274 return sys_sysfs((int)option, arg1, arg2);
275}
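The effect these notes describe can be demonstrated in ordinary user space on a 64-bit machine: if a 32-bit caller passes -1, the 64-bit register holds the zero-extended value 0xffffffff, and only the explicit (int) cast recovers the negative value (a standalone illustration, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned int option = 0xffffffffu;  /* what the 64-bit register holds */
	long wrong = (long)option;          /* 4294967295: zero extension kept */
	long right = (long)(int)option;     /* -1: the intended sign extension */

	printf("wrong=%ld right=%ld\n", wrong, right);
	return 0;
}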
276
277/* Handle adjtimex compatibility. */
278struct timex32 {
279 u32 modes;
280 s32 offset, freq, maxerror, esterror;
281 s32 status, constant, precision, tolerance;
282 struct compat_timeval time;
283 s32 tick;
284 s32 ppsfreq, jitter, shift, stabil;
285 s32 jitcnt, calcnt, errcnt, stbcnt;
286 s32 :32; s32 :32; s32 :32; s32 :32;
287 s32 :32; s32 :32; s32 :32; s32 :32;
288 s32 :32; s32 :32; s32 :32; s32 :32;
289};
290
291extern int do_adjtimex(struct timex *);
292extern void ppc_adjtimex(void);
293
294asmlinkage long sys32_adjtimex(struct timex32 __user *utp)
295{
296 struct timex txc;
297 int ret;
298
299 memset(&txc, 0, sizeof(struct timex));
300
301 if(get_user(txc.modes, &utp->modes) ||
302 __get_user(txc.offset, &utp->offset) ||
303 __get_user(txc.freq, &utp->freq) ||
304 __get_user(txc.maxerror, &utp->maxerror) ||
305 __get_user(txc.esterror, &utp->esterror) ||
306 __get_user(txc.status, &utp->status) ||
307 __get_user(txc.constant, &utp->constant) ||
308 __get_user(txc.precision, &utp->precision) ||
309 __get_user(txc.tolerance, &utp->tolerance) ||
310 __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
311 __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
312 __get_user(txc.tick, &utp->tick) ||
313 __get_user(txc.ppsfreq, &utp->ppsfreq) ||
314 __get_user(txc.jitter, &utp->jitter) ||
315 __get_user(txc.shift, &utp->shift) ||
316 __get_user(txc.stabil, &utp->stabil) ||
317 __get_user(txc.jitcnt, &utp->jitcnt) ||
318 __get_user(txc.calcnt, &utp->calcnt) ||
319 __get_user(txc.errcnt, &utp->errcnt) ||
320 __get_user(txc.stbcnt, &utp->stbcnt))
321 return -EFAULT;
322
323 ret = do_adjtimex(&txc);
324
325 /* adjust the conversion of TB to time of day to track adjtimex */
326 ppc_adjtimex();
327
328 if(put_user(txc.modes, &utp->modes) ||
329 __put_user(txc.offset, &utp->offset) ||
330 __put_user(txc.freq, &utp->freq) ||
331 __put_user(txc.maxerror, &utp->maxerror) ||
332 __put_user(txc.esterror, &utp->esterror) ||
333 __put_user(txc.status, &utp->status) ||
334 __put_user(txc.constant, &utp->constant) ||
335 __put_user(txc.precision, &utp->precision) ||
336 __put_user(txc.tolerance, &utp->tolerance) ||
337 __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
338 __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
339 __put_user(txc.tick, &utp->tick) ||
340 __put_user(txc.ppsfreq, &utp->ppsfreq) ||
341 __put_user(txc.jitter, &utp->jitter) ||
342 __put_user(txc.shift, &utp->shift) ||
343 __put_user(txc.stabil, &utp->stabil) ||
344 __put_user(txc.jitcnt, &utp->jitcnt) ||
345 __put_user(txc.calcnt, &utp->calcnt) ||
346 __put_user(txc.errcnt, &utp->errcnt) ||
347 __put_user(txc.stbcnt, &utp->stbcnt))
348 ret = -EFAULT;
349
350 return ret;
351}
352
353
354/* This is here just in case some old sparc32 binary calls it. */
355asmlinkage long sys32_pause(void)
356{
357 current->state = TASK_INTERRUPTIBLE;
358 schedule();
359
360 return -ERESTARTNOHAND;
361}
362
363
364
365static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
366{
367 long usec;
368
369 if (!access_ok(VERIFY_READ, i, sizeof(*i)))
370 return -EFAULT;
371 if (__get_user(o->tv_sec, &i->tv_sec))
372 return -EFAULT;
373 if (__get_user(usec, &i->tv_usec))
374 return -EFAULT;
375 o->tv_nsec = usec * 1000;
376 return 0;
377}
378
379static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
380{
381 return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
382 (__put_user(i->tv_sec, &o->tv_sec) |
383 __put_user(i->tv_usec, &o->tv_usec)));
384}
385
386struct sysinfo32 {
387 s32 uptime;
388 u32 loads[3];
389 u32 totalram;
390 u32 freeram;
391 u32 sharedram;
392 u32 bufferram;
393 u32 totalswap;
394 u32 freeswap;
395 unsigned short procs;
396 unsigned short pad;
397 u32 totalhigh;
398 u32 freehigh;
399 u32 mem_unit;
400 char _f[20-2*sizeof(int)-sizeof(int)];
401};
402
403asmlinkage long sys32_sysinfo(struct sysinfo32 __user *info)
404{
405 struct sysinfo s;
406 int ret, err;
407 int bitcount=0;
408 mm_segment_t old_fs = get_fs ();
409
410 /* The __user cast is valid due to set_fs() */
411 set_fs (KERNEL_DS);
412 ret = sys_sysinfo((struct sysinfo __user *)&s);
413 set_fs (old_fs);
414
415 /* Check to see if any memory value is too large for 32-bit and
416 * scale down if needed.
417 */
418 if ((s.totalram >> 32) || (s.totalswap >> 32)) {
419 while (s.mem_unit < PAGE_SIZE) {
420 s.mem_unit <<= 1;
421 bitcount++;
422 }
423		s.totalram >>= bitcount;
424 s.freeram >>= bitcount;
425 s.sharedram >>= bitcount;
426 s.bufferram >>= bitcount;
427 s.totalswap >>= bitcount;
428 s.freeswap >>= bitcount;
429 s.totalhigh >>= bitcount;
430 s.freehigh >>= bitcount;
431 }
432
433 err = put_user (s.uptime, &info->uptime);
434 err |= __put_user (s.loads[0], &info->loads[0]);
435 err |= __put_user (s.loads[1], &info->loads[1]);
436 err |= __put_user (s.loads[2], &info->loads[2]);
437 err |= __put_user (s.totalram, &info->totalram);
438 err |= __put_user (s.freeram, &info->freeram);
439 err |= __put_user (s.sharedram, &info->sharedram);
440 err |= __put_user (s.bufferram, &info->bufferram);
441 err |= __put_user (s.totalswap, &info->totalswap);
442 err |= __put_user (s.freeswap, &info->freeswap);
443 err |= __put_user (s.procs, &info->procs);
444 err |= __put_user (s.totalhigh, &info->totalhigh);
445 err |= __put_user (s.freehigh, &info->freehigh);
446 err |= __put_user (s.mem_unit, &info->mem_unit);
447 if (err)
448 return -EFAULT;
449
450 return ret;
451}
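A worked example of the scaling above: assume PAGE_SIZE = 4096 and a 16 GiB machine reporting mem_unit = 1, so s.totalram = 2^34, too large for the 32-bit field. The loop doubles mem_unit up to 4096 (bitcount = 12) and shifts totalram down to 2^22 = 4,194,304; a 32-bit caller multiplying 4,194,304 units by the 4096-byte mem_unit recovers 16 GiB exactly.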
452
453
454
455
456/* Translations due to time_t size differences, which affect all
457 sorts of things, like timeval and itimerval. */
458extern struct timezone sys_tz;
459
460asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
461{
462 if (tv) {
463 struct timeval ktv;
464 do_gettimeofday(&ktv);
465 if (put_tv32(tv, &ktv))
466 return -EFAULT;
467 }
468 if (tz) {
469 if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
470 return -EFAULT;
471 }
472
473 return 0;
474}
475
476
477
478asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
479{
480 struct timespec kts;
481 struct timezone ktz;
482
483 if (tv) {
484 if (get_ts32(&kts, tv))
485 return -EFAULT;
486 }
487 if (tz) {
488 if (copy_from_user(&ktz, tz, sizeof(ktz)))
489 return -EFAULT;
490 }
491
492 return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
493}
494
495#ifdef CONFIG_SYSVIPC
496long sys32_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr,
497 u32 fifth)
498{
499 int version;
500
501 version = call >> 16; /* hack for backward compatibility */
502 call &= 0xffff;
503
504 switch (call) {
505
506 case SEMTIMEDOP:
507 if (fifth)
508 /* sign extend semid */
509 return compat_sys_semtimedop((int)first,
510 compat_ptr(ptr), second,
511 compat_ptr(fifth));
512 /* else fall through for normal semop() */
513 case SEMOP:
514 /* struct sembuf is the same on 32 and 64bit :)) */
515 /* sign extend semid */
516 return sys_semtimedop((int)first, compat_ptr(ptr), second,
517 NULL);
518 case SEMGET:
519 /* sign extend key, nsems */
520 return sys_semget((int)first, (int)second, third);
521 case SEMCTL:
522 /* sign extend semid, semnum */
523 return compat_sys_semctl((int)first, (int)second, third,
524 compat_ptr(ptr));
525
526 case MSGSND:
527 /* sign extend msqid */
528 return compat_sys_msgsnd((int)first, (int)second, third,
529 compat_ptr(ptr));
530 case MSGRCV:
531 /* sign extend msqid, msgtyp */
532 return compat_sys_msgrcv((int)first, second, (int)fifth,
533 third, version, compat_ptr(ptr));
534 case MSGGET:
535 /* sign extend key */
536 return sys_msgget((int)first, second);
537 case MSGCTL:
538 /* sign extend msqid */
539 return compat_sys_msgctl((int)first, second, compat_ptr(ptr));
540
541 case SHMAT:
542 /* sign extend shmid */
543 return compat_sys_shmat((int)first, second, third, version,
544 compat_ptr(ptr));
545 case SHMDT:
546 return sys_shmdt(compat_ptr(ptr));
547 case SHMGET:
548 /* sign extend key_t */
549 return sys_shmget((int)first, second, third);
550 case SHMCTL:
551 /* sign extend shmid */
552 return compat_sys_shmctl((int)first, second, compat_ptr(ptr));
553
554 default:
555 return -ENOSYS;
556 }
557
558 return -ENOSYS;
559}
560#endif
561
562/* Note: it is necessary to treat out_fd and in_fd as unsigned ints,
563 * with the corresponding cast to a signed int, to ensure that the
564 * proper conversion (sign extension) is performed between the
565 * 32-bit and 64-bit register representations of a signed int.
566 */
567asmlinkage long sys32_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count)
568{
569 mm_segment_t old_fs = get_fs();
570 int ret;
571 off_t of;
572 off_t __user *up;
573
574 if (offset && get_user(of, offset))
575 return -EFAULT;
576
577 /* The __user pointer cast is valid because of the set_fs() */
578 set_fs(KERNEL_DS);
579 up = offset ? (off_t __user *) &of : NULL;
580 ret = sys_sendfile((int)out_fd, (int)in_fd, up, count);
581 set_fs(old_fs);
582
583 if (offset && put_user(of, offset))
584 return -EFAULT;
585
586 return ret;
587}
588
589asmlinkage int sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count)
590{
591 mm_segment_t old_fs = get_fs();
592 int ret;
593 loff_t lof;
594 loff_t __user *up;
595
596 if (offset && get_user(lof, offset))
597 return -EFAULT;
598
599 /* The __user pointer cast is valid because of the set_fs() */
600 set_fs(KERNEL_DS);
601 up = offset ? (loff_t __user *) &lof : NULL;
602 ret = sys_sendfile64(out_fd, in_fd, up, count);
603 set_fs(old_fs);
604
605 if (offset && put_user(lof, offset))
606 return -EFAULT;
607
608 return ret;
609}
610
611long sys32_execve(unsigned long a0, unsigned long a1, unsigned long a2,
612 unsigned long a3, unsigned long a4, unsigned long a5,
613 struct pt_regs *regs)
614{
615 int error;
616 char * filename;
617
618 filename = getname((char __user *) a0);
619 error = PTR_ERR(filename);
620 if (IS_ERR(filename))
621 goto out;
622 flush_fp_to_thread(current);
623 flush_altivec_to_thread(current);
624
625 error = compat_do_execve(filename, compat_ptr(a1), compat_ptr(a2), regs);
626
627 if (error == 0) {
628 task_lock(current);
629 current->ptrace &= ~PT_DTRACE;
630 task_unlock(current);
631 }
632 putname(filename);
633
634out:
635 return error;
636}
637
638/* Set up a thread for executing a new program. */
639void start_thread32(struct pt_regs* regs, unsigned long nip, unsigned long sp)
640{
641 set_fs(USER_DS);
642
643 /*
644 * If we exec out of a kernel thread then thread.regs will not be
645 * set. Do it now.
646 */
647 if (!current->thread.regs) {
648 unsigned long childregs = (unsigned long)current->thread_info +
649 THREAD_SIZE;
650 childregs -= sizeof(struct pt_regs);
651 current->thread.regs = (struct pt_regs *)childregs;
652 }
653
654 /*
655 * ELF_PLAT_INIT already clears all registers but it also sets r2.
656 * So just clear r2 here.
657 */
658 regs->gpr[2] = 0;
659
660 regs->nip = nip;
661 regs->gpr[1] = sp;
662 regs->msr = MSR_USER32;
663#ifndef CONFIG_SMP
664 if (last_task_used_math == current)
665 last_task_used_math = 0;
666#endif /* CONFIG_SMP */
667 current->thread.fpscr = 0;
668 memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
669#ifdef CONFIG_ALTIVEC
670#ifndef CONFIG_SMP
671 if (last_task_used_altivec == current)
672 last_task_used_altivec = 0;
673#endif /* CONFIG_SMP */
674 memset(current->thread.vr, 0, sizeof(current->thread.vr));
675 current->thread.vscr.u[0] = 0;
676 current->thread.vscr.u[1] = 0;
677 current->thread.vscr.u[2] = 0;
678 current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
679 current->thread.vrsave = 0;
680 current->thread.used_vr = 0;
681#endif /* CONFIG_ALTIVEC */
682}
683
684/* Note: it is necessary to treat option as an unsigned int,
685 * with the corresponding cast to a signed int, to ensure that the
686 * proper conversion (sign extension) is performed between the
687 * 32-bit and 64-bit register representations of a signed int.
688 */
689asmlinkage long sys32_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 arg5)
690{
691 return sys_prctl((int)option,
692 (unsigned long) arg2,
693 (unsigned long) arg3,
694 (unsigned long) arg4,
695 (unsigned long) arg5);
696}
697
698/* Note: it is necessary to treat pid as an unsigned int,
699 * with the corresponding cast to a signed int, to ensure that the
700 * proper conversion (sign extension) is performed between the
701 * 32-bit and 64-bit register representations of a signed int.
702 */
703asmlinkage long sys32_sched_rr_get_interval(u32 pid, struct compat_timespec __user *interval)
704{
705 struct timespec t;
706 int ret;
707 mm_segment_t old_fs = get_fs ();
708
709 /* The __user pointer cast is valid because of the set_fs() */
710 set_fs (KERNEL_DS);
711 ret = sys_sched_rr_get_interval((int)pid, (struct timespec __user *) &t);
712 set_fs (old_fs);
713 if (put_compat_timespec(&t, interval))
714 return -EFAULT;
715 return ret;
716}
717
718asmlinkage int sys32_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
719{
720 return sys_pciconfig_read((unsigned long) bus,
721 (unsigned long) dfn,
722 (unsigned long) off,
723 (unsigned long) len,
724 compat_ptr(ubuf));
725}
726
727asmlinkage int sys32_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
728{
729 return sys_pciconfig_write((unsigned long) bus,
730 (unsigned long) dfn,
731 (unsigned long) off,
732 (unsigned long) len,
733 compat_ptr(ubuf));
734}
735
736#define IOBASE_BRIDGE_NUMBER 0
737#define IOBASE_MEMORY 1
738#define IOBASE_IO 2
739#define IOBASE_ISA_IO 3
740#define IOBASE_ISA_MEM 4
741
742asmlinkage int sys32_pciconfig_iobase(u32 which, u32 in_bus, u32 in_devfn)
743{
744 struct pci_controller* hose;
745 struct list_head *ln;
746 struct pci_bus *bus = NULL;
747 struct device_node *hose_node;
748
749 /* Argh ! Please forgive me for that hack, but that's the
750	 * simplest way to get existing XFree to not lock up on some
751 * G5 machines... So when something asks for bus 0 io base
752 * (bus 0 is HT root), we return the AGP one instead.
753 */
754#ifdef CONFIG_PPC_PMAC
755 if (systemcfg->platform == PLATFORM_POWERMAC &&
756 machine_is_compatible("MacRISC4"))
757 if (in_bus == 0)
758 in_bus = 0xf0;
759#endif /* CONFIG_PPC_PMAC */
760
761 /* That syscall isn't quite compatible with PCI domains, but it's
762	 * used by pre-domain setups. We return the first match.
763 */
764
765 for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
766 bus = pci_bus_b(ln);
767 if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate))
768 break;
769 bus = NULL;
770 }
771 if (bus == NULL || bus->sysdata == NULL)
772 return -ENODEV;
773
774 hose_node = (struct device_node *)bus->sysdata;
775 hose = hose_node->phb;
776
777 switch (which) {
778 case IOBASE_BRIDGE_NUMBER:
779 return (long)hose->first_busno;
780 case IOBASE_MEMORY:
781 return (long)hose->pci_mem_offset;
782 case IOBASE_IO:
783 return (long)hose->io_base_phys;
784 case IOBASE_ISA_IO:
785 return (long)isa_io_base;
786 case IOBASE_ISA_MEM:
787 return -EINVAL;
788 }
789
790 return -EOPNOTSUPP;
791}
792
793
794asmlinkage int ppc64_newuname(struct new_utsname __user * name)
795{
796 int errno = sys_newuname(name);
797
798 if (current->personality == PER_LINUX32 && !errno) {
799 if(copy_to_user(name->machine, "ppc\0\0", 8)) {
800 errno = -EFAULT;
801 }
802 }
803 return errno;
804}
805
806asmlinkage int ppc64_personality(unsigned long personality)
807{
808 int ret;
809 if (current->personality == PER_LINUX32 && personality == PER_LINUX)
810 personality = PER_LINUX32;
811 ret = sys_personality(personality);
812 if (ret == PER_LINUX32)
813 ret = PER_LINUX;
814 return ret;
815}
816
817
818
819/* Note: it is necessary to treat mode as an unsigned int,
820 * with the corresponding cast to a signed int, to ensure that the
821 * proper conversion (sign extension) is performed between the
822 * 32-bit and 64-bit register representations of a signed int.
823 */
824asmlinkage long sys32_access(const char __user * filename, u32 mode)
825{
826 return sys_access(filename, (int)mode);
827}
828
829
830/* Note: it is necessary to treat mode as an unsigned int,
831 * with the corresponding cast to a signed int, to ensure that the
832 * proper conversion (sign extension) is performed between the
833 * 32-bit and 64-bit register representations of a signed int.
834 */
835asmlinkage long sys32_creat(const char __user * pathname, u32 mode)
836{
837 return sys_creat(pathname, (int)mode);
838}
839
840
841/* Note: it is necessary to treat pid and options as unsigned ints,
842 * with the corresponding cast to a signed int, to ensure that the
843 * proper conversion (sign extension) is performed between the
844 * 32-bit and 64-bit register representations of a signed int.
845 */
846asmlinkage long sys32_waitpid(u32 pid, unsigned int __user * stat_addr, u32 options)
847{
848 return sys_waitpid((int)pid, stat_addr, (int)options);
849}
850
851
852/* Note: it is necessary to treat gidsetsize as an unsigned int,
853 * with the corresponding cast to a signed int, to ensure that the
854 * proper conversion (sign extension) is performed between the
855 * 32-bit and 64-bit register representations of a signed int.
856 */
857asmlinkage long sys32_getgroups(u32 gidsetsize, gid_t __user *grouplist)
858{
859 return sys_getgroups((int)gidsetsize, grouplist);
860}
861
862
863/* Note: it is necessary to treat pid as an unsigned int,
864 * with the corresponding cast to a signed int, to ensure that the
865 * proper conversion (sign extension) is performed between the
866 * 32-bit and 64-bit register representations of a signed int.
867 */
868asmlinkage long sys32_getpgid(u32 pid)
869{
870 return sys_getpgid((int)pid);
871}
872
873
874/* Note: it is necessary to treat which and who as unsigned ints,
875 * with the corresponding cast to a signed int, to ensure that the
876 * proper conversion (sign extension) is performed between the
877 * 32-bit and 64-bit register representations of a signed int.
878 */
879asmlinkage long sys32_getpriority(u32 which, u32 who)
880{
881 return sys_getpriority((int)which, (int)who);
882}
883
884
885/* Note: it is necessary to treat pid as an unsigned int,
886 * with the corresponding cast to a signed int, to ensure that the
887 * proper conversion (sign extension) is performed between the
888 * 32-bit and 64-bit register representations of a signed int.
889 */
890asmlinkage long sys32_getsid(u32 pid)
891{
892 return sys_getsid((int)pid);
893}
894
895
896/* Note: it is necessary to treat pid and sig as unsigned ints,
897 * with the corresponding cast to a signed int, to ensure that the
898 * proper conversion (sign extension) is performed between the
899 * 32-bit and 64-bit register representations of a signed int.
900 */
901asmlinkage long sys32_kill(u32 pid, u32 sig)
902{
903 return sys_kill((int)pid, (int)sig);
904}
905
906
907/* Note: it is necessary to treat mode as an unsigned int,
908 * with the corresponding cast to a signed int, to ensure that the
909 * proper conversion (sign extension) is performed between the
910 * 32-bit and 64-bit register representations of a signed int.
911 */
912asmlinkage long sys32_mkdir(const char __user * pathname, u32 mode)
913{
914 return sys_mkdir(pathname, (int)mode);
915}
916
917long sys32_nice(u32 increment)
918{
919 /* sign extend increment */
920 return sys_nice((int)increment);
921}
922
923off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin)
924{
925	/* sign extend offset */
926 return sys_lseek(fd, (int)offset, origin);
927}
928
929/*
930 * This is just a version for 32-bit applications which does
931 * not force O_LARGEFILE on.
932 */
933asmlinkage long sys32_open(const char __user * filename, int flags, int mode)
934{
935 char * tmp;
936 int fd, error;
937
938 tmp = getname(filename);
939 fd = PTR_ERR(tmp);
940 if (!IS_ERR(tmp)) {
941 fd = get_unused_fd();
942 if (fd >= 0) {
943 struct file * f = filp_open(tmp, flags, mode);
944 error = PTR_ERR(f);
945 if (IS_ERR(f))
946 goto out_error;
947 fd_install(fd, f);
948 }
949out:
950 putname(tmp);
951 }
952 return fd;
953
954out_error:
955 put_unused_fd(fd);
956 fd = error;
957 goto out;
958}
959
960/* Note: it is necessary to treat bufsiz as an unsigned int,
961 * with the corresponding cast to a signed int, to ensure that the
962 * proper conversion (sign extension) is performed between the
963 * 32-bit and 64-bit register representations of a signed int.
964 */
965asmlinkage long sys32_readlink(const char __user * path, char __user * buf, u32 bufsiz)
966{
967 return sys_readlink(path, buf, (int)bufsiz);
968}
969
970/* Note: it is necessary to treat policy as an unsigned int,
971 * with the corresponding cast to a signed int, to ensure that the
972 * proper conversion (sign extension) is performed between the
973 * 32-bit and 64-bit register representations of a signed int.
974 */
975asmlinkage long sys32_sched_get_priority_max(u32 policy)
976{
977 return sys_sched_get_priority_max((int)policy);
978}
979
980
981/* Note: it is necessary to treat policy as an unsigned int,
982 * with the corresponding cast to a signed int, to ensure that the
983 * proper conversion (sign extension) is performed between the
984 * 32-bit and 64-bit register representations of a signed int.
985 */
986asmlinkage long sys32_sched_get_priority_min(u32 policy)
987{
988 return sys_sched_get_priority_min((int)policy);
989}
990
991
992/* Note: it is necessary to treat pid as an unsigned int,
993 * with the corresponding cast to a signed int, to ensure that the
994 * proper conversion (sign extension) is performed between the
995 * 32-bit and 64-bit register representations of a signed int.
996 */
997asmlinkage long sys32_sched_getparam(u32 pid, struct sched_param __user *param)
998{
999 return sys_sched_getparam((int)pid, param);
1000}
1001
1002
1003/* Note: it is necessary to treat pid as an unsigned int,
1004 * with the corresponding cast to a signed int, to ensure that the
1005 * proper conversion (sign extension) is performed between the
1006 * 32-bit and 64-bit register representations of a signed int.
1007 */
1008asmlinkage long sys32_sched_getscheduler(u32 pid)
1009{
1010 return sys_sched_getscheduler((int)pid);
1011}
1012
1013
1014/* Note: it is necessary to treat pid as an unsigned int,
1015 * with the corresponding cast to a signed int, to ensure that the
1016 * proper conversion (sign extension) is performed between the
1017 * 32-bit and 64-bit register representations of a signed int.
1018 */
1019asmlinkage long sys32_sched_setparam(u32 pid, struct sched_param __user *param)
1020{
1021 return sys_sched_setparam((int)pid, param);
1022}
1023
1024
1025/* Note: it is necessary to treat pid and policy as unsigned ints,
1026 * with the corresponding cast to a signed int, to ensure that the
1027 * proper conversion (sign extension) is performed between the
1028 * 32-bit and 64-bit register representations of a signed int.
1029 */
1030asmlinkage long sys32_sched_setscheduler(u32 pid, u32 policy, struct sched_param __user *param)
1031{
1032 return sys_sched_setscheduler((int)pid, (int)policy, param);
1033}
1034
1035
1036/* Note: it is necessary to treat len as an unsigned int,
1037 * with the corresponding cast to a signed int, to ensure that the
1038 * proper conversion (sign extension) is performed between the
1039 * 32-bit and 64-bit register representations of a signed int.
1040 */
1041asmlinkage long sys32_setdomainname(char __user *name, u32 len)
1042{
1043 return sys_setdomainname(name, (int)len);
1044}
1045
1046
1047/* Note: it is necessary to treat gidsetsize as an unsigned int,
1048 * with the corresponding cast to a signed int, to ensure that the
1049 * proper conversion (sign extension) is performed between the
1050 * 32-bit and 64-bit register representations of a signed int.
1051 */
1052asmlinkage long sys32_setgroups(u32 gidsetsize, gid_t __user *grouplist)
1053{
1054 return sys_setgroups((int)gidsetsize, grouplist);
1055}
1056
1057
1058asmlinkage long sys32_sethostname(char __user *name, u32 len)
1059{
1060 /* sign extend len */
1061 return sys_sethostname(name, (int)len);
1062}
1063
1064
1065/* Note: it is necessary to treat pid and pgid as unsigned ints,
1066 * with the corresponding cast to a signed int, to ensure that the
1067 * proper conversion (sign extension) is performed between the
1068 * 32-bit and 64-bit register representations of a signed int.
1069 */
1070asmlinkage long sys32_setpgid(u32 pid, u32 pgid)
1071{
1072 return sys_setpgid((int)pid, (int)pgid);
1073}
1074
1075
1076long sys32_setpriority(u32 which, u32 who, u32 niceval)
1077{
1078 /* sign extend which, who and niceval */
1079 return sys_setpriority((int)which, (int)who, (int)niceval);
1080}
1081
1082/* Note: it is necessary to treat newmask as an unsigned int,
1083 * with the corresponding cast to a signed int, to ensure that the
1084 * proper conversion (sign extension) is performed between the
1085 * 32-bit and 64-bit register representations of a signed int.
1086 */
1087asmlinkage long sys32_ssetmask(u32 newmask)
1088{
1089 return sys_ssetmask((int) newmask);
1090}
1091
1092asmlinkage long sys32_syslog(u32 type, char __user * buf, u32 len)
1093{
1094 /* sign extend len */
1095 return sys_syslog(type, buf, (int)len);
1096}
1097
1098
1099/* Note: it is necessary to treat mask as an unsigned int,
1100 * with the corresponding cast to a signed int, to ensure that the
1101 * proper conversion (sign extension) is performed between the
1102 * 32-bit and 64-bit register representations of a signed int.
1103 */
1104asmlinkage long sys32_umask(u32 mask)
1105{
1106 return sys_umask((int)mask);
1107}
1108
1109#ifdef CONFIG_SYSCTL
1110struct __sysctl_args32 {
1111 u32 name;
1112 int nlen;
1113 u32 oldval;
1114 u32 oldlenp;
1115 u32 newval;
1116 u32 newlen;
1117 u32 __unused[4];
1118};
1119
1120asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args)
1121{
1122 struct __sysctl_args32 tmp;
1123 int error;
1124 size_t oldlen;
1125 size_t __user *oldlenp = NULL;
1126 unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7;
1127
1128 if (copy_from_user(&tmp, args, sizeof(tmp)))
1129 return -EFAULT;
1130
1131 if (tmp.oldval && tmp.oldlenp) {
1132 /* Duh, this is ugly and might not work if sysctl_args
1133 is in read-only memory, but do_sysctl does indirectly
1134 a lot of uaccess in both directions and we'd have to
1135 basically copy the whole sysctl.c here, and
1136 glibc's __sysctl uses rw memory for the structure
1137 anyway. */
1138 oldlenp = (size_t __user *)addr;
1139 if (get_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp)) ||
1140 put_user(oldlen, oldlenp))
1141 return -EFAULT;
1142 }
1143
1144 lock_kernel();
1145 error = do_sysctl(compat_ptr(tmp.name), tmp.nlen,
1146 compat_ptr(tmp.oldval), oldlenp,
1147 compat_ptr(tmp.newval), tmp.newlen);
1148 unlock_kernel();
1149 if (oldlenp) {
1150 if (!error) {
1151 if (get_user(oldlen, oldlenp) ||
1152 put_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp)))
1153 error = -EFAULT;
1154 }
1155 copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused));
1156 }
1157 return error;
1158}
1159#endif
1160
1161asmlinkage int sys32_olduname(struct oldold_utsname __user * name)
1162{
1163 int error;
1164
1165 if (!name)
1166 return -EFAULT;
1167 if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
1168 return -EFAULT;
1169
1170 down_read(&uts_sem);
1171 error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
1172 error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
1173 error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
1174 error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
1175 error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
1176 error -= __put_user(0,name->release+__OLD_UTS_LEN);
1177 error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
1178 error -= __put_user(0,name->version+__OLD_UTS_LEN);
1179 error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
1180	error -= __put_user(0,name->machine+__OLD_UTS_LEN);
1181 up_read(&uts_sem);
1182
1183 error = error ? -EFAULT : 0;
1184
1185 return error;
1186}
1187
1188unsigned long sys32_mmap2(unsigned long addr, size_t len,
1189 unsigned long prot, unsigned long flags,
1190 unsigned long fd, unsigned long pgoff)
1191{
1192 /* This should remain 12 even if PAGE_SIZE changes */
1193 return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
1194}
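The fixed shift is the point of the comment above: the mmap2 ABI defines pgoff in 4096-byte units regardless of the kernel's actual page size, so pgoff = 3 always names file offset 3 * 4096 = 12288. Using PAGE_SHIFT here instead of 12 would silently change the userspace ABI on a kernel built with larger pages.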
1195
1196int get_compat_timeval(struct timeval *tv, struct compat_timeval __user *ctv)
1197{
1198 return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) ||
1199 __get_user(tv->tv_sec, &ctv->tv_sec) ||
1200 __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
1201}
1202
1203asmlinkage long sys32_utimes(char __user *filename, struct compat_timeval __user *tvs)
1204{
1205 struct timeval ktvs[2], *ptr;
1206
1207 ptr = NULL;
1208 if (tvs) {
1209 if (get_compat_timeval(&ktvs[0], &tvs[0]) ||
1210 get_compat_timeval(&ktvs[1], &tvs[1]))
1211 return -EFAULT;
1212 ptr = ktvs;
1213 }
1214
1215 return do_utimes(filename, ptr);
1216}
1217
1218long sys32_tgkill(u32 tgid, u32 pid, int sig)
1219{
1220 /* sign extend tgid, pid */
1221 return sys_tgkill((int)tgid, (int)pid, sig);
1222}
1223
1224/*
1225 * long long munging:
1226 * The 32 bit ABI passes long longs in an odd even register pair.
1227 */
1228
1229compat_ssize_t sys32_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
1230 u32 reg6, u32 poshi, u32 poslo)
1231{
1232 return sys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
1233}
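Reading the munging concretely for sys32_pread64(): in the 32-bit PowerPC ABI, fd, ubuf and count arrive in r3, r4 and r5, and the 64-bit file position must begin in an odd/even register pair, so r6 is skipped (the unused reg6 parameter) and the position arrives split as poshi in r7 and poslo in r8. A caller passing pos = 0x100000000 (4 GiB) therefore shows up as poshi = 1, poslo = 0, and ((loff_t)poshi << 32) | poslo reassembles the original value.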
1234
1235compat_ssize_t sys32_pwrite64(unsigned int fd, char __user *ubuf, compat_size_t count,
1236 u32 reg6, u32 poshi, u32 poslo)
1237{
1238 return sys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
1239}
1240
1241compat_ssize_t sys32_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count)
1242{
1243 return sys_readahead(fd, ((loff_t)offhi << 32) | offlo, count);
1244}
1245
1246asmlinkage int sys32_truncate64(const char __user * path, u32 reg4,
1247 unsigned long high, unsigned long low)
1248{
1249 return sys_truncate(path, (high << 32) | low);
1250}
1251
1252asmlinkage int sys32_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
1253 unsigned long low)
1254{
1255 return sys_ftruncate(fd, (high << 32) | low);
1256}
1257
1258long ppc32_lookup_dcookie(u32 cookie_high, u32 cookie_low, char __user *buf,
1259 size_t len)
1260{
1261 return sys_lookup_dcookie((u64)cookie_high << 32 | cookie_low,
1262 buf, len);
1263}
1264
1265long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low,
1266 size_t len, int advice)
1267{
1268 return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, len,
1269 advice);
1270}
1271
1272long ppc32_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
1273 u32 len_high, u32 len_low)
1274{
1275 return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low,
1276 (u64)len_high << 32 | len_low, advice);
1277}
1278
1279extern asmlinkage long sys_timer_create(clockid_t, sigevent_t __user *, timer_t __user *);
1280
1281long ppc32_timer_create(clockid_t clock,
1282 struct compat_sigevent __user *ev32,
1283 timer_t __user *timer_id)
1284{
1285 sigevent_t event;
1286 timer_t t;
1287 long err;
1288 mm_segment_t savefs;
1289
1290 if (ev32 == NULL)
1291 return sys_timer_create(clock, NULL, timer_id);
1292
1293 if (get_compat_sigevent(&event, ev32))
1294 return -EFAULT;
1295
1296 if (!access_ok(VERIFY_WRITE, timer_id, sizeof(timer_t)))
1297 return -EFAULT;
1298
1299 savefs = get_fs();
1300 set_fs(KERNEL_DS);
1301 /* The __user pointer casts are valid due to the set_fs() */
1302 err = sys_timer_create(clock,
1303 (sigevent_t __user *) &event,
1304 (timer_t __user *) &t);
1305 set_fs(savefs);
1306
1307 if (err == 0)
1308 err = __put_user(t, timer_id);
1309
1310 return err;
1311}
1312
1313asmlinkage long sys32_add_key(const char __user *_type,
1314 const char __user *_description,
1315 const void __user *_payload,
1316 u32 plen,
1317 u32 ringid)
1318{
1319 return sys_add_key(_type, _description, _payload, plen, ringid);
1320}
1321
1322asmlinkage long sys32_request_key(const char __user *_type,
1323 const char __user *_description,
1324 const char __user *_callout_info,
1325 u32 destringid)
1326{
1327 return sys_request_key(_type, _description, _callout_info, destringid);
1328}
1329
diff --git a/arch/ppc64/kernel/syscalls.c b/arch/ppc64/kernel/syscalls.c
new file mode 100644
index 000000000000..f2865ff8d2f9
--- /dev/null
+++ b/arch/ppc64/kernel/syscalls.c
@@ -0,0 +1,258 @@
1/*
2 * linux/arch/ppc64/kernel/sys_ppc.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Derived from "arch/i386/kernel/sys_i386.c"
8 * Adapted from the i386 version by Gary Thomas
9 * Modified by Cort Dougan (cort@cs.nmt.edu)
10 * and Paul Mackerras (paulus@cs.anu.edu.au).
11 *
12 * This file contains various random system calls that
13 * have a non-standard calling sequence on the Linux/PPC
14 * platform.
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version
19 * 2 of the License, or (at your option) any later version.
20 *
21 */
22
23#include <linux/errno.h>
24#include <linux/sched.h>
25#include <linux/syscalls.h>
26#include <linux/mm.h>
27#include <linux/smp.h>
28#include <linux/smp_lock.h>
29#include <linux/sem.h>
30#include <linux/msg.h>
31#include <linux/shm.h>
32#include <linux/stat.h>
33#include <linux/mman.h>
34#include <linux/sys.h>
35#include <linux/ipc.h>
36#include <linux/utsname.h>
37#include <linux/file.h>
38#include <linux/init.h>
39#include <linux/personality.h>
40
41#include <asm/uaccess.h>
42#include <asm/ipc.h>
43#include <asm/semaphore.h>
44#include <asm/time.h>
45#include <asm/unistd.h>
46
47extern unsigned long wall_jiffies;
48
49void
50check_bugs(void)
51{
52}
53
54/*
55 * sys_ipc() is the de-multiplexer for the SysV IPC calls.
56 *
57 * This is really horribly ugly.
58 */
59asmlinkage int
60sys_ipc (uint call, int first, unsigned long second, long third,
61 void __user *ptr, long fifth)
62{
63 int version, ret;
64
65 version = call >> 16; /* hack for backward compatibility */
66 call &= 0xffff;
67
68 ret = -ENOSYS;
69 switch (call) {
70 case SEMOP:
71 ret = sys_semtimedop(first, (struct sembuf __user *)ptr,
72 (unsigned)second, NULL);
73 break;
74 case SEMTIMEDOP:
75 ret = sys_semtimedop(first, (struct sembuf __user *)ptr,
76 (unsigned)second,
77 (const struct timespec __user *) fifth);
78 break;
79 case SEMGET:
80 ret = sys_semget (first, (int)second, third);
81 break;
82 case SEMCTL: {
83 union semun fourth;
84
85 ret = -EINVAL;
86 if (!ptr)
87 break;
88 if ((ret = get_user(fourth.__pad, (void __user * __user *)ptr)))
89 break;
90 ret = sys_semctl(first, (int)second, third, fourth);
91 break;
92 }
93 case MSGSND:
94 ret = sys_msgsnd(first, (struct msgbuf __user *)ptr,
95 (size_t)second, third);
96 break;
97 case MSGRCV:
98 switch (version) {
99 case 0: {
100 struct ipc_kludge tmp;
101
102 ret = -EINVAL;
103 if (!ptr)
104 break;
105 if ((ret = copy_from_user(&tmp,
106 (struct ipc_kludge __user *) ptr,
107 sizeof (tmp)) ? -EFAULT : 0))
108 break;
109 ret = sys_msgrcv(first, tmp.msgp, (size_t) second,
110 tmp.msgtyp, third);
111 break;
112 }
113 default:
114 ret = sys_msgrcv (first, (struct msgbuf __user *) ptr,
115 (size_t)second, fifth, third);
116 break;
117 }
118 break;
119 case MSGGET:
120 ret = sys_msgget ((key_t)first, (int)second);
121 break;
122 case MSGCTL:
123 ret = sys_msgctl(first, (int)second,
124 (struct msqid_ds __user *)ptr);
125 break;
126 case SHMAT:
127 switch (version) {
128 default: {
129 ulong raddr;
130 ret = do_shmat(first, (char __user *) ptr,
131 (int)second, &raddr);
132 if (ret)
133 break;
134 ret = put_user (raddr, (ulong __user *) third);
135 break;
136 }
137 case 1: /* iBCS2 emulator entry point */
138 ret = -EINVAL;
139 if (!segment_eq(get_fs(), get_ds()))
140 break;
141 ret = do_shmat(first, (char __user *)ptr,
142 (int)second, (ulong *)third);
143 break;
144 }
145 break;
146 case SHMDT:
147 ret = sys_shmdt ((char __user *)ptr);
148 break;
149 case SHMGET:
150 ret = sys_shmget (first, (size_t)second, third);
151 break;
152 case SHMCTL:
153 ret = sys_shmctl(first, (int)second,
154 (struct shmid_ds __user *)ptr);
155 break;
156 }
157
158 return ret;
159}
160
161/*
162 * sys_pipe() is the normal C calling standard for creating
163 * a pipe. It's not the way unix traditionally does this, though.
164 */
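/*
 * (Historically, pipe() handed the two descriptors back as a pair of
 * register return values; here they are instead copied out to the
 * user-supplied array.)
 */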
165asmlinkage int sys_pipe(int __user *fildes)
166{
167 int fd[2];
168 int error;
169
170 error = do_pipe(fd);
171 if (!error) {
172 if (copy_to_user(fildes, fd, 2*sizeof(int)))
173 error = -EFAULT;
174 }
175
176 return error;
177}
178
179unsigned long sys_mmap(unsigned long addr, size_t len,
180 unsigned long prot, unsigned long flags,
181 unsigned long fd, off_t offset)
182{
183 struct file * file = NULL;
184 unsigned long ret = -EBADF;
185
186 if (!(flags & MAP_ANONYMOUS)) {
187 if (!(file = fget(fd)))
188 goto out;
189 }
190
191 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
192 down_write(&current->mm->mmap_sem);
193 ret = do_mmap(file, addr, len, prot, flags, offset);
194 up_write(&current->mm->mmap_sem);
195 if (file)
196 fput(file);
197
198out:
199 return ret;
200}
201
202static int __init set_fakeppc(char *str)
203{
204 if (*str)
205 return 0;
206 init_task.personality = PER_LINUX32;
207 return 1;
208}
209__setup("fakeppc", set_fakeppc);
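/*
 * Usage sketch: booting with a bare "fakeppc" gives init -- and thus
 * everything it spawns -- the PER_LINUX32 personality, so
 * personality-aware code elsewhere treats those processes as 32-bit;
 * "fakeppc=<arg>" is ignored by the *str check above.
 */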
210
211asmlinkage int sys_uname(struct old_utsname __user * name)
212{
213 int err = -EFAULT;
214
215 down_read(&uts_sem);
216 if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
217 err = 0;
218 up_read(&uts_sem);
219
220 return err;
221}
222
223asmlinkage time_t sys64_time(time_t __user * tloc)
224{
225 time_t secs;
226 time_t usecs;
227
228 long tb_delta = tb_ticks_since(tb_last_stamp);
229 tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy;
230
231 secs = xtime.tv_sec;
232 usecs = (xtime.tv_nsec/1000) + tb_delta / tb_ticks_per_usec;
233 while (usecs >= USEC_PER_SEC) {
234 ++secs;
235 usecs -= USEC_PER_SEC;
236 }
237
238 if (tloc) {
239 if (put_user(secs,tloc))
240 secs = -EFAULT;
241 }
242
243 return secs;
244}
245
246void do_show_syscall(unsigned long r3, unsigned long r4, unsigned long r5,
247 unsigned long r6, unsigned long r7, unsigned long r8,
248 struct pt_regs *regs)
249{
250 printk("syscall %ld(%lx, %lx, %lx, %lx, %lx, %lx) regs=%p current=%p"
251 " cpu=%d\n", regs->gpr[0], r3, r4, r5, r6, r7, r8, regs,
252 current, smp_processor_id());
253}
254
255void do_show_syscall_exit(unsigned long r3)
256{
257 printk(" -> %lx, current=%p cpu=%d\n", r3, current, smp_processor_id());
258}
diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c
new file mode 100644
index 000000000000..0925694c3ce5
--- /dev/null
+++ b/arch/ppc64/kernel/sysfs.c
@@ -0,0 +1,431 @@
1#include <linux/config.h>
2#include <linux/sysdev.h>
3#include <linux/cpu.h>
4#include <linux/smp.h>
5#include <linux/percpu.h>
6#include <linux/init.h>
7#include <linux/sched.h>
8#include <linux/module.h>
9#include <linux/nodemask.h>
10#include <linux/cpumask.h>
11#include <linux/notifier.h>
12
13#include <asm/current.h>
14#include <asm/processor.h>
15#include <asm/cputable.h>
16#include <asm/hvcall.h>
17#include <asm/prom.h>
18#include <asm/systemcfg.h>
19#include <asm/paca.h>
20#include <asm/lppaca.h>
21#include <asm/machdep.h>
22
23static DEFINE_PER_CPU(struct cpu, cpu_devices);
24
25/* SMT stuff */
26
27#ifdef CONFIG_PPC_MULTIPLATFORM
28/* default to snooze disabled */
29DEFINE_PER_CPU(unsigned long, smt_snooze_delay);
30
31static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
32 size_t count)
33{
34 struct cpu *cpu = container_of(dev, struct cpu, sysdev);
35 ssize_t ret;
36 unsigned long snooze;
37
38 ret = sscanf(buf, "%lu", &snooze);
39 if (ret != 1)
40 return -EINVAL;
41
42 per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
43
44 return count;
45}
46
47static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf)
48{
49 struct cpu *cpu = container_of(dev, struct cpu, sysdev);
50
51 return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id));
52}
53
54static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
55 store_smt_snooze_delay);
56
57/* Only parse OF options if the matching cmdline option was not specified */
58static int smt_snooze_cmdline;
59
60static int __init smt_setup(void)
61{
62 struct device_node *options;
63 unsigned int *val;
64 unsigned int cpu;
65
66 if (!cpu_has_feature(CPU_FTR_SMT))
67 return 1;
68
69 options = find_path_device("/options");
70 if (!options)
71 return 1;
72
73 val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
74 NULL);
75 if (!smt_snooze_cmdline && val) {
76 for_each_cpu(cpu)
77 per_cpu(smt_snooze_delay, cpu) = *val;
78 }
79
80 return 1;
81}
82__initcall(smt_setup);
83
84static int __init setup_smt_snooze_delay(char *str)
85{
86 unsigned int cpu;
87 int snooze;
88
89 if (!cpu_has_feature(CPU_FTR_SMT))
90 return 1;
91
92 smt_snooze_cmdline = 1;
93
94 if (get_option(&str, &snooze)) {
95 for_each_cpu(cpu)
96 per_cpu(smt_snooze_delay, cpu) = snooze;
97 }
98
99 return 1;
100}
101__setup("smt-snooze-delay=", setup_smt_snooze_delay);
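/*
 * Usage sketch: booting with "smt-snooze-delay=100" seeds every cpu's
 * delay and, via smt_snooze_cmdline above, keeps smt_setup() from
 * overriding it with the firmware's ibm,smt-snooze-delay property.
 */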
102
103/*
104 * Enabling PMCs will slow partition context switch times so we only do
105 * it the first time we write to the PMCs.
106 */
107
108static DEFINE_PER_CPU(char, pmcs_enabled);
109
110void ppc64_enable_pmcs(void)
111{
112 unsigned long hid0;
113#ifdef CONFIG_PPC_PSERIES
114 unsigned long set, reset;
115 int ret;
116 unsigned int ctrl;
117#endif /* CONFIG_PPC_PSERIES */
118
119 /* Only need to enable them once */
120 if (__get_cpu_var(pmcs_enabled))
121 return;
122
123 __get_cpu_var(pmcs_enabled) = 1;
124
125 switch (systemcfg->platform) {
126 case PLATFORM_PSERIES:
127 case PLATFORM_POWERMAC:
128 hid0 = mfspr(HID0);
129 hid0 |= 1UL << (63 - 20);
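		/* (63 - 20): IBM MSB-0 numbering -- this sets HID0 bit 20. */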
130
131 /* POWER4 requires the following sequence */
132 asm volatile(
133 "sync\n"
134 "mtspr %1, %0\n"
135 "mfspr %0, %1\n"
136 "mfspr %0, %1\n"
137 "mfspr %0, %1\n"
138 "mfspr %0, %1\n"
139 "mfspr %0, %1\n"
140 "mfspr %0, %1\n"
141 "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0):
142 "memory");
143 break;
144
145#ifdef CONFIG_PPC_PSERIES
146 case PLATFORM_PSERIES_LPAR:
147 set = 1UL << 63;
148 reset = 0;
149 ret = plpar_hcall_norets(H_PERFMON, set, reset);
150 if (ret)
151 printk(KERN_ERR "H_PERFMON call on cpu %u "
152 "returned %d\n",
153 smp_processor_id(), ret);
154 break;
155#endif /* CONFIG_PPC_PSERIES */
156
157 default:
158 break;
159 }
160
161#ifdef CONFIG_PPC_PSERIES
162 /* instruct hypervisor to maintain PMCs */
163 if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
164 get_paca()->lppaca.pmcregs_in_use = 1;
165
166 /*
167 * On SMT machines we have to set the run latch in the ctrl register
168 * in order to make PMC6 spin.
169 */
170 if (cpu_has_feature(CPU_FTR_SMT)) {
171 ctrl = mfspr(CTRLF);
172 ctrl |= RUNLATCH;
173 mtspr(CTRLT, ctrl);
174 }
175#endif /* CONFIG_PPC_PSERIES */
176}
177
178#else
179
180/* PMC stuff */
181void ppc64_enable_pmcs(void)
182{
183 /* XXX Implement for iseries */
184}
185#endif /* CONFIG_PPC_MULTIPLATFORM */
186
187EXPORT_SYMBOL(ppc64_enable_pmcs);
188
189/* XXX convert to rusty's on_one_cpu */
190static unsigned long run_on_cpu(unsigned long cpu,
191 unsigned long (*func)(unsigned long),
192 unsigned long arg)
193{
194 cpumask_t old_affinity = current->cpus_allowed;
195 unsigned long ret;
196
197 /* should return -EINVAL to userspace */
198 if (set_cpus_allowed(current, cpumask_of_cpu(cpu)))
199 return 0;
200
201 ret = func(arg);
202
203 set_cpus_allowed(current, old_affinity);
204
205 return ret;
206}
207
208#define SYSFS_PMCSETUP(NAME, ADDRESS) \
209static unsigned long read_##NAME(unsigned long junk) \
210{ \
211 return mfspr(ADDRESS); \
212} \
213static unsigned long write_##NAME(unsigned long val) \
214{ \
215 ppc64_enable_pmcs(); \
216 mtspr(ADDRESS, val); \
217 return 0; \
218} \
219static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
220{ \
221 struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
222 unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \
223 return sprintf(buf, "%lx\n", val); \
224} \
225static ssize_t __attribute_used__ \
226 store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
227{ \
228 struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
229 unsigned long val; \
230 int ret = sscanf(buf, "%lx", &val); \
231 if (ret != 1) \
232 return -EINVAL; \
233 run_on_cpu(cpu->sysdev.id, write_##NAME, val); \
234 return count; \
235}
236
237SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
238SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
239SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
240SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
241SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
242SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
243SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
244SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
245SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
246SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
247SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
248SYSFS_PMCSETUP(purr, SPRN_PURR);
249
250static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0);
251static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1);
252static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
253static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1);
254static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2);
255static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3);
256static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4);
257static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5);
258static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6);
259static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7);
260static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8);
261static SYSDEV_ATTR(purr, 0600, show_purr, NULL);
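/*
 * Resulting interface (a sketch; the sysdev cpu class places these
 * under /sys/devices/system/cpu/): a write such as
 *
 *	echo 1234abcd > /sys/devices/system/cpu/cpu0/pmc1
 *
 * is parsed as hex by store_pmc1(), hops to cpu 0 via run_on_cpu(),
 * enables the PMCs on first use and performs the mtspr there.
 */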
262
263static void register_cpu_online(unsigned int cpu)
264{
265 struct cpu *c = &per_cpu(cpu_devices, cpu);
266 struct sys_device *s = &c->sysdev;
267
268#ifndef CONFIG_PPC_ISERIES
269 if (cpu_has_feature(CPU_FTR_SMT))
270 sysdev_create_file(s, &attr_smt_snooze_delay);
271#endif
272
273 /* PMC stuff */
274
275 sysdev_create_file(s, &attr_mmcr0);
276 sysdev_create_file(s, &attr_mmcr1);
277
278 if (cpu_has_feature(CPU_FTR_MMCRA))
279 sysdev_create_file(s, &attr_mmcra);
280
281 sysdev_create_file(s, &attr_pmc1);
282 sysdev_create_file(s, &attr_pmc2);
283 sysdev_create_file(s, &attr_pmc3);
284 sysdev_create_file(s, &attr_pmc4);
285 sysdev_create_file(s, &attr_pmc5);
286 sysdev_create_file(s, &attr_pmc6);
287
288 if (cpu_has_feature(CPU_FTR_PMC8)) {
289 sysdev_create_file(s, &attr_pmc7);
290 sysdev_create_file(s, &attr_pmc8);
291 }
292
293 if (cpu_has_feature(CPU_FTR_SMT))
294 sysdev_create_file(s, &attr_purr);
295}
296
297#ifdef CONFIG_HOTPLUG_CPU
298static void unregister_cpu_online(unsigned int cpu)
299{
300 struct cpu *c = &per_cpu(cpu_devices, cpu);
301 struct sys_device *s = &c->sysdev;
302
303 BUG_ON(c->no_control);
304
305#ifndef CONFIG_PPC_ISERIES
306 if (cpu_has_feature(CPU_FTR_SMT))
307 sysdev_remove_file(s, &attr_smt_snooze_delay);
308#endif
309
310 /* PMC stuff */
311
312 sysdev_remove_file(s, &attr_mmcr0);
313 sysdev_remove_file(s, &attr_mmcr1);
314
315 if (cpu_has_feature(CPU_FTR_MMCRA))
316 sysdev_remove_file(s, &attr_mmcra);
317
318 sysdev_remove_file(s, &attr_pmc1);
319 sysdev_remove_file(s, &attr_pmc2);
320 sysdev_remove_file(s, &attr_pmc3);
321 sysdev_remove_file(s, &attr_pmc4);
322 sysdev_remove_file(s, &attr_pmc5);
323 sysdev_remove_file(s, &attr_pmc6);
324
325 if (cpu_has_feature(CPU_FTR_PMC8)) {
326 sysdev_remove_file(s, &attr_pmc7);
327 sysdev_remove_file(s, &attr_pmc8);
328 }
329
330 if (cpu_has_feature(CPU_FTR_SMT))
331 sysdev_remove_file(s, &attr_purr);
332}
333#endif /* CONFIG_HOTPLUG_CPU */
334
335static int __devinit sysfs_cpu_notify(struct notifier_block *self,
336 unsigned long action, void *hcpu)
337{
338 unsigned int cpu = (unsigned int)(long)hcpu;
339
340 switch (action) {
341 case CPU_ONLINE:
342 register_cpu_online(cpu);
343 break;
344#ifdef CONFIG_HOTPLUG_CPU
345 case CPU_DEAD:
346 unregister_cpu_online(cpu);
347 break;
348#endif
349 }
350 return NOTIFY_OK;
351}
352
353static struct notifier_block __devinitdata sysfs_cpu_nb = {
354 .notifier_call = sysfs_cpu_notify,
355};
356
357/* NUMA stuff */
358
359#ifdef CONFIG_NUMA
360static struct node node_devices[MAX_NUMNODES];
361
362static void register_nodes(void)
363{
364 int i;
365
366 for (i = 0; i < MAX_NUMNODES; i++) {
367 if (node_online(i)) {
368 int p_node = parent_node(i);
369 struct node *parent = NULL;
370
371 if (p_node != i)
372 parent = &node_devices[p_node];
373
374 register_node(&node_devices[i], i, parent);
375 }
376 }
377}
378#else
379static void register_nodes(void)
380{
381 return;
382}
383#endif
384
385/* Only valid if CPU is present. */
386static ssize_t show_physical_id(struct sys_device *dev, char *buf)
387{
388 struct cpu *cpu = container_of(dev, struct cpu, sysdev);
389
390 return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id));
391}
392static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL);
393
394static int __init topology_init(void)
395{
396 int cpu;
397 struct node *parent = NULL;
398
399 register_nodes();
400
401 register_cpu_notifier(&sysfs_cpu_nb);
402
403 for_each_cpu(cpu) {
404 struct cpu *c = &per_cpu(cpu_devices, cpu);
405
406#ifdef CONFIG_NUMA
407 parent = &node_devices[cpu_to_node(cpu)];
408#endif
409 /*
410 * For now, we just see if the system supports making
411 * the RTAS calls for CPU hotplug. But, there may be a
412 * more comprehensive way to do this for an individual
413 * CPU. For instance, the boot cpu might never be valid
414 * for hotplugging.
415 */
416 if (!ppc_md.cpu_die)
417 c->no_control = 1;
418
419 if (cpu_online(cpu) || (c->no_control == 0)) {
420 register_cpu(c, cpu, parent);
421
422 sysdev_create_file(&c->sysdev, &attr_physical_id);
423 }
424
425 if (cpu_online(cpu))
426 register_cpu_online(cpu);
427 }
428
429 return 0;
430}
431__initcall(topology_init);
diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c
new file mode 100644
index 000000000000..77ded5a363b6
--- /dev/null
+++ b/arch/ppc64/kernel/time.c
@@ -0,0 +1,827 @@
1/*
2 *
3 * Common time routines among all ppc machines.
4 *
5 * Written by Cort Dougan (cort@cs.nmt.edu) to merge
6 * Paul Mackerras' version and mine for PReP and Pmac.
7 * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
8 * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
9 *
10 * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
11 * to make clock more stable (2.4.0-test5). The only thing
12 * that this code assumes is that the timebases have been synchronized
13 * by firmware on SMP and are never stopped (never do sleep
14 * on SMP then, nap and doze are OK).
15 *
16 * Speeded up do_gettimeofday by getting rid of references to
17 * xtime (which required locks for consistency). (mikejc@us.ibm.com)
18 *
19 * TODO (not necessarily in this file):
20 * - improve precision and reproducibility of timebase frequency
21 * measurement at boot time. (for iSeries, we calibrate the timebase
22 * against the Titan chip's clock.)
23 * - for astronomical applications: add a new function to get
24 * non ambiguous timestamps even around leap seconds. This needs
25 * a new timestamp format and a good name.
26 *
27 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
28 * "A Kernel Model for Precision Timekeeping" by Dave Mills
29 *
30 * This program is free software; you can redistribute it and/or
31 * modify it under the terms of the GNU General Public License
32 * as published by the Free Software Foundation; either version
33 * 2 of the License, or (at your option) any later version.
34 */
35
36#include <linux/config.h>
37#include <linux/errno.h>
38#include <linux/module.h>
39#include <linux/sched.h>
40#include <linux/kernel.h>
41#include <linux/param.h>
42#include <linux/string.h>
43#include <linux/mm.h>
44#include <linux/interrupt.h>
45#include <linux/timex.h>
46#include <linux/kernel_stat.h>
47#include <linux/mc146818rtc.h>
48#include <linux/time.h>
49#include <linux/init.h>
50#include <linux/profile.h>
51#include <linux/cpu.h>
52#include <linux/security.h>
53
54#include <asm/segment.h>
55#include <asm/io.h>
56#include <asm/processor.h>
57#include <asm/nvram.h>
58#include <asm/cache.h>
59#include <asm/machdep.h>
60#ifdef CONFIG_PPC_ISERIES
61#include <asm/iSeries/ItLpQueue.h>
62#include <asm/iSeries/HvCallXm.h>
63#endif
64#include <asm/uaccess.h>
65#include <asm/time.h>
66#include <asm/ppcdebug.h>
67#include <asm/prom.h>
68#include <asm/sections.h>
69#include <asm/systemcfg.h>
70
71u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
72
73EXPORT_SYMBOL(jiffies_64);
74
75/* keep track of when we need to update the rtc */
76time_t last_rtc_update;
77extern int piranha_simulator;
78#ifdef CONFIG_PPC_ISERIES
79unsigned long iSeries_recal_titan = 0;
80unsigned long iSeries_recal_tb = 0;
81static unsigned long first_settimeofday = 1;
82#endif
83
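/* Internal time offsets are kept in units of 1/2^20 second ("xsec"). */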
84#define XSEC_PER_SEC (1024*1024)
85
86unsigned long tb_ticks_per_jiffy;
87unsigned long tb_ticks_per_usec = 100; /* sane default */
88EXPORT_SYMBOL(tb_ticks_per_usec);
89unsigned long tb_ticks_per_sec;
90unsigned long tb_to_xs;
91unsigned tb_to_us;
92unsigned long processor_freq;
93DEFINE_SPINLOCK(rtc_lock);
94
95unsigned long tb_to_ns_scale;
96unsigned long tb_to_ns_shift;
97
98struct gettimeofday_struct do_gtod;
99
100extern unsigned long wall_jiffies;
101extern unsigned long lpevent_count;
102extern int smp_tb_synchronized;
103
104extern struct timezone sys_tz;
105
106void ppc_adjtimex(void);
107
108static unsigned adjusting_time = 0;
109
110static __inline__ void timer_check_rtc(void)
111{
112 /*
113	 * update the rtc when needed; this should be performed on the
114	 * right fraction of a second. Half or full second?
115 * Full second works on mk48t59 clocks, others need testing.
116 * Note that this update is basically only used through
117 * the adjtimex system calls. Setting the HW clock in
118 * any other way is a /dev/rtc and userland business.
119 * This is still wrong by -0.5/+1.5 jiffies because of the
120 * timer interrupt resolution and possible delay, but here we
121 * hit a quantization limit which can only be solved by higher
122 * resolution timers and decoupling time management from timer
123 * interrupts. This is also wrong on the clocks
124 * which require being written at the half second boundary.
125 * We should have an rtc call that only sets the minutes and
126	 * seconds, as on Intel, to avoid problems with non-UTC clocks.
127 */
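	/* (Spelling out the magic numbers: "659" limits writes to about
	 * one per 11 minutes, the classic NTP RTC-sync interval, and the
	 * "+= 60" below retries a failed write one minute later.) */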
128 if ( (time_status & STA_UNSYNC) == 0 &&
129 xtime.tv_sec - last_rtc_update >= 659 &&
130 abs((xtime.tv_nsec/1000) - (1000000-1000000/HZ)) < 500000/HZ &&
131 jiffies - wall_jiffies == 1) {
132 struct rtc_time tm;
133 to_tm(xtime.tv_sec+1, &tm);
134 tm.tm_year -= 1900;
135 tm.tm_mon -= 1;
136 if (ppc_md.set_rtc_time(&tm) == 0)
137 last_rtc_update = xtime.tv_sec+1;
138 else
139 /* Try again one minute later */
140 last_rtc_update += 60;
141 }
142}
143
144/*
145 * This version of gettimeofday has microsecond resolution.
146 */
147static inline void __do_gettimeofday(struct timeval *tv, unsigned long tb_val)
148{
149 unsigned long sec, usec, tb_ticks;
150 unsigned long xsec, tb_xsec;
151 struct gettimeofday_vars * temp_varp;
152 unsigned long temp_tb_to_xs, temp_stamp_xsec;
153
154 /*
155 * These calculations are faster (gets rid of divides)
156 * if done in units of 1/2^20 rather than microseconds.
157 * The conversion to microseconds at the end is done
158 * without a divide (and in fact, without a multiply)
159 */
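	/*
	 * (Concretely: XSEC_PER_SEC is 2^20, so the divides by it below
	 * reduce to shifts, and the multiply by the USEC_PER_SEC
	 * constant can be strength-reduced -- no divide instruction is
	 * needed on this path.)
	 */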
160 temp_varp = do_gtod.varp;
161 tb_ticks = tb_val - temp_varp->tb_orig_stamp;
162 temp_tb_to_xs = temp_varp->tb_to_xs;
163 temp_stamp_xsec = temp_varp->stamp_xsec;
164 tb_xsec = mulhdu( tb_ticks, temp_tb_to_xs );
165 xsec = temp_stamp_xsec + tb_xsec;
166 sec = xsec / XSEC_PER_SEC;
167 xsec -= sec * XSEC_PER_SEC;
168 usec = (xsec * USEC_PER_SEC)/XSEC_PER_SEC;
169
170 tv->tv_sec = sec;
171 tv->tv_usec = usec;
172}
173
174void do_gettimeofday(struct timeval *tv)
175{
176 __do_gettimeofday(tv, get_tb());
177}
178
179EXPORT_SYMBOL(do_gettimeofday);
180
181/* Synchronize xtime with do_gettimeofday */
182
183static inline void timer_sync_xtime(unsigned long cur_tb)
184{
185 struct timeval my_tv;
186
187 __do_gettimeofday(&my_tv, cur_tb);
188
189 if (xtime.tv_sec <= my_tv.tv_sec) {
190 xtime.tv_sec = my_tv.tv_sec;
191 xtime.tv_nsec = my_tv.tv_usec * 1000;
192 }
193}
194
195/*
196 * When the timebase - tb_orig_stamp gets too big, we do a manipulation
197 * between tb_orig_stamp and stamp_xsec. The goal here is to keep the
198 * difference tb - tb_orig_stamp small enough to always fit inside a
199 * 32-bit number. This is a requirement of our fast 32-bit userland
200 * implementation in the vdso. If we "miss" a call to this function
201 * (interrupt latency, CPU locked in a spinlock, ...) and end up
202 * with too big a difference, then the vdso will fall back to calling
203 * the syscall.
204 */
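/*
 * (Scale of the threshold, with an assumed 200 MHz timebase: 2^31
 * ticks is roughly 10.7 seconds, so the per-jiffy call from
 * timer_interrupt() has plenty of slack before the 32-bit delta
 * would overflow.)
 */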
205static __inline__ void timer_recalc_offset(unsigned long cur_tb)
206{
207 struct gettimeofday_vars * temp_varp;
208 unsigned temp_idx;
209 unsigned long offset, new_stamp_xsec, new_tb_orig_stamp;
210
211 if (((cur_tb - do_gtod.varp->tb_orig_stamp) & 0x80000000u) == 0)
212 return;
213
214 temp_idx = (do_gtod.var_idx == 0);
215 temp_varp = &do_gtod.vars[temp_idx];
216
217 new_tb_orig_stamp = cur_tb;
218 offset = new_tb_orig_stamp - do_gtod.varp->tb_orig_stamp;
219 new_stamp_xsec = do_gtod.varp->stamp_xsec + mulhdu(offset, do_gtod.varp->tb_to_xs);
220
221 temp_varp->tb_to_xs = do_gtod.varp->tb_to_xs;
222 temp_varp->tb_orig_stamp = new_tb_orig_stamp;
223 temp_varp->stamp_xsec = new_stamp_xsec;
224 mb();
225 do_gtod.varp = temp_varp;
226 do_gtod.var_idx = temp_idx;
227
228 ++(systemcfg->tb_update_count);
229 wmb();
230 systemcfg->tb_orig_stamp = new_tb_orig_stamp;
231 systemcfg->stamp_xsec = new_stamp_xsec;
232 wmb();
233 ++(systemcfg->tb_update_count);
234}
235
236#ifdef CONFIG_SMP
237unsigned long profile_pc(struct pt_regs *regs)
238{
239 unsigned long pc = instruction_pointer(regs);
240
241 if (in_lock_functions(pc))
242 return regs->link;
243
244 return pc;
245}
246EXPORT_SYMBOL(profile_pc);
247#endif
248
249#ifdef CONFIG_PPC_ISERIES
250
251/*
252 * This function recalibrates the timebase based on the 49-bit time-of-day
253 * value in the Titan chip. The Titan is much more accurate than the value
254 * returned by the service processor for the timebase frequency.
255 */
256
257static void iSeries_tb_recal(void)
258{
259 struct div_result divres;
260 unsigned long titan, tb;
261 tb = get_tb();
262 titan = HvCallXm_loadTod();
263 if ( iSeries_recal_titan ) {
264 unsigned long tb_ticks = tb - iSeries_recal_tb;
265 unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12;
266 unsigned long new_tb_ticks_per_sec = (tb_ticks * USEC_PER_SEC)/titan_usec;
267 unsigned long new_tb_ticks_per_jiffy = (new_tb_ticks_per_sec+(HZ/2))/HZ;
268 long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy;
269 char sign = '+';
270 /* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */
271 new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ;
272
273 if ( tick_diff < 0 ) {
274 tick_diff = -tick_diff;
275 sign = '-';
276 }
277 if ( tick_diff ) {
278 if ( tick_diff < tb_ticks_per_jiffy/25 ) {
279 printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n",
280 new_tb_ticks_per_jiffy, sign, tick_diff );
281 tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
282 tb_ticks_per_sec = new_tb_ticks_per_sec;
283 div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
284 do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
285 tb_to_xs = divres.result_low;
286 do_gtod.varp->tb_to_xs = tb_to_xs;
287 systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
288 systemcfg->tb_to_xs = tb_to_xs;
289 }
290 else {
291 printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
292 " new tb_ticks_per_jiffy = %lu\n"
293 " old tb_ticks_per_jiffy = %lu\n",
294 new_tb_ticks_per_jiffy, tb_ticks_per_jiffy );
295 }
296 }
297 }
298 iSeries_recal_titan = titan;
299 iSeries_recal_tb = tb;
300}
301#endif
302
303/*
304 * For iSeries shared processors, we have to let the hypervisor
305 * set the hardware decrementer. We set a virtual decrementer
306 * in the lppaca and call the hypervisor if the virtual
307 * decrementer is less than the current value in the hardware
308 * decrementer. (almost always the new decrementer value will
309 * be greater than the current hardware decrementer so the hypervisor
310 * call will not be needed)
311 */
312
313unsigned long tb_last_stamp __cacheline_aligned_in_smp;
314
315/*
316 * timer_interrupt - gets called when the decrementer overflows,
317 * with interrupts disabled.
318 */
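/*
 * (Structure note: the while loop below replays every jiffy boundary
 * that has passed since the last tick, so a late decrementer interrupt
 * catches time up rather than losing ticks.)
 */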
319int timer_interrupt(struct pt_regs * regs)
320{
321 int next_dec;
322 unsigned long cur_tb;
323 struct paca_struct *lpaca = get_paca();
324 unsigned long cpu = smp_processor_id();
325
326 irq_enter();
327
328#ifndef CONFIG_PPC_ISERIES
329 profile_tick(CPU_PROFILING, regs);
330#endif
331
332 lpaca->lppaca.int_dword.fields.decr_int = 0;
333
334 while (lpaca->next_jiffy_update_tb <= (cur_tb = get_tb())) {
335 /*
336 * We cannot disable the decrementer, so in the period
337 * between this cpu's being marked offline in cpu_online_map
338 * and calling stop-self, it is taking timer interrupts.
339 * Avoid calling into the scheduler rebalancing code if this
340 * is the case.
341 */
342 if (!cpu_is_offline(cpu))
343 update_process_times(user_mode(regs));
344 /*
345 * No need to check whether cpu is offline here; boot_cpuid
346 * should have been fixed up by now.
347 */
348 if (cpu == boot_cpuid) {
349 write_seqlock(&xtime_lock);
350 tb_last_stamp = lpaca->next_jiffy_update_tb;
351 timer_recalc_offset(lpaca->next_jiffy_update_tb);
352 do_timer(regs);
353 timer_sync_xtime(lpaca->next_jiffy_update_tb);
354 timer_check_rtc();
355 write_sequnlock(&xtime_lock);
356 if ( adjusting_time && (time_adjust == 0) )
357 ppc_adjtimex();
358 }
359 lpaca->next_jiffy_update_tb += tb_ticks_per_jiffy;
360 }
361
362 next_dec = lpaca->next_jiffy_update_tb - cur_tb;
363 if (next_dec > lpaca->default_decr)
364 next_dec = lpaca->default_decr;
365 set_dec(next_dec);
366
367#ifdef CONFIG_PPC_ISERIES
368 {
369 struct ItLpQueue *lpq = lpaca->lpqueue_ptr;
370 if (lpq && ItLpQueue_isLpIntPending(lpq))
371 lpevent_count += ItLpQueue_process(lpq, regs);
372 }
373#endif
374
375/* collect purr register values often, for accurate calculations */
376#if defined(CONFIG_PPC_PSERIES)
377 if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
378 struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
379 cu->current_tb = mfspr(SPRN_PURR);
380 }
381#endif
382
383 irq_exit();
384
385 return 1;
386}
387
388/*
389 * Scheduler clock - returns current time in nanosec units.
390 *
391 * Note: mulhdu(a, b) (multiply high double unsigned) returns
392 * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
393 * are 64-bit unsigned numbers.
394 */
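/*
 * Putting the pieces together (a sketch, from the time_init() setup
 * below): scale = (10^9 * 2^64 / tb_ticks_per_sec) >> shift, with
 * shift chosen so scale fits in 64 bits, hence
 *
 *	sched_clock() = ((tb * scale) >> 64) << shift
 *	             ~= tb * 10^9 / tb_ticks_per_sec  nanoseconds.
 */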
395unsigned long long sched_clock(void)
396{
397 return mulhdu(get_tb(), tb_to_ns_scale) << tb_to_ns_shift;
398}
399
400int do_settimeofday(struct timespec *tv)
401{
402 time_t wtm_sec, new_sec = tv->tv_sec;
403 long wtm_nsec, new_nsec = tv->tv_nsec;
404 unsigned long flags;
405 unsigned long delta_xsec;
406 long int tb_delta;
407 unsigned long new_xsec;
408
409 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
410 return -EINVAL;
411
412 write_seqlock_irqsave(&xtime_lock, flags);
413 /* Updating the RTC is not the job of this code. If the time is
414	 * stepped under NTP, the RTC will be updated after STA_UNSYNC
415	 * is cleared. Tools like clock/hwclock either copy the RTC
416	 * to the system time, in which case there is no point in writing
417	 * to the RTC again, or write to the RTC themselves, in which case
418	 * they don't call settimeofday at all.
419 */
420#ifdef CONFIG_PPC_ISERIES
421 if ( first_settimeofday ) {
422 iSeries_tb_recal();
423 first_settimeofday = 0;
424 }
425#endif
426 tb_delta = tb_ticks_since(tb_last_stamp);
427 tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy;
428
429 new_nsec -= tb_delta / tb_ticks_per_usec / 1000;
430
431 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - new_sec);
432 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - new_nsec);
433
434 set_normalized_timespec(&xtime, new_sec, new_nsec);
435 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
436
437 /* In case of a large backwards jump in time with NTP, we want the
438 * clock to be updated as soon as the PLL is again in lock.
439 */
440 last_rtc_update = new_sec - 658;
441
442 time_adjust = 0; /* stop active adjtime() */
443 time_status |= STA_UNSYNC;
444 time_maxerror = NTP_PHASE_LIMIT;
445 time_esterror = NTP_PHASE_LIMIT;
446
447 delta_xsec = mulhdu( (tb_last_stamp-do_gtod.varp->tb_orig_stamp),
448 do_gtod.varp->tb_to_xs );
449
450 new_xsec = (new_nsec * XSEC_PER_SEC) / NSEC_PER_SEC;
451 new_xsec += new_sec * XSEC_PER_SEC;
452 if ( new_xsec > delta_xsec ) {
453 do_gtod.varp->stamp_xsec = new_xsec - delta_xsec;
454 systemcfg->stamp_xsec = new_xsec - delta_xsec;
455 }
456 else {
457 /* This is only for the case where the user is setting the time
458 * way back to a time such that the boot time would have been
459 * before 1970 ... eg. we booted ten days ago, and we are setting
460 * the time to Jan 5, 1970 */
461 do_gtod.varp->stamp_xsec = new_xsec;
462 do_gtod.varp->tb_orig_stamp = tb_last_stamp;
463 systemcfg->stamp_xsec = new_xsec;
464 systemcfg->tb_orig_stamp = tb_last_stamp;
465 }
466
467 systemcfg->tz_minuteswest = sys_tz.tz_minuteswest;
468 systemcfg->tz_dsttime = sys_tz.tz_dsttime;
469
470 write_sequnlock_irqrestore(&xtime_lock, flags);
471 clock_was_set();
472 return 0;
473}
474
475EXPORT_SYMBOL(do_settimeofday);
476
477void __init time_init(void)
478{
479 /* This function is only called on the boot processor */
480 unsigned long flags;
481 struct rtc_time tm;
482 struct div_result res;
483 unsigned long scale, shift;
484
485 ppc_md.calibrate_decr();
486
487 /*
488 * Compute scale factor for sched_clock.
489 * The calibrate_decr() function has set tb_ticks_per_sec,
490 * which is the timebase frequency.
491 * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
492 * the 128-bit result as a 64.64 fixed-point number.
493 * We then shift that number right until it is less than 1.0,
494 * giving us the scale factor and shift count to use in
495 * sched_clock().
496 */
497 div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
498 scale = res.result_low;
499 for (shift = 0; res.result_high != 0; ++shift) {
500 scale = (scale >> 1) | (res.result_high << 63);
501 res.result_high >>= 1;
502 }
503 tb_to_ns_scale = scale;
504 tb_to_ns_shift = shift;
505
506#ifdef CONFIG_PPC_ISERIES
507 if (!piranha_simulator)
508#endif
509 ppc_md.get_boot_time(&tm);
510
511 write_seqlock_irqsave(&xtime_lock, flags);
512 xtime.tv_sec = mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
513 tm.tm_hour, tm.tm_min, tm.tm_sec);
514 tb_last_stamp = get_tb();
515 do_gtod.varp = &do_gtod.vars[0];
516 do_gtod.var_idx = 0;
517 do_gtod.varp->tb_orig_stamp = tb_last_stamp;
518 do_gtod.varp->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
519 do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
520 do_gtod.varp->tb_to_xs = tb_to_xs;
521 do_gtod.tb_to_us = tb_to_us;
522 systemcfg->tb_orig_stamp = tb_last_stamp;
523 systemcfg->tb_update_count = 0;
524 systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
525 systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
526 systemcfg->tb_to_xs = tb_to_xs;
527
528 time_freq = 0;
529
530 xtime.tv_nsec = 0;
531 last_rtc_update = xtime.tv_sec;
532 set_normalized_timespec(&wall_to_monotonic,
533 -xtime.tv_sec, -xtime.tv_nsec);
534 write_sequnlock_irqrestore(&xtime_lock, flags);
535
536 /* Not exact, but the timer interrupt takes care of this */
537 set_dec(tb_ticks_per_jiffy);
538}
539
540/*
541 * After adjtimex is called, adjust the conversion of tb ticks
542 * to microseconds to keep do_gettimeofday synchronized
543 * with ntpd.
544 *
545 * Use the time_adjust, time_freq and time_offset computed by adjtimex to
546 * adjust the frequency.
547 */
548
549/* #define DEBUG_PPC_ADJTIMEX 1 */
550
551void ppc_adjtimex(void)
552{
553 unsigned long den, new_tb_ticks_per_sec, tb_ticks, old_xsec, new_tb_to_xs, new_xsec, new_stamp_xsec;
554 unsigned long tb_ticks_per_sec_delta;
555 long delta_freq, ltemp;
556 struct div_result divres;
557 unsigned long flags;
558 struct gettimeofday_vars * temp_varp;
559 unsigned temp_idx;
560 long singleshot_ppm = 0;
561
562 /* Compute parts per million frequency adjustment to accomplish the time adjustment
563 implied by time_offset to be applied over the elapsed time indicated by time_constant.
564 Use SHIFT_USEC to get it into the same units as time_freq. */
565 if ( time_offset < 0 ) {
566 ltemp = -time_offset;
567 ltemp <<= SHIFT_USEC - SHIFT_UPDATE;
568 ltemp >>= SHIFT_KG + time_constant;
569 ltemp = -ltemp;
570 }
571 else {
572 ltemp = time_offset;
573 ltemp <<= SHIFT_USEC - SHIFT_UPDATE;
574 ltemp >>= SHIFT_KG + time_constant;
575 }
576
577 /* If there is a single shot time adjustment in progress */
578 if ( time_adjust ) {
579#ifdef DEBUG_PPC_ADJTIMEX
580 printk("ppc_adjtimex: ");
581 if ( adjusting_time == 0 )
582 printk("starting ");
583 printk("single shot time_adjust = %ld\n", time_adjust);
584#endif
585
586 adjusting_time = 1;
587
588 /* Compute parts per million frequency adjustment to match time_adjust */
589 singleshot_ppm = tickadj * HZ;
590 /*
591 * The adjustment should be tickadj*HZ to match the code in
592 * linux/kernel/timer.c, but experiments show that this is too
593 * large. 3/4 of tickadj*HZ seems about right
594 */
595 singleshot_ppm -= singleshot_ppm / 4;
596 /* Use SHIFT_USEC to get it into the same units as time_freq */
597 singleshot_ppm <<= SHIFT_USEC;
598 if ( time_adjust < 0 )
599 singleshot_ppm = -singleshot_ppm;
600 }
601 else {
602#ifdef DEBUG_PPC_ADJTIMEX
603 if ( adjusting_time )
604 printk("ppc_adjtimex: ending single shot time_adjust\n");
605#endif
606 adjusting_time = 0;
607 }
608
609 /* Add up all of the frequency adjustments */
610 delta_freq = time_freq + ltemp + singleshot_ppm;
611
612 /* Compute a new value for tb_ticks_per_sec based on the frequency adjustment */
613 den = 1000000 * (1 << (SHIFT_USEC - 8));
614 if ( delta_freq < 0 ) {
615 tb_ticks_per_sec_delta = ( tb_ticks_per_sec * ( (-delta_freq) >> (SHIFT_USEC - 8))) / den;
616 new_tb_ticks_per_sec = tb_ticks_per_sec + tb_ticks_per_sec_delta;
617 }
618 else {
619 tb_ticks_per_sec_delta = ( tb_ticks_per_sec * ( delta_freq >> (SHIFT_USEC - 8))) / den;
620 new_tb_ticks_per_sec = tb_ticks_per_sec - tb_ticks_per_sec_delta;
621 }
622
623#ifdef DEBUG_PPC_ADJTIMEX
624 printk("ppc_adjtimex: ltemp = %ld, time_freq = %ld, singleshot_ppm = %ld\n", ltemp, time_freq, singleshot_ppm);
625 printk("ppc_adjtimex: tb_ticks_per_sec - base = %ld new = %ld\n", tb_ticks_per_sec, new_tb_ticks_per_sec);
626#endif
627
628	/* Compute a new value of tb_to_xs (used to convert tb ticks to 1/2^20 second units) and a new value of
629 stamp_xsec which is the time (in 1/2^20 second units) corresponding to tb_orig_stamp. This
630 new value of stamp_xsec compensates for the change in frequency (implied by the new tb_to_xs)
631 which guarantees that the current time remains the same */
632 write_seqlock_irqsave( &xtime_lock, flags );
633 tb_ticks = get_tb() - do_gtod.varp->tb_orig_stamp;
634 div128_by_32( 1024*1024, 0, new_tb_ticks_per_sec, &divres );
635 new_tb_to_xs = divres.result_low;
636 new_xsec = mulhdu( tb_ticks, new_tb_to_xs );
637
638 old_xsec = mulhdu( tb_ticks, do_gtod.varp->tb_to_xs );
639 new_stamp_xsec = do_gtod.varp->stamp_xsec + old_xsec - new_xsec;
640
641 /* There are two copies of tb_to_xs and stamp_xsec so that no lock is needed to access and use these
642 values in do_gettimeofday. We alternate the copies and as long as a reasonable time elapses between
643 changes, there will never be inconsistent values. ntpd has a minimum of one minute between updates */
644
645 temp_idx = (do_gtod.var_idx == 0);
646 temp_varp = &do_gtod.vars[temp_idx];
647
648 temp_varp->tb_to_xs = new_tb_to_xs;
649 temp_varp->stamp_xsec = new_stamp_xsec;
650 temp_varp->tb_orig_stamp = do_gtod.varp->tb_orig_stamp;
651 mb();
652 do_gtod.varp = temp_varp;
653 do_gtod.var_idx = temp_idx;
654
655 /*
656 * tb_update_count is used to allow the problem state gettimeofday code
657 * to assure itself that it sees a consistent view of the tb_to_xs and
658 * stamp_xsec variables. It reads the tb_update_count, then reads
659 * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
660 * the two values of tb_update_count match and are even then the
661 * tb_to_xs and stamp_xsec values are consistent. If not, then it
662	 * loops back and reads them again until this criterion is met.
663 */
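	/*
	 * Reader side, sketched (this is the protocol the comment above
	 * describes, not code from this file):
	 *
	 *	do {
	 *		c1 = systemcfg->tb_update_count;
	 *		xs = systemcfg->tb_to_xs;
	 *		st = systemcfg->stamp_xsec;
	 *		c2 = systemcfg->tb_update_count;
	 *	} while (c1 != c2 || (c1 & 1));
	 */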
664 ++(systemcfg->tb_update_count);
665 wmb();
666 systemcfg->tb_to_xs = new_tb_to_xs;
667 systemcfg->stamp_xsec = new_stamp_xsec;
668 wmb();
669 ++(systemcfg->tb_update_count);
670
671 write_sequnlock_irqrestore( &xtime_lock, flags );
672
673}
674
675
676#define TICK_SIZE tick
677#define FEBRUARY 2
678#define STARTOFTIME 1970
679#define SECDAY 86400L
680#define SECYR (SECDAY * 365)
681#define leapyear(year) ((year) % 4 == 0)
682#define days_in_year(a) (leapyear(a) ? 366 : 365)
683#define days_in_month(a) (month_days[(a) - 1])
684
685static int month_days[12] = {
686 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
687};
688
689/*
690 * This only works for the Gregorian calendar - i.e. after 1752 (in the UK)
691 */
692void GregorianDay(struct rtc_time * tm)
693{
694 int leapsToDate;
695 int lastYear;
696 int day;
697 int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
698
699 lastYear=tm->tm_year-1;
700
701 /*
702 * Number of leap corrections to apply up to end of last year
703 */
704 leapsToDate = lastYear/4 - lastYear/100 + lastYear/400;
705
706 /*
707 * This year is a leap year if it is divisible by 4 except when it is
708 * divisible by 100 unless it is divisible by 400
709 *
710 * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 will be
711 */
712 if((tm->tm_year%4==0) &&
713 ((tm->tm_year%100!=0) || (tm->tm_year%400==0)) &&
714 (tm->tm_mon>2))
715 {
716 /*
717 * We are past Feb. 29 in a leap year
718 */
719 day=1;
720 }
721 else
722 {
723 day=0;
724 }
725
726 day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] +
727 tm->tm_mday;
728
729 tm->tm_wday=day%7;
730}
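/*
 * Worked check: for 1 Jan 1970, lastYear = 1969, leapsToDate =
 * 492 - 19 + 4 = 477, day = 1969*365 + 477 + 0 + 1 = 719163, and
 * 719163 % 7 == 4, i.e. Thursday -- which is correct.
 */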
731
732void to_tm(int tim, struct rtc_time * tm)
733{
734 register int i;
735 register long hms, day;
736
737 day = tim / SECDAY;
738 hms = tim % SECDAY;
739
740 /* Hours, minutes, seconds are easy */
741 tm->tm_hour = hms / 3600;
742 tm->tm_min = (hms % 3600) / 60;
743 tm->tm_sec = (hms % 3600) % 60;
744
745 /* Number of years in days */
746 for (i = STARTOFTIME; day >= days_in_year(i); i++)
747 day -= days_in_year(i);
748 tm->tm_year = i;
749
750 /* Number of months in days left */
751 if (leapyear(tm->tm_year))
752 days_in_month(FEBRUARY) = 29;
753 for (i = 1; day >= days_in_month(i); i++)
754 day -= days_in_month(i);
755 days_in_month(FEBRUARY) = 28;
756 tm->tm_mon = i;
757
758 /* Days are what is left over (+1) from all that. */
759 tm->tm_mday = day + 1;
760
761 /*
762 * Determine the day of week
763 */
764 GregorianDay(tm);
765}
766
767/* Auxiliary function to compute scaling factors */
768/* Actually the choice of a timebase running at 1/4 of the bus
769 * frequency, giving a resolution of a few tens of nanoseconds, is quite nice.
770 * It makes this computation very precise (27-28 bits typically), which
771 * is more than the stability of most processor clock oscillators or
772 * the accuracy of the measured timebase frequency justifies, but it
773 * does no harm.
774 */
775unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale) {
776 unsigned mlt=0, tmp, err;
777 /* No concern for performance, it's done once: use a stupid
778 * but safe and compact method to find the multiplier.
779 */
780
781 for (tmp = 1U<<31; tmp != 0; tmp >>= 1) {
782 if (mulhwu(inscale, mlt|tmp) < outscale) mlt|=tmp;
783 }
784
785 /* We might still be off by 1 for the best approximation.
786 * A side effect of this is that if outscale is too large
787 * the returned value will be zero.
788 * Many corner cases have been checked and seem to work,
789 * some might have been forgotten in the test however.
790 */
791
792 err = inscale*(mlt+1);
793 if (err <= inscale/2) mlt++;
794 return mlt;
795}
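/*
 * Typical use (assumed from callers elsewhere, not shown here):
 * tb_to_us = mulhwu_scale_factor(tb_ticks_per_sec, 1000000) yields the
 * 32-bit fixed-point multiplier m with mulhwu(ticks, m) ~= microseconds.
 */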
796
797/*
798 * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
799 * result.
800 */
801
802void div128_by_32( unsigned long dividend_high, unsigned long dividend_low,
803 unsigned divisor, struct div_result *dr )
804{
805 unsigned long a,b,c,d, w,x,y,z, ra,rb,rc;
806
807 a = dividend_high >> 32;
808 b = dividend_high & 0xffffffff;
809 c = dividend_low >> 32;
810 d = dividend_low & 0xffffffff;
811
812 w = a/divisor;
813 ra = (a - (w * divisor)) << 32;
814
815 x = (ra + b)/divisor;
816 rb = ((ra + b) - (x * divisor)) << 32;
817
818 y = (rb + c)/divisor;
819	rc = ((rb + c) - (y * divisor)) << 32;
820
821 z = (rc + d)/divisor;
822
823 dr->result_high = (w << 32) + x;
824 dr->result_low = (y << 32) + z;
825
826}
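/*
 * (Shape of the algorithm: schoolbook long division in base 2^32.
 * The dividend is split into digits a:b:c:d, each step divides
 * remainder-plus-next-digit by the divisor, and the quotient digits
 * w:x:y:z are reassembled into the 128-bit result.)
 */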
827
diff --git a/arch/ppc64/kernel/traps.c b/arch/ppc64/kernel/traps.c
new file mode 100644
index 000000000000..10fc61f3f6a4
--- /dev/null
+++ b/arch/ppc64/kernel/traps.c
@@ -0,0 +1,565 @@
1/*
2 * linux/arch/ppc64/kernel/traps.c
3 *
4 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Modified by Cort Dougan (cort@cs.nmt.edu)
12 * and Paul Mackerras (paulus@cs.anu.edu.au)
13 */
14
15/*
16 * This file handles the architecture-dependent parts of hardware exceptions
17 */
18
19#include <linux/config.h>
20#include <linux/errno.h>
21#include <linux/sched.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/stddef.h>
25#include <linux/unistd.h>
26#include <linux/slab.h>
27#include <linux/user.h>
28#include <linux/a.out.h>
29#include <linux/interrupt.h>
30#include <linux/init.h>
31#include <linux/module.h>
32#include <linux/delay.h>
33#include <asm/kdebug.h>
34
35#include <asm/pgtable.h>
36#include <asm/uaccess.h>
37#include <asm/system.h>
38#include <asm/io.h>
39#include <asm/processor.h>
40#include <asm/ppcdebug.h>
41#include <asm/rtas.h>
42#include <asm/systemcfg.h>
43#include <asm/machdep.h>
44#include <asm/pmc.h>
45
46#ifdef CONFIG_DEBUGGER
47int (*__debugger)(struct pt_regs *regs);
48int (*__debugger_ipi)(struct pt_regs *regs);
49int (*__debugger_bpt)(struct pt_regs *regs);
50int (*__debugger_sstep)(struct pt_regs *regs);
51int (*__debugger_iabr_match)(struct pt_regs *regs);
52int (*__debugger_dabr_match)(struct pt_regs *regs);
53int (*__debugger_fault_handler)(struct pt_regs *regs);
54
55EXPORT_SYMBOL(__debugger);
56EXPORT_SYMBOL(__debugger_ipi);
57EXPORT_SYMBOL(__debugger_bpt);
58EXPORT_SYMBOL(__debugger_sstep);
59EXPORT_SYMBOL(__debugger_iabr_match);
60EXPORT_SYMBOL(__debugger_dabr_match);
61EXPORT_SYMBOL(__debugger_fault_handler);
62#endif
63
64struct notifier_block *ppc64_die_chain;
65static DEFINE_SPINLOCK(die_notifier_lock);
66
67int register_die_notifier(struct notifier_block *nb)
68{
69 int err = 0;
70 unsigned long flags;
71
72 spin_lock_irqsave(&die_notifier_lock, flags);
73 err = notifier_chain_register(&ppc64_die_chain, nb);
74 spin_unlock_irqrestore(&die_notifier_lock, flags);
75 return err;
76}
77
78/*
79 * Trap & Exception support
80 */
81
82static DEFINE_SPINLOCK(die_lock);
83
84int die(const char *str, struct pt_regs *regs, long err)
85{
86 static int die_counter;
87 int nl = 0;
88
89 if (debugger(regs))
90 return 1;
91
92 console_verbose();
93 spin_lock_irq(&die_lock);
94 bust_spinlocks(1);
95 printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
96#ifdef CONFIG_PREEMPT
97 printk("PREEMPT ");
98 nl = 1;
99#endif
100#ifdef CONFIG_SMP
101 printk("SMP NR_CPUS=%d ", NR_CPUS);
102 nl = 1;
103#endif
104#ifdef CONFIG_DEBUG_PAGEALLOC
105 printk("DEBUG_PAGEALLOC ");
106 nl = 1;
107#endif
108#ifdef CONFIG_NUMA
109 printk("NUMA ");
110 nl = 1;
111#endif
112 switch(systemcfg->platform) {
113 case PLATFORM_PSERIES:
114 printk("PSERIES ");
115 nl = 1;
116 break;
117 case PLATFORM_PSERIES_LPAR:
118 printk("PSERIES LPAR ");
119 nl = 1;
120 break;
121 case PLATFORM_ISERIES_LPAR:
122 printk("ISERIES LPAR ");
123 nl = 1;
124 break;
125 case PLATFORM_POWERMAC:
126 printk("POWERMAC ");
127 nl = 1;
128 break;
129 }
130 if (nl)
131 printk("\n");
132 print_modules();
133 show_regs(regs);
134 bust_spinlocks(0);
135 spin_unlock_irq(&die_lock);
136
137 if (in_interrupt())
138 panic("Fatal exception in interrupt");
139
140 if (panic_on_oops) {
141 printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
142 ssleep(5);
143 panic("Fatal exception");
144 }
145 do_exit(SIGSEGV);
146
147 return 0;
148}
149
150void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
151{
152 siginfo_t info;
153
154 if (!user_mode(regs)) {
155 if (die("Exception in kernel mode", regs, signr))
156 return;
157 }
158
159 memset(&info, 0, sizeof(info));
160 info.si_signo = signr;
161 info.si_code = code;
162 info.si_addr = (void __user *) addr;
163 force_sig_info(signr, &info, current);
164}
165
166void system_reset_exception(struct pt_regs *regs)
167{
168 /* See if any machine dependent calls */
169 if (ppc_md.system_reset_exception)
170 ppc_md.system_reset_exception(regs);
171
172 die("System Reset", regs, 0);
173
174 /* Must die if the interrupt is not recoverable */
175 if (!(regs->msr & MSR_RI))
176 panic("Unrecoverable System Reset");
177
178 /* What should we do here? We could issue a shutdown or hard reset. */
179}
180
181void machine_check_exception(struct pt_regs *regs)
182{
183 int recover = 0;
184
185 /* See if any machine dependent calls */
186 if (ppc_md.machine_check_exception)
187 recover = ppc_md.machine_check_exception(regs);
188
189 if (recover)
190 return;
191
192 if (debugger_fault_handler(regs))
193 return;
194 die("Machine check", regs, 0);
195
196 /* Must die if the interrupt is not recoverable */
197 if (!(regs->msr & MSR_RI))
198 panic("Unrecoverable Machine check");
199}
200
201void unknown_exception(struct pt_regs *regs)
202{
203 printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
204 regs->nip, regs->msr, regs->trap);
205
206 _exception(SIGTRAP, regs, 0, 0);
207}
208
209void instruction_breakpoint_exception(struct pt_regs *regs)
210{
211 if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
212 5, SIGTRAP) == NOTIFY_STOP)
213 return;
214 if (debugger_iabr_match(regs))
215 return;
216 _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
217}
218
219void single_step_exception(struct pt_regs *regs)
220{
221 regs->msr &= ~MSR_SE; /* Turn off 'trace' bit */
222
223 if (notify_die(DIE_SSTEP, "single_step", regs, 5,
224 5, SIGTRAP) == NOTIFY_STOP)
225 return;
226 if (debugger_sstep(regs))
227 return;
228
229 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
230}
231
232/*
233 * After we have successfully emulated an instruction, we have to
234 * check if the instruction was being single-stepped, and if so,
235 * pretend we got a single-step exception. This was pointed out
236 * by Kumar Gala. -- paulus
237 */
238static inline void emulate_single_step(struct pt_regs *regs)
239{
240 if (regs->msr & MSR_SE)
241 single_step_exception(regs);
242}
243
244static void parse_fpe(struct pt_regs *regs)
245{
246 int code = 0;
247 unsigned long fpscr;
248
249 flush_fp_to_thread(current);
250
251 fpscr = current->thread.fpscr;
252
253 /* Invalid operation */
254 if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
255 code = FPE_FLTINV;
256
257 /* Overflow */
258 else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
259 code = FPE_FLTOVF;
260
261 /* Underflow */
262 else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
263 code = FPE_FLTUND;
264
265 /* Divide by zero */
266 else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
267 code = FPE_FLTDIV;
268
269 /* Inexact result */
270 else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
271 code = FPE_FLTRES;
272
273 _exception(SIGFPE, regs, code, regs->nip);
274}
275
276/*
277 * Illegal instruction emulation support. Return non-zero if we can't
278 * emulate, or -EFAULT if the associated memory access caused an access
279 * fault. Return zero on success.
280 */
281
282#define INST_MFSPR_PVR 0x7c1f42a6
283#define INST_MFSPR_PVR_MASK 0xfc1fffff
284
285#define INST_DCBA 0x7c0005ec
286#define INST_DCBA_MASK 0x7c0007fe
287
288#define INST_MCRXR 0x7c000400
289#define INST_MCRXR_MASK 0x7c0007fe
290
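/*
 * (Decoding sketch: for mfspr rD,PVR the mask clears the five
 * destination-register bits -- recovered below as
 * (instword >> 21) & 0x1f -- so any rD matches, while the match value
 * pins the opcode and SPR fields.)
 */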
291static int emulate_instruction(struct pt_regs *regs)
292{
293 unsigned int instword;
294
295 if (!user_mode(regs))
296 return -EINVAL;
297
298 CHECK_FULL_REGS(regs);
299
300 if (get_user(instword, (unsigned int __user *)(regs->nip)))
301 return -EFAULT;
302
303 /* Emulate the mfspr rD, PVR. */
304 if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) {
305 unsigned int rd;
306
307 rd = (instword >> 21) & 0x1f;
308 regs->gpr[rd] = mfspr(SPRN_PVR);
309 return 0;
310 }
311
312 /* Emulating the dcba insn is just a no-op. */
313 if ((instword & INST_DCBA_MASK) == INST_DCBA) {
314 static int warned;
315
316 if (!warned) {
317 printk(KERN_WARNING
318 "process %d (%s) uses obsolete 'dcba' insn\n",
319 current->pid, current->comm);
320 warned = 1;
321 }
322 return 0;
323 }
324
325 /* Emulate the mcrxr insn. */
326 if ((instword & INST_MCRXR_MASK) == INST_MCRXR) {
327 static int warned;
328 unsigned int shift;
329
330 if (!warned) {
331 printk(KERN_WARNING
332 "process %d (%s) uses obsolete 'mcrxr' insn\n",
333 current->pid, current->comm);
334 warned = 1;
335 }
336
337 shift = (instword >> 21) & 0x1c;
338 regs->ccr &= ~(0xf0000000 >> shift);
339 regs->ccr |= (regs->xer & 0xf0000000) >> shift;
340 regs->xer &= ~0xf0000000;
341 return 0;
342 }
343
344 return -EINVAL;
345}
346
347/*
348 * Look through the list of trap instructions that are used for BUG(),
349 * BUG_ON() and WARN_ON() and see if we hit one. At this point we know
350 * that the exception was caused by a trap instruction of some kind.
351 * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0
352 * otherwise.
353 */
354extern struct bug_entry __start___bug_table[], __stop___bug_table[];
355
356#ifndef CONFIG_MODULES
357#define module_find_bug(x) NULL
358#endif
359
360struct bug_entry *find_bug(unsigned long bugaddr)
361{
362 struct bug_entry *bug;
363
364 for (bug = __start___bug_table; bug < __stop___bug_table; ++bug)
365 if (bugaddr == bug->bug_addr)
366 return bug;
367 return module_find_bug(bugaddr);
368}
369
370static int
371check_bug_trap(struct pt_regs *regs)
372{
373 struct bug_entry *bug;
374 unsigned long addr;
375
376 if (regs->msr & MSR_PR)
377 return 0; /* not in kernel */
378 addr = regs->nip; /* address of trap instruction */
379 if (addr < PAGE_OFFSET)
380 return 0;
381 bug = find_bug(regs->nip);
382 if (bug == NULL)
383 return 0;
384 if (bug->line & BUG_WARNING_TRAP) {
385 /* this is a WARN_ON rather than BUG/BUG_ON */
386 printk(KERN_ERR "Badness in %s at %s:%d\n",
387 bug->function, bug->file,
388 (unsigned int)bug->line & ~BUG_WARNING_TRAP);
389 show_stack(current, (void *)regs->gpr[1]);
390 return 1;
391 }
392 printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n",
393 bug->function, bug->file, (unsigned int)bug->line);
394 return 0;
395}
396
397void program_check_exception(struct pt_regs *regs)
398{
399 if (debugger_fault_handler(regs))
400 return;
401
402 if (regs->msr & 0x100000) {
403 /* IEEE FP exception */
404 parse_fpe(regs);
405 } else if (regs->msr & 0x20000) {
406 /* trap exception */
407
408 if (notify_die(DIE_BPT, "breakpoint", regs, 5,
409 5, SIGTRAP) == NOTIFY_STOP)
410 return;
411 if (debugger_bpt(regs))
412 return;
413
414 if (check_bug_trap(regs)) {
415 regs->nip += 4;
416 return;
417 }
418 _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
419
420 } else {
421 /* Privileged or illegal instruction; try to emulate it. */
422 switch (emulate_instruction(regs)) {
423 case 0:
424 regs->nip += 4;
425 emulate_single_step(regs);
426 break;
427
428 case -EFAULT:
429 _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
430 break;
431
432 default:
433 if (regs->msr & 0x40000)
434				/* privileged */
435 _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
436 else
437 /* illegal */
438 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
439 break;
440 }
441 }
442}
443
444void kernel_fp_unavailable_exception(struct pt_regs *regs)
445{
446 printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
447 "%lx at %lx\n", regs->trap, regs->nip);
448 die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
449}
450
451void altivec_unavailable_exception(struct pt_regs *regs)
452{
453#ifndef CONFIG_ALTIVEC
454 if (user_mode(regs)) {
455 /* A user program has executed an altivec instruction,
456 but this kernel doesn't support altivec. */
457 _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
458 return;
459 }
460#endif
461 printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
462 "%lx at %lx\n", regs->trap, regs->nip);
463 die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
464}
465
466extern perf_irq_t perf_irq;
467
468void performance_monitor_exception(struct pt_regs *regs)
469{
470 perf_irq(regs);
471}
472
473void alignment_exception(struct pt_regs *regs)
474{
475 int fixed;
476
477 fixed = fix_alignment(regs);
478
479 if (fixed == 1) {
480 regs->nip += 4; /* skip over emulated instruction */
481 emulate_single_step(regs);
482 return;
483 }
484
485 /* Operand address was bad */
486 if (fixed == -EFAULT) {
487 if (user_mode(regs)) {
488 _exception(SIGSEGV, regs, SEGV_MAPERR, regs->dar);
489 } else {
490 /* Search exception table */
491 bad_page_fault(regs, regs->dar, SIGSEGV);
492 }
493
494 return;
495 }
496
497 _exception(SIGBUS, regs, BUS_ADRALN, regs->nip);
498}
499
500#ifdef CONFIG_ALTIVEC
501void altivec_assist_exception(struct pt_regs *regs)
502{
503 int err;
504 siginfo_t info;
505
506 if (!user_mode(regs)) {
507 printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
508 " at %lx\n", regs->nip);
509 die("Kernel VMX/Altivec assist exception", regs, SIGILL);
510 }
511
512 flush_altivec_to_thread(current);
513
514 err = emulate_altivec(regs);
515 if (err == 0) {
516 regs->nip += 4; /* skip emulated instruction */
517 emulate_single_step(regs);
518 return;
519 }
520
521 if (err == -EFAULT) {
522 /* got an error reading the instruction */
523 info.si_signo = SIGSEGV;
524 info.si_errno = 0;
525 info.si_code = SEGV_MAPERR;
526 info.si_addr = (void __user *) regs->nip;
527 force_sig_info(SIGSEGV, &info, current);
528 } else {
529 /* didn't recognize the instruction */
530 /* XXX quick hack for now: set the non-Java bit in the VSCR */
531 if (printk_ratelimit())
532 printk(KERN_ERR "Unrecognized altivec instruction "
533 "in %s at %lx\n", current->comm, regs->nip);
534 current->thread.vscr.u[3] |= 0x10000;
535 }
536}
537#endif /* CONFIG_ALTIVEC */
538
539/*
540 * We enter here if we get an unrecoverable exception, that is, one
541 * that happened at a point where the RI (recoverable interrupt) bit
542 * in the MSR is 0. This indicates that SRR0/1 are live, and that
543 * we therefore lost state by taking this exception.
544 */
545void unrecoverable_exception(struct pt_regs *regs)
546{
547 printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n",
548 regs->trap, regs->nip);
549 die("Unrecoverable exception", regs, SIGABRT);
550}
551
552/*
553 * We enter here if we discover during exception entry that we are
554 * running in supervisor mode with a userspace value in the stack pointer.
555 */
556void kernel_bad_stack(struct pt_regs *regs)
557{
558 printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
559 regs->gpr[1], regs->nip);
560 die("Bad kernel stack pointer", regs, SIGABRT);
561}
562
563void __init trap_init(void)
564{
565}
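
A minimal sketch of the SRR1 reason bits tested in program_check_exception()
above, with illustrative names (the names are not from this file; the values
come straight from the checks above):

    /* Hypothetical names for the SRR1 program-check reason bits */
    #define SRR1_PROGFPE	0x100000	/* IEEE FP enabled exception */
    #define SRR1_PROGTRAP	0x020000	/* trap (tw/twi) instruction */
    #define SRR1_PROGPRIV	0x040000	/* privileged instruction */

    /* Same test as "regs->msr & 0x20000" above, just named */
    static inline int is_trap_exception(unsigned long msr)
    {
    	return (msr & SRR1_PROGTRAP) != 0;
    }
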
diff --git a/arch/ppc64/kernel/u3_iommu.c b/arch/ppc64/kernel/u3_iommu.c
new file mode 100644
index 000000000000..b6e3bca4102d
--- /dev/null
+++ b/arch/ppc64/kernel/u3_iommu.c
@@ -0,0 +1,349 @@
1/*
2 * arch/ppc64/kernel/u3_iommu.c
3 *
4 * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
5 *
6 * Based on pSeries_iommu.c:
7 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
8 * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
9 *
10 * Dynamic DMA mapping support, Apple U3 & IBM CPC925 "DART" iommu.
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28#include <linux/config.h>
29#include <linux/init.h>
30#include <linux/types.h>
31#include <linux/slab.h>
32#include <linux/mm.h>
33#include <linux/spinlock.h>
34#include <linux/string.h>
35#include <linux/pci.h>
36#include <linux/dma-mapping.h>
37#include <linux/vmalloc.h>
38#include <asm/io.h>
39#include <asm/prom.h>
40#include <asm/ppcdebug.h>
41#include <asm/iommu.h>
42#include <asm/pci-bridge.h>
43#include <asm/machdep.h>
44#include <asm/abs_addr.h>
45#include <asm/cacheflush.h>
46#include <asm/lmb.h>
47
48#include "pci.h"
49
50extern int iommu_force_on;
51
52/* physical base of DART registers */
53#define DART_BASE 0xf8033000UL
54
55/* Offset from base to control register */
56#define DARTCNTL 0
57/* Offset from base to exception register */
58#define DARTEXCP 0x10
59/* Offset from base to TLB tag registers */
60#define DARTTAG 0x1000
61
62
63/* Control Register fields */
64
65/* base address of table (pfn) */
66#define DARTCNTL_BASE_MASK 0xfffff
67#define DARTCNTL_BASE_SHIFT 12
68
69#define DARTCNTL_FLUSHTLB 0x400
70#define DARTCNTL_ENABLE 0x200
71
72/* size of table in pages */
73#define DARTCNTL_SIZE_MASK 0x1ff
74#define DARTCNTL_SIZE_SHIFT 0
75
76/* DART table fields */
77#define DARTMAP_VALID 0x80000000
78#define DARTMAP_RPNMASK 0x00ffffff
79
80/* Physical base address and size of the DART table */
81unsigned long dart_tablebase; /* exported to htab_initialize */
82static unsigned long dart_tablesize;
83
84/* Virtual base address of the DART table */
85static u32 *dart_vbase;
86
87/* Mapped base address for the dart */
88static unsigned int *dart;
89
90/* Dummy val that entries are set to when unused */
91static unsigned int dart_emptyval;
92
93static struct iommu_table iommu_table_u3;
94static int iommu_table_u3_inited;
95static int dart_dirty;
96
97#define DBG(...)
98
99static inline void dart_tlb_invalidate_all(void)
100{
101 unsigned long l = 0;
102 unsigned int reg;
103 unsigned long limit;
104
105 DBG("dart: flush\n");
106
107 /* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the
108 * control register and wait for it to clear.
109 *
110 * Gotcha: Sometimes, the DART won't detect that the bit gets
111 * set. If so, clear it and set it again.
112 */
113
114 limit = 0;
115
116retry:
117 reg = in_be32((unsigned int *)dart+DARTCNTL);
118 reg |= DARTCNTL_FLUSHTLB;
119 out_be32((unsigned int *)dart+DARTCNTL, reg);
120
121 l = 0;
122 while ((in_be32((unsigned int *)dart+DARTCNTL) & DARTCNTL_FLUSHTLB) &&
123 l < (1L<<limit)) {
124 l++;
125 }
126 if (l == (1L<<limit)) {
127 if (limit < 4) {
128 limit++;
129 reg = in_be32((unsigned int *)dart+DARTCNTL);
130 reg &= ~DARTCNTL_FLUSHTLB;
131 out_be32((unsigned int *)dart+DARTCNTL, reg);
132 goto retry;
133 } else
134 panic("U3-DART: TLB did not flush after waiting a long "
135 "time. Buggy U3?");
136 }
137}
138
139static void dart_flush(struct iommu_table *tbl)
140{
141 if (dart_dirty)
142 dart_tlb_invalidate_all();
143 dart_dirty = 0;
144}
145
146static void dart_build(struct iommu_table *tbl, long index,
147 long npages, unsigned long uaddr,
148 enum dma_data_direction direction)
149{
150 unsigned int *dp;
151 unsigned int rpn;
152
153 DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
154
155 dp = ((unsigned int*)tbl->it_base) + index;
156
157 /* On U3, all memory is contiguous, so we can move this
158 * out of the loop.
159 */
160 while (npages--) {
161 rpn = virt_to_abs(uaddr) >> PAGE_SHIFT;
162
163 *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);
164
165 rpn++;
166 uaddr += PAGE_SIZE;
167 }
168
169 dart_dirty = 1;
170}
171
172
173static void dart_free(struct iommu_table *tbl, long index, long npages)
174{
175 unsigned int *dp;
176
177 /* We don't worry about flushing the TLB cache. The only drawback of
178 * not doing it is that we won't catch buggy device drivers doing
179 * bad DMAs, but then no 32-bit architecture ever does either.
180 */
181
182 DBG("dart: free at: %lx, %lx\n", index, npages);
183
184 dp = ((unsigned int *)tbl->it_base) + index;
185
186 while (npages--)
187 *(dp++) = dart_emptyval;
188}
189
190
191static int dart_init(struct device_node *dart_node)
192{
193 unsigned int regword;
194 unsigned int i;
195 unsigned long tmp;
196
197 if (dart_tablebase == 0 || dart_tablesize == 0) {
198 printk(KERN_INFO "U3-DART: table not allocated, using direct DMA\n");
199 return -ENODEV;
200 }
201
202 /* Make sure nothing from the DART range remains in the CPU cache
203 * from a previous mapping that existed before the kernel took
204 * over
205 */
206 flush_dcache_phys_range(dart_tablebase, dart_tablebase + dart_tablesize);
207
208 /* Allocate a spare page to map all invalid DART pages. We need to do
209 * that to work around what looks like a problem with the HT bridge
210 * prefetching into invalid pages and corrupting data
211 */
212 tmp = lmb_alloc(PAGE_SIZE, PAGE_SIZE);
213 if (!tmp)
214 panic("U3-DART: Cannot allocate spare page!");
215 dart_emptyval = DARTMAP_VALID | ((tmp >> PAGE_SHIFT) & DARTMAP_RPNMASK);
216
217 /* Map in DART registers. FIXME: Use device node to get base address */
218 dart = ioremap(DART_BASE, 0x7000);
219 if (dart == NULL)
220 panic("U3-DART: Cannot map registers!");
221
222 /* Set initial control register contents: table base,
223 * table size and enable bit
224 */
225 regword = DARTCNTL_ENABLE |
226 ((dart_tablebase >> PAGE_SHIFT) << DARTCNTL_BASE_SHIFT) |
227 (((dart_tablesize >> PAGE_SHIFT) & DARTCNTL_SIZE_MASK)
228 << DARTCNTL_SIZE_SHIFT);
229 dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize);
230
231 /* Fill initial table */
232 for (i = 0; i < dart_tablesize/4; i++)
233 dart_vbase[i] = dart_emptyval;
234
235 /* Initialize DART with table base and enable it. */
236 out_be32((unsigned int *)dart, regword);
237
238 /* Invalidate DART to get rid of possible stale TLBs */
239 dart_tlb_invalidate_all();
240
241 printk(KERN_INFO "U3/CPC925 DART IOMMU initialized\n");
242
243 return 0;
244}
245
246static void iommu_table_u3_setup(void)
247{
248 iommu_table_u3.it_busno = 0;
249 iommu_table_u3.it_offset = 0;
250 /* it_size is in number of entries */
251 iommu_table_u3.it_size = dart_tablesize / sizeof(u32);
252
253 /* Initialize the common IOMMU code */
254 iommu_table_u3.it_base = (unsigned long)dart_vbase;
255 iommu_table_u3.it_index = 0;
256 iommu_table_u3.it_blocksize = 1;
257 iommu_init_table(&iommu_table_u3);
258
259 /* Reserve the last page of the DART to avoid possible prefetch
260 * past the DART mapped area
261 */
262 set_bit(iommu_table_u3.it_size - 1, iommu_table_u3.it_map);
263}
264
265static void iommu_dev_setup_u3(struct pci_dev *dev)
266{
267 struct device_node *dn;
268
269 /* We only have one iommu table on the mac for now, which makes
270 * things simple. Setup all PCI devices to point to this table
271 *
272 * We must use pci_device_to_OF_node() to make sure that
273 * we get the real "final" pointer to the device in the
274 * pci_dev sysdata and not the temporary PHB one
275 */
276 dn = pci_device_to_OF_node(dev);
277
278 if (dn)
279 dn->iommu_table = &iommu_table_u3;
280}
281
282static void iommu_bus_setup_u3(struct pci_bus *bus)
283{
284 struct device_node *dn;
285
286 if (!iommu_table_u3_inited) {
287 iommu_table_u3_inited = 1;
288 iommu_table_u3_setup();
289 }
290
291 dn = pci_bus_to_OF_node(bus);
292
293 if (dn)
294 dn->iommu_table = &iommu_table_u3;
295}
296
297static void iommu_dev_setup_null(struct pci_dev *dev) { }
298static void iommu_bus_setup_null(struct pci_bus *bus) { }
299
300void iommu_init_early_u3(void)
301{
302 struct device_node *dn;
303
304 /* Find the DART in the device-tree */
305 dn = of_find_compatible_node(NULL, "dart", "u3-dart");
306 if (dn == NULL)
307 return;
308
309 /* Setup low level TCE operations for the core IOMMU code */
310 ppc_md.tce_build = dart_build;
311 ppc_md.tce_free = dart_free;
312 ppc_md.tce_flush = dart_flush;
313
314 /* Initialize the DART HW */
315 if (dart_init(dn)) {
316 /* If init failed, use direct iommu and null setup functions */
317 ppc_md.iommu_dev_setup = iommu_dev_setup_null;
318 ppc_md.iommu_bus_setup = iommu_bus_setup_null;
319
320 /* Setup pci_dma ops */
321 pci_direct_iommu_init();
322 } else {
323 ppc_md.iommu_dev_setup = iommu_dev_setup_u3;
324 ppc_md.iommu_bus_setup = iommu_bus_setup_u3;
325
326 /* Setup pci_dma ops */
327 pci_iommu_init();
328 }
329}
330
331
332void __init alloc_u3_dart_table(void)
333{
334 /* Only reserve DART space if machine has more than 2GB of RAM
335 * or if requested with iommu=on on cmdline.
336 */
337 if (lmb_end_of_DRAM() <= 0x80000000ull && !iommu_force_on)
338 return;
339
340 /* 512 pages (2MB) is max DART tablesize. */
341 dart_tablesize = 1UL << 21;
342 /* 16MB (1 << 24) alignment. We allocate a full 16MB chunk since we
343 * will blow up an entire large page anyway in the kernel mapping
344 */
345 dart_tablebase = (unsigned long)
346 abs_to_virt(lmb_alloc_base(1UL<<24, 1UL<<24, 0x80000000L));
347
348 printk(KERN_INFO "U3-DART allocated at: %lx\n", dart_tablebase);
349}
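
A hedged sketch of the control-word packing done in dart_init() above, using
the same macro values from this file; the sample table base and size are
hypothetical:

    #include <stdio.h>

    #define DARTCNTL_ENABLE     0x200
    #define DARTCNTL_BASE_SHIFT 12
    #define DARTCNTL_SIZE_MASK  0x1ff
    #define DARTCNTL_SIZE_SHIFT 0
    #define PAGE_SHIFT          12

    int main(void)
    {
    	/* Hypothetical: a 1MB (256-page) table at physical 0x7f000000 */
    	unsigned long tablebase = 0x7f000000UL;
    	unsigned long tablesize = 1UL << 20;
    	unsigned int regword;

    	regword = DARTCNTL_ENABLE |
    		((tablebase >> PAGE_SHIFT) << DARTCNTL_BASE_SHIFT) |
    		(((tablesize >> PAGE_SHIFT) & DARTCNTL_SIZE_MASK)
    		 << DARTCNTL_SIZE_SHIFT);
    	printf("DARTCNTL = 0x%x\n", regword);	/* prints 0x7f000300 */
    	return 0;
    }

One arithmetic quirk worth noting: the maximum 2MB table is 512 pages, and
512 & 0x1ff is 0, so that size wraps to 0 in the 9-bit size field.
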
diff --git a/arch/ppc64/kernel/udbg.c b/arch/ppc64/kernel/udbg.c
new file mode 100644
index 000000000000..d4ccd6f1ef47
--- /dev/null
+++ b/arch/ppc64/kernel/udbg.c
@@ -0,0 +1,360 @@
1/*
2 * NS16550 Serial Port (uart) debugging stuff.
3 *
4 * c 2001 PPC 64 Team, IBM Corp
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <stdarg.h>
13#define WANT_PPCDBG_TAB /* Only defined here */
14#include <linux/config.h>
15#include <linux/types.h>
16#include <asm/ppcdebug.h>
17#include <asm/processor.h>
18#include <asm/uaccess.h>
19#include <asm/machdep.h>
20#include <asm/io.h>
21#include <asm/prom.h>
22#include <asm/pmac_feature.h>
23
24extern u8 real_readb(volatile u8 __iomem *addr);
25extern void real_writeb(u8 data, volatile u8 __iomem *addr);
26
27struct NS16550 {
28 /* this struct must be packed */
29 unsigned char rbr; /* 0 */
30 unsigned char ier; /* 1 */
31 unsigned char fcr; /* 2 */
32 unsigned char lcr; /* 3 */
33 unsigned char mcr; /* 4 */
34 unsigned char lsr; /* 5 */
35 unsigned char msr; /* 6 */
36 unsigned char scr; /* 7 */
37};
38
39#define thr rbr
40#define iir fcr
41#define dll rbr
42#define dlm ier
43#define dlab lcr
44
45#define LSR_DR 0x01 /* Data ready */
46#define LSR_OE 0x02 /* Overrun */
47#define LSR_PE 0x04 /* Parity error */
48#define LSR_FE 0x08 /* Framing error */
49#define LSR_BI 0x10 /* Break */
50#define LSR_THRE 0x20 /* Xmit holding register empty */
51#define LSR_TEMT 0x40 /* Xmitter empty */
52#define LSR_ERR 0x80 /* Error */
53
54static volatile struct NS16550 __iomem *udbg_comport;
55
56void udbg_init_uart(void __iomem *comport, unsigned int speed)
57{
58 u16 dll = speed ? (115200 / speed) : 12;
59
60 if (comport) {
61 udbg_comport = (struct NS16550 __iomem *)comport;
62 out_8(&udbg_comport->lcr, 0x00);
63 out_8(&udbg_comport->ier, 0xff);
64 out_8(&udbg_comport->ier, 0x00);
65 out_8(&udbg_comport->lcr, 0x80); /* Access baud rate */
66 out_8(&udbg_comport->dll, dll & 0xff); /* 1 = 115200, 2 = 57600,
67 3 = 38400, 12 = 9600 baud */
68 out_8(&udbg_comport->dlm, dll >> 8); /* dll >> 8 which should be zero
69 for fast rates; */
70 out_8(&udbg_comport->lcr, 0x03); /* 8 data, 1 stop, no parity */
71 out_8(&udbg_comport->mcr, 0x03); /* RTS/DTR */
72 out_8(&udbg_comport->fcr, 0x07); /* Clear & enable FIFOs */
73 }
74}
75
76#ifdef CONFIG_PPC_PMAC
77
78#define SCC_TXRDY 4
79#define SCC_RXRDY 1
80
81static volatile u8 __iomem *sccc;
82static volatile u8 __iomem *sccd;
83
84static unsigned char scc_inittab[] = {
85 13, 0, /* set baud rate divisor */
86 12, 0,
87 14, 1, /* baud rate gen enable, src=rtxc */
88 11, 0x50, /* clocks = br gen */
89 5, 0xea, /* tx 8 bits, assert DTR & RTS */
90 4, 0x46, /* x16 clock, 1 stop */
91 3, 0xc1, /* rx enable, 8 bits */
92};
93
94void udbg_init_scc(struct device_node *np)
95{
96 u32 *reg;
97 unsigned long addr;
98 int i, x;
99
100 if (np == NULL)
101 np = of_find_node_by_name(NULL, "escc");
102 if (np == NULL || np->parent == NULL)
103 return;
104
105 udbg_printf("found SCC...\n");
106 /* Get address within mac-io ASIC */
107 reg = (u32 *)get_property(np, "reg", NULL);
108 if (reg == NULL)
109 return;
110 addr = reg[0];
111 udbg_printf("local addr: %lx\n", addr);
112 /* Get address of mac-io PCI itself */
113 reg = (u32 *)get_property(np->parent, "assigned-addresses", NULL);
114 if (reg == NULL)
115 return;
116 addr += reg[2];
117 udbg_printf("final addr: %lx\n", addr);
118
119 /* Setup for 57600 8N1 */
120 addr += 0x20;
121 sccc = (volatile u8 __iomem *) ioremap(addr & PAGE_MASK, PAGE_SIZE);
122 sccc += addr & ~PAGE_MASK;
123 sccd = sccc + 0x10;
124
125 udbg_printf("ioremap result sccc: %p\n", sccc);
126 mb();
127
128 for (i = 20000; i != 0; --i)
129 x = in_8(sccc);
130 out_8(sccc, 0x09); /* reset A or B side */
131 out_8(sccc, 0xc0);
132 for (i = 0; i < sizeof(scc_inittab); ++i)
133 out_8(sccc, scc_inittab[i]);
134
135 ppc_md.udbg_putc = udbg_putc;
136 ppc_md.udbg_getc = udbg_getc;
137 ppc_md.udbg_getc_poll = udbg_getc_poll;
138
139 udbg_puts("Hello World !\n");
140}
141
142#endif /* CONFIG_PPC_PMAC */
143
144#ifdef CONFIG_PPC_PMAC
145static void udbg_real_putc(unsigned char c)
146{
147 while ((real_readb(sccc) & SCC_TXRDY) == 0)
148 ;
149 real_writeb(c, sccd);
150 if (c == '\n')
151 udbg_real_putc('\r');
152}
153
154void udbg_init_pmac_realmode(void)
155{
156 sccc = (volatile u8 __iomem *)0x80013020ul;
157 sccd = (volatile u8 __iomem *)0x80013030ul;
158
159 ppc_md.udbg_putc = udbg_real_putc;
160 ppc_md.udbg_getc = NULL;
161 ppc_md.udbg_getc_poll = NULL;
162}
163#endif /* CONFIG_PPC_PMAC */
164
165#ifdef CONFIG_PPC_MAPLE
166void udbg_maple_real_putc(unsigned char c)
167{
168 if (udbg_comport) {
169 while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
170 /* wait for idle */;
171 real_writeb(c, &udbg_comport->thr); eieio();
172 if (c == '\n') {
173 /* Also put a CR. This is for convenience. */
174 while ((real_readb(&udbg_comport->lsr) & LSR_THRE) == 0)
175 /* wait for idle */;
176 real_writeb('\r', &udbg_comport->thr); eieio();
177 }
178 }
179}
180
181void udbg_init_maple_realmode(void)
182{
183 udbg_comport = (volatile struct NS16550 __iomem *)0xf40003f8;
184
185 ppc_md.udbg_putc = udbg_maple_real_putc;
186 ppc_md.udbg_getc = NULL;
187 ppc_md.udbg_getc_poll = NULL;
188}
189#endif /* CONFIG_PPC_MAPLE */
190
191void udbg_putc(unsigned char c)
192{
193 if (udbg_comport) {
194 while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0)
195 /* wait for idle */;
196 out_8(&udbg_comport->thr, c);
197 if (c == '\n') {
198 /* Also put a CR. This is for convenience. */
199 while ((in_8(&udbg_comport->lsr) & LSR_THRE) == 0)
200 /* wait for idle */;
201 out_8(&udbg_comport->thr, '\r');
202 }
203 }
204#ifdef CONFIG_PPC_PMAC
205 else if (sccc) {
206 while ((in_8(sccc) & SCC_TXRDY) == 0)
207 ;
208 out_8(sccd, c);
209 if (c == '\n')
210 udbg_putc('\r');
211 }
212#endif /* CONFIG_PPC_PMAC */
213}
214
215int udbg_getc_poll(void)
216{
217 if (udbg_comport) {
218 if ((in_8(&udbg_comport->lsr) & LSR_DR) != 0)
219 return in_8(&udbg_comport->rbr);
220 else
221 return -1;
222 }
223#ifdef CONFIG_PPC_PMAC
224 else if (sccc) {
225 if ((in_8(sccc) & SCC_RXRDY) != 0)
226 return in_8(sccd);
227 else
228 return -1;
229 }
230#endif /* CONFIG_PPC_PMAC */
231 return -1;
232}
233
234unsigned char udbg_getc(void)
235{
236 if (udbg_comport) {
237 while ((in_8(&udbg_comport->lsr) & LSR_DR) == 0)
238 /* wait for char */;
239 return in_8(&udbg_comport->rbr);
240 }
241#ifdef CONFIG_PPC_PMAC
242 else if (sccc) {
243 while ((in_8(sccc) & SCC_RXRDY) == 0)
244 ;
245 return in_8(sccd);
246 }
247#endif /* CONFIG_PPC_PMAC */
248 return 0;
249}
250
251void udbg_puts(const char *s)
252{
253 if (ppc_md.udbg_putc) {
254 char c;
255
256 if (s && *s != '\0') {
257 while ((c = *s++) != '\0')
258 ppc_md.udbg_putc(c);
259 }
260 }
261#if 0
262 else {
263 printk("%s", s);
264 }
265#endif
266}
267
268int udbg_write(const char *s, int n)
269{
270 int remain = n;
271 char c;
272
273 if (!ppc_md.udbg_putc)
274 return 0;
275
276 if (s && *s != '\0') {
277 while (((c = *s++) != '\0') && (remain-- > 0)) {
278 ppc_md.udbg_putc(c);
279 }
280 }
281
282 return n - remain;
283}
284
285int udbg_read(char *buf, int buflen)
286{
287 char c, *p = buf;
288 int i;
289
290 if (!ppc_md.udbg_getc)
291 return 0;
292
293 for (i = 0; i < buflen; ++i) {
294 do {
295 c = ppc_md.udbg_getc();
296 } while (c == 0x11 || c == 0x13);
297 if (c == 0)
298 break;
299 *p++ = c;
300 }
301
302 return i;
303}
304
305void udbg_console_write(struct console *con, const char *s, unsigned int n)
306{
307 udbg_write(s, n);
308}
309
310#define UDBG_BUFSIZE 256
311void udbg_printf(const char *fmt, ...)
312{
313 unsigned char buf[UDBG_BUFSIZE];
314 va_list args;
315
316 va_start(args, fmt);
317 vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
318 udbg_puts(buf);
319 va_end(args);
320}
321
322/* Special print used by PPCDBG() macro */
323void udbg_ppcdbg(unsigned long debug_flags, const char *fmt, ...)
324{
325 unsigned long active_debugs = debug_flags & ppc64_debug_switch;
326
327 if (active_debugs) {
328 va_list ap;
329 unsigned char buf[UDBG_BUFSIZE];
330 unsigned long i, len = 0;
331
332 for (i=0; i < PPCDBG_NUM_FLAGS; i++) {
333 if (((1U << i) & active_debugs) &&
334 trace_names[i]) {
335 len += strlen(trace_names[i]);
336 udbg_puts(trace_names[i]);
337 break;
338 }
339 }
340
341 snprintf(buf, UDBG_BUFSIZE, " [%s]: ", current->comm);
342 len += strlen(buf);
343 udbg_puts(buf);
344
345 while (len < 18) {
346 udbg_puts(" ");
347 len++;
348 }
349
350 va_start(ap, fmt);
351 vsnprintf(buf, UDBG_BUFSIZE, fmt, ap);
352 udbg_puts(buf);
353 va_end(ap);
354 }
355}
356
357unsigned long udbg_ifdebug(unsigned long flags)
358{
359 return (flags & ppc64_debug_switch);
360}
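
For reference, the divisor rule used by udbg_init_uart() above as a
stand-alone sketch; the 115200 base rate and the default divisor of 12
(9600 baud) come straight from that function:

    #include <stdio.h>

    /* Same rule as udbg_init_uart(): 1 = 115200, 2 = 57600,
     * 3 = 38400, 12 = 9600 baud; speed 0 falls back to 9600. */
    static unsigned short uart_divisor(unsigned int speed)
    {
    	return speed ? (unsigned short)(115200 / speed) : 12;
    }

    int main(void)
    {
    	unsigned int speeds[] = { 115200, 57600, 38400, 9600, 0 };
    	unsigned int i;

    	for (i = 0; i < sizeof(speeds) / sizeof(speeds[0]); i++)
    		printf("speed %6u -> divisor %u\n",
    		       speeds[i], uart_divisor(speeds[i]));
    	return 0;
    }
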
diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c
new file mode 100644
index 000000000000..8c4597224b71
--- /dev/null
+++ b/arch/ppc64/kernel/vdso.c
@@ -0,0 +1,614 @@
1/*
2 * linux/arch/ppc64/kernel/vdso.c
3 *
4 * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
5 * <benh@kernel.crashing.org>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/module.h>
15#include <linux/errno.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/mm.h>
19#include <linux/smp.h>
20#include <linux/smp_lock.h>
21#include <linux/stddef.h>
22#include <linux/unistd.h>
23#include <linux/slab.h>
24#include <linux/user.h>
25#include <linux/elf.h>
26#include <linux/security.h>
27#include <linux/bootmem.h>
28
29#include <asm/pgtable.h>
30#include <asm/system.h>
31#include <asm/processor.h>
32#include <asm/mmu.h>
33#include <asm/mmu_context.h>
34#include <asm/machdep.h>
35#include <asm/cputable.h>
36#include <asm/sections.h>
37#include <asm/vdso.h>
38
39#undef DEBUG
40
41#ifdef DEBUG
42#define DBG(fmt...) printk(fmt)
43#else
44#define DBG(fmt...)
45#endif
46
47
48/*
49 * The vDSOs themselves are here
50 */
51extern char vdso64_start, vdso64_end;
52extern char vdso32_start, vdso32_end;
53
54static void *vdso64_kbase = &vdso64_start;
55static void *vdso32_kbase = &vdso32_start;
56
57unsigned int vdso64_pages;
58unsigned int vdso32_pages;
59
60/* Signal trampolines user addresses */
61
62unsigned long vdso64_rt_sigtramp;
63unsigned long vdso32_sigtramp;
64unsigned long vdso32_rt_sigtramp;
65
66/* Format of the patch table */
67struct vdso_patch_def
68{
69 u32 pvr_mask, pvr_value;
70 const char *gen_name;
71 const char *fix_name;
72};
73
74/* Table of functions to patch based on the CPU type/revision
75 *
76 * TODO: Improve by adding whole lists for each entry
77 */
78static struct vdso_patch_def vdso_patches[] = {
79 {
80 0xffff0000, 0x003a0000, /* POWER5 */
81 "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
82 },
83 {
84 0xffff0000, 0x003b0000, /* POWER5 */
85 "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
86 },
87};
88
89/*
90 * Some info carried around for each of the two vDSO images during
91 * parsing at boot time.
92 */
93struct lib32_elfinfo
94{
95 Elf32_Ehdr *hdr; /* ptr to ELF */
96 Elf32_Sym *dynsym; /* ptr to .dynsym section */
97 unsigned long dynsymsize; /* size of .dynsym section */
98 char *dynstr; /* ptr to .dynstr section */
99 unsigned long text; /* offset of .text section in .so */
100};
101
102struct lib64_elfinfo
103{
104 Elf64_Ehdr *hdr;
105 Elf64_Sym *dynsym;
106 unsigned long dynsymsize;
107 char *dynstr;
108 unsigned long text;
109};
110
111
112#ifdef DEBUG
113static void dump_one_vdso_page(struct page *pg, struct page *upg)
114{
115 printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
116 page_count(pg),
117 pg->flags);
118 if (upg/* && pg != upg*/) {
119 printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT),
120 page_count(upg),
121 upg->flags);
122 }
123 printk("\n");
124}
125
126static void dump_vdso_pages(struct vm_area_struct * vma)
127{
128 int i;
129
130 if (!vma || test_thread_flag(TIF_32BIT)) {
131 printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
132 for (i=0; i<vdso32_pages; i++) {
133 struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
134 struct page *upg = (vma && vma->vm_mm) ?
135 follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
136 : NULL;
137 dump_one_vdso_page(pg, upg);
138 }
139 }
140 if (!vma || !test_thread_flag(TIF_32BIT)) {
141 printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
142 for (i=0; i<vdso64_pages; i++) {
143 struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
144 struct page *upg = (vma && vma->vm_mm) ?
145 follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0)
146 : NULL;
147 dump_one_vdso_page(pg, upg);
148 }
149 }
150}
151#endif /* DEBUG */
152
153/*
154 * Keep a dummy vma_close for now, it will prevent VMA merging.
155 */
156static void vdso_vma_close(struct vm_area_struct * vma)
157{
158}
159
160/*
161 * Our nopage() function maps in the actual vDSO kernel pages; they will
162 * be mapped read-only by do_no_page() and eventually COW'ed, either
163 * right away for an initial write access, or later by do_wp_page().
164 */
165static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
166 unsigned long address, int *type)
167{
168 unsigned long offset = address - vma->vm_start;
169 struct page *pg;
170 void *vbase = test_thread_flag(TIF_32BIT) ? vdso32_kbase : vdso64_kbase;
171
172 DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
173 current->comm, address, offset);
174
175 if (address < vma->vm_start || address >= vma->vm_end)
176 return NOPAGE_SIGBUS;
177
178 /*
179 * Last page is systemcfg, special handling here, no get_page() as
180 * this is a reserved page
181 */
182 if ((vma->vm_end - address) <= PAGE_SIZE)
183 return virt_to_page(systemcfg);
184
185 pg = virt_to_page(vbase + offset);
186 get_page(pg);
187 DBG(" ->page count: %d\n", page_count(pg));
188
189 return pg;
190}
191
192static struct vm_operations_struct vdso_vmops = {
193 .close = vdso_vma_close,
194 .nopage = vdso_vma_nopage,
195};
196
197/*
198 * This is called from binfmt_elf, we create the special vma for the
199 * vDSO and insert it into the mm struct tree
200 */
201int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
202{
203 struct mm_struct *mm = current->mm;
204 struct vm_area_struct *vma;
205 unsigned long vdso_pages;
206 unsigned long vdso_base;
207
208 if (test_thread_flag(TIF_32BIT)) {
209 vdso_pages = vdso32_pages;
210 vdso_base = VDSO32_MBASE;
211 } else {
212 vdso_pages = vdso64_pages;
213 vdso_base = VDSO64_MBASE;
214 }
215
216 /* The vDSO has a problem and was disabled; just don't "enable" it for the
217 * process
218 */
219 if (vdso_pages == 0) {
220 current->thread.vdso_base = 0;
221 return 0;
222 }
223 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
224 if (vma == NULL)
225 return -ENOMEM;
226 if (security_vm_enough_memory(vdso_pages)) {
227 kmem_cache_free(vm_area_cachep, vma);
228 return -ENOMEM;
229 }
230 memset(vma, 0, sizeof(*vma));
231
232 /*
233 * Pick a base address for the vDSO in process space. We have a default
234 * base of 1MB to which a random offset of up to 1MB could be added.
235 * XXX: Add possibility for a program header to specify that location
236 */
237 current->thread.vdso_base = vdso_base;
238 /* + ((unsigned long)vma & 0x000ff000); */
239
240 vma->vm_mm = mm;
241 vma->vm_start = current->thread.vdso_base;
242
243 /*
244 * the VMA size is one page more than the vDSO since systemcfg
245 * is mapped in the last one
246 */
247 vma->vm_end = vma->vm_start + ((vdso_pages + 1) << PAGE_SHIFT);
248
249 /*
250 * Our vma flags don't have VM_WRITE, so by default the process isn't allowed
251 * to write those pages.
252 * gdb can break that with the ptrace interface, and thus trigger COW on those
253 * pages, but it's then your responsibility never to do that on the "data" page
254 * of the vDSO, or you'll stop getting kernel updates and your nice userland
255 * gettimeofday will be totally dead. It's fine to use that for setting
256 * breakpoints in the vDSO code pages, though.
257 */
258 vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
259 vma->vm_flags |= mm->def_flags;
260 vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
261 vma->vm_ops = &vdso_vmops;
262
263 down_write(&mm->mmap_sem);
264 insert_vm_struct(mm, vma);
265 mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
266 up_write(&mm->mmap_sem);
267
268 return 0;
269}
270
271static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
272 unsigned long *size)
273{
274 Elf32_Shdr *sechdrs;
275 unsigned int i;
276 char *secnames;
277
278 /* Grab section headers and strings so we can tell who is who */
279 sechdrs = (void *)ehdr + ehdr->e_shoff;
280 secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
281
282 /* Find the section they want */
283 for (i = 1; i < ehdr->e_shnum; i++) {
284 if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
285 if (size)
286 *size = sechdrs[i].sh_size;
287 return (void *)ehdr + sechdrs[i].sh_offset;
288 }
289 }
290 *size = 0;
291 return NULL;
292}
293
294static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
295 unsigned long *size)
296{
297 Elf64_Shdr *sechdrs;
298 unsigned int i;
299 char *secnames;
300
301 /* Grab section headers and strings so we can tell who is who */
302 sechdrs = (void *)ehdr + ehdr->e_shoff;
303 secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
304
305 /* Find the section they want */
306 for (i = 1; i < ehdr->e_shnum; i++) {
307 if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
308 if (size)
309 *size = sechdrs[i].sh_size;
310 return (void *)ehdr + sechdrs[i].sh_offset;
311 }
312 }
313 if (size)
314 *size = 0;
315 return NULL;
316}
317
318static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, const char *symname)
319{
320 unsigned int i;
321 char name[32], *c;
322
323 for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
324 if (lib->dynsym[i].st_name == 0)
325 continue;
326 strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32);
327 c = strchr(name, '@');
328 if (c)
329 *c = 0;
330 if (strcmp(symname, name) == 0)
331 return &lib->dynsym[i];
332 }
333 return NULL;
334}
335
336static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, const char *symname)
337{
338 unsigned int i;
339 char name[32], *c;
340
341 for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
342 if (lib->dynsym[i].st_name == 0)
343 continue;
344 strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32);
345 c = strchr(name, '@');
346 if (c)
347 *c = 0;
348 if (strcmp(symname, name) == 0)
349 return &lib->dynsym[i];
350 }
351 return NULL;
352}
353
354/* Note that we assume the section is .text and the symbol is relative to
355 * the library base
356 */
357static unsigned long __init find_function32(struct lib32_elfinfo *lib, const char *symname)
358{
359 Elf32_Sym *sym = find_symbol32(lib, symname);
360
361 if (sym == NULL) {
362 printk(KERN_WARNING "vDSO32: function %s not found !\n", symname);
363 return 0;
364 }
365 return sym->st_value - VDSO32_LBASE;
366}
367
368/* Note that we assume the section is .text and the symbol is relative to
369 * the library base
370 */
371static unsigned long __init find_function64(struct lib64_elfinfo *lib, const char *symname)
372{
373 Elf64_Sym *sym = find_symbol64(lib, symname);
374
375 if (sym == NULL) {
376 printk(KERN_WARNING "vDSO64: function %s not found !\n", symname);
377 return 0;
378 }
379#ifdef VDS64_HAS_DESCRIPTORS
380 return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - VDSO64_LBASE;
381#else
382 return sym->st_value - VDSO64_LBASE;
383#endif
384}
385
386
387static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
388 struct lib64_elfinfo *v64)
389{
390 void *sect;
391
392 /*
393 * Locate symbol tables & text section
394 */
395
396 v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
397 v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
398 if (v32->dynsym == NULL || v32->dynstr == NULL) {
399 printk(KERN_ERR "vDSO32: a required symbol section was not found\n");
400 return -1;
401 }
402 sect = find_section32(v32->hdr, ".text", NULL);
403 if (sect == NULL) {
404 printk(KERN_ERR "vDSO32: the .text section was not found\n");
405 return -1;
406 }
407 v32->text = sect - vdso32_kbase;
408
409 v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
410 v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
411 if (v64->dynsym == NULL || v64->dynstr == NULL) {
412 printk(KERN_ERR "vDSO64: a required symbol section was not found\n");
413 return -1;
414 }
415 sect = find_section64(v64->hdr, ".text", NULL);
416 if (sect == NULL) {
417 printk(KERN_ERR "vDSO64: the .text section was not found\n");
418 return -1;
419 }
420 v64->text = sect - vdso64_kbase;
421
422 return 0;
423}
424
425static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
426 struct lib64_elfinfo *v64)
427{
428 /*
429 * Find signal trampolines
430 */
431
432 vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64");
433 vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32");
434 vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32");
435}
436
437static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
438 struct lib64_elfinfo *v64)
439{
440 Elf32_Sym *sym32;
441 Elf64_Sym *sym64;
442
443 sym32 = find_symbol32(v32, "__kernel_datapage_offset");
444 if (sym32 == NULL) {
445 printk(KERN_ERR "vDSO32: Can't find symbol __kernel_datapage_offset !\n");
446 return -1;
447 }
448 *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
449 (vdso32_pages << PAGE_SHIFT) - (sym32->st_value - VDSO32_LBASE);
450
451 sym64 = find_symbol64(v64, "__kernel_datapage_offset");
452 if (sym64 == NULL) {
453 printk(KERN_ERR "vDSO64: Can't find symbol __kernel_datapage_offset !\n");
454 return -1;
455 }
456 *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
457 (vdso64_pages << PAGE_SHIFT) - (sym64->st_value - VDSO64_LBASE);
458
459 return 0;
460}
461
462static int vdso_do_func_patch32(struct lib32_elfinfo *v32,
463 struct lib64_elfinfo *v64,
464 const char *orig, const char *fix)
465{
466 Elf32_Sym *sym32_gen, *sym32_fix;
467
468 sym32_gen = find_symbol32(v32, orig);
469 if (sym32_gen == NULL) {
470 printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
471 return -1;
472 }
473 sym32_fix = find_symbol32(v32, fix);
474 if (sym32_fix == NULL) {
475 printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
476 return -1;
477 }
478 sym32_gen->st_value = sym32_fix->st_value;
479 sym32_gen->st_size = sym32_fix->st_size;
480 sym32_gen->st_info = sym32_fix->st_info;
481 sym32_gen->st_other = sym32_fix->st_other;
482 sym32_gen->st_shndx = sym32_fix->st_shndx;
483
484 return 0;
485}
486
487static int vdso_do_func_patch64(struct lib32_elfinfo *v32,
488 struct lib64_elfinfo *v64,
489 const char *orig, const char *fix)
490{
491 Elf64_Sym *sym64_gen, *sym64_fix;
492
493 sym64_gen = find_symbol64(v64, orig);
494 if (sym64_gen == NULL) {
495 printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
496 return -1;
497 }
498 sym64_fix = find_symbol64(v64, fix);
499 if (sym64_fix == NULL) {
500 printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
501 return -1;
502 }
503 sym64_gen->st_value = sym64_fix->st_value;
504 sym64_gen->st_size = sym64_fix->st_size;
505 sym64_gen->st_info = sym64_fix->st_info;
506 sym64_gen->st_other = sym64_fix->st_other;
507 sym64_gen->st_shndx = sym64_fix->st_shndx;
508
509 return 0;
510}
511
512static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
513 struct lib64_elfinfo *v64)
514{
515 u32 pvr;
516 int i;
517
518 pvr = mfspr(SPRN_PVR);
519 for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
520 struct vdso_patch_def *patch = &vdso_patches[i];
521 int match = (pvr & patch->pvr_mask) == patch->pvr_value;
522
523 DBG("patch %d (mask: %x, pvr: %x) : %s\n",
524 i, patch->pvr_mask, patch->pvr_value, match ? "match" : "skip");
525
526 if (!match)
527 continue;
528
529 DBG("replacing %s with %s...\n", patch->gen_name, patch->fix_name);
530
531 /*
532 * Patch the 32-bit and 64-bit symbols. Note that we do not patch
533 * the "." symbol on 64-bit. It would be easy to do, but doesn't
534 * seem to be necessary; patching the OPD symbol is enough.
535 */
536 vdso_do_func_patch32(v32, v64, patch->gen_name, patch->fix_name);
537 vdso_do_func_patch64(v32, v64, patch->gen_name, patch->fix_name);
538 }
539
540 return 0;
541}
542
543
544static __init int vdso_setup(void)
545{
546 struct lib32_elfinfo v32;
547 struct lib64_elfinfo v64;
548
549 v32.hdr = vdso32_kbase;
550 v64.hdr = vdso64_kbase;
551
552 if (vdso_do_find_sections(&v32, &v64))
553 return -1;
554
555 if (vdso_fixup_datapage(&v32, &v64))
556 return -1;
557
558 if (vdso_fixup_alt_funcs(&v32, &v64))
559 return -1;
560
561 vdso_setup_trampolines(&v32, &v64);
562
563 return 0;
564}
565
566void __init vdso_init(void)
567{
568 int i;
569
570 vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
571 vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
572
573 DBG("vdso64_kbase: %p, 0x%x pages, vdso32_kbase: %p, 0x%x pages\n",
574 vdso64_kbase, vdso64_pages, vdso32_kbase, vdso32_pages);
575
576 /*
577 * Initialize the vDSO images in memory, that is, do the necessary
578 * fixups of vDSO symbols, locate trampolines, etc.
579 */
580 if (vdso_setup()) {
581 printk(KERN_ERR "vDSO setup failure, not enabled !\n");
582 /* XXX should free pages here ? */
583 vdso64_pages = vdso32_pages = 0;
584 return;
585 }
586
587 /* Make sure pages are in the correct state */
588 for (i = 0; i < vdso64_pages; i++) {
589 struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
590 ClearPageReserved(pg);
591 get_page(pg);
592 }
593 for (i = 0; i < vdso32_pages; i++) {
594 struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
595 ClearPageReserved(pg);
596 get_page(pg);
597 }
598}
599
600int in_gate_area_no_task(unsigned long addr)
601{
602 return 0;
603}
604
605int in_gate_area(struct task_struct *task, unsigned long addr)
606{
607 return 0;
608}
609
610struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
611{
612 return NULL;
613}
614
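
A hedged sketch of the datapage-offset arithmetic used by
vdso_fixup_datapage() above: the word stored at __kernel_datapage_offset is
the distance from that symbol to the page just past the vDSO image, so
adding the stored word to the symbol's own runtime address lands on the
data page wherever the image is mapped. The sample layout is hypothetical:

    #include <assert.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
    	/* Hypothetical: vDSO mapped at 1MB, 2 pages of image,
    	 * symbol 0x40 bytes in (LBASE taken as 0). */
    	unsigned long map_base = 0x100000UL;
    	unsigned long pages = 2;
    	unsigned long sym_off = 0x40;

    	/* What vdso_fixup_datapage() stores at the symbol */
    	long stored = (pages << PAGE_SHIFT) - sym_off;

    	/* What __get_datapage() computes at runtime */
    	unsigned long sym_addr = map_base + sym_off;
    	unsigned long datapage = sym_addr + stored;

    	assert(datapage == map_base + (pages << PAGE_SHIFT));
    	printf("datapage at %#lx\n", datapage);
    	return 0;
    }
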
diff --git a/arch/ppc64/kernel/vdso32/Makefile b/arch/ppc64/kernel/vdso32/Makefile
new file mode 100644
index 000000000000..ede2f7e477c2
--- /dev/null
+++ b/arch/ppc64/kernel/vdso32/Makefile
@@ -0,0 +1,36 @@
1
2# List of files in the vdso, has to be asm only for now
3
4obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o
5
6# Build rules
7
8targets := $(obj-vdso32) vdso32.so
9obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
10
11
12EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin
13EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1
14EXTRA_AFLAGS := -D__VDSO32__ -s
15
16obj-y += vdso32_wrapper.o
17extra-y += vdso32.lds
18CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
19
20# Force dependency (incbin is bad)
21$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
22
23# link rule for the .so file, .lds has to be first
24$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32)
25 $(call if_changed,vdso32ld)
26
27# assembly rules for the .S files
28$(obj-vdso32): %.o: %.S
29 $(call if_changed_dep,vdso32as)
30
31# actual build commands
32quiet_cmd_vdso32ld = VDSO32L $@
33 cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
34quiet_cmd_vdso32as = VDSO32A $@
35 cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
36
diff --git a/arch/ppc64/kernel/vdso32/cacheflush.S b/arch/ppc64/kernel/vdso32/cacheflush.S
new file mode 100644
index 000000000000..c74fddb6afd4
--- /dev/null
+++ b/arch/ppc64/kernel/vdso32/cacheflush.S
@@ -0,0 +1,65 @@
1/*
2 * vDSO provided cache flush routines
3 *
4 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
5 * IBM Corp.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12#include <linux/config.h>
13#include <asm/processor.h>
14#include <asm/ppc_asm.h>
15#include <asm/vdso.h>
16#include <asm/offsets.h>
17
18 .text
19
20/*
21 * Default "generic" version of __kernel_sync_dicache.
22 *
23 * void __kernel_sync_dicache(unsigned long start, unsigned long end)
24 *
25 * Flushes the data cache & invalidates the instruction cache for the
26 * provided half-open range [start, end)
27 *
28 * Note: all CPUs supported by this kernel have a 128-byte cache
29 * line size, so we don't have to peek that info from the datapage
30 */
31V_FUNCTION_BEGIN(__kernel_sync_dicache)
32 .cfi_startproc
33 li r5,127
34 andc r6,r3,r5 /* round low to line bdy */
35 subf r8,r6,r4 /* compute length */
36 add r8,r8,r5 /* ensure we get enough */
37 srwi. r8,r8,7 /* compute line count */
38 beqlr /* nothing to do? */
39 mtctr r8
40 mr r3,r6
411: dcbst 0,r3
42 addi r3,r3,128
43 bdnz 1b
44 sync
45 mtctr r8
461: icbi 0,r6
47 addi r6,r6,128
48 bdnz 1b
49 isync
50 blr
51 .cfi_endproc
52V_FUNCTION_END(__kernel_sync_dicache)
53
54
55/*
56 * POWER5 version of __kernel_sync_dicache
57 */
58V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
59 .cfi_startproc
60 sync
61 isync
62 blr
63 .cfi_endproc
64V_FUNCTION_END(__kernel_sync_dicache_p5)
65
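
A hedged user-space usage sketch: a program that patches instructions must
flush the modified range through this entry point before executing it. The
vdso_sym() helper below is hypothetical; on this vintage the symbol would
be resolved by walking the mapped vDSO's dynamic symbol table:

    typedef void (*sync_dicache_fn)(unsigned long start, unsigned long end);
    extern void *vdso_sym(const char *name);	/* assumed, not in this file */

    static void patch_and_flush(unsigned char *code, unsigned long len)
    {
    	sync_dicache_fn sync =
    		(sync_dicache_fn)vdso_sym("__kernel_sync_dicache");

    	/* ... write new instructions into code[0..len) ... */
    	sync((unsigned long)code, (unsigned long)code + len);
    	/* safe to branch into the patched code now */
    }
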
diff --git a/arch/ppc64/kernel/vdso32/datapage.S b/arch/ppc64/kernel/vdso32/datapage.S
new file mode 100644
index 000000000000..29b6bd32e1f1
--- /dev/null
+++ b/arch/ppc64/kernel/vdso32/datapage.S
@@ -0,0 +1,68 @@
1/*
2 * Access to the shared data page by the vDSO & syscall map
3 *
4 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/config.h>
13#include <asm/processor.h>
14#include <asm/ppc_asm.h>
15#include <asm/offsets.h>
16#include <asm/unistd.h>
17#include <asm/vdso.h>
18
19 .text
20V_FUNCTION_BEGIN(__get_datapage)
21 .cfi_startproc
22 /* We don't want that exposed or overridable as we want other objects
23 * to be able to bl directly to here
24 */
25 .protected __get_datapage
26 .hidden __get_datapage
27
28 mflr r0
29 .cfi_register lr,r0
30
31 bcl 20,31,1f
32 .global __kernel_datapage_offset;
33__kernel_datapage_offset:
34 .long 0
351:
36 mflr r3
37 mtlr r0
38 lwz r0,0(r3)
39 add r3,r0,r3
40 blr
41 .cfi_endproc
42V_FUNCTION_END(__get_datapage)
43
44/*
45 * void *__kernel_get_syscall_map(unsigned int *syscall_count);
46 *
47 * Returns a pointer to the syscall map. The map is agnostic to the
48 * size of "long"; unlike kernel bitops, it stores bits from top to
49 * bottom so that memory actually contains a linear bitmap.
50 * Check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
51 * the 32-bit int at N >> 5.
52 */
53V_FUNCTION_BEGIN(__kernel_get_syscall_map)
54 .cfi_startproc
55 mflr r12
56 .cfi_register lr,r12
57
58 mr r4,r3
59 bl __get_datapage@local
60 mtlr r12
61 addi r3,r3,CFG_SYSCALL_MAP32
62 cmpli cr0,r4,0
63 beqlr
64 li r0,__NR_syscalls
65 stw r0,0(r4)
66 blr
67 .cfi_endproc
68V_FUNCTION_END(__kernel_get_syscall_map)
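
A small sketch of the bit layout described above, assuming the caller has
already obtained the map pointer from __kernel_get_syscall_map (lookup not
shown):

    /* Is syscall n implemented? Bits run from the top of each word down. */
    static int syscall_implemented(const unsigned int *map, unsigned int n)
    {
    	return (map[n >> 5] & (0x80000000u >> (n & 0x1f))) != 0;
    }
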
diff --git a/arch/ppc64/kernel/vdso32/gettimeofday.S b/arch/ppc64/kernel/vdso32/gettimeofday.S
new file mode 100644
index 000000000000..ca7f415195c4
--- /dev/null
+++ b/arch/ppc64/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,139 @@
1/*
2 * Userland implementation of gettimeofday() for 32-bit processes in a
3 * ppc64 kernel for use in the vDSO
4 *
5 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12#include <linux/config.h>
13#include <asm/processor.h>
14#include <asm/ppc_asm.h>
15#include <asm/vdso.h>
16#include <asm/offsets.h>
17#include <asm/unistd.h>
18
19 .text
20/*
21 * Exact prototype of gettimeofday
22 *
23 * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
24 *
25 */
26V_FUNCTION_BEGIN(__kernel_gettimeofday)
27 .cfi_startproc
28 mflr r12
29 .cfi_register lr,r12
30
31 mr r10,r3 /* r10 saves tv */
32 mr r11,r4 /* r11 saves tz */
33 bl __get_datapage@local /* get data page */
34 mr r9, r3 /* datapage ptr in r9 */
35 bl __do_get_xsec@local /* get xsec from tb & kernel */
36 bne- 2f /* out of line -> do syscall */
37
38 /* seconds are xsec >> 20 */
39 rlwinm r5,r4,12,20,31
40 rlwimi r5,r3,12,0,19
41 stw r5,TVAL32_TV_SEC(r10)
42
43 /* get remaining xsec and convert to usec. we scale
44 * up remaining xsec by 12 bits and get the top 32 bits
45 * of the multiplication
46 */
47 rlwinm r5,r4,12,0,19
48 lis r6,1000000@h
49 ori r6,r6,1000000@l
50 mulhwu r5,r5,r6
51 stw r5,TVAL32_TV_USEC(r10)
52
53 cmpli cr0,r11,0 /* check if tz is NULL */
54 beq 1f
55 lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
56 lwz r5,CFG_TZ_DSTTIME(r9)
57 stw r4,TZONE_TZ_MINWEST(r11)
58 stw r5,TZONE_TZ_DSTTIME(r11)
59
601: mtlr r12
61 blr
62
632: mr r3,r10
64 mr r4,r11
65 li r0,__NR_gettimeofday
66 sc
67 b 1b
68 .cfi_endproc
69V_FUNCTION_END(__kernel_gettimeofday)
70
71/*
72 * This is the core of gettimeofday(); it returns the xsec
73 * value in r3 & r4 and expects the datapage ptr (not clobbered)
74 * in r9. Clobbers r0,r4,r5,r6,r7,r8.
75 */
76__do_get_xsec:
77 .cfi_startproc
78 /* Check for update count & load values. We use the low
79 * order 32 bits of the update count
80 */
811: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9)
82 andi. r0,r8,1 /* pending update ? loop */
83 bne- 1b
84 xor r0,r8,r8 /* create dependency */
85 add r9,r9,r0
86
87 /* Load orig stamp (offset to TB) */
88 lwz r5,CFG_TB_ORIG_STAMP(r9)
89 lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
90
91 /* Get a stable TB value */
922: mftbu r3
93 mftbl r4
94 mftbu r0
95 cmpl cr0,r3,r0
96 bne- 2b
97
98 /* Subtract the tb orig stamp. If the high part is non-zero, we jump to the
99 * slow path which calls the syscall. If it's ok, then we have our 32-bit
100 * tb_ticks value in r7
101 */
102 subfc r7,r6,r4
103 subfe. r0,r5,r3
104 bne- 3f
105
106 /* Load scale factor & do multiplication */
107 lwz r5,CFG_TB_TO_XS(r9) /* load values */
108 lwz r6,(CFG_TB_TO_XS+4)(r9)
109 mulhwu r4,r7,r5
110 mulhwu r6,r7,r6
111 mullw r6,r7,r5
112 addc r6,r6,r0
113
114 /* At this point, we have the scaled xsec value in r4 + XER:CA
115 * we load & add the stamp since epoch
116 */
117 lwz r5,CFG_STAMP_XSEC(r9)
118 lwz r6,(CFG_STAMP_XSEC+4)(r9)
119 adde r4,r4,r6
120 addze r3,r5
121
122 /* We now have our result in r3,r4. We create a fake dependency
123 * on that result and re-check the counter
124 */
125 xor r0,r4,r4
126 add r9,r9,r0
127 lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
128 cmpl cr0,r8,r0 /* check if updated */
129 bne- 1b
130
131 /* Warning! The caller expects CR:EQ to be set to indicate a
132 * successful calculation (so it won't fall back to the syscall
133 * method). We have overridden that CR bit in the counter check,
134 * but fortunately, the loop exit condition _is_ CR:EQ set, so
135 * we can exit safely here. If you change this code, be careful
136 * of that side effect.
137 */
1383: blr
139 .cfi_endproc
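
The same conversion in C, as a hedged reference for the assembly above:
seconds are xsec >> 20, and microseconds come from scaling the 20
fractional bits up by 12 and keeping the high 32 bits of the multiply by
1000000. The sample xsec value is hypothetical:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
    	/* Hypothetical timestamp in xsec units (2^-20 seconds) */
    	unsigned long long xsec = (1000ULL << 20) + 524288;

    	unsigned long sec  = (unsigned long)(xsec >> 20);
    	unsigned long usec = (unsigned long)
    		((((xsec & 0xfffff) << 12) * 1000000ULL) >> 32);

    	assert(sec == 1000 && usec == 500000);
    	printf("%lu.%06lu\n", sec, usec);
    	return 0;
    }
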
diff --git a/arch/ppc64/kernel/vdso32/sigtramp.S b/arch/ppc64/kernel/vdso32/sigtramp.S
new file mode 100644
index 000000000000..e04642781917
--- /dev/null
+++ b/arch/ppc64/kernel/vdso32/sigtramp.S
@@ -0,0 +1,300 @@
1/*
2 * Signal trampolines for 32-bit processes in a ppc64 kernel for
3 * use in the vDSO
4 *
5 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
6 * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com), IBM Corp.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <linux/config.h>
14#include <asm/processor.h>
15#include <asm/ppc_asm.h>
16#include <asm/unistd.h>
17#include <asm/vdso.h>
18
19 .text
20
21/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
22 the return address to get an address in the middle of the presumed
23 call instruction. Since we don't have a call here, we artificially
24 extend the range covered by the unwind info by adding a nop before
25 the real start. */
26 nop
27V_FUNCTION_BEGIN(__kernel_sigtramp32)
28.Lsig_start = . - 4
29 li r0,__NR_sigreturn
30 sc
31.Lsig_end:
32V_FUNCTION_END(__kernel_sigtramp32)
33
34.Lsigrt_start:
35 nop
36V_FUNCTION_BEGIN(__kernel_sigtramp_rt32)
37 li r0,__NR_rt_sigreturn
38 sc
39.Lsigrt_end:
40V_FUNCTION_END(__kernel_sigtramp_rt32)
41
42 .section .eh_frame,"a",@progbits
43
44/* Register r1 can be found at offset 4 of a pt_regs structure.
45 A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
46#define cfa_save \
47 .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
48 .uleb128 9f - 1f; /* length */ \
491: \
50 .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
51 .byte 0x06; /* DW_OP_deref */ \
52 .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
53 .byte 0x06; /* DW_OP_deref */ \
549:
55
56/* Register REGNO can be found at offset OFS of a pt_regs structure.
57 A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
58#define rsave(regno, ofs) \
59 .byte 0x10; /* DW_CFA_expression */ \
60 .uleb128 regno; /* regno */ \
61 .uleb128 9f - 1f; /* length */ \
621: \
63 .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
64 .byte 0x06; /* DW_OP_deref */ \
65 .ifne ofs; \
66 .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
67 .endif; \
689:
69
70/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
71 of the VMX reg struct. The VMX reg struct is at offset VREGS of
72 the pt_regs struct. This macro is for REGNO == 0, and contains
73 'subroutines' that the other macros jump to. */
74#define vsave_msr0(regno) \
75 .byte 0x10; /* DW_CFA_expression */ \
76 .uleb128 regno + 77; /* regno */ \
77 .uleb128 9f - 1f; /* length */ \
781: \
79 .byte 0x30 + regno; /* DW_OP_lit0 */ \
802: \
81 .byte 0x40; /* DW_OP_lit16 */ \
82 .byte 0x1e; /* DW_OP_mul */ \
833: \
84 .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
85 .byte 0x06; /* DW_OP_deref */ \
86 .byte 0x12; /* DW_OP_dup */ \
87 .byte 0x23; /* DW_OP_plus_uconst */ \
88 .uleb128 33*RSIZE; /* msr offset */ \
89 .byte 0x06; /* DW_OP_deref */ \
90 .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
91 .byte 0x1a; /* DW_OP_and */ \
92 .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
93 .byte 0x30; /* DW_OP_lit0 */ \
94 .byte 0x29; /* DW_OP_eq */ \
95 .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
96 .byte 0x13; /* DW_OP_drop, pop the 0 */ \
97 .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
98 .byte 0x22; /* DW_OP_plus */ \
99 .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
1009:
101
102/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
103 of the VMX reg struct. REGNO is 1 thru 31. */
104#define vsave_msr1(regno) \
105 .byte 0x10; /* DW_CFA_expression */ \
106 .uleb128 regno + 77; /* regno */ \
107 .uleb128 9f - 1f; /* length */ \
1081: \
109 .byte 0x30 + regno; /* DW_OP_lit n */ \
110 .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
1119:
112
113/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
114 the VMX save block. */
115#define vsave_msr2(regno, ofs) \
116 .byte 0x10; /* DW_CFA_expression */ \
117 .uleb128 regno + 77; /* regno */ \
118 .uleb128 9f - 1f; /* length */ \
1191: \
120 .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
121 .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
1229:
123
124/* VMX register REGNO is at offset OFS of the VMX save area. */
125#define vsave(regno, ofs) \
126 .byte 0x10; /* DW_CFA_expression */ \
127 .uleb128 regno + 77; /* regno */ \
128 .uleb128 9f - 1f; /* length */ \
1291: \
130 .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
131 .byte 0x06; /* DW_OP_deref */ \
132 .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
133 .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
1349:
135
136/* This is where the pt_regs pointer can be found on the stack. */
137#define PTREGS 64+28
138
139/* Size of regs. */
140#define RSIZE 4
141
142/* This is the offset of the VMX regs. */
143#define VREGS 48*RSIZE+34*8
144
145/* Describe where general purpose regs are saved. */
146#define EH_FRAME_GEN \
147 cfa_save; \
148 rsave ( 0, 0*RSIZE); \
149 rsave ( 2, 2*RSIZE); \
150 rsave ( 3, 3*RSIZE); \
151 rsave ( 4, 4*RSIZE); \
152 rsave ( 5, 5*RSIZE); \
153 rsave ( 6, 6*RSIZE); \
154 rsave ( 7, 7*RSIZE); \
155 rsave ( 8, 8*RSIZE); \
156 rsave ( 9, 9*RSIZE); \
157 rsave (10, 10*RSIZE); \
158 rsave (11, 11*RSIZE); \
159 rsave (12, 12*RSIZE); \
160 rsave (13, 13*RSIZE); \
161 rsave (14, 14*RSIZE); \
162 rsave (15, 15*RSIZE); \
163 rsave (16, 16*RSIZE); \
164 rsave (17, 17*RSIZE); \
165 rsave (18, 18*RSIZE); \
166 rsave (19, 19*RSIZE); \
167 rsave (20, 20*RSIZE); \
168 rsave (21, 21*RSIZE); \
169 rsave (22, 22*RSIZE); \
170 rsave (23, 23*RSIZE); \
171 rsave (24, 24*RSIZE); \
172 rsave (25, 25*RSIZE); \
173 rsave (26, 26*RSIZE); \
174 rsave (27, 27*RSIZE); \
175 rsave (28, 28*RSIZE); \
176 rsave (29, 29*RSIZE); \
177 rsave (30, 30*RSIZE); \
178 rsave (31, 31*RSIZE); \
179 rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
180 rsave (65, 36*RSIZE); /* lr */ \
181 rsave (70, 38*RSIZE) /* cr */
182
183/* Describe where the FP regs are saved. */
184#define EH_FRAME_FP \
185 rsave (32, 48*RSIZE + 0*8); \
186 rsave (33, 48*RSIZE + 1*8); \
187 rsave (34, 48*RSIZE + 2*8); \
188 rsave (35, 48*RSIZE + 3*8); \
189 rsave (36, 48*RSIZE + 4*8); \
190 rsave (37, 48*RSIZE + 5*8); \
191 rsave (38, 48*RSIZE + 6*8); \
192 rsave (39, 48*RSIZE + 7*8); \
193 rsave (40, 48*RSIZE + 8*8); \
194 rsave (41, 48*RSIZE + 9*8); \
195 rsave (42, 48*RSIZE + 10*8); \
196 rsave (43, 48*RSIZE + 11*8); \
197 rsave (44, 48*RSIZE + 12*8); \
198 rsave (45, 48*RSIZE + 13*8); \
199 rsave (46, 48*RSIZE + 14*8); \
200 rsave (47, 48*RSIZE + 15*8); \
201 rsave (48, 48*RSIZE + 16*8); \
202 rsave (49, 48*RSIZE + 17*8); \
203 rsave (50, 48*RSIZE + 18*8); \
204 rsave (51, 48*RSIZE + 19*8); \
205 rsave (52, 48*RSIZE + 20*8); \
206 rsave (53, 48*RSIZE + 21*8); \
207 rsave (54, 48*RSIZE + 22*8); \
208 rsave (55, 48*RSIZE + 23*8); \
209 rsave (56, 48*RSIZE + 24*8); \
210 rsave (57, 48*RSIZE + 25*8); \
211 rsave (58, 48*RSIZE + 26*8); \
212 rsave (59, 48*RSIZE + 27*8); \
213 rsave (60, 48*RSIZE + 28*8); \
214 rsave (61, 48*RSIZE + 29*8); \
215 rsave (62, 48*RSIZE + 30*8); \
216 rsave (63, 48*RSIZE + 31*8)
217
218/* Describe where the VMX regs are saved. */
219#ifdef CONFIG_ALTIVEC
220#define EH_FRAME_VMX \
221 vsave_msr0 ( 0); \
222 vsave_msr1 ( 1); \
223 vsave_msr1 ( 2); \
224 vsave_msr1 ( 3); \
225 vsave_msr1 ( 4); \
226 vsave_msr1 ( 5); \
227 vsave_msr1 ( 6); \
228 vsave_msr1 ( 7); \
229 vsave_msr1 ( 8); \
230 vsave_msr1 ( 9); \
231 vsave_msr1 (10); \
232 vsave_msr1 (11); \
233 vsave_msr1 (12); \
234 vsave_msr1 (13); \
235 vsave_msr1 (14); \
236 vsave_msr1 (15); \
237 vsave_msr1 (16); \
238 vsave_msr1 (17); \
239 vsave_msr1 (18); \
240 vsave_msr1 (19); \
241 vsave_msr1 (20); \
242 vsave_msr1 (21); \
243 vsave_msr1 (22); \
244 vsave_msr1 (23); \
245 vsave_msr1 (24); \
246 vsave_msr1 (25); \
247 vsave_msr1 (26); \
248 vsave_msr1 (27); \
249 vsave_msr1 (28); \
250 vsave_msr1 (29); \
251 vsave_msr1 (30); \
252 vsave_msr1 (31); \
253 vsave_msr2 (33, 32*16+12); \
254 vsave (32, 32*16)
255#else
256#define EH_FRAME_VMX
257#endif
258
259.Lcie:
260 .long .Lcie_end - .Lcie_start
261.Lcie_start:
262 .long 0 /* CIE ID */
263 .byte 1 /* Version number */
264 .string "zR" /* NUL-terminated augmentation string */
265 .uleb128 4 /* Code alignment factor */
266 .sleb128 -4 /* Data alignment factor */
267 .byte 67 /* Return address register column, ap */
268 .uleb128 1 /* Augmentation value length */
269 .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */
270 .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
271 .balign 4
272.Lcie_end:
273
274 .long .Lfde0_end - .Lfde0_start
275.Lfde0_start:
276 .long .Lfde0_start - .Lcie /* CIE pointer. */
277 .long .Lsig_start - . /* PC start, length */
278 .long .Lsig_end - .Lsig_start
279 .uleb128 0 /* Augmentation */
280 EH_FRAME_GEN
281 EH_FRAME_FP
282 EH_FRAME_VMX
283 .balign 4
284.Lfde0_end:
285
286/* We have a different stack layout for rt_sigreturn. */
287#undef PTREGS
288#define PTREGS 64+16+128+20+28
289
290 .long .Lfde1_end - .Lfde1_start
291.Lfde1_start:
292 .long .Lfde1_start - .Lcie /* CIE pointer. */
293 .long .Lsigrt_start - . /* PC start, length */
294 .long .Lsigrt_end - .Lsigrt_start
295 .uleb128 0 /* Augmentation */
296 EH_FRAME_GEN
297 EH_FRAME_FP
298 EH_FRAME_VMX
299 .balign 4
300.Lfde1_end:
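
A hedged illustration of what one rsave() use above expands to: the byte
sequence for rsave(3, 3*RSIZE) under the sigreturn frame (PTREGS = 64+28 =
92, RSIZE = 4), which tells the unwinder that r3 lives at
*(*(r1 + 92) + 12). The array below is only a reading aid, not something
emitted by this file:

    static const unsigned char rsave_r3[] = {
    	0x10,			/* DW_CFA_expression */
    	0x03,			/* register number 3 (uleb128) */
    	0x06,			/* expression length: 6 bytes (uleb128) */
    	0x71, 0xdc, 0x00,	/* DW_OP_breg1, sleb128(92): r1 + PTREGS */
    	0x06,			/* DW_OP_deref: load the pt_regs pointer */
    	0x23, 0x0c,		/* DW_OP_plus_uconst, uleb128(12): + 3*RSIZE */
    };
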
diff --git a/arch/ppc64/kernel/vdso32/vdso32.lds.S b/arch/ppc64/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 000000000000..cca27bd03a57
--- /dev/null
+++ b/arch/ppc64/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,111 @@
1
2/*
3 * This is the infamous ld script for the 32-bit vDSO
4 * library
5 */
6#include <asm/vdso.h>
7
8/* Default link addresses for the vDSOs */
9OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
10OUTPUT_ARCH(powerpc:common)
11ENTRY(_start)
12
13SECTIONS
14{
15 . = VDSO32_LBASE + SIZEOF_HEADERS;
16 .hash : { *(.hash) } :text
17 .dynsym : { *(.dynsym) }
18 .dynstr : { *(.dynstr) }
19 .gnu.version : { *(.gnu.version) }
20 .gnu.version_d : { *(.gnu.version_d) }
21 .gnu.version_r : { *(.gnu.version_r) }
22
23 . = ALIGN (16);
24 .text :
25 {
26 *(.text .stub .text.* .gnu.linkonce.t.*)
27 }
28 PROVIDE (__etext = .);
29 PROVIDE (_etext = .);
30 PROVIDE (etext = .);
31
32 /* Other stuff is appended to the text segment: */
33 .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
34 .rodata1 : { *(.rodata1) }
35
36 .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
37 .eh_frame : { KEEP (*(.eh_frame)) } :text
38 .gcc_except_table : { *(.gcc_except_table) }
39 .fixup : { *(.fixup) }
40
41 .got ALIGN(4) : { *(.got.plt) *(.got) }
42
43 .dynamic : { *(.dynamic) } :text :dynamic
44
45 _end = .;
46 __end = .;
47 PROVIDE (end = .);
48
49
50 /* Stabs debugging sections are here too
51 */
52 .stab 0 : { *(.stab) }
53 .stabstr 0 : { *(.stabstr) }
54 .stab.excl 0 : { *(.stab.excl) }
55 .stab.exclstr 0 : { *(.stab.exclstr) }
56 .stab.index 0 : { *(.stab.index) }
57 .stab.indexstr 0 : { *(.stab.indexstr) }
58 .comment 0 : { *(.comment) }
59 .debug 0 : { *(.debug) }
60 .line 0 : { *(.line) }
61
62 .debug_srcinfo 0 : { *(.debug_srcinfo) }
63 .debug_sfnames 0 : { *(.debug_sfnames) }
64
65 .debug_aranges 0 : { *(.debug_aranges) }
66 .debug_pubnames 0 : { *(.debug_pubnames) }
67
68 .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
69 .debug_abbrev 0 : { *(.debug_abbrev) }
70 .debug_line 0 : { *(.debug_line) }
71 .debug_frame 0 : { *(.debug_frame) }
72 .debug_str 0 : { *(.debug_str) }
73 .debug_loc 0 : { *(.debug_loc) }
74 .debug_macinfo 0 : { *(.debug_macinfo) }
75
76 .debug_weaknames 0 : { *(.debug_weaknames) }
77 .debug_funcnames 0 : { *(.debug_funcnames) }
78 .debug_typenames 0 : { *(.debug_typenames) }
79 .debug_varnames 0 : { *(.debug_varnames) }
80
81 /DISCARD/ : { *(.note.GNU-stack) }
82 /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
83 /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
84}
85
86
87PHDRS
88{
89 text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
90 dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
91 eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
92}
93
94
95/*
96 * This controls what symbols we export from the DSO.
97 */
98VERSION
99{
100 VDSO_VERSION_STRING {
101 global:
102 __kernel_datapage_offset; /* Has to be there for the kernel to find it */
103 __kernel_get_syscall_map;
104 __kernel_gettimeofday;
105 __kernel_sync_dicache;
106 __kernel_sync_dicache_p5;
107 __kernel_sigtramp32;
108 __kernel_sigtramp_rt32;
109 local: *;
110 };
111}
diff --git a/arch/ppc64/kernel/vdso32/vdso32_wrapper.S b/arch/ppc64/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 000000000000..76ca28e09d29
--- /dev/null
+++ b/arch/ppc64/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,13 @@
1#include <linux/init.h>
2#include <asm/page.h>
3
4 .section ".data.page_aligned"
5
6 .globl vdso32_start, vdso32_end
7 .balign PAGE_SIZE
8vdso32_start:
9 .incbin "arch/ppc64/kernel/vdso32/vdso32.so"
10 .balign PAGE_SIZE
11vdso32_end:
12
13 .previous
diff --git a/arch/ppc64/kernel/vdso64/Makefile b/arch/ppc64/kernel/vdso64/Makefile
new file mode 100644
index 000000000000..bd3f70b1a384
--- /dev/null
+++ b/arch/ppc64/kernel/vdso64/Makefile
@@ -0,0 +1,35 @@
1# List of files in the vdso, has to be asm only for now
2
3obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o
4
5# Build rules
6
7targets := $(obj-vdso64) vdso64.so
8obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
9
10EXTRA_CFLAGS := -shared -s -fno-common -fno-builtin
11EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1
12EXTRA_AFLAGS := -D__VDSO64__ -s
13
14obj-y += vdso64_wrapper.o
15extra-y += vdso64.lds
16CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
17
18# Force dependency (incbin is bad)
19$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
20
21# link rule for the .so file, .lds has to be first
22$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64)
23 $(call if_changed,vdso64ld)
24
25# assembly rules for the .S files
26$(obj-vdso64): %.o: %.S
27 $(call if_changed_dep,vdso64as)
28
29# actual build commands
30quiet_cmd_vdso64ld = VDSO64L $@
31 cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
32quiet_cmd_vdso64as = VDSO64A $@
33 cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
34
35
diff --git a/arch/ppc64/kernel/vdso64/cacheflush.S b/arch/ppc64/kernel/vdso64/cacheflush.S
new file mode 100644
index 000000000000..d9696ffcf334
--- /dev/null
+++ b/arch/ppc64/kernel/vdso64/cacheflush.S
@@ -0,0 +1,64 @@
1/*
2 * vDSO provided cache flush routines
3 *
4 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
5 * IBM Corp.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12#include <linux/config.h>
13#include <asm/processor.h>
14#include <asm/ppc_asm.h>
15#include <asm/vdso.h>
16#include <asm/offsets.h>
17
18 .text
19
20/*
21 * Default "generic" version of __kernel_sync_dicache.
22 *
23 * void __kernel_sync_dicache(unsigned long start, unsigned long end)
24 *
25 * Flushes the data cache & invalidates the instruction cache for the
26 * provided range [start, end)
27 *
28 * Note: all CPUs supported by this kernel have a 128-byte cache
29 * line size, so we don't have to fetch that info from the datapage
30 */
31V_FUNCTION_BEGIN(__kernel_sync_dicache)
32 .cfi_startproc
33 li r5,127
34 andc r6,r3,r5 /* round low to line bdy */
35 subf r8,r6,r4 /* compute length */
36 add r8,r8,r5 /* ensure we get enough */
37 srwi. r8,r8,7 /* compute line count */
38 beqlr /* nothing to do? */
39 mtctr r8
40 mr r3,r6
411: dcbst 0,r3
42 addi r3,r3,128
43 bdnz 1b
44 sync
45 mtctr r8
461: icbi 0,r6
47 addi r6,r6,128
48 bdnz 1b
49 isync
50 blr
51 .cfi_endproc
52V_FUNCTION_END(__kernel_sync_dicache)
53
54
55/*
56 * POWER5 version of __kernel_sync_dicache
57 */
58V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
59 .cfi_startproc
60 sync
61 isync
62 blr
63 .cfi_endproc
64V_FUNCTION_END(__kernel_sync_dicache_p5)
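/* A hedged C rendering of the generic routine above, for reference only
 * (not part of this file): flush each data-cache line, then invalidate
 * each instruction-cache line, over [start, end). The 128-byte line size
 * matches the note above; all names here are illustrative. */
#include <stdint.h>

#define LINE_SIZE 128UL

static void sync_dicache_sketch(uintptr_t start, uintptr_t end)
{
	uintptr_t p, low = start & ~(LINE_SIZE - 1);

	for (p = low; p < end; p += LINE_SIZE)		/* the dcbst loop */
		__asm__ __volatile__("dcbst 0,%0" : : "r" (p) : "memory");
	__asm__ __volatile__("sync");			/* order the flushes */
	for (p = low; p < end; p += LINE_SIZE)		/* the icbi loop */
		__asm__ __volatile__("icbi 0,%0" : : "r" (p) : "memory");
	__asm__ __volatile__("isync");			/* discard prefetched insns */
}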
diff --git a/arch/ppc64/kernel/vdso64/datapage.S b/arch/ppc64/kernel/vdso64/datapage.S
new file mode 100644
index 000000000000..18afd971c9d9
--- /dev/null
+++ b/arch/ppc64/kernel/vdso64/datapage.S
@@ -0,0 +1,68 @@
1/*
2 * Access to the shared data page by the vDSO & syscall map
3 *
4 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/config.h>
13#include <asm/processor.h>
14#include <asm/ppc_asm.h>
15#include <asm/offsets.h>
16#include <asm/unistd.h>
17#include <asm/vdso.h>
18
19 .text
20V_FUNCTION_BEGIN(__get_datapage)
21 .cfi_startproc
22 /* We don't want that exposed or overridable as we want other objects
23 * to be able to bl directly to here
24 */
25 .protected __get_datapage
26 .hidden __get_datapage
27
28 mflr r0
29 .cfi_register lr,r0
30
31 bcl 20,31,1f
32 .global __kernel_datapage_offset;
33__kernel_datapage_offset:
34 .long 0
351:
36 mflr r3
37 mtlr r0
38 lwz r0,0(r3)
39 add r3,r0,r3
40 blr
41 .cfi_endproc
42V_FUNCTION_END(__get_datapage)
43
44/*
45 * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
46 *
47 * Returns a pointer to the syscall map. Unlike the kernel bitops, the
48 * map is agnostic to the size of "long": it stores bits from top to
49 * bottom so that memory actually contains a linear bitmap.
50 * Check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of the
51 * 32-bit word at index N >> 5.
52 */
53V_FUNCTION_BEGIN(__kernel_get_syscall_map)
54 .cfi_startproc
55 mflr r12
56 .cfi_register lr,r12
57
58 mr r4,r3
59 bl V_LOCAL_FUNC(__get_datapage)
60 mtlr r12
61 addi r3,r3,CFG_SYSCALL_MAP64
62 cmpli cr0,r4,0
63 beqlr
64 li r0,__NR_syscalls
65 stw r0,0(r4)
66 blr
67 .cfi_endproc
68V_FUNCTION_END(__kernel_get_syscall_map)
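/* Hedged usage sketch (illustrative, not part of this file): testing
 * whether syscall N is implemented via the map returned by
 * __kernel_get_syscall_map(), following the bit layout described above. */
#include <stdint.h>

static int syscall_implemented(const uint32_t *map, unsigned int n)
{
	/* word n >> 5, bit counted from the top of the word */
	return (map[n >> 5] & (0x80000000u >> (n & 0x1f))) != 0;
}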
diff --git a/arch/ppc64/kernel/vdso64/gettimeofday.S b/arch/ppc64/kernel/vdso64/gettimeofday.S
new file mode 100644
index 000000000000..ed3f970ff05e
--- /dev/null
+++ b/arch/ppc64/kernel/vdso64/gettimeofday.S
@@ -0,0 +1,91 @@
1/*
2 * Userland implementation of gettimeofday() for 64-bit processes in a
3 * ppc64 kernel for use in the vDSO
4 *
5 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
6 * IBM Corp.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <linux/config.h>
14#include <asm/processor.h>
15#include <asm/ppc_asm.h>
16#include <asm/vdso.h>
17#include <asm/offsets.h>
18
19 .text
20/*
21 * Exact prototype of gettimeofday
22 *
23 * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
24 *
25 */
26V_FUNCTION_BEGIN(__kernel_gettimeofday)
27 .cfi_startproc
28 mflr r12
29 .cfi_register lr,r12
30
31 mr r11,r3 /* r11 holds tv */
32 mr r10,r4 /* r10 holds tz */
33 bl V_LOCAL_FUNC(__get_datapage) /* get data page */
34 bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
35 lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */
36 ori r7,r7,16960
37 rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
38 rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
39 std r5,TVAL64_TV_SEC(r11) /* store sec in tv */
40 subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
41 mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / XSEC_PER_SEC */
42 rldicl r0,r0,44,20
43 cmpldi cr0,r10,0 /* check if tz is NULL */
44 std r0,TVAL64_TV_USEC(r11) /* store usec in tv */
45 beq 1f
46 lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
47 lwz r5,CFG_TZ_DSTTIME(r3)
48 stw r4,TZONE_TZ_MINWEST(r10)
49 stw r5,TZONE_TZ_DSTTIME(r10)
501: mtlr r12
51 li r3,0 /* always success */
52 blr
53 .cfi_endproc
54V_FUNCTION_END(__kernel_gettimeofday)
55
56
57/*
58 * This is the core of gettimeofday(): it returns the xsec
59 * value in r4 and expects the datapage pointer (not clobbered)
60 * in r3. Clobbers r0, r4, r5, r6, r7, r8.
61 */
62V_FUNCTION_BEGIN(__do_get_xsec)
63 .cfi_startproc
64 /* check for update count & load values */
651: ld r7,CFG_TB_UPDATE_COUNT(r3)
66 andi. r0,r7,1 /* pending update ? loop */
67 bne- 1b
68 xor r0,r7,r7 /* create dependency */
69 add r3,r3,r0
70
71 /* Get TB & offset it */
72 mftb r8
73 ld r9,CFG_TB_ORIG_STAMP(r3)
74 subf r8,r9,r8
75
76 /* Scale result */
77 ld r5,CFG_TB_TO_XS(r3)
78 mulhdu r8,r8,r5
79
80 /* Add stamp since epoch */
81 ld r6,CFG_STAMP_XSEC(r3)
82 add r4,r6,r8
83
84 xor r0,r4,r4
85 add r3,r3,r0
86 ld r0,CFG_TB_UPDATE_COUNT(r3)
87 cmpld cr0,r0,r7 /* check if updated */
88 bne- 1b
89 blr
90 .cfi_endproc
91V_FUNCTION_END(__do_get_xsec)
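/* Hedged C rendering of the lockless read above (names are illustrative):
 * spin while the kernel's update count is odd, then retry if the count
 * changed across the read, exactly like a seqlock reader. The 128-bit
 * multiply mirrors mulhdu (high 64 bits of the product). */
#include <stdint.h>

struct vdso_data_sketch {
	uint64_t tb_update_count;	/* CFG_TB_UPDATE_COUNT */
	uint64_t tb_orig_stamp;		/* CFG_TB_ORIG_STAMP */
	uint64_t tb_to_xs;		/* CFG_TB_TO_XS */
	uint64_t stamp_xsec;		/* CFG_STAMP_XSEC */
};

static uint64_t get_xsec_sketch(const volatile struct vdso_data_sketch *d,
				uint64_t (*read_tb)(void))
{
	uint64_t count, tb, xsec;

	for (;;) {
		count = d->tb_update_count;
		if (count & 1)			/* update pending, retry */
			continue;
		tb = read_tb() - d->tb_orig_stamp;
		xsec = d->stamp_xsec +
		       (uint64_t)(((unsigned __int128)tb * d->tb_to_xs) >> 64);
		if (d->tb_update_count == count)	/* unchanged: done */
			break;
	}
	return xsec;
}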
diff --git a/arch/ppc64/kernel/vdso64/sigtramp.S b/arch/ppc64/kernel/vdso64/sigtramp.S
new file mode 100644
index 000000000000..8ae8f205e470
--- /dev/null
+++ b/arch/ppc64/kernel/vdso64/sigtramp.S
@@ -0,0 +1,294 @@
1/*
2 * Signal trampoline for 64-bit processes in a ppc64 kernel for
3 * use in the vDSO
4 *
5 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
6 * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com), IBM Corp.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#include <linux/config.h>
14#include <asm/processor.h>
15#include <asm/ppc_asm.h>
16#include <asm/unistd.h>
17#include <asm/vdso.h>
18
19 .text
20
21/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
22 the return address to get an address in the middle of the presumed
23 call instruction. Since we don't have a call here, we artificially
24 extend the range covered by the unwind info by padding before the
25 real start. */
26 nop
27 .balign 8
28V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
29.Lsigrt_start = . - 4
30 addi r1, r1, __SIGNAL_FRAMESIZE
31 li r0,__NR_rt_sigreturn
32 sc
33.Lsigrt_end:
34V_FUNCTION_END(__kernel_sigtramp_rt64)
35/* The ".balign 8" above and the following zeros mimic the old stack
36 trampoline layout. The last magic value is the ucontext pointer,
37 chosen in such a way that older libgcc unwind code returns a zero
38 for a sigcontext pointer. */
39 .long 0,0,0
40 .quad 0,-21*8
41
42/* Register r1 can be found at offset 8 of a pt_regs structure.
43 A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
44#define cfa_save \
45 .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
46 .uleb128 9f - 1f; /* length */ \
471: \
48 .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
49 .byte 0x06; /* DW_OP_deref */ \
50 .byte 0x23; .uleb128 RSIZE; /* DW_OP_plus_uconst */ \
51 .byte 0x06; /* DW_OP_deref */ \
529:
53
54/* Register REGNO can be found at offset OFS of a pt_regs structure.
55 A pointer to the pt_regs is stored in memory at the old sp plus PTREGS. */
56#define rsave(regno, ofs) \
57 .byte 0x10; /* DW_CFA_expression */ \
58 .uleb128 regno; /* regno */ \
59 .uleb128 9f - 1f; /* length */ \
601: \
61 .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
62 .byte 0x06; /* DW_OP_deref */ \
63 .ifne ofs; \
64 .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
65 .endif; \
669:
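/* Hedged C equivalent of the computation that rsave() encodes in DWARF
 * bytecode (illustrative helper, not part of this file): the expression
 * yields the address where register REGNO was saved, and the unwinder
 * then dereferences it to recover the value. */
static unsigned long rsave_addr_sketch(unsigned long old_r1,
				       unsigned long ptregs_off,
				       unsigned long ofs)
{
	/* DW_OP_breg1 PTREGS; DW_OP_deref: load the pt_regs pointer */
	unsigned long regs = *(unsigned long *)(old_r1 + ptregs_off);

	return regs + ofs;		/* DW_OP_plus_uconst ofs */
}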
67
68/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
69 of the VMX reg struct. A pointer to the VMX reg struct is at VREGS in
70 the pt_regs struct. This macro is for REGNO == 0, and contains
71 'subroutines' that the other macros jump to. */
72#define vsave_msr0(regno) \
73 .byte 0x10; /* DW_CFA_expression */ \
74 .uleb128 regno + 77; /* regno */ \
75 .uleb128 9f - 1f; /* length */ \
761: \
77 .byte 0x30 + regno; /* DW_OP_lit0 */ \
782: \
79 .byte 0x40; /* DW_OP_lit16 */ \
80 .byte 0x1e; /* DW_OP_mul */ \
813: \
82 .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
83 .byte 0x06; /* DW_OP_deref */ \
84 .byte 0x12; /* DW_OP_dup */ \
85 .byte 0x23; /* DW_OP_plus_uconst */ \
86 .uleb128 33*RSIZE; /* msr offset */ \
87 .byte 0x06; /* DW_OP_deref */ \
88 .byte 0x0c; .long 1 << 25; /* DW_OP_const4u */ \
89 .byte 0x1a; /* DW_OP_and */ \
90 .byte 0x12; /* DW_OP_dup, ret 0 if bra taken */ \
91 .byte 0x30; /* DW_OP_lit0 */ \
92 .byte 0x29; /* DW_OP_eq */ \
93 .byte 0x28; .short 0x7fff; /* DW_OP_bra to end */ \
94 .byte 0x13; /* DW_OP_drop, pop the 0 */ \
95 .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
96 .byte 0x06; /* DW_OP_deref */ \
97 .byte 0x22; /* DW_OP_plus */ \
98 .byte 0x2f; .short 0x7fff; /* DW_OP_skip to end */ \
999:
100
101/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
102 of the VMX reg struct. REGNO is 1 thru 31. */
103#define vsave_msr1(regno) \
104 .byte 0x10; /* DW_CFA_expression */ \
105 .uleb128 regno + 77; /* regno */ \
106 .uleb128 9f - 1f; /* length */ \
1071: \
108 .byte 0x30 + regno; /* DW_OP_lit n */ \
109 .byte 0x2f; .short 2b - 9f; /* DW_OP_skip */ \
1109:
111
112/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
113 the VMX save block. */
114#define vsave_msr2(regno, ofs) \
115 .byte 0x10; /* DW_CFA_expression */ \
116 .uleb128 regno + 77; /* regno */ \
117 .uleb128 9f - 1f; /* length */ \
1181: \
119 .byte 0x0a; .short ofs; /* DW_OP_const2u */ \
120 .byte 0x2f; .short 3b - 9f; /* DW_OP_skip */ \
1219:
122
123/* VMX register REGNO is at offset OFS of the VMX save area. */
124#define vsave(regno, ofs) \
125 .byte 0x10; /* DW_CFA_expression */ \
126 .uleb128 regno + 77; /* regno */ \
127 .uleb128 9f - 1f; /* length */ \
1281: \
129 .byte 0x71; .sleb128 PTREGS; /* DW_OP_breg1 */ \
130 .byte 0x06; /* DW_OP_deref */ \
131 .byte 0x23; .uleb128 VREGS; /* DW_OP_plus_uconst */ \
132 .byte 0x06; /* DW_OP_deref */ \
133 .byte 0x23; .uleb128 ofs; /* DW_OP_plus_uconst */ \
1349:
135
136/* This is where the pt_regs pointer can be found on the stack. */
137#define PTREGS 128+168+56
138
139/* Size of regs. */
140#define RSIZE 8
141
142/* This is the offset of the VMX reg pointer. */
143#define VREGS 48*RSIZE+33*8
144
145/* Describe where general purpose regs are saved. */
146#define EH_FRAME_GEN \
147 cfa_save; \
148 rsave ( 0, 0*RSIZE); \
149 rsave ( 2, 2*RSIZE); \
150 rsave ( 3, 3*RSIZE); \
151 rsave ( 4, 4*RSIZE); \
152 rsave ( 5, 5*RSIZE); \
153 rsave ( 6, 6*RSIZE); \
154 rsave ( 7, 7*RSIZE); \
155 rsave ( 8, 8*RSIZE); \
156 rsave ( 9, 9*RSIZE); \
157 rsave (10, 10*RSIZE); \
158 rsave (11, 11*RSIZE); \
159 rsave (12, 12*RSIZE); \
160 rsave (13, 13*RSIZE); \
161 rsave (14, 14*RSIZE); \
162 rsave (15, 15*RSIZE); \
163 rsave (16, 16*RSIZE); \
164 rsave (17, 17*RSIZE); \
165 rsave (18, 18*RSIZE); \
166 rsave (19, 19*RSIZE); \
167 rsave (20, 20*RSIZE); \
168 rsave (21, 21*RSIZE); \
169 rsave (22, 22*RSIZE); \
170 rsave (23, 23*RSIZE); \
171 rsave (24, 24*RSIZE); \
172 rsave (25, 25*RSIZE); \
173 rsave (26, 26*RSIZE); \
174 rsave (27, 27*RSIZE); \
175 rsave (28, 28*RSIZE); \
176 rsave (29, 29*RSIZE); \
177 rsave (30, 30*RSIZE); \
178 rsave (31, 31*RSIZE); \
179 rsave (67, 32*RSIZE); /* ap, used as temp for nip */ \
180 rsave (65, 36*RSIZE); /* lr */ \
181 rsave (70, 38*RSIZE) /* cr */
182
183/* Describe where the FP regs are saved. */
184#define EH_FRAME_FP \
185 rsave (32, 48*RSIZE + 0*8); \
186 rsave (33, 48*RSIZE + 1*8); \
187 rsave (34, 48*RSIZE + 2*8); \
188 rsave (35, 48*RSIZE + 3*8); \
189 rsave (36, 48*RSIZE + 4*8); \
190 rsave (37, 48*RSIZE + 5*8); \
191 rsave (38, 48*RSIZE + 6*8); \
192 rsave (39, 48*RSIZE + 7*8); \
193 rsave (40, 48*RSIZE + 8*8); \
194 rsave (41, 48*RSIZE + 9*8); \
195 rsave (42, 48*RSIZE + 10*8); \
196 rsave (43, 48*RSIZE + 11*8); \
197 rsave (44, 48*RSIZE + 12*8); \
198 rsave (45, 48*RSIZE + 13*8); \
199 rsave (46, 48*RSIZE + 14*8); \
200 rsave (47, 48*RSIZE + 15*8); \
201 rsave (48, 48*RSIZE + 16*8); \
202 rsave (49, 48*RSIZE + 17*8); \
203 rsave (50, 48*RSIZE + 18*8); \
204 rsave (51, 48*RSIZE + 19*8); \
205 rsave (52, 48*RSIZE + 20*8); \
206 rsave (53, 48*RSIZE + 21*8); \
207 rsave (54, 48*RSIZE + 22*8); \
208 rsave (55, 48*RSIZE + 23*8); \
209 rsave (56, 48*RSIZE + 24*8); \
210 rsave (57, 48*RSIZE + 25*8); \
211 rsave (58, 48*RSIZE + 26*8); \
212 rsave (59, 48*RSIZE + 27*8); \
213 rsave (60, 48*RSIZE + 28*8); \
214 rsave (61, 48*RSIZE + 29*8); \
215 rsave (62, 48*RSIZE + 30*8); \
216 rsave (63, 48*RSIZE + 31*8)
217
218/* Describe where the VMX regs are saved. */
219#ifdef CONFIG_ALTIVEC
220#define EH_FRAME_VMX \
221 vsave_msr0 ( 0); \
222 vsave_msr1 ( 1); \
223 vsave_msr1 ( 2); \
224 vsave_msr1 ( 3); \
225 vsave_msr1 ( 4); \
226 vsave_msr1 ( 5); \
227 vsave_msr1 ( 6); \
228 vsave_msr1 ( 7); \
229 vsave_msr1 ( 8); \
230 vsave_msr1 ( 9); \
231 vsave_msr1 (10); \
232 vsave_msr1 (11); \
233 vsave_msr1 (12); \
234 vsave_msr1 (13); \
235 vsave_msr1 (14); \
236 vsave_msr1 (15); \
237 vsave_msr1 (16); \
238 vsave_msr1 (17); \
239 vsave_msr1 (18); \
240 vsave_msr1 (19); \
241 vsave_msr1 (20); \
242 vsave_msr1 (21); \
243 vsave_msr1 (22); \
244 vsave_msr1 (23); \
245 vsave_msr1 (24); \
246 vsave_msr1 (25); \
247 vsave_msr1 (26); \
248 vsave_msr1 (27); \
249 vsave_msr1 (28); \
250 vsave_msr1 (29); \
251 vsave_msr1 (30); \
252 vsave_msr1 (31); \
253 vsave_msr2 (33, 32*16+12); \
254 vsave (32, 33*16)
255#else
256#define EH_FRAME_VMX
257#endif
258
259 .section .eh_frame,"a",@progbits
260.Lcie:
261 .long .Lcie_end - .Lcie_start
262.Lcie_start:
263 .long 0 /* CIE ID */
264 .byte 1 /* Version number */
265 .string "zR" /* NUL-terminated augmentation string */
266 .uleb128 4 /* Code alignment factor */
267 .sleb128 -8 /* Data alignment factor */
268 .byte 67 /* Return address register column, ap */
269 .uleb128 1 /* Augmentation value length */
270 .byte 0x14 /* DW_EH_PE_pcrel | DW_EH_PE_udata8. */
271 .byte 0x0c,1,0 /* DW_CFA_def_cfa: r1 ofs 0 */
272 .balign 8
273.Lcie_end:
274
275 .long .Lfde0_end - .Lfde0_start
276.Lfde0_start:
277 .long .Lfde0_start - .Lcie /* CIE pointer. */
278 .quad .Lsigrt_start - . /* PC start, length */
279 .quad .Lsigrt_end - .Lsigrt_start
280 .uleb128 0 /* Augmentation */
281 EH_FRAME_GEN
282 EH_FRAME_FP
283 EH_FRAME_VMX
284# Do we really need to describe the frame at this point? ie. will
285# we ever have some call chain that returns somewhere past the addi?
286# I don't think so, since gcc doesn't support async signals.
287# .byte 0x41 /* DW_CFA_advance_loc 1*4 */
288#undef PTREGS
289#define PTREGS 168+56
290# EH_FRAME_GEN
291# EH_FRAME_FP
292# EH_FRAME_VMX
293 .balign 8
294.Lfde0_end:
diff --git a/arch/ppc64/kernel/vdso64/vdso64.lds.S b/arch/ppc64/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 000000000000..942c815c7bc7
--- /dev/null
+++ b/arch/ppc64/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,110 @@
1/*
2 * This is the infamous ld script for the 64-bit vdso
3 * library
4 */
5#include <asm/vdso.h>
6
7OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
8OUTPUT_ARCH(powerpc:common64)
9ENTRY(_start)
10
11SECTIONS
12{
13 . = VDSO64_LBASE + SIZEOF_HEADERS;
14 .hash : { *(.hash) } :text
15 .dynsym : { *(.dynsym) }
16 .dynstr : { *(.dynstr) }
17 .gnu.version : { *(.gnu.version) }
18 .gnu.version_d : { *(.gnu.version_d) }
19 .gnu.version_r : { *(.gnu.version_r) }
20
21 . = ALIGN (16);
22 .text :
23 {
24 *(.text .stub .text.* .gnu.linkonce.t.*)
25 *(.sfpr .glink)
26 }
27 PROVIDE (__etext = .);
28 PROVIDE (_etext = .);
29 PROVIDE (etext = .);
30
31 /* Other stuff is appended to the text segment: */
32 .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
33 .rodata1 : { *(.rodata1) }
34 .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
35 .eh_frame : { KEEP (*(.eh_frame)) } :text
36 .gcc_except_table : { *(.gcc_except_table) }
37
38 .opd ALIGN(8) : { KEEP (*(.opd)) }
39 .got ALIGN(8) : { *(.got .toc) }
40 .rela.dyn ALIGN(8) : { *(.rela.dyn) }
41
42 .dynamic : { *(.dynamic) } :text :dynamic
43
44 _end = .;
45 PROVIDE (end = .);
46
47 /* Stabs debugging sections are here too
48 */
49 .stab 0 : { *(.stab) }
50 .stabstr 0 : { *(.stabstr) }
51 .stab.excl 0 : { *(.stab.excl) }
52 .stab.exclstr 0 : { *(.stab.exclstr) }
53 .stab.index 0 : { *(.stab.index) }
54 .stab.indexstr 0 : { *(.stab.indexstr) }
55 .comment 0 : { *(.comment) }
56 /* DWARF debug sections.
57 Symbols in the DWARF debugging sections are relative to the beginning
58 of the section so we begin them at 0. */
59 /* DWARF 1 */
60 .debug 0 : { *(.debug) }
61 .line 0 : { *(.line) }
62 /* GNU DWARF 1 extensions */
63 .debug_srcinfo 0 : { *(.debug_srcinfo) }
64 .debug_sfnames 0 : { *(.debug_sfnames) }
65 /* DWARF 1.1 and DWARF 2 */
66 .debug_aranges 0 : { *(.debug_aranges) }
67 .debug_pubnames 0 : { *(.debug_pubnames) }
68 /* DWARF 2 */
69 .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
70 .debug_abbrev 0 : { *(.debug_abbrev) }
71 .debug_line 0 : { *(.debug_line) }
72 .debug_frame 0 : { *(.debug_frame) }
73 .debug_str 0 : { *(.debug_str) }
74 .debug_loc 0 : { *(.debug_loc) }
75 .debug_macinfo 0 : { *(.debug_macinfo) }
76 /* SGI/MIPS DWARF 2 extensions */
77 .debug_weaknames 0 : { *(.debug_weaknames) }
78 .debug_funcnames 0 : { *(.debug_funcnames) }
79 .debug_typenames 0 : { *(.debug_typenames) }
80 .debug_varnames 0 : { *(.debug_varnames) }
81
82 /DISCARD/ : { *(.note.GNU-stack) }
83 /DISCARD/ : { *(.branch_lt) }
84 /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.*) }
85 /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
86}
87
88PHDRS
89{
90 text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
91 dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
92 eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
93}
94
95/*
96 * This controls what symbols we export from the DSO.
97 */
98VERSION
99{
100 VDSO_VERSION_STRING {
101 global:
102 __kernel_datapage_offset; /* Has to be there for the kernel to find it */
103 __kernel_get_syscall_map;
104 __kernel_gettimeofday;
105 __kernel_sync_dicache;
106 __kernel_sync_dicache_p5;
107 __kernel_sigtramp_rt64;
108 local: *;
109 };
110}
diff --git a/arch/ppc64/kernel/vdso64/vdso64_wrapper.S b/arch/ppc64/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 000000000000..771c2741c492
--- /dev/null
+++ b/arch/ppc64/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,13 @@
1#include <linux/init.h>
2#include <asm/page.h>
3
4 .section ".data.page_aligned"
5
6 .globl vdso64_start, vdso64_end
7 .balign PAGE_SIZE
8vdso64_start:
9 .incbin "arch/ppc64/kernel/vdso64/vdso64.so"
10 .balign PAGE_SIZE
11vdso64_end:
12
13 .previous
diff --git a/arch/ppc64/kernel/vecemu.c b/arch/ppc64/kernel/vecemu.c
new file mode 100644
index 000000000000..cb207629f21f
--- /dev/null
+++ b/arch/ppc64/kernel/vecemu.c
@@ -0,0 +1,346 @@
1/*
2 * Routines to emulate some Altivec/VMX instructions, specifically
3 * those that can trap when given denormalized operands in Java mode.
4 */
5#include <linux/kernel.h>
6#include <linux/errno.h>
7#include <linux/sched.h>
8#include <asm/ptrace.h>
9#include <asm/processor.h>
10#include <asm/uaccess.h>
11
12/* Functions in vector.S */
13extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
14extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
15extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
16extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
17extern void vrefp(vector128 *dst, vector128 *src);
18extern void vrsqrtefp(vector128 *dst, vector128 *src);
19extern void vexptep(vector128 *dst, vector128 *src);
20
21static unsigned int exp2s[8] = {
22 0x800000,
23 0x8b95c2,
24 0x9837f0,
25 0xa5fed7,
26 0xb504f3,
27 0xc5672a,
28 0xd744fd,
29 0xeac0c7
30};
31
32/*
33 * Computes an estimate of 2^x. The `s' argument is the 32-bit
34 * single-precision floating-point representation of x.
35 */
36static unsigned int eexp2(unsigned int s)
37{
38 int exp, pwr;
39 unsigned int mant, frac;
40
41 /* extract exponent field from input */
42 exp = ((s >> 23) & 0xff) - 127;
43 if (exp > 7) {
44 /* check for NaN input */
45 if (exp == 128 && (s & 0x7fffff) != 0)
46 return s | 0x400000; /* return QNaN */
47 /* 2^-big = 0, 2^+big = +Inf */
48 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
49 }
50 if (exp < -23)
51 return 0x3f800000; /* 1.0 */
52
53 /* convert to fixed point integer in 9.23 representation */
54 pwr = (s & 0x7fffff) | 0x800000;
55 if (exp > 0)
56 pwr <<= exp;
57 else
58 pwr >>= -exp;
59 if (s & 0x80000000)
60 pwr = -pwr;
61
62 /* extract integer part, which becomes exponent part of result */
63 exp = (pwr >> 23) + 126;
64 if (exp >= 254)
65 return 0x7f800000;
66 if (exp < -23)
67 return 0;
68
69 /* table lookup on top 3 bits of fraction to get mantissa */
70 mant = exp2s[(pwr >> 20) & 7];
71
72 /* linear interpolation using remaining 20 bits of fraction */
73 asm("mulhwu %0,%1,%2" : "=r" (frac)
74 : "r" (pwr << 12), "r" (0x172b83ff));
75 asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
76 mant += frac;
77
78 if (exp >= 0)
79 return mant + (exp << 23);
80
81 /* denormalized result */
82 exp = -exp;
83 mant += 1 << (exp - 1);
84 return mant >> exp;
85}
86
87/*
88 * Computes an estimate of log_2(x). The `s' argument is the 32-bit
89 * single-precision floating-point representation of x.
90 */
91static unsigned int elog2(unsigned int s)
92{
93 int exp, mant, lz, frac;
94
95 exp = s & 0x7f800000;
96 mant = s & 0x7fffff;
97 if (exp == 0x7f800000) { /* Inf or NaN */
98 if (mant != 0)
99 s |= 0x400000; /* turn NaN into QNaN */
100 return s;
101 }
102 if ((exp | mant) == 0) /* +0 or -0 */
103 return 0xff800000; /* return -Inf */
104
105 if (exp == 0) {
106 /* denormalized */
107 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
108 mant <<= lz - 8;
109 exp = (-118 - lz) << 23;
110 } else {
111 mant |= 0x800000;
112 exp -= 127 << 23;
113 }
114
115 if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */
116 exp |= 0x400000; /* 0.5 * 2^23 */
117 asm("mulhwu %0,%1,%2" : "=r" (mant)
118 : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */
119 }
120 if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */
121 exp |= 0x200000; /* 0.25 * 2^23 */
122 asm("mulhwu %0,%1,%2" : "=r" (mant)
123 : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */
124 }
125 if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */
126 exp |= 0x100000; /* 0.125 * 2^23 */
127 asm("mulhwu %0,%1,%2" : "=r" (mant)
128 : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */
129 }
130 if (mant > 0x800000) { /* 1.0 * 2^23 */
131 /* calculate (mant - 1) * 1.381097463 */
132 /* 1.381097463 == 0.125 / (2^0.125 - 1) */
133 asm("mulhwu %0,%1,%2" : "=r" (frac)
134 : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
135 exp += frac;
136 }
137 s = exp & 0x80000000;
138 if (exp != 0) {
139 if (s)
140 exp = -exp;
141 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
142 lz = 8 - lz;
143 if (lz > 0)
144 exp >>= lz;
145 else if (lz < 0)
146 exp <<= -lz;
147 s += ((lz + 126) << 23) + exp;
148 }
149 return s;
150}
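/* Hedged spot check (illustrative only, not part of the kernel file):
 * both helpers above operate on raw IEEE single-precision bit patterns,
 * so a harness moves between float values and bits through a union. */
union f32bits { float f; unsigned int u; };

static void estimate_demo_sketch(void)
{
	union f32bits v = { .f = 3.0f };

	v.u = eexp2(v.u);	/* 2^3: expect v.f ~= 8.0 */
	v.u = elog2(v.u);	/* log2(8): expect v.f ~= 3.0 */
}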
151
152#define VSCR_SAT 1
153
154static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
155{
156 int exp, mant;
157
158 exp = (x >> 23) & 0xff;
159 mant = x & 0x7fffff;
160 if (exp == 255 && mant != 0)
161 return 0; /* NaN -> 0 */
162 exp = exp - 127 + scale;
163 if (exp < 0)
164 return 0; /* round towards zero */
165 if (exp >= 31) {
166 /* saturate, unless the result would be -2^31 */
167 if (x + (scale << 23) != 0xcf000000)
168 *vscrp |= VSCR_SAT;
169 return (x & 0x80000000)? 0x80000000: 0x7fffffff;
170 }
171 mant |= 0x800000;
172 mant = (mant << 7) >> (30 - exp);
173 return (x & 0x80000000)? -mant: mant;
174}
175
176static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
177{
178 int exp;
179 unsigned int mant;
180
181 exp = (x >> 23) & 0xff;
182 mant = x & 0x7fffff;
183 if (exp == 255 && mant != 0)
184 return 0; /* NaN -> 0 */
185 exp = exp - 127 + scale;
186 if (exp < 0)
187 return 0; /* round towards zero */
188 if (x & 0x80000000) {
189 /* negative => saturate to 0 */
190 *vscrp |= VSCR_SAT;
191 return 0;
192 }
193 if (exp >= 32) {
194 /* saturate */
195 *vscrp |= VSCR_SAT;
196 return 0xffffffff;
197 }
198 mant |= 0x800000;
199 mant = (mant << 8) >> (31 - exp);
200 return mant;
201}
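/* Hedged example of the saturating behaviour above (illustrative):
 * a negative input to ctuxs() clamps to 0 and raises VSCR_SAT. */
static void ctuxs_demo_sketch(void)
{
	unsigned int vscr = 0;
	unsigned int r = ctuxs(0xbf800000, 0, &vscr);	/* bits of -1.0f */

	/* here r == 0 and (vscr & VSCR_SAT) != 0 */
	(void)r;
}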
202
203/* Round to floating integer, towards 0 */
204static unsigned int rfiz(unsigned int x)
205{
206 int exp;
207
208 exp = ((x >> 23) & 0xff) - 127;
209 if (exp == 128 && (x & 0x7fffff) != 0)
210 return x | 0x400000; /* NaN -> make it a QNaN */
211 if (exp >= 23)
212 return x; /* it's an integer already (or Inf) */
213 if (exp < 0)
214 return x & 0x80000000; /* |x| < 1.0 rounds to 0 */
215 return x & ~(0x7fffff >> exp);
216}
217
218/* Round to floating integer, towards +/- Inf */
219static unsigned int rfii(unsigned int x)
220{
221 int exp, mask;
222
223 exp = ((x >> 23) & 0xff) - 127;
224 if (exp == 128 && (x & 0x7fffff) != 0)
225 return x | 0x400000; /* NaN -> make it a QNaN */
226 if (exp >= 23)
227 return x; /* it's an integer already (or Inf) */
228 if ((x & 0x7fffffff) == 0)
229 return x; /* +/-0 -> +/-0 */
230 if (exp < 0)
231 /* 0 < |x| < 1.0 rounds to +/- 1.0 */
232 return (x & 0x80000000) | 0x3f800000;
233 mask = 0x7fffff >> exp;
234 /* mantissa overflows into exponent - that's OK,
235 it can't overflow into the sign bit */
236 return (x + mask) & ~mask;
237}
238
239/* Round to floating integer, to nearest */
240static unsigned int rfin(unsigned int x)
241{
242 int exp, half;
243
244 exp = ((x >> 23) & 0xff) - 127;
245 if (exp == 128 && (x & 0x7fffff) != 0)
246 return x | 0x400000; /* NaN -> make it a QNaN */
247 if (exp >= 23)
248 return x; /* it's an integer already (or Inf) */
249 if (exp < -1)
250 return x & 0x80000000; /* |x| < 0.5 -> +/-0 */
251 if (exp == -1)
252 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
253 return (x & 0x80000000) | 0x3f800000;
254 half = 0x400000 >> exp;
255 /* add 0.5 to the magnitude and chop off the fraction bits */
256 return (x + half) & ~(0x7fffff >> exp);
257}
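/* Hedged spot checks for the three rounding helpers (illustrative,
 * reusing the f32bits union from the sketch above): rfiz truncates
 * toward zero, rfii rounds away from zero, and rfin rounds to nearest
 * with halves going away from zero. */
static float round_demo_sketch(unsigned int (*op)(unsigned int), float x)
{
	union f32bits v = { .f = x };

	v.u = op(v.u);
	return v.f;
}
/* round_demo_sketch(rfiz, 2.7f) == 2.0f
 * round_demo_sketch(rfii, 2.1f) == 3.0f
 * round_demo_sketch(rfin, -2.5f) == -3.0f */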
258
259int
260emulate_altivec(struct pt_regs *regs)
261{
262 unsigned int instr, i;
263 unsigned int va, vb, vc, vd;
264 vector128 *vrs;
265
266 if (get_user(instr, (unsigned int __user *) regs->nip))
267 return -EFAULT;
268 if ((instr >> 26) != 4)
269 return -EINVAL; /* not an altivec instruction */
270 vd = (instr >> 21) & 0x1f;
271 va = (instr >> 16) & 0x1f;
272 vb = (instr >> 11) & 0x1f;
273 vc = (instr >> 6) & 0x1f;
274
275 vrs = current->thread.vr;
276 switch (instr & 0x3f) {
277 case 10:
278 switch (vc) {
279 case 0: /* vaddfp */
280 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
281 break;
282 case 1: /* vsubfp */
283 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
284 break;
285 case 4: /* vrefp */
286 vrefp(&vrs[vd], &vrs[vb]);
287 break;
288 case 5: /* vrsqrtefp */
289 vrsqrtefp(&vrs[vd], &vrs[vb]);
290 break;
291 case 6: /* vexptefp */
292 for (i = 0; i < 4; ++i)
293 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
294 break;
295 case 7: /* vlogefp */
296 for (i = 0; i < 4; ++i)
297 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
298 break;
299 case 8: /* vrfin */
300 for (i = 0; i < 4; ++i)
301 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
302 break;
303 case 9: /* vrfiz */
304 for (i = 0; i < 4; ++i)
305 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
306 break;
307 case 10: /* vrfip */
308 for (i = 0; i < 4; ++i) {
309 u32 x = vrs[vb].u[i];
310 x = (x & 0x80000000)? rfiz(x): rfii(x);
311 vrs[vd].u[i] = x;
312 }
313 break;
314 case 11: /* vrfim */
315 for (i = 0; i < 4; ++i) {
316 u32 x = vrs[vb].u[i];
317 x = (x & 0x80000000)? rfii(x): rfiz(x);
318 vrs[vd].u[i] = x;
319 }
320 break;
321 case 14: /* vctuxs */
322 for (i = 0; i < 4; ++i)
323 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
324 &current->thread.vscr.u[3]);
325 break;
326 case 15: /* vctsxs */
327 for (i = 0; i < 4; ++i)
328 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
329 &current->thread.vscr.u[3]);
330 break;
331 default:
332 return -EINVAL;
333 }
334 break;
335 case 46: /* vmaddfp */
336 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
337 break;
338 case 47: /* vnmsubfp */
339 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
340 break;
341 default:
342 return -EINVAL;
343 }
344
345 return 0;
346}
diff --git a/arch/ppc64/kernel/vector.S b/arch/ppc64/kernel/vector.S
new file mode 100644
index 000000000000..b79d33e4001e
--- /dev/null
+++ b/arch/ppc64/kernel/vector.S
@@ -0,0 +1,172 @@
1#include <asm/ppc_asm.h>
2#include <asm/processor.h>
3
4/*
5 * The routines below are in assembler so we can closely control the
6 * usage of floating-point registers. These routines must be called
7 * with preempt disabled.
8 */
9 .section ".toc","aw"
10fpzero:
11 .tc FD_0_0[TC],0
12fpone:
13 .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */
14fphalf:
15 .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */
16
17 .text
18/*
19 * Internal routine to enable floating point and set FPSCR to 0.
20 * Don't call it from C; it doesn't use the normal calling convention.
21 */
22fpenable:
23 mfmsr r10
24 ori r11,r10,MSR_FP
25 mtmsr r11
26 isync
27 stfd fr31,-8(r1)
28 stfd fr0,-16(r1)
29 stfd fr1,-24(r1)
30 mffs fr31
31 lfd fr1,fpzero@toc(r2)
32 mtfsf 0xff,fr1
33 blr
34
35fpdisable:
36 mtlr r12
37 mtfsf 0xff,fr31
38 lfd fr1,-24(r1)
39 lfd fr0,-16(r1)
40 lfd fr31,-8(r1)
41 mtmsr r10
42 isync
43 blr
44
45/*
46 * Vector add, floating point.
47 */
48_GLOBAL(vaddfp)
49 mflr r12
50 bl fpenable
51 li r0,4
52 mtctr r0
53 li r6,0
541: lfsx fr0,r4,r6
55 lfsx fr1,r5,r6
56 fadds fr0,fr0,fr1
57 stfsx fr0,r3,r6
58 addi r6,r6,4
59 bdnz 1b
60 b fpdisable
61
62/*
63 * Vector subtract, floating point.
64 */
65_GLOBAL(vsubfp)
66 mflr r12
67 bl fpenable
68 li r0,4
69 mtctr r0
70 li r6,0
711: lfsx fr0,r4,r6
72 lfsx fr1,r5,r6
73 fsubs fr0,fr0,fr1
74 stfsx fr0,r3,r6
75 addi r6,r6,4
76 bdnz 1b
77 b fpdisable
78
79/*
80 * Vector multiply and add, floating point.
81 */
82_GLOBAL(vmaddfp)
83 mflr r12
84 bl fpenable
85 stfd fr2,-32(r1)
86 li r0,4
87 mtctr r0
88 li r7,0
891: lfsx fr0,r4,r7
90 lfsx fr1,r5,r7
91 lfsx fr2,r6,r7
92 fmadds fr0,fr0,fr2,fr1
93 stfsx fr0,r3,r7
94 addi r7,r7,4
95 bdnz 1b
96 lfd fr2,-32(r1)
97 b fpdisable
98
99/*
100 * Vector negative multiply and subtract, floating point.
101 */
102_GLOBAL(vnmsubfp)
103 mflr r12
104 bl fpenable
105 stfd fr2,-32(r1)
106 li r0,4
107 mtctr r0
108 li r7,0
1091: lfsx fr0,r4,r7
110 lfsx fr1,r5,r7
111 lfsx fr2,r6,r7
112 fnmsubs fr0,fr0,fr2,fr1
113 stfsx fr0,r3,r7
114 addi r7,r7,4
115 bdnz 1b
116 lfd fr2,-32(r1)
117 b fpdisable
118
119/*
120 * Vector reciprocal estimate. We just compute 1.0/x.
121 * r3 -> destination, r4 -> source.
122 */
123_GLOBAL(vrefp)
124 mflr r12
125 bl fpenable
126 li r0,4
127 lfd fr1,fpone@toc(r2)
128 mtctr r0
129 li r6,0
1301: lfsx fr0,r4,r6
131 fdivs fr0,fr1,fr0
132 stfsx fr0,r3,r6
133 addi r6,r6,4
134 bdnz 1b
135 b fpdisable
136
137/*
138 * Vector reciprocal square-root estimate, floating point.
139 * We use the frsqrte instruction for the initial estimate followed
140 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
141 * r3 -> destination, r4 -> source.
142 */
143_GLOBAL(vrsqrtefp)
144 mflr r12
145 bl fpenable
146 stfd fr2,-32(r1)
147 stfd fr3,-40(r1)
148 stfd fr4,-48(r1)
149 stfd fr5,-56(r1)
150 li r0,4
151 lfd fr4,fpone@toc(r2)
152 lfd fr5,fphalf@toc(r2)
153 mtctr r0
154 li r6,0
1551: lfsx fr0,r4,r6
156 frsqrte fr1,fr0 /* r = frsqrte(s) */
157 fmuls fr3,fr1,fr0 /* r * s */
158 fmuls fr2,fr1,fr5 /* r * 0.5 */
159 fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
160 fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
161 fmuls fr3,fr1,fr0 /* r * s */
162 fmuls fr2,fr1,fr5 /* r * 0.5 */
163 fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
164 fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
165 stfsx fr1,r3,r6
166 addi r6,r6,4
167 bdnz 1b
168 lfd fr5,-56(r1)
169 lfd fr4,-48(r1)
170 lfd fr3,-40(r1)
171 lfd fr2,-32(r1)
172 b fpdisable
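/* Hedged C version of the Newton-Raphson refinement used above
 * (illustrative, not part of this file): frsqrte's rough estimate r is
 * improved twice via r' = r + 0.5*r*(1 - s*r*r); each step roughly
 * doubles the accurate bits, which is ample for single precision. */
static float rsqrt_refine_sketch(float s, float r /* initial estimate */)
{
	int i;

	for (i = 0; i < 2; i++)
		r = r + 0.5f * r * (1.0f - s * r * r);
	return r;
}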
diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c
new file mode 100644
index 000000000000..cdd830cb2768
--- /dev/null
+++ b/arch/ppc64/kernel/vio.c
@@ -0,0 +1,640 @@
1/*
2 * IBM PowerPC Virtual I/O Infrastructure Support.
3 *
4 * Copyright (c) 2003 IBM Corp.
5 * Dave Engebretsen engebret@us.ibm.com
6 * Santiago Leon santil@us.ibm.com
7 * Hollis Blanchard <hollisb@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <linux/init.h>
16#include <linux/console.h>
17#include <linux/version.h>
18#include <linux/module.h>
19#include <linux/kobject.h>
20#include <linux/mm.h>
21#include <linux/dma-mapping.h>
22#include <asm/rtas.h>
23#include <asm/iommu.h>
24#include <asm/dma.h>
25#include <asm/ppcdebug.h>
26#include <asm/vio.h>
27#include <asm/hvcall.h>
28#include <asm/iSeries/vio.h>
29#include <asm/iSeries/HvTypes.h>
30#include <asm/iSeries/HvCallXm.h>
31#include <asm/iSeries/HvLpConfig.h>
32
33#define DBGENTER() pr_debug("%s entered\n", __FUNCTION__)
34
35extern struct subsystem devices_subsys; /* needed for vio_find_name() */
36
37static const struct vio_device_id *vio_match_device(
38 const struct vio_device_id *, const struct vio_dev *);
39
40#ifdef CONFIG_PPC_PSERIES
41static struct iommu_table *vio_build_iommu_table(struct vio_dev *);
42static int vio_num_address_cells;
43#endif
44static struct vio_dev *vio_bus_device; /* fake "parent" device */
45
46#ifdef CONFIG_PPC_ISERIES
47static struct vio_dev *__init vio_register_device_iseries(char *type,
48 uint32_t unit_num);
49
50static struct iommu_table veth_iommu_table;
51static struct iommu_table vio_iommu_table;
52
53static struct vio_dev _vio_dev = {
54 .iommu_table = &vio_iommu_table,
55 .dev.bus = &vio_bus_type
56};
57struct device *iSeries_vio_dev = &_vio_dev.dev;
58EXPORT_SYMBOL(iSeries_vio_dev);
59
60#define device_is_compatible(a, b) 1
61
62#endif
63
64/* convert from struct device to struct vio_dev and pass to driver.
65 * dev->driver has already been set by generic code because vio_bus_match
66 * succeeded. */
67static int vio_bus_probe(struct device *dev)
68{
69 struct vio_dev *viodev = to_vio_dev(dev);
70 struct vio_driver *viodrv = to_vio_driver(dev->driver);
71 const struct vio_device_id *id;
72 int error = -ENODEV;
73
74 DBGENTER();
75
76 if (!viodrv->probe)
77 return error;
78
79 id = vio_match_device(viodrv->id_table, viodev);
80 if (id) {
81 error = viodrv->probe(viodev, id);
82 }
83
84 return error;
85}
86
87/* convert from struct device to struct vio_dev and pass to driver. */
88static int vio_bus_remove(struct device *dev)
89{
90 struct vio_dev *viodev = to_vio_dev(dev);
91 struct vio_driver *viodrv = to_vio_driver(dev->driver);
92
93 DBGENTER();
94
95 if (viodrv->remove) {
96 return viodrv->remove(viodev);
97 }
98
99 /* driver can't remove */
100 return 1;
101}
102
103/**
104 * vio_register_driver: - Register a new vio driver
105 * @drv: The vio_driver structure to be registered.
106 */
107int vio_register_driver(struct vio_driver *viodrv)
108{
109 printk(KERN_DEBUG "%s: driver %s registering\n", __FUNCTION__,
110 viodrv->name);
111
112 /* fill in 'struct driver' fields */
113 viodrv->driver.name = viodrv->name;
114 viodrv->driver.bus = &vio_bus_type;
115 viodrv->driver.probe = vio_bus_probe;
116 viodrv->driver.remove = vio_bus_remove;
117
118 return driver_register(&viodrv->driver);
119}
120EXPORT_SYMBOL(vio_register_driver);
121
122/**
123 * vio_unregister_driver - Remove registration of vio driver.
124 * @driver: The vio_driver struct to be removed from registration
125 */
126void vio_unregister_driver(struct vio_driver *viodrv)
127{
128 driver_unregister(&viodrv->driver);
129}
130EXPORT_SYMBOL(vio_unregister_driver);
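/* Hedged sketch of a client of the two calls above (every name below is
 * made up for illustration): a minimal driver fills in name, id_table,
 * probe and remove, and lets vio_register_driver() wire up the generic
 * struct device_driver fields. */
static struct vio_device_id demo_ids[] = {
	{ "demo-type", "" },
	{ NULL, NULL }			/* sentinel: vio_match_device stops here */
};

static int demo_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
	return 0;			/* claim the device */
}

static int demo_remove(struct vio_dev *vdev)
{
	return 0;
}

static struct vio_driver demo_driver = {
	.name		= "demo",
	.id_table	= demo_ids,
	.probe		= demo_probe,
	.remove		= demo_remove,
};

/* module init/exit would call vio_register_driver(&demo_driver) and
 * vio_unregister_driver(&demo_driver) respectively. */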
131
132/**
133 * vio_match_device: - Tell if a VIO device has a matching VIO device id structure.
134 * @ids: array of VIO device id structures to search in
135 * @dev: the VIO device structure to match against
136 *
137 * Used by a driver to check whether a VIO device present in the
138 * system is in its list of supported devices. Returns the matching
139 * vio_device_id structure or NULL if there is no match.
140 */
141static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids,
142 const struct vio_dev *dev)
143{
144 DBGENTER();
145
146 while (ids->type) {
147 if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
148 device_is_compatible(dev->dev.platform_data, ids->compat))
149 return ids;
150 ids++;
151 }
152 return NULL;
153}
154
155#ifdef CONFIG_PPC_ISERIES
156void __init iommu_vio_init(void)
157{
158 struct iommu_table *t;
159 struct iommu_table_cb cb;
160 unsigned long cbp;
161 unsigned long itc_entries;
162
163 cb.itc_busno = 255; /* Bus 255 is the virtual bus */
164 cb.itc_virtbus = 0xff; /* Ask for virtual bus */
165
166 cbp = virt_to_abs(&cb);
167 HvCallXm_getTceTableParms(cbp);
168
169 itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
170 veth_iommu_table.it_size = itc_entries / 2;
171 veth_iommu_table.it_busno = cb.itc_busno;
172 veth_iommu_table.it_offset = cb.itc_offset;
173 veth_iommu_table.it_index = cb.itc_index;
174 veth_iommu_table.it_type = TCE_VB;
175 veth_iommu_table.it_blocksize = 1;
176
177 t = iommu_init_table(&veth_iommu_table);
178
179 if (!t)
180 printk("Virtual Bus VETH TCE table failed.\n");
181
182 vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size;
183 vio_iommu_table.it_busno = cb.itc_busno;
184 vio_iommu_table.it_offset = cb.itc_offset +
185 veth_iommu_table.it_size;
186 vio_iommu_table.it_index = cb.itc_index;
187 vio_iommu_table.it_type = TCE_VB;
188 vio_iommu_table.it_blocksize = 1;
189
190 t = iommu_init_table(&vio_iommu_table);
191
192 if (!t)
193 printk("Virtual Bus VIO TCE table failed.\n");
194}
195#endif
196
197#ifdef CONFIG_PPC_PSERIES
198static void probe_bus_pseries(void)
199{
200 struct device_node *node_vroot, *of_node;
201
202 node_vroot = find_devices("vdevice");
203 if ((node_vroot == NULL) || (node_vroot->child == NULL))
204 /* this machine doesn't do virtual IO, and that's ok */
205 return;
206
207 vio_num_address_cells = prom_n_addr_cells(node_vroot->child);
208
209 /*
210 * Create struct vio_devices for each virtual device in the device tree.
211 * Drivers will associate with them later.
212 */
213 for (of_node = node_vroot->child; of_node != NULL;
214 of_node = of_node->sibling) {
215 printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node);
216 vio_register_device_node(of_node);
217 }
218}
219#endif
220
221#ifdef CONFIG_PPC_ISERIES
222static void probe_bus_iseries(void)
223{
224 HvLpIndexMap vlan_map = HvLpConfig_getVirtualLanIndexMap();
225 struct vio_dev *viodev;
226 int i;
227
228 /* there is only one of each of these */
229 vio_register_device_iseries("viocons", 0);
230 vio_register_device_iseries("vscsi", 0);
231
232 vlan_map = HvLpConfig_getVirtualLanIndexMap();
233 for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
234 if ((vlan_map & (0x8000 >> i)) == 0)
235 continue;
236 viodev = vio_register_device_iseries("vlan", i);
237 /* veth is special and has its own iommu_table */
238 viodev->iommu_table = &veth_iommu_table;
239 }
240 for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++)
241 vio_register_device_iseries("viodasd", i);
242 for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++)
243 vio_register_device_iseries("viocd", i);
244 for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++)
245 vio_register_device_iseries("viotape", i);
246}
247#endif
248
249/**
250 * vio_bus_init: - Initialize the virtual IO bus
251 */
252static int __init vio_bus_init(void)
253{
254 int err;
255
256 err = bus_register(&vio_bus_type);
257 if (err) {
258 printk(KERN_ERR "failed to register VIO bus\n");
259 return err;
260 }
261
262 /* the fake parent of all vio devices, just to give us a nice directory */
263 vio_bus_device = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
264 if (!vio_bus_device) {
265 return 1;
266 }
267 memset(vio_bus_device, 0, sizeof(struct vio_dev));
268 strcpy(vio_bus_device->dev.bus_id, "vio");
269
270 err = device_register(&vio_bus_device->dev);
271 if (err) {
272 printk(KERN_WARNING "%s: device_register returned %i\n", __FUNCTION__,
273 err);
274 kfree(vio_bus_device);
275 return err;
276 }
277
278#ifdef CONFIG_PPC_PSERIES
279 probe_bus_pseries();
280#endif
281#ifdef CONFIG_PPC_ISERIES
282 probe_bus_iseries();
283#endif
284
285 return 0;
286}
287
288__initcall(vio_bus_init);
289
290/* vio_dev refcount hit 0 */
291static void __devinit vio_dev_release(struct device *dev)
292{
293 DBGENTER();
294
295#ifdef CONFIG_PPC_PSERIES
296 /* XXX free TCE table */
297 of_node_put(dev->platform_data);
298#endif
299 kfree(to_vio_dev(dev));
300}
301
302#ifdef CONFIG_PPC_PSERIES
303static ssize_t viodev_show_devspec(struct device *dev, char *buf)
304{
305 struct device_node *of_node = dev->platform_data;
306
307 return sprintf(buf, "%s\n", of_node->full_name);
308}
309DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL);
310#endif
311
312static ssize_t viodev_show_name(struct device *dev, char *buf)
313{
314 return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
315}
316DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL);
317
318static struct vio_dev * __devinit vio_register_device_common(
319 struct vio_dev *viodev, char *name, char *type,
320 uint32_t unit_address, struct iommu_table *iommu_table)
321{
322 DBGENTER();
323
324 viodev->name = name;
325 viodev->type = type;
326 viodev->unit_address = unit_address;
327 viodev->iommu_table = iommu_table;
328 /* init generic 'struct device' fields: */
329 viodev->dev.parent = &vio_bus_device->dev;
330 viodev->dev.bus = &vio_bus_type;
331 viodev->dev.release = vio_dev_release;
332
333 /* register with generic device framework */
334 if (device_register(&viodev->dev)) {
335 printk(KERN_ERR "%s: failed to register device %s\n",
336 __FUNCTION__, viodev->dev.bus_id);
337 return NULL;
338 }
339 device_create_file(&viodev->dev, &dev_attr_name);
340
341 return viodev;
342}
343
344#ifdef CONFIG_PPC_PSERIES
345/**
346 * vio_register_device_node: - Register a new vio device.
347 * @of_node: The OF node for this device.
348 *
349 * Creates and initializes a vio_dev structure from the data in
350 * of_node (dev.platform_data) and adds it to the list of virtual devices.
351 * Returns a pointer to the created vio_dev or NULL if node has
352 * NULL device_type or compatible fields.
353 */
354struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
355{
356 struct vio_dev *viodev;
357 unsigned int *unit_address;
358 unsigned int *irq_p;
359
360 DBGENTER();
361
362 /* we need the 'device_type' property, in order to match with drivers */
363 if (of_node->type == NULL) {
364 printk(KERN_WARNING
365 "%s: node %s missing 'device_type'\n", __FUNCTION__,
366 of_node->name ? of_node->name : "<unknown>");
367 return NULL;
368 }
369
370 unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
371 if (!unit_address) {
372 printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__,
373 of_node->name ? of_node->name : "<unknown>");
374 return NULL;
375 }
376
377 /* allocate a vio_dev for this node */
378 viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
379 if (!viodev) {
380 return NULL;
381 }
382 memset(viodev, 0, sizeof(struct vio_dev));
383
384 viodev->dev.platform_data = of_node_get(of_node);
385
386 viodev->irq = NO_IRQ;
387 irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
388 if (irq_p) {
389 int virq = virt_irq_create_mapping(*irq_p);
390 if (virq == NO_IRQ) {
391 printk(KERN_ERR "Unable to allocate interrupt "
392 "number for %s\n", of_node->full_name);
393 } else
394 viodev->irq = irq_offset_up(virq);
395 }
396
397 snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
398
399 /* register with generic device framework */
400 if (vio_register_device_common(viodev, of_node->name, of_node->type,
401 *unit_address, vio_build_iommu_table(viodev))
402 == NULL) {
403 /* XXX free TCE table */
404 kfree(viodev);
405 return NULL;
406 }
407 device_create_file(&viodev->dev, &dev_attr_devspec);
408
409 return viodev;
410}
411EXPORT_SYMBOL(vio_register_device_node);
412#endif
413
414#ifdef CONFIG_PPC_ISERIES
415/**
416 * vio_register_device: - Register a new vio device.
417 * @voidev: The device to register.
418 */
419static struct vio_dev *__init vio_register_device_iseries(char *type,
420 uint32_t unit_num)
421{
422 struct vio_dev *viodev;
423
424 DBGENTER();
425
426 /* allocate a vio_dev for this node */
427 viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
428 if (!viodev)
429 return NULL;
430 memset(viodev, 0, sizeof(struct vio_dev));
431
432 snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num);
433
434 return vio_register_device_common(viodev, viodev->dev.bus_id, type,
435 unit_num, &vio_iommu_table);
436}
437#endif
438
439void __devinit vio_unregister_device(struct vio_dev *viodev)
440{
441 DBGENTER();
442#ifdef CONFIG_PPC_PSERIES
443 device_remove_file(&viodev->dev, &dev_attr_devspec);
444#endif
445 device_remove_file(&viodev->dev, &dev_attr_name);
446 device_unregister(&viodev->dev);
447}
448EXPORT_SYMBOL(vio_unregister_device);
449
450#ifdef CONFIG_PPC_PSERIES
451/**
452 * vio_get_attribute: - get attribute for virtual device
453 * @vdev: The vio device to get property.
454 * @which: The property/attribute to be extracted.
455 * @length: Pointer to length of returned data size (unused if NULL).
456 *
457 * Calls prom.c's get_property() to return the value of the
458 * attribute specified by @which.
459 */
460const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
461{
462 return get_property(vdev->dev.platform_data, (char*)which, length);
463}
464EXPORT_SYMBOL(vio_get_attribute);
465
466/* vio_find_name() - internal because only vio.c knows how we formatted the
467 * kobject name
468 * XXX once vio_bus_type.devices is actually used as a kset in
469 * drivers/base/bus.c, this function should be removed in favor of
470 * "device_find(kobj_name, &vio_bus_type)"
471 */
472static struct vio_dev *vio_find_name(const char *kobj_name)
473{
474 struct kobject *found;
475
476 found = kset_find_obj(&devices_subsys.kset, kobj_name);
477 if (!found)
478 return NULL;
479
480 return to_vio_dev(container_of(found, struct device, kobj));
481}
482
483/**
484 * vio_find_node - find an already-registered vio_dev
485 * @vnode: device_node of the virtual device we're looking for
486 */
487struct vio_dev *vio_find_node(struct device_node *vnode)
488{
489 uint32_t *unit_address;
490 char kobj_name[BUS_ID_SIZE];
491
492 /* construct the kobject name from the device node */
493 unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
494 if (!unit_address)
495 return NULL;
496 snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
497
498 return vio_find_name(kobj_name);
499}
500EXPORT_SYMBOL(vio_find_node);
501
502/**
503 * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree.
504 * @dev: the virtual device.
505 *
506 * Returns a pointer to the built tce tree, or NULL if it can't
507 * find the property.
508 */
509static struct iommu_table * vio_build_iommu_table(struct vio_dev *dev)
510{
511 unsigned int *dma_window;
512 struct iommu_table *newTceTable;
513 unsigned long offset;
514 int dma_window_property_size;
515
516 dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
517 if (!dma_window) {
518 return NULL;
519 }
520
521 newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
 if (!newTceTable)
 return NULL;
522
523 /* There should be some code to extract the phys-encoded offset
524 using prom_n_addr_cells(). However, according to a comment
525 on earlier versions, it's always zero, so we don't bother */
526 offset = dma_window[1] >> PAGE_SHIFT;
527
528 /* TCE table size - measured in tce entries */
529 newTceTable->it_size = dma_window[4] >> PAGE_SHIFT;
530 /* offset for VIO should always be 0 */
531 newTceTable->it_offset = offset;
532 newTceTable->it_busno = 0;
533 newTceTable->it_index = (unsigned long)dma_window[0];
534 newTceTable->it_type = TCE_VB;
535
536 return iommu_init_table(newTceTable);
537}
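/* Hedged annotation of the "ibm,my-dma-window" cells consumed above
 * (field names invented for illustration; the real property is a raw
 * cell array whose exact shape depends on #address-cells): */
struct dma_window_cells_sketch {
	unsigned int index;	/* dma_window[0] -> it_index (window/LIOBN) */
	unsigned int offset;	/* dma_window[1] -> it_offset (>> PAGE_SHIFT) */
	unsigned int rsvd[2];	/* cells 2-3: not used by this code */
	unsigned int size;	/* dma_window[4] -> it_size (>> PAGE_SHIFT) */
};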
538
539int vio_enable_interrupts(struct vio_dev *dev)
540{
541 int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
542 if (rc != H_Success) {
543 printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
544 }
545 return rc;
546}
547EXPORT_SYMBOL(vio_enable_interrupts);
548
549int vio_disable_interrupts(struct vio_dev *dev)
550{
551 int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
552 if (rc != H_Success) {
553 printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
554 }
555 return rc;
556}
557EXPORT_SYMBOL(vio_disable_interrupts);
558#endif
559
560static dma_addr_t vio_map_single(struct device *dev, void *vaddr,
561 size_t size, enum dma_data_direction direction)
562{
563 return iommu_map_single(to_vio_dev(dev)->iommu_table, vaddr, size,
564 direction);
565}
566
567static void vio_unmap_single(struct device *dev, dma_addr_t dma_handle,
568 size_t size, enum dma_data_direction direction)
569{
570 iommu_unmap_single(to_vio_dev(dev)->iommu_table, dma_handle, size,
571 direction);
572}
573
574static int vio_map_sg(struct device *dev, struct scatterlist *sglist,
575 int nelems, enum dma_data_direction direction)
576{
577 return iommu_map_sg(dev, to_vio_dev(dev)->iommu_table, sglist,
578 nelems, direction);
579}
580
581static void vio_unmap_sg(struct device *dev, struct scatterlist *sglist,
582 int nelems, enum dma_data_direction direction)
583{
584 iommu_unmap_sg(to_vio_dev(dev)->iommu_table, sglist, nelems, direction);
585}
586
587static void *vio_alloc_coherent(struct device *dev, size_t size,
588 dma_addr_t *dma_handle, unsigned int __nocast flag)
589{
590 return iommu_alloc_coherent(to_vio_dev(dev)->iommu_table, size,
591 dma_handle, flag);
592}
593
594static void vio_free_coherent(struct device *dev, size_t size,
595 void *vaddr, dma_addr_t dma_handle)
596{
597 iommu_free_coherent(to_vio_dev(dev)->iommu_table, size, vaddr,
598 dma_handle);
599}
600
601static int vio_dma_supported(struct device *dev, u64 mask)
602{
603 return 1;
604}
605
606struct dma_mapping_ops vio_dma_ops = {
607 .alloc_coherent = vio_alloc_coherent,
608 .free_coherent = vio_free_coherent,
609 .map_single = vio_map_single,
610 .unmap_single = vio_unmap_single,
611 .map_sg = vio_map_sg,
612 .unmap_sg = vio_unmap_sg,
613 .dma_supported = vio_dma_supported,
614};
615
616static int vio_bus_match(struct device *dev, struct device_driver *drv)
617{
618 const struct vio_dev *vio_dev = to_vio_dev(dev);
619 struct vio_driver *vio_drv = to_vio_driver(drv);
620 const struct vio_device_id *ids = vio_drv->id_table;
621 const struct vio_device_id *found_id;
622
623 DBGENTER();
624
625 if (!ids)
626 return 0;
627
628 found_id = vio_match_device(ids, vio_dev);
629 if (found_id)
630 return 1;
631
632 return 0;
633}
634
635struct bus_type vio_bus_type = {
636 .name = "vio",
637 .match = vio_bus_match,
638};
639
640EXPORT_SYMBOL(vio_bus_type);
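
For context, a hedged sketch of how a client driver attaches to vio_bus_type; the driver name and id strings are hypothetical, and it assumes the vio_register_driver()/struct vio_driver interface defined alongside this bus code:

	static struct vio_device_id example_ids[] = {
		{ "network", "IBM,l-lan" },	/* type / compat strings */
		{ "", "" }			/* terminator */
	};

	static int example_probe(struct vio_dev *vdev, const struct vio_device_id *id)
	{
		/* DMA set up here goes through vio_dma_ops above */
		return 0;
	}

	static struct vio_driver example_driver = {
		.name		= "example",
		.id_table	= example_ids,
		.probe		= example_probe,
	};

	static int __init example_init(void)
	{
		return vio_register_driver(&example_driver);
	}

vio_bus_match() then compares each probing device against example_ids via vio_match_device().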
diff --git a/arch/ppc64/kernel/viopath.c b/arch/ppc64/kernel/viopath.c
new file mode 100644
index 000000000000..2ed8ee075680
--- /dev/null
+++ b/arch/ppc64/kernel/viopath.c
@@ -0,0 +1,675 @@
1/* -*- linux-c -*-
2 * arch/ppc64/kernel/viopath.c
3 *
4 * iSeries Virtual I/O Message Path code
5 *
6 * Authors: Dave Boutcher <boutcher@us.ibm.com>
7 * Ryan Arnold <ryanarn@us.ibm.com>
8 * Colin Devilbiss <devilbis@us.ibm.com>
9 *
10 * (C) Copyright 2000-2003 IBM Corporation
11 *
12 * This code is used by the iSeries virtual disk, cd,
13 * tape, and console to communicate with OS/400 in another
14 * partition.
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License as
18 * published by the Free Software Foundation; either version 2 of the
19 * License, or (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful, but
22 * WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software Foundation,
28 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 *
30 */
31#include <linux/module.h>
32#include <linux/kernel.h>
33#include <linux/errno.h>
34#include <linux/vmalloc.h>
35#include <linux/string.h>
36#include <linux/proc_fs.h>
37#include <linux/dma-mapping.h>
38#include <linux/wait.h>
39#include <linux/seq_file.h>
40#include <linux/smp_lock.h>
41#include <linux/interrupt.h>
42
43#include <asm/system.h>
44#include <asm/uaccess.h>
45#include <asm/iSeries/HvTypes.h>
46#include <asm/iSeries/LparData.h>
47#include <asm/iSeries/HvLpEvent.h>
48#include <asm/iSeries/HvLpConfig.h>
49#include <asm/iSeries/HvCallCfg.h>
50#include <asm/iSeries/mf.h>
51#include <asm/iSeries/iSeries_proc.h>
52#include <asm/iSeries/vio.h>
53
54/* Status of the path to each other partition in the system.
55 * This is overkill, since we will only ever establish connections
56 * to our hosting partition and the primary partition on the system.
57 * But this allows for other support in the future.
58 */
59static struct viopathStatus {
60 int isOpen; /* Did we open the path? */
61	int isActive;		/* Do we have a mon msg outstanding? */
62 int users[VIO_MAX_SUBTYPES];
63 HvLpInstanceId mSourceInst;
64 HvLpInstanceId mTargetInst;
65 int numberAllocated;
66} viopathStatus[HVMAXARCHITECTEDLPS];
67
68static DEFINE_SPINLOCK(statuslock);
69
70/*
71 * For each kind of event we allocate a buffer that is
72 * guaranteed not to cross a page boundary
73 */
74static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned;
75static atomic_t event_buffer_available[VIO_MAX_SUBTYPES];
76static int event_buffer_initialised;
77
78static void handleMonitorEvent(struct HvLpEvent *event);
79
80/*
81 * We use this structure to handle asynchronous responses. The caller
82 * blocks on the semaphore and the handler posts the semaphore. However,
83 * if system_state is not SYSTEM_RUNNING, then wait_atomic is used ...
84 */
85struct alloc_parms {
86 struct semaphore sem;
87 int number;
88 atomic_t wait_atomic;
89 int used_wait_atomic;
90};
91
92/* Put a sequence number in each mon msg. The value is not
93 * important. Start at something other than 0 just for
94 * readability. Wrapping this is ok.
95 */
96static u8 viomonseq = 22;
97
98/* Our hosting logical partition. We get this at startup
99 * time, and different modules access this variable directly.
100 */
101HvLpIndex viopath_hostLp = HvLpIndexInvalid;
102EXPORT_SYMBOL(viopath_hostLp);
103HvLpIndex viopath_ourLp = HvLpIndexInvalid;
104EXPORT_SYMBOL(viopath_ourLp);
105
106/* For each kind of incoming event we set a pointer to a
107 * routine to call.
108 */
109static vio_event_handler_t *vio_handler[VIO_MAX_SUBTYPES];
110
111#define VIOPATH_KERN_WARN KERN_WARNING "viopath: "
112#define VIOPATH_KERN_INFO KERN_INFO "viopath: "
113
114static int proc_viopath_show(struct seq_file *m, void *v)
115{
116 char *buf;
117 u16 vlanMap;
118 dma_addr_t handle;
119 HvLpEvent_Rc hvrc;
120 DECLARE_MUTEX_LOCKED(Semaphore);
121
122 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
123 if (!buf)
124 return 0;
125 memset(buf, 0, PAGE_SIZE);
126
127 handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE,
128 DMA_FROM_DEVICE);
129
130 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
131 HvLpEvent_Type_VirtualIo,
132 viomajorsubtype_config | vioconfigget,
133 HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
134 viopath_sourceinst(viopath_hostLp),
135 viopath_targetinst(viopath_hostLp),
136 (u64)(unsigned long)&Semaphore, VIOVERSION << 16,
137 ((u64)handle) << 32, PAGE_SIZE, 0, 0);
138
139 if (hvrc != HvLpEvent_Rc_Good)
140 printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc);
141
142 down(&Semaphore);
143
144 vlanMap = HvLpConfig_getVirtualLanIndexMap();
145
146 buf[PAGE_SIZE-1] = '\0';
147 seq_printf(m, "%s", buf);
148 seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
149 seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
150 e2a(xItExtVpdPanel.mfgID[2]),
151 e2a(xItExtVpdPanel.mfgID[3]),
152 e2a(xItExtVpdPanel.systemSerial[1]),
153 e2a(xItExtVpdPanel.systemSerial[2]),
154 e2a(xItExtVpdPanel.systemSerial[3]),
155 e2a(xItExtVpdPanel.systemSerial[4]),
156 e2a(xItExtVpdPanel.systemSerial[5]));
157
158 dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
159 kfree(buf);
160
161 return 0;
162}
163
164static int proc_viopath_open(struct inode *inode, struct file *file)
165{
166 return single_open(file, proc_viopath_show, NULL);
167}
168
169static struct file_operations proc_viopath_operations = {
170 .open = proc_viopath_open,
171 .read = seq_read,
172 .llseek = seq_lseek,
173 .release = single_release,
174};
175
176static int __init vio_proc_init(void)
177{
178 struct proc_dir_entry *e;
179
180 e = create_proc_entry("iSeries/config", 0, NULL);
181 if (e)
182 e->proc_fops = &proc_viopath_operations;
183
184 return 0;
185}
186__initcall(vio_proc_init);
187
188/* See if a given LP is active. Allow for invalid lps to be passed in
189 * and just return invalid
190 */
191int viopath_isactive(HvLpIndex lp)
192{
193 if (lp == HvLpIndexInvalid)
194 return 0;
195 if (lp < HVMAXARCHITECTEDLPS)
196 return viopathStatus[lp].isActive;
197 else
198 return 0;
199}
200EXPORT_SYMBOL(viopath_isactive);
201
202/*
203 * We cache the source and target instance ids for each
204 * partition.
205 */
206HvLpInstanceId viopath_sourceinst(HvLpIndex lp)
207{
208 return viopathStatus[lp].mSourceInst;
209}
210EXPORT_SYMBOL(viopath_sourceinst);
211
212HvLpInstanceId viopath_targetinst(HvLpIndex lp)
213{
214 return viopathStatus[lp].mTargetInst;
215}
216EXPORT_SYMBOL(viopath_targetinst);
217
218/*
219 * Send a monitor message. This is a message with the acknowledge
220 * bit on that the other side will NOT explicitly acknowledge. When
221 * the other side goes down, the hypervisor will acknowledge any
222 * outstanding messages....so we will know when the other side dies.
223 */
224static void sendMonMsg(HvLpIndex remoteLp)
225{
226 HvLpEvent_Rc hvrc;
227
228 viopathStatus[remoteLp].mSourceInst =
229 HvCallEvent_getSourceLpInstanceId(remoteLp,
230 HvLpEvent_Type_VirtualIo);
231 viopathStatus[remoteLp].mTargetInst =
232 HvCallEvent_getTargetLpInstanceId(remoteLp,
233 HvLpEvent_Type_VirtualIo);
234
235 /*
236	 * Deliberately ignore the return code here. If we call this
237 * more than once, we don't care.
238 */
239 vio_setHandler(viomajorsubtype_monitor, handleMonitorEvent);
240
241 hvrc = HvCallEvent_signalLpEventFast(remoteLp, HvLpEvent_Type_VirtualIo,
242 viomajorsubtype_monitor, HvLpEvent_AckInd_DoAck,
243 HvLpEvent_AckType_DeferredAck,
244 viopathStatus[remoteLp].mSourceInst,
245 viopathStatus[remoteLp].mTargetInst,
246 viomonseq++, 0, 0, 0, 0, 0);
247
248 if (hvrc == HvLpEvent_Rc_Good)
249 viopathStatus[remoteLp].isActive = 1;
250 else {
251 printk(VIOPATH_KERN_WARN "could not connect to partition %d\n",
252 remoteLp);
253 viopathStatus[remoteLp].isActive = 0;
254 }
255}
256
257static void handleMonitorEvent(struct HvLpEvent *event)
258{
259 HvLpIndex remoteLp;
260 int i;
261
262 /*
263 * This handler is _also_ called as part of the loop
264 * at the end of this routine, so it must be able to
265 * ignore NULL events...
266 */
267 if (!event)
268 return;
269
270 /*
271 * First see if this is just a normal monitor message from the
272 * other partition
273 */
274 if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
275 remoteLp = event->xSourceLp;
276 if (!viopathStatus[remoteLp].isActive)
277 sendMonMsg(remoteLp);
278 return;
279 }
280
281 /*
282 * This path is for an acknowledgement; the other partition
283 * died
284 */
285 remoteLp = event->xTargetLp;
286 if ((event->xSourceInstanceId != viopathStatus[remoteLp].mSourceInst) ||
287 (event->xTargetInstanceId != viopathStatus[remoteLp].mTargetInst)) {
288 printk(VIOPATH_KERN_WARN "ignoring ack....mismatched instances\n");
289 return;
290 }
291
292 printk(VIOPATH_KERN_WARN "partition %d ended\n", remoteLp);
293
294 viopathStatus[remoteLp].isActive = 0;
295
296 /*
297 * For each active handler, pass them a NULL
298 * message to indicate that the other partition
299 * died
300 */
301 for (i = 0; i < VIO_MAX_SUBTYPES; i++) {
302 if (vio_handler[i] != NULL)
303 (*vio_handler[i])(NULL);
304 }
305}
306
307int vio_setHandler(int subtype, vio_event_handler_t *beh)
308{
309 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
310 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
311 return -EINVAL;
312 if (vio_handler[subtype] != NULL)
313 return -EBUSY;
314 vio_handler[subtype] = beh;
315 return 0;
316}
317EXPORT_SYMBOL(vio_setHandler);
318
319int vio_clearHandler(int subtype)
320{
321 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
322 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
323 return -EINVAL;
324 if (vio_handler[subtype] == NULL)
325 return -EAGAIN;
326 vio_handler[subtype] = NULL;
327 return 0;
328}
329EXPORT_SYMBOL(vio_clearHandler);
330
331static void handleConfig(struct HvLpEvent *event)
332{
333 if (!event)
334 return;
335 if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
336 printk(VIOPATH_KERN_WARN
337 "unexpected config request from partition %d",
338 event->xSourceLp);
339
340 if ((event->xFlags.xFunction == HvLpEvent_Function_Int) &&
341 (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) {
342 event->xRc = HvLpEvent_Rc_InvalidSubtype;
343 HvCallEvent_ackLpEvent(event);
344 }
345 return;
346 }
347
348 up((struct semaphore *)event->xCorrelationToken);
349}
350
351/*
352 * Initialization of the hosting partition
353 */
354void vio_set_hostlp(void)
355{
356 /*
357 * If this has already been set then we DON'T want to either change
358 * it or re-register the proc file system
359 */
360 if (viopath_hostLp != HvLpIndexInvalid)
361 return;
362
363 /*
364 * Figure out our hosting partition. This isn't allowed to change
365 * while we're active
366 */
367 viopath_ourLp = HvLpConfig_getLpIndex();
368 viopath_hostLp = HvCallCfg_getHostingLpIndex(viopath_ourLp);
369
370 if (viopath_hostLp != HvLpIndexInvalid)
371 vio_setHandler(viomajorsubtype_config, handleConfig);
372}
373EXPORT_SYMBOL(vio_set_hostlp);
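
A sketch of the typical client bring-up sequence built from the exports above; the subtype and event count are illustrative:

	/* Hypothetical handler; a NULL event means the other partition died. */
	static void example_handler(struct HvLpEvent *event)
	{
		if (!event)
			return;
		/* ... dispatch on event->xSubtype ... */
	}

	static int __init example_client_init(void)
	{
		int rc;

		vio_set_hostlp();
		if (viopath_hostLp == HvLpIndexInvalid)
			return -ENODEV;

		rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio, 4);
		if (rc)
			return rc;

		rc = vio_setHandler(viomajorsubtype_blockio, example_handler);
		if (rc)
			viopath_close(viopath_hostLp, viomajorsubtype_blockio, 4);
		return rc;
	}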
374
375static void vio_handleEvent(struct HvLpEvent *event, struct pt_regs *regs)
376{
377 HvLpIndex remoteLp;
378 int subtype = (event->xSubtype & VIOMAJOR_SUBTYPE_MASK)
379 >> VIOMAJOR_SUBTYPE_SHIFT;
380
381 if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
382 remoteLp = event->xSourceLp;
383 /*
384 * The isActive is checked because if the hosting partition
385 * went down and came back up it would not be active but it
386 * would have different source and target instances, in which
387 * case we'd want to reset them. This case really protects
388 * against an unauthorized active partition sending interrupts
389 * or acks to this linux partition.
390 */
391 if (viopathStatus[remoteLp].isActive
392 && (event->xSourceInstanceId !=
393 viopathStatus[remoteLp].mTargetInst)) {
394 printk(VIOPATH_KERN_WARN
395 "message from invalid partition. "
396 "int msg rcvd, source inst (%d) doesnt match (%d)\n",
397 viopathStatus[remoteLp].mTargetInst,
398 event->xSourceInstanceId);
399 return;
400 }
401
402 if (viopathStatus[remoteLp].isActive
403 && (event->xTargetInstanceId !=
404 viopathStatus[remoteLp].mSourceInst)) {
405 printk(VIOPATH_KERN_WARN
406 "message from invalid partition. "
407 "int msg rcvd, target inst (%d) doesnt match (%d)\n",
408 viopathStatus[remoteLp].mSourceInst,
409 event->xTargetInstanceId);
410 return;
411 }
412 } else {
413 remoteLp = event->xTargetLp;
414 if (event->xSourceInstanceId !=
415 viopathStatus[remoteLp].mSourceInst) {
416 printk(VIOPATH_KERN_WARN
417 "message from invalid partition. "
418 "ack msg rcvd, source inst (%d) doesnt match (%d)\n",
419 viopathStatus[remoteLp].mSourceInst,
420 event->xSourceInstanceId);
421 return;
422 }
423
424 if (event->xTargetInstanceId !=
425 viopathStatus[remoteLp].mTargetInst) {
426 printk(VIOPATH_KERN_WARN
427 "message from invalid partition. "
428 "viopath: ack msg rcvd, target inst (%d) doesnt match (%d)\n",
429 viopathStatus[remoteLp].mTargetInst,
430 event->xTargetInstanceId);
431 return;
432 }
433 }
434
435 if (vio_handler[subtype] == NULL) {
436 printk(VIOPATH_KERN_WARN
437 "unexpected virtual io event subtype %d from partition %d\n",
438 event->xSubtype, remoteLp);
439 /* No handler. Ack if necessary */
440 if ((event->xFlags.xFunction == HvLpEvent_Function_Int) &&
441 (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) {
442 event->xRc = HvLpEvent_Rc_InvalidSubtype;
443 HvCallEvent_ackLpEvent(event);
444 }
445 return;
446 }
447
448 /* This innocuous little line is where all the real work happens */
449 (*vio_handler[subtype])(event);
450}
451
452static void viopath_donealloc(void *parm, int number)
453{
454 struct alloc_parms *parmsp = parm;
455
456 parmsp->number = number;
457 if (parmsp->used_wait_atomic)
458 atomic_set(&parmsp->wait_atomic, 0);
459 else
460 up(&parmsp->sem);
461}
462
463static int allocateEvents(HvLpIndex remoteLp, int numEvents)
464{
465 struct alloc_parms parms;
466
467 if (system_state != SYSTEM_RUNNING) {
468 parms.used_wait_atomic = 1;
469 atomic_set(&parms.wait_atomic, 1);
470 } else {
471 parms.used_wait_atomic = 0;
472 init_MUTEX_LOCKED(&parms.sem);
473 }
474 mf_allocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo, 250, /* It would be nice to put a real number here! */
475 numEvents, &viopath_donealloc, &parms);
476 if (system_state != SYSTEM_RUNNING) {
477 while (atomic_read(&parms.wait_atomic))
478 mb();
479 } else
480 down(&parms.sem);
481 return parms.number;
482}
483
484int viopath_open(HvLpIndex remoteLp, int subtype, int numReq)
485{
486 int i;
487 unsigned long flags;
488 int tempNumAllocated;
489
490 if ((remoteLp >= HvMaxArchitectedLps) || (remoteLp == HvLpIndexInvalid))
491 return -EINVAL;
492
493 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
494 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
495 return -EINVAL;
496
497 spin_lock_irqsave(&statuslock, flags);
498
499 if (!event_buffer_initialised) {
500 for (i = 0; i < VIO_MAX_SUBTYPES; i++)
501 atomic_set(&event_buffer_available[i], 1);
502 event_buffer_initialised = 1;
503 }
504
505 viopathStatus[remoteLp].users[subtype]++;
506
507 if (!viopathStatus[remoteLp].isOpen) {
508 viopathStatus[remoteLp].isOpen = 1;
509 HvCallEvent_openLpEventPath(remoteLp, HvLpEvent_Type_VirtualIo);
510
511 /*
512 * Don't hold the spinlock during an operation that
513 * can sleep.
514 */
515 spin_unlock_irqrestore(&statuslock, flags);
516 tempNumAllocated = allocateEvents(remoteLp, 1);
517 spin_lock_irqsave(&statuslock, flags);
518
519 viopathStatus[remoteLp].numberAllocated += tempNumAllocated;
520
521 if (viopathStatus[remoteLp].numberAllocated == 0) {
522 HvCallEvent_closeLpEventPath(remoteLp,
523 HvLpEvent_Type_VirtualIo);
524
525 spin_unlock_irqrestore(&statuslock, flags);
526 return -ENOMEM;
527 }
528
529 viopathStatus[remoteLp].mSourceInst =
530 HvCallEvent_getSourceLpInstanceId(remoteLp,
531 HvLpEvent_Type_VirtualIo);
532 viopathStatus[remoteLp].mTargetInst =
533 HvCallEvent_getTargetLpInstanceId(remoteLp,
534 HvLpEvent_Type_VirtualIo);
535 HvLpEvent_registerHandler(HvLpEvent_Type_VirtualIo,
536 &vio_handleEvent);
537 sendMonMsg(remoteLp);
538 printk(VIOPATH_KERN_INFO "opening connection to partition %d, "
539 "setting sinst %d, tinst %d\n",
540 remoteLp, viopathStatus[remoteLp].mSourceInst,
541 viopathStatus[remoteLp].mTargetInst);
542 }
543
544 spin_unlock_irqrestore(&statuslock, flags);
545 tempNumAllocated = allocateEvents(remoteLp, numReq);
546 spin_lock_irqsave(&statuslock, flags);
547 viopathStatus[remoteLp].numberAllocated += tempNumAllocated;
548 spin_unlock_irqrestore(&statuslock, flags);
549
550 return 0;
551}
552EXPORT_SYMBOL(viopath_open);
553
554int viopath_close(HvLpIndex remoteLp, int subtype, int numReq)
555{
556 unsigned long flags;
557 int i;
558 int numOpen;
559 struct alloc_parms parms;
560
561 if ((remoteLp >= HvMaxArchitectedLps) || (remoteLp == HvLpIndexInvalid))
562 return -EINVAL;
563
564 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
565 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
566 return -EINVAL;
567
568 spin_lock_irqsave(&statuslock, flags);
569 /*
570 * If the viopath_close somehow gets called before a
571	 * viopath_open it could decrement to -1 which is a
572	 * non-recoverable state so we'll prevent this from
573 * happening.
574 */
575 if (viopathStatus[remoteLp].users[subtype] > 0)
576 viopathStatus[remoteLp].users[subtype]--;
577
578 spin_unlock_irqrestore(&statuslock, flags);
579
580 parms.used_wait_atomic = 0;
581 init_MUTEX_LOCKED(&parms.sem);
582 mf_deallocate_lp_events(remoteLp, HvLpEvent_Type_VirtualIo,
583 numReq, &viopath_donealloc, &parms);
584 down(&parms.sem);
585
586 spin_lock_irqsave(&statuslock, flags);
587 for (i = 0, numOpen = 0; i < VIO_MAX_SUBTYPES; i++)
588 numOpen += viopathStatus[remoteLp].users[i];
589
590 if ((viopathStatus[remoteLp].isOpen) && (numOpen == 0)) {
591		printk(VIOPATH_KERN_INFO "closing connection to partition %d\n",
592 remoteLp);
593
594 HvCallEvent_closeLpEventPath(remoteLp,
595 HvLpEvent_Type_VirtualIo);
596 viopathStatus[remoteLp].isOpen = 0;
597 viopathStatus[remoteLp].isActive = 0;
598
599 for (i = 0; i < VIO_MAX_SUBTYPES; i++)
600 atomic_set(&event_buffer_available[i], 0);
601 event_buffer_initialised = 0;
602 }
603 spin_unlock_irqrestore(&statuslock, flags);
604 return 0;
605}
606EXPORT_SYMBOL(viopath_close);
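
The matching teardown, sketched under the same assumptions as the bring-up example above; numReq should mirror what was passed to viopath_open() so the event allocation is fully returned:

	static void example_client_exit(void)
	{
		viopath_close(viopath_hostLp, viomajorsubtype_blockio, 4);
		vio_clearHandler(viomajorsubtype_blockio);
	}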
607
608void *vio_get_event_buffer(int subtype)
609{
610 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
611 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
612 return NULL;
613
614 if (atomic_dec_if_positive(&event_buffer_available[subtype]) == 0)
615 return &event_buffer[subtype * 256];
616 else
617 return NULL;
618}
619EXPORT_SYMBOL(vio_get_event_buffer);
620
621void vio_free_event_buffer(int subtype, void *buffer)
622{
623 subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
624 if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) {
625 printk(VIOPATH_KERN_WARN
626 "unexpected subtype %d freeing event buffer\n", subtype);
627 return;
628 }
629
630 if (atomic_read(&event_buffer_available[subtype]) != 0) {
631 printk(VIOPATH_KERN_WARN
632 "freeing unallocated event buffer, subtype %d\n",
633 subtype);
634 return;
635 }
636
637 if (buffer != &event_buffer[subtype * 256]) {
638 printk(VIOPATH_KERN_WARN
639 "freeing invalid event buffer, subtype %d\n", subtype);
640 }
641
642 atomic_set(&event_buffer_available[subtype], 1);
643}
644EXPORT_SYMBOL(vio_free_event_buffer);
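
Usage sketch: each subtype owns exactly one 256-byte, page-bounded slot, so get/free must bracket every use (names below are illustrative):

	static void example_use_buffer(void)
	{
		void *buf = vio_get_event_buffer(viomajorsubtype_blockio);

		if (!buf)
			return;		/* slot is already in use */
		/* ... build an event payload of at most 256 bytes in buf ... */
		vio_free_event_buffer(viomajorsubtype_blockio, buf);
	}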
645
646static const struct vio_error_entry vio_no_error =
647 { 0, 0, "Non-VIO Error" };
648static const struct vio_error_entry vio_unknown_error =
649 { 0, EIO, "Unknown Error" };
650
651static const struct vio_error_entry vio_default_errors[] = {
652 {0x0001, EIO, "No Connection"},
653 {0x0002, EIO, "No Receiver"},
654 {0x0003, EIO, "No Buffer Available"},
655 {0x0004, EBADRQC, "Invalid Message Type"},
656 {0x0000, 0, NULL},
657};
658
659const struct vio_error_entry *vio_lookup_rc(
660 const struct vio_error_entry *local_table, u16 rc)
661{
662 const struct vio_error_entry *cur;
663
664 if (!rc)
665 return &vio_no_error;
666 if (local_table)
667 for (cur = local_table; cur->rc; ++cur)
668 if (cur->rc == rc)
669 return cur;
670 for (cur = vio_default_errors; cur->rc; ++cur)
671 if (cur->rc == rc)
672 return cur;
673 return &vio_unknown_error;
674}
675EXPORT_SYMBOL(vio_lookup_rc);
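
A sketch of a driver-local error table layered on vio_lookup_rc(); the table contents are hypothetical, and the field names (rc, errno, msg) are assumed from the initializers above:

	static const struct vio_error_entry example_errors[] = {
		{ 0x0101, EIO, "Device Busy" },
		{ 0x0000, 0, NULL },		/* terminator */
	};

	static int example_rc_to_errno(u16 rc)
	{
		const struct vio_error_entry *e =
			vio_lookup_rc(example_errors, rc);

		printk(KERN_WARNING "example: rc 0x%04x: %s\n", rc, e->msg);
		return -e->errno;
	}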
diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S
new file mode 100644
index 000000000000..4103cc13f8d6
--- /dev/null
+++ b/arch/ppc64/kernel/vmlinux.lds.S
@@ -0,0 +1,145 @@
1#include <asm-generic/vmlinux.lds.h>
2
3OUTPUT_ARCH(powerpc:common64)
4jiffies = jiffies_64;
5SECTIONS
6{
7 /* Sections to be discarded. */
8 /DISCARD/ : {
9 *(.exitcall.exit)
10 }
11
12
13 /* Read-only sections, merged into text segment: */
14 .text : {
15 *(.text .text.*)
16 SCHED_TEXT
17 LOCK_TEXT
18 *(.fixup)
19 . = ALIGN(4096);
20 _etext = .;
21 }
22
23 __ex_table : {
24 __start___ex_table = .;
25 *(__ex_table)
26 __stop___ex_table = .;
27 }
28
29 __bug_table : {
30 __start___bug_table = .;
31 *(__bug_table)
32 __stop___bug_table = .;
33 }
34
35 __ftr_fixup : {
36 __start___ftr_fixup = .;
37 *(__ftr_fixup)
38 __stop___ftr_fixup = .;
39 }
40
41 RODATA
42
43
44 /* will be freed after init */
45 . = ALIGN(4096);
46 __init_begin = .;
47
48 .init.text : {
49 _sinittext = .;
50 *(.init.text)
51 _einittext = .;
52 }
53
54 .init.data : {
55 *(.init.data)
56 }
57
58 . = ALIGN(16);
59 .init.setup : {
60 __setup_start = .;
61 *(.init.setup)
62 __setup_end = .;
63 }
64
65 .initcall.init : {
66 __initcall_start = .;
67 *(.initcall1.init)
68 *(.initcall2.init)
69 *(.initcall3.init)
70 *(.initcall4.init)
71 *(.initcall5.init)
72 *(.initcall6.init)
73 *(.initcall7.init)
74 __initcall_end = .;
75 }
76
77 .con_initcall.init : {
78 __con_initcall_start = .;
79 *(.con_initcall.init)
80 __con_initcall_end = .;
81 }
82
83 SECURITY_INIT
84
85 . = ALIGN(4096);
86 .init.ramfs : {
87 __initramfs_start = .;
88 *(.init.ramfs)
89 __initramfs_end = .;
90 }
91
92 .data.percpu : {
93 __per_cpu_start = .;
94 *(.data.percpu)
95 __per_cpu_end = .;
96 }
97
98 . = ALIGN(16384);
99 __init_end = .;
100 /* freed after init ends here */
101
102
103 /* Read/write sections */
104 . = ALIGN(16384);
105 /* The initial task and kernel stack */
106 .data.init_task : {
107 *(.data.init_task)
108 }
109
110 .data.page_aligned : {
111 *(.data.page_aligned)
112 }
113
114 .data.cacheline_aligned : {
115 *(.data.cacheline_aligned)
116 }
117
118 .data : {
119 *(.data .data.rel* .toc1)
120 *(.branch_lt)
121 }
122
123 .opd : {
124 *(.opd)
125 }
126
127 .got : {
128 __toc_start = .;
129 *(.got)
130 *(.toc)
131 . = ALIGN(4096);
132 _edata = .;
133 }
134
135
136 . = ALIGN(4096);
137 .bss : {
138 __bss_start = .;
139 *(.bss)
140 __bss_stop = .;
141 }
142
143 . = ALIGN(4096);
144 _end = . ;
145}
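
The __initcall_start/__initcall_end pair above bounds an array of function pointers that generic boot code walks; a simplified sketch of that consumer (mirroring init/main.c of this era):

	typedef int (*initcall_t)(void);
	extern initcall_t __initcall_start[], __initcall_end[];

	static void __init do_initcalls(void)
	{
		initcall_t *call;

		/* .initcall1.init through .initcall7.init run in link order */
		for (call = __initcall_start; call < __initcall_end; call++)
			(*call)();
	}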
diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c
new file mode 100644
index 000000000000..eedd1d3c2a10
--- /dev/null
+++ b/arch/ppc64/kernel/xics.c
@@ -0,0 +1,713 @@
1/*
2 * arch/ppc64/kernel/xics.c
3 *
4 * Copyright 2000 IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/config.h>
12#include <linux/types.h>
13#include <linux/threads.h>
14#include <linux/kernel.h>
15#include <linux/irq.h>
16#include <linux/smp.h>
17#include <linux/interrupt.h>
18#include <linux/signal.h>
19#include <linux/init.h>
20#include <linux/gfp.h>
21#include <linux/radix-tree.h>
22#include <linux/cpu.h>
23#include <asm/prom.h>
24#include <asm/io.h>
25#include <asm/pgtable.h>
26#include <asm/smp.h>
27#include <asm/rtas.h>
28#include <asm/xics.h>
29#include <asm/hvcall.h>
30#include <asm/machdep.h>
31
32#include "i8259.h"
33
34static unsigned int xics_startup(unsigned int irq);
35static void xics_enable_irq(unsigned int irq);
36static void xics_disable_irq(unsigned int irq);
37static void xics_mask_and_ack_irq(unsigned int irq);
38static void xics_end_irq(unsigned int irq);
39static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask);
40
41struct hw_interrupt_type xics_pic = {
42 .typename = " XICS ",
43 .startup = xics_startup,
44 .enable = xics_enable_irq,
45 .disable = xics_disable_irq,
46 .ack = xics_mask_and_ack_irq,
47 .end = xics_end_irq,
48 .set_affinity = xics_set_affinity
49};
50
51struct hw_interrupt_type xics_8259_pic = {
52 .typename = " XICS/8259",
53 .ack = xics_mask_and_ack_irq,
54};
55
56/* This is used to map real irq numbers to virtual */
57static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC);
58
59#define XICS_IPI 2
60#define XICS_IRQ_SPURIOUS 0
61
62/* Want a priority other than 0. Various HW issues require this. */
63#define DEFAULT_PRIORITY 5
64
65/*
66 * Mark IPIs as higher priority so we can take them inside interrupts that
67 * aren't marked SA_INTERRUPT
68 */
69#define IPI_PRIORITY 4
70
71struct xics_ipl {
72 union {
73 u32 word;
74 u8 bytes[4];
75 } xirr_poll;
76 union {
77 u32 word;
78 u8 bytes[4];
79 } xirr;
80 u32 dummy;
81 union {
82 u32 word;
83 u8 bytes[4];
84 } qirr;
85};
86
87static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
88
89static int xics_irq_8259_cascade = 0;
90static int xics_irq_8259_cascade_real = 0;
91static unsigned int default_server = 0xFF;
92/* also referenced in smp.c... */
93unsigned int default_distrib_server = 0;
94unsigned int interrupt_server_size = 8;
95
96/*
97 * XICS only has a single IPI, so encode the messages per CPU
98 */
99struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned;
100
101/* RTAS service tokens */
102int ibm_get_xive;
103int ibm_set_xive;
104int ibm_int_on;
105int ibm_int_off;
106
107typedef struct {
108 int (*xirr_info_get)(int cpu);
109 void (*xirr_info_set)(int cpu, int val);
110 void (*cppr_info)(int cpu, u8 val);
111 void (*qirr_info)(int cpu, u8 val);
112} xics_ops;
113
114
115/* SMP */
116
117static int pSeries_xirr_info_get(int n_cpu)
118{
119 return in_be32(&xics_per_cpu[n_cpu]->xirr.word);
120}
121
122static void pSeries_xirr_info_set(int n_cpu, int value)
123{
124 out_be32(&xics_per_cpu[n_cpu]->xirr.word, value);
125}
126
127static void pSeries_cppr_info(int n_cpu, u8 value)
128{
129 out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value);
130}
131
132static void pSeries_qirr_info(int n_cpu, u8 value)
133{
134 out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value);
135}
136
137static xics_ops pSeries_ops = {
138 pSeries_xirr_info_get,
139 pSeries_xirr_info_set,
140 pSeries_cppr_info,
141 pSeries_qirr_info
142};
143
144static xics_ops *ops = &pSeries_ops;
145
146
147/* LPAR */
148
149static inline long plpar_eoi(unsigned long xirr)
150{
151 return plpar_hcall_norets(H_EOI, xirr);
152}
153
154static inline long plpar_cppr(unsigned long cppr)
155{
156 return plpar_hcall_norets(H_CPPR, cppr);
157}
158
159static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
160{
161 return plpar_hcall_norets(H_IPI, servernum, mfrr);
162}
163
164static inline long plpar_xirr(unsigned long *xirr_ret)
165{
166 unsigned long dummy;
167 return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy);
168}
169
170static int pSeriesLP_xirr_info_get(int n_cpu)
171{
172 unsigned long lpar_rc;
173 unsigned long return_value;
174
175 lpar_rc = plpar_xirr(&return_value);
176 if (lpar_rc != H_Success)
177 panic(" bad return code xirr - rc = %lx \n", lpar_rc);
178 return (int)return_value;
179}
180
181static void pSeriesLP_xirr_info_set(int n_cpu, int value)
182{
183 unsigned long lpar_rc;
184 unsigned long val64 = value & 0xffffffff;
185
186 lpar_rc = plpar_eoi(val64);
187 if (lpar_rc != H_Success)
188 panic("bad return code EOI - rc = %ld, value=%lx\n", lpar_rc,
189 val64);
190}
191
192void pSeriesLP_cppr_info(int n_cpu, u8 value)
193{
194 unsigned long lpar_rc;
195
196 lpar_rc = plpar_cppr(value);
197 if (lpar_rc != H_Success)
198 panic("bad return code cppr - rc = %lx\n", lpar_rc);
199}
200
201static void pSeriesLP_qirr_info(int n_cpu , u8 value)
202{
203 unsigned long lpar_rc;
204
205 lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value);
206 if (lpar_rc != H_Success)
207 panic("bad return code qirr - rc = %lx\n", lpar_rc);
208}
209
210xics_ops pSeriesLP_ops = {
211 pSeriesLP_xirr_info_get,
212 pSeriesLP_xirr_info_set,
213 pSeriesLP_cppr_info,
214 pSeriesLP_qirr_info
215};
216
217static unsigned int xics_startup(unsigned int virq)
218{
219 unsigned int irq;
220
221 irq = irq_offset_down(virq);
222 if (radix_tree_insert(&irq_map, virt_irq_to_real(irq),
223 &virt_irq_to_real_map[irq]) == -ENOMEM)
224 printk(KERN_CRIT "Out of memory creating real -> virtual"
225 " IRQ mapping for irq %u (real 0x%x)\n",
226 virq, virt_irq_to_real(irq));
227 xics_enable_irq(virq);
228 return 0; /* return value is ignored */
229}
230
231static unsigned int real_irq_to_virt(unsigned int real_irq)
232{
233 unsigned int *ptr;
234
235 ptr = radix_tree_lookup(&irq_map, real_irq);
236 if (ptr == NULL)
237 return NO_IRQ;
238 return ptr - virt_irq_to_real_map;
239}
240
241#ifdef CONFIG_SMP
242static int get_irq_server(unsigned int irq)
243{
244 unsigned int server;
245 /* For the moment only implement delivery to all cpus or one cpu */
246 cpumask_t cpumask = irq_affinity[irq];
247 cpumask_t tmp = CPU_MASK_NONE;
248
249 if (!distribute_irqs)
250 return default_server;
251
252 if (cpus_equal(cpumask, CPU_MASK_ALL)) {
253 server = default_distrib_server;
254 } else {
255 cpus_and(tmp, cpu_online_map, cpumask);
256
257 if (cpus_empty(tmp))
258 server = default_distrib_server;
259 else
260 server = get_hard_smp_processor_id(first_cpu(tmp));
261 }
262
263 return server;
264
265}
266#else
267static int get_irq_server(unsigned int irq)
268{
269 return default_server;
270}
271#endif
272
273static void xics_enable_irq(unsigned int virq)
274{
275 unsigned int irq;
276 int call_status;
277 unsigned int server;
278
279 irq = virt_irq_to_real(irq_offset_down(virq));
280 if (irq == XICS_IPI)
281 return;
282
283 server = get_irq_server(virq);
284 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server,
285 DEFAULT_PRIORITY);
286 if (call_status != 0) {
287 printk(KERN_ERR "xics_enable_irq: irq=%d: ibm_set_xive "
288 "returned %x\n", irq, call_status);
289 return;
290 }
291
292 /* Now unmask the interrupt (often a no-op) */
293 call_status = rtas_call(ibm_int_on, 1, 1, NULL, irq);
294 if (call_status != 0) {
295 printk(KERN_ERR "xics_enable_irq: irq=%d: ibm_int_on "
296 "returned %x\n", irq, call_status);
297 return;
298 }
299}
300
301static void xics_disable_real_irq(unsigned int irq)
302{
303 int call_status;
304 unsigned int server;
305
306 if (irq == XICS_IPI)
307 return;
308
309 call_status = rtas_call(ibm_int_off, 1, 1, NULL, irq);
310 if (call_status != 0) {
311 printk(KERN_ERR "xics_disable_real_irq: irq=%d: "
312 "ibm_int_off returned %x\n", irq, call_status);
313 return;
314 }
315
316 server = get_irq_server(irq);
317 /* Have to set XIVE to 0xff to be able to remove a slot */
318 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 0xff);
319 if (call_status != 0) {
320 printk(KERN_ERR "xics_disable_irq: irq=%d: ibm_set_xive(0xff)"
321 " returned %x\n", irq, call_status);
322 return;
323 }
324}
325
326static void xics_disable_irq(unsigned int virq)
327{
328 unsigned int irq;
329
330 irq = virt_irq_to_real(irq_offset_down(virq));
331 xics_disable_real_irq(irq);
332}
333
334static void xics_end_irq(unsigned int irq)
335{
336 int cpu = smp_processor_id();
337
338 iosync();
339 ops->xirr_info_set(cpu, ((0xff << 24) |
340 (virt_irq_to_real(irq_offset_down(irq)))));
341
342}
343
344static void xics_mask_and_ack_irq(unsigned int irq)
345{
346 int cpu = smp_processor_id();
347
348 if (irq < irq_offset_value()) {
349 i8259_pic.ack(irq);
350 iosync();
351 ops->xirr_info_set(cpu, ((0xff<<24) |
352 xics_irq_8259_cascade_real));
353 iosync();
354 }
355}
356
357int xics_get_irq(struct pt_regs *regs)
358{
359 unsigned int cpu = smp_processor_id();
360 unsigned int vec;
361 int irq;
362
363 vec = ops->xirr_info_get(cpu);
364 /* (vec >> 24) == old priority */
365 vec &= 0x00ffffff;
366
367 /* for sanity, this had better be < NR_IRQS - 16 */
368 if (vec == xics_irq_8259_cascade_real) {
369 irq = i8259_irq(cpu);
370 if (irq == -1) {
371 /* Spurious cascaded interrupt. Still must ack xics */
372 xics_end_irq(irq_offset_up(xics_irq_8259_cascade));
373
374 irq = -1;
375 }
376 } else if (vec == XICS_IRQ_SPURIOUS) {
377 irq = -1;
378 } else {
379 irq = real_irq_to_virt(vec);
380 if (irq == NO_IRQ)
381 irq = real_irq_to_virt_slowpath(vec);
382 if (irq == NO_IRQ) {
383 printk(KERN_ERR "Interrupt %d (real) is invalid,"
384 " disabling it.\n", vec);
385 xics_disable_real_irq(vec);
386 } else
387 irq = irq_offset_up(irq);
388 }
389 return irq;
390}
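
xics_get_irq() and xics_end_irq() open-code the XIRR layout: an 8-bit priority (CPPR) in the top byte over a 24-bit interrupt source (XISR). Hypothetical helpers that make the encoding explicit:

	#define XIRR_XISR(xirr)		((xirr) & 0x00ffffff)	/* source */
	#define XIRR_CPPR(xirr)		((u32)(xirr) >> 24)	/* priority */
	#define XIRR_MAKE(cppr, xisr)	(((u32)(cppr) << 24) | XIRR_XISR(xisr))

An EOI is then a write of XIRR_MAKE(0xff, irq): the least-favored priority with the source being retired.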
391
392#ifdef CONFIG_SMP
393
394irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
395{
396 int cpu = smp_processor_id();
397
398 ops->qirr_info(cpu, 0xff);
399
400 WARN_ON(cpu_is_offline(cpu));
401
402 while (xics_ipi_message[cpu].value) {
403 if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION,
404 &xics_ipi_message[cpu].value)) {
405 mb();
406 smp_message_recv(PPC_MSG_CALL_FUNCTION, regs);
407 }
408 if (test_and_clear_bit(PPC_MSG_RESCHEDULE,
409 &xics_ipi_message[cpu].value)) {
410 mb();
411 smp_message_recv(PPC_MSG_RESCHEDULE, regs);
412 }
413#if 0
414 if (test_and_clear_bit(PPC_MSG_MIGRATE_TASK,
415 &xics_ipi_message[cpu].value)) {
416 mb();
417 smp_message_recv(PPC_MSG_MIGRATE_TASK, regs);
418 }
419#endif
420#ifdef CONFIG_DEBUGGER
421 if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK,
422 &xics_ipi_message[cpu].value)) {
423 mb();
424 smp_message_recv(PPC_MSG_DEBUGGER_BREAK, regs);
425 }
426#endif
427 }
428 return IRQ_HANDLED;
429}
430
431void xics_cause_IPI(int cpu)
432{
433 ops->qirr_info(cpu, IPI_PRIORITY);
434}
435
436void xics_setup_cpu(void)
437{
438 int cpu = smp_processor_id();
439
440 ops->cppr_info(cpu, 0xff);
441 iosync();
442}
443
444#endif /* CONFIG_SMP */
445
446void xics_init_IRQ(void)
447{
448 int i;
449 unsigned long intr_size = 0;
450 struct device_node *np;
451 uint *ireg, ilen, indx = 0;
452 unsigned long intr_base = 0;
453 struct xics_interrupt_node {
454 unsigned long addr;
455 unsigned long size;
456 } intnodes[NR_CPUS];
457
458 ppc64_boot_msg(0x20, "XICS Init");
459
460 ibm_get_xive = rtas_token("ibm,get-xive");
461 ibm_set_xive = rtas_token("ibm,set-xive");
462 ibm_int_on = rtas_token("ibm,int-on");
463 ibm_int_off = rtas_token("ibm,int-off");
464
465 np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation");
466 if (!np)
467 panic("xics_init_IRQ: can't find interrupt presentation");
468
469nextnode:
470 ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL);
471 if (ireg) {
472 /*
473 * set node starting index for this node
474 */
475 indx = *ireg;
476 }
477
478 ireg = (uint *)get_property(np, "reg", &ilen);
479 if (!ireg)
480 panic("xics_init_IRQ: can't find interrupt reg property");
481
482 while (ilen) {
483 intnodes[indx].addr = (unsigned long)*ireg++ << 32;
484 ilen -= sizeof(uint);
485 intnodes[indx].addr |= *ireg++;
486 ilen -= sizeof(uint);
487 intnodes[indx].size = (unsigned long)*ireg++ << 32;
488 ilen -= sizeof(uint);
489 intnodes[indx].size |= *ireg++;
490 ilen -= sizeof(uint);
491 indx++;
492 if (indx >= NR_CPUS) break;
493 }
494
495 np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation");
496 if ((indx < NR_CPUS) && np) goto nextnode;
497
498 /* Find the server numbers for the boot cpu. */
499 for (np = of_find_node_by_type(NULL, "cpu");
500 np;
501 np = of_find_node_by_type(np, "cpu")) {
502 ireg = (uint *)get_property(np, "reg", &ilen);
503 if (ireg && ireg[0] == boot_cpuid_phys) {
504 ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s",
505 &ilen);
506 i = ilen / sizeof(int);
507 if (ireg && i > 0) {
508 default_server = ireg[0];
509 default_distrib_server = ireg[i-1]; /* take last element */
510 }
511 ireg = (uint *)get_property(np,
512 "ibm,interrupt-server#-size", NULL);
513 if (ireg)
514 interrupt_server_size = *ireg;
515 break;
516 }
517 }
518 of_node_put(np);
519
520 intr_base = intnodes[0].addr;
521 intr_size = intnodes[0].size;
522
523 np = of_find_node_by_type(NULL, "interrupt-controller");
524 if (!np) {
525 printk(KERN_WARNING "xics: no ISA interrupt controller\n");
526 xics_irq_8259_cascade_real = -1;
527 xics_irq_8259_cascade = -1;
528 } else {
529 ireg = (uint *) get_property(np, "interrupts", NULL);
530 if (!ireg)
531 panic("xics_init_IRQ: can't find ISA interrupts property");
532
533 xics_irq_8259_cascade_real = *ireg;
534 xics_irq_8259_cascade
535 = virt_irq_create_mapping(xics_irq_8259_cascade_real);
536 of_node_put(np);
537 }
538
539 if (systemcfg->platform == PLATFORM_PSERIES) {
540#ifdef CONFIG_SMP
541 for_each_cpu(i) {
542 int hard_id;
543
544 /* FIXME: Do this dynamically! --RR */
545 if (!cpu_present(i))
546 continue;
547
548 hard_id = get_hard_smp_processor_id(i);
549 xics_per_cpu[i] = ioremap(intnodes[hard_id].addr,
550 intnodes[hard_id].size);
551 }
552#else
553 xics_per_cpu[0] = ioremap(intr_base, intr_size);
554#endif /* CONFIG_SMP */
555 } else if (systemcfg->platform == PLATFORM_PSERIES_LPAR) {
556 ops = &pSeriesLP_ops;
557 }
558
559 xics_8259_pic.enable = i8259_pic.enable;
560 xics_8259_pic.disable = i8259_pic.disable;
561 for (i = 0; i < 16; ++i)
562 get_irq_desc(i)->handler = &xics_8259_pic;
563 for (; i < NR_IRQS; ++i)
564 get_irq_desc(i)->handler = &xics_pic;
565
566 ops->cppr_info(boot_cpuid, 0xff);
567 iosync();
568
569 ppc64_boot_msg(0x21, "XICS Done");
570}
571
572/*
573 * We can't do this in init_IRQ because we need the memory subsystem up for
574 * request_irq()
575 */
576static int __init xics_setup_i8259(void)
577{
578 if (ppc64_interrupt_controller == IC_PPC_XIC &&
579 xics_irq_8259_cascade != -1) {
580 if (request_irq(irq_offset_up(xics_irq_8259_cascade),
581 no_action, 0, "8259 cascade", NULL))
582 printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 "
583 "cascade\n");
584 i8259_init(0);
585 }
586 return 0;
587}
588arch_initcall(xics_setup_i8259);
589
590#ifdef CONFIG_SMP
591void xics_request_IPIs(void)
592{
593 virt_irq_to_real_map[XICS_IPI] = XICS_IPI;
594
595 /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
596 request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT,
597 "IPI", NULL);
598 get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU;
599}
600#endif
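
The sending side of this IPI protocol lives in smp.c rather than here; roughly, it sets the per-CPU message bit that xics_ipi_action() tests, orders the store, then raises the interrupt through the MFRR. A sketch, with the helper name hypothetical:

	static void example_message_pass(int cpu, int msg)
	{
		set_bit(msg, &xics_ipi_message[cpu].value);
		mb();	/* publish the bit before triggering the IPI */
		xics_cause_IPI(cpu);
	}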
601
602static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
603{
604 unsigned int irq;
605 int status;
606 int xics_status[2];
607 unsigned long newmask;
608 cpumask_t tmp = CPU_MASK_NONE;
609
610 irq = virt_irq_to_real(irq_offset_down(virq));
611 if (irq == XICS_IPI || irq == NO_IRQ)
612 return;
613
614 status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
615
616 if (status) {
617 printk(KERN_ERR "xics_set_affinity: irq=%d ibm,get-xive "
618 "returns %d\n", irq, status);
619 return;
620 }
621
622 /* For the moment only implement delivery to all cpus or one cpu */
623 if (cpus_equal(cpumask, CPU_MASK_ALL)) {
624 newmask = default_distrib_server;
625 } else {
626 cpus_and(tmp, cpu_online_map, cpumask);
627 if (cpus_empty(tmp))
628 return;
629 newmask = get_hard_smp_processor_id(first_cpu(tmp));
630 }
631
632 status = rtas_call(ibm_set_xive, 3, 1, NULL,
633 irq, newmask, xics_status[1]);
634
635 if (status) {
636 printk(KERN_ERR "xics_set_affinity: irq=%d ibm,set-xive "
637 "returns %d\n", irq, status);
638 return;
639 }
640}
641
642#ifdef CONFIG_HOTPLUG_CPU
643
644/* Interrupts are disabled. */
645void xics_migrate_irqs_away(void)
646{
647 int status;
648 unsigned int irq, virq, cpu = smp_processor_id();
649
650 /* Reject any interrupt that was queued to us... */
651 ops->cppr_info(cpu, 0);
652 iosync();
653
654 /* remove ourselves from the global interrupt queue */
655 status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
656 (1UL << interrupt_server_size) - 1 - default_distrib_server, 0);
657 WARN_ON(status < 0);
658
659 /* Allow IPIs again... */
660 ops->cppr_info(cpu, DEFAULT_PRIORITY);
661 iosync();
662
663 for_each_irq(virq) {
664 irq_desc_t *desc;
665 int xics_status[2];
666 unsigned long flags;
667
668		/* We can't set affinity on ISA interrupts */
669 if (virq < irq_offset_value())
670 continue;
671
672 desc = get_irq_desc(virq);
673 irq = virt_irq_to_real(irq_offset_down(virq));
674
675 /* We need to get IPIs still. */
676 if (irq == XICS_IPI || irq == NO_IRQ)
677 continue;
678
679 /* We only need to migrate enabled IRQS */
680 if (desc == NULL || desc->handler == NULL
681 || desc->action == NULL
682 || desc->handler->set_affinity == NULL)
683 continue;
684
685 spin_lock_irqsave(&desc->lock, flags);
686
687 status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
688 if (status) {
689 printk(KERN_ERR "migrate_irqs_away: irq=%d "
690 "ibm,get-xive returns %d\n",
691 virq, status);
692 goto unlock;
693 }
694
695 /*
696 * We only support delivery to all cpus or to one cpu.
697 * The irq has to be migrated only in the single cpu
698 * case.
699 */
700 if (xics_status[0] != get_hard_smp_processor_id(cpu))
701 goto unlock;
702
703 printk(KERN_WARNING "IRQ %d affinity broken off cpu %u\n",
704 virq, cpu);
705
706 /* Reset affinity to all cpus */
707 desc->handler->set_affinity(virq, CPU_MASK_ALL);
708 irq_affinity[virq] = CPU_MASK_ALL;
709unlock:
710 spin_unlock_irqrestore(&desc->lock, flags);
711 }
712}
713#endif